Esempio n. 1
0
def _get_bwa_mem_cmd(data, out_file, ref_file, fastq1, fastq2="", with_hla=False):
    """Build the shell command string for a piped bwa mem alignment.

    The alignment itself is not run here; the formatted command is returned.
    When ``with_hla`` is set, the bwa output is additionally piped through
    bwakit's ``bwa-postalt.js`` (alternative alleles in GRCh38 + HLA typing)
    with an output prefix inside an ``hla`` directory next to ``out_file``.

    Commands for HLA post-processing:
       base=TEST
       run-HLA $base.hla > $base.hla.top
       cat $base.hla.HLA*.gt | grep ^GT | cut -f2- > $base.hla.all
       rm -f $base.hla.HLA*gt
       rm -f $base.hla.HLA*gz

    Args:
        data: Sample dictionary. ``data["config"]`` and ``data["rgnames"]`` are
            read, and the presence of an ``"umi_bam"`` key adds ``-C``.
        out_file: Output path; only its directory and base name are used, to
            derive the HLA output prefix.
        ref_file: Reference passed to bwa; ``ref_file + ".alt"`` is handed to
            bwa-postalt.js when ``with_hla`` is set.
        fastq1: First input placed on the bwa command line.
        fastq2: Optional second input; when empty, ``-p`` is added instead.
        with_hla: Append the bwa-postalt.js pipe stage when true.

    Returns:
        The complete command line as a string.
    """
    alt_file = ref_file + ".alt"
    if with_hla:
        # k8 and bwa-postalt.js are expected next to the resolved run-bwamem script.
        bwakit_dir = os.path.dirname(os.path.realpath(utils.which("run-bwamem")))
        hla_dir = utils.safe_makedir(os.path.join(os.path.dirname(out_file), "hla"))
        hla_base = os.path.join(hla_dir, os.path.basename(out_file) + ".hla")
        alt_cmd = " | {bwakit_dir}/k8 {bwakit_dir}/bwa-postalt.js -p {hla_base} {alt_file}".format(
            bwakit_dir=bwakit_dir, hla_base=hla_base, alt_file=alt_file)
    else:
        alt_cmd = ""

    # The sentieon build of bwa needs its license exports prefixed to the command.
    use_sentieon = dd.get_aligner(data) == "sentieon-bwa"
    bwa_exe = "sentieon-bwa" if use_sentieon else "bwa"
    exports = sentieon.license_export(data) if use_sentieon else ""

    config = data["config"]
    bwa = config_utils.get_program(bwa_exe, config)
    num_cores = config["algorithm"].get("num_cores", 1)
    resources = config_utils.get_resources("bwa", config)
    if "options" in resources:
        bwa_params = " ".join(map(str, resources.get("options", [])))
    else:
        bwa_params = ""
    rg_info = novoalign.get_rg_info(data["rgnames"])

    template = ("{exports}{bwa} mem {pairing} {c_tags} {mem_usage} -M -t {num_cores} {bwa_params} -R '{rg_info}' "
                "-v 1 {ref_file} {fastq1} {fastq2} ")
    bwa_cmd = template.format(
        exports=exports,
        bwa=bwa,
        # Without a second FASTQ, -p is passed for the single input.
        pairing="" if fastq2 else "-p",
        # For UMI runs, pass along consensus tags
        c_tags="-C" if "umi_bam" in data else "",
        # Restrict seed occurrences to 1/2 of default, manage memory usage for
        # centromere repeats in hg38
        # https://sourceforge.net/p/bio-bwa/mailman/message/31514937/
        # http://ehc.ac/p/bio-bwa/mailman/message/32268544/
        mem_usage="-c 250",
        num_cores=num_cores,
        bwa_params=bwa_params,
        rg_info=rg_info,
        ref_file=ref_file,
        fastq1=fastq1,
        fastq2=fastq2)
    return bwa_cmd + alt_cmd
Esempio n. 2
0
def _get_bwa_mem_cmd(data, out_file, ref_file, fastq1, fastq2=""):
    """Build the shell command string for a piped bwa mem alignment.

    The alignment itself is not run here; the formatted command is returned.
    If ``ref_file + ".alt"`` exists and ``dd.get_hlacaller(data)`` is truthy,
    the bwa output is additionally piped through bwakit's ``bwa-postalt.js``
    (alternative alleles in GRCh38 + HLA typing) with an output prefix inside
    an ``hla`` directory next to ``out_file``.

    Commands for HLA post-processing:
       base=TEST
       run-HLA $base.hla > $base.hla.top
       cat $base.hla.HLA*.gt | grep ^GT | cut -f2- > $base.hla.all
       rm -f $base.hla.HLA*gt
       rm -f $base.hla.HLA*gz

    Args:
        data: Sample dictionary. ``data["config"]`` and ``data["rgnames"]`` are
            read, and the presence of an ``"umi_bam"`` key adds ``-C``.
        out_file: Output path; only its directory and base name are used, to
            derive the HLA output prefix.
        ref_file: Reference passed to bwa; its ``.alt`` sibling decides whether
            the post-alt stage is appended.
        fastq1: First input placed on the bwa command line.
        fastq2: Optional second input; when empty, ``-p`` is added instead.

    Returns:
        The complete command line as a string.
    """
    alt_file = ref_file + ".alt"
    alt_cmd = ""
    alt_fields = {}
    if utils.file_exists(alt_file) and dd.get_hlacaller(data):
        # k8 and bwa-postalt.js are expected next to the resolved run-bwamem script.
        run_bwamem = os.path.realpath(utils.which("run-bwamem"))
        hla_dir = utils.safe_makedir(os.path.join(os.path.dirname(out_file), "hla"))
        alt_fields = {
            "bwakit_dir": os.path.dirname(run_bwamem),
            "hla_base": os.path.join(hla_dir, os.path.basename(out_file) + ".hla"),
            "alt_file": alt_file,
        }
        alt_cmd = " | {bwakit_dir}/k8 {bwakit_dir}/bwa-postalt.js -p {hla_base} {alt_file}"

    # The sentieon build of bwa needs its license exports prefixed to the command.
    if dd.get_aligner(data) == "sentieon-bwa":
        bwa_exe, exports = "sentieon-bwa", sentieon.license_export(data)
    else:
        bwa_exe, exports = "bwa", ""

    config = data["config"]
    fields = {"exports": exports, "bwa": config_utils.get_program(bwa_exe, config)}
    fields["num_cores"] = config["algorithm"].get("num_cores", 1)
    resources = config_utils.get_resources("bwa", config)
    if "options" in resources:
        fields["bwa_params"] = " ".join(str(opt) for opt in resources.get("options", []))
    else:
        fields["bwa_params"] = ""
    fields["rg_info"] = novoalign.get_rg_info(data["rgnames"])
    # For UMI runs, pass along consensus tags
    fields["c_tags"] = "-C" if "umi_bam" in data else ""
    # Without a second FASTQ, -p is passed for the single input.
    fields["pairing"] = "" if fastq2 else "-p"
    # Restrict seed occurrences to 1/2 of default, manage memory usage for centromere repeats in hg38
    # https://sourceforge.net/p/bio-bwa/mailman/message/31514937/
    # http://ehc.ac/p/bio-bwa/mailman/message/32268544/
    fields["mem_usage"] = "-c 250"
    fields.update(ref_file=ref_file, fastq1=fastq1, fastq2=fastq2)
    fields.update(alt_fields)

    bwa_cmd = ("{exports}{bwa} mem {pairing} {c_tags} {mem_usage} -M -t {num_cores} {bwa_params} -R '{rg_info}' "
               "-v 1 {ref_file} {fastq1} {fastq2} ")
    return (bwa_cmd + alt_cmd).format(**fields)