def _get_bwa_mem_cmd(data, out_file, ref_file, fastq1, fastq2="", with_hla=False): """Perform piped bwa mem mapping potentially with alternative alleles in GRCh38 + HLA typing. Commands for HLA post-processing: base=TEST run-HLA $base.hla > $base.hla.top cat $base.hla.HLA*.gt | grep ^GT | cut -f2- > $base.hla.all rm -f $base.hla.HLA*gt rm -f $base.hla.HLA*gz """ alt_file = ref_file + ".alt" if with_hla: bwakit_dir = os.path.dirname( os.path.realpath(utils.which("run-bwamem"))) hla_base = os.path.join( utils.safe_makedir(os.path.join(os.path.dirname(out_file), "hla")), os.path.basename(out_file) + ".hla") alt_cmd = ( " | {bwakit_dir}/k8 {bwakit_dir}/bwa-postalt.js -p {hla_base} {alt_file}" ) else: alt_cmd = "" if dd.get_aligner(data) == "sentieon-bwa": bwa_exe = "sentieon-bwa" exports = sentieon.license_export(data) else: bwa_exe = "bwa" exports = "" bwa = config_utils.get_program(bwa_exe, data["config"]) num_cores = data["config"]["algorithm"].get("num_cores", 1) bwa_resources = config_utils.get_resources("bwa", data["config"]) bwa_params = (" ".join([str(x) for x in bwa_resources.get("options", [])]) if "options" in bwa_resources else "") rg_info = novoalign.get_rg_info(data["rgnames"]) # For UMI runs, pass along consensus tags c_tags = "-C" if "umi_bam" in data else "" pairing = "-p" if not fastq2 else "" # Restrict seed occurances to 1/2 of default, manage memory usage for centromere repeats in hg38 # https://sourceforge.net/p/bio-bwa/mailman/message/31514937/ # http://ehc.ac/p/bio-bwa/mailman/message/32268544/ mem_usage = "-c 250" bwa_cmd = ( "{exports}{bwa} mem {pairing} {c_tags} {mem_usage} -M -t {num_cores} {bwa_params} -R '{rg_info}' " "-v 1 {ref_file} {fastq1} {fastq2} ") return (bwa_cmd + alt_cmd).format(**locals())
def _get_bwa_mem_cmd(data, out_file, ref_file, fastq1, fastq2=""): """Perform piped bwa mem mapping potentially with alternative alleles in GRCh38 + HLA typing. Commands for HLA post-processing: base=TEST run-HLA $base.hla > $base.hla.top cat $base.hla.HLA*.gt | grep ^GT | cut -f2- > $base.hla.all rm -f $base.hla.HLA*gt rm -f $base.hla.HLA*gz """ alt_file = ref_file + ".alt" if utils.file_exists(alt_file) and dd.get_hlacaller(data): bwakit_dir = os.path.dirname(os.path.realpath(utils.which("run-bwamem"))) hla_base = os.path.join(utils.safe_makedir(os.path.join(os.path.dirname(out_file), "hla")), os.path.basename(out_file) + ".hla") alt_cmd = (" | {bwakit_dir}/k8 {bwakit_dir}/bwa-postalt.js -p {hla_base} {alt_file}") else: alt_cmd = "" if dd.get_aligner(data) == "sentieon-bwa": bwa_exe = "sentieon-bwa" exports = sentieon.license_export(data) else: bwa_exe = "bwa" exports = "" bwa = config_utils.get_program(bwa_exe, data["config"]) num_cores = data["config"]["algorithm"].get("num_cores", 1) bwa_resources = config_utils.get_resources("bwa", data["config"]) bwa_params = (" ".join([str(x) for x in bwa_resources.get("options", [])]) if "options" in bwa_resources else "") rg_info = novoalign.get_rg_info(data["rgnames"]) # For UMI runs, pass along consensus tags c_tags = "-C" if "umi_bam" in data else "" pairing = "-p" if not fastq2 else "" # Restrict seed occurances to 1/2 of default, manage memory usage for centromere repeats in hg38 # https://sourceforge.net/p/bio-bwa/mailman/message/31514937/ # http://ehc.ac/p/bio-bwa/mailman/message/32268544/ mem_usage = "-c 250" bwa_cmd = ("{exports}{bwa} mem {pairing} {c_tags} {mem_usage} -M -t {num_cores} {bwa_params} -R '{rg_info}' " "-v 1 {ref_file} {fastq1} {fastq2} ") return (bwa_cmd + alt_cmd).format(**locals())