def _piped_realign_gatk(data, region, cl, out_base_file, tmp_dir, prep_params):
    """Stage a pre-alignment BAM on disk and prepare the GATK realignment command.

    The output of ``cl`` is written to ``<stem>-prealign<ext>`` (derived from
    ``out_base_file``) unless that file already exists.  The BAM is then
    indexed, and the realigner targets and the indel realignment command
    line are requested from the ``realign`` helpers.

    ``prep_params`` is accepted but not read by this function.

    Returns a ``(pre-alignment BAM path, realignment command)`` tuple; the
    command is handed back exactly as ``gatk_indel_realignment_cl`` produced it.
    """
    runner = broad.runner_from_config(data["config"])
    stem, ext = os.path.splitext(out_base_file)
    prealign_bam = "%s-prealign%s" % (stem, ext)
    if not utils.file_exists(prealign_bam):
        with file_transaction(data, prealign_bam) as tx_out_file:
            # The upstream command writes to the path supplied by file_transaction via -o.
            write_cmd = "{cl} -o {tx_out_file}".format(cl=cl, tx_out_file=tx_out_file)
            do.run(write_cmd, "GATK re-alignment {0}".format(region), data)
    # NOTE(review): indexing runs on every call, not only when the BAM was just written -- confirm nesting.
    bam.index(prealign_bam, data["config"])
    intervals = realign.gatk_realigner_targets(
        runner,
        prealign_bam,
        data["sam_ref"],
        data["config"],
        region=region_to_gatk(region),
        known_vrns=dd.get_variation_resources(data),
    )
    realign_cl = realign.gatk_indel_realignment_cl(
        runner,
        prealign_bam,
        data["sam_ref"],
        intervals,
        tmp_dir,
        region=region_to_gatk(region),
        known_vrns=dd.get_variation_resources(data),
    )
    return prealign_bam, realign_cl
def _piped_realign_gatk(data, region, cl, out_base_file, tmp_dir):
    """Stage a pre-alignment BAM on disk and prepare the GATK realignment command.

    The standard output of ``cl`` is redirected by the shell into
    ``<stem>-prealign<ext>`` (derived from ``out_base_file``) unless that
    file already exists.  The BAM is indexed through the runner's
    ``picard_index`` function, then the realigner targets and the indel
    realignment arguments are requested from the ``realign`` helpers.

    Returns a ``(pre-alignment BAM path, realignment command string)`` tuple,
    where the command string is the space-joined helper output.
    """
    runner = broad.runner_from_config(data["config"])
    stem, ext = os.path.splitext(out_base_file)
    prealign_bam = "%s-prealign%s" % (stem, ext)
    if not utils.file_exists(prealign_bam):
        with file_transaction(prealign_bam) as tx_out_file:
            # Shell redirection is required here, hence shell=True.
            shell_cmd = "{cl} > {tx_out_file}".format(cl=cl, tx_out_file=tx_out_file)
            subprocess.check_call(shell_cmd, shell=True)
    # NOTE(review): indexing runs on every call, not only when the BAM was just written -- confirm nesting.
    runner.run_fn("picard_index", prealign_bam)
    intervals = realign.gatk_realigner_targets(
        runner,
        prealign_bam,
        data["sam_ref"],
        dbsnp=shared.configured_ref_file("dbsnp", data["config"], data["sam_ref"]),
        region=region_to_gatk(region),
    )
    cl_parts = realign.gatk_indel_realignment_cl(
        runner,
        prealign_bam,
        data["sam_ref"],
        intervals,
        tmp_dir,
        region=region_to_gatk(region),
    )
    return prealign_bam, " ".join(cl_parts)
def _piped_realign_gatk(data, region, cl, out_base_file, tmp_dir, prep_params):
    """Stage a pre-alignment BAM on disk and prepare the GATK realignment command.

    The output of ``cl`` is written to ``<stem>-prealign<ext>`` (derived from
    ``out_base_file``) unless that file already exists.  When
    ``prep_params["dup"]`` is truthy the output is captured with a shell
    redirect (``>``); otherwise the path is passed with ``-o``.  The BAM is
    then indexed, and the realigner targets and the indel realignment
    arguments are requested from the ``realign`` helpers.

    Returns a ``(pre-alignment BAM path, realignment command string)`` tuple,
    where the command string is the space-joined helper output.
    """
    runner = broad.runner_from_config(data["config"])
    stem, ext = os.path.splitext(out_base_file)
    prealign_bam = "%s-prealign%s" % (stem, ext)
    if not utils.file_exists(prealign_bam):
        with file_transaction(prealign_bam) as tx_out_file:
            # Choose how the upstream command delivers its output.
            if prep_params["dup"]:
                pipe = ">"
            else:
                pipe = "-o"
            write_cmd = "{cl} {pipe} {tx_out_file}".format(
                cl=cl, pipe=pipe, tx_out_file=tx_out_file)
            do.run(write_cmd, "GATK pre-alignment {0}".format(region), data)
    # NOTE(review): indexing runs on every call, not only when the BAM was just written -- confirm nesting.
    bam.index(prealign_bam, data["config"])
    variation = data["genome_resources"]["variation"]
    known_dbsnp = variation["dbsnp"]
    intervals = realign.gatk_realigner_targets(
        runner,
        prealign_bam,
        data["sam_ref"],
        dbsnp=known_dbsnp,
        region=region_to_gatk(region),
    )
    cl_parts = realign.gatk_indel_realignment_cl(
        runner,
        prealign_bam,
        data["sam_ref"],
        intervals,
        tmp_dir,
        region=region_to_gatk(region),
    )
    return prealign_bam, " ".join(cl_parts)
def _piped_realign_gatk(data, region, cl, out_base_file, tmp_dir, prep_params):
    """Stage a pre-alignment BAM on disk and prepare the GATK realignment command.

    The output of ``cl`` is written (via ``-o``) to ``<stem>-prealign<ext>``
    (derived from ``out_base_file``) unless that file already exists.  The
    BAM is then indexed, and the realigner targets and the indel realignment
    arguments are requested from the ``realign`` helpers.

    ``prep_params`` is accepted but not read by this function.

    Returns a ``(pre-alignment BAM path, realignment command string)`` tuple,
    where the command string is the space-joined helper output.
    """
    runner = broad.runner_from_config(data["config"])
    stem, ext = os.path.splitext(out_base_file)
    prealign_bam = "%s-prealign%s" % (stem, ext)
    if not utils.file_exists(prealign_bam):
        with file_transaction(data, prealign_bam) as tx_out_file:
            write_cmd = "{cl} -o {tx_out_file}".format(cl=cl, tx_out_file=tx_out_file)
            do.run(write_cmd, "GATK pre-alignment {0}".format(region), data)
    # NOTE(review): indexing runs on every call, not only when the BAM was just written -- confirm nesting.
    bam.index(prealign_bam, data["config"])
    intervals = realign.gatk_realigner_targets(
        runner, prealign_bam, data["sam_ref"], data["config"],
        region=region_to_gatk(region),
        known_vrns=dd.get_variation_resources(data))
    cl_parts = realign.gatk_indel_realignment_cl(
        runner, prealign_bam, data["sam_ref"], intervals, tmp_dir,
        region=region_to_gatk(region),
        known_vrns=dd.get_variation_resources(data))
    realign_cmd = " ".join(cl_parts)
    return prealign_bam, realign_cmd
def _piped_realign_gatk(data, region, cl, out_base_file, tmp_dir):
    """Stage a pre-alignment BAM on disk and prepare the GATK realignment command.

    The standard output of ``cl`` is redirected by the shell into
    ``<stem>-prealign<ext>`` (derived from ``out_base_file``) unless that
    file already exists.  The BAM is indexed through the runner's
    ``picard_index`` function, then the realigner targets and the indel
    realignment arguments are requested from the ``realign`` helpers.

    Returns a ``(pre-alignment BAM path, realignment command string)`` tuple,
    where the command string is the space-joined helper output.
    """
    runner = broad.runner_from_config(data["config"])
    stem, ext = os.path.splitext(out_base_file)
    prealign_bam = "%s-prealign%s" % (stem, ext)
    if not utils.file_exists(prealign_bam):
        with file_transaction(prealign_bam) as tx_out_file:
            # Shell redirection is required here, hence shell=True.
            shell_cmd = "{cl} > {tx_out_file}".format(cl=cl, tx_out_file=tx_out_file)
            subprocess.check_call(shell_cmd, shell=True)
    # NOTE(review): indexing runs on every call, not only when the BAM was just written -- confirm nesting.
    runner.run_fn("picard_index", prealign_bam)
    dbsnp_vcf = shared.configured_ref_file("dbsnp", data["config"], data["sam_ref"])
    intervals = realign.gatk_realigner_targets(
        runner, prealign_bam, data["sam_ref"],
        dbsnp=dbsnp_vcf, region=_region_to_gatk(region))
    cl_parts = realign.gatk_indel_realignment_cl(
        runner, prealign_bam, data["sam_ref"], intervals, tmp_dir,
        region=_region_to_gatk(region))
    realign_cmd = " ".join(cl_parts)
    return prealign_bam, realign_cmd
def _piped_realign_gatk(data, region, cl, out_base_file, tmp_dir, prep_params):
    """Stage a pre-alignment BAM on disk and prepare the GATK realignment command.

    The output of ``cl`` is written to ``<stem>-prealign<ext>`` (derived from
    ``out_base_file``) unless that file already exists.  When
    ``prep_params["dup"]`` is truthy the output is captured with a shell
    redirect (``>``); otherwise the path is passed with ``-o``.  The BAM is
    indexed through the runner's ``picard_index`` function, then the
    realigner targets and the indel realignment arguments are requested from
    the ``realign`` helpers.

    Returns a ``(pre-alignment BAM path, realignment command string)`` tuple,
    where the command string is the space-joined helper output.
    """
    runner = broad.runner_from_config(data["config"])
    stem, ext = os.path.splitext(out_base_file)
    prealign_bam = "%s-prealign%s" % (stem, ext)
    if not utils.file_exists(prealign_bam):
        with file_transaction(prealign_bam) as tx_out_file:
            # Choose how the upstream command delivers its output.
            if prep_params["dup"]:
                pipe = ">"
            else:
                pipe = "-o"
            write_cmd = "{cl} {pipe} {tx_out_file}".format(
                cl=cl, pipe=pipe, tx_out_file=tx_out_file)
            do.run(write_cmd, "GATK pre-alignment {0}".format(region), data)
    # NOTE(review): indexing runs on every call, not only when the BAM was just written -- confirm nesting.
    runner.run_fn("picard_index", prealign_bam)
    known_dbsnp = data["genome_resources"]["variation"]["dbsnp"]
    intervals = realign.gatk_realigner_targets(
        runner, prealign_bam, data["sam_ref"],
        dbsnp=known_dbsnp, region=region_to_gatk(region))
    cl_parts = realign.gatk_indel_realignment_cl(
        runner, prealign_bam, data["sam_ref"], intervals, tmp_dir,
        region=region_to_gatk(region))
    realign_cmd = " ".join(cl_parts)
    return prealign_bam, realign_cmd