def _run_wham_coords(inputs, background_bams, coords, final_file):
    """Run WHAM-GRAPHENING over one region and report the per-region output.

    The output name is built from ``final_file`` by inserting the safe-string
    form of ``coords`` between its base name and extension. When that file is
    already present the command is skipped.

    inputs -- sample dictionaries; each supplies an "align_bam" entry and the
        first one provides the core count, reference file and transaction.
    background_bams -- list of extra BAM paths appended after the sample BAMs.
    coords -- region passed to ``region.to_safestr`` and
        ``bamprep.region_to_gatk``.
    final_file -- path the per-region output name is derived from.

    Returns ``[[coords, out_file]]``.
    """
    stem, suffix = os.path.splitext(final_file)
    out_file = "%s-%s%s" % (stem, region.to_safestr(coords), suffix)
    if utils.file_exists(out_file):
        # Nothing to do: a previous run already produced this region.
        return [[coords, out_file]]
    first = inputs[0]
    with file_transaction(first, out_file) as tx_out_file:
        template = ("WHAM-GRAPHENING {opts} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                    "> {tx_out_file}")
        # Keywords are spelled out so the template no longer depends on the
        # names of local variables.
        command = template.format(
            cores=dd.get_cores(first),
            ref_file=dd.get_ref_file(first),
            all_bams=",".join([sample["align_bam"] for sample in inputs] + background_bams),
            coord_str=bamprep.region_to_gatk(coords),
            opts="-k -m 30",
            tx_out_file=tx_out_file)
        do.run(command, "Run WHAM: %s" % region.to_safestr(coords))
    return [[coords, out_file]]
def _run_wham_coords(inputs, background_bams, coords, final_file):
    """Call, merge, genotype and sort WHAM output for a single region.

    A raw ``.vcf`` named after ``final_file`` and the safe-string form of
    ``coords`` is produced with WHAM-GRAPHENING unless it already exists. The
    raw file is then passed through ``_run_wham_merge``,
    ``_run_wham_genotype`` and ``vcfutils.sort_by_ref`` in that order.

    inputs -- sample dictionaries; each supplies an "align_bam" entry and the
        first one is handed to every helper that needs a sample.
    background_bams -- list of extra BAM paths appended after the sample BAMs.
    coords -- region passed to ``region.to_safestr`` and
        ``bamprep.region_to_gatk``.
    final_file -- path the raw file name is derived from.

    Returns ``[[coords, prepared_vcf]]``.
    """
    # Only the base name is needed; the raw output is always written as .vcf.
    stem, _ = utils.splitext_plus(final_file)
    raw_file = "%s-%s.vcf" % (stem, region.to_safestr(coords))
    all_bams = ",".join([sample["align_bam"] for sample in inputs] + background_bams)
    have_raw = utils.file_exists(raw_file)
    lead = inputs[0]
    if not have_raw:
        with file_transaction(lead, raw_file) as tx_raw_file:
            template = ("WHAM-GRAPHENING {opts} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                        "> {tx_raw_file}")
            command = template.format(
                cores=dd.get_cores(lead),
                ref_file=dd.get_ref_file(lead),
                coord_str=bamprep.region_to_gatk(coords),
                opts="-k -m 30",
                all_bams=all_bams,
                tx_raw_file=tx_raw_file)
            do.run(command, "Run WHAM: %s" % region.to_safestr(coords))
    merged = _run_wham_merge(raw_file, lead)
    genotyped = _run_wham_genotype(merged, all_bams, coords, lead)
    return [[coords, vcfutils.sort_by_ref(genotyped, lead)]]
def _run_wham_coords(inputs, background_bams, coords, final_file):
    """Produce the WHAM-GRAPHENING output file for one region.

    The region file is named by inserting the safe-string form of ``coords``
    before the extension of ``final_file``. The command runs only when that
    file does not exist yet.

    inputs -- sample dictionaries; each supplies an "align_bam" entry and the
        first one provides the core count, reference file and transaction.
    background_bams -- list of extra BAM paths appended after the sample BAMs.
    coords -- region passed to ``region.to_safestr`` and
        ``bamprep.region_to_gatk``.
    final_file -- path the per-region output name is derived from.

    Returns a single-element list: ``[[coords, out_file]]``.
    """
    root, extension = os.path.splitext(final_file)
    out_file = "%s-%s%s" % (root, region.to_safestr(coords), extension)
    if not utils.file_exists(out_file):
        sample = inputs[0]
        with file_transaction(sample, out_file) as tx_out_file:
            # Collect the template fields in one place instead of relying on
            # locals() to resolve them.
            fields = {}
            fields["cores"] = dd.get_cores(sample)
            fields["ref_file"] = dd.get_ref_file(sample)
            fields["all_bams"] = ",".join([x["align_bam"] for x in inputs] + background_bams)
            fields["coord_str"] = bamprep.region_to_gatk(coords)
            fields["opts"] = "-k -m 30"
            fields["tx_out_file"] = tx_out_file
            cmd = ("WHAM-GRAPHENING {opts} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                   "> {tx_out_file}")
            do.run(cmd.format(**fields), "Run WHAM: %s" % region.to_safestr(coords))
    return [[coords, out_file]]
def _run_wham_genotype(in_file, all_bams, coords, data):
    """Genotype a prepped, merged VCF with WHAM-GRAPHENING for one region.

    The output path is ``in_file`` with ``-wgts`` inserted before the
    extension reported by ``utils.splitext_plus``. The command is skipped when
    that file already exists.

    in_file -- VCF passed to the command through ``-b``.
    all_bams -- BAM list string passed through ``-f``.
    coords -- region passed to ``bamprep.region_to_gatk`` and
        ``region.to_safestr``.
    data -- sample dictionary providing the core count, reference file and
        the file transaction.

    Returns the path of the genotyped file.
    """
    stem, suffix = utils.splitext_plus(in_file)
    out_file = "%s-wgts%s" % (stem, suffix)
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        template = ("WHAM-GRAPHENING -b {in_file} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                    "> {tx_out_file}")
        command = template.format(
            in_file=in_file,
            cores=dd.get_cores(data),
            ref_file=dd.get_ref_file(data),
            all_bams=all_bams,
            coord_str=bamprep.region_to_gatk(coords),
            tx_out_file=tx_out_file)
        do.run(command, "Genotype WHAM: %s" % region.to_safestr(coords))
    return out_file
def _do_work(data):
    """Work out the combined output file and the per-region part for ``data``.

    ``dir_ext_fn`` and ``ext`` are not defined here; they are resolved from
    the surrounding scope.

    Returns ``(out_file, [(region, region_file)])`` when ``data`` has a
    "region" entry, otherwise ``(None, [])``.
    """
    if "region" not in data:
        return None, []
    # Grouped samples are named after the first group entry, others after
    # their description.
    if "group" in data:
        name = data["group"][0]
    else:
        name = data["description"]
    out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
    out_file = os.path.join(out_dir, "%s%s" % (name, ext))
    current = data["region"]
    # Region files live in a sub-directory named after the region's first item.
    region_dir = os.path.join(out_dir, current[0])
    region_file = os.path.join(region_dir,
                               "%s-%s%s" % (name, region.to_safestr(current), ext))
    return out_file, [(current, region_file)]
def _do_work(data):
    """Work out the combined output file and the pending region part.

    Inputs without a "region" entry, or whose region starts with "nochrom" or
    "noanalysis", yield ``(None, [])``. Otherwise the combined file path is
    returned together with the region part, which is only listed when the
    combined file is missing or the sample's ``metadata/phenotype`` value is
    in ``batch_drivers``.

    ``dir_ext_fn``, ``ext`` and ``batch_drivers`` are not defined here; they
    are resolved from the surrounding scope.
    """
    if "region" not in data:
        return None, []
    if data["region"][0] in ("nochrom", "noanalysis"):
        return None, []
    if "group" in data:
        name = data["group"][0]
    else:
        name = data["description"]
    out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
    out_file = os.path.join(out_dir, "%s%s" % (name, ext))
    # The phenotype lookup is only reached when the combined file exists.
    needs_region = (not utils.file_exists(out_file)
                    or utils.get_in(data, ("metadata", "phenotype")) in batch_drivers)
    if not needs_region:
        return out_file, []
    region_dir = os.path.join(out_dir, data["region"][0])
    region_file = os.path.join(region_dir,
                               "%s-%s%s" % (name, region.to_safestr(data["region"]), ext))
    return out_file, [(data["region"], region_file)]
def _do_work(data):
    """Work out the combined output file and one part per assigned region.

    Regions and their BAMs come from ``_assign_bams_to_regions(data)``, ordered
    by ``_sort_by_size`` in descending order. Each part is a
    ``(region, work_bams, region_file)`` tuple.

    ``dir_ext_fn`` and ``ext`` are not defined here; they are resolved from
    the surrounding scope.

    Returns ``(out_file, parts)`` when ``data`` has a "region" entry,
    otherwise ``(None, [])``.
    """
    if "region" not in data:
        return None, []
    if "group" in data:
        name = data["group"][0]
    else:
        name = data["description"]
    out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
    out_file = os.path.join(out_dir, "%s%s" % (name, ext))
    assert isinstance(data["region"], (list, tuple))
    ordered = sorted(_assign_bams_to_regions(data), key=_sort_by_size, reverse=True)
    out_parts = [
        (cur, bams,
         os.path.join(os.path.join(out_dir, cur[0]),
                      "%s-%s%s" % (name, region.to_safestr(cur), ext)))
        for cur, bams in ordered
    ]
    return out_file, out_parts
def _split_by_callable_region(data):
    """Split by callable or variant regions.

    We expect joint calling to be deep in numbers of samples per region, so
    prefer splitting aggressively by regions.

    Output is organised under ``<work>/joint/<name>``, where ``name`` is the
    ``metadata/batch`` value when set and the ``rgnames/sample`` value
    otherwise. One sub-directory is created per region, named after the
    region's first item.

    Returns ``(out_file, parts)``: ``out_file`` is the combined
    ``<name>-joint.vcf.gz`` path and each part is a
    ``(region, work_bams, vrn_files, region_outfile)`` tuple.
    """
    batch = tz.get_in(("metadata", "batch"), data)
    name = batch or tz.get_in(("rgnames", "sample"), data)
    out_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "joint", name))
    parts = []
    for feat in _get_callable_regions(data):
        region_dir = utils.safe_makedir(os.path.join(out_dir, feat[0]))
        # Name region files with ``name`` rather than ``batch``: without a
        # batch the old code produced "None-<region>.vcf.gz", which did not
        # match the directory and the combined output file.
        region_outfile = os.path.join(region_dir,
                                      "%s-%s.vcf.gz" % (name, region.to_safestr(feat)))
        parts.append((feat, data["work_bams"], data["vrn_files"], region_outfile))
    out_file = os.path.join(out_dir, "%s-joint.vcf.gz" % name)
    return out_file, parts
def _split_by_callable_region(data):
    """Split by callable or variant regions.

    We expect joint calling to be deep in numbers of samples per region, so
    prefer splitting aggressively by regions.

    Output is organised under ``<work>/joint/<name>``, where ``name`` is the
    ``metadata/batch`` value when set and the ``rgnames/sample`` value
    otherwise. A shared ``inprep`` directory is created there, and every
    per-region sub-directory gets an ``inprep`` symlink pointing back at it.

    Returns ``(out_file, parts)``: ``out_file`` is the combined
    ``<name>-joint.vcf.gz`` path and each part is a
    ``(region, work_bams, vrn_files, region_outfile)`` tuple.
    """
    batch = tz.get_in(("metadata", "batch"), data)
    name = batch or tz.get_in(("rgnames", "sample"), data)
    out_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "joint", name))
    utils.safe_makedir(os.path.join(out_dir, "inprep"))
    parts = []
    for feat in _get_callable_regions(data):
        region_dir = utils.safe_makedir(os.path.join(out_dir, feat[0]))
        region_prep_dir = os.path.join(region_dir, "inprep")
        if not os.path.exists(region_prep_dir):
            # Relative link (../inprep) to the shared directory created above.
            os.symlink(os.path.join(os.pardir, "inprep"), region_prep_dir)
        # Name region files with ``name`` rather than ``batch``: without a
        # batch the old code produced "None-<region>.vcf.gz", which did not
        # match the directory and the combined output file.
        region_outfile = os.path.join(region_dir,
                                      "%s-%s.vcf.gz" % (name, region.to_safestr(feat)))
        parts.append((feat, data["work_bams"], data["vrn_files"], region_outfile))
    out_file = os.path.join(out_dir, "%s-joint.vcf.gz" % name)
    return out_file, parts
def _do_work(data):
    """Work out the combined output file and one part per region in ``data``.

    For every region, one BAM is picked from each entry of
    ``data["region_bams"]``: the only element when the entry has length one,
    otherwise the element at the region's index. Each picked BAM is asserted
    to exist on disk. Parts are ``(region, work_bams, region_file)`` tuples.

    ``dir_ext_fn`` and ``ext`` are not defined here; they are resolved from
    the surrounding scope.

    Returns ``(out_file, parts)`` when ``data`` has a "region" entry,
    otherwise ``(None, [])``.
    """
    if "region" not in data:
        return None, []
    if "group" in data:
        name = data["group"][0]
    else:
        name = data["description"]
    out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
    out_file = os.path.join(out_dir, "%s%s" % (name, ext))
    assert isinstance(data["region"], (list, tuple))
    out_parts = []
    for idx, cur in enumerate(data["region"]):
        region_dir = os.path.join(out_dir, cur[0])
        region_file = os.path.join(region_dir,
                                   "%s-%s%s" % (name, region.to_safestr(cur), ext))
        # Single-element entries are shared by all regions; longer entries
        # are indexed by region position.
        work_bams = [bams[0] if len(bams) == 1 else bams[idx]
                     for bams in data["region_bams"]]
        for bam in work_bams:
            assert os.path.exists(bam), bam
        out_parts.append((cur, work_bams, region_file))
    return out_file, out_parts