Ejemplo n.º 1
0
def _run_wham_coords(inputs, background_bams, coords, final_file):
    """Run WHAM-GRAPHENING over one region and report the per-region output.

    The output path is ``final_file`` with the safe string form of ``coords``
    inserted before its extension. The command only runs when
    ``utils.file_exists`` does not find that path. It receives the
    ``align_bam`` of every item in ``inputs`` followed by ``background_bams``
    as one comma-separated list, and its stdout is redirected into the
    transactional output file.

    Returns a one-element list containing ``[coords, out_file]``.
    """
    prefix, suffix = os.path.splitext(final_file)
    safe_region = region.to_safestr(coords)
    out_file = "%s-%s%s" % (prefix, safe_region, suffix)
    if utils.file_exists(out_file):
        return [[coords, out_file]]
    # The first input supplies the configuration (cores, reference, transaction).
    data = inputs[0]
    with file_transaction(data, out_file) as tx_out_file:
        template = ("WHAM-GRAPHENING {opts} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                    "> {tx_out_file}")
        cmd = template.format(
            cores=dd.get_cores(data),
            ref_file=dd.get_ref_file(data),
            all_bams=",".join([x["align_bam"] for x in inputs] + background_bams),
            coord_str=bamprep.region_to_gatk(coords),
            opts="-k -m 30",
            tx_out_file=tx_out_file)
        do.run(cmd, "Run WHAM: %s" % safe_region)
    return [[coords, out_file]]
Ejemplo n.º 2
0
def _run_wham_coords(inputs, background_bams, coords, final_file):
    """Run WHAM-GRAPHENING over one region, then merge, genotype and sort.

    The raw calls go to a ``.vcf`` file named after ``final_file`` (extension
    removed via ``utils.splitext_plus``) and the safe string form of
    ``coords``. The calling step only runs when ``utils.file_exists`` does not
    find that file. The raw file is then passed through ``_run_wham_merge``,
    ``_run_wham_genotype`` and ``vcfutils.sort_by_ref`` in that order.

    Returns a one-element list containing ``[coords, sorted_vcf]``.
    """
    stem, _unused_ext = utils.splitext_plus(final_file)
    safe_region = region.to_safestr(coords)
    raw_file = "%s-%s.vcf" % (stem, safe_region)
    # Built up front because the genotyping step below needs the same BAM list.
    all_bams = ",".join([x["align_bam"] for x in inputs] + background_bams)
    # The first input supplies the configuration for every step.
    data = inputs[0]
    if not utils.file_exists(raw_file):
        with file_transaction(data, raw_file) as tx_raw_file:
            template = ("WHAM-GRAPHENING {opts} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                        "> {tx_raw_file}")
            cmd = template.format(
                cores=dd.get_cores(data),
                ref_file=dd.get_ref_file(data),
                coord_str=bamprep.region_to_gatk(coords),
                opts="-k -m 30",
                all_bams=all_bams,
                tx_raw_file=tx_raw_file)
            do.run(cmd, "Run WHAM: %s" % safe_region)
    merged = _run_wham_merge(raw_file, data)
    genotyped = _run_wham_genotype(merged, all_bams, coords, data)
    return [[coords, vcfutils.sort_by_ref(genotyped, data)]]
Ejemplo n.º 3
0
def _run_wham_coords(inputs, background_bams, coords, final_file):
    """Call WHAM-GRAPHENING for a single chromosome/start/end region.

    The per-region file name is ``final_file`` with the safe string form of
    ``coords`` placed before the extension. Nothing is executed when
    ``utils.file_exists`` already finds that file. Otherwise the command is
    run with the ``align_bam`` of each item in ``inputs`` plus
    ``background_bams`` joined by commas, writing stdout to the transactional
    output path.

    Returns ``[[coords, out_file]]``.
    """
    root, extension = os.path.splitext(final_file)
    region_label = region.to_safestr(coords)
    out_file = "%s-%s%s" % (root, region_label, extension)
    result = [[coords, out_file]]
    if utils.file_exists(out_file):
        return result
    # Configuration is read from the first input only.
    first = inputs[0]
    with file_transaction(first, out_file) as tx_out_file:
        fields = {
            "cores": dd.get_cores(first),
            "ref_file": dd.get_ref_file(first),
            "all_bams": ",".join([x["align_bam"] for x in inputs] + background_bams),
            "coord_str": bamprep.region_to_gatk(coords),
            "opts": "-k -m 30",
            "tx_out_file": tx_out_file,
        }
        cmd = ("WHAM-GRAPHENING {opts} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
               "> {tx_out_file}")
        do.run(cmd.format(**fields), "Run WHAM: %s" % region_label)
    return result
Ejemplo n.º 4
0
def _run_wham_coords(inputs, background_bams, coords, final_file):
    """Call, merge, genotype and sort WHAM results for one region.

    Raw calls are written to ``<base>-<safe region>.vcf`` where ``base`` comes
    from ``utils.splitext_plus(final_file)``. The calling command is skipped
    when ``utils.file_exists`` finds the raw file. The raw file is then fed to
    ``_run_wham_merge``, its result to ``_run_wham_genotype``, and that result
    to ``vcfutils.sort_by_ref``.

    Returns ``[[coords, sorted_vcf]]``.
    """
    base, _unused_ext = utils.splitext_plus(final_file)
    region_label = region.to_safestr(coords)
    raw_file = "%s-%s.vcf" % (base, region_label)
    # Shared by the calling command and the later genotyping step.
    all_bams = ",".join([x["align_bam"] for x in inputs] + background_bams)
    first = inputs[0]
    if not utils.file_exists(raw_file):
        with file_transaction(first, raw_file) as tx_raw_file:
            fields = {
                "cores": dd.get_cores(first),
                "ref_file": dd.get_ref_file(first),
                "coord_str": bamprep.region_to_gatk(coords),
                "opts": "-k -m 30",
                "all_bams": all_bams,
                "tx_raw_file": tx_raw_file,
            }
            cmd = ("WHAM-GRAPHENING {opts} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                   "> {tx_raw_file}")
            do.run(cmd.format(**fields), "Run WHAM: %s" % region_label)
    merge_vcf = _run_wham_merge(raw_file, first)
    gt_vcf = _run_wham_genotype(merge_vcf, all_bams, coords, first)
    sorted_vcf = vcfutils.sort_by_ref(gt_vcf, first)
    return [[coords, sorted_vcf]]
Ejemplo n.º 5
0
def _run_wham_genotype(in_file, all_bams, coords, data):
    """Genotype the calls in ``in_file`` with WHAM-GRAPHENING for one region.

    The output path is ``in_file`` with ``-wgts`` inserted before the
    extension reported by ``utils.splitext_plus``. The command runs only when
    ``utils.file_exists`` does not find that path; ``in_file`` is passed via
    ``-b`` and stdout is redirected into the transactional output file.

    Returns the output path.
    """
    stem, suffix = utils.splitext_plus(in_file)
    out_file = "%s-wgts%s" % (stem, suffix)
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        template = ("WHAM-GRAPHENING -b {in_file} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                    "> {tx_out_file}")
        cmd = template.format(
            in_file=in_file,
            cores=dd.get_cores(data),
            ref_file=dd.get_ref_file(data),
            all_bams=all_bams,
            coord_str=bamprep.region_to_gatk(coords),
            tx_out_file=tx_out_file)
        do.run(cmd, "Genotype WHAM: %s" % region.to_safestr(coords))
    return out_file
Ejemplo n.º 6
0
def _run_wham_genotype(in_file, all_bams, coords, data):
    """Run the WHAM-GRAPHENING genotyping pass (``-b``) on ``in_file``.

    Writes to ``<base>-wgts<ext>`` where ``(base, ext)`` is what
    ``utils.splitext_plus(in_file)`` returns. The command is skipped when
    ``utils.file_exists`` already finds that file.

    Returns the output path.
    """
    base, ext = utils.splitext_plus(in_file)
    out_file = "%s-wgts%s" % (base, ext)
    if not utils.file_exists(out_file):
        with file_transaction(data, out_file) as tx_out_file:
            fields = {
                "in_file": in_file,
                "cores": dd.get_cores(data),
                "ref_file": dd.get_ref_file(data),
                "all_bams": all_bams,
                "coord_str": bamprep.region_to_gatk(coords),
                "tx_out_file": tx_out_file,
            }
            cmd = ("WHAM-GRAPHENING -b {in_file} -x {cores} -a {ref_file} -f {all_bams} -r {coord_str} "
                   "> {tx_out_file}")
            message = "Genotype WHAM: %s" % region.to_safestr(coords)
            do.run(cmd.format(**fields), message)
    return out_file
Ejemplo n.º 7
0
 def _do_work(data):
     """Compute the combined output file and the single region part for ``data``.

     Returns ``(None, [])`` when ``data`` has no ``"region"`` key. Otherwise
     returns ``(out_file, [(region, region_file)])``, where the region file
     sits in a sub-directory named after the first element of the region.
     The name is the first ``"group"`` entry if present, else ``"description"``.
     """
     if "region" not in data:
         return None, []
     if "group" in data:
         name = data["group"][0]
     else:
         name = data["description"]
     out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
     out_file = os.path.join(out_dir, "%s%s" % (name, ext))
     cur_region = data["region"]
     region_dir = os.path.join(out_dir, cur_region[0])
     region_name = "%s-%s%s" % (name, region.to_safestr(cur_region), ext)
     region_file = os.path.join(region_dir, region_name)
     return out_file, [(cur_region, region_file)]
Ejemplo n.º 8
0
 def _do_work(data):
     """Compute the combined output file and, if still needed, its region part.

     Returns ``(None, [])`` when ``data`` has no ``"region"`` key or when the
     first element of the region is ``"nochrom"`` or ``"noanalysis"``.
     Otherwise returns ``(out_file, parts)``. ``parts`` holds one
     ``(region, region_file)`` tuple when ``utils.file_exists`` does not find
     the combined output or the sample's metadata phenotype is in
     ``batch_drivers``; in any other case it is empty.
     """
     if "region" not in data or data["region"][0] in ("nochrom", "noanalysis"):
         return None, []
     if "group" in data:
         name = data["group"][0]
     else:
         name = data["description"]
     out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
     out_file = os.path.join(out_dir, "%s%s" % (name, ext))
     # The existence check comes first so the phenotype lookup is only
     # evaluated when the combined file is already present.
     needs_region = (not utils.file_exists(out_file)
                     or utils.get_in(data, ("metadata", "phenotype")) in batch_drivers)
     if not needs_region:
         return out_file, []
     cur_region = data["region"]
     region_dir = os.path.join(out_dir, cur_region[0])
     region_name = "%s-%s%s" % (name, region.to_safestr(cur_region), ext)
     return out_file, [(cur_region, os.path.join(region_dir, region_name))]
Ejemplo n.º 9
0
 def _do_work(data):
     """Compute the combined output file and one part per assigned region.

     Returns ``(None, [])`` when ``data`` has no ``"region"`` key. Otherwise
     returns ``(out_file, parts)`` where each part is a
     ``(region, work_bams, region_file)`` tuple. The pairs come from
     ``_assign_bams_to_regions(data)``, ordered by ``_sort_by_size`` in
     reverse.
     """
     if "region" not in data:
         return None, []
     if "group" in data:
         name = data["group"][0]
     else:
         name = data["description"]
     out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
     out_file = os.path.join(out_dir, "%s%s" % (name, ext))
     assert isinstance(data["region"], (list, tuple))
     assigned = sorted(_assign_bams_to_regions(data), key=_sort_by_size, reverse=True)
     out_parts = []
     for cur_region, work_bams in assigned:
         region_dir = os.path.join(out_dir, cur_region[0])
         region_name = "%s-%s%s" % (name, region.to_safestr(cur_region), ext)
         out_parts.append((cur_region, work_bams, os.path.join(region_dir, region_name)))
     return out_file, out_parts
Ejemplo n.º 10
0
 def _do_work(data):
     """Return the combined output path plus per-region work items.

     Without a ``"region"`` key in ``data`` the result is ``(None, [])``.
     With one, the result is ``(out_file, parts)``; every part is a
     ``(region, work_bams, region_file)`` tuple built from the pairs yielded
     by ``_assign_bams_to_regions(data)`` after sorting them with
     ``_sort_by_size`` in reverse order.
     """
     if "region" not in data:
         return None, []
     name = data["group"][0] if "group" in data else data["description"]
     out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
     out_file = os.path.join(out_dir, "%s%s" % (name, ext))
     assert isinstance(data["region"], (list, tuple))

     def _part_for(cur_region, work_bams):
         # Each region file lives in a directory named after the region's first element.
         region_dir = os.path.join(out_dir, cur_region[0])
         region_name = "%s-%s%s" % (name, region.to_safestr(cur_region), ext)
         return (cur_region, work_bams, os.path.join(region_dir, region_name))

     ordered = sorted(_assign_bams_to_regions(data), key=_sort_by_size, reverse=True)
     return out_file, [_part_for(r, bams) for r, bams in ordered]
Ejemplo n.º 11
0
def _split_by_callable_region(data):
    """Split joint calling work into one part per callable region.

    The working directory is ``<work>/joint/<name>``, where ``name`` is the
    metadata batch when set and the ``rgnames`` sample otherwise. A
    sub-directory named after the first element of each region from
    ``_get_callable_regions(data)`` is created via ``utils.safe_makedir``.

    Returns ``(out_file, parts)``; each part is a
    ``(region, work_bams, vrn_files, region_outfile)`` tuple.
    """
    batch = tz.get_in(("metadata", "batch"), data)
    name = batch or tz.get_in(("rgnames", "sample"), data)
    joint_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "joint", name))
    out_file = os.path.join(joint_dir, "%s-joint.vcf.gz" % name)
    parts = []
    for feat in _get_callable_regions(data):
        cur_dir = utils.safe_makedir(os.path.join(joint_dir, feat[0]))
        # The per-region file name is built from ``batch``, not the fallback ``name``.
        cur_name = "%s-%s.vcf.gz" % (batch, region.to_safestr(feat))
        parts.append((feat, data["work_bams"], data["vrn_files"],
                      os.path.join(cur_dir, cur_name)))
    return out_file, parts
Ejemplo n.º 12
0
def _split_by_callable_region(data):
    """Split joint calling work into one part per callable region.

    The working directory is ``<work>/joint/<name>``, where ``name`` is the
    metadata batch when set and the ``rgnames`` sample otherwise. A shared
    ``inprep`` directory is created inside it. Each region from
    ``_get_callable_regions(data)`` gets a sub-directory named after its first
    element, holding a relative ``inprep`` symlink that points at the shared
    directory one level up. The symlink is only created when the path does
    not already exist.

    Returns ``(out_file, parts)``; each part is a
    ``(region, work_bams, vrn_files, region_outfile)`` tuple.
    """
    batch = tz.get_in(("metadata", "batch"), data)
    name = batch or tz.get_in(("rgnames", "sample"), data)
    joint_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "joint", name))
    utils.safe_makedir(os.path.join(joint_dir, "inprep"))
    out_file = os.path.join(joint_dir, "%s-joint.vcf.gz" % name)
    shared_prep_target = os.path.join(os.pardir, "inprep")
    parts = []
    for feat in _get_callable_regions(data):
        cur_dir = utils.safe_makedir(os.path.join(joint_dir, feat[0]))
        prep_link = os.path.join(cur_dir, "inprep")
        if not os.path.exists(prep_link):
            os.symlink(shared_prep_target, prep_link)
        # The per-region file name is built from ``batch``, not the fallback ``name``.
        cur_name = "%s-%s.vcf.gz" % (batch, region.to_safestr(feat))
        parts.append((feat, data["work_bams"], data["vrn_files"],
                      os.path.join(cur_dir, cur_name)))
    return out_file, parts
Ejemplo n.º 13
0
 def _do_work(data):
     """Compute the combined output file and one part per region in ``data``.

     Returns ``(None, [])`` when ``data`` has no ``"region"`` key. Otherwise
     returns ``(out_file, parts)`` with one
     ``(region, work_bams, region_file)`` tuple per entry of
     ``data["region"]``. For each entry of ``data["region_bams"]``, a
     single-item list contributes its only BAM to every region; a longer list
     contributes the BAM at the region's index. Every selected BAM is asserted
     to exist on disk.
     """
     if "region" not in data:
         return None, []
     if "group" in data:
         name = data["group"][0]
     else:
         name = data["description"]
     out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
     out_file = os.path.join(out_dir, "%s%s" % (name, ext))
     assert isinstance(data["region"], (list, tuple))
     out_parts = []
     for idx, cur_region in enumerate(data["region"]):
         region_dir = os.path.join(out_dir, cur_region[0])
         region_name = "%s-%s%s" % (name, region.to_safestr(cur_region), ext)
         region_file = os.path.join(region_dir, region_name)
         work_bams = [bams[0] if len(bams) == 1 else bams[idx]
                      for bams in data["region_bams"]]
         for bam in work_bams:
             assert os.path.exists(bam), bam
         out_parts.append((cur_region, work_bams, region_file))
     return out_file, out_parts
Ejemplo n.º 14
0
 def _do_work(data):
     """Return the combined output path plus per-region work items.

     Without a ``"region"`` key in ``data`` the result is ``(None, [])``.
     With one, the result is ``(out_file, parts)``; each part is a
     ``(region, work_bams, region_file)`` tuple for one entry of
     ``data["region"]``. A ``data["region_bams"]`` entry of length one
     supplies the same BAM to every region; any other entry is indexed by the
     region's position. Each chosen BAM path is asserted to exist.
     """
     if "region" not in data:
         return None, []
     name = data["group"][0] if "group" in data else data["description"]
     out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
     out_file = os.path.join(out_dir, "%s%s" % (name, ext))
     assert isinstance(data["region"], (list, tuple))

     def _pick_bam(bams, idx):
         # A single-item list is shared across regions; otherwise index by region.
         if len(bams) == 1:
             return bams[0]
         return bams[idx]

     out_parts = []
     for idx, cur_region in enumerate(data["region"]):
         region_dir = os.path.join(out_dir, cur_region[0])
         region_file = os.path.join(
             region_dir, "%s-%s%s" % (name, region.to_safestr(cur_region), ext))
         work_bams = [_pick_bam(bams, idx) for bams in data["region_bams"]]
         for bam in work_bams:
             assert os.path.exists(bam), bam
         out_parts.append((cur_region, work_bams, region_file))
     return out_file, out_parts