Beispiel #1
0
def _get_target_access_files(cov_interval, data, work_dir):
    """Choose the target and access files for the given coverage interval type.

    Regions are looked up with the following fallbacks:

    1. the SV regions returned by ``regions.get_sv_bed(data)``;
    2. for ``genome`` intervals only, the ``transcripts1e4`` regions, passed
       through ``shared.remove_exclude_regions``;
    3. the variant regions from ``dd.get_variant_regions(data)``.

    The selected regions are handed to ``bedutils.merge_overlaps`` to produce
    the target BED.

    Returns a ``(target_bed, access_file)`` tuple. ``amplicon`` and ``genome``
    intervals reuse the target BED as the access file; any other interval
    type gets an access file built by ``_create_access_file``.

    Background: http://cnvkit.readthedocs.org/en/latest/nonhybrid.html
    """
    selected = regions.get_sv_bed(data)
    if not selected and cov_interval == "genome":
        # No configured SV regions: for genome calls, subset to regions
        # within 10kb of genes.
        selected = regions.get_sv_bed(data, "transcripts1e4", work_dir)
        if selected:
            selected = shared.remove_exclude_regions(selected, selected, [data])
    if not selected:
        # Last resort: the defined variant regions.
        selected = dd.get_variant_regions(data)

    target_bed = bedutils.merge_overlaps(selected, data, out_dir=work_dir)
    if cov_interval in ("amplicon", "genome"):
        return target_bed, target_bed
    access_file = _create_access_file(dd.get_ref_file(data), _sv_workdir(data), data)
    return target_bed, access_file
Beispiel #2
0
def _get_target_access_files(cov_interval, data, work_dir):
    """Return ``(target_bed, access_file)`` for the coverage interval type.

    The regions used for the target come from the first non-empty source:
    ``regions.get_sv_bed(data)``; then, for ``genome`` intervals, the
    ``transcripts1e4`` SV regions run through
    ``shared.remove_exclude_regions``; then ``dd.get_variant_regions(data)``.
    Those regions are passed to ``bedutils.merge_overlaps`` to obtain the
    target BED.

    For ``amplicon`` and ``genome`` intervals the target BED doubles as the
    access file. For everything else the access file is produced by
    ``_create_access_file`` from the reference file.

    See http://cnvkit.readthedocs.org/en/latest/nonhybrid.html
    """
    is_genome = cov_interval == "genome"

    sv_regions = regions.get_sv_bed(data)
    if not sv_regions:
        if is_genome:
            # Genome calls: subset to regions within 10kb of genes.
            sv_regions = regions.get_sv_bed(data, "transcripts1e4", work_dir)
            if sv_regions:
                sv_regions = shared.remove_exclude_regions(
                    sv_regions, sv_regions, [data])
        if not sv_regions:
            # Nothing configured for SV calling: use the variant regions.
            sv_regions = dd.get_variant_regions(data)

    target_bed = bedutils.merge_overlaps(sv_regions, data, out_dir=work_dir)
    if is_genome or cov_interval == "amplicon":
        return target_bed, target_bed

    ref_file = dd.get_ref_file(data)
    access_dir = _sv_workdir(data)
    return target_bed, _create_access_file(ref_file, access_dir, data)
Beispiel #3
0
def _run_wham(inputs, background_bams):
    """Run WHAM on a defined set of inputs and targets.

    Args:
        inputs: Sample data items. Every item must have an ``align_bam`` key;
            the first item also supplies the work directory, sample name,
            core count, reference file and configured variant regions.
        background_bams: BAM paths passed to WHAM via ``-b``. The flag is
            omitted when this is empty or ``None``.

    Returns:
        Path to ``<sample>-wham.vcf`` in the SV work directory. If the file
        already exists WHAM is not run again.
    """
    out_file = os.path.join(_sv_workdir(inputs[0]), "%s-wham.vcf" % dd.get_sample_name(inputs[0]))
    input_bams = [x["align_bam"] for x in inputs]
    if not utils.file_exists(out_file):
        with file_transaction(inputs[0], out_file) as tx_out_file:
            cores = dd.get_cores(inputs[0])
            background = "-b %s" % ",".join(background_bams) if background_bams else ""
            # Reuse the BAM list collected above instead of repeating the lookup.
            target_bams = ",".join(input_bams)
            target_bed = shared.remove_exclude_regions(
                tz.get_in(["config", "algorithm", "variant_regions"], inputs[0]), out_file, inputs)
            ref_file = dd.get_ref_file(inputs[0])
            target_str = "-e %s" % target_bed if target_bed else ""
            cmd = ("WHAM-BAM -x {cores} -f {ref_file} -t {target_bams} {background} {target_str} > {tx_out_file}")
            # Name every field explicitly rather than formatting from locals(),
            # so the values reaching the shell command are visible here.
            do.run(cmd.format(cores=cores, ref_file=ref_file, target_bams=target_bams,
                              background=background, target_str=target_str,
                              tx_out_file=tx_out_file),
                   "Run WHAM")
    return out_file
Beispiel #4
0
def _run_wham(inputs, background_bams):
    """Run WHAM on a defined set of inputs and targets.

    Args:
        inputs: Sample data items. Every item must have an ``align_bam`` key;
            the first item also supplies the work directory, sample name,
            core count, reference file and configured variant regions.
        background_bams: BAM paths passed to WHAM via ``-b``. The flag is
            omitted when this is empty or ``None``.

    Returns:
        Path to ``<sample>-wham.vcf`` in the SV work directory. If the file
        already exists WHAM is not run again.
    """
    out_file = os.path.join(_sv_workdir(inputs[0]),
                            "%s-wham.vcf" % dd.get_sample_name(inputs[0]))
    input_bams = [x["align_bam"] for x in inputs]
    if not utils.file_exists(out_file):
        with file_transaction(inputs[0], out_file) as tx_out_file:
            cores = dd.get_cores(inputs[0])
            background = "-b %s" % ",".join(
                background_bams) if background_bams else ""
            # Reuse the BAM list collected above instead of repeating the
            # lookup.
            target_bams = ",".join(input_bams)
            target_bed = shared.remove_exclude_regions(
                tz.get_in(["config", "algorithm", "variant_regions"],
                          inputs[0]), out_file, inputs)
            ref_file = dd.get_ref_file(inputs[0])
            target_str = "-e %s" % target_bed if target_bed else ""
            cmd = (
                "WHAM-BAM -x {cores} -f {ref_file} -t {target_bams} {background} {target_str} > {tx_out_file}"
            )
            # Name every field explicitly rather than formatting from
            # locals(), so the values reaching the shell command are visible.
            do.run(
                cmd.format(
                    cores=cores,
                    ref_file=ref_file,
                    target_bams=target_bams,
                    background=background,
                    target_str=target_str,
                    tx_out_file=tx_out_file,
                ),
                "Run WHAM")
    return out_file