Esempio n. 1
0
def calculate_sv_bins(*items):
    """Prepare target and antitarget coverage bins for each sample.

    Offers one shared way of defining regional bins for coverage calculations
    so that multiple CNV callers can make use of both target and antitarget
    regions, with consistent target/antitarget bin sizes across batches.

    callable_regions act as the access BED file, and mosdepth regions in
    variant_regions are used to estimate depth when choosing bin sizes.

    Returns a list of single-item lists, one per input sample. Samples that use
    general SV bins have ``data["regions"]["bins"]`` filled in with ``target``,
    ``antitarget`` and ``group`` entries; other samples pass through unchanged.

    Raises AssertionError when the number of samples produced differs from the
    number of samples supplied.
    """
    from bcbio.structural import cnvkit
    samples = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    # Nothing to compute when no sample asks for general SV bins.
    if not any(cnvkit.use_general_sv_bins(x) for x in samples):
        return [[d] for d in samples]
    out = []
    cnv_groups = _group_by_cnv_method(multi.group_by_batch(samples, False))
    for group_index, cnv_group in enumerate(cnv_groups):
        # One size calculator per group, shared by every sample in that group.
        sizer = MemoizedSizes(cnv_group.region_file, cnv_group.items)
        size_calc_fn = sizer.get_target_antitarget_bin_sizes
        for data in cnv_group.items:
            if not cnvkit.use_general_sv_bins(data):
                out.append([data])
                continue
            background = dd.get_background_cnv_reference(data)
            if background:
                # Reuse the bins defined by the supplied background reference.
                beds = cnvkit.targets_from_background(background, cnv_group.work_dir, data)
            else:
                beds = cnvkit.targets_w_bins(cnv_group.region_file, cnv_group.access_file,
                                             size_calc_fn, cnv_group.work_dir, data)
            target_bed, anti_bed = beds
            if not data.get("regions"):
                data["regions"] = {}
            data["regions"]["bins"] = {
                "target": target_bed,
                "antitarget": anti_bed,
                "group": str(group_index),
            }
            out.append([data])
    if len(out) != len(samples):
        names_out = sorted([dd.get_sample_name(utils.to_single_data(x)) for x in out])
        names_in = sorted([dd.get_sample_name(x) for x in samples])
        raise AssertionError("Inconsistent samples in and out of SV bin calculation:\nout: %s\nin : %s" %
                             (names_out, names_in))
    return out
Esempio n. 2
0
def _calculate_sv_bins_cnvkit(data, cnv_group, size_calc_fn):
    """Build CNVkit target and antitarget bin BED files for one sample.

    When a "cnvkit" background CNV reference is configured for the sample, the
    bins are taken from that background; otherwise they are computed from the
    group's region and access files using ``size_calc_fn``.

    Returns a 3-tuple ``(target_bed, anti_bed, None)``.
    """
    from bcbio.structural import cnvkit
    background = dd.get_background_cnv_reference(data, "cnvkit")
    if background:
        beds = cnvkit.targets_from_background(background, cnv_group.work_dir, data)
    else:
        beds = cnvkit.targets_w_bins(cnv_group.region_file, cnv_group.access_file,
                                     size_calc_fn, cnv_group.work_dir, data)
    target_bed, anti_bed = beds
    return target_bed, anti_bed, None
Esempio n. 3
0
def _calculate_sv_bins_cnvkit(data, cnv_group, size_calc_fn):
    """Determine target/antitarget bins for a sample with the CNVkit approach.

    A configured "cnvkit" background CNV reference takes precedence: its bins
    are reused. Without one, bins are derived from the group's region file and
    access file, sized by ``size_calc_fn``.

    The result is ``(target_bed, anti_bed, None)``; the third slot is always
    None in this implementation.
    """
    from bcbio.structural import cnvkit
    work_dir = cnv_group.work_dir
    reference = dd.get_background_cnv_reference(data, "cnvkit")
    if reference:
        target_bed, anti_bed = cnvkit.targets_from_background(reference, work_dir, data)
        return target_bed, anti_bed, None
    target_bed, anti_bed = cnvkit.targets_w_bins(
        cnv_group.region_file, cnv_group.access_file, size_calc_fn, work_dir, data)
    return target_bed, anti_bed, None