Example 1
# Module-level imports assumed from the bcbio context; get_base_cnv_regions
# is defined alongside this function in the same module.
import collections
import operator
import os
from functools import reduce

import toolz as tz

from bcbio import utils
from bcbio.pipeline import datadict as dd


def _group_by_cnv_method(batches):
    """Group into batches samples with identical CNV/SV approaches.

    Allows sharing of background samples across multiple batches,
    using all normals from tumor/normal pairs with the same prep method
    for background.
    """
    CnvGroup = collections.namedtuple(
        "CnvGroup", "items, work_dir, access_file, region_file")
    out = []
    groups = collections.defaultdict(list)
    # For each batch, find the first sample that yields a CNV coverage region
    # file; that sample also determines the batch's prep method below.
    for batch, items in batches.items():
        for data in items:
            work_dir = utils.safe_makedir(
                os.path.join(dd.get_work_dir(data), "structural", "bins",
                             batch))
            cnv_file = get_base_cnv_regions(data,
                                            work_dir,
                                            "transcripts100",
                                            include_gene_names=False)
            if cnv_file:
                break
        assert cnv_file, (
            "Did not find coverage regions for batch %s: %s" %
            (batch, " ".join([dd.get_sample_name(d) for d in items])))
        groups[(cnv_file, dd.get_prep_method(data))].append(
            (items, data, work_dir))
    # Merge all batches that share the same region file and prep method into
    # a single CnvGroup so their background samples can be pooled.
    for (cnv_file, _), cur_group in groups.items():
        group_items = reduce(operator.add, [xs[0] for xs in cur_group])
        access_file = tz.get_in(["config", "algorithm", "callable_regions"],
                                cur_group[0][1])
        out.append(
            CnvGroup(group_items, cur_group[0][2], access_file, cnv_file))
    return out
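The snippet below is a minimal, self-contained sketch of the grouping pattern the function uses: entries are keyed by a (region file, prep method) tuple in a defaultdict, and the per-batch item lists for each key are then concatenated with reduce/operator.add. The batch names, sample names, and file names here are made up for illustration and are not part of the bcbio API.

import collections
import operator
from functools import reduce

# Hypothetical per-batch entries: key is (region file, prep method),
# value is the list of samples in that batch.
entries = [
    (("regions_a.bed", "capture"), ["tumor1", "normal1"]),
    (("regions_a.bed", "capture"), ["tumor2", "normal2"]),
    (("regions_b.bed", "amplicon"), ["tumor3", "normal3"]),
]

groups = collections.defaultdict(list)
for key, items in entries:
    groups[key].append(items)

# Batches sharing a key are merged into one flat list of samples,
# mirroring the reduce(operator.add, ...) call in the function above.
for (region_file, prep_method), item_lists in groups.items():
    merged = reduce(operator.add, item_lists)
    print(region_file, prep_method, merged)
# regions_a.bed capture ['tumor1', 'normal1', 'tumor2', 'normal2']
# regions_b.bed amplicon ['tumor3', 'normal3']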