def _group_by_cnv_method(batches):
    """Group into batches samples with identical CNV/SV approaches.

    Allows sharing of background samples across multiple batches,
    using all normals from tumor/normal pairs with the same prep method
    for background.

    Args:
        batches: Mapping of batch name to the list of sample ``data``
            objects in that batch.

    Returns:
        A list of ``CnvGroup`` namedtuples (``items``, ``work_dir``,
        ``access_file``, ``region_file``), one per distinct
        (region file, prep method) pair.

    Raises:
        AssertionError: If no sample in a batch yields a coverage region
            file; this includes batches without any samples.
    """
    CnvGroup = collections.namedtuple(
        "CnvGroup", "items, work_dir, access_file, region_file")
    groups = collections.defaultdict(list)
    for batch, items in batches.items():
        # Reset per batch: otherwise an empty batch would either hit an
        # unbound local or silently reuse the previous batch's region file,
        # sample and work directory.
        cnv_file = None
        for data in items:
            work_dir = utils.safe_makedir(
                os.path.join(dd.get_work_dir(data), "structural", "bins", batch))
            cnv_file = get_base_cnv_regions(
                data, work_dir, "transcripts100", include_gene_names=False)
            if cnv_file:
                # The first sample producing regions represents the batch.
                break
        if not cnv_file:
            # Raised explicitly (not via ``assert``) so the check survives
            # running under ``python -O``.
            raise AssertionError(
                "Did not find coverage regions for batch %s: %s"
                % (batch, " ".join([dd.get_sample_name(d) for d in items])))
        groups[(cnv_file, dd.get_prep_method(data))].append((items, data, work_dir))

    out = []
    for (cnv_file, _), cur_group in groups.items():
        # Flatten the samples of every batch sharing this key in one pass.
        group_items = [item for entry in cur_group for item in entry[0]]
        # Work directory and callable regions come from the first batch
        # registered for this group.
        first_data, first_work_dir = cur_group[0][1], cur_group[0][2]
        access_file = tz.get_in(["config", "algorithm", "callable_regions"], first_data)
        out.append(CnvGroup(group_items, first_work_dir, access_file, cnv_file))
    return out
def _group_by_cnv_method(batches):
    """Group into batches samples with identical CNV/SV approaches.

    Allows sharing of background samples across multiple batches,
    using all normals from tumor/normal pairs with the same prep method
    for background.

    Args:
        batches: Mapping of batch name to the list of sample ``data``
            objects in that batch.

    Returns:
        A list of ``CnvGroup`` namedtuples (``items``, ``work_dir``,
        ``access_file``, ``region_file``), one per distinct
        (region file, prep method) pair.

    Raises:
        AssertionError: If no sample in a batch yields a coverage region
            file; this includes batches without any samples.
    """
    CnvGroup = collections.namedtuple(
        "CnvGroup", "items, work_dir, access_file, region_file")
    groups = collections.defaultdict(list)
    for batch, items in batches.items():
        # Reset per batch: otherwise an empty batch would either hit an
        # unbound local or silently reuse the previous batch's region file,
        # sample and work directory.
        cnv_file = None
        for data in items:
            work_dir = utils.safe_makedir(
                os.path.join(dd.get_work_dir(data), "structural", "bins", batch))
            cnv_file = get_base_cnv_regions(
                data, work_dir, "transcripts100", include_gene_names=False)
            if cnv_file:
                # The first sample producing regions represents the batch.
                break
        if not cnv_file:
            # Raised explicitly (not via ``assert``) so the check survives
            # running under ``python -O``.
            raise AssertionError(
                "Did not find coverage regions for batch %s: %s"
                % (batch, " ".join([dd.get_sample_name(d) for d in items])))
        groups[(cnv_file, dd.get_prep_method(data))].append((items, data, work_dir))

    out = []
    for (cnv_file, _), cur_group in groups.items():
        # Flatten the samples of every batch sharing this key in one pass.
        group_items = [item for entry in cur_group for item in entry[0]]
        # Work directory and callable regions come from the first batch
        # registered for this group.
        first_data, first_work_dir = cur_group[0][1], cur_group[0][2]
        access_file = tz.get_in(["config", "algorithm", "callable_regions"], first_data)
        out.append(CnvGroup(group_items, first_work_dir, access_file, cnv_file))
    return out