def aggregate_by_tad(all_TADs_by_celltype, aggregations, other, extension=0.1, n_windows=100):
    """Build one region-by-window table per aggregation key.

    Every region in ``all_TADs_by_celltype`` is extended on both sides by
    the fraction ``extension`` (bedtools ``slop`` with ``pct=True`` on the
    hg19 genome) and cut into ``n_windows + 2 * int(n_windows * extension)``
    windows. The windows are handed to ``coverage_by_window`` together with
    ``other`` and ``aggregations``, and the result is pivoted so that rows
    are regions and columns are window numbers.

    Args:
        all_TADs_by_celltype: DataFrame holding the ``coords`` columns
            (``coords`` is defined elsewhere in this module) and a
            ``tad_uid`` column of strings.
        aggregations: Mapping whose keys name the value columns to pivot;
            it is forwarded unchanged to ``coverage_by_window``.
        other: Forwarded unchanged to ``coverage_by_window``.
        extension: Fraction of each region added on either side.
        n_windows: Number of windows covering the un-extended region.

    Returns:
        A tuple ``(aggregations_by_tad, tad_start_window, tad_end_window)``
        where ``aggregations_by_tad`` maps each key of ``aggregations`` to
        its pivot table, ``tad_start_window`` is
        ``int(n_windows * extension)`` and ``tad_end_window`` is
        ``n_windows + int(n_windows * extension)``.

    Note:
        Underscores in ``tad_uid`` are replaced with ``-`` before windowing
        so the generated ``<uid>_<window number>`` names split into exactly
        two fields. The returned tables are therefore indexed by the
        dash-substituted ids.
    """
    flank_windows = int(n_windows * extension)
    tot_windows = n_windows + flank_windows * 2
    tad_start_window = flank_windows
    tad_end_window = n_windows + flank_windows

    regions = all_TADs_by_celltype[coords + ['tad_uid']].copy()
    regions['tad_uid'] = regions.tad_uid.map(lambda uid: uid.replace("_", "-"))

    # Pad each region proportionally to its own length, then window it.
    extended_regions = BedTool.from_dataframe(regions).slop(
        l=extension, r=extension, pct=True, genome="hg19"
    )
    windowed = BedTool().window_maker(b=extended_regions, n=tot_windows, i='srcwinnum')
    windows = windowed.to_dataframe(names=coords + ['window_uid'])

    # window_uid is "<tad_uid>_<window number>"; split it back into columns.
    # NOTE(review): win_num is kept exactly as parsed (no offset applied),
    # while the start/end markers above are plain counts - confirm that
    # callers index the columns consistently.
    uid_parts = windows.window_uid.str.split("_", expand=True)
    uid_parts.columns = ['tad_uid', 'win_num']
    windows = pd.concat((windows, uid_parts), axis=1)
    windows['win_num'] = windows['win_num'].astype(int)
    windows = windows.sort_values(coords).reset_index(drop=True)

    windows_with_ctcfs = coverage_by_window(windows, other, aggregations)

    aggregations_by_tad = {}
    for key in aggregations.keys():
        # Blank the console line, then show the key being processed.
        print(" " * 100, end='\r')
        print(key, end="\r")
        table = windows_with_ctcfs.pivot_table(index='tad_uid', columns='win_num', values=key)
        aggregations_by_tad[key] = table.sort_index(axis=1)

    return aggregations_by_tad, tad_start_window, tad_end_window
def windowing_by_number(all_TADs_by_celltype, n_windows):
    """Split every region into a fixed number of windows.

    The regions are passed to bedtools ``makewindows`` (via pybedtools
    ``window_maker``) with ``i='srcwinnum'``, which names each window
    ``<source name>_<window number>``. That name is split back into the
    source id and the window number.

    Args:
        all_TADs_by_celltype: DataFrame of regions. Its last column is
            treated as the region id, and its column names are reused for
            the output. It must contain the columns listed in ``coords``
            (defined elsewhere in this module).
        n_windows: Number of windows to create per region.

    Returns:
        DataFrame with the same column names as the input plus ``w_num``
        (the parsed window number minus one), sorted by ``coords`` with a
        fresh integer index.
    """
    id_column = all_TADs_by_celltype.columns[-1]

    windowed = BedTool().window_maker(
        b=BedTool.from_dataframe(all_TADs_by_celltype), n=n_windows, i='srcwinnum'
    )
    windows = windowed.to_dataframe(names=all_TADs_by_celltype.columns.tolist())

    # Split only on the LAST underscore: region ids may themselves contain
    # underscores, and a full split pads shorter ids with None on the
    # right, so the window number would no longer be the final column.
    id_parts = windows[id_column].str.rsplit("_", n=1, expand=True)
    windows[id_column] = id_parts[0]
    windows['w_num'] = id_parts[1].astype(int) - 1

    return windows.sort_values(coords).reset_index(drop=True)
def windowing_by_size(centered_boundaries, window_size):
    """Split every region into windows of a fixed size.

    The regions are passed to bedtools ``makewindows`` (via pybedtools
    ``window_maker``) with ``i='srcwinnum'``, which names each window
    ``<source name>_<window number>``. That name is split back into the
    source id and the window number.

    Args:
        centered_boundaries: DataFrame of regions. Its last column is
            treated as the region id, and its column names are reused for
            the output. It must contain the columns listed in ``coords``
            (defined elsewhere in this module).
        window_size: Window width, passed to ``window_maker`` as ``w``.

    Returns:
        DataFrame with the same column names as the input plus ``w_num``
        (the parsed window number minus one), sorted by ``coords`` with a
        fresh integer index.
    """
    id_column = centered_boundaries.columns[-1]

    windowed = BedTool().window_maker(
        b=BedTool.from_dataframe(centered_boundaries), w=window_size, i='srcwinnum'
    )
    windows = windowed.to_dataframe(names=centered_boundaries.columns.tolist())

    # Split only on the LAST underscore: region ids may themselves contain
    # underscores, and a full split pads shorter ids with None on the
    # right, so the window number would no longer be the final column.
    id_parts = windows[id_column].str.rsplit("_", n=1, expand=True)
    windows[id_column] = id_parts[0]
    windows['w_num'] = id_parts[1].astype(int) - 1

    return windows.sort_values(coords).reset_index(drop=True)