コード例 #1
0
def aggregate_by_tad(all_TADs_by_celltype,
                     aggregations,
                     other,
                     extension=0.1,
                     n_windows=100,
                     genome="hg19"):
    """Bin every TAD (plus flanks) into windows and pivot aggregates per TAD.

    Each region is widened on both sides with ``BedTool.slop`` using
    ``extension`` as a fraction (``pct=True``), then divided into a fixed
    number of windows. The windows are handed to ``coverage_by_window``
    together with ``other`` and ``aggregations``; its result is pivoted
    into one table per key of ``aggregations``.

    Args:
        all_TADs_by_celltype: DataFrame holding the module-level ``coords``
            columns and a string ``tad_uid`` column.
        aggregations: Mapping whose keys name the value columns to pivot.
            It is forwarded unchanged to ``coverage_by_window``
            (NOTE(review): exact expected schema is defined by that helper).
        other: Forwarded unchanged to ``coverage_by_window``.
        extension: Fraction passed as ``l``/``r`` to ``slop`` and used to
            size the flanking windows.
        n_windows: Number of windows meant to cover the un-extended region.
        genome: Genome passed to ``slop``. Defaults to ``"hg19"``, the value
            that used to be hard-coded.

    Returns:
        A tuple ``(aggregations_by_tad, tad_start_window, tad_end_window)``:
        ``aggregations_by_tad`` maps each key of ``aggregations`` to a
        DataFrame indexed by ``tad_uid`` with one column per ``win_num``
        (sorted); ``tad_start_window`` is ``int(n_windows * extension)`` and
        ``tad_end_window`` is ``n_windows + int(n_windows * extension)``.
        Note that underscores in ``tad_uid`` are replaced by hyphens in the
        returned tables.
    """
    # Windows taken by each flank; computed once instead of three times.
    flank_windows = int(n_windows * extension)
    tot_windows = n_windows + flank_windows * 2
    tad_start_window = flank_windows
    tad_end_window = n_windows + flank_windows

    regions = all_TADs_by_celltype[coords + ['tad_uid']].copy()
    # Window names come back as "<tad_uid>_<win_num>"; removing underscores
    # from the ids guarantees the split below yields exactly two fields.
    regions['tad_uid'] = regions.tad_uid.map(lambda x: x.replace("_", "-"))
    extended = BedTool.from_dataframe(regions)\
                      .slop(l=extension, r=extension,
                            pct=True, genome=genome)
    windows = BedTool().window_maker(b=extended,
                                     n=tot_windows, i='srcwinnum')\
                       .to_dataframe(names=coords + ['window_uid'])
    windows_idxs = windows.window_uid.str.split("_", expand=True)
    windows_idxs.columns = ['tad_uid', 'win_num']
    windows = pd.concat((windows, windows_idxs), axis=1)
    # win_num is kept exactly as emitted by window_maker (no offset applied).
    windows['win_num'] = windows['win_num'].astype(int)
    windows = windows.sort_values(coords).reset_index(drop=True)

    windows_with_ctcfs = coverage_by_window(windows, other, aggregations)
    aggregations_by_tad = {}
    for c in aggregations.keys():
        # Overwrite the current terminal line with the key being processed.
        print(" " * 100, end='\r')
        print(c, end="\r")
        # One sort of the window columns is sufficient.
        cagg = windows_with_ctcfs.pivot_table(index='tad_uid',
                                              columns='win_num',
                                              values=c).sort_index(axis=1)
        aggregations_by_tad[c] = cagg
    return aggregations_by_tad, tad_start_window, tad_end_window
コード例 #2
0
def windowing_by_number(all_TADs_by_celltype, n_windows):
    """Split every input region into ``n_windows`` windows.

    The regions are passed to ``BedTool.window_maker`` with
    ``i='srcwinnum'``, so each window's last column holds
    ``"<source id>_<window number>"``. That column is separated back into
    the source id and the window number.

    Args:
        all_TADs_by_celltype: DataFrame of regions; its last column is
            treated as the region identifier. The output reuses the same
            column names.
        n_windows: Number of windows requested per region.

    Returns:
        DataFrame with the input's column names plus ``w_num`` (the parsed
        window number minus one), sorted by the module-level ``coords``
        columns and re-indexed from zero.
    """
    id_col = all_TADs_by_celltype.columns[-1]
    windows = BedTool().window_maker(b=BedTool.from_dataframe(all_TADs_by_celltype),
                                     n=n_windows, i='srcwinnum')\
                       .to_dataframe(names=all_TADs_by_celltype.columns.tolist())
    # Split on the LAST underscore only: ids may themselves contain a
    # varying number of underscores, and a full split would pad shorter
    # rows with None and misplace the window number.
    parts = windows[id_col].str.rsplit("_", n=1, expand=True)
    windows[id_col] = parts[0]
    # Shift the window number down by one (presumably to make bedtools'
    # numbering zero-based).
    windows['w_num'] = parts[1].astype(int) - 1
    windows = windows.sort_values(coords).reset_index(drop=True)
    return windows
コード例 #3
0
def windowing_by_size(centered_boundaries, window_size):
    """Split every input region into windows of ``window_size``.

    The regions are passed to ``BedTool.window_maker`` with ``w=window_size``
    and ``i='srcwinnum'``, so each window's last column holds
    ``"<source id>_<window number>"``. That column is separated back into
    the source id and the window number.

    Args:
        centered_boundaries: DataFrame of regions; its last column is
            treated as the region identifier. The output reuses the same
            column names.
        window_size: Window width handed to ``window_maker`` as ``w``.

    Returns:
        DataFrame with the input's column names plus ``w_num`` (the parsed
        window number minus one), sorted by the module-level ``coords``
        columns and re-indexed from zero.
    """
    id_col = centered_boundaries.columns[-1]
    windows = BedTool().window_maker(b=BedTool.from_dataframe(centered_boundaries),
                                     w=window_size, i='srcwinnum')\
                       .to_dataframe(names=centered_boundaries.columns.tolist())
    # Split on the LAST underscore only: ids may themselves contain a
    # varying number of underscores, and a full split would pad shorter
    # rows with None and misplace the window number.
    parts = windows[id_col].str.rsplit("_", n=1, expand=True)
    windows[id_col] = parts[0]
    # Shift the window number down by one (presumably to make bedtools'
    # numbering zero-based).
    windows['w_num'] = parts[1].astype(int) - 1
    windows = windows.sort_values(coords).reset_index(drop=True)
    return windows