def __init__(self, bedgraph, res):
    """
    Load copy-number segments and bin them per chromosome at ``res``.

    Parameters
    ----------
    bedgraph
        Passed straight to ``self.load_bedGraph``; that call is expected
        to populate ``self.cnv_segment`` and ``self.cnv_res`` (both are
        read below but not assigned here -- confirm against
        ``load_bedGraph``).
    res : int
        Target bin size; also stored as ``self.binsize``.

    Result
    ------
    ``self.bin_cnv`` maps each chromosome label (prefix removed) to a
    1-D numpy array holding one value per bin.
    """
    self.binsize = res
    self.load_bedGraph(bedgraph)

    target_pre = ''  # keys of bin_cnv carry no chromosome prefix
    source_pre = find_chrom_pre(list(self.cnv_segment))

    binned = {}
    for chrom in self.cnv_segment:
        segments = self.cnv_segment[chrom]
        # NOTE(review): str.lstrip removes a *character set*, not a literal
        # prefix; kept unchanged so keys stay identical to the original.
        key = target_pre + chrom.lstrip(source_pre)
        # The end coordinate of the last segment is used as the chromosome length.
        chrom_end = segments[-1][1]

        if self.cnv_res == res:
            # Segments already match the requested resolution: take values as-is.
            values = [value for _, _, value in segments]
        else:
            # Re-bin: one call to calculate_bin per [lo, hi) window of width res,
            # with the last window clipped to the chromosome end.
            values = [
                self.calculate_bin(segments, [lo, min(lo + res, chrom_end)])
                for lo in range(0, chrom_end, res)
            ]

        binned[key] = np.r_[values]  # average copy numbers

    self.bin_cnv = binned
def __init__(self, clr, cnv_file, n_jobs=1):
    """
    Bin the CNV file at the cooler's resolution and collect segments.

    Parameters
    ----------
    clr
        Object exposing ``binsize`` and ``chromnames`` (stored as
        ``self.clr``).
    cnv_file
        Forwarded to ``binCNV.__init__`` together with the bin size.
    n_jobs : int
        Stored as ``self.n_jobs``; not otherwise used here.

    Result
    ------
    ``self.queue`` maps every key returned by ``self.seg(chrom)`` to the
    concatenation of the corresponding items over all chromosomes in
    ``self.bin_cnv``.
    """
    self.clr = clr
    self.res = clr.binsize
    self.n_jobs = n_jobs

    # Base-class initialisation fills self.bin_cnv at the cooler resolution.
    binCNV.__init__(self, cnv_file, self.res)

    self.queue = {}
    self.pre = find_chrom_pre(clr.chromnames)

    for chrom in self.bin_cnv:
        pieces = self.seg(chrom)
        for key in pieces:
            # Group the per-chromosome results under a shared key.
            self.queue.setdefault(key, []).extend(pieces[key])
def assign_cnv(self, cooler_uri):
    """
    Write per-bin copy numbers into the bin table of a cooler file.

    For every chromosome of the cooler, the matching array in
    ``self.bin_cnv`` is truncated or zero-padded to the number of bins of
    that chromosome. Bins whose ``weight`` is NaN or 0 get a copy number of
    0, and chromosomes missing from ``self.bin_cnv`` are filled with zeros.
    Two columns are then (re)written under ``bins``:

    * ``CNV``  -- the concatenated copy-number values;
    * ``CNVI`` -- ``rankdata(cnv, method='dense') - 1``, i.e. a zero-based
      index of each distinct copy-number value.

    Parameters
    ----------
    cooler_uri : str
        URI of the cooler; it is opened read/write and modified in place.
    """
    cooler_lib = cooler.Cooler(cooler_uri)
    ref_pre = find_chrom_pre(cooler_lib.chromnames)

    # Collect per-chromosome arrays and concatenate once at the end; the
    # float seed keeps the result a float array even with no chromosomes.
    pieces = [np.zeros(0)]
    for ref_k in cooler_lib.chromnames:  # line with bin table
        bias = cooler_lib.bins().fetch(ref_k)['weight'].values
        n_bins = len(bias)
        # NOTE(review): str.lstrip removes a character set, not a literal
        # prefix; left as-is so lookups keep matching however the keys of
        # self.bin_cnv were built -- confirm against the key construction.
        c = ref_k.lstrip(ref_pre)
        if c not in self.bin_cnv:
            pieces.append(np.zeros(n_bins))
            continue

        stored = self.bin_cnv[c]
        if n_bins <= stored.size:
            # Copy: a plain slice is a view, and masking it below would
            # silently zero entries of self.bin_cnv.
            pre = stored[:n_bins].copy()
        else:
            pre = np.r_[stored, np.zeros(n_bins - stored.size)]

        # Bins without a usable balancing weight carry no copy number.
        mask = np.isnan(bias) | (bias == 0)
        pre[mask] = 0
        pieces.append(pre)

    cnv = np.concatenate(pieces)
    cnvi = rankdata(cnv, method='dense') - 1  # indices for quick Bias retrieval

    # Keep the compact int8 layout whenever it fits, but widen rather than
    # let indices wrap around when there are more than 128 distinct values.
    cnvi_dtype = 'i1'
    if cnvi.size and cnvi.max() > np.iinfo(np.int8).max:
        cnvi_dtype = 'i4'

    cool_path, group_path = util.parse_cooler_uri(cooler_uri)
    h5opts = dict(compression='gzip', compression_opts=6)
    with h5py.File(cool_path, 'r+') as h5:
        bins = h5[group_path]['bins']
        # Remove each stale column independently: the file may contain
        # only one of them (e.g. after an interrupted earlier run).
        for column in ('CNV', 'CNVI'):
            if column in bins:
                del bins[column]
        bins.create_dataset('CNV', data=cnv, **h5opts)
        bins.create_dataset('CNVI', data=cnvi, dtype=cnvi_dtype, **h5opts)
def __init__(self, clr, candidate, span=5000000, col='sweight', protocol='insitu',
             flexible=True, slopes=None):
    """
    Set up the object from a candidate and build its blocks.

    Parameters
    ----------
    clr
        Object exposing ``binsize``, ``chromnames`` and ``chromsizes``.
    candidate
        Forwarded to ``self.parse_input``, which is expected to set
        ``self.sv_list`` and ``self.bounds`` (both are read below but not
        assigned here -- confirm against ``parse_input``).
    span : int
        Stored as ``self.span``.
    col : str
        ``'weight'`` or ``'sweight'`` is stored as ``self.balance_type``;
        any other value sets ``self.balance_type`` to ``False``.
    protocol : str
        Stored as ``self.protocol``.
    flexible : bool
        Stored as ``self.flexible``.
    slopes : dict, optional
        Stored as ``self.slopes``. A dict passed by the caller is kept by
        reference; when omitted, a fresh dict is created for this instance
        so that instances no longer share one module-lifetime default dict.

    Result
    ------
    ``self.tb`` / ``self.to`` hold the two values returned by
    ``get_single_block`` (concatenated over all entries of ``sv_list``
    when there is more than one); ``self.load_expected()`` is called last.
    """
    self.clr = clr
    self.res = clr.binsize
    self.protocol = protocol
    self.pre = find_chrom_pre(list(self.clr.chromnames))
    self.span = span
    self.flexible = flexible
    self.slopes = {} if slopes is None else slopes
    self.balance_type = col if col in ('weight', 'sweight') else False

    # Per-chromosome [start, end] range taken from the cooler.
    self.chroms = {c: [0, self.clr.chromsizes[c]] for c in self.clr.chromnames}

    self.parse_input(candidate)

    if len(self.sv_list) == 1:
        # A single SV is bounded on both sides.
        self.tb, self.to = self.get_single_block(
            self.sv_list[0],
            left_bound=self.bounds[0],
            right_bound=self.bounds[1],
        )
    else:
        self.tb = []
        self.to = []
        last = len(self.sv_list) - 1
        for i, sv in enumerate(self.sv_list):
            # Only the outermost SVs receive an explicit bound.
            if i == 0:
                intervals, directs = self.get_single_block(sv, left_bound=self.bounds[0])
            elif i == last:
                intervals, directs = self.get_single_block(sv, right_bound=self.bounds[1])
            else:
                intervals, directs = self.get_single_block(sv)
            self.tb.extend(intervals)
            self.to.extend(directs)

    self.load_expected()
def __init__(self, clr, c1, c2, p1, p2, s1, s2, sv_type, span=5000000, col='sweight',
             trim=True, protocol='insitu'):
    """
    Describe one SV between two breakpoints and load its matrix.

    Parameters
    ----------
    clr
        Object exposing ``binsize``, ``chromnames`` and ``chromsizes``.
    c1, c2 : str
        Chromosome names, with or without a leading ``'chr'``. They are
        normalised to the naming used by ``clr`` and stored as
        ``self.c1`` / ``self.c2``.
    p1, p2, s1, s2
        Stored unchanged under the same attribute names.
    sv_type
        First field of ``self.name``.
    span, col, trim
        Forwarded to ``self.get_matrix``; ``col`` is also stored as
        ``self.balance_type``.
    protocol : str
        Stored as ``self.protocol``.

    Result
    ------
    ``self.name`` is the comma-joined string
    ``sv_type,<c1 label>,p1,s1,<c2 label>,p2,s2`` where the labels are the
    chromosome names without the ``'chr'`` prefix.

    Raises
    ------
    KeyError
        Presumably, if a normalised chromosome name is absent from
        ``clr.chromsizes`` (depends on the type of ``chromsizes``).
    """
    def _drop_chr(chrom):
        # Remove the literal 'chr' prefix only. str.lstrip('chr') would
        # strip any leading run of 'c'/'h'/'r' characters and mangle
        # labels that themselves start with one of those letters.
        return chrom[3:] if chrom.startswith('chr') else chrom

    self.clr = clr
    self.p1 = p1
    self.p2 = p2
    self.s1 = s1
    self.s2 = s2
    self.res = clr.binsize

    label1 = _drop_chr(c1)
    label2 = _drop_chr(c2)

    # Re-attach whatever prefix the cooler's own chromosome names use.
    pre = find_chrom_pre(list(self.clr.chromnames))
    self.c1 = pre + label1
    self.c2 = pre + label2
    self.chromsize1 = self.clr.chromsizes[self.c1]
    self.chromsize2 = self.clr.chromsizes[self.c2]

    self.balance_type = col
    self.protocol = protocol
    self.name = ','.join(map(str, [sv_type, label1, p1, s1, label2, p2, s2]))

    self.get_matrix(span, col, trim)
    self.load_expected()