Exemple #1
0
    def __init__(self, bedgraph, res):
        """Load CNV segments and bin them per chromosome at resolution ``res``.

        Parameters
        ----------
        bedgraph
            Passed unchanged to ``self.load_bedGraph``, which is expected to
            populate ``self.cnv_segment`` and ``self.cnv_res``.
        res : int
            Target bin size; also stored as ``self.binsize``.

        Notes
        -----
        The result is stored in ``self.bin_cnv``: a dict mapping each
        chromosome name (with the common prefix reported by
        ``find_chrom_pre`` removed) to a 1-D array with one value per bin.
        """
        self.binsize = res

        self.load_bedGraph(bedgraph)
        ori_pre = find_chrom_pre(list(self.cnv_segment))

        binned = {}
        for c in self.cnv_segment:
            # Remove the prefix as a prefix. ``str.lstrip`` would treat it as
            # a set of characters and could eat leading letters of the
            # chromosome name itself.
            if ori_pre and c.startswith(ori_pre):
                ref_k = c[len(ori_pre):]
            else:
                ref_k = c

            segments = self.cnv_segment[c]
            if self.cnv_res == res:
                # Input is already at the requested resolution: take the
                # segment values as they are.
                arr = [v for _, _, v in segments]
            else:
                # The end coordinate of the last segment is used as the
                # chromosome length.
                chromlen = segments[-1][1]
                arr = [
                    self.calculate_bin(segments, [start, min(start + res, chromlen)])
                    for start in range(0, chromlen, res)
                ]
            binned[ref_k] = np.r_[arr]

        # average copy numbers
        self.bin_cnv = binned
Exemple #2
0
    def __init__(self, clr, cnv_file, n_jobs=1):
        """Bin the CNV profile at the cooler's resolution and collect segments.

        Parameters
        ----------
        clr
            Object exposing ``binsize`` and ``chromnames`` (presumably a
            ``cooler.Cooler`` -- confirm against callers).
        cnv_file
            Forwarded to ``binCNV.__init__`` together with the bin size.
        n_jobs : int
            Stored as ``self.n_jobs``; not otherwise used in this method.

        Notes
        -----
        ``self.queue`` accumulates, for every key returned by ``self.seg``,
        the concatenation of the values returned across all chromosomes in
        ``self.bin_cnv``.
        """
        self.clr = clr
        self.res = self.clr.binsize
        self.n_jobs = n_jobs

        binCNV.__init__(self, cnv_file, self.res)

        self.queue = {}
        self.pre = find_chrom_pre(clr.chromnames)
        for chrom in self.bin_cnv:
            pieces = self.seg(chrom)
            for key in pieces:
                # Create the bucket on first sight, then append this
                # chromosome's contribution.
                self.queue.setdefault(key, []).extend(pieces[key])
Exemple #3
0
    def assign_cnv(self, cooler_uri):
        """Write per-bin copy-number columns into a cooler file.

        For every chromosome in the cooler, the matching array in
        ``self.bin_cnv`` is trimmed or zero-padded to the chromosome's bin
        count. Bins whose ``weight`` is NaN or 0 are set to 0. Chromosomes
        missing from ``self.bin_cnv`` get all zeros. The concatenated result
        is stored as ``bins/CNV``, and its dense rank (0-based) as
        ``bins/CNVI``. Existing columns with those names are overwritten.

        Parameters
        ----------
        cooler_uri : str
            Cooler URI; opened in ``r+`` mode, so the file is modified in
            place.
        """
        cooler_lib = cooler.Cooler(cooler_uri)
        ref_pre = find_chrom_pre(cooler_lib.chromnames)

        # Seed with an empty float64 array so the concatenation always has at
        # least one operand and the result is promoted to float.
        parts = [np.zeros(0)]
        for ref_k in cooler_lib.chromnames:  # line with bin table
            bias = cooler_lib.bins().fetch(ref_k)['weight'].values
            nbins = len(bias)

            # Remove the prefix as a prefix; ``str.lstrip`` strips a character
            # set and could mangle the chromosome name.
            if ref_pre and ref_k.startswith(ref_pre):
                c = ref_k[len(ref_pre):]
            else:
                c = ref_k

            if c not in self.bin_cnv:
                parts.append(np.zeros(nbins))
                continue

            stored = self.bin_cnv[c]
            if nbins <= stored.size:
                # Copy: a plain slice is a view, and the masking below would
                # otherwise overwrite the values kept in self.bin_cnv.
                pre = stored[:nbins].copy()
            else:
                pre = np.r_[stored, np.zeros(nbins - stored.size)]

            mask = np.isnan(bias) | (bias == 0)
            pre[mask] = 0
            parts.append(pre)

        cnv = np.concatenate(parts)
        cnvi = rankdata(cnv,
                        method='dense') - 1  # indices for quick Bias retrieval

        # int8 is kept when it suffices; widen only when there are more
        # distinct CNV levels than int8 can index.
        if cnvi.size == 0 or cnvi.max() <= 127:
            cnvi_dtype = 'i1'
        else:
            cnvi_dtype = 'i4'

        cool_path, group_path = util.parse_cooler_uri(cooler_uri)
        h5opts = dict(compression='gzip', compression_opts=6)
        with h5py.File(cool_path, 'r+') as h5:
            bins = h5[group_path]['bins']
            # Drop each stale column independently so a file holding only one
            # of the two does not raise on delete or on create.
            for column in ('CNV', 'CNVI'):
                if column in bins:
                    del bins[column]
            bins.create_dataset('CNV', data=cnv, **h5opts)
            bins.create_dataset('CNVI', data=cnvi, dtype=cnvi_dtype, **h5opts)
    def __init__(self, clr, candidate, span=5000000, col='sweight', protocol='insitu',
        flexible=True, slopes=None):
        """Set up the blocks and orientations for a candidate assembly.

        Parameters
        ----------
        clr
            Object exposing ``binsize``, ``chromnames`` and ``chromsizes``
            (presumably a ``cooler.Cooler`` -- confirm against callers).
        candidate
            Forwarded to ``self.parse_input``, which is expected to set
            ``self.sv_list`` and ``self.bounds``.
        span : int
            Stored as ``self.span``.
        col : str
            ``'weight'`` or ``'sweight'`` selects that balancing column; any
            other value sets ``self.balance_type`` to ``False``.
        protocol : str
            Stored as ``self.protocol``.
        flexible : bool
            Stored as ``self.flexible``.
        slopes : dict, optional
            Stored by reference as ``self.slopes``, so a caller can share one
            dict across instances. When omitted, each instance gets its own
            empty dict instead of one shared mutable default.
        """
        self.clr = clr
        self.res = clr.binsize
        self.protocol = protocol
        self.pre = find_chrom_pre(list(self.clr.chromnames))
        self.span = span
        self.flexible = flexible
        self.slopes = {} if slopes is None else slopes

        if col in ['weight', 'sweight']:
            self.balance_type = col
        else:
            self.balance_type = False

        # Full extent [0, size] of every chromosome in the cooler.
        self.chroms = {}
        for c in self.clr.chromnames:
            self.chroms[c] = [0, self.clr.chromsizes[c]]

        self.parse_input(candidate)
        if len(self.sv_list) == 1:
            # A single SV carries both outer bounds.
            self.tb, self.to = self.get_single_block(self.sv_list[0],
                                            left_bound=self.bounds[0],
                                            right_bound=self.bounds[1])
        else:
            self.tb = []
            self.to = []
            last = len(self.sv_list) - 1
            for i, sv in enumerate(self.sv_list):
                # Only the first SV gets the left bound and only the last SV
                # gets the right bound.
                if i == 0:
                    intervals, directs = self.get_single_block(sv, left_bound=self.bounds[0])
                elif i == last:
                    intervals, directs = self.get_single_block(sv, right_bound=self.bounds[1])
                else:
                    intervals, directs = self.get_single_block(sv)
                self.tb.extend(intervals)
                self.to.extend(directs)

        self.load_expected()
Exemple #5
0
    def __init__(self,
                 clr,
                 c1,
                 c2,
                 p1,
                 p2,
                 s1,
                 s2,
                 sv_type,
                 span=5000000,
                 col='sweight',
                 trim=True,
                 protocol='insitu'):
        """Set up a two-breakpoint SV and load its matrix and expected values.

        Parameters
        ----------
        clr
            Object exposing ``binsize``, ``chromnames`` and ``chromsizes``
            (presumably a ``cooler.Cooler`` -- confirm against callers).
        c1, c2 : str
            Chromosome names of the two breakpoints, with or without a
            leading ``'chr'``.
        p1, p2
            Breakpoint positions, stored as ``self.p1`` / ``self.p2``.
        s1, s2
            Stored as ``self.s1`` / ``self.s2`` (presumably strands).
        sv_type
            First field of ``self.name``.
        span, col, trim
            Forwarded to ``self.get_matrix``; ``col`` is also stored as
            ``self.balance_type``.
        protocol : str
            Stored as ``self.protocol``.
        """

        def _strip_chr(name):
            # Remove 'chr' as a prefix. ``str.lstrip('chr')`` strips any
            # leading 'c', 'h' or 'r' characters and can mangle names.
            return name[3:] if name.startswith('chr') else name

        self.clr = clr
        self.p1 = p1
        self.p2 = p2
        self.s1 = s1
        self.s2 = s2
        self.res = clr.binsize

        # Re-prefix the bare names to match the cooler's naming.
        pre = find_chrom_pre(list(self.clr.chromnames))
        bare1 = _strip_chr(c1)
        bare2 = _strip_chr(c2)
        self.c1 = pre + bare1
        self.c2 = pre + bare2
        self.chromsize1 = self.clr.chromsizes[self.c1]
        self.chromsize2 = self.clr.chromsizes[self.c2]
        self.balance_type = col
        self.protocol = protocol

        self.name = ','.join(
            map(str, [sv_type, bare1, p1, s1, bare2, p2, s2]))

        self.get_matrix(span, col, trim)
        self.load_expected()