import h5py

from cooler.util import parse_cooler_uri


def rename_weights(cooleruri, name_map):
    '''
    renames weight columns in the bin table of the given cooler

    :param cooleruri: URI of the cooler to modify
    :param name_map: dict mapping old weight column names to new ones
    :return: None
    '''
    cool_path, group_path = parse_cooler_uri(cooleruri)
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        h5opts = dict(compression='gzip', compression_opts=6)
        for old_name, new_name in name_map.items():
            # copy the old bias column to a dataset with the new name,
            # then remove the old one
            weights = grp['bins'][old_name][()].copy()
            grp['bins'].create_dataset(new_name, data=weights, **h5opts)
            del grp['bins'][old_name]
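# Usage sketch (hypothetical file and column names, not from the source):
#
#     rename_weights('test.cool::/', {'weight': 'ICE_weight'})
#
# Note that create_dataset() does not carry over HDF5 attributes, so any
# balancing stats attached to the old column are dropped by the rename.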
import pytest

from cooler import util


def test_parse_cooler_uri():
    # a '::' separator addresses a group inside the file;
    # a leading '/' is prepended to the group path if missing
    for uri in [
        '/foo/bar/baz.mcool::resolutions/1000',
        '/foo/bar/baz.mcool::/resolutions/1000',
    ]:
        fp, gp = util.parse_cooler_uri(uri)
        assert fp == '/foo/bar/baz.mcool'
        assert gp == '/resolutions/1000'

    # without a group part the group path defaults to the root '/'
    for uri in ['/foo/bar/baz.cool', '/foo/bar/baz.cool::/']:
        fp, gp = util.parse_cooler_uri(uri)
        assert fp == '/foo/bar/baz.cool'
        assert gp == '/'

    # more than one '::' separator is rejected
    for uri in [
        '/foo/bar/baz.cool::/a/b::c.cool',
    ]:
        with pytest.raises(ValueError):
            util.parse_cooler_uri(uri)
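# The test can be run on its own with pytest; the module path below is
# illustrative and depends on where this test file lives:
#
#     pytest -q tests/test_util.py::test_parse_cooler_uri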
import logging

from multiprocessing import Pool

import h5py

from cooler import Cooler, util
from cooler.balance import iterative_correction

logger = logging.getLogger(__name__)


def matrix_balance(cool_uri, nproc=1, chunksize=int(1e7), mad_max=5,
                   min_nnz=10, min_count=0, ignore_diags=1, tol=1e-5,
                   max_iters=1000):
    '''
    Perform separate matrix balancing for regions with different copy numbers
    and output the bias vector in the "sweight" column.
    '''
    cool_path, group_path = util.parse_cooler_uri(cool_uri)

    # Overwrite the existing sweight column
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        if 'sweight' in grp['bins']:
            del grp['bins']['sweight']

    clr = Cooler(cool_uri)

    try:
        if nproc > 1:
            pool = Pool(nproc)
            map_ = pool.imap_unordered
        else:
            map_ = map

        bias, stats = iterative_correction(
            clr,
            chunksize=chunksize,
            tol=tol,
            min_nnz=min_nnz,
            min_count=min_count,
            mad_max=mad_max,
            max_iters=max_iters,
            ignore_diags=ignore_diags,
            rescale_marginals=True,
            use_lock=False,
            map=map_)
    finally:
        if nproc > 1:
            pool.close()

    if not stats['converged']:
        logger.error('Iteration limit reached without convergence')
        logger.error('Storing final result. Check log to assess convergence.')

    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset('sweight', data=bias, **h5opts)
        grp['bins']['sweight'].attrs.update(stats)
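# Usage sketch (hypothetical cooler path; the URI must point at a writable
# file since the function mutates it in place):
#
#     matrix_balance('sample.10kb.cool::/', nproc=4, ignore_diags=2)
#
# The stored bias can then be applied when fetching a balanced matrix:
#
#     Cooler('sample.10kb.cool::/').matrix(balance='sweight')[:]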
import h5py

from cooler.util import parse_cooler_uri


def store_weights(cooleruri, bias, weightname):
    '''
    stores an iterable of values as a new weight column in the given cooleruri
    with name set to weightname. code taken from cooler's balance CLI
    see also https://github.com/mirnylab/cooler/blob/master/cooler/cli/balance.py

    :param cooleruri: URI to a given cooler
    :param bias: iterable containing balancing weights for each genomic bin
    :param weightname: name of the weight column
    :return: None
    '''
    cool_path, group_path = parse_cooler_uri(cooleruri)
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset(weightname, data=bias, **h5opts)
import h5py

from cooler.util import parse_cooler_uri


def check_weight(cooleruri, weight_name):
    '''
    checks if weight_name already exists in the given cooler file

    :param cooleruri: URI of the cooler to check
    :param weight_name: name of the weight column to check for
    :return: True if the weight is already in the cooler else False
    '''
    cool_path, group_path = parse_cooler_uri(cooleruri)
    # read-only access is enough for an existence check
    with h5py.File(cool_path, 'r') as h5:
        return weight_name in h5[group_path]['bins']
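# Sketch combining the two helpers above, assuming a hypothetical cooler at
# 'sample.cool' and a bias vector with one entry per genomic bin (the 'KR'
# column name is illustrative):
#
#     import numpy as np
#
#     uri = 'sample.cool::/'
#     bias = np.ones(1000)          # placeholder weights, one per bin
#     if not check_weight(uri, 'KR'):
#         store_weights(uri, bias, 'KR')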
import h5py
import numpy as np
import cooler

from cooler import util
from scipy.stats import rankdata


# Method of a CNV-profile class: relies on `self.bin_cnv`, a dict of
# per-chromosome copy-number arrays, and the module helper `find_chrom_pre`.
def assign_cnv(self, cooler_uri):
    cooler_lib = cooler.Cooler(cooler_uri)
    ref_pre = find_chrom_pre(cooler_lib.chromnames)
    cnv = np.r_[[]]
    for ref_k in cooler_lib.chromnames:  # line up with the bin table
        bias = cooler_lib.bins().fetch(ref_k)['weight'].values
        c = ref_k.lstrip(ref_pre)
        if c not in self.bin_cnv:
            pre = np.zeros(len(bias))
            cnv = np.r_[cnv, pre]
            continue
        # copy so the stored profile is not mutated by the masking below
        pre = self.bin_cnv[c].copy()
        if len(bias) <= pre.size:
            pre = pre[:len(bias)]
        else:
            add = np.zeros(len(bias) - pre.size)
            pre = np.r_[pre, add]
        mask = np.isnan(bias) | (bias == 0)
        pre[mask] = 0
        cnv = np.r_[cnv, pre]

    cnvi = rankdata(cnv, method='dense') - 1  # indices for quick bias retrieval

    # pre-check the CNV/CNVI columns and overwrite them if present
    cool_path, group_path = util.parse_cooler_uri(cooler_uri)
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        if 'CNV' in grp['bins']:
            del grp['bins']['CNV']
        if 'CNVI' in grp['bins']:
            del grp['bins']['CNVI']

    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset('CNV', data=cnv, **h5opts)
        grp['bins'].create_dataset('CNVI', data=cnvi, dtype='i1', **h5opts)

    del cnv, cnvi
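# The CNVI column stores dense ranks of the CNV values so a per-level bias
# table can be indexed directly. A self-contained toy illustration of that
# trick (numbers are made up, not from the source):

import numpy as np
from scipy.stats import rankdata

toy_cnv = np.array([0.0, 2.0, 2.0, 4.0, 0.0])  # per-bin copy numbers
toy_cnvi = (rankdata(toy_cnv, method='dense') - 1).astype(int)  # [0, 1, 1, 2, 0]

bias_per_level = np.array([1.0, 0.8, 0.5])  # hypothetical per-level biases
per_bin_bias = bias_per_level[toy_cnvi]     # retrieval by plain indexing
print(per_bin_bias)                         # [1.  0.8 0.8 0.5 1. ]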
import h5py
import numpy as np
import cooler

from cooler import util


def save_weight(coolfile, chroms, weights, bal_info={}, col_name='weight'):
    """
    coolfile - File to add weights to
    chroms   - List of chromosomes to modify
    weights  - List of weight vectors to write to corresponding chromosomes
    bal_info - Dict to add as attributes of the weight column
    col_name - Name of the weight column to write (default: 'weight')
    """
    cool_path, group_path = util.parse_cooler_uri(coolfile)
    c = cooler.Cooler(coolfile)
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # create the column initialized to NaN if it does not exist yet
        h5opts = dict(compression='gzip', compression_opts=6)
        allweights = np.asarray([np.nan] * grp['bins']['chrom'].shape[0])
        if col_name not in grp['bins'].keys():
            grp['bins'].create_dataset(col_name, data=allweights, **h5opts)
        # fill in the slice of the column covered by each chromosome
        for chrom, weight in zip(chroms, weights):
            s, e = c.extent(chrom)
            grp['bins'][col_name][s:e] = weight
        grp['bins'][col_name].attrs.update(bal_info)
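# Usage sketch (hypothetical file; each weight vector must match its
# chromosome's bin count, which Cooler.extent() can provide):
#
#     clr = cooler.Cooler('sample.cool::/')
#     s1, e1 = clr.extent('chr1')
#     save_weight('sample.cool::/', ['chr1'], [np.ones(e1 - s1)],
#                 bal_info={'converged': True}, col_name='custom_weight')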
import logging

from multiprocessing import Pool

import h5py

from cooler import Cooler, ice
from cooler.util import parse_cooler_uri

log = logging.getLogger(__name__)


def balance(cool_uri, nproc=1, chunksize=int(1e7), mad_max=5, min_nnz=10,
            min_count=0, ignore_diags=1, tol=1e-5, max_iters=200):
    """
    Cooler contact matrix balancing.

    Parameters
    ----------
    cool_uri : str
        URI of cooler group.
    nproc : int
        Number of processes. (Default: 1)

    The remaining keyword arguments are passed through to
    ice.iterative_correction.
    """
    cool_path, group_path = parse_cooler_uri(cool_uri)

    # pre-check the weight column
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        if 'weight' in grp['bins']:
            del grp['bins']['weight']  # Overwrite the weight column

    log.info('Balancing {0}'.format(cool_uri))

    clr = Cooler(cool_uri)

    try:
        if nproc > 1:
            pool = Pool(nproc)
            map_ = pool.imap_unordered
        else:
            map_ = map

        # the cooler's metadata records whether only intra-chromosomal
        # contacts were loaded; balance cis-only in that case
        onlyIntra = clr.info['metadata']['onlyIntra'] == 'True'

        bias, stats = ice.iterative_correction(
            clr,
            chunksize=chunksize,
            cis_only=onlyIntra,
            trans_only=False,
            tol=tol,
            min_nnz=min_nnz,
            min_count=min_count,
            blacklist=None,
            mad_max=mad_max,
            max_iters=max_iters,
            ignore_diags=ignore_diags,
            rescale_marginals=True,
            use_lock=False,
            map=map_)
    finally:
        if nproc > 1:
            pool.close()

    if not stats['converged']:
        log.error('Iteration limit reached without convergence')
        log.error('Storing final result. Check log to assess convergence.')

    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset('weight', data=bias, **h5opts)
        grp['bins']['weight'].attrs.update(stats)
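# Usage sketch (hypothetical path; the cooler's metadata must carry the
# 'onlyIntra' flag this function reads):
#
#     balance('GM12878.40kb.cool::/', nproc=8)
#
#     # convergence stats are stored as HDF5 attributes of the weight column
#     with h5py.File('GM12878.40kb.cool', 'r') as h5:
#         print(dict(h5['/bins/weight'].attrs))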