import h5py
from cooler.util import parse_cooler_uri


def rename_weights(cooleruri, name_map):
    cool_path, group_path = parse_cooler_uri(cooleruri)
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        h5opts = dict(compression='gzip', compression_opts=6)
        for old_name, new_name in name_map.items():
            # copy the data under the new name, then delete the old dataset
            weights = grp['bins'][old_name][()]
            grp['bins'].create_dataset(new_name, data=weights, **h5opts)
            del grp['bins'][old_name]
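
A minimal usage sketch; the file name 'sample.cool' and the target column name 'ICE' are hypothetical, and the file is assumed to already contain a 'weight' column:

# Hypothetical usage: rename the default 'weight' column to 'ICE'
rename_weights('sample.cool', {'weight': 'ICE'})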
Example #2
def test_parse_cooler_uri():
    for uri in [
            '/foo/bar/baz.mcool::resolutions/1000',
            '/foo/bar/baz.mcool::/resolutions/1000'
    ]:
        fp, gp = util.parse_cooler_uri(uri)
        assert fp == '/foo/bar/baz.mcool'
        assert gp == '/resolutions/1000'

    for uri in ['/foo/bar/baz.cool', '/foo/bar/baz.cool::/']:
        fp, gp = util.parse_cooler_uri(uri)
        assert fp == '/foo/bar/baz.cool'
        assert gp == '/'

    for uri in [
            '/foo/bar/baz.cool::/a/b::c.cool',
    ]:
        with pytest.raises(ValueError):
            util.parse_cooler_uri(uri)
def matrix_balance(cool_uri,
                   nproc=1,
                   chunksize=int(1e7),
                   mad_max=5,
                   min_nnz=10,
                   min_count=0,
                   ignore_diags=1,
                   tol=1e-5,
                   max_iters=1000):
    '''
    Perform separate matrix balancing for regions with different copy numbers
    and output the bias vector in the "sweight" column.
    '''
    cool_path, group_path = util.parse_cooler_uri(cool_uri)
    # Overwrite the existing sweight column
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        if 'sweight' in grp['bins']:
            del grp['bins']['sweight']

    clr = Cooler(cool_uri)

    try:
        if nproc > 1:
            pool = balance.Pool(nproc)
            map_ = pool.imap_unordered
        else:
            map_ = map

        bias, stats = iterative_correction(clr,
                                           chunksize=chunksize,
                                           tol=tol,
                                           min_nnz=min_nnz,
                                           min_count=min_count,
                                           mad_max=mad_max,
                                           max_iters=max_iters,
                                           ignore_diags=ignore_diags,
                                           rescale_marginals=True,
                                           use_lock=False,
                                           map=map_)
    finally:
        if nproc > 1:
            pool.close()

    if not stats['converged']:
        logger.error('Iteration limit reached without convergence')
        logger.error('Storing final result. Check log to assess convergence.')

    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset('sweight', data=bias, **h5opts)
        grp['bins']['sweight'].attrs.update(stats)
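
A hedged usage sketch; the multi-resolution URI and the 10 kb resolution below are assumptions for illustration:

# Hypothetical usage: balance one resolution of an .mcool file, then
# read back the stored 'sweight' column through the bin table.
matrix_balance('sample.mcool::/resolutions/10000', nproc=4)
clr = Cooler('sample.mcool::/resolutions/10000')
sweight = clr.bins()['sweight'][:]  # per-bin bias values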
def store_weights(cooleruri, bias, weightname):
    '''
    Stores an iterable of values as a new weight column in the given cooler URI
    under the name `weightname`. Code adapted from cooler's `balance` CLI; see
    https://github.com/mirnylab/cooler/blob/master/cooler/cli/balance.py

    :param cooleruri:   URI of a given cooler
    :param bias:        iterable containing balancing weights for each genomic bin
    :param weightname:  name of the weight column

    :return:            None
    '''
    cool_path, group_path = parse_cooler_uri(cooleruri)
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset(weightname, data=bias, **h5opts)
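
A usage sketch under the assumption that 'sample.cool' exists; the column name 'unity' and the all-ones vector are purely illustrative:

import cooler
import numpy as np

clr = cooler.Cooler('sample.cool')
# Hypothetical: store a trivial all-ones bias as a new column 'unity'
store_weights('sample.cool', np.ones(len(clr.bins())), 'unity')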
def check_weight(cooleruri, weight_name):
    '''
    Checks whether a weight column named `weight_name` already exists
    in the given cooler file.

    :param cooleruri:   URI of a given cooler
    :param weight_name: name of the weight column to check for

    :return:            True if the weight column exists, else False
    '''

    cool_path, group_path = parse_cooler_uri(cooleruri)
    # open read-only; no modification is needed for the check
    with h5py.File(cool_path, 'r') as h5:
        grp = h5[group_path]
        weight_exists = weight_name in grp['bins']

    return weight_exists
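
check_weight pairs naturally with store_weights as a guard against overwriting; a sketch with hypothetical names ('sample.cool', 'KR') and a stand-in bias vector:

import cooler
import numpy as np

clr = cooler.Cooler('sample.cool')
kr_bias = np.ones(len(clr.bins()))  # stand-in for a real KR bias vector
# only write the 'KR' column if it is not already present
if not check_weight('sample.cool', 'KR'):
    store_weights('sample.cool', kr_bias, 'KR')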
Example #6
    def assign_cnv(self, cooler_uri):

        cooler_lib = cooler.Cooler(cooler_uri)
        ref_pre = find_chrom_pre(cooler_lib.chromnames)
        cnv = np.r_[[]]
        for ref_k in cooler_lib.chromnames:  # iterate chromosomes in bin-table order
            bias = cooler_lib.bins().fetch(ref_k)['weight'].values
            # slice off the prefix; str.lstrip strips characters, not a prefix
            c = ref_k[len(ref_pre):]
            if c not in self.bin_cnv:
                pre = np.zeros(len(bias))
                cnv = np.r_[cnv, pre]
                continue

            pre = self.bin_cnv[c]
            if len(bias) <= pre.size:
                # copy: slicing yields a view, and the masking below would
                # otherwise mutate the stored copy-number vector in place
                pre = pre[:len(bias)].copy()
            else:
                add = np.zeros(len(bias) - pre.size)
                pre = np.r_[pre, add]

            mask = np.isnan(bias) | (bias == 0)
            pre[mask] = 0

            cnv = np.r_[cnv, pre]

        cnvi = rankdata(cnv, method='dense') - 1  # indices for quick bias retrieval

        # pre-check and (re)write the CNV / CNVI columns in one pass
        cool_path, group_path = util.parse_cooler_uri(cooler_uri)
        with h5py.File(cool_path, 'r+') as h5:
            grp = h5[group_path]
            # overwrite existing columns; guard each delete separately
            if 'CNV' in grp['bins']:
                del grp['bins']['CNV']
            if 'CNVI' in grp['bins']:
                del grp['bins']['CNVI']
            h5opts = dict(compression='gzip', compression_opts=6)
            grp['bins'].create_dataset('CNV', data=cnv, **h5opts)
            grp['bins'].create_dataset('CNVI', data=cnvi, dtype='i1', **h5opts)

        del cnv, cnvi
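
Once written, both columns are visible through cooler's bin table; a minimal read-back sketch (the file name is an assumption):

import cooler

clr = cooler.Cooler('sample.cool')
bins = clr.bins()[:]            # DataFrame now includes 'CNV' and 'CNVI'
cnv_per_bin = bins['CNV'].values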
def save_weight(coolfile, chroms, weights, bal_info=None, col_name='weight'):
    """
    coolfile - URI of the cooler file to add weights to
    chroms - list of chromosomes to modify
    weights - list of weight vectors to write to the corresponding chromosomes
    bal_info - dict to add as attributes of the weight column
    col_name - name of the weight column to create or update
    """
    cool_path, group_path = util.parse_cooler_uri(coolfile)
    c = cooler.Cooler(coolfile)
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        h5opts = dict(compression='gzip', compression_opts=6)
        # create the column filled with NaN if it does not exist yet
        if col_name not in grp['bins'].keys():
            allweights = np.full(grp['bins']['chrom'].shape[0], np.nan)
            grp['bins'].create_dataset(col_name, data=allweights, **h5opts)
        for chrom, weight in zip(chroms, weights):
            s, e = c.extent(chrom)
            grp['bins'][col_name][s:e] = weight
        # update the attributes once, not once per chromosome
        if bal_info:
            grp['bins'][col_name].attrs.update(bal_info)
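
A usage sketch with assumed chromosome names and stand-in per-chromosome vectors sized via clr.extent:

import cooler
import numpy as np

clr = cooler.Cooler('sample.cool')
chroms = ['chr1', 'chr2']  # assumed chromosome names
# stand-ins for real per-chromosome bias vectors
weights = [np.ones(clr.extent(ch)[1] - clr.extent(ch)[0]) for ch in chroms]
save_weight('sample.cool', chroms, weights,
            bal_info={'converged': True}, col_name='weight_cis')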
Example #8
def balance(cool_uri, nproc=1, chunksize=int(1e7), mad_max=5, min_nnz=10,
            min_count=0, ignore_diags=1, tol=1e-5, max_iters=200):
    """
    Cooler contact matrix balancing.
    
    Parameters
    ----------
    cool_uri : str
        URI of cooler group.
    nproc : int
        Number of processes. (Default: 1)
        
    """
    cool_path, group_path = parse_cooler_uri(cool_uri)
    # pre-check the weight column
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        if 'weight' in grp['bins']:
            del grp['bins']['weight'] # Overwrite the weight column
    
    log.info('Balancing {0}'.format(cool_uri))
    
    clr = Cooler(cool_uri)
    
    try:
        if nproc > 1:
            pool = Pool(nproc)
            map_ = pool.imap_unordered
        else:
            map_ = map
        
        # the metadata stores 'onlyIntra' as a string, hence the comparison
        onlyIntra = clr.info['metadata']['onlyIntra'] == 'True'
        
        bias, stats = ice.iterative_correction(
                clr,
                chunksize=chunksize,
                cis_only=onlyIntra,
                trans_only=False,
                tol=tol,
                min_nnz=min_nnz,
                min_count=min_count,
                blacklist=None,
                mad_max=mad_max,
                max_iters=max_iters,
                ignore_diags=ignore_diags,
                rescale_marginals=True,
                use_lock=False,
                map=map_)
    finally:
        if nproc > 1:
            pool.close()
    
    if not stats['converged']:
        log.error('Iteration limit reached without convergence')
        log.error('Storing final result. Check log to assess convergence.')
    
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset('weight', data=bias, **h5opts)
        grp['bins']['weight'].attrs.update(stats)
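
Note that this variant reads an 'onlyIntra' flag from the cooler's metadata, so the input must have been created with that key; a hedged call sketch with a hypothetical file name:

# Hypothetical usage: requires clr.info['metadata']['onlyIntra'] to exist
balance('sample.cool', nproc=4, max_iters=500)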