def fetch_array(self, genome_range, genome_range2=None, balance=None, resolution='auto'):
    """
    Fetch a contact matrix for the requested range through ``CoolerWrap``.

    Parameters
    ----------
    genome_range : {str, GenomeRange}
        Intervals within input chromosome range.

    genome_range2 : {str, GenomeRange}, optional
        Second range, handed to ``CoolerWrap.fetch`` as-is.

    balance : bool, optional
        Balance the matrix or not. ``None`` (the default) means
        "use `self.is_balance`".

    resolution : {'auto', int}
        Resolution of the data, for example 5000.
        'auto' lets the wrapper work out the resolution itself.
        default 'auto'

    Return
    ------
    arr : numpy.ndarray
    """
    from coolbox.utilities.hic.wrap import CoolerWrap

    cool_path = self.properties['file']
    # Only consult the track-level setting when the caller gave no preference.
    use_balance = self.is_balance if balance is None else balance
    cooler = CoolerWrap(cool_path, balance=use_balance, binsize=resolution)
    return cooler.fetch(genome_range, genome_range2)
def infer_binsize(self, gr: GenomeRange, **kwargs) -> int:
    """
    Ask ``CoolerWrap`` which bin size applies to the given genome range.

    Parameters
    ----------
    gr : GenomeRange
        Range handed to ``CoolerWrap.infer_binsize``.

    **kwargs
        Only ``resolution`` is read; it is passed to the wrapper as
        ``binsize`` and defaults to 'auto'.

    Returns
    -------
    binsize : int
        Whatever ``CoolerWrap.infer_binsize`` returns for ``gr``.
    """
    from coolbox.utilities.hic.wrap import CoolerWrap

    cool_path = self.properties['file']
    requested = kwargs.get('resolution', 'auto')
    cooler = CoolerWrap(cool_path, balance=self.balance, binsize=requested)
    return cooler.infer_binsize(gr)
def fetch_data(self, gr: GenomeRange, gr2=None, **kwargs) -> np.ndarray:
    """
    Fetch the matrix for ``gr`` (and optionally ``gr2``) through ``CoolerWrap``.

    Parameters
    ----------
    gr : GenomeRange
        Range handed to ``CoolerWrap.fetch``.

    gr2 : optional
        Second range, handed to ``CoolerWrap.fetch`` as-is. default None

    **kwargs
        Only ``resolution`` is read. When absent, the track property
        ``'resolution'`` is used, and 'auto' when that is missing too.

    Returns
    -------
    arr : numpy.ndarray
        The fetched matrix after ``self.fill_zero_nan`` has been applied.

    Notes
    -----
    Sets ``self.fetched_binsize`` to the bin size reported by the wrapper.
    """
    from coolbox.utilities.hic.wrap import CoolerWrap

    cool_path = self.properties['file']
    # Precedence: explicit keyword > track property > 'auto'.
    track_default = self.properties.get('resolution', 'auto')
    binsize = kwargs.get('resolution', track_default)

    cooler = CoolerWrap(cool_path, balance=self.balance, binsize=binsize)
    matrix = cooler.fetch(gr, gr2)
    # expose fetched binsize
    self.fetched_binsize = cooler.fetched_binsize
    return self.fill_zero_nan(matrix)
def fetch_pixels(self, genome_range, genome_range2=None, balance=None, resolution='auto', join=True):
    """
    Fetch the pixels table for the requested range through ``CoolerWrap``.

    Parameters
    ----------
    genome_range : {str, GenomeRange}
        Intervals within input chromosome range. Converted with ``to_gr``.

    genome_range2 : {str, GenomeRange}, optional
        Second range; converted with ``to_gr`` when it is not None.

    balance : bool, optional
        Balance the matrix or not. ``None`` (the default) means
        "use `self.is_balance`".

    resolution : {'auto', int}
        Resolution of the data, for example 5000.
        'auto' lets the wrapper work out the resolution itself.
        default 'auto'

    join : bool
        Whether to expand the bin ID columns
        into (chrom, start, end).
        default True

    Return
    ------
    pixels : pandas.core.frame.DataFrame
        Hi-C pixels table.
        The pixel table contains the non-zero upper triangle entries of the contact map.
    """
    from coolbox.utilities.hic.wrap import CoolerWrap

    first = to_gr(genome_range)
    # Leave a missing second range as None rather than converting it.
    second = None if genome_range2 is None else to_gr(genome_range2)

    cool_path = self.properties['file']
    use_balance = self.is_balance if balance is None else balance
    cooler = CoolerWrap(cool_path, balance=use_balance, binsize=resolution)
    return cooler.fetch_pixels(first, second, join=join)
def fetch_matrix(self, genome_range, genome_range2=None, resolution='auto') -> np.ndarray:
    """
    ${fetch_matrix}
    """
    # NOTE(review): the docstring above looks like a template placeholder that
    # is filled in elsewhere, so its text is deliberately left untouched.
    from coolbox.utilities.hic.wrap import CoolerWrap

    cool_path = self.properties['file']
    cooler = CoolerWrap(cool_path, balance=self.balance, binsize=resolution)
    matrix = cooler.fetch(genome_range, genome_range2)
    # expose fetched binsize
    self.fetched_binsize = cooler.fetched_binsize
    # Hand the raw matrix to the zero/NaN filling step before returning it.
    return self.fill_zero_nan(matrix)
def fetch_pixels(self, gr: GenomeRange, **kwargs):
    """
    Fetch the pixels table of upper triangle of the original contact matrix (not processed).

    Parameters
    ----------
    gr : GenomeRange
        Range handed to ``CoolerWrap.fetch_pixels``.

    gr2 : GenomeRange, optional
        Second range, read from ``kwargs``; None when not given.

    balance : bool, optional
        Balance the matrix or not, default `self.is_balance`.

    resolution : {'auto', int}
        Resolution of the data, for example 5000.
        'auto' lets the wrapper work out the resolution itself.
        default 'auto'

    join : bool
        Whether to expand the bin ID columns
        into (chrom, start, end).
        default True

    Returns
    -------
    pixels : pandas.core.frame.DataFrame
        Hi-C pixels table.
        The pixel table contains the non-zero upper triangle entries of the contact map.
    """
    from coolbox.utilities.hic.wrap import CoolerWrap

    cool_path = self.properties['file']
    # All options arrive through **kwargs; each one has its own fallback.
    use_balance = kwargs.get('balance', self.is_balance)
    binsize = kwargs.get('resolution', 'auto')
    cooler = CoolerWrap(cool_path, balance=use_balance, binsize=binsize)

    second = kwargs.get('gr2')
    expand_bins = kwargs.get('join', True)
    return cooler.fetch_pixels(gr, second, join=expand_bins)
def __fetch_matrix(self, genome_range, resolution='auto'):
    """
    Fetch the matrix, fill empty cells and apply the configured transform.

    Parameters
    ----------
    genome_range : coolbox.utilities.GenomeRange
        The genome range to fetch.

    resolution : {'auto', int}
        The matrix resolution, for multi-resolution (.hic or multi-cool) file.
        Use 'auto' to infer the resolution automatically.
        default 'auto'

    Returns
    -------
    arr
        The fetched matrix with zeros and NaNs replaced by
        ``self.small_value``. When the ``'transform'`` property is present
        and is not 'no', the result of ``self.__transform_matrix`` is
        returned instead.

    Notes
    -----
    Sets ``self.fetched_binsize`` to the bin size reported by the wrapper.
    """
    from coolbox.utilities.hic.wrap import StrawWrap, CoolerWrap

    file_path = self.properties['file']
    # '.hic' files go through StrawWrap; everything else is read with CoolerWrap.
    if self.file_type != '.hic':
        reader = CoolerWrap(file_path, balance=self.balance, binsize=resolution)
    else:
        reader = StrawWrap(file_path, normalization=self.balance, binsize=resolution)

    matrix = reader.fetch(genome_range)
    # expose fetched binsize
    self.fetched_binsize = reader.fetched_binsize

    # fill zero and nan with small value (in place, zeros first, then NaNs)
    fill = self.small_value
    matrix[matrix == 0] = fill
    matrix[np.isnan(matrix)] = fill

    # A missing 'transform' property is treated the same as 'no'.
    if self.properties.get('transform', 'no') != 'no':
        matrix = self.__transform_matrix(matrix)
    return matrix