Ejemplo n.º 1
0
    def fetch_array(self, genome_range, genome_range2=None, balance=None, resolution='auto'):
        """
        Fetch the data of a genome region from the track's file via ``CoolerWrap``.

        Parameters
        ----------
        genome_range : {str, GenomeRange}
            Intervals within input chromosome range.

        genome_range2 : {str, GenomeRange}, optional
            Second interval, forwarded unchanged to ``CoolerWrap.fetch``.

        balance : bool, optional
            Balance the matrix or not.
            When left as ``None``, `self.is_balance` is used.

        resolution : {'auto', int}
            Resolution of the data, for example 5000.
            Use 'auto' to calculate the resolution automatically.
            Default 'auto'.

        Returns
        -------
        arr : numpy.ndarray
        """
        from coolbox.utilities.hic.wrap import CoolerWrap

        cool_path = self.properties['file']
        # ``None`` means "not specified": fall back to the track-level setting.
        use_balance = self.is_balance if balance is None else balance
        wrapper = CoolerWrap(cool_path, balance=use_balance, binsize=resolution)
        return wrapper.fetch(genome_range, genome_range2)
Ejemplo n.º 2
0
    def infer_binsize(self, gr: GenomeRange, **kwargs) -> int:
        """
        Infer the bin size for ``gr`` by delegating to ``CoolerWrap.infer_binsize``.

        Parameters
        ----------
        gr : GenomeRange
            The genome range handed to the wrapper.

        resolution : {'auto', int}, optional
            Passed via ``kwargs``; used as the wrapper's ``binsize``.
            Default 'auto'.
        """
        from coolbox.utilities.hic.wrap import CoolerWrap

        cool_path = self.properties['file']
        requested = kwargs.get('resolution', 'auto')
        wrapper = CoolerWrap(cool_path, balance=self.balance, binsize=requested)
        return wrapper.infer_binsize(gr)
Ejemplo n.º 3
0
    def fetch_data(self, gr: GenomeRange, gr2=None, **kwargs) -> np.ndarray:
        """
        Fetch the data for ``gr`` (and optionally ``gr2``) through ``CoolerWrap``.

        The bin size is taken from the ``resolution`` keyword when given,
        otherwise from ``self.properties['resolution']``, otherwise 'auto'.
        The bin size actually used by the wrapper is stored on
        ``self.fetched_binsize``, and the fetched array is passed through
        ``self.fill_zero_nan`` before being returned.
        """
        from coolbox.utilities.hic.wrap import CoolerWrap

        cool_path = self.properties['file']

        # An explicit keyword wins over the track property.
        property_binsize = self.properties.get('resolution', 'auto')
        binsize = kwargs.get('resolution', property_binsize)

        wrapper = CoolerWrap(cool_path, balance=self.balance, binsize=binsize)
        matrix = wrapper.fetch(gr, gr2)

        # Expose the bin size the wrapper ended up using.
        self.fetched_binsize = wrapper.fetched_binsize

        return self.fill_zero_nan(matrix)
Ejemplo n.º 4
0
    def fetch_pixels(self,
                     genome_range,
                     genome_range2=None,
                     balance=None,
                     resolution='auto',
                     join=True):
        """
        Fetch the pixels table of a genome region through ``CoolerWrap``.

        Parameters
        ----------
        genome_range : {str, GenomeRange}
            Intervals within input chromosome range.

        genome_range2 : {str, GenomeRange}, optional
            Second interval; converted with ``to_gr`` only when given.

        balance : bool, optional
            Balance the matrix or not.
            When left as ``None``, `self.is_balance` is used.

        resolution : {'auto', int}
            Resolution of the data, for example 5000.
            Use 'auto' to calculate the resolution automatically.
            Default 'auto'.

        join : bool
            Whether to expand the bin ID columns
            into (chrom, start, end).
            Default True.

        Returns
        -------
        pixels : pandas.core.frame.DataFrame
            Hi-C pixels table.
            The pixel table contains the non-zero upper triangle entries of the contact map.
        """
        from coolbox.utilities.hic.wrap import CoolerWrap

        # Normalise both ranges; the second one stays ``None`` when absent.
        region = to_gr(genome_range)
        region2 = None if genome_range2 is None else to_gr(genome_range2)

        cool_path = self.properties['file']
        # ``None`` means "not specified": fall back to the track-level setting.
        use_balance = self.is_balance if balance is None else balance
        wrapper = CoolerWrap(cool_path, balance=use_balance, binsize=resolution)

        return wrapper.fetch_pixels(region, region2, join=join)
Ejemplo n.º 5
0
    def fetch_matrix(self,
                     genome_range,
                     genome_range2=None,
                     resolution='auto') -> np.ndarray:
        """
        ${fetch_matrix}
        """
        # NOTE(review): the docstring above looks like a template placeholder
        # that is filled in elsewhere, so it is left untouched.
        from coolbox.utilities.hic.wrap import CoolerWrap

        cool_path = self.properties['file']
        wrapper = CoolerWrap(cool_path, balance=self.balance, binsize=resolution)
        matrix = wrapper.fetch(genome_range, genome_range2)

        # Expose the bin size the wrapper ended up using.
        self.fetched_binsize = wrapper.fetched_binsize

        return self.fill_zero_nan(matrix)
Ejemplo n.º 6
0
    def fetch_pixels(self, gr: GenomeRange, **kwargs):
        """
        Fetch the pixels table of upper triangle of the original contact matrix (not processed).

        Parameters
        ----------
        gr : GenomeRange
            The genome range handed to ``CoolerWrap.fetch_pixels``.

        gr2 : GenomeRange, optional
            Second range, read from ``kwargs``; ``None`` when absent.

        balance : bool, optional
            Balance the matrix or not,
            default `self.is_balance`.

        resolution : {'auto', int}
            Resolution of the data, for example 5000.
            Use 'auto' to calculate the resolution automatically.
            Default 'auto'.

        join : bool
            Whether to expand the bin ID columns
            into (chrom, start, end).
            Default True.

        Returns
        -------
        pixels : pandas.core.frame.DataFrame
            Hi-C pixels table.
            The pixel table contains the non-zero upper triangle entries of the contact map.
        """
        from coolbox.utilities.hic.wrap import CoolerWrap

        cool_path = self.properties['file']

        # Resolve every optional keyword up front; the track-level balance
        # setting is only a default for a missing ``balance`` key.
        default_balance = self.is_balance
        use_balance = kwargs.get('balance', default_balance)
        binsize = kwargs.get('resolution', 'auto')

        wrapper = CoolerWrap(cool_path, balance=use_balance, binsize=binsize)

        second_range = kwargs.get('gr2')
        expand_bins = kwargs.get('join', True)
        return wrapper.fetch_pixels(gr, second_range, join=expand_bins)
Ejemplo n.º 7
0
    def __fetch_matrix(self, genome_range, resolution='auto'):
        """
        Fetch the matrix for a genome range and post-process it.

        The wrapper is chosen by ``self.file_type``: ``StrawWrap`` for '.hic',
        ``CoolerWrap`` otherwise. Zeros and NaNs in the fetched array are
        replaced in place by ``self.small_value``, and the array is run
        through the transform step unless the 'transform' property is
        missing or set to 'no'.

        Parameters
        ----------
        genome_range : coolbox.utilities.GenomeRange
            The genome range to fetch.

        resolution : {'auto', int}
            The matrix resolution, for multi-resolution(.hic or multi-cool) file.
            Use 'auto' to infer the resolution automatically.
            default 'auto'
        """
        from coolbox.utilities.hic.wrap import StrawWrap, CoolerWrap

        file_path = self.properties['file']
        is_hic = self.file_type == '.hic'
        if is_hic:
            wrapper = StrawWrap(file_path,
                                normalization=self.balance,
                                binsize=resolution)
        else:
            wrapper = CoolerWrap(file_path,
                                 balance=self.balance,
                                 binsize=resolution)

        matrix = wrapper.fetch(genome_range)

        # Expose the bin size the wrapper ended up using.
        self.fetched_binsize = wrapper.fetched_binsize

        # Replace zeros and NaNs with the small value in a single in-place pass.
        fill = self.small_value
        matrix[(matrix == 0) | np.isnan(matrix)] = fill

        if 'transform' in self.properties:
            if self.properties['transform'] != 'no':
                matrix = self.__transform_matrix(matrix)

        return matrix