Example #1
0
    def test_load_cool(self):
        """Round-trip a synthetic diagonal matrix through a cool file.

        A bins table and a diagonal-only pixels table are written with
        cooler.create_cooler, read back with cio.load_cool, and the four
        returned values are checked against what was written.
        """
        resolution = 5000
        total_bins = 100000

        # Three chromosomes; the last one absorbs the remainder bins.
        chrom_labels = ["c1", "c2", "c3"]
        per_chrom, leftover = divmod(total_bins, 3)
        chrom_bin_counts = [per_chrom, per_chrom, per_chrom + leftover]

        # Fixed-width bins laid end to end.
        bin_starts = range(0, resolution * total_bins, resolution)
        bin_ends = range(resolution, resolution * (total_bins + 1), resolution)
        input_bins = pd.DataFrame(
            {
                "chrom": np.repeat(chrom_labels, chrom_bin_counts),
                "start": bin_starts,
                "end": bin_ends,
            }
        )

        # One pixel per bin, located on the diagonal, with a random count.
        pixels = pd.DataFrame(
            {
                "bin1_id": range(total_bins),
                "bin2_id": range(total_bins),
                "count": np.random.randint(0, 100, total_bins),
            }
        )

        # Write the tables to disk through the cooler API.
        cooler.create_cooler(self.tmp_path, input_bins, pixels)

        # Read the file back with the loader under test.
        mat, chroms, loaded_bins, bin_size = cio.load_cool(self.tmp_path)

        # The median step between consecutive bin starts estimates the
        # resolution, provided there are many more bins than chromosomes.
        start_steps = loaded_bins.start.shift(1) - loaded_bins.start
        estimated_res = abs(int(np.nanmedian(start_steps)))
        assert resolution == estimated_res
        assert resolution == bin_size
        assert total_bins == loaded_bins.shape[0]
        assert np.all(loaded_bins.columns == BIN_COLS)
        assert np.all(chroms.columns == CHR_COLS)
        assert mat.sum() == pixels["count"].sum()
Example #2
0
import numpy as np
import unittest
from nose2.tools import params
import scipy.sparse as sp
import chromosight
import chromosight.utils
import chromosight.utils.preprocessing as preproc
import chromosight.utils.io as cio

# Module-level fixture: load the example cool file once for all tests.
mat, chroms, bins, res = cio.load_cool("data_test/example.cool")
# Convert to CSR before slicing out the per-chromosome blocks.
mat = mat.tocsr()
# One square sub-matrix per chromosome, cut along the diagonal using the
# start_bin / end_bin columns of the chromosome table.
intra_mats = [
    mat[first_bin:last_bin, first_bin:last_bin]
    for first_bin, last_bin in zip(chroms["start_bin"], chroms["end_bin"])
]


class TestPreprocessing(unittest.TestCase):
    def test_get_detectable_bins(self):
        """Check if correct bin indices are reported as detectable."""
        # Make random matrix using uniform sample between 0 and 1
        uniform_mat = sp.random(1000, 1000, density=0.1, format="csr")
        # introduce outlier bin
        uniform_mat[10, :] = 0.0
        uniform_mat[:, 10] = 0.0
        uniform_mat = uniform_mat.tocoo()
        uniform_mat.eliminate_zeros()
        det_bins = preproc.get_detectable_bins(uniform_mat,
                                               inter=False,
                                               n_mads=1)
        # Check if symmetric mode return same detectable rows and cols