def test_load_cool(self):
    """Round-trip a synthetic diagonal matrix through a cool file.

    A bins table spanning three chromosomes and a pixels table holding
    only diagonal contacts are written to ``self.tmp_path`` with the
    cooler API, then read back with ``cio.load_cool``. The loaded
    resolution, bin count, column layouts and total contact count must
    match what was written.
    """
    res, n_bins = 5000, 100000
    chrom_names = ["c1", "c2", "c3"]

    # Split the bins evenly between chromosomes; the last chromosome
    # absorbs the remainder so the counts add up to n_bins.
    per_chrom, leftover = divmod(n_bins, 3)
    bins_per_chrom = [per_chrom, per_chrom, per_chrom + leftover]

    # Fixed-size genomic bins: consecutive [start, end) intervals of
    # width `res`, numbered continuously over all bins.
    bin_starts = range(0, res * n_bins, res)
    bin_ends = range(res, res * (n_bins + 1), res)
    bins = pd.DataFrame(
        {
            "chrom": np.repeat(chrom_names, bins_per_chrom),
            "start": bin_starts,
            "end": bin_ends,
        }
    )

    # One pixel per bin, located on the main diagonal, with a random count.
    diag_ids = range(n_bins)
    pixels = pd.DataFrame(
        {
            "bin1_id": diag_ids,
            "bin2_id": diag_ids,
            "count": np.random.randint(0, 100, n_bins),
        }
    )

    # Write both tables to disk as a cool file.
    cooler.create_cooler(self.tmp_path, bins, pixels)

    # Read the file back through the loader under test.
    mat, chroms, loaded_bins, bin_size = cio.load_cool(self.tmp_path)

    # The median difference between consecutive bin starts is a usable
    # estimate of the resolution as long as n_bins >> number of chroms.
    start_steps = loaded_bins.start.shift(1) - loaded_bins.start
    estimated_res = abs(int(np.nanmedian(start_steps)))
    assert res == estimated_res
    assert res == bin_size

    # Table dimensions and column layouts.
    assert n_bins == loaded_bins.shape[0]
    assert np.all(loaded_bins.columns == BIN_COLS)
    assert np.all(chroms.columns == CHR_COLS)

    # No contact may be lost or duplicated by the round trip.
    expected_total = pixels["count"].sum()
    assert mat.sum() == expected_total
import numpy as np import unittest from nose2.tools import params import scipy.sparse as sp import chromosight import chromosight.utils import chromosight.utils.preprocessing as preproc import chromosight.utils.io as cio mat, chroms, bins, res = cio.load_cool("data_test/example.cool") mat = mat.tocsr() # Get all intra-chromosomal matrices intra_mats = [ mat[s:e, s:e] for s, e in zip(chroms["start_bin"], chroms["end_bin"]) ] class TestPreprocessing(unittest.TestCase): def test_get_detectable_bins(self): """Check if correct bin indices are reported as detectable.""" # Make random matrix using uniform sample between 0 and 1 uniform_mat = sp.random(1000, 1000, density=0.1, format="csr") # introduce outlier bin uniform_mat[10, :] = 0.0 uniform_mat[:, 10] = 0.0 uniform_mat = uniform_mat.tocoo() uniform_mat.eliminate_zeros() det_bins = preproc.get_detectable_bins(uniform_mat, inter=False, n_mads=1) # Check if symmetric mode return same detectable rows and cols