def test_u_statistic():
    """Compare ``scprep.stats.rank_sum_statistic`` with ``stats.mannwhitneyu``.

    Two 500 x 3 matrices are generated, the reference statistic is taken
    column by column from ``stats.mannwhitneyu`` (two-sided), and the stacked
    1000 x 3 matrix is run through every matrix type.
    """
    X = data.generate_positive_sparse_matrix(
        shape=(500, 3), seed=42, poisson_mean=0.2
    )
    Y = data.generate_positive_sparse_matrix(
        shape=(500, 3), seed=42, poisson_mean=0.3
    )

    # Reference value: first element of the scipy result for each column.
    u_stat = []
    for col in range(X.shape[1]):
        reference = stats.mannwhitneyu(X[:, col], Y[:, col], alternative="two-sided")
        u_stat.append(reference[0])

    def test_fun(X):
        # The stacked input holds the first group in rows 0-499 and the
        # second group in rows 500-999.
        first_group = scprep.select.select_rows(X, idx=np.arange(500))
        second_group = scprep.select.select_rows(X, idx=np.arange(500, 1000))
        return scprep.stats.rank_sum_statistic(first_group, second_group)

    stacked = np.vstack([X, Y])
    matrix.test_all_matrix_types(
        stacked,
        utils.assert_transform_equals,
        Y=u_stat,
        transform=test_fun,
        check=utils.assert_all_close,
    )
def test_t_statistic():
    """Compare ``scprep.stats.t_statistic`` with ``stats.ttest_ind``.

    The reference statistic is computed per column with
    ``equal_var=False``; results for every matrix type must match it to a
    relative tolerance of 2e-3.
    """
    X = data.generate_positive_sparse_matrix(
        shape=(500, 3), seed=42, poisson_mean=0.2
    )
    Y = data.generate_positive_sparse_matrix(
        shape=(500, 3), seed=42, poisson_mean=0.3
    )

    # Reference value: first element of the scipy result for each column.
    t_stat = []
    for col in range(X.shape[1]):
        reference = stats.ttest_ind(X[:, col], Y[:, col], equal_var=False)
        t_stat.append(reference[0])

    def test_fun(X):
        # Rows 0-499 are the first group, rows 500-999 the second.
        first_group = scprep.select.select_rows(X, idx=np.arange(500))
        second_group = scprep.select.select_rows(X, idx=np.arange(500, 1000))
        return scprep.stats.t_statistic(first_group, second_group)

    stacked = np.vstack([X, Y])
    loose_all_close = partial(utils.assert_all_close, rtol=2e-3)
    matrix.test_all_matrix_types(
        stacked,
        utils.assert_transform_equals,
        Y=t_stat,
        transform=test_fun,
        check=loose_all_close,
    )
def test_batch_mean_center():
    """Exercise ``scprep.normalize.batch_mean_center`` with and without batches.

    Dense matrix types must reproduce a manually centered reference;
    sparse matrix types are expected to raise ``ValueError``.
    """
    # --- two batches given through ``sample_idx`` ---
    X = data.generate_positive_sparse_matrix()
    sample_idx = np.random.choice([0, 1], X.shape[0], replace=True)
    X[sample_idx == 1] += 1

    Y = X.copy()
    for batch in (0, 1):
        in_batch = sample_idx == batch
        Y[in_batch] -= np.mean(Y[in_batch], axis=0)[None, :]
    # Sanity check on the reference itself: each batch now has zero mean.
    for batch in (0, 1):
        utils.assert_all_close(np.mean(Y[sample_idx == batch], axis=0), 0)

    matrix.test_dense_matrix_types(
        X,
        utils.assert_transform_equivalent,
        Y=Y,
        transform=partial(
            scprep.normalize.batch_mean_center, sample_idx=sample_idx
        ),
    )
    matrix.test_sparse_matrix_types(
        X,
        utils.assert_transform_raises,
        transform=partial(
            scprep.normalize.batch_mean_center, sample_idx=sample_idx
        ),
        exception=ValueError,
    )

    # --- no ``sample_idx``: the whole matrix is centered at once ---
    X = data.generate_positive_sparse_matrix()
    Y = X.copy()
    Y -= np.mean(Y, axis=0)[None, :]

    matrix.test_dense_matrix_types(
        X,
        utils.assert_transform_equivalent,
        Y=Y,
        transform=partial(scprep.normalize.batch_mean_center),
    )
    matrix.test_sparse_matrix_types(
        X,
        utils.assert_transform_raises,
        transform=partial(scprep.normalize.batch_mean_center),
        exception=ValueError,
    )
def test_sqrt_transform():
    """Check ``scprep.transform.sqrt`` against ``np.sqrt`` on all matrix types."""
    X = data.generate_positive_sparse_matrix()
    expected = np.sqrt(X)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equivalent,
        Y=expected,
        transform=scprep.transform.sqrt,
    )
def test_matrix_elementwise_multiply_guess_wrong_size():
    """A length-10 multiplier on a 50 x 100 matrix must raise ``ValueError``."""
    X = data.generate_positive_sparse_matrix(shape=(50, 100))
    # Ten entries match neither the row count (50) nor the column count (100).
    bad_multiplier = X[0, :10]
    expected_message = (
        "Expected `multiplier` to be a vector of length `data.shape[0]` (50) "
        "or `data.shape[1]` (100). Got (10,)"
    )
    assert_raise_message(
        ValueError,
        expected_message,
        scprep.utils.matrix_vector_elementwise_multiply,
        X,
        bad_multiplier,
    )
def test_to_array_or_spmatrix_list_of_strings():
    """Convert a mixed list (arrays, strings, a plain list) element-wise.

    Only the array-like entries are checked: positions 0 and 4 must be
    ``np.ndarray`` and position 1 must stay a ``sparse.csr_matrix``.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 50))
    mixed_input = [X, sparse.csr_matrix(X), "hello", "world", [1, 2, 3]]
    converted = scprep.utils.to_array_or_spmatrix(mixed_input)

    expected_types = (
        (0, np.ndarray),
        (1, sparse.csr_matrix),
        (4, np.ndarray),
    )
    for position, expected_type in expected_types:
        assert isinstance(converted[position], expected_type)
def test_toarray_vector():
    """``scprep.utils.toarray`` must return an ndarray for pandas vector types."""
    X = data.generate_positive_sparse_matrix(shape=(50,))

    def test_fun(X):
        converted = scprep.utils.toarray(X)
        assert isinstance(converted, np.ndarray)

    matrix.test_matrix_types(X, test_fun, matrix._pandas_vector_types)
def test_matrix_sum():
    """Check ``scprep.utils.matrix_sum`` for ``axis`` 0, 1 and ``None``.

    For each axis the reference is ``X.sum(axis)`` flattened to 1-D; every
    matrix type, plus an ``np.matrix``, must give sums close to it. An
    unsupported axis must raise ``ValueError``.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 100))

    # axis=0
    sums = np.array(X.sum(0)).flatten()

    def test_fun(X):
        result = scprep.utils.matrix_sum(X, axis=0)
        assert np.allclose(np.array(result), sums)

    matrix.test_all_matrix_types(X, test_fun)
    test_fun(np.matrix(X))

    # axis=1
    sums = np.array(X.sum(1)).flatten()

    def test_fun(X):
        result = scprep.utils.matrix_sum(X, axis=1)
        assert np.allclose(np.array(result), sums)

    matrix.test_all_matrix_types(X, test_fun)
    test_fun(np.matrix(X))

    # axis=None: total over all entries
    sums = np.array(X.sum(None)).flatten()

    def test_fun(X):
        assert np.allclose(scprep.utils.matrix_sum(X, axis=None), sums)

    matrix.test_all_matrix_types(X, test_fun)
    test_fun(np.matrix(X))

    # Use the matrix under test here, not the ``data`` helper namespace, so
    # the invalid-axis error is exercised on real matrix input.
    assert_raise_message(
        ValueError,
        "Expected axis in [0, 1, None]. Got 5",
        scprep.utils.matrix_sum,
        X,
        5,
    )
def test_pairwise_correlation():
    """Check ``scprep.stats.pairwise_correlation`` across matrix types.

    The reference correlates a 500 x 100 matrix with its own first ten
    columns. The same result must come back when the first argument, or the
    second argument, is supplied in each matrix type.
    """

    def test_fun(X, *args, **kwargs):
        first_ten = scprep.select.select_cols(X, idx=np.arange(10))
        return scprep.stats.pairwise_correlation(X, first_ten, *args, **kwargs)

    D = data.generate_positive_sparse_matrix(
        shape=(500, 100), seed=42, poisson_mean=5
    )
    Y = test_fun(D)
    assert Y.shape == (D.shape[1], 10)
    # Entry (i, i) correlates column i with itself, so it must equal 1.
    diagonal = np.arange(10)
    assert np.allclose(Y[diagonal, diagonal], 1, atol=0)

    # Both inputs derived from the same converted matrix.
    matrix.test_all_matrix_types(
        D,
        utils.assert_transform_equals,
        Y=Y,
        transform=test_fun,
        check=utils.assert_all_close,
    )

    # First input converted, second input fixed to the original columns.
    matrix.test_all_matrix_types(
        D,
        utils.assert_transform_equals,
        Y=Y,
        transform=partial(
            scprep.stats.pairwise_correlation,
            Y=scprep.select.select_cols(D, idx=np.arange(10)),
        ),
        check=utils.assert_all_close,
    )

    def test_fun(X, *args, **kwargs):
        return scprep.stats.pairwise_correlation(*args, X=D, Y=X, **kwargs)

    # First input fixed to the original matrix, second input converted.
    matrix.test_all_matrix_types(
        scprep.select.select_cols(D, idx=np.arange(10)),
        utils.assert_transform_equals,
        Y=Y,
        transform=test_fun,
        check=utils.assert_all_close,
    )
def test_subsample(self):
    """``scprep.select.subsample`` with a fixed seed must agree on all types.

    The reference is the subsample of the stored matrix with ``n=20`` and
    ``seed=42``; every matrix type must give exactly the same result.
    """
    self.X = data.generate_positive_sparse_matrix(shape=(50, 100))
    subsample_kwargs = dict(n=20, seed=42)
    expected = scprep.select.subsample(self.X, **subsample_kwargs)
    matrix.test_all_matrix_types(
        self.X,
        utils.assert_transform_equals,
        Y=expected,
        transform=scprep.select.subsample,
        check=utils.assert_all_equal,
        **subsample_kwargs
    )
def test_deprecated():
    """Check the ``*_transform`` aliases in ``scprep.transform``.

    For ``sqrt``, ``log`` and ``arcsinh`` the alias must give a result
    equivalent to the current function and must emit a ``FutureWarning``
    with the expected message.
    """
    X = data.generate_positive_sparse_matrix()

    # (current function, deprecated alias, expected warning text)
    cases = (
        (
            scprep.transform.sqrt,
            scprep.transform.sqrt_transform,
            "scprep.transform.sqrt_transform is deprecated. Please use "
            "scprep.transform.sqrt in future.",
        ),
        (
            scprep.transform.log,
            scprep.transform.log_transform,
            "scprep.transform.log_transform is deprecated. Please use "
            "scprep.transform.log in future.",
        ),
        (
            scprep.transform.arcsinh,
            scprep.transform.arcsinh_transform,
            "scprep.transform.arcsinh_transform is deprecated. Please use "
            "scprep.transform.arcsinh in future.",
        ),
    )

    for current, deprecated, message in cases:
        Y = current(X)
        # Silence the FutureWarning while comparing outputs.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            utils.assert_transform_equivalent(X, Y=Y, transform=deprecated)
        assert_warns_message(FutureWarning, message, deprecated, data=X)
def test_knnDREMI():
    """Exercise ``scprep.stats.knnDREMI`` on a seeded 500 x 2 matrix.

    Covers the pinned scalar result, the plotting / ``return_drevi`` path,
    agreement across matrix types, the constant-input case, and validation
    of ``k``, ``n_bins`` and ``n_mesh``.
    """
    import os

    X = data.generate_positive_sparse_matrix(
        shape=(500, 2), seed=42, poisson_mean=5)
    Y = scprep.stats.knnDREMI(X[:, 0], X[:, 1])
    assert isinstance(Y, float)
    np.testing.assert_allclose(Y, 0.16238906)

    plot_filename = "test.png"
    try:
        Y2, drevi = scprep.stats.knnDREMI(X[:, 0], X[:, 1],
                                          plot=True, filename=plot_filename,
                                          return_drevi=True)
    finally:
        # The plot is presumably written to ``filename``; remove it so the
        # test leaves no artifact in the working directory. The existence
        # check keeps a failure above from being masked by a missing file.
        if os.path.exists(plot_filename):
            os.remove(plot_filename)
    assert Y2 == Y
    assert drevi.shape == (20, 20)

    matrix.test_all_matrix_types(
        X, utils.assert_transform_equals, Y=Y,
        transform=partial(_test_fun_2d, fun=scprep.stats.knnDREMI),
        check=utils.assert_all_close)

    # Constant second variable: expect ``(0, None)``; the accompanying
    # UserWarning is silenced here and asserted separately below.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        assert scprep.stats.knnDREMI(X[:, 0], np.repeat(X[0, 1], X.shape[0]),
                                     return_drevi=True) == (0, None)

    assert_raise_message(
        ValueError,
        "Expected k as an integer. Got ",
        scprep.stats.knnDREMI, X[:, 0], X[:, 1], k="invalid")
    assert_raise_message(
        ValueError,
        "Expected n_bins as an integer. Got ",
        scprep.stats.knnDREMI, X[:, 0], X[:, 1], n_bins="invalid")
    assert_raise_message(
        ValueError,
        "Expected n_mesh as an integer. Got ",
        scprep.stats.knnDREMI, X[:, 0], X[:, 1], n_mesh="invalid")
    assert_warns_message(
        UserWarning,
        "Attempting to calculate kNN-DREMI on a constant array. "
        "Returning `0`",
        scprep.stats.knnDREMI, X[:, 0], np.zeros_like(X[:, 1]))
def test_matrix_elementwise_multiply_square_guess():
    """On a square matrix with no ``axis`` given, expect ``RuntimeError``."""
    X = data.generate_positive_sparse_matrix(shape=(50, 50))
    multiplier = X[0]
    expected_message = (
        "`data` is square, cannot guess axis from input. Please provide "
        "`axis=0` to multiply along rows or "
        "`axis=1` to multiply along columns."
    )
    assert_raise_message(
        RuntimeError,
        expected_message,
        scprep.utils.matrix_vector_elementwise_multiply,
        X,
        multiplier,
    )
def test_matrix_elementwise_multiply_invalid_axis():
    """Passing ``axis=5`` must raise ``ValueError`` with the expected message."""
    X = data.generate_positive_sparse_matrix(shape=(50, 100))
    multiplier = X[0]
    expected_message = "Expected axis in [0, 1, None]. Got 5"
    assert_raise_message(
        ValueError,
        expected_message,
        scprep.utils.matrix_vector_elementwise_multiply,
        X,
        multiplier,
        axis=5,
    )
def test_toarray():
    """``scprep.utils.toarray`` must return an ndarray for every input tried.

    Inputs are every matrix type and a list holding an array and an
    ``np.matrix``.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 50))

    def test_fun(X):
        converted = scprep.utils.toarray(X)
        assert isinstance(converted, np.ndarray)

    matrix.test_all_matrix_types(X, test_fun)
    list_input = [X, np.matrix(X)]
    test_fun(list_input)
def setUpClass(self):
    """Store a 100 x 3000 test matrix and two sets of PCA reference results.

    Attributes set: ``X``, ``X_sparse`` (CSR copy of ``X``), ``Y_random`` /
    ``S_random`` from a 100-component PCA with ``random_state=42``, and
    ``Y_full`` / ``S_full`` from a 50-component PCA with
    ``svd_solver="full"``.
    """
    dense = data.generate_positive_sparse_matrix(shape=[100, 3000])
    self.X = dense
    self.X_sparse = sparse.csr_matrix(self.X)

    # Reference 1: seeded PCA with 100 components.
    seeded_pca = decomposition.PCA(100, random_state=42)
    self.Y_random = seeded_pca.fit_transform(self.X)
    self.S_random = seeded_pca.singular_values_

    # Reference 2: full-SVD PCA with 50 components.
    exact_pca = decomposition.PCA(50, svd_solver="full")
    self.Y_full = exact_pca.fit_transform(self.X)
    self.S_full = exact_pca.singular_values_
def setUpClass(self):
    """Store a test matrix and derived reference values for normalisation.

    Attributes set: ``X``, ``libsize`` (row sums), ``median`` and ``mean``
    of the row sums, ``X_norm`` (``normalize(X, 'l1')``) and a random 0/1
    ``sample_idx`` with one entry per row.
    """
    self.X = data.generate_positive_sparse_matrix()

    row_sums = self.X.sum(axis=1)
    self.libsize = row_sums
    self.median = np.median(row_sums)
    self.mean = np.mean(row_sums)

    self.X_norm = normalize(self.X, 'l1')

    n_rows = self.X.shape[0]
    self.sample_idx = np.random.choice([0, 1], n_rows, replace=True)
def test_toarray():
    """``scprep.utils.toarray`` returns ndarrays and rejects strings.

    Inputs are every matrix type and a list holding an array and an
    ``np.matrix``; a plain string must raise ``TypeError``.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 50))

    def test_fun(X):
        converted = scprep.utils.toarray(X)
        assert isinstance(converted, np.ndarray)

    matrix.test_all_matrix_types(X, test_fun)
    list_input = [X, np.matrix(X)]
    test_fun(list_input)

    assert_raise_message(
        TypeError,
        "Expected array-like. Got ",
        scprep.utils.toarray,
        "hello",
    )
def test_matrix_elementwise_multiply_col_wrong_size():
    """A length-50 multiplier with ``axis=1`` on 50 x 100 data must fail."""
    X = data.generate_positive_sparse_matrix(shape=(50, 100))
    # A single column has 50 entries, but axis=1 expects 100.
    column_vector = X[:, 0]
    expected_message = (
        "Expected `multiplier` to be a vector of length `data.shape[1]` (100)."
        " Got (50,)"
    )
    assert_raise_message(
        ValueError,
        expected_message,
        scprep.utils.matrix_vector_elementwise_multiply,
        X,
        column_vector,
        axis=1,
    )
def test_mutual_information():
    """Check ``scprep.stats.mutual_information`` against ``calc_MI``.

    The value for the two columns of a seeded 500 x 2 matrix (20 bins) must
    be a float close to ``calc_MI`` and must be reproduced for every
    matrix type.
    """
    X = data.generate_positive_sparse_matrix(
        shape=(500, 2), seed=42, poisson_mean=5
    )
    first, second = X[:, 0], X[:, 1]

    Y = scprep.stats.mutual_information(first, second, bins=20)
    assert isinstance(Y, float)
    reference = calc_MI(X[:, 0], X[:, 1], bins=20)
    np.testing.assert_allclose(Y, reference)

    transform = partial(_test_fun_2d, fun=scprep.stats.mutual_information)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=Y,
        transform=transform,
        check=utils.assert_all_close,
        bins=20,
    )
def test_toarray():
    """``scprep.utils.toarray`` returns ndarrays and rejects strings.

    Inputs are every matrix type and an ``np.matrix``; a plain string must
    raise ``TypeError``.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 50))

    def test_fun(X):
        converted = scprep.utils.toarray(X)
        assert isinstance(converted, np.ndarray)

    matrix.test_all_matrix_types(X, test_fun)
    test_fun(np.matrix(X))

    expected_message = (
        "Expected pandas DataFrame, scipy sparse matrix or "
        "numpy matrix. Got "
    )
    assert_raise_message(
        TypeError,
        expected_message,
        scprep.utils.toarray,
        "hello",
    )
def test_matrix_elementwise_multiply_guess_col():
    """With ``axis=None`` and a length-100 multiplier, match ``DataFrame.mul``.

    The reference is ``pd.DataFrame(X).mul(multiplier, axis=1)`` on a
    50 x 100 matrix.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 100))
    multiplier = X[0] + 1
    expected = pd.DataFrame(X).mul(multiplier, axis=1)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equivalent,
        Y=expected,
        transform=scprep.utils.matrix_vector_elementwise_multiply,
        check=utils.assert_all_close,
        axis=None,
        multiplier=multiplier,
    )
def test_libsize_norm():
    """Exercise ``scprep.normalize.library_size_normalize`` for each ``rescale``.

    Covers the default call (reference scaled by the median row sum),
    ``rescale='mean'``, ``rescale=None`` and ``rescale=1`` (both compared
    with plain L1 normalisation), an invalid ``rescale`` value, and the
    warning issued when the median library size is zero.
    """
    # Default rescale: reference is L1-normalised rows times the median.
    X = data.generate_positive_sparse_matrix()
    median = np.median(X.sum(axis=1))
    Y = normalize(X, 'l1') * median
    utils.assert_all_close(Y.sum(1), np.median(np.sum(X, 1)))
    matrix.test_all_matrix_types(
        X, utils.assert_transform_equivalent, Y=Y,
        transform=scprep.normalize.library_size_normalize,
        check=utils.assert_all_close)

    # rescale='mean'. The mean is taken from the freshly generated matrix,
    # so the reference never depends on a previous ``X``.
    X = data.generate_positive_sparse_matrix()
    mean = np.mean(X.sum(axis=1))
    Y = normalize(X, 'l1') * mean
    utils.assert_all_close(Y.sum(1), np.mean(np.sum(X, 1)))
    matrix.test_all_matrix_types(
        X, utils.assert_transform_equivalent, Y=Y,
        transform=scprep.normalize.library_size_normalize,
        check=utils.assert_all_close, rescale='mean')

    # rescale=None and rescale=1 are both compared with plain L1 rows.
    X = data.generate_positive_sparse_matrix()
    Y = normalize(X, 'l1')
    matrix.test_all_matrix_types(
        X, utils.assert_transform_equivalent, Y=Y,
        transform=scprep.normalize.library_size_normalize,
        check=utils.assert_all_close, rescale=None)
    matrix.test_all_matrix_types(
        X, utils.assert_transform_equivalent, Y=Y,
        transform=scprep.normalize.library_size_normalize,
        check=utils.assert_all_close, rescale=1)

    assert_raise_message(
        ValueError,
        "Expected rescale in ['median', 'mean'], a number or `None`. "
        "Got invalid",
        scprep.normalize.library_size_normalize,
        X, rescale='invalid')

    # Zero out more than half of the rows so the median row sum is zero.
    X[:X.shape[0] // 2 + 1] = 0
    assert_warns_message(
        UserWarning,
        "Median library size is zero. "
        "Rescaling to mean instead.",
        scprep.normalize.library_size_normalize,
        X, rescale='median')
def test_arcsinh_transform():
    """Check ``scprep.transform.arcsinh`` and its ``cofactor`` validation.

    The default call is compared with ``np.arcsinh(X / 5)``;
    ``cofactor=0`` must raise ``ValueError``.
    """
    X = data.generate_positive_sparse_matrix()
    expected = np.arcsinh(X / 5)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equivalent,
        Y=expected,
        transform=scprep.transform.arcsinh,
        check=utils.assert_all_close,
    )

    expected_message = "Expected cofactor > 0 or None. " "Got 0"
    assert_raise_message(
        ValueError,
        expected_message,
        scprep.transform.arcsinh,
        data=X,
        cofactor=0,
    )
def test_matrix_any():
    """``scprep.utils.matrix_any`` must report whether a sentinel is present.

    The value 500000 is first checked to be absent, then written to
    ``X[0, 0]`` and checked to be found, for every matrix type.
    """
    sentinel = 500000
    X = data.generate_positive_sparse_matrix(shape=(50, 50))
    assert not np.any(X == sentinel)

    def test_fun(X):
        found = scprep.utils.matrix_any(X == sentinel)
        assert not found

    matrix.test_all_matrix_types(X, test_fun)

    def test_fun(X):
        found = scprep.utils.matrix_any(X == sentinel)
        assert found

    X[0, 0] = sentinel
    matrix.test_all_matrix_types(X, test_fun)
def test_EMD():
    """Exercise ``scprep.stats.EMD`` on a seeded 500 x 2 matrix.

    Covers the pinned float result, agreement across matrix types, and the
    ``ValueError`` raised when the first argument is 2-D.
    """
    X = data.generate_positive_sparse_matrix(
        shape=(500, 2), seed=42, poisson_mean=5
    )
    Y = scprep.stats.EMD(X[:, 0], X[:, 1])
    assert isinstance(Y, float)
    np.testing.assert_allclose(Y, 0.5537161)

    transform = partial(_test_fun_2d, fun=scprep.stats.EMD)
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=Y,
        transform=transform,
        check=utils.assert_all_close,
    )

    # Passing the whole matrix as ``x`` is rejected.
    second_column = X[:, 1]
    expected_message = (
        "Expected x and y to be 1D arrays. "
        "Got shapes x {}, y {}".format(X.shape, second_column.shape)
    )
    assert_raise_message(
        ValueError,
        expected_message,
        scprep.stats.EMD,
        X,
        second_column,
    )
def test_log_transform():
    """Check ``scprep.transform.log`` for several bases and a pseudocount.

    Bases 10, ``"e"`` and 2 are compared with the matching numpy logarithm
    of ``X + 1``. With ``pseudocount=5`` sparse inputs must emit a
    ``RuntimeWarning`` and dense inputs must match ``np.log2(X + 5)``.
    """
    X = data.generate_positive_sparse_matrix()

    # (value passed as ``base``, numpy function giving the reference)
    base_cases = (
        (10, np.log10),
        ("e", np.log),
        (2, np.log2),
    )
    for base, numpy_log in base_cases:
        Y = numpy_log(X + 1)
        matrix.test_all_matrix_types(
            X,
            utils.assert_transform_equivalent,
            Y=Y,
            transform=scprep.transform.log,
            base=base,
        )

    Y = np.log2(X + 5)

    def test_fun(X):
        utils.assert_warns_message(
            RuntimeWarning,
            "log transform on sparse data requires pseudocount = 1",
            scprep.transform.log,
            data=X,
            base=2,
            pseudocount=5,
        )

    matrix.test_sparse_matrix_types(X, test_fun)
    matrix.test_dense_matrix_types(
        X,
        utils.assert_transform_equivalent,
        Y=Y,
        transform=scprep.transform.log,
        base=2,
        pseudocount=5,
    )
def test_matrix_sum():
    """Check ``scprep.utils.matrix_sum`` for ``axis`` 0, 1 and ``None``.

    For each axis the reference is ``X.sum(axis)`` flattened to 1-D; it is
    compared through both ``matrix.test_all_matrix_types`` and
    ``matrix.test_numpy_matrix``. An unsupported axis must raise
    ``ValueError``.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 100))

    runners = (matrix.test_all_matrix_types, matrix.test_numpy_matrix)
    for axis in (0, 1, None):
        sums = np.array(X.sum(axis)).flatten()
        for run_matrix_types in runners:
            run_matrix_types(
                X,
                utils.assert_transform_equals,
                Y=sums,
                transform=scprep.utils.matrix_sum,
                axis=axis,
                check=utils.assert_all_close,
            )

    # Use the matrix under test here, not the ``data`` helper namespace, so
    # the invalid-axis error is exercised on real matrix input.
    assert_raise_message(
        ValueError,
        "Expected axis in [0, 1, None]. Got 5",
        scprep.utils.matrix_sum,
        X,
        5,
    )
def test_matrix_transpose():
    """Check ``scprep.utils.matrix_transpose`` and its fill-value guard.

    Every matrix type must transpose to ``X.T``. A ``SparseDataFrame``
    whose second column is recast with ``fill_value=1`` must raise
    ``TypeError``.
    """
    X = data.generate_positive_sparse_matrix(shape=(50, 50))
    expected = X.T
    matrix.test_all_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=expected,
        transform=scprep.utils.matrix_transpose,
    )

    sparse_frame = scprep.utils.SparseDataFrame(X)
    # Recast one column with a different fill value than the rest.
    mixed_fill_dtype = pd.SparseDtype(float, fill_value=1)
    sparse_frame.iloc[:, 1] = sparse_frame.iloc[:, 1].astype(mixed_fill_dtype)
    utils.assert_raises_message(
        TypeError,
        "Can only transpose sparse dataframes with constant fill value.",
        scprep.utils.matrix_transpose,
        sparse_frame,
    )
def test_pca():
    """Compare ``scprep.reduce.pca`` with ``decomposition.PCA``.

    Dense matrix types are checked against a seeded 100-component PCA.
    Sparse matrix types are checked against a 50-component full-SVD PCA
    with tolerances ``rtol=1e-3`` and ``atol=1e-5``.
    """
    X = data.generate_positive_sparse_matrix(shape=[100, 1000])

    # Dense inputs.
    dense_reference = decomposition.PCA(100, random_state=42).fit_transform(X)
    matrix.test_dense_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=dense_reference,
        transform=scprep.reduce.pca,
        n_pca=100,
        seed=42,
    )

    # Sparse inputs.
    sparse_reference = decomposition.PCA(50, svd_solver='full').fit_transform(X)
    tolerant_check = partial(utils.assert_all_close, rtol=1e-3, atol=1e-5)
    matrix.test_sparse_matrix_types(
        X,
        utils.assert_transform_equals,
        Y=sparse_reference,
        transform=scprep.reduce.pca,
        check=tolerant_check,
        n_pca=50,
        svd_multiples=8,
        seed=42,
    )