def test_matrix_to_dataframe_no_names_dataframe_sparse(self):
    """Convert unnamed input with ``sparse=True`` and check the result.

    Both ``self.X_dense`` and ``self.X_sparse`` are converted; each result
    must pass ``is_sparse_dataframe``, fail ``is_SparseDataFrame``, hold the
    same values as ``self.X_numpy`` and be class-equivalent to
    ``self.X_sparse``.
    """
    for source in (self.X_dense, self.X_sparse):
        converted = scprep.io.utils._matrix_to_data_frame(source, sparse=True)
        assert scprep.utils.is_sparse_dataframe(converted)
        assert not scprep.utils.is_SparseDataFrame(converted)
        assert np.all(scprep.utils.toarray(converted) == self.X_numpy)
        utils.assert_matrix_class_equivalent(converted, self.X_sparse)
def test_combine_batches():
    """Compare ``scprep.utils.combine_batches`` with a plain ``pd.concat``.

    The 10X test data is combined with its own first half of rows. The
    combined matrix must equal the concatenation, the returned sample labels
    must be 0 for the first batch and 1 for the second, and with
    ``append_to_cell_names=True`` each cell name must gain a ``_<label>``
    suffix. The same transform is then checked across the indexable matrix
    types.
    """
    X = data.load_10X()

    def _batches(mat):
        # The full matrix followed by its first half of rows; rebuilt on
        # every call so no batch object is shared between invocations.
        return [mat, scprep.utils.select_rows(mat, np.arange(mat.shape[0] // 2))]

    Y = pd.concat(_batches(X))
    Y2, sample_labels = scprep.utils.combine_batches(
        _batches(X), batch_labels=[0, 1]
    )
    # NOTE(review): this assert only passes if the helper returns a truthy
    # value -- confirm against utils.assert_matrix_class_equivalent.
    assert utils.assert_matrix_class_equivalent(Y, Y2)
    utils.assert_all_equal(Y, Y2)
    assert np.all(Y.index == Y2.index)
    expected_labels = np.concatenate(
        [np.repeat(0, X.shape[0]), np.repeat(1, X.shape[0] // 2)]
    )
    assert np.all(sample_labels == expected_labels)

    Y2, sample_labels = scprep.utils.combine_batches(
        _batches(X), batch_labels=[0, 1], append_to_cell_names=True
    )
    # Stripping the two-character suffix must recover the original names.
    assert np.all(Y.index == np.array([i[:-2] for i in Y2.index]))
    # np.char is the public home of the string routines; going through
    # np.core is deprecated in NumPy 2.0.
    assert np.all(
        np.char.add("_", sample_labels.astype(str))
        == np.array([i[-2:] for i in Y2.index], dtype=str)
    )

    def transform(X):
        return scprep.utils.combine_batches(_batches(X), batch_labels=[0, 1])[0]

    matrix.test_matrix_types(
        X,
        utils.assert_transform_equals,
        matrix._indexable_matrix_types,
        Y=Y,
        transform=transform,
        check=utils.assert_all_equal,
    )
def test_matrix_to_dataframe_names_dense(self):
    """Convert named input with ``sparse=False`` and check the result.

    ``self.X_dense``, ``self.X_sparse`` and ``self.X_numpy`` are each
    converted with the fixture's cell and gene names. Every result must be a
    ``pd.DataFrame`` that fails both sparse checks, holds the same values as
    ``self.X_numpy`` and is class-equivalent to ``self.X_dense``.
    """
    for source in (self.X_dense, self.X_sparse, self.X_numpy):
        converted = scprep.io.utils._matrix_to_data_frame(
            source,
            cell_names=self.cell_names,
            gene_names=self.gene_names,
            sparse=False,
        )
        assert isinstance(converted, pd.DataFrame)
        assert not scprep.utils.is_sparse_dataframe(converted)
        assert not scprep.utils.is_SparseDataFrame(converted)
        assert np.all(scprep.utils.toarray(converted) == self.X_numpy)
        utils.assert_matrix_class_equivalent(converted, self.X_dense)
def test_fun(X):
    """Wrap ``X`` in a ``SparseDataFrame`` and compare its class with ``Y``.

    ``index``, ``columns`` and ``Y`` are not defined here; presumably they
    come from the enclosing scope -- verify against the caller.
    """
    wrapped = scprep.utils.SparseDataFrame(X, index=index, columns=columns)
    utils.assert_matrix_class_equivalent(wrapped, Y)
def test_combine_batches():
    """Compare ``scprep.utils.combine_batches`` with a plain ``pd.concat``.

    The 10X test data is combined with its own first half of rows. Checks:

    * the combined matrix equals the (column-sorted) concatenation;
    * sample labels are 0 for the first batch and 1 for the second, share
      the combined matrix's index and are named ``"sample_labels"``;
    * with ``append_to_cell_names=True`` each cell name gains a
      ``_<label>`` suffix;
    * the transform gives the expected result across pandas matrix types
      (sorted reference) and scipy/numpy matrix types (unsorted reference).
    """
    X = data.load_10X()

    def _batches(mat):
        # The full matrix followed by its first half of rows; rebuilt on
        # every call so no batch object is shared between invocations.
        return [
            mat,
            scprep.select.select_rows(mat, idx=np.arange(mat.shape[0] // 2)),
        ]

    Y = pd.concat(_batches(X), axis=0, sort=True)
    Y2, sample_labels = scprep.utils.combine_batches(
        _batches(X),
        batch_labels=[0, 1],
        append_to_cell_names=False,
    )
    # NOTE(review): this assert only passes if the helper returns a truthy
    # value -- confirm against utils.assert_matrix_class_equivalent.
    assert utils.assert_matrix_class_equivalent(Y, Y2)
    utils.assert_all_equal(Y, Y2)
    assert np.all(Y.index == Y2.index)
    expected_labels = np.concatenate(
        [np.repeat(0, X.shape[0]), np.repeat(1, X.shape[0] // 2)]
    )
    assert np.all(sample_labels == expected_labels)
    assert np.all(sample_labels.index == Y2.index)
    assert sample_labels.name == "sample_labels"

    Y2, sample_labels = scprep.utils.combine_batches(
        _batches(X),
        batch_labels=[0, 1],
        append_to_cell_names=True,
    )
    # Stripping the two-character suffix must recover the original names.
    assert np.all(Y.index == np.array([i[:-2] for i in Y2.index]))
    # np.char is the public home of the string routines; going through
    # np.core is deprecated in NumPy 2.0.
    assert np.all(
        np.char.add("_", sample_labels.astype(str))
        == np.array([i[-2:] for i in Y2.index], dtype=str)
    )
    assert np.all(sample_labels.index == Y2.index)
    assert sample_labels.name == "sample_labels"

    def transform(X):
        return scprep.utils.combine_batches(
            _batches(X),
            batch_labels=[0, 1],
        )[0]

    matrix.test_matrix_types(
        X,
        utils.assert_transform_equals,
        matrix._pandas_matrix_types,
        Y=Y,
        transform=transform,
        check=utils.assert_all_equal,
    )

    # don't sort for non pandas
    Y = pd.concat(_batches(X), axis=0, sort=False)
    matrix.test_matrix_types(
        X,
        utils.assert_transform_equals,
        matrix._scipy_indexable_matrix_types + matrix._numpy_matrix_types,
        Y=Y,
        transform=transform,
        check=utils.assert_all_equal,
    )

    def test_fun(X):
        Y, sample_labels = scprep.utils.combine_batches(
            _batches(X),
            batch_labels=[0, 1],
        )
        assert np.all(sample_labels.index == Y.index)
        assert sample_labels.name == "sample_labels"

    matrix.test_pandas_matrix_types(X, test_fun)