def setUp(self):
    """Create an AnndataAdaptor over the pbmc3k example dataset.

    Stores the locator on ``self.data_file`` and the adaptor on ``self.data``.
    """
    locator = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
    self.data_file = locator

    cfg = AppConfig()
    # Point the server at the dataset, then supply the Flask secret key,
    # before asking the config to finalise itself.
    cfg.update_server_config(single_dataset__datapath=locator.path)
    cfg.update_server_config(app__flask_secret_key="secret")
    cfg.complete_config()

    self.data = AnndataAdaptor(locator, cfg)
class DataLoadAdaptorTest(unittest.TestCase):
    """
    Test file loading, including deferred loading/update.
    """

    def setUp(self):
        """Create an AnndataAdaptor over the pbmc3k example dataset."""
        self.data_file = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
        config = AppConfig()
        config.update_server_config(single_dataset__datapath=self.data_file.path)
        config.update_server_config(app__flask_secret_key="secret")
        config.complete_config()
        self.data = AnndataAdaptor(self.data_file, config)

    def test_delayed_load_data(self):
        """After building the schema, dimensions and the first X value match."""
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        # Two-sided tolerance: the previous `value - expected < epsilon` form
        # also accepted any value *smaller* than expected.
        self.assertAlmostEqual(
            self.data.data.X[0, 0], -0.171_469_51, delta=0.000_005
        )

    def test_diffexp_topN(self):
        """diffexp_topN returns 10 entries by default and N when N is given."""
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}

        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)

        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)
def setUp(self):
    """Load the ``nan.h5ad`` fixture into an adaptor and build its schema.

    UserWarnings raised while constructing the adaptor and creating the
    schema are suppressed for the duration of the set-up.
    """
    nan_locator = DataLocator(f"{FIXTURES_ROOT}/nan.h5ad")
    self.data_locator = nan_locator
    self.config = app_config(nan_locator.path)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        adaptor = AnndataAdaptor(nan_locator, self.config)
        self.data = adaptor
        adaptor._create_schema()
class NaNTest(unittest.TestCase):
    """Exercise AnndataAdaptor against the ``nan.h5ad`` fixture."""

    def setUp(self):
        """Load the fixture and build its schema with UserWarnings silenced."""
        nan_locator = DataLocator(f"{FIXTURES_ROOT}/nan.h5ad")
        self.data_locator = nan_locator
        self.config = app_config(nan_locator.path)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            adaptor = AnndataAdaptor(nan_locator, self.config)
            self.data = adaptor
            adaptor._create_schema()

    def test_load(self):
        """Constructing the adaptor logs at least one record at WARN or above."""
        with self.assertLogs(level="WARN") as captured:
            self.data = AnndataAdaptor(self.data_locator, self.config)
            self.assertTrue(captured.output)

    def test_init(self):
        """The fixture reports 100 cells and 100 genes."""
        self.assertEqual(self.data.cell_count, 100)
        self.assertEqual(self.data.gene_count, 100)

        tolerance = 0.000_005
        first_value = self.data.data.X[0, 0]
        # NOTE(review): this is a one-sided check (no abs()), so any value
        # below the expected one also passes - confirm the fixture's real
        # X[0, 0] before tightening it.
        self.assertLess(first_value - -0.171_469_51, tolerance)

    def test_dataframe(self):
        """The var data frame round-trips with NaNs; bad filters raise FilterError."""
        var_fbs = self.data.data_frame_to_fbs_matrix(None, "var")
        decoded = decode_fbs.decode_matrix_FBS(var_fbs)

        self.assertIsNotNone(decoded)
        self.assertEqual(decoded["n_rows"], 100)
        self.assertEqual(decoded["n_cols"], 100)
        self.assertTrue(math.isnan(decoded["columns"][3][3]))

        # A filter that is not a dict at all.
        with pytest.raises(FilterError):
            self.data.data_frame_to_fbs_matrix("an erroneous filter", "var")

        # An obs filter passed while requesting the "var" axis.
        obs_filter = {"obs": {"index": [1, 99, [200, 300]]}}
        with pytest.raises(FilterError):
            self.data.data_frame_to_fbs_matrix(obs_filter, "var")

    def test_dataframe_obs_not_implemented(self):
        """Requesting the data frame along "obs" raises ValueError."""
        with self.assertRaises(ValueError) as raised:
            decode_fbs.decode_matrix_FBS(
                self.data.data_frame_to_fbs_matrix(None, "obs")
            )
        self.assertIsNotNone(raised.exception)

    def test_annotation(self):
        """obs and var annotations expose the expected columns and keep NaNs."""
        schema_annotations = self.data.schema["annotations"]

        obs_decoded = decode_fbs.decode_matrix_FBS(
            self.data.annotation_to_fbs_matrix("obs")
        )
        expected_obs_cols = [
            schema_annotations["obs"]["index"],
            "n_genes",
            "percent_mito",
            "n_counts",
            "louvain",
        ]
        self.assertEqual(obs_decoded["col_idx"], expected_obs_cols)
        self.assertEqual(obs_decoded["n_rows"], 100)
        self.assertTrue(math.isnan(obs_decoded["columns"][2][0]))

        var_decoded = decode_fbs.decode_matrix_FBS(
            self.data.annotation_to_fbs_matrix("var")
        )
        expected_var_cols = [
            schema_annotations["var"]["index"],
            "n_cells",
            "var_with_nans",
        ]
        self.assertEqual(var_decoded["col_idx"], expected_var_cols)
        self.assertEqual(var_decoded["n_rows"], 100)
        self.assertTrue(math.isnan(var_decoded["columns"][2][0]))
def test_posix_file(self):
    """An adaptor built from a relative filesystem path passes stdAsserts."""
    posix_locator = DataLocator("../example-dataset/pbmc3k.h5ad")

    cfg = self.get_basic_config()
    cfg.update_server_config(single_dataset__datapath=posix_locator.path)
    cfg.complete_config()

    adaptor = AnndataAdaptor(posix_locator, cfg)
    self.stdAsserts(adaptor)
def test_url_http(self):
    """An adaptor built from an http:// URL locator passes stdAsserts."""
    remote_locator = DataLocator(
        "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad"
    )
    # The basic config is used as returned; no datapath is set on it here.
    cfg = self.get_basic_config()
    adaptor = AnndataAdaptor(remote_locator, cfg)
    self.stdAsserts(adaptor)
def test_load(self):
    """Constructing the adaptor logs at least one record at WARN or above."""
    with self.assertLogs(level="WARN") as captured:
        adaptor = AnndataAdaptor(self.data_locator, self.config)
        self.data = adaptor
        self.assertTrue(captured.output)
def setUp(self):
    """Build the adaptor under test from ``self.data_locator`` / ``self.backed``.

    Both attributes are read from the instance and are not set in this method.
    """
    cfg = app_config(self.data_locator, self.backed)
    locator = DataLocator(self.data_locator)
    self.data = AnndataAdaptor(locator, cfg)
class AdaptorTest(unittest.TestCase):
    """Behavioural tests for AnndataAdaptor over the pbmc3k dataset.

    ``setUp`` reads ``self.data_locator`` and ``self.backed``, neither of which
    is assigned inside this class - presumably they are injected by a
    parameterising class decorator; confirm against the full file.
    """

    def setUp(self):
        """Build the adaptor under test from the injected locator/backed flag."""
        config = app_config(self.data_locator, self.backed)
        self.data = AnndataAdaptor(DataLocator(self.data_locator), config)

    def _index_col_name(self, axis):
        """Return the schema's index column name for ``axis`` ("obs" or "var")."""
        return self.data.get_schema()["annotations"][axis]["index"]

    def test_init(self):
        """Dimensions and the first X value match the pbmc3k dataset."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        # Two-sided tolerance: the previous `value - expected < epsilon` form
        # also accepted any value *smaller* than expected.
        self.assertAlmostEqual(
            self.data.data.X[0, 0], -0.171_469_51, delta=0.000_005
        )

    def test_mandatory_annotations(self):
        """Index columns exist and obs/var use a 0..n-1 integer index."""
        obs_index_col_name = self._index_col_name("obs")
        self.assertIn(obs_index_col_name, self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))

        var_index_col_name = self._index_col_name("var")
        self.assertIn(var_index_col_name, self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Anndata data matrix")
    def test_data_type(self):
        """A float64 X matrix makes _validate_data_types emit a UserWarning."""
        # don't run the test on the more exotic data types, as they don't
        # support the astype() interface (used by this test, but not underlying app)
        if isinstance(self.data.data.X, np.ndarray):
            self.data.data.X = self.data.data.X.astype("float64")
            with self.assertWarns(UserWarning):
                self.data._validate_data_types()

    def test_filter_idx(self):
        """An index filter on var selects 2 single indices plus a 100-wide range."""
        filter_ = {"filter": {"var": {"index": [1, 99, [200, 300]]}}}
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 102)

    def test_filter_complex(self):
        """Combining an annotation_value filter with an index filter on var."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{"name": "n_cells", "min": 10}],
                    "index": [1, 99, [200, 300]],
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 91)

    def test_obs_and_var_names(self):
        """Neither the var nor the obs index column contains missing values."""
        var_names = self.data.data.var[self._index_col_name("var")]
        self.assertEqual(np.sum(var_names.isna()), 0)

        obs_names = self.data.data.obs[self._index_col_name("obs")]
        self.assertEqual(np.sum(obs_names.isna()), 0)

    def test_get_colors(self):
        """get_colors returns the expected pbmc3k colour table."""
        self.assertEqual(self.data.get_colors(), pbmc3k_colors)

    def test_get_schema(self):
        """get_schema matches the schema.json fixture."""
        with open(f"{FIXTURES_ROOT}/schema.json") as fh:
            schema = json.load(fh)
        self.assertDictEqual(self.data.get_schema(), schema)

    def test_schema_produces_error(self):
        """A datetime64 obs column makes _create_schema raise TypeError."""
        self.data.data.obs["time"] = pd.Series(
            [time.time() for _ in range(self.data.cell_count)],
            dtype="datetime64[ns]",
        )
        with pytest.raises(TypeError):
            self.data._create_schema()

    def test_layout(self):
        """The full layout has 6 columns and its first two lie within [0, 1]."""
        fbs = self.data.layout_to_fbs_matrix(fields=None)
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 6)
        self.assertEqual(layout["n_rows"], 2638)

        X = layout["columns"][0]
        self.assertTrue((X >= 0).all() and (X <= 1).all())
        Y = layout["columns"][1]
        self.assertTrue((Y >= 0).all() and (Y <= 1).all())

    def test_layout_fields(self):
        """ X_pca, X_tsne, X_umap are available """
        fbs = self.data.layout_to_fbs_matrix(["pca"])
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 2)
        self.assertEqual(layout["n_rows"], 2638)
        self.assertCountEqual(layout["col_idx"], ["pca_0", "pca_1"])

        fbs = self.data.layout_to_fbs_matrix(["tsne", "pca"])
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 4)
        self.assertEqual(layout["n_rows"], 2638)
        self.assertCountEqual(
            layout["col_idx"], ["tsne_0", "tsne_1", "pca_0", "pca_1"]
        )

    def test_annotations(self):
        """obs and var annotations expose the expected rows and column names."""
        fbs = self.data.annotation_to_fbs_matrix("obs")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 5)
        obs_index_col_name = self._index_col_name("obs")
        self.assertEqual(
            annotations["col_idx"],
            [obs_index_col_name, "n_genes", "percent_mito", "n_counts", "louvain"],
        )

        fbs = self.data.annotation_to_fbs_matrix("var")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 2)
        var_index_col_name = self._index_col_name("var")
        self.assertEqual(annotations["col_idx"], [var_index_col_name, "n_cells"])

    def test_annotation_fields(self):
        """Requesting specific annotation fields returns only those columns."""
        fbs = self.data.annotation_to_fbs_matrix("obs", ["n_genes", "n_counts"])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 2)

        var_index_col_name = self._index_col_name("var")
        fbs = self.data.annotation_to_fbs_matrix("var", [var_index_col_name])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 1)

    def test_diffexp_topN(self):
        """diffexp_topN returns 10 entries by default and N when N is given."""
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}

        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)

        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        """A var index range selects columns; the "obs" axis raises ValueError."""
        f1 = {"var": {"index": [[0, 10]]}}
        fbs = self.data.data_frame_to_fbs_matrix(f1, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 10)

        with self.assertRaises(ValueError):
            self.data.data_frame_to_fbs_matrix(None, "obs")

    def test_filtered_data_frame(self):
        """var annotation filters narrow columns; obs filters raise FilterError."""
        filter_ = {
            "filter": {"var": {"annotation_value": [{"name": "n_cells", "min": 100}]}}
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1040)

        filter_ = {
            "filter": {"obs": {"annotation_value": [{"name": "n_counts", "min": 3000}]}}
        }
        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")

    def test_data_named_gene(self):
        """Filtering var by gene name returns the matching column indices."""
        var_index_col_name = self._index_col_name("var")

        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [
                        {"name": var_index_col_name, "values": ["RER1"]}
                    ]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1)
        self.assertEqual(data["col_idx"], [4])

        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [
                        {
                            "name": var_index_col_name,
                            "values": ["SPEN", "TYMP", "PRMT2"],
                        }
                    ]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 3)
        self.assertTrue((data["col_idx"] == [15, 1818, 1837]).all())

    def test_compute_embedding(self):
        """compute_embedding needs scanpy and a non-backed dataset.

        Checks, in order: NotImplementedError when scanpy cannot be imported;
        NotImplementedError in backed mode; otherwise the returned schema and
        the stored embedding, which must be finite for the selected cells and
        NaN for the rest.
        """
        obs_filter = {"obs": {"index": [[0, 100]]}}

        # Verify that we correctly handle the case where we lack scanpy
        from unittest import mock

        with mock.patch.dict(sys.modules, {"scanpy": None}):
            with self.assertRaises(NotImplementedError):
                self.data.compute_embedding("umap", obs_filter)

        # if we happen to have scanpy, test the full API, else punt.
        # `importlib.util` is a submodule and must be imported explicitly;
        # a bare `import importlib` only exposes it by accident.
        import importlib.util

        if importlib.util.find_spec("scanpy") is None:
            self.skipTest("Skipping compute_embedding test as ScanPy not installed")

        # this feature is unsupported in backed mode, and we expect an error
        if self.data.data.isbacked:
            with self.assertRaises(NotImplementedError):
                self.data.compute_embedding("umap", obs_filter)
            return

        schema = self.data.compute_embedding("umap", obs_filter)
        self.assertIsInstance(schema["name"], str)
        name = schema["name"]
        self.assertEqual(schema["type"], "float32")
        self.assertEqual(schema["dims"], [f"{name}_0", f"{name}_1"])

        emb = self.data.data.obsm[f"X_{name}"]
        self.assertEqual(emb.shape, (2638, 2))
        self.assertTrue(np.isfinite(emb[0:100]).all())
        self.assertTrue(np.isnan(emb[100:]).all())