class DataLoadEngineTest(unittest.TestCase):
    """Test file loading, including deferred loading/update."""

    def setUp(self):
        """Build an engine with no data attached and a locator for the sample file."""
        self.data_file = DataLocator("example-dataset/pbmc3k.h5ad")
        self.data = ScanpyEngine()

    def test_init(self):
        """An engine constructed without arguments holds no data."""
        self.assertIsNone(self.data.data)

    def test_delayed_load_args(self):
        """Arguments passed to ``update`` are exposed unchanged as ``config``."""
        args = {
            "layout": ["tsne"],
            "max_category_items": 1000,
            "obs_names": "foo",
            "var_names": "bar",
            "diffexp_lfc_cutoff": 0.1,
            "annotations": False,
            "annotations_file": None,
            "annotations_output_dir": None,
            "backed": False,
            "diffexp_may_be_slow": False,
            "disable_diffexp": False,
        }
        self.data.update(args=args)
        self.assertEqual(args, self.data.config)

    def test_requires_data(self):
        """Creating a schema before any data is attached raises ``DriverError``."""
        with self.assertRaises(DriverError):
            self.data._create_schema()

    def test_delayed_load_data(self):
        """Data attached through ``update`` is loaded with the expected contents."""
        self.data.update(data_locator=self.data_file)
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Two-sided tolerance check: a plain ``value - expected < epsilon``
        # would also accept any value far *below* the expected one.
        self.assertAlmostEqual(
            self.data.data.X[0, 0], -0.171_469_51, delta=epsilon)

    def test_diffexp_topN(self):
        """``diffexp_topN`` returns 10 results by default and N when N is given."""
        self.data.update(data_locator=self.data_file)
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)
class DataLoadEngineTest(unittest.TestCase):
    """Test file loading, including deferred loading/update."""

    def setUp(self):
        """Build an engine with no data attached and remember the sample file path."""
        self.data_file = "example-dataset/pbmc3k.h5ad"
        self.data = ScanpyEngine()

    def test_init(self):
        """An engine constructed without arguments holds no data."""
        self.assertIsNone(self.data.data)

    def test_delayed_load_args(self):
        """Arguments passed to ``update`` are exposed unchanged as ``config``."""
        args = {
            "layout": ["tsne"],
            "max_category_items": 1000,
            "obs_names": "foo",
            "var_names": "bar",
            "diffexp_lfc_cutoff": 0.1,
        }
        self.data.update(args=args)
        self.assertEqual(args, self.data.config)

    def test_requires_data(self):
        """Creating a schema before any data is attached raises ``DriverError``."""
        with self.assertRaises(DriverError):
            self.data._create_schema()

    def test_delayed_load_data(self):
        """Data attached through ``update`` is loaded with the expected contents."""
        self.data.update(data=self.data_file)
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Two-sided tolerance check: a plain ``value - expected < epsilon``
        # would also accept any value far *below* the expected one.
        self.assertAlmostEqual(
            self.data.data.X[0, 0], -0.171_469_51, delta=epsilon)

    def test_diffexp_topN(self):
        """``diffexp_topN`` returns 10 results by default and N when N is given."""
        self.data.update(data=self.data_file)
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)


if __name__ == "__main__":
    unittest.main()
class EngineTest(unittest.TestCase):
    """Exercise a ``ScanpyEngine`` that receives its data at construction time.

    NOTE(review): ``self.data_locator`` and ``self.backed`` are read in
    ``setUp`` but never assigned in this class body; presumably they are
    injected from outside (e.g. a class-level parameterization) -- confirm.
    """

    def setUp(self):
        """Construct the engine under test from the externally supplied locator."""
        args = {
            "layout": ["umap"],
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
            "layout_file": None,
            "backed": self.backed,
        }
        self.data = ScanpyEngine(DataLocator(self.data_locator), args)

    def test_init(self):
        """The loaded dataset has the expected dimensions and first matrix value."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Two-sided tolerance check: a plain ``value - expected < epsilon``
        # would also accept any value far *below* the expected one.
        self.assertAlmostEqual(
            self.data.data.X[0, 0], -0.171_469_51, delta=epsilon)

    def test_mandatory_annotations(self):
        """The schema's index columns exist and both axes use a 0..n-1 index."""
        annotations_schema = self.data.get_schema()["annotations"]

        obs_index_col_name = annotations_schema["obs"]["index"]
        self.assertIn(obs_index_col_name, self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))

        var_index_col_name = annotations_schema["var"]["index"]
        self.assertIn(var_index_col_name, self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Scanpy data matrix")
    def test_data_type(self):
        """A float64 matrix makes ``_validate_data_types`` emit a ``UserWarning``."""
        # don't run the test on the more exotic data types, as they don't
        # support the astype() interface (used by this test, but not underlying app)
        if isinstance(self.data.data.X, np.ndarray):
            self.data.data.X = self.data.data.X.astype("float64")
            with self.assertWarns(UserWarning):
                self.data._validate_data_types()

    def test_filter_idx(self):
        """A var index filter mixing single indices and a range keeps 102 columns."""
        filter_ = {"filter": {"var": {"index": [1, 99, [200, 300]]}}}
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 102)

    def test_filter_complex(self):
        """Combining an annotation-value filter with an index filter keeps 91 columns."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "n_cells",
                        "min": 10
                    }],
                    "index": [1, 99, [200, 300]]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 91)

    def test_obs_and_var_names(self):
        """Neither index column contains missing values."""
        annotations_schema = self.data.get_schema()["annotations"]
        var_index_col_name = annotations_schema["var"]["index"]
        obs_index_col_name = annotations_schema["obs"]["index"]
        self.assertEqual(
            np.sum(self.data.data.var[var_index_col_name].isna()), 0)
        self.assertEqual(
            np.sum(self.data.data.obs[obs_index_col_name].isna()), 0)

    def test_get_schema(self):
        """The engine's schema equals the ``schema.json`` stored beside this file."""
        with open(path.join(path.dirname(__file__), "schema.json")) as fh:
            schema = json.load(fh)
            self.assertEqual(self.data.get_schema(), schema)

    def test_schema_produces_error(self):
        """A datetime obs column makes ``_create_schema`` raise ``TypeError``."""
        self.data.data.obs["time"] = pd.Series(
            [time.time() for _ in range(self.data.cell_count)],
            dtype="datetime64[ns]",
        )
        with pytest.raises(TypeError):
            self.data._create_schema()

    def test_config(self):
        """The obs layout feature is reported as available with its limit."""
        self.assertEqual(
            self.data.features["layout"]["obs"],
            {
                "available": True,
                "interactiveLimit": 50000
            },
        )

    def test_layout(self):
        """The layout has two columns whose values all lie within [0, 1]."""
        fbs = self.data.layout_to_fbs_matrix()
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 2)
        self.assertEqual(layout["n_rows"], 2638)

        X = layout["columns"][0]
        self.assertTrue((X >= 0).all() and (X <= 1).all())
        Y = layout["columns"][1]
        self.assertTrue((Y >= 0).all() and (Y <= 1).all())

    def test_annotations(self):
        """Unfiltered obs/var annotations expose the expected shapes and columns."""
        fbs = self.data.annotation_to_fbs_matrix("obs")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 5)
        obs_index_col_name = self.data.get_schema(
        )["annotations"]["obs"]["index"]
        self.assertEqual(
            annotations["col_idx"],
            [
                obs_index_col_name, "n_genes", "percent_mito", "n_counts",
                "louvain"
            ],
        )

        fbs = self.data.annotation_to_fbs_matrix("var")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 2)
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        self.assertEqual(annotations["col_idx"],
                         [var_index_col_name, "n_cells"])

    def test_annotation_fields(self):
        """Requesting specific fields returns only that many columns."""
        fbs = self.data.annotation_to_fbs_matrix("obs",
                                                 ["n_genes", "n_counts"])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 2)

        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        fbs = self.data.annotation_to_fbs_matrix("var", [var_index_col_name])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 1)

    def test_annotation_put(self):
        """Writing annotations on this engine raises ``DisabledFeatureError``."""
        with self.assertRaises(DisabledFeatureError):
            self.data.annotation_put_fbs(None, "obs")

    def test_diffexp_topN(self):
        """``diffexp_topN`` returns 10 results by default and N when N is given."""
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        """A var index range selects 10 columns; a ``None`` filter raises ``ValueError``."""
        f1 = {"var": {"index": [[0, 10]]}}
        fbs = self.data.data_frame_to_fbs_matrix(f1, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 10)

        with self.assertRaises(ValueError):
            self.data.data_frame_to_fbs_matrix(None, "obs")

    def test_filtered_data_frame(self):
        """A var value filter keeps 1040 columns; an obs filter raises ``FilterError``."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "n_cells",
                        "min": 100
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1040)

        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")

    def test_data_named_gene(self):
        """Filtering var by gene name returns the matching column indices."""
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": var_index_col_name,
                        "values": ["RER1"]
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1)
        self.assertEqual(data["col_idx"], [4])

        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": var_index_col_name,
                        "values": ["SPEN", "TYMP", "PRMT2"]
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 3)
        self.assertTrue((data["col_idx"] == [15, 1818, 1837]).all())
class UtilTest(unittest.TestCase):
    """Exercise the JSON-returning API of a ``ScanpyEngine`` loaded from the sample file."""

    def setUp(self):
        """Load the sample dataset and build its schema before every test."""
        args = {
            "layout": "umap",
            "diffexp": "ttest",
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
        }
        self.data = ScanpyEngine("example-dataset/pbmc3k.h5ad", args)
        self.data._create_schema()

    def test_init(self):
        """The loaded dataset has the expected dimensions and first matrix value."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Two-sided tolerance check: a plain ``value - expected < epsilon``
        # would also accept any value far *below* the expected one.
        self.assertAlmostEqual(
            self.data.data.X[0, 0], -0.171_469_51, delta=epsilon)

    def test_mandatory_annotations(self):
        """Both axes carry a ``name`` column and use a 0..n-1 index."""
        self.assertIn("name", self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))
        self.assertIn("name", self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Scanpy data matrix")
    def test_data_type(self):
        """A float64 matrix makes ``_validate_data_types`` emit a ``UserWarning``."""
        self.data.data.X = self.data.data.X.astype("float64")
        with self.assertWarns(UserWarning):
            self.data._validate_data_types()

    def test_filter_idx(self):
        """Index filters on both axes yield a 1002 x 102 result."""
        filter_ = {
            "filter": {
                "var": {
                    "index": [1, 99, [200, 300]]
                },
                "obs": {
                    "index": [1, 99, [1000, 2000]]
                },
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (1002, 102))

    def test_filter_annotation(self):
        """Obs filters by category values and by numeric minimum select the expected rows."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "louvain",
                        "values": ["NK cells", "CD8 T cells"]
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (470, 1838))

        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (497, 1838))

    def test_filter_annotation_no_uns(self):
        """Filtering var by a single gene name keeps exactly one column."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "name",
                        "values": ["RER1"]
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape[1], 1)

    def test_filter_complex(self):
        """Index and annotation filters combined on both axes yield a 15 x 102 result."""
        filter_ = {
            "filter": {
                "var": {
                    "index": [1, 99, [200, 300]]
                },
                "obs": {
                    "annotation_value": [
                        {
                            "name": "louvain",
                            "values": ["NK cells", "CD8 T cells"]
                        },
                        {
                            "name": "n_counts",
                            "min": 3000
                        },
                    ],
                    "index": [1, 99, [1000, 2000]],
                },
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (15, 102))

    def test_obs_and_var_names(self):
        """Neither ``name`` column contains missing values."""
        self.assertEqual(np.sum(self.data.data.var["name"].isna()), 0)
        self.assertEqual(np.sum(self.data.data.obs["name"].isna()), 0)

    def test_schema(self):
        """The engine's schema equals the ``schema.json`` stored beside this file."""
        with open(path.join(path.dirname(__file__), "schema.json")) as fh:
            schema = json.load(fh)
            self.assertEqual(self.data.schema, schema)

    def test_schema_produces_error(self):
        """A datetime obs column makes ``_create_schema`` raise ``TypeError``."""
        self.data.data.obs["time"] = Series(
            [time.time() for _ in range(self.data.cell_count)],
            dtype="datetime64[ns]",
        )
        with pytest.raises(TypeError):
            self.data._create_schema()

    def test_config(self):
        """The obs layout feature is reported as available with its limit."""
        self.assertEqual(
            self.data.features["layout"]["obs"],
            {
                "available": True,
                "interactiveLimit": 50000
            },
        )

    def test_layout(self):
        """The unfiltered layout is 2-D, covers every cell, and is bounded above by 1."""
        layout = json.loads(self.data.layout(None))
        coordinates = layout["layout"]["coordinates"]
        self.assertEqual(layout["layout"]["ndims"], 2)
        self.assertEqual(len(coordinates), 2638)
        self.assertEqual(coordinates[0][0], 0)
        # Element 0 of each entry is checked above for the first row only;
        # elements 1 and 2 are the values bounded here.
        for val in coordinates:
            self.assertLessEqual(val[1], 1)
            self.assertLessEqual(val[2], 1)

    def test_annotations(self):
        """Unfiltered obs/var annotations expose the expected names and row counts."""
        annotations = json.loads(self.data.annotation(None, "obs"))
        self.assertEqual(
            annotations["names"],
            ["name", "n_genes", "percent_mito", "n_counts", "louvain"],
        )
        self.assertEqual(len(annotations["data"]), 2638)

        annotations = json.loads(self.data.annotation(None, "var"))
        self.assertEqual(annotations["names"], ["name", "n_cells"])
        self.assertEqual(len(annotations["data"]), 1838)

    def test_annotation_fields(self):
        """Requesting specific fields returns only those names."""
        annotations = json.loads(
            self.data.annotation(None, "obs", ["n_genes", "n_counts"]))
        self.assertEqual(annotations["names"], ["n_genes", "n_counts"])
        self.assertEqual(len(annotations["data"]), 2638)

        annotations = json.loads(self.data.annotation(None, "var", ["name"]))
        self.assertEqual(annotations["names"], ["name"])
        self.assertEqual(len(annotations["data"]), 1838)

    def test_filtered_annotation(self):
        """A combined obs/var filter limits the rows returned for each axis."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                },
                "var": {
                    "annotation_value": [{
                        "name": "name",
                        "values": ["ATAD3C", "RER1"]
                    }]
                },
            }
        }
        annotations = json.loads(
            self.data.annotation(filter_["filter"], "obs"))
        self.assertEqual(
            annotations["names"],
            ["name", "n_genes", "percent_mito", "n_counts", "louvain"],
        )
        self.assertEqual(len(annotations["data"]), 497)

        annotations = json.loads(
            self.data.annotation(filter_["filter"], "var"))
        self.assertEqual(annotations["names"], ["name", "n_cells"])
        self.assertEqual(len(annotations["data"]), 2)

    def test_filtered_layout(self):
        """An obs filter limits the number of layout coordinates returned."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        layout = json.loads(self.data.layout(filter_["filter"]))
        self.assertEqual(len(layout["layout"]["coordinates"]), 497)

    def test_diffexp_topN(self):
        """``diffexp_topN`` returns 10 results by default and N when N is given."""
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        """Unfiltered data frames list every gene and cell for either axis."""
        data_frame_obs = json.loads(self.data.data_frame(None, "obs"))
        self.assertEqual(len(data_frame_obs["var"]), 1838)
        self.assertEqual(len(data_frame_obs["obs"]), 2638)

        data_frame_var = json.loads(self.data.data_frame(None, "var"))
        self.assertEqual(len(data_frame_var["var"]), 1838)
        self.assertEqual(len(data_frame_var["obs"]), 2638)

    def test_filtered_data_frame(self):
        """The requested axis holds row lists while the other axis holds int indices."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        data_frame_obs = json.loads(
            self.data.data_frame(filter_["filter"], "obs"))
        self.assertEqual(len(data_frame_obs["var"]), 1838)
        self.assertEqual(len(data_frame_obs["obs"]), 497)
        self.assertIsInstance(data_frame_obs["obs"][0], (list, tuple))
        self.assertEqual(type(data_frame_obs["var"][0]), int)

        data_frame_var = json.loads(
            self.data.data_frame(filter_["filter"], "var"))
        self.assertEqual(len(data_frame_var["var"]), 1838)
        self.assertEqual(len(data_frame_var["obs"]), 497)
        self.assertIsInstance(data_frame_var["var"][0], (list, tuple))
        self.assertEqual(type(data_frame_var["obs"][0]), int)

    def test_data_single_gene(self):
        """A single-gene var filter keeps the same per-axis element types."""
        for axis in ["obs", "var"]:
            filter_ = {
                "filter": {
                    "var": {
                        "annotation_value": [{
                            "name": "name",
                            "values": ["RER1"]
                        }]
                    }
                }
            }
            data_frame_var = json.loads(
                self.data.data_frame(filter_["filter"], axis))
            if axis == "obs":
                self.assertEqual(type(data_frame_var["var"][0]), int)
                self.assertIsInstance(data_frame_var["obs"][0], (list, tuple))
            elif axis == "var":
                self.assertEqual(type(data_frame_var["obs"][0]), int)
                self.assertIsInstance(data_frame_var["var"][0], (list, tuple))


if __name__ == "__main__":
    unittest.main()