def test_data_named_gene(self):
    """Select var columns by name through an ``annotation_value`` filter.

    The var index column name is read from the schema and used to request
    first one value (``RER1``) and then three values (``SPEN``, ``TYMP``,
    ``PRMT2``). The decoded dimensions and ``col_idx`` are checked each time.
    """
    index_name = self.data.get_schema()["annotations"]["var"]["index"]

    # One requested name -> exactly one column.
    one_name = {"var": {"annotation_value": [{"name": index_name, "values": ["RER1"]}]}}
    decoded = decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(one_name, "var"))
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 1)
    self.assertEqual(decoded["col_idx"], [4])

    # Three requested names -> three columns; col_idx is compared element-wise.
    three_names = {
        "var": {"annotation_value": [{"name": index_name, "values": ["SPEN", "TYMP", "PRMT2"]}]}
    }
    decoded = decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(three_names, "var"))
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 3)
    self.assertTrue((decoded["col_idx"] == [15, 1818, 1837]).all())
def test_annotation_fields(self):
    """Request named annotation columns on each axis and check the decoded size."""
    # obs: two explicitly named columns.
    obs_fbs = self.data.annotation_to_fbs_matrix("obs", ["n_genes", "n_counts"])
    obs = decode_fbs.decode_matrix_FBS(obs_fbs)
    self.assertEqual(obs["n_rows"], 2638)
    self.assertEqual(obs["n_cols"], 2)

    # var: only the index column, whose name comes from the schema.
    index_name = self.data.get_schema()["annotations"]["var"]["index"]
    var_fbs = self.data.annotation_to_fbs_matrix("var", [index_name])
    var = decode_fbs.decode_matrix_FBS(var_fbs)
    self.assertEqual(var["n_rows"], 1838)
    self.assertEqual(var["n_cols"], 1)
def test_annotation(self):
    """Decode the full obs and var annotation matrices and check their layout.

    For each axis the test expects the listed column order, 100 rows, and
    a NaN as the first value of the third column.
    """
    obs = decode_fbs.decode_matrix_FBS(self.data.annotation_to_fbs_matrix("obs"))
    obs_index = self.data.schema["annotations"]["obs"]["index"]
    expected_obs_cols = [obs_index, "n_genes", "percent_mito", "n_counts", "louvain"]
    self.assertEqual(obs["col_idx"], expected_obs_cols)
    self.assertEqual(obs["n_rows"], 100)
    self.assertTrue(math.isnan(obs["columns"][2][0]))

    var = decode_fbs.decode_matrix_FBS(self.data.annotation_to_fbs_matrix("var"))
    var_index = self.data.schema["annotations"]["var"]["index"]
    expected_var_cols = [var_index, "n_cells", "var_with_nans"]
    self.assertEqual(var["col_idx"], expected_var_cols)
    self.assertEqual(var["n_rows"], 100)
    self.assertTrue(math.isnan(var["columns"][2][0]))
def test_layout_fields(self):
    """Request layouts by name and check the decoded column names.

    Per the original note, the fixture is expected to provide X_pca,
    X_tsne and X_umap. Each requested layout should contribute two
    columns named ``<layout>_0`` and ``<layout>_1``.
    """
    cases = (
        (["pca"], ["pca_0", "pca_1"]),
        (["tsne", "pca"], ["tsne_0", "tsne_1", "pca_0", "pca_1"]),
    )
    for fields, expected_cols in cases:
        decoded = decode_fbs.decode_matrix_FBS(self.data.layout_to_fbs_matrix(fields))
        self.assertEqual(decoded["n_cols"], len(expected_cols))
        self.assertEqual(decoded["n_rows"], 2638)
        # Order-insensitive comparison of the column names.
        self.assertCountEqual(decoded["col_idx"], expected_cols)
def on_start(self):
    """Prime per-user state: pick a dataset and fetch config, schema and var index values.

    Sets ``self.dataset``, ``self.config``, ``self.schema`` and
    ``self.gene_names``. For each of the three requests, a non-200 status
    leaves the corresponding attribute as ``None`` and marks the response
    as failed via ``r.failure``; a 200 marks it as succeeded via ``r.success``.
    """
    # NOTE(review): presumably a requests-style session, where this turns off
    # TLS certificate verification -- confirm against the client class.
    self.client.verify = False
    self.dataset = random.choice(DataSets)

    with self.client.get(f"{self.dataset}{API}/config", catch_response=True) as r:
        if r.status_code == 200:
            self.config = r.json()["config"]
            r.success()
        else:
            self.config = None
            r.failure(f"bad response code {r.status_code}")

    with self.client.get(f"{self.dataset}{API}/schema", catch_response=True) as r:
        if r.status_code == 200:
            self.schema = r.json()["schema"]
            r.success()
        else:
            self.schema = None
            r.failure(f"bad response code {r.status_code}")

    # Resolve the var index column name once; it is needed both for the
    # query string and to locate the column in the decoded response.
    var_index = self.var_index_name()
    with self.client.get(
        f"{self.dataset}{API}/annotations/var?annotation-name={var_index}",
        headers={"Accept": "application/octet-stream"},
        catch_response=True,
    ) as r:
        if r.status_code == 200:
            df = decode_fbs.decode_matrix_FBS(r.content)
            self.gene_names = df["columns"][df["col_idx"].index(var_index)]
            # Mark success explicitly, like the config and schema requests,
            # and only after the payload has decoded.
            r.success()
        else:
            self.gene_names = None
            r.failure(f"bad response code {r.status_code}")
def test_annotations(self):
    """Fetch all annotations for obs and var and check size and column names."""
    obs = decode_fbs.decode_matrix_FBS(self.data.annotation_to_fbs_matrix("obs"))
    self.assertEqual(obs["n_rows"], 2638)
    self.assertEqual(obs["n_cols"], 5)
    obs_index = self.data.get_schema()["annotations"]["obs"]["index"]
    expected_obs_cols = [obs_index, "n_genes", "percent_mito", "n_counts", "louvain"]
    self.assertEqual(obs["col_idx"], expected_obs_cols)

    var = decode_fbs.decode_matrix_FBS(self.data.annotation_to_fbs_matrix("var"))
    self.assertEqual(var["n_rows"], 1838)
    self.assertEqual(var["n_cols"], 2)
    var_index = self.data.get_schema()["annotations"]["var"]["index"]
    expected_var_cols = [var_index, "n_cells"]
    self.assertEqual(var["col_idx"], expected_var_cols)
def test_compute_embedding(self):
    """Exercise ``compute_embedding("umap", ...)`` with and without scanpy.

    Steps:
      1. With ``scanpy`` masked out of ``sys.modules``, the call must raise
         ``NotImplementedError``.
      2. If scanpy is not installed, the rest of the test is skipped
         (a message is printed and the test returns).
      3. In backed mode the call must raise ``NotImplementedError``.
      4. Otherwise the returned schema and the decoded embedding are checked.
    """
    # ``importlib.util`` is a submodule: a bare ``import importlib`` does not
    # guarantee that the ``util`` attribute exists, so import it explicitly.
    import importlib.util
    import unittest.mock

    obs_filter = {"obs": {"index": [[0, 100]]}}

    # Verify that we correctly handle the case where we lack scanpy
    with unittest.mock.patch.dict(sys.modules, {"scanpy": None}):
        with self.assertRaises(NotImplementedError):
            self.data.compute_embedding("umap", obs_filter)

    # if we happen to have scanpy, test the full API, else punt
    if importlib.util.find_spec("scanpy") is None:
        print("Skipping compute_embedding test as ScanPy not installed")
        return

    # this feature is unsupported in backed mode, and we expect an error
    if self.data.data.isbacked:
        with self.assertRaises(NotImplementedError):
            self.data.compute_embedding("umap", obs_filter)
        return

    (schema, fbs) = self.data.compute_embedding("umap", obs_filter)
    self.assertIsInstance(schema["name"], str)
    name = schema["name"]
    expected_dims = [f"{name}_0", f"{name}_1"]
    self.assertEqual(schema["type"], "float32")
    self.assertEqual(schema["dims"], expected_dims)

    emb = decode_fbs.decode_matrix_FBS(fbs)
    self.assertEqual(emb["n_rows"], 100)
    self.assertEqual(emb["n_cols"], 2)
    self.assertEqual(emb["col_idx"], expected_dims)
def test_filtered_data_frame(self):
    """Filter the var dataframe by a numeric minimum and reject an obs filter.

    A ``min`` filter on ``n_cells`` is expected to leave 1040 columns.
    Passing an ``obs`` filter while requesting the ``var`` axis is expected
    to raise ``FilterError``.
    """
    by_n_cells = {"var": {"annotation_value": [{"name": "n_cells", "min": 100}]}}
    decoded = decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(by_n_cells, "var"))
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 1040)

    by_n_counts = {"obs": {"annotation_value": [{"name": "n_counts", "min": 3000}]}}
    with self.assertRaises(FilterError):
        self.data.data_frame_to_fbs_matrix(by_n_counts, "var")
def test_filter_complex(self):
    """Combine an ``annotation_value`` filter with an ``index`` filter on var.

    The index filter mixes single positions and a ``[start, end]`` pair;
    91 columns are expected once both filters are applied.
    """
    var_filter = {
        "var": {
            "annotation_value": [{"name": "n_cells", "min": 10}],
            "index": [1, 99, [200, 300]],
        }
    }
    decoded = decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(var_filter, "var"))
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 91)
def test_annotation_var(self):
    """GET ``annotations/var`` and check the decoded payload.

    Expects HTTP 200, an ``application/octet-stream`` body, and a NaN as
    the first value of the third decoded column.
    """
    response = self.session.get(f"{URL_BASE}annotations/var")
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertEqual(response.headers["Content-Type"], "application/octet-stream")

    decoded = decode_fbs.decode_matrix_FBS(response.content)
    first_value = decoded["columns"][2][0]
    self.assertTrue(math.isnan(first_value))
def test_data(self):
    """PUT an index filter to ``data/var`` and check the decoded payload.

    Expects HTTP 200, an ``application/octet-stream`` body, and a NaN at
    position ``[3][3]`` of the decoded columns.
    """
    payload = {"filter": {"var": {"index": [[0, 20]]}}}
    response = self.session.put(f"{URL_BASE}data/var", json=payload)
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertEqual(response.headers["Content-Type"], "application/octet-stream")

    decoded = decode_fbs.decode_matrix_FBS(response.content)
    cell = decoded["columns"][3][3]
    self.assertTrue(math.isnan(cell))
def test_data_frame(self):
    """Slice the var dataframe by index range; the obs axis must raise.

    An index range of ``[0, 10]`` is expected to give 10 columns.
    Requesting the ``obs`` axis is expected to raise ``ValueError``.
    """
    index_range = {"var": {"index": [[0, 10]]}}
    decoded = decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(index_range, "var"))
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 10)

    with self.assertRaises(ValueError):
        self.data.data_frame_to_fbs_matrix(None, "obs")
def test_layout(self):
    """Fetch every layout and check the first two columns lie within [0, 1]."""
    decoded = decode_fbs.decode_matrix_FBS(self.data.layout_to_fbs_matrix(fields=None))
    self.assertEqual(decoded["n_cols"], 6)
    self.assertEqual(decoded["n_rows"], 2638)

    # Only columns 0 and 1 are range-checked, in that order.
    for position in (0, 1):
        coords = decoded["columns"][position]
        self.assertTrue((coords >= 0).all() and (coords <= 1).all())
def test_dataframe(self):
    """Decode the unfiltered var dataframe and check that bad filters are rejected.

    The unfiltered matrix is expected to be 100 x 100 with a NaN at
    ``[3][3]``. A non-dict filter, and an ``obs`` filter passed for the
    ``var`` axis, are both expected to raise ``FilterError``.
    """
    unfiltered_fbs = self.data.data_frame_to_fbs_matrix(None, "var")
    decoded = decode_fbs.decode_matrix_FBS(unfiltered_fbs)
    self.assertIsNotNone(decoded)
    self.assertEqual(decoded["n_rows"], 100)
    self.assertEqual(decoded["n_cols"], 100)
    self.assertTrue(math.isnan(decoded["columns"][3][3]))

    with pytest.raises(FilterError):
        self.data.data_frame_to_fbs_matrix("an erroneous filter", "var")

    # Built outside the ``raises`` block so only the call under test can raise.
    obs_index_filter = {"obs": {"index": [1, 99, [200, 300]]}}
    with pytest.raises(FilterError):
        self.data.data_frame_to_fbs_matrix(obs_index_filter, "var")
def fbs_checks(self, fbs, dims, expected_types, expected_column_idx):
    """Decode ``fbs`` and assert its shape, column types and column names.

    Args:
        fbs: encoded matrix, passed to ``decode_fbs.decode_matrix_FBS``.
        dims: indexable pair; ``dims[0]`` is the expected row count and
            ``dims[1]`` the expected column count.
        expected_types: one entry per column; entry ``[0]`` is the type
            passed to ``assertIsInstance`` and entry ``[1]`` is the expected
            ``dtype``, or ``None`` to skip the dtype check.
        expected_column_idx: expected column names, compared as a set;
            ``None`` skips this check.
    """
    decoded = decode_fbs.decode_matrix_FBS(fbs)
    self.assertEqual(decoded["n_rows"], dims[0])
    self.assertEqual(decoded["n_cols"], dims[1])
    self.assertIsNone(decoded["row_idx"])
    self.assertEqual(len(decoded["columns"]), dims[1])

    for position, column in enumerate(decoded["columns"]):
        spec = expected_types[position]
        self.assertEqual(len(column), dims[0])
        self.assertIsInstance(column, spec[0])
        if spec[1] is not None:
            self.assertEqual(column.dtype, spec[1])

    if expected_column_idx is not None:
        self.assertSetEqual(set(expected_column_idx), set(decoded["col_idx"]))
def test_data_get_filter_fbs(self):
    """GET ``data/var`` filtered by a var index value given in the query string.

    The query has the form ``var:<index column>=SIK1``. The response is
    expected to be HTTP 200, ``application/octet-stream``, and to decode
    to a single column.
    """
    index_col_name = self.schema["schema"]["annotations"]["var"]["index"]
    query = f"var:{index_col_name}=SIK1"
    # Plain literal: the original used an f-string with no placeholders.
    endpoint = "data/var"
    url = f"{self.URL_BASE}{endpoint}?{query}"
    header = {"Accept": "application/octet-stream"}

    result = self.session.get(url, headers=header)
    self.assertEqual(result.status_code, HTTPStatus.OK)
    self.assertEqual(result.headers["Content-Type"], "application/octet-stream")

    df = decode_fbs.decode_matrix_FBS(result.content)
    self.assertEqual(df["n_rows"], 2638)
    self.assertEqual(df["n_cols"], 1)
def test_data_put_filter_fbs(self):
    """PUT a var index filter to ``data/var`` and check the decoded matrix.

    Positions 0, 1 and 4 are requested. The response is expected to be
    HTTP 200, ``application/octet-stream``, and to decode to three columns
    whose ``col_idx`` is ``[0, 1, 4]``, with no row index.
    """
    # Plain literal: the original used an f-string with no placeholders.
    endpoint = "data/var"
    url = f"{self.URL_BASE}{endpoint}"
    header = {"Accept": "application/octet-stream"}
    # Named ``var_filter`` so the builtin ``filter`` is not shadowed.
    var_filter = {"filter": {"var": {"index": [0, 1, 4]}}}

    result = self.session.put(url, headers=header, json=var_filter)
    self.assertEqual(result.status_code, HTTPStatus.OK)
    self.assertEqual(result.headers["Content-Type"], "application/octet-stream")

    df = decode_fbs.decode_matrix_FBS(result.content)
    self.assertEqual(df["n_rows"], 2638)
    self.assertEqual(df["n_cols"], 3)
    self.assertIsNotNone(df["columns"])
    self.assertIsNone(df["row_idx"])
    self.assertEqual(len(df["columns"]), df["n_cols"])
    self.assertListEqual(df["col_idx"].tolist(), [0, 1, 4])
def test_get_annotations_var_keys_fbs(self):
    """GET a single named var annotation (``n_cells``) and check the decoded matrix."""
    url = f"{self.URL_BASE}annotations/var?annotation-name=n_cells"
    response = self.session.get(url, headers={"Accept": "application/octet-stream"})
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertEqual(response.headers["Content-Type"], "application/octet-stream")

    decoded = decode_fbs.decode_matrix_FBS(response.content)
    self.assertEqual(decoded["n_rows"], 1838)
    self.assertEqual(decoded["n_cols"], 1)
    self.assertIsNotNone(decoded["columns"])
    self.assertIsNone(decoded["row_idx"])
    self.assertEqual(len(decoded["columns"]), decoded["n_cols"])
    self.assertCountEqual(decoded["col_idx"], ["n_cells"])
def _test_get_user_annotations_obs_keys_fbs(self, annotation_name, columns):
    """Helper: GET one named obs annotation and check its decoded contents.

    Args:
        annotation_name: value used for the ``annotation-name`` query
            parameter; also the only expected entry in ``col_idx``.
        columns: expected set of distinct values in the returned column.
    """
    url = f"{self.URL_BASE}annotations/obs?annotation-name={annotation_name}"
    response = self.session.get(url, headers={"Accept": "application/octet-stream"})
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertEqual(response.headers["Content-Type"], "application/octet-stream")

    decoded = decode_fbs.decode_matrix_FBS(response.content)
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 1)
    self.assertListEqual(decoded["col_idx"], [annotation_name])
    distinct_values = set(decoded["columns"][0])
    self.assertEqual(distinct_values, columns)
    self.assertIsNone(decoded["row_idx"])
    self.assertEqual(len(decoded["columns"]), decoded["n_cols"])
def test_get_layout_fbs(self):
    """GET ``layout/obs`` and check that the eight expected layout columns decode."""
    expected_cols = {
        "pca_0",
        "pca_1",
        "tsne_0",
        "tsne_1",
        "umap_0",
        "umap_1",
        "draw_graph_fr_0",
        "draw_graph_fr_1",
    }

    response = self.session.get(
        f"{self.URL_BASE}layout/obs", headers={"Accept": "application/octet-stream"}
    )
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertEqual(response.headers["Content-Type"], "application/octet-stream")

    decoded = decode_fbs.decode_matrix_FBS(response.content)
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 8)
    self.assertIsNotNone(decoded["columns"])
    self.assertSetEqual(set(decoded["col_idx"]), expected_cols)
    self.assertIsNone(decoded["row_idx"])
    self.assertEqual(len(decoded["columns"]), decoded["n_cols"])
def test_data_put_single_var(self):
    """PUT an ``annotation_value`` filter naming one var (``RER1``) to ``data/var``.

    The response is expected to be HTTP 200, ``application/octet-stream``,
    and to decode to a single column.
    """
    # Plain literal: the original used an f-string with no placeholders.
    endpoint = "data/var"
    url = f"{self.URL_BASE}{endpoint}"
    header = {"Accept": "application/octet-stream"}
    index_col_name = self.schema["schema"]["annotations"]["var"]["index"]
    var_filter = {
        "filter": {"var": {"annotation_value": [{"name": index_col_name, "values": ["RER1"]}]}}
    }

    result = self.session.put(url, headers=header, json=var_filter)
    self.assertEqual(result.status_code, HTTPStatus.OK)
    self.assertEqual(result.headers["Content-Type"], "application/octet-stream")

    df = decode_fbs.decode_matrix_FBS(result.content)
    self.assertEqual(df["n_rows"], 2638)
    self.assertEqual(df["n_cols"], 1)
def test_get_annotations_obs_fbs(self):
    """GET every obs annotation and check the decoded size and column names.

    When ``self.ANNOTATIONS_ENABLED`` is truthy, one extra column,
    ``cluster-test``, is expected in addition to the five base columns.
    """
    response = self.session.get(
        f"{self.URL_BASE}annotations/obs", headers={"Accept": "application/octet-stream"}
    )
    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertEqual(response.headers["Content-Type"], "application/octet-stream")

    decoded = decode_fbs.decode_matrix_FBS(response.content)
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 6 if self.ANNOTATIONS_ENABLED else 5)
    self.assertIsNotNone(decoded["columns"])
    self.assertIsNone(decoded["row_idx"])
    self.assertEqual(len(decoded["columns"]), decoded["n_cols"])

    obs_index = self.schema["schema"]["annotations"]["obs"]["index"]
    base_cols = [obs_index, "n_genes", "percent_mito", "n_counts", "louvain"]
    extra_cols = ["cluster-test"] if self.ANNOTATIONS_ENABLED else []
    # Order-insensitive comparison of the column names.
    self.assertCountEqual(decoded["col_idx"], base_cols + extra_cols)
def test_put_get_roundtrip(self):
    """Check that obs columns written with a PUT can be read back with a GET.

    Two single-category columns (``cat_A``, ``cat_B``) are written through
    ``annotation_put_fbs``. Everything is then read back through
    ``annotation_to_fbs_matrix``, and both the decoded values and the
    schema entries for the new columns are checked.
    """
    row_count = self.data.get_shape()[0]
    written = (("cat_A", "label_A"), ("cat_B", "label_B"))
    new_columns = {
        name: pd.Series([label] * row_count, dtype="category") for name, label in written
    }

    # put
    put_result = self.annotation_put_fbs(make_fbs(new_columns))
    self.assertEqual(put_result, json.dumps({"status": "OK"}))

    # get
    labels = self.annotations.read_labels(None)
    all_obs_fbs = self.data.annotation_to_fbs_matrix("obs", None, labels)
    schema = schema_get_helper(self.data)
    decoded = decode_fbs.decode_matrix_FBS(all_obs_fbs)
    obs_index = schema["annotations"]["obs"]["index"]
    self.assertEqual(decoded["n_rows"], row_count)
    self.assertEqual(decoded["n_cols"], 7)
    self.assertIsNone(decoded["row_idx"])
    self.assertEqual(
        decoded["col_idx"],
        [obs_index, "n_genes", "percent_mito", "n_counts", "louvain", "cat_A", "cat_B"],
    )

    # Each written column must hold its single label in every row.
    col_idx = decoded["col_idx"]
    for name, label in written:
        self.assertEqual(decoded["columns"][col_idx.index(name)], [label] * row_count)

    # verify the schema was updated
    schema_by_name = {col["name"]: col for col in schema["annotations"]["obs"]["columns"]}
    for name, label in written:
        self.assertEqual(
            schema_by_name[name],
            {"name": name, "type": "categorical", "categories": [label], "writable": True},
        )
def test_dataframe_obs_not_implemented(self):
    """Requesting the dataframe on the ``obs`` axis is expected to raise ``ValueError``."""
    with self.assertRaises(ValueError) as raised:
        obs_fbs = self.data.data_frame_to_fbs_matrix(None, "obs")
        decode_fbs.decode_matrix_FBS(obs_fbs)
    self.assertIsNotNone(raised.exception)
def test_filter_idx(self):
    """Filter var by an index list mixing single positions and a ``[start, end]`` pair.

    102 columns are expected for ``[1, 99, [200, 300]]``.
    """
    by_index = {"var": {"index": [1, 99, [200, 300]]}}
    decoded = decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(by_index, "var"))
    self.assertEqual(decoded["n_rows"], 2638)
    self.assertEqual(decoded["n_cols"], 102)