def setUp(self):
     """Create the adaptor under test from the pbmc3k example dataset.

     Stores the dataset locator on ``self.data_file`` and the resulting
     ``AnndataAdaptor`` on ``self.data``.
     """
     dataset_path = f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad"
     self.data_file = DataLocator(dataset_path)

     # Point a fresh server configuration at the dataset, then finalize it.
     app_cfg = AppConfig()
     app_cfg.update_server_config(
         single_dataset__datapath=self.data_file.path,
     )
     app_cfg.update_server_config(
         app__flask_secret_key="secret",
     )
     app_cfg.complete_config()

     self.data = AnndataAdaptor(self.data_file, app_cfg)
class DataLoadAdaptorTest(unittest.TestCase):
    """
    Test file loading, including deferred loading/update.
    """

    def setUp(self):
        """Build an AnndataAdaptor over the pbmc3k example dataset."""
        self.data_file = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
        config = AppConfig()
        config.update_server_config(single_dataset__datapath=self.data_file.path)
        config.update_server_config(app__flask_secret_key="secret")
        config.complete_config()
        self.data = AnndataAdaptor(self.data_file, config)

    def test_delayed_load_data(self):
        """After creating the schema, counts and the first matrix value match the dataset."""
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        # Two-sided tolerance check. The previous form, `value - ref < epsilon`,
        # also accepted any value far *below* the reference.
        self.assertAlmostEqual(self.data.data.X[0, 0], -0.171_469_51, delta=0.000_005)

    def test_diffexp_topN(self):
        """diffexp_topN returns 10 entries by default and honors an explicit count."""
        first_set = {"obs": {"index": [[0, 500]]}}
        second_set = {"obs": {"index": [[500, 1000]]}}

        result = json.loads(self.data.diffexp_topN(first_set, second_set))
        self.assertEqual(len(result), 10)

        result = json.loads(self.data.diffexp_topN(first_set, second_set, 20))
        self.assertEqual(len(result), 20)
Esempio n. 3
0
    def setUp(self):
        """Load the NaN fixture into an adaptor and build its schema.

        UserWarning is silenced while the adaptor is constructed and while
        the schema is created.
        """
        fixture_path = f"{FIXTURES_ROOT}/nan.h5ad"
        self.data_locator = DataLocator(fixture_path)
        self.config = app_config(self.data_locator.path)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            adaptor = AnndataAdaptor(self.data_locator, self.config)
            self.data = adaptor
            adaptor._create_schema()
Esempio n. 4
0
class NaNTest(unittest.TestCase):
    """Tests for AnndataAdaptor against the ``nan.h5ad`` fixture."""

    def setUp(self):
        """Load the fixture and build its schema with UserWarning silenced."""
        self.data_locator = DataLocator(f"{FIXTURES_ROOT}/nan.h5ad")
        self.config = app_config(self.data_locator.path)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            self.data = AnndataAdaptor(self.data_locator, self.config)
            self.data._create_schema()

    def test_load(self):
        """Constructing the adaptor logs at least one record at WARN level or above."""
        with self.assertLogs(level="WARN") as logger:
            self.data = AnndataAdaptor(self.data_locator, self.config)
            self.assertTrue(logger.output)

    def test_init(self):
        """Cell/gene counts and the first matrix value match the fixture."""
        self.assertEqual(self.data.cell_count, 100)
        self.assertEqual(self.data.gene_count, 100)
        # Two-sided tolerance check. The previous form, `value - ref < epsilon`,
        # also accepted any value far *below* the reference.
        self.assertAlmostEqual(self.data.data.X[0, 0], -0.171_469_51, delta=0.000_005)

    def test_dataframe(self):
        """The var data frame is 100x100, carries a NaN, and bad filters raise FilterError."""
        data_frame_var = decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(None, "var"))
        self.assertIsNotNone(data_frame_var)
        self.assertEqual(data_frame_var["n_rows"], 100)
        self.assertEqual(data_frame_var["n_cols"], 100)
        self.assertTrue(math.isnan(data_frame_var["columns"][3][3]))

        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix("an erroneous filter", "var")

        # Build the filter outside the raises-block so only the call under
        # test can satisfy the expectation.
        obs_filter = {"obs": {"index": [1, 99, [200, 300]]}}
        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix(obs_filter, "var")

    def test_dataframe_obs_not_implemented(self):
        """Requesting the data frame along "obs" raises ValueError."""
        with self.assertRaises(ValueError) as cm:
            decode_fbs.decode_matrix_FBS(self.data.data_frame_to_fbs_matrix(None, "obs"))
        self.assertIsNotNone(cm.exception)

    def test_annotation(self):
        """obs/var annotations expose the expected columns, row counts and a NaN value."""
        annotations = decode_fbs.decode_matrix_FBS(self.data.annotation_to_fbs_matrix("obs"))
        obs_index_col_name = self.data.schema["annotations"]["obs"]["index"]
        self.assertEqual(annotations["col_idx"], [obs_index_col_name, "n_genes", "percent_mito", "n_counts", "louvain"])
        self.assertEqual(annotations["n_rows"], 100)
        self.assertTrue(math.isnan(annotations["columns"][2][0]))

        annotations = decode_fbs.decode_matrix_FBS(self.data.annotation_to_fbs_matrix("var"))
        var_index_col_name = self.data.schema["annotations"]["var"]["index"]
        self.assertEqual(annotations["col_idx"], [var_index_col_name, "n_cells", "var_with_nans"])
        self.assertEqual(annotations["n_rows"], 100)
        self.assertTrue(math.isnan(annotations["columns"][2][0]))
 def test_posix_file(self):
     """Load the example dataset through a relative file path and run the standard asserts."""
     dataset = DataLocator("../example-dataset/pbmc3k.h5ad")

     cfg = self.get_basic_config()
     cfg.update_server_config(
         single_dataset__datapath=dataset.path,
     )
     cfg.complete_config()

     adaptor = AnndataAdaptor(dataset, cfg)
     self.stdAsserts(adaptor)
 def test_url_http(self):
     """Load the example dataset from an http:// URL and run the standard asserts."""
     remote = DataLocator(
         "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad"
     )
     cfg = self.get_basic_config()
     adaptor = AnndataAdaptor(remote, cfg)
     self.stdAsserts(adaptor)
Esempio n. 7
0
 def test_load(self):
     """Constructing the adaptor must log at least one record at WARN level or above."""
     with self.assertLogs(level="WARN") as captured:
         adaptor = AnndataAdaptor(self.data_locator, self.config)
         self.data = adaptor
         self.assertTrue(captured.output)
Esempio n. 8
0
 def setUp(self):
     """Build the adaptor under test from ``self.data_locator`` and ``self.backed``."""
     cfg = app_config(self.data_locator, self.backed)
     locator = DataLocator(self.data_locator)
     self.data = AnndataAdaptor(locator, cfg)
Esempio n. 9
0
class AdaptorTest(unittest.TestCase):
    """
    Tests for AnndataAdaptor: schema, annotations, layout, filtering,
    differential expression and embedding computation.

    ``setUp`` reads ``self.data_locator`` and ``self.backed``, which are not
    defined in this class body; presumably they are injected by a
    parameterizing decorator or a subclass -- TODO confirm.
    """

    def setUp(self):
        """Create the adaptor under test from the class-provided locator and mode."""
        config = app_config(self.data_locator, self.backed)
        self.data = AnndataAdaptor(DataLocator(self.data_locator), config)

    def _index_column(self, axis):
        """Return the index column name the schema reports for `axis` ("obs" or "var")."""
        return self.data.get_schema()["annotations"][axis]["index"]

    def test_init(self):
        """Cell/gene counts and the first matrix value match the dataset."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        # Two-sided tolerance check. The previous form, `value - ref < epsilon`,
        # also accepted any value far *below* the reference.
        self.assertAlmostEqual(self.data.data.X[0, 0], -0.171_469_51, delta=0.000_005)

    def test_mandatory_annotations(self):
        """The schema's index columns exist and obs/var are indexed 0..n-1."""
        self.assertIn(self._index_column("obs"), self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))
        self.assertIn(self._index_column("var"), self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Anndata data matrix")
    def test_data_type(self):
        """Validating a float64 matrix emits a UserWarning."""
        # don't run the test on the more exotic data types, as they don't
        # support the astype() interface (used by this test, but not underlying app)
        if isinstance(self.data.data.X, np.ndarray):
            self.data.data.X = self.data.data.X.astype("float64")
            with self.assertWarns(UserWarning):
                self.data._validate_data_types()

    def test_filter_idx(self):
        """An index filter mixing single indices and a range selects 102 columns."""
        var_filter = {"var": {"index": [1, 99, [200, 300]]}}
        fbs = self.data.data_frame_to_fbs_matrix(var_filter, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 102)

    def test_filter_complex(self):
        """Combining an annotation-value filter with an index filter selects 91 columns."""
        var_filter = {
            "var": {
                "annotation_value": [{"name": "n_cells", "min": 10}],
                "index": [1, 99, [200, 300]],
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(var_filter, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 91)

    def test_obs_and_var_names(self):
        """Neither index column contains missing values."""
        var_names = self.data.data.var[self._index_column("var")]
        self.assertEqual(np.sum(var_names.isna()), 0)
        obs_names = self.data.data.obs[self._index_column("obs")]
        self.assertEqual(np.sum(obs_names.isna()), 0)

    def test_get_colors(self):
        """get_colors returns the expected pbmc3k color table."""
        self.assertEqual(self.data.get_colors(), pbmc3k_colors)

    def test_get_schema(self):
        """get_schema matches the stored schema fixture."""
        with open(f"{FIXTURES_ROOT}/schema.json", encoding="utf-8") as fh:
            schema = json.load(fh)
        # Compare after the file is closed; the handle is only needed for loading.
        self.assertDictEqual(self.data.get_schema(), schema)

    def test_schema_produces_error(self):
        """A datetime64 obs column makes schema creation raise TypeError."""
        self.data.data.obs["time"] = pd.Series(
            [time.time() for _ in range(self.data.cell_count)],
            dtype="datetime64[ns]",
        )
        with self.assertRaises(TypeError):
            self.data._create_schema()

    def test_layout(self):
        """The default layout has 6 columns and its first two lie within [0, 1]."""
        fbs = self.data.layout_to_fbs_matrix(fields=None)
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 6)
        self.assertEqual(layout["n_rows"], 2638)

        X = layout["columns"][0]
        self.assertTrue((X >= 0).all() and (X <= 1).all())
        Y = layout["columns"][1]
        self.assertTrue((Y >= 0).all() and (Y <= 1).all())

    def test_layout_fields(self):
        """ X_pca, X_tsne, X_umap are available """
        fbs = self.data.layout_to_fbs_matrix(["pca"])
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 2)
        self.assertEqual(layout["n_rows"], 2638)
        self.assertCountEqual(layout["col_idx"], ["pca_0", "pca_1"])

        fbs = self.data.layout_to_fbs_matrix(["tsne", "pca"])
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 4)
        self.assertEqual(layout["n_rows"], 2638)
        self.assertCountEqual(layout["col_idx"], ["tsne_0", "tsne_1", "pca_0", "pca_1"])

    def test_annotations(self):
        """Full obs/var annotation matrices expose the expected columns and sizes."""
        fbs = self.data.annotation_to_fbs_matrix("obs")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 5)
        self.assertEqual(
            annotations["col_idx"],
            [self._index_column("obs"), "n_genes", "percent_mito", "n_counts", "louvain"],
        )

        fbs = self.data.annotation_to_fbs_matrix("var")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 2)
        self.assertEqual(annotations["col_idx"], [self._index_column("var"), "n_cells"])

    def test_annotation_fields(self):
        """Requesting specific annotation fields returns only those columns."""
        fbs = self.data.annotation_to_fbs_matrix("obs", ["n_genes", "n_counts"])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 2)

        fbs = self.data.annotation_to_fbs_matrix("var", [self._index_column("var")])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 1)

    def test_diffexp_topN(self):
        """diffexp_topN returns 10 entries by default and honors an explicit count."""
        first_set = {"obs": {"index": [[0, 500]]}}
        second_set = {"obs": {"index": [[500, 1000]]}}

        result = json.loads(self.data.diffexp_topN(first_set, second_set))
        self.assertEqual(len(result), 10)

        result = json.loads(self.data.diffexp_topN(first_set, second_set, 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        """A var index range selects 10 columns; requesting along "obs" raises ValueError."""
        var_filter = {"var": {"index": [[0, 10]]}}
        fbs = self.data.data_frame_to_fbs_matrix(var_filter, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 10)

        with self.assertRaises(ValueError):
            self.data.data_frame_to_fbs_matrix(None, "obs")

    def test_filtered_data_frame(self):
        """A var annotation filter narrows columns; an obs filter raises FilterError."""
        var_filter = {"var": {"annotation_value": [{"name": "n_cells", "min": 100}]}}
        fbs = self.data.data_frame_to_fbs_matrix(var_filter, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1040)

        obs_filter = {"obs": {"annotation_value": [{"name": "n_counts", "min": 3000}]}}
        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix(obs_filter, "var")

    def test_data_named_gene(self):
        """Filtering var by gene name returns exactly the named genes' columns."""
        var_index_col_name = self._index_column("var")

        single_gene = {
            "var": {"annotation_value": [{"name": var_index_col_name, "values": ["RER1"]}]}
        }
        fbs = self.data.data_frame_to_fbs_matrix(single_gene, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1)
        self.assertEqual(data["col_idx"], [4])

        several_genes = {
            "var": {
                "annotation_value": [
                    {"name": var_index_col_name, "values": ["SPEN", "TYMP", "PRMT2"]}
                ]
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(several_genes, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 3)
        self.assertTrue((data["col_idx"] == [15, 1818, 1837]).all())

    def test_compute_embedding(self):
        """compute_embedding needs scanpy and non-backed data; check each case in turn."""
        obs_filter = {"obs": {"index": [[0, 100]]}}

        # Import the submodules explicitly: a bare `import importlib` does not
        # guarantee that `importlib.util` has been loaded.
        import importlib.util
        import unittest.mock

        # Verify that we correctly handle the case where we lack scanpy
        with unittest.mock.patch.dict(sys.modules, {"scanpy": None}), self.assertRaises(NotImplementedError):
            self.data.compute_embedding("umap", obs_filter)

        # if we happen to have scanpy, test the full API, else report a skip
        # instead of silently passing
        if importlib.util.find_spec("scanpy") is None:
            self.skipTest("Skipping compute_embedding test as ScanPy not installed")

        # this feature is unsupported in backed mode, and we expect an error
        if self.data.data.isbacked:
            with self.assertRaises(NotImplementedError):
                self.data.compute_embedding("umap", obs_filter)
            return

        schema = self.data.compute_embedding("umap", obs_filter)

        self.assertIsInstance(schema["name"], str)
        name = schema["name"]
        self.assertEqual(schema["type"], "float32")
        self.assertEqual(schema["dims"], [f"{name}_0", f"{name}_1"])

        # Only the first 100 (filtered) rows get finite coordinates; the rest are NaN.
        emb = self.data.data.obsm[f"X_{name}"]
        self.assertEqual(emb.shape, (2638, 2))
        self.assertTrue(np.isfinite(emb[0:100]).all())
        self.assertTrue(np.isnan(emb[100:]).all())