Example #1
0
class EngineTest(unittest.TestCase):
    """Read-only API tests for ``ScanpyEngine``.

    The expected sizes asserted throughout (2638 rows / cells and
    1838 columns / genes) are hard-coded for the dataset under test.

    NOTE(review): ``self.backed`` and ``self.data_locator`` are read in
    ``setUp`` but are not defined anywhere in this class; presumably they
    are injected by a class-level parameterisation outside this view --
    confirm.
    """

    def setUp(self):
        """Build a fresh engine for every test (no label file configured)."""
        args = {
            "layout": ["umap"],
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
            "layout_file": None,
            "backed": self.backed,
        }
        self.data = ScanpyEngine(DataLocator(self.data_locator), args)

    def _index_col_name(self, axis):
        """Return the schema's index column name for ``axis`` ("obs"/"var")."""
        return self.data.get_schema()["annotations"][axis]["index"]

    def test_init(self):
        """Check the loaded dimensions and spot-check one matrix value."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Two-sided tolerance: without abs() any value far *below* the
        # expected one would also satisfy ``value - expected < epsilon``.
        self.assertLess(abs(self.data.data.X[0, 0] - -0.171_469_51), epsilon)

    def test_mandatory_annotations(self):
        """The schema's index columns exist and both indexes are 0..n-1."""
        self.assertIn(self._index_col_name("obs"), self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))
        self.assertIn(self._index_col_name("var"), self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Scanpy data matrix")
    def test_data_type(self):
        """A float64 matrix must make ``_validate_data_types`` warn."""
        # Don't run the test on the more exotic data types, as they don't
        # support the astype() interface (used by this test, but not by the
        # underlying app). Skip explicitly instead of passing silently so
        # the report shows that nothing was verified.
        if not isinstance(self.data.data.X, np.ndarray):
            self.skipTest("data matrix type does not support astype()")
        self.data.data.X = self.data.data.X.astype("float64")
        with self.assertWarns(UserWarning):
            self.data._validate_data_types()

    def test_filter_idx(self):
        """Filter var by a mix of single indices and an index range."""
        filter_ = {"filter": {"var": {"index": [1, 99, [200, 300]]}}}
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        # Two single indices plus the [200, 300] range entry; the expected
        # total of 102 implies the range contributes 100 columns.
        self.assertEqual(data["n_cols"], 102)

    def test_filter_complex(self):
        """Combine an annotation-value filter with an index filter on var."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{"name": "n_cells", "min": 10}],
                    "index": [1, 99, [200, 300]],
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 91)

    def test_obs_and_var_names(self):
        """Neither index column may contain missing values."""
        var_names = self.data.data.var[self._index_col_name("var")]
        self.assertEqual(np.sum(var_names.isna()), 0)
        obs_names = self.data.data.obs[self._index_col_name("obs")]
        self.assertEqual(np.sum(obs_names.isna()), 0)

    def test_get_schema(self):
        """The engine schema equals the ``schema.json`` next to this file."""
        schema_path = path.join(path.dirname(__file__), "schema.json")
        with open(schema_path, encoding="utf-8") as fh:
            schema = json.load(fh)
        self.assertEqual(self.data.get_schema(), schema)

    def test_schema_produces_error(self):
        """A datetime64 obs column must make schema creation raise TypeError."""
        self.data.data.obs["time"] = pd.Series(
            [time.time() for _ in range(self.data.cell_count)],
            dtype="datetime64[ns]",
        )
        with self.assertRaises(TypeError):
            self.data._create_schema()

    def test_config(self):
        """Check the advertised obs layout feature."""
        self.assertEqual(
            self.data.features["layout"]["obs"],
            {"available": True, "interactiveLimit": 50000},
        )

    def test_layout(self):
        """The layout has two columns whose values all lie in [0, 1]."""
        fbs = self.data.layout_to_fbs_matrix()
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 2)
        self.assertEqual(layout["n_rows"], 2638)

        X = layout["columns"][0]
        self.assertTrue((X >= 0).all() and (X <= 1).all())
        Y = layout["columns"][1]
        self.assertTrue((Y >= 0).all() and (Y <= 1).all())

    def test_annotations(self):
        """Fetch all obs and all var annotations and check their columns."""
        fbs = self.data.annotation_to_fbs_matrix("obs")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 5)
        self.assertEqual(
            annotations["col_idx"],
            [
                self._index_col_name("obs"),
                "n_genes",
                "percent_mito",
                "n_counts",
                "louvain",
            ],
        )

        fbs = self.data.annotation_to_fbs_matrix("var")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 2)
        self.assertEqual(
            annotations["col_idx"], [self._index_col_name("var"), "n_cells"]
        )

    def test_annotation_fields(self):
        """Requesting specific fields returns only those columns."""
        fbs = self.data.annotation_to_fbs_matrix(
            "obs", ["n_genes", "n_counts"]
        )
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 2)

        fbs = self.data.annotation_to_fbs_matrix(
            "var", [self._index_col_name("var")]
        )
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 1838)
        self.assertEqual(annotations["n_cols"], 1)

    def test_annotation_put(self):
        """Writing annotations raises DisabledFeatureError on this engine."""
        # Arguments are (axis, payload), matching the other
        # annotation_put_fbs call sites in this file.
        # NOTE(review): confirm against the engine's signature.
        with self.assertRaises(DisabledFeatureError):
            self.data.annotation_put_fbs("obs", None)

    def test_diffexp_topN(self):
        """diffexp_topN returns 10 results by default, or the count given."""
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20)
        )
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        """Fetch a var-sliced data frame; a missing filter is a ValueError."""
        f1 = {"var": {"index": [[0, 10]]}}
        fbs = self.data.data_frame_to_fbs_matrix(f1, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 10)

        with self.assertRaises(ValueError):
            self.data.data_frame_to_fbs_matrix(None, "obs")

    def test_filtered_data_frame(self):
        """Filter var by annotation value; an obs filter raises FilterError."""
        filter_ = {
            "filter": {
                "var": {"annotation_value": [{"name": "n_cells", "min": 100}]}
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1040)

        # An obs filter passed while requesting the var axis is rejected.
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{"name": "n_counts", "min": 3000}]
                }
            }
        }
        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")

    def test_data_named_gene(self):
        """Select var columns by name and check the returned column indices."""
        var_index_col_name = self._index_col_name("var")
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [
                        {"name": var_index_col_name, "values": ["RER1"]}
                    ]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1)
        # np.array_equal gives a clean True/False whether col_idx is decoded
        # as a list or as an array, and for any number of elements.
        self.assertTrue(np.array_equal(data["col_idx"], [4]))

        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [
                        {
                            "name": var_index_col_name,
                            "values": ["SPEN", "TYMP", "PRMT2"],
                        }
                    ]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 3)
        self.assertTrue(np.array_equal(data["col_idx"], [15, 1818, 1837]))
Example #2
0
class WritableAnnotationTest(unittest.TestCase):
    """Tests for writing obs annotations through ``ScanpyEngine``.

    Each test gets its own temporary directory holding the label file the
    engine is configured to write to.
    """

    def setUp(self):
        """Create a temp dir and an engine whose label file lives inside it."""
        self.tmpDir = tempfile.mkdtemp()
        # Registered right away so the directory is removed even when the
        # engine construction below raises; unittest does not call tearDown
        # after a failed setUp, but it does run registered cleanups.
        self.addCleanup(shutil.rmtree, self.tmpDir)
        self.label_file = path.join(self.tmpDir, "labels.csv")
        args = {
            "layout": ["umap"],
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
            "label_file": self.label_file,
        }
        self.data = ScanpyEngine(
            DataLocator("example-dataset/pbmc3k.h5ad"), args
        )

    def make_fbs(self, data):
        """Encode ``data`` (anything ``pd.DataFrame`` accepts) as an FBS matrix."""
        df = pd.DataFrame(data)
        return encode_matrix_fbs(matrix=df, row_idx=None, col_idx=df.columns)

    def _constant_category(self, label):
        """Return a categorical Series with ``label`` for every obs row."""
        n_rows = self.data.data.obs.shape[0]
        return pd.Series([label] * n_rows, dtype="category")

    def test_error_checks(self):
        """Verify that the expected errors are generated."""
        fbs_bad = self.make_fbs(
            {"louvain": self._constant_category("undefined")}
        )

        # ensure we catch an attempt to change a VAR annotation
        with self.assertRaises(ValueError):
            self.data.annotation_put_fbs("var", fbs_bad)

        # ensure we catch an attempt to overwrite non-writable data
        with self.assertRaises(KeyError):
            self.data.annotation_put_fbs("obs", fbs_bad)

    def test_write_to_file(self):
        """Verify the label file is written, overwritten, and rotated."""
        n_rows = self.data.data.obs.shape[0]
        fbs = self.make_fbs(
            {
                "cat_A": self._constant_category("label_A"),
                "cat_B": self._constant_category("label_B"),
            }
        )
        res = self.data.annotation_put_fbs("obs", fbs)
        self.assertEqual(res, json.dumps({"status": "OK"}))
        self.assertTrue(path.exists(self.label_file))
        df = pd.read_csv(self.label_file, index_col=0)
        self.assertEqual(df.shape, (n_rows, 2))
        self.assertEqual(set(df.columns), {"cat_A", "cat_B"})
        self.assertTrue(self.data.original_obs_index.equals(df.index))
        self.assertTrue((df["cat_A"] == "label_A").all())
        self.assertTrue((df["cat_B"] == "label_B").all())

        # verify complete overwrite on second attempt, AND rotation occurs
        fbs = self.make_fbs(
            {
                "cat_A": self._constant_category("label_A1"),
                "cat_C": self._constant_category("label_C"),
            }
        )
        res = self.data.annotation_put_fbs("obs", fbs)
        self.assertEqual(res, json.dumps({"status": "OK"}))
        self.assertTrue(path.exists(self.label_file))
        df = pd.read_csv(self.label_file, index_col=0)
        self.assertEqual(set(df.columns), {"cat_A", "cat_C"})
        self.assertTrue((df["cat_A"] == "label_A1").all())
        self.assertTrue((df["cat_C"] == "label_C").all())

        # rotation: the previous file is expected under the "-1" suffix
        name, ext = path.splitext(self.label_file)
        self.assertTrue(path.exists(f"{name}-1{ext}"))

    def test_file_rotation_to_max_9(self):
        """Verify rotation stops at 9 backup files."""
        fbs = self.make_fbs(
            {
                "cat_A": self._constant_category("label_A"),
                "cat_B": self._constant_category("label_B"),
            }
        )
        # 11 writes: more than enough to exceed the 9 backups asserted below.
        for _ in range(11):
            res = self.data.annotation_put_fbs("obs", fbs)
            self.assertEqual(res, json.dumps({"status": "OK"}))

        name, ext = path.splitext(self.label_file)
        expected_files = {self.label_file} | {
            f"{name}-{i}{ext}" for i in range(1, 10)
        }
        found_files = {path.join(self.tmpDir, p) for p in listdir(self.tmpDir)}
        self.assertEqual(expected_files, found_files)

    def test_put_get_roundtrip(self):
        """Verify OBS PUTs (annotation_put_fbs) are visible via GET.

        The GET side is ``annotation_to_fbs_matrix``; the schema is also
        checked for the two new writable categorical columns.
        """
        n_rows = self.data.data.obs.shape[0]
        fbs = self.make_fbs(
            {
                "cat_A": self._constant_category("label_A"),
                "cat_B": self._constant_category("label_B"),
            }
        )

        # put
        res = self.data.annotation_put_fbs("obs", fbs)
        self.assertEqual(res, json.dumps({"status": "OK"}))

        # get
        fbsAll = self.data.annotation_to_fbs_matrix("obs")
        schema = self.data.get_schema()
        annotations = decode_fbs.decode_matrix_FBS(fbsAll)
        obs_index_col_name = schema["annotations"]["obs"]["index"]
        self.assertEqual(annotations["n_rows"], n_rows)
        self.assertEqual(annotations["n_cols"], 7)
        self.assertIsNone(annotations["row_idx"])
        col_idx = annotations["col_idx"]
        self.assertEqual(
            col_idx,
            [
                obs_index_col_name,
                "n_genes",
                "percent_mito",
                "n_counts",
                "louvain",
                "cat_A",
                "cat_B",
            ],
        )
        self.assertEqual(
            annotations["columns"][col_idx.index("cat_A")],
            ["label_A"] * n_rows,
        )
        self.assertEqual(
            annotations["columns"][col_idx.index("cat_B")],
            ["label_B"] * n_rows,
        )

        # verify the schema was updated
        all_col_schema = {
            c["name"]: c for c in schema["annotations"]["obs"]["columns"]
        }
        self.assertEqual(
            all_col_schema["cat_A"],
            {
                "name": "cat_A",
                "type": "categorical",
                "categories": ["label_A"],
                "writable": True,
            },
        )
        self.assertEqual(
            all_col_schema["cat_B"],
            {
                "name": "cat_B",
                "type": "categorical",
                "categories": ["label_B"],
                "writable": True,
            },
        )