Esempio n. 1
0
 def setUp(self):
     """Create a ScanpyEngine over the pbmc3k example dataset for each test."""
     engine_config = dict(
         layout=["umap"],
         max_category_items=100,
         obs_names=None,
         var_names=None,
         diffexp_lfc_cutoff=0.01,
     )
     self.data = ScanpyEngine("example-dataset/pbmc3k.h5ad", engine_config)
Esempio n. 2
0
 def setUp(self):
     """Create a ScanpyEngine from ``self.data_locator`` with no layout file."""
     engine_config = dict(
         layout=["umap"],
         max_category_items=100,
         obs_names=None,
         var_names=None,
         diffexp_lfc_cutoff=0.01,
         layout_file=None,
     )
     locator = DataLocator(self.data_locator)
     self.data = ScanpyEngine(locator, engine_config)
Esempio n. 3
0
 def setUp(self):
     """Load the nan.h5ad test dataset and build its schema.

     The engine arguments are kept on ``self.args``; UserWarning is ignored
     while the engine is constructed and its schema created.
     """
     self.args = dict(
         layout=["umap"],
         max_category_items=100,
         obs_names=None,
         var_names=None,
         diffexp_lfc_cutoff=0.01,
     )
     dataset_path = "server/test/test_datasets/nan.h5ad"
     with warnings.catch_warnings():
         # The filter only applies inside this context manager.
         warnings.simplefilter("ignore", category=UserWarning)
         engine = ScanpyEngine(DataLocator(dataset_path), self.args)
         self.data = engine
         engine._create_schema()
Esempio n. 4
0
 def setUp(self):
     """Create a temp directory and load pbmc3k with a label file inside it.

     The directory is stored on ``self.tmpDir`` and the CSV path on
     ``self.label_file``; the path is passed to the engine as "label_file".
     """
     scratch_dir = tempfile.mkdtemp()
     self.tmpDir = scratch_dir
     self.label_file = path.join(scratch_dir, "labels.csv")
     engine_config = dict(
         layout=["umap"],
         max_category_items=100,
         obs_names=None,
         var_names=None,
         diffexp_lfc_cutoff=0.01,
         label_file=self.label_file,
     )
     locator = DataLocator("example-dataset/pbmc3k.h5ad")
     self.data = ScanpyEngine(locator, engine_config)
Esempio n. 5
0
    def setUp(self):
        """Load pbmc3k with ``nan_to_num`` enabled and build the schema."""
        engine_config = dict(
            layout="umap",
            diffexp="ttest",
            max_category_items=100,
            obs_names=None,
            var_names=None,
            diffexp_lfc_cutoff=0.01,
            nan_to_num=True,
        )
        engine = ScanpyEngine("example-dataset/pbmc3k.h5ad", engine_config)
        self.data = engine
        engine._create_schema()
Esempio n. 6
0
 def run(self):
     """Create the server, attach the data file and serve it.

     Progress is reported through ``self.emit``: "server_error" or
     "engine_error" (with the exception text) when a stage fails, "ready"
     once the data has been attached, and "finished" before returning from
     any of the stages below.
     """
     super(Worker, self).run()
     if not self.data_file:
         # No data file configured: nothing to serve.
         self.emit("finished")
         return

     # Imported here rather than at module level.
     from server.app.app import Server
     from server.app.scanpy_engine.scanpy_engine import ScanpyEngine

     # Stage 1: create the server application.
     try:
         server = Server()
         server.create_app()
     except Exception as err:
         self.emit("server_error", str(err))
         self.emit("finished")
         return

     # Stage 2: build the engine from defaults overridden by engine_options.
     try:
         engine_args = dict(
             max_category_items=100,
             diffexp_lfc_cutoff=0.01,
             obs_names=None,
             var_names=None,
         )
         engine_args.update(self.engine_options)
         engine = ScanpyEngine(self.data_file, engine_args)
         server.attach_data(engine, self.title)
         self.emit("ready")
     except Exception as err:
         self.emit("engine_error", str(err))
         self.emit("finished")
         return

     # Stage 3: run the server; "finished" is emitted however it ends.
     try:
         server.app.run(
             host=self.host,
             debug=False,
             port=self.port,
             threaded=True,
         )
     except Exception as err:
         self.emit("server_error", str(err))
     finally:
         self.emit("finished")
Esempio n. 7
0
class EngineTest(unittest.TestCase):
    """Exercise ScanpyEngine against a dataset of 2638 cells and 1838 genes.

    NOTE(review): ``self.data_locator`` and ``self.backed`` are read in
    ``setUp`` but are not defined in this class; presumably they are supplied
    externally (e.g. by parameterization) -- confirm.
    """

    def setUp(self):
        """Build a fresh engine from ``self.data_locator`` for every test."""
        args = {
            "layout": ["umap"],
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
            "layout_file": None,
            "backed": self.backed
        }
        self.data = ScanpyEngine(DataLocator(self.data_locator), args)

    def test_init(self):
        """Check the cell/gene counts and the first expression value."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Compare the absolute difference: a signed "difference < epsilon"
        # check would also pass for any value far below the expected one.
        self.assertLess(abs(self.data.data.X[0, 0] - -0.171_469_51), epsilon)

    def test_mandatory_annotations(self):
        """The schema's index columns exist and obs/var use a 0..n-1 index."""
        obs_index_col_name = self.data.get_schema(
        )["annotations"]["obs"]["index"]
        self.assertIn(obs_index_col_name, self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        self.assertIn(var_index_col_name, self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Scanpy data matrix")
    def test_data_type(self):
        """A float64 matrix makes ``_validate_data_types`` emit a UserWarning."""
        # don't run the test on the more exotic data types, as they don't
        # support the astype() interface (used by this test, but not underlying app)
        if isinstance(self.data.data.X, np.ndarray):
            self.data.data.X = self.data.data.X.astype("float64")
            with self.assertWarns(UserWarning):
                self.data._validate_data_types()

    def test_filter_idx(self):
        """An index filter on var yields all rows and 102 columns."""
        filter_ = {"filter": {"var": {"index": [1, 99, [200, 300]]}}}
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 102)

    def test_filter_complex(self):
        """Combining an annotation filter with an index filter yields 91 columns."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "n_cells",
                        "min": 10
                    }],
                    "index": [1, 99, [200, 300]]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 91)

    def test_obs_and_var_names(self):
        """Neither index column contains missing values."""
        self.assertEqual(
            np.sum(self.data.data.var[self.data.get_schema()["annotations"]
                                      ["var"]["index"]].isna()), 0)
        self.assertEqual(
            np.sum(self.data.data.obs[self.data.get_schema()["annotations"]
                                      ["obs"]["index"]].isna()), 0)

    def test_get_schema(self):
        """The engine schema equals the ``schema.json`` stored next to this file."""
        with open(path.join(path.dirname(__file__), "schema.json")) as fh:
            schema = json.load(fh)
            self.assertEqual(self.data.get_schema(), schema)

    def test_schema_produces_error(self):
        """A datetime64 obs column makes ``_create_schema`` raise TypeError."""
        self.data.data.obs["time"] = pd.Series(
            [time.time() for _ in range(self.data.cell_count)],
            dtype="datetime64[ns]",
        )
        with pytest.raises(TypeError):
            self.data._create_schema()

    def test_config(self):
        """The obs layout feature is available with the expected limit."""
        self.assertEqual(
            self.data.features["layout"]["obs"],
            {
                "available": True,
                "interactiveLimit": 50000
            },
        )

    def test_layout(self):
        """The layout has two columns whose values all lie within [0, 1]."""
        fbs = self.data.layout_to_fbs_matrix()
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 2)
        self.assertEqual(layout["n_rows"], 2638)

        X = layout["columns"][0]
        self.assertTrue((X >= 0).all() and (X <= 1).all())
        Y = layout["columns"][1]
        self.assertTrue((Y >= 0).all() and (Y <= 1).all())

    def test_annotations(self):
        """The obs and var annotation matrices have the expected shape and columns."""
        fbs = self.data.annotation_to_fbs_matrix("obs")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 5)
        obs_index_col_name = self.data.get_schema(
        )["annotations"]["obs"]["index"]
        self.assertEqual(
            annotations["col_idx"],
            [
                obs_index_col_name, "n_genes", "percent_mito", "n_counts",
                "louvain"
            ],
        )

        fbs = self.data.annotation_to_fbs_matrix("var")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations['n_rows'], 1838)
        self.assertEqual(annotations['n_cols'], 2)
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        self.assertEqual(annotations["col_idx"],
                         [var_index_col_name, "n_cells"])

    def test_annotation_fields(self):
        """Requesting specific fields returns only those columns."""
        fbs = self.data.annotation_to_fbs_matrix("obs",
                                                 ["n_genes", "n_counts"])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations['n_cols'], 2)

        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        fbs = self.data.annotation_to_fbs_matrix("var", [var_index_col_name])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations['n_rows'], 1838)
        self.assertEqual(annotations['n_cols'], 1)

    def test_annotation_put(self):
        """Writing annotations raises DisabledFeatureError with this configuration."""
        with self.assertRaises(DisabledFeatureError):
            self.data.annotation_put_fbs(None, "obs")

    def test_diffexp_topN(self):
        """``diffexp_topN`` returns 10 results by default and 20 when asked for 20."""
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        """A var index range selects 10 columns; a ``None`` filter on obs raises ValueError."""
        f1 = {"var": {"index": [[0, 10]]}}
        fbs = self.data.data_frame_to_fbs_matrix(f1, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 10)

        with self.assertRaises(ValueError):
            self.data.data_frame_to_fbs_matrix(None, "obs")

    def test_filtered_data_frame(self):
        """A var annotation filter works; an obs filter on the var axis raises FilterError."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "n_cells",
                        "min": 100
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1040)

        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")

    def test_data_named_gene(self):
        """Filtering var by gene name returns the matching column indices."""
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": var_index_col_name,
                        "values": ["RER1"]
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1)
        self.assertEqual(data["col_idx"], [4])

        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": var_index_col_name,
                        "values": ["SPEN", "TYMP", "PRMT2"]
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 3)
        self.assertTrue((data["col_idx"] == [15, 1818, 1837]).all())
Esempio n. 8
0
def launch(data, verbose, debug, open_browser, port, host, embedding,
           obs_names, var_names, max_category_items, diffexp_lfc_cutoff, title,
           scripts, about, experimental_label_file, backed, disable_diffexp):
    """Launch the cellxgene data viewer.
    This web app lets you explore single-cell expression data.
    Data must be in a format that cellxgene expects, read the
    "getting started" guide.

    Examples:

    > cellxgene launch example_dataset/pbmc3k.h5ad --title pbmc3k

    > cellxgene launch <your data file> --title <your title>

    > cellxgene launch <url>"""
    # NOTE: the docstring above is left untouched on purpose; presumably it is
    # surfaced as the command's help text -- confirm against the decorators.
    #
    # Flow: validate the arguments, create the server app, load the data into
    # a ScanpyEngine, optionally open a browser, then run the server until it
    # exits. Validation problems are reported as click exceptions.

    e_args = parse_engine_args(embedding, obs_names, var_names,
                               max_category_items, diffexp_lfc_cutoff,
                               experimental_label_file, backed,
                               disable_diffexp)
    try:
        data_locator = DataLocator(data)
    except RuntimeError as err:
        raise click.ClickException(
            f"Unable to access data at {data}.  {err}") from err

    # Startup message
    click.echo("[cellxgene] Starting the CLI...")

    # Argument checking
    if data_locator.islocal():
        # if data locator is local, apply file system conventions and other "cheap"
        # validation checks.  If a URI, defer until we actually fetch the data and
        # try to read it.  Many of these tests don't make sense for URIs (eg, extension-
        # based typing).
        if not data_locator.exists():
            raise click.FileError(data, hint="file does not exist")
        if not data_locator.isfile():
            raise click.FileError(data, hint="data is not a file")
        extension = splitext(data)[1]
        if extension != ".h5ad":
            raise click.FileError(basename(data),
                                  hint="file type must be .h5ad")

    if debug:
        # Debug mode implies verbose output and never opens a browser.
        verbose = True
        open_browser = False
    else:
        warnings.formatwarning = custom_format_warning

    if not verbose:
        # Suppress tracebacks for non-verbose runs.
        sys.tracebacklimit = 0

    if scripts:
        click.echo(r"""
    / / /\ \ \__ _ _ __ _ __ (_)_ __   __ _
    \ \/  \/ / _` | '__| '_ \| | '_ \ / _` |
     \  /\  / (_| | |  | | | | | | | | (_| |
      \/  \/ \__,_|_|  |_| |_|_|_| |_|\__, |
                                      |___/
    The --scripts flag is intended for developers to include google analytics etc. You could be opening yourself to a
    security risk by including the --scripts flag. Make sure you trust the scripts that you are including.
            """)
        scripts_pretty = ", ".join(scripts)
        # abort=True ends the command if the user declines.
        click.confirm(
            f"Are you sure you want to inject these scripts: {scripts_pretty}?",
            abort=True)

    if not title:
        # Default the title to the data file name without its extension.
        title = splitext(basename(data))[0]

    if port:
        if debug:
            raise click.ClickException(
                "--port and --debug may not be used together (try --verbose for error logging)."
            )
        if not is_port_available(host, int(port)):
            raise click.ClickException(
                f"The port selected {port} is in use, please specify an open port using the --port flag."
            )
    else:
        port = find_available_port(host)

    if experimental_label_file:
        lf_ext = splitext(experimental_label_file)[1]
        # A label file without any extension is accepted.
        if lf_ext and lf_ext != ".csv":
            raise click.FileError(basename(experimental_label_file),
                                  hint="label file type must be .csv")

    if about:

        def url_check(url):
            """Return True when ``url`` parses with both a scheme and a netloc."""
            try:
                result = urlparse(url)
            except ValueError:
                return False
            return all([result.scheme, result.netloc])

        if not url_check(about):
            raise click.ClickException(
                "Must provide an absolute URL for --about. (Example format: http://example.com)"
            )

    # Setup app
    cellxgene_url = f"http://{host}:{port}"

    # Import Flask app
    server = Server()

    server.create_app()
    server.app.config.update(SCRIPTS=scripts)

    if not verbose:
        # Only show errors from the werkzeug request logger.
        logging.getLogger("werkzeug").setLevel(logging.ERROR)

    # The size is only queried for local data; remote data counts as 0 here.
    file_size = data_locator.size() if data_locator.islocal() else 0

    # if a big file, let the user know it may take a while to load.
    if file_size > BIG_FILE_SIZE_THRESHOLD:
        click.echo(
            f"[cellxgene] Loading data from {basename(data)}, this may take a while..."
        )
    else:
        click.echo(f"[cellxgene] Loading data from {basename(data)}.")

    from server.app.scanpy_engine.scanpy_engine import ScanpyEngine

    try:
        server.attach_data(ScanpyEngine(data_locator, e_args),
                           title=title,
                           about=about)
    except ScanpyFileError as e:
        raise click.ClickException(str(e)) from e

    if not disable_diffexp and server.app.data.config['diffexp_may_be_slow']:
        click.echo("[cellxgene] CAUTION: due to the size of your dataset, "
                   "running differential expression may take longer or fail.")

    if open_browser:
        click.echo(
            f"[cellxgene] Launching! Opening your browser to {cellxgene_url} now."
        )
        webbrowser.open(cellxgene_url)
    else:
        click.echo(
            f"[cellxgene] Launching! Please go to {cellxgene_url} in your browser."
        )

    click.echo("[cellxgene] Type CTRL-C at any time to exit.")

    if not verbose:
        # Deliberately left open: stdout stays redirected to the null device
        # for as long as the server runs.
        f = open(devnull, "w")
        sys.stdout = f

    try:
        server.app.run(host=host,
                       debug=debug,
                       port=port,
                       threaded=not debug,
                       use_debugger=False)
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            raise click.ClickException(
                "Port is in use, please specify an open port using the --port flag."
            ) from e
        raise
Esempio n. 9
0
class UtilTest(unittest.TestCase):
    """Exercise the JSON-based ScanpyEngine API against the pbmc3k example dataset."""

    def setUp(self):
        """Load pbmc3k into a fresh engine and build its schema."""
        args = {
            "layout": "umap",
            "diffexp": "ttest",
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
        }

        self.data = ScanpyEngine("example-dataset/pbmc3k.h5ad", args)
        self.data._create_schema()

    def test_init(self):
        """Check the cell/gene counts and the first expression value."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Compare the absolute difference: a signed "difference < epsilon"
        # check would also pass for any value far below the expected one.
        self.assertLess(abs(self.data.data.X[0, 0] - -0.171_469_51), epsilon)

    def test_mandatory_annotations(self):
        """obs and var both carry a "name" column and a 0..n-1 index."""
        self.assertIn("name", self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))
        self.assertIn("name", self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Scanpy data matrix")
    def test_data_type(self):
        """A float64 matrix makes ``_validate_data_types`` emit a UserWarning."""
        self.data.data.X = self.data.data.X.astype("float64")
        with self.assertWarns(UserWarning):
            self.data._validate_data_types()

    def test_filter_idx(self):
        """Index filters on both axes produce a (1002, 102) result."""
        filter_ = {
            "filter": {
                "var": {
                    "index": [1, 99, [200, 300]]
                },
                "obs": {
                    "index": [1, 99, [1000, 2000]]
                },
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (1002, 102))

    def test_filter_annotation(self):
        """Categorical and minimum-value obs filters select the expected rows."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "louvain",
                        "values": ["NK cells", "CD8 T cells"]
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (470, 1838))
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (497, 1838))

    def test_filter_annotation_no_uns(self):
        """Filtering var by a single gene name leaves one column."""
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "name",
                        "values": ["RER1"]
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape[1], 1)

    def test_filter_complex(self):
        """Index and annotation filters combined produce a (15, 102) result."""
        filter_ = {
            "filter": {
                "var": {
                    "index": [1, 99, [200, 300]]
                },
                "obs": {
                    "annotation_value": [
                        {
                            "name": "louvain",
                            "values": ["NK cells", "CD8 T cells"]
                        },
                        {
                            "name": "n_counts",
                            "min": 3000
                        },
                    ],
                    "index": [1, 99, [1000, 2000]],
                },
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (15, 102))

    def test_obs_and_var_names(self):
        """Neither "name" column contains missing values."""
        self.assertEqual(np.sum(self.data.data.var["name"].isna()), 0)
        self.assertEqual(np.sum(self.data.data.obs["name"].isna()), 0)

    def test_schema(self):
        """The engine schema equals the ``schema.json`` stored next to this file."""
        with open(path.join(path.dirname(__file__), "schema.json")) as fh:
            schema = json.load(fh)
            self.assertEqual(self.data.schema, schema)

    def test_schema_produces_error(self):
        """A datetime64 obs column makes ``_create_schema`` raise TypeError."""
        self.data.data.obs["time"] = Series(
            [time.time() for _ in range(self.data.cell_count)],
            dtype="datetime64[ns]",
        )
        with pytest.raises(TypeError):
            self.data._create_schema()

    def test_config(self):
        """The obs layout feature is available with the expected limit."""
        self.assertEqual(
            self.data.features["layout"]["obs"],
            {
                "available": True,
                "interactiveLimit": 50000
            },
        )

    def test_layout(self):
        """The unfiltered layout is 2-D, covers every cell and stays at or below 1."""
        layout = json.loads(self.data.layout(None))
        self.assertEqual(layout["layout"]["ndims"], 2)
        self.assertEqual(len(layout["layout"]["coordinates"]), 2638)
        self.assertEqual(layout["layout"]["coordinates"][0][0], 0)
        for val in layout["layout"]["coordinates"]:
            self.assertLessEqual(val[1], 1)
            self.assertLessEqual(val[2], 1)

    def test_annotations(self):
        """Unfiltered obs and var annotations have the expected names and lengths."""
        annotations = json.loads(self.data.annotation(None, "obs"))
        self.assertEqual(
            annotations["names"],
            ["name", "n_genes", "percent_mito", "n_counts", "louvain"],
        )
        self.assertEqual(len(annotations["data"]), 2638)
        annotations = json.loads(self.data.annotation(None, "var"))
        self.assertEqual(annotations["names"], ["name", "n_cells"])
        self.assertEqual(len(annotations["data"]), 1838)

    def test_annotation_fields(self):
        """Requesting specific fields returns only those names."""
        annotations = json.loads(
            self.data.annotation(None, "obs", ["n_genes", "n_counts"]))
        self.assertEqual(annotations["names"], ["n_genes", "n_counts"])
        self.assertEqual(len(annotations["data"]), 2638)
        annotations = json.loads(self.data.annotation(None, "var", ["name"]))
        self.assertEqual(annotations["names"], ["name"])
        self.assertEqual(len(annotations["data"]), 1838)

    def test_filtered_annotation(self):
        """Annotations honour obs and var filters on their respective axes."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                },
                "var": {
                    "annotation_value": [{
                        "name": "name",
                        "values": ["ATAD3C", "RER1"]
                    }]
                },
            }
        }
        annotations = json.loads(self.data.annotation(filter_["filter"],
                                                      "obs"))
        self.assertEqual(
            annotations["names"],
            ["name", "n_genes", "percent_mito", "n_counts", "louvain"],
        )
        self.assertEqual(len(annotations["data"]), 497)
        annotations = json.loads(self.data.annotation(filter_["filter"],
                                                      "var"))
        self.assertEqual(annotations["names"], ["name", "n_cells"])
        self.assertEqual(len(annotations["data"]), 2)

    def test_filtered_layout(self):
        """An obs filter reduces the layout to 497 coordinates."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        layout = json.loads(self.data.layout(filter_["filter"]))
        self.assertEqual(len(layout["layout"]["coordinates"]), 497)

    def test_diffexp_topN(self):
        """``diffexp_topN`` returns 10 results by default and 20 when asked for 20."""
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        """The unfiltered data frame has full obs and var lengths on either axis."""
        data_frame_obs = json.loads(self.data.data_frame(None, "obs"))
        self.assertEqual(len(data_frame_obs["var"]), 1838)
        self.assertEqual(len(data_frame_obs["obs"]), 2638)
        data_frame_var = json.loads(self.data.data_frame(None, "var"))
        self.assertEqual(len(data_frame_var["var"]), 1838)
        self.assertEqual(len(data_frame_var["obs"]), 2638)

    def test_filtered_data_frame(self):
        """The requested axis holds list rows; the other axis holds int entries."""
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        data_frame_obs = json.loads(
            self.data.data_frame(filter_["filter"], "obs"))
        self.assertEqual(len(data_frame_obs["var"]), 1838)
        self.assertEqual(len(data_frame_obs["obs"]), 497)
        self.assertIsInstance(data_frame_obs["obs"][0], (list, tuple))
        self.assertEqual(type(data_frame_obs["var"][0]), int)
        data_frame_var = json.loads(
            self.data.data_frame(filter_["filter"], "var"))
        self.assertEqual(len(data_frame_var["var"]), 1838)
        self.assertEqual(len(data_frame_var["obs"]), 497)
        self.assertIsInstance(data_frame_var["var"][0], (list, tuple))
        self.assertEqual(type(data_frame_var["obs"][0]), int)

    def test_data_single_gene(self):
        """A single-gene var filter keeps the same per-axis entry types."""
        for axis in ["obs", "var"]:
            filter_ = {
                "filter": {
                    "var": {
                        "annotation_value": [{
                            "name": "name",
                            "values": ["RER1"]
                        }]
                    }
                }
            }
            data_frame_var = json.loads(
                self.data.data_frame(filter_["filter"], axis))
            if axis == "obs":
                self.assertEqual(type(data_frame_var["var"][0]), int)
                self.assertIsInstance(data_frame_var["obs"][0], (list, tuple))
            elif axis == "var":
                self.assertEqual(type(data_frame_var["obs"][0]), int)
                self.assertIsInstance(data_frame_var["var"][0], (list, tuple))


# Module-level entry point: inside the class body this guard would run
# before the class name is bound, so no tests would be discovered.
if __name__ == "__main__":
    unittest.main()
 def test_url_http(self):
     """Load pbmc3k from an http:// URL and run ``self.stdAsserts`` on the engine."""
     source = DataLocator(
         "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/master/example-dataset/pbmc3k.h5ad"
     )
     engine = ScanpyEngine(source, self.args)
     self.stdAsserts(engine)
class DataLoadEngineTest(unittest.TestCase):
    """
    Test file loading, including deferred loading/update.
    """
    def setUp(self):
        """Prepare a locator for pbmc3k and an engine constructed without arguments."""
        self.data_file = DataLocator("example-dataset/pbmc3k.h5ad")
        self.data = ScanpyEngine()

    def test_init(self):
        """An engine constructed without arguments has no data."""
        self.assertIsNone(self.data.data)

    def test_delayed_load_args(self):
        """Arguments passed to ``update`` become the engine's config unchanged."""
        args = {
            "layout": ["tsne"],
            "max_category_items": 1000,
            "obs_names": "foo",
            "var_names": "bar",
            "diffexp_lfc_cutoff": 0.1,
            "annotations": False,
            "annotations_file": None,
            "annotations_output_dir": None,
            "backed": False,
            "diffexp_may_be_slow": False,
            "disable_diffexp": False
        }
        self.data.update(args=args)
        self.assertEqual(args, self.data.config)

    def test_requires_data(self):
        """Building a schema before any data is loaded raises DriverError."""
        with self.assertRaises(DriverError):
            self.data._create_schema()

    def test_delayed_load_data(self):
        """Data supplied through ``update`` is loaded with the expected contents."""
        self.data.update(data_locator=self.data_file)
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Compare the absolute difference: a signed "difference < epsilon"
        # check would also pass for any value far below the expected one.
        self.assertLess(abs(self.data.data.X[0, 0] - -0.171_469_51), epsilon)

    def test_diffexp_topN(self):
        """``diffexp_topN`` works after a deferred load: 10 results by default, 20 on request."""
        self.data.update(data_locator=self.data_file)
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)
 def test_posix_file(self):
     """Load pbmc3k from a relative file path and run ``self.stdAsserts`` on the engine."""
     engine = ScanpyEngine(DataLocator("example-dataset/pbmc3k.h5ad"), self.args)
     self.stdAsserts(engine)
 def setUp(self):
     """Prepare a locator for pbmc3k and an engine constructed without arguments."""
     locator = DataLocator("example-dataset/pbmc3k.h5ad")
     self.data_file = locator
     self.data = ScanpyEngine()
 def test_load(self):
     """Constructing an engine over nan.h5ad must emit a UserWarning."""
     dataset_path = "server/test/test_datasets/nan.h5ad"
     with self.assertWarns(UserWarning):
         ScanpyEngine(dataset_path, self.args)
Esempio n. 15
0
class DataLoadEngineTest(unittest.TestCase):
    """Test deferred loading of arguments and data through ``ScanpyEngine.update``."""

    def setUp(self):
        """Remember the pbmc3k path and create an engine without arguments."""
        self.data_file = "example-dataset/pbmc3k.h5ad"
        self.data = ScanpyEngine()

    def test_init(self):
        """An engine constructed without arguments has no data."""
        self.assertIsNone(self.data.data)

    def test_delayed_load_args(self):
        """Arguments passed to ``update`` become the engine's config unchanged."""
        args = {
            "layout": ["tsne"],
            "max_category_items": 1000,
            "obs_names": "foo",
            "var_names": "bar",
            "diffexp_lfc_cutoff": 0.1,
        }
        self.data.update(args=args)
        self.assertEqual(args, self.data.config)

    def test_requires_data(self):
        """Building a schema before any data is loaded raises DriverError."""
        with self.assertRaises(DriverError):
            self.data._create_schema()

    def test_delayed_load_data(self):
        """Data supplied through ``update`` is loaded with the expected contents."""
        self.data.update(data=self.data_file)
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        # Compare the absolute difference: a signed "difference < epsilon"
        # check would also pass for any value far below the expected one.
        self.assertLess(abs(self.data.data.X[0, 0] - -0.171_469_51), epsilon)

    def test_diffexp_topN(self):
        """``diffexp_topN`` works after a deferred load: 10 results by default, 20 on request."""
        self.data.update(data=self.data_file)
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)


# Module-level entry point: inside the class body this guard would run
# before the class name is bound, so no tests would be discovered.
if __name__ == "__main__":
    unittest.main()
Esempio n. 16
0
class UtilTest(unittest.TestCase):
    """Tests for an engine built from the example dataset directory and a JSON schema."""

    def setUp(self):
        """Load the example dataset directory together with ``data_schema.json``."""
        self.data = ScanpyEngine("example-dataset/", schema="data_schema.json")

    def test_init(self):
        """The loaded dataset has the expected dimensions and first matrix value."""
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000005
        # Compare the absolute difference: a one-sided check would accept any
        # value smaller than the expected one.
        self.assertTrue(abs(self.data.data.X[0, 0] - -0.17146951) < epsilon)

    def test_schema(self):
        """The engine exposes the schema for every expected annotation field."""
        self.assertEqual(
            self.data.schema, {
                'CellName': {
                    'type': 'string',
                    'variabletype': 'categorical',
                    'displayname': 'Name',
                    'include': True
                },
                'n_genes': {
                    'type': 'int',
                    'variabletype': 'continuous',
                    'displayname': 'Num Genes',
                    'include': True
                },
                'percent_mito': {
                    'type': 'float',
                    'variabletype': 'continuous',
                    'displayname': 'Mitochondrial Percentage',
                    'include': True
                },
                'n_counts': {
                    'type': 'float',
                    'variabletype': 'continuous',
                    'displayname': 'Num Counts',
                    'include': True
                },
                'louvain': {
                    'type': 'string',
                    'variabletype': 'categorical',
                    'displayname': 'Louvain Cluster',
                    'include': True
                }
            })

    def test_cells(self):
        """``cells()`` contains a known cell name and has one entry per cell."""
        cells = self.data.cells()
        self.assertIn("AAACATACAACCAC-1", cells)
        self.assertEqual(len(cells), 2638)

    def test_genes(self):
        """``genes()`` contains a known gene name and has one entry per gene."""
        genes = self.data.genes()
        self.assertIn("SEPT4", genes)
        self.assertEqual(len(genes), 1838)

    def test_filter_categorical(self):
        """Filtering on a categorical value keeps only the matching cells."""
        # Named ``cell_filter`` so the builtin ``filter`` is not shadowed.
        cell_filter = {
            "louvain": {
                "variable_type": "categorical",
                "value_type": "string",
                "query": ["B cells"]
            }
        }
        filtered_data = self.data.filter_cells(cell_filter)
        self.assertEqual(filtered_data.shape, (342, 1838))
        louvain_vals = filtered_data.obs['louvain'].tolist()
        self.assertIn("B cells", louvain_vals)
        self.assertNotIn("NK cells", louvain_vals)

    def test_filter_continuous(self):
        """Filtering on a continuous range keeps only cells inside the range."""
        cell_filter = {
            "n_genes": {
                "variable_type": "continuous",
                "value_type": "int",
                "query": {
                    "min": 300,
                    "max": 400
                }
            }
        }
        filtered_data = self.data.filter_cells(cell_filter)
        self.assertEqual(filtered_data.shape, (71, 1838))
        for val in filtered_data.obs['n_genes'].tolist():
            self.assertTrue(300 <= val <= 400)

    def test_metadata(self):
        """``metadata`` yields one record per cell, each carrying the louvain field."""
        metadata = self.data.metadata(df=self.data.data)
        self.assertEqual(len(metadata), 2638)
        self.assertIn('louvain', metadata[0])

    @unittest.skip(
        "Umap not producing the same graph on different systems, even with the same seed. Skipping for now"
    )
    def test_create_graph(self):
        """The layout graph has the expected coordinates for the first cell."""
        graph = self.data.create_graph(df=self.data.data)
        self.assertEqual(graph[0][1], 0.5545382653143183)
        self.assertEqual(graph[0][2], 0.6021833809031731)

    def test_diffexp(self):
        """Differential expression between two cell lists returns the expected top genes."""
        diffexp = self.data.diffexp(["AAACATACAACCAC-1", "AACCGATGGTCATG-1"],
                                    ["CCGATAGACCTAAG-1", "GGTGGAGAAGTAGA-1"],
                                    0.5, 7)
        self.assertEqual(diffexp["celllist1"]["topgenes"], [
            'EBNA1BP2', 'DIAPH1', 'SLC25A11', 'SNRNP27', 'COMMD8', 'COTL1',
            'GTF3A'
        ])

    def test_expression(self):
        """Expression values returned for one cell match the underlying matrix row."""
        expression = self.data.expression(cells=["AAACATACAACCAC-1"])
        data_exp = self.data.data[["AAACATACAACCAC-1"], :].X
        for idx, value in enumerate(expression["cells"][0]["e"]):
            self.assertEqual(value, data_exp[idx])
Esempio n. 17
0
class WritableAnnotationTest(unittest.TestCase):
    """Tests for writing obs annotations through ``annotation_put_fbs`` to a label file."""

    def setUp(self):
        """Create a temp directory for the label file and load the example dataset."""
        self.tmpDir = tempfile.mkdtemp()
        # Register removal right away: cleanups still run when a later step of
        # setUp raises, whereas tearDown would be skipped and leak the directory.
        self.addCleanup(shutil.rmtree, self.tmpDir)
        self.label_file = path.join(self.tmpDir, "labels.csv")
        args = {
            "layout": ["umap"],
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
            "label_file": self.label_file
        }
        self.data = ScanpyEngine(DataLocator("example-dataset/pbmc3k.h5ad"),
                                 args)

    @staticmethod
    def _constant_category(label, n_rows):
        """Return a categorical Series holding ``n_rows`` copies of ``label``."""
        return pd.Series([label] * n_rows, dtype='category')

    def make_fbs(self, data):
        """Encode ``data`` (anything ``pd.DataFrame`` accepts) with ``encode_matrix_fbs``."""
        df = pd.DataFrame(data)
        return encode_matrix_fbs(matrix=df, row_idx=None, col_idx=df.columns)

    def test_error_checks(self):
        """Invalid writes raise the expected exception types."""
        n_rows = self.data.data.obs.shape[0]
        fbs_bad = self.make_fbs(
            {'louvain': self._constant_category('undefined', n_rows)})

        # ensure attempt to change VAR annotation
        with self.assertRaises(ValueError):
            self.data.annotation_put_fbs("var", fbs_bad)

        # ensure we catch attempt to overwrite non-writable data
        with self.assertRaises(KeyError):
            self.data.annotation_put_fbs("obs", fbs_bad)

    def test_write_to_file(self):
        """A PUT writes the label file; a second PUT overwrites it and rotates the old one."""
        n_rows = self.data.data.obs.shape[0]
        fbs = self.make_fbs({
            'cat_A': self._constant_category('label_A', n_rows),
            'cat_B': self._constant_category('label_B', n_rows)
        })
        res = self.data.annotation_put_fbs("obs", fbs)
        self.assertEqual(res, json.dumps({"status": "OK"}))
        self.assertTrue(path.exists(self.label_file))
        df = pd.read_csv(self.label_file, index_col=0)
        self.assertEqual(df.shape, (n_rows, 2))
        self.assertEqual(set(df.columns), {'cat_A', 'cat_B'})
        self.assertTrue(self.data.original_obs_index.equals(df.index))
        self.assertTrue(np.all(df['cat_A'] == ['label_A'] * n_rows))
        self.assertTrue(np.all(df['cat_B'] == ['label_B'] * n_rows))

        # verify complete overwrite on second attempt, AND rotation occurs
        fbs = self.make_fbs({
            'cat_A': self._constant_category('label_A1', n_rows),
            'cat_C': self._constant_category('label_C', n_rows)
        })
        res = self.data.annotation_put_fbs("obs", fbs)
        self.assertEqual(res, json.dumps({"status": "OK"}))
        self.assertTrue(path.exists(self.label_file))
        df = pd.read_csv(self.label_file, index_col=0)
        self.assertEqual(set(df.columns), {'cat_A', 'cat_C'})
        self.assertTrue(np.all(df['cat_A'] == ['label_A1'] * n_rows))
        self.assertTrue(np.all(df['cat_C'] == ['label_C'] * n_rows))

        # rotation
        name, ext = path.splitext(self.label_file)
        self.assertTrue(path.exists(f"{name}-1{ext}"))

    def test_file_rotation_to_max_9(self):
        """After 11 writes only the label file and rotations 1 through 9 exist."""
        n_rows = self.data.data.obs.shape[0]
        fbs = self.make_fbs({
            'cat_A': self._constant_category('label_A', n_rows),
            'cat_B': self._constant_category('label_B', n_rows)
        })
        for _ in range(11):
            res = self.data.annotation_put_fbs("obs", fbs)
            self.assertEqual(res, json.dumps({"status": "OK"}))

        name, ext = path.splitext(self.label_file)
        expected_files = [self.label_file
                          ] + [f"{name}-{i}{ext}" for i in range(1, 10)]
        found_files = [path.join(self.tmpDir, p) for p in listdir(self.tmpDir)]
        self.assertEqual(set(expected_files), set(found_files))

    def test_put_get_roundtrip(self):
        """OBS PUTs (annotation_put_fbs) are visible via GET (annotation_to_fbs_matrix)."""
        n_rows = self.data.data.obs.shape[0]
        fbs = self.make_fbs({
            'cat_A': self._constant_category('label_A', n_rows),
            'cat_B': self._constant_category('label_B', n_rows)
        })

        # put
        res = self.data.annotation_put_fbs("obs", fbs)
        self.assertEqual(res, json.dumps({"status": "OK"}))

        # get
        fbsAll = self.data.annotation_to_fbs_matrix("obs")
        schema = self.data.get_schema()
        annotations = decode_fbs.decode_matrix_FBS(fbsAll)
        obs_index_col_name = schema["annotations"]["obs"]["index"]
        self.assertEqual(annotations["n_rows"], n_rows)
        self.assertEqual(annotations["n_cols"], 7)
        self.assertIsNone(annotations["row_idx"])
        self.assertEqual(annotations["col_idx"], [
            obs_index_col_name, "n_genes", "percent_mito", "n_counts",
            "louvain", "cat_A", "cat_B"
        ])
        col_idx = annotations["col_idx"]
        self.assertEqual(annotations["columns"][col_idx.index('cat_A')],
                         ['label_A'] * n_rows)
        self.assertEqual(annotations["columns"][col_idx.index('cat_B')],
                         ['label_B'] * n_rows)

        # verify the schema was updated
        all_col_schema = {
            c["name"]: c
            for c in schema["annotations"]["obs"]["columns"]
        }
        self.assertEqual(
            all_col_schema["cat_A"], {
                "name": "cat_A",
                "type": "categorical",
                "categories": ["label_A"],
                "writable": True
            })
        self.assertEqual(
            all_col_schema["cat_B"], {
                "name": "cat_B",
                "type": "categorical",
                "categories": ["label_B"],
                "writable": True
            })
Esempio n. 18
0
def launch(data, verbose, debug, open_browser, port, host, layout, obs_names,
           var_names, max_category_items, diffexp_lfc_cutoff, title, scripts):
    """Launch the cellxgene data viewer.
    This web app lets you explore single-cell expression data.
    Data must be in a format that cellxgene expects, read the
    "getting started" guide.

    Examples:

    > cellxgene launch example_dataset/pbmc3k.h5ad --title pbmc3k

    > cellxgene launch <your data file> --title <your title>"""
    # NOTE(review): the docstring above is presumably shown as the CLI help
    # text (this function uses click throughout) -- confirm before rewording it.

    # Bundle the engine-related options; the result is handed to ScanpyEngine below.
    e_args = parse_engine_args(layout, obs_names, var_names,
                               max_category_items, diffexp_lfc_cutoff)
    # Startup message
    click.echo("[cellxgene] Starting the CLI...")

    # Argument checking
    # Only the extension is inspected here; `name` is not used afterwards.
    name, extension = splitext(data)
    if extension != ".h5ad":
        raise click.FileError(basename(data), hint="file type must be .h5ad")

    # Debug mode forces verbose output and never opens a browser; otherwise
    # warnings are rendered through the custom formatter.
    if debug:
        verbose = True
        open_browser = False
    else:
        warnings.formatwarning = custom_format_warning

    # Suppress traceback frames in error output unless verbose was requested.
    if not verbose:
        sys.tracebacklimit = 0

    # Show a security warning and require explicit confirmation before any
    # scripts are passed to the app config (abort=True stops on refusal).
    if scripts:
        click.echo(r"""
    / / /\ \ \__ _ _ __ _ __ (_)_ __   __ _
    \ \/  \/ / _` | '__| '_ \| | '_ \ / _` |
     \  /\  / (_| | |  | | | | | | | | (_| |
      \/  \/ \__,_|_|  |_| |_|_|_| |_|\__, |
                                      |___/
    The --scripts flag is intended for developers to include google analytics etc. You could be opening yourself to a
    security risk by including the --scripts flag. Make sure you trust the scripts that you are including.
            """)
        scripts_pretty = ", ".join(scripts)
        click.confirm(
            f"Are you sure you want to inject these scripts: {scripts_pretty}?",
            abort=True)

    # Default the title to the data file's base name without its extension.
    if not title:
        file_parts = splitext(basename(data))
        title = file_parts[0]

    # An explicit port is rejected in debug mode and must be free; with no
    # port given, one is picked automatically for the host.
    if port:
        if debug:
            raise click.ClickException(
                "--port and --debug may not be used together (try --verbose for error logging)."
            )
        if not is_port_available(host, int(port)):
            raise click.ClickException(
                f"The port selected {port} is in use, please specify an open port using the --port flag."
            )
    else:
        port = find_available_port(host)

    # Setup app
    cellxgene_url = f"http://{host}:{port}"

    # Import Flask app
    server = Server()

    server.create_app()
    server.app.config.update(SCRIPTS=scripts)

    # Limit the "werkzeug" logger to errors unless verbose.
    if not verbose:
        log = logging.getLogger("werkzeug")
        log.setLevel(logging.ERROR)

    file_size = getsize(data)

    # if a big file, let the user know it may take a while to load.
    if file_size > BIG_FILE_SIZE_THRESHOLD:
        click.echo(
            f"[cellxgene] Loading data from {basename(data)}, this may take awhile..."
        )
    else:
        click.echo(f"[cellxgene] Loading data from {basename(data)}.")

    # Fix for anaconda python. matplotlib typically expects python to be installed as a framework TKAgg is usually
    # available and fixes this issue. See https://matplotlib.org/faq/virtualenv_faq.html
    import matplotlib as mpl

    # The backend is selected before the engine module is imported.
    mpl.use("TkAgg")
    from server.app.scanpy_engine.scanpy_engine import ScanpyEngine

    # Report data-file problems as a click error carrying the original message.
    try:
        server.attach_data(ScanpyEngine(data, e_args), title=title)
    except ScanpyFileError as e:
        raise click.ClickException(f"{e}")

    if open_browser:
        click.echo(
            f"[cellxgene] Launching! Opening your browser to {cellxgene_url} now."
        )
        webbrowser.open(cellxgene_url)
    else:
        click.echo(
            f"[cellxgene] Launching! Please go to {cellxgene_url} in your browser."
        )

    click.echo("[cellxgene] Type CTRL-C at any time to exit.")

    # From here on stdout goes to devnull unless verbose; the handle is not
    # closed and stdout is not restored anywhere in this function.
    if not verbose:
        f = open(devnull, "w")
        sys.stdout = f

    # Run the server; an address-in-use error becomes a readable click error,
    # any other OSError propagates unchanged.
    try:
        server.app.run(host=host,
                       debug=debug,
                       port=port,
                       threaded=True,
                       use_debugger=False)
    except OSError as e:
        if e.errno == errno.EADDRINUSE:
            raise click.ClickException(
                "Port is in use, please specify an open port using the --port flag."
            ) from e
        raise
Esempio n. 19
0
def launch(
    data,
    layout,
    diffexp,
    title,
    verbose,
    debug,
    obs_names,
    var_names,
    open_browser,
    port,
    host,
    max_category_items,
    diffexp_lfc_cutoff,
):
    """Launch the cellxgene data viewer.
    This web app lets you explore single-cell expression data.
    Data must be in a format that cellxgene expects, read the
    "getting started" guide.

    Examples:

    > cellxgene launch example_dataset/pbmc3k.h5ad --title pbmc3k

    > cellxgene launch <your data file> --title <your title>"""
    # NOTE(review): the docstring above is presumably shown as the CLI help
    # text (this function uses click throughout) -- confirm before rewording it.

    # Startup message
    click.echo("[cellxgene] Starting the CLI...")

    # Argument checking
    # Only the extension is inspected here; `name` is not used afterwards.
    name, extension = splitext(data)
    if extension != ".h5ad":
        raise click.FileError(basename(data), hint="file type must be .h5ad")

    # Debug mode forces verbose output and never opens a browser; otherwise
    # warnings are rendered through the custom formatter.
    if debug:
        verbose = True
        open_browser = False
    else:
        warnings.formatwarning = custom_format_warning

    # Suppress traceback frames in error output unless verbose was requested.
    if not verbose:
        sys.tracebacklimit = 0

    # Default the title to the data file's base name without its extension.
    if not title:
        file_parts = splitext(basename(data))
        title = file_parts[0]

    # Setup app
    # The URL is built from host and port exactly as passed in; this function
    # performs no port availability check.
    cellxgene_url = f"http://{host}:{port}"

    # Import Flask app
    from server.app.app import app

    app.config.update(DATASET_TITLE=title)

    # Limit the "werkzeug" logger to errors unless verbose.
    if not verbose:
        log = logging.getLogger("werkzeug")
        log.setLevel(logging.ERROR)

    click.echo(
        f"[cellxgene] Loading data from {basename(data)}, this may take awhile..."
    )

    # Fix for anaconda python. matplotlib typically expects python to be installed as a framework TKAgg is usually
    # available and fixes this issue. See https://matplotlib.org/faq/virtualenv_faq.html
    import matplotlib as mpl

    # The backend is selected before the engine module is imported.
    mpl.use("TkAgg")
    from server.app.scanpy_engine.scanpy_engine import ScanpyEngine

    # Engine configuration assembled from the CLI options.
    args = {
        "layout": layout,
        "diffexp": diffexp,
        "max_category_items": max_category_items,
        "diffexp_lfc_cutoff": diffexp_lfc_cutoff,
        "obs_names": obs_names,
        "var_names": var_names,
    }

    # Report data-file problems as a click error carrying the original message.
    try:
        app.data = ScanpyEngine(data, args)
    except ScanpyFileError as e:
        raise click.ClickException(f"{e}")

    if open_browser:
        click.echo(
            f"[cellxgene] Launching! Opening your browser to {cellxgene_url} now."
        )
        webbrowser.open(cellxgene_url)
    else:
        click.echo(
            f"[cellxgene] Launching! Please go to {cellxgene_url} in your browser."
        )

    click.echo("[cellxgene] Type CTRL-C at any time to exit.")

    # From here on stdout goes to devnull unless verbose; the handle is not
    # closed and stdout is not restored anywhere in this function.
    if not verbose:
        f = open(devnull, "w")
        sys.stdout = f

    app.run(host=host, debug=debug, port=port, threaded=True)
Esempio n. 20
0
 def setUp(self):
     """Remember the example dataset path and create an argument-less engine."""
     dataset_path = "example-dataset/pbmc3k.h5ad"
     self.data_file = dataset_path
     self.data = ScanpyEngine()
Esempio n. 21
0
 def setUp(self):
     """Build the engine from the example dataset directory and ``data_schema.json``."""
     engine_options = {"schema": "data_schema.json"}
     self.data = ScanpyEngine("example-dataset/", **engine_options)
class NaNTest(unittest.TestCase):
    """Tests for loading and serving the ``nan.h5ad`` test dataset."""

    def setUp(self):
        """Load ``nan.h5ad`` with UserWarnings silenced and build its schema."""
        self.args = {
            "layout": "umap",
            "diffexp": "ttest",
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
        }
        # The load is expected to warn (see test_load); keep fixture setup quiet.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            self.data = ScanpyEngine("server/test/test_datasets/nan.h5ad",
                                     self.args)
            self.data._create_schema()

    def test_load(self):
        """Constructing the engine from ``nan.h5ad`` emits a UserWarning."""
        with self.assertWarns(UserWarning):
            ScanpyEngine("server/test/test_datasets/nan.h5ad", self.args)

    def test_init(self):
        """The loaded dataset has the expected dimensions and first matrix value."""
        self.assertEqual(self.data.cell_count, 100)
        self.assertEqual(self.data.gene_count, 100)
        epsilon = 0.000_005
        # Compare the absolute difference: a one-sided check would accept any
        # value smaller than the expected one.
        self.assertTrue(
            abs(self.data.data.X[0, 0] - -0.171_469_51) < epsilon)

    def test_dataframe(self):
        """The var data frame encodes to FBS with NaN preserved; JSON encoding is rejected."""
        data_frame_var = decode_fbs.decode_matrix_FBS(
            self.data.data_frame_to_fbs_matrix(None, "var"))
        self.assertIsNotNone(data_frame_var)
        self.assertEqual(data_frame_var["n_rows"], 100)
        self.assertEqual(data_frame_var["n_cols"], 100)
        self.assertTrue(math.isnan(data_frame_var["columns"][3][3]))

        # Uses assertRaises throughout, matching the other checks in this class.
        with self.assertRaises(JSONEncodingValueError):
            json.loads(self.data.data_frame(None, "obs"))
        with self.assertRaises(JSONEncodingValueError):
            json.loads(self.data.data_frame(None, "var"))

    def test_dataframe_obs_not_implemented(self):
        """Requesting the obs data frame as an FBS matrix raises ValueError."""
        with self.assertRaises(ValueError) as cm:
            decode_fbs.decode_matrix_FBS(
                self.data.data_frame_to_fbs_matrix(None, "obs"))
        self.assertIsNotNone(cm.exception)

    def test_annotation(self):
        """Annotations encode to FBS with NaN preserved; JSON encoding is rejected."""
        annotations = decode_fbs.decode_matrix_FBS(
            self.data.annotation_to_fbs_matrix("obs"))
        self.assertEqual(
            annotations["col_idx"],
            ["name", "n_genes", "percent_mito", "n_counts", "louvain"])
        self.assertEqual(annotations["n_rows"], 100)
        self.assertTrue(math.isnan(annotations["columns"][2][0]))

        annotations = decode_fbs.decode_matrix_FBS(
            self.data.annotation_to_fbs_matrix("var"))
        self.assertEqual(annotations["col_idx"],
                         ["name", "n_cells", "var_with_nans"])
        self.assertEqual(annotations["n_rows"], 100)
        self.assertTrue(math.isnan(annotations["columns"][2][0]))

        with self.assertRaises(JSONEncodingValueError):
            json.loads(self.data.annotation(None, "obs"))
        with self.assertRaises(JSONEncodingValueError):
            json.loads(self.data.annotation(None, "var"))