Esempio n. 1
0
def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    """Open the pbmc3k test dataset together with a local-file annotations object.

    Args:
        ext: Which on-disk flavour of pbmc3k to open (H5AD or CXG). Any other
            value raises ``KeyError`` from the lookup table below.
        annotations_fixture: When truthy, the pbmc3k annotations CSV fixture is
            copied into the temporary annotations file before it is used.

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple. ``tmp_dir`` is created with
        ``tempfile.mkdtemp`` and is not removed by this function.
    """
    scratch_dir = tempfile.mkdtemp()
    csv_path = path.join(scratch_dir, "test_annotations.csv")
    if annotations_fixture:
        shutil.copyfile(f"{FIXTURES_ROOT}/pbmc3k-annotations.csv", csv_path)

    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: f"{FIXTURES_ROOT}/pbmc3k.cxg",
    }
    locator = DataLocator(dataset_by_type[ext])

    config = AppConfig()
    server_settings = dict(
        app__flask_secret_key="secret",
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        single_dataset__datapath=locator.path,
    )
    config.update_server_config(**server_settings)
    dataset_settings = dict(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
    )
    config.update_default_dataset_config(**dataset_settings)
    config.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    data = loader.open(config)
    annotations = AnnotationsLocalFile(None, csv_path)
    return data, scratch_dir, annotations
Esempio n. 2
0
def data_with_tmp_tiledb_annotations(ext: MatrixDataType):
    """Open the pbmc3k test dataset together with a hosted-TileDB annotations object.

    Args:
        ext: Which on-disk flavour of pbmc3k to open (H5AD or CXG). Any other
            value raises ``KeyError`` from the lookup table below.

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple. ``tmp_dir`` is created with
        ``tempfile.mkdtemp``, is used as the hosted file directory, and is not
        removed by this function.
    """
    scratch_dir = tempfile.mkdtemp()
    # The same connection string is handed to the config and to DbUtils.
    db_uri = "postgresql://*****:*****@localhost:5432"

    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }
    locator = DataLocator(dataset_by_type[ext])

    config = AppConfig()
    server_settings = dict(
        app__flask_secret_key="secret",
        multi_dataset__dataroot=locator.path,
        authentication__type="test",
        authentication__insecure_test_environment=True,
    )
    config.update_server_config(**server_settings)
    dataset_settings = dict(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
        user_annotations__type="hosted_tiledb_array",
        user_annotations__hosted_tiledb_array__db_uri=db_uri,
        user_annotations__hosted_tiledb_array__hosted_file_directory=scratch_dir,
    )
    config.update_default_dataset_config(**dataset_settings)
    config.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    data = loader.open(config)
    annotations = AnnotationsHostedTileDB(scratch_dir, DbUtils(db_uri))
    return data, scratch_dir, annotations
Esempio n. 3
0
 def get_basic_config(self):
     """Build an ``AppConfig`` carrying the baseline settings used by the tests.

     The returned config is NOT completed; ``complete_config`` is left to the
     caller.
     """
     basic = AppConfig()
     basic.update_server_config(single_dataset__obs_names=None, single_dataset__var_names=None)
     basic.update_server_config(app__flask_secret_key="secret")

     dataset_defaults = dict(
         embeddings__names=["umap"],
         presentation__max_categories=100,
         diffexp__lfc_cutoff=0.01,
     )
     basic.update_default_dataset_config(**dataset_defaults)
     return basic
Esempio n. 4
0
def app_config(data_locator, backed=False, extra_server_config=None, extra_dataset_config=None):
    """Build and complete an ``AppConfig`` for a single dataset.

    Args:
        data_locator: Value stored as ``single_dataset__datapath``.
        backed: Value stored as ``adaptor__anndata_adaptor__backed``.
        extra_server_config: Optional mapping of additional server config
            keyword overrides, applied after the baseline server settings.
            ``None`` (the default) means no overrides.
        extra_dataset_config: Optional mapping of additional default dataset
            config keyword overrides, applied after the baseline dataset
            settings. ``None`` (the default) means no overrides.

    Returns:
        The configured ``AppConfig`` after ``complete_config()`` has run.
    """
    # ``None`` sentinels replace the former ``{}`` defaults so that no dict
    # object is shared between calls.
    server_overrides = {} if extra_server_config is None else extra_server_config
    dataset_overrides = {} if extra_dataset_config is None else extra_dataset_config

    config = AppConfig()
    config.update_server_config(
        app__flask_secret_key="secret",
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        adaptor__anndata_adaptor__backed=backed,
        single_dataset__datapath=data_locator,
        limits__diffexp_cellcount_max=None,
        limits__column_request_max=None,
    )
    config.update_default_dataset_config(
        embeddings__names=["umap", "tsne", "pca"], presentation__max_categories=100, diffexp__lfc_cutoff=0.01
    )
    # Caller overrides go last so they win over the baseline values above.
    config.update_server_config(**server_overrides)
    config.update_default_dataset_config(**dataset_overrides)
    config.complete_config()
    return config
Esempio n. 5
0
    def test_auth_none(self):
        """With ``authentication__type=None`` the config has no auth section and userinfo is null."""
        cfg = AppConfig()
        cfg.update_server_config(app__flask_secret_key="secret")
        cfg.update_server_config(authentication__type=None, multi_dataset__dataroot=self.dataset_dataroot)
        cfg.update_default_dataset_config(user_annotations__enable=False)
        cfg.complete_config()

        with test_server(app_config=cfg) as server:
            http = requests.Session()

            config_response = http.get(f"{server}/d/pbmc3k.cxg/api/v0.2/config")
            config_payload = config_response.json()
            userinfo_response = http.get(f"{server}/d/pbmc3k.cxg/api/v0.2/userinfo")
            userinfo_payload = userinfo_response.json()

            self.assertNotIn("authentication", config_payload["config"])
            self.assertIsNone(userinfo_payload)
Esempio n. 6
0
    def test_auth_session(self):
        """With session authentication no client login is required and the user is "anonymous"."""
        cfg = AppConfig()
        cfg.update_server_config(app__flask_secret_key="secret")
        cfg.update_server_config(authentication__type="session", multi_dataset__dataroot=self.dataset_dataroot)
        cfg.update_default_dataset_config(user_annotations__enable=True)
        cfg.complete_config()

        with test_server(app_config=cfg) as server:
            http = requests.Session()

            config_response = http.get(f"{server}/d/pbmc3k.cxg/api/v0.2/config")
            config_payload = config_response.json()
            userinfo_response = http.get(f"{server}/d/pbmc3k.cxg/api/v0.2/userinfo")
            userinfo_payload = userinfo_response.json()

            auth_section = config_payload["config"]["authentication"]
            self.assertFalse(auth_section["requires_client_login"])
            self.assertTrue(userinfo_payload["userinfo"]["is_authenticated"])
            self.assertEqual(userinfo_payload["userinfo"]["username"], "anonymous")
Esempio n. 7
0
    def test_update_app_config(self):
        """Check what ``changes_from_default`` reports after various config updates."""
        # Server-level updates are reported as (name, new value, default) triples.
        server_case = AppConfig()
        server_case.update_server_config(app__verbose=True, multi_dataset__dataroot="datadir")
        server_changes = server_case.server_config.changes_from_default()
        expected_server_changes = [
            ("app__verbose", True, False),
            ("multi_dataset__dataroot", "datadir", None),
        ]
        self.assertCountEqual(server_changes, expected_server_changes)

        # Empty tuples for the script lists: the server config reports no changes.
        # NOTE(review): this stanza updates the dataset config but inspects
        # server_config -- presumably deliberate; confirm.
        empty_tuple_case = AppConfig()
        empty_tuple_case.update_default_dataset_config(app__scripts=(), app__inline_scripts=())
        self.assertCountEqual(empty_tuple_case.server_config.changes_from_default(), [])

        # Empty lists for the script lists: the dataset config reports no changes.
        empty_list_case = AppConfig()
        empty_list_case.update_default_dataset_config(app__scripts=[], app__inline_scripts=[])
        self.assertCountEqual(empty_list_case.default_dataset_config.changes_from_default(), [])

        # Non-empty script values (tuple or list) are reported as lists.
        scripts_case = AppConfig()
        scripts_case.update_default_dataset_config(app__scripts=("a", "b"), app__inline_scripts=["c", "d"])
        dataset_changes = scripts_case.default_dataset_config.changes_from_default()
        expected_dataset_changes = [
            ("app__scripts", ["a", "b"], []),
            ("app__inline_scripts", ["c", "d"], []),
        ]
        self.assertCountEqual(dataset_changes, expected_dataset_changes)
Esempio n. 8
0
def launch(
    datapath,
    dataroot,
    verbose,
    debug,
    open_browser,
    port,
    host,
    embedding,
    obs_names,
    var_names,
    max_category_items,
    disable_custom_colors,
    diffexp_lfc_cutoff,
    title,
    scripts,
    about,
    disable_annotations,
    annotations_file,
    annotations_dir,
    backed,
    disable_diffexp,
    experimental_annotations_ontology,
    experimental_annotations_ontology_obo,
    experimental_enable_reembedding,
    config_file,
    dump_default_config,
):
    """Launch the cellxgene data viewer.
    This web app lets you explore single-cell expression data.
    Data must be in a format that cellxgene expects.
    Read the "getting started" guide to learn more:
    https://chanzuckerberg.github.io/cellxgene/getting-started.html

    Examples:

    > cellxgene launch example-dataset/pbmc3k.h5ad --title pbmc3k

    > cellxgene launch <your data file> --title <your title>

    > cellxgene launch <url>"""

    # NOTE(review): the docstring above is presumably surfaced verbatim as the
    # command's help text (the decorators are outside this view), so it is
    # deliberately left untouched -- confirm before rewording it.
    #
    # Overall flow: (1) optionally dump the default config and exit, (2) build
    # the effective configuration from the config file plus command-line
    # overrides, (3) create the server, (4) announce the URL and optionally
    # open a browser, (5) run the Flask app until it exits.

    # TODO Examples to provide when "--dataroot" is unhidden
    # > cellxgene launch --dataroot example-dataset/
    #
    # > cellxgene launch --dataroot <url>

    # Print the default configuration and stop with exit status 0; nothing
    # else in this function runs in that case.
    if dump_default_config:
        print(default_config)
        sys.exit(0)
    # Startup message
    click.echo("[cellxgene] Starting the CLI...")

    # app config
    app_config = AppConfig()
    # Alias used for all later reads of server settings.
    # assumes the update_* calls below mutate this object in place so the
    # alias stays current -- TODO confirm against AppConfig.
    server_config = app_config.server_config

    try:
        # Config-file values are applied first so that the command-line
        # values gathered below can override them.
        if config_file:
            app_config.update_from_config_file(config_file)

        # Determine which config options were give on the command line.
        # Those will override the ones provided in the config file (if provided).
        # A second, scratch AppConfig receives every CLI value; only the
        # entries it reports as changed from default are copied over.
        cli_config = AppConfig()
        cli_config.update_server_config(
            app__verbose=verbose,
            app__debug=debug,
            app__host=host,
            app__port=port,
            app__open_browser=open_browser,
            single_dataset__datapath=datapath,
            single_dataset__title=title,
            single_dataset__about=about,
            single_dataset__obs_names=obs_names,
            single_dataset__var_names=var_names,
            multi_dataset__dataroot=dataroot,
            adaptor__anndata_adaptor__backed=backed,
        )
        # The "disable_*" CLI flags are inverted into "enable"-style settings.
        cli_config.update_default_dataset_config(
            app__scripts=scripts,
            user_annotations__enable=not disable_annotations,
            user_annotations__local_file_csv__file=annotations_file,
            user_annotations__local_file_csv__directory=annotations_dir,
            user_annotations__ontology__enable=
            experimental_annotations_ontology,
            user_annotations__ontology__obo_location=
            experimental_annotations_ontology_obo,
            presentation__max_categories=max_category_items,
            presentation__custom_colors=not disable_custom_colors,
            embeddings__names=embedding,
            embeddings__enable_reembedding=experimental_enable_reembedding,
            diffexp__enable=not disable_diffexp,
            diffexp__lfc_cutoff=diffexp_lfc_cutoff,
        )

        # Each diff entry is unpacked as a 3-tuple; only the key and the new
        # value are used, the third element is ignored.
        diff = cli_config.server_config.changes_from_default()
        changes = {key: val for key, val, _ in diff}
        app_config.update_server_config(**changes)

        # Same override step for the default dataset configuration.
        diff = cli_config.default_dataset_config.changes_from_default()
        changes = {key: val for key, val, _ in diff}
        app_config.update_default_dataset_config(**changes)

        # process the configuration
        #  any errors will be thrown as an exception.
        #  any info messages will be passed to the messagefn function.

        def messagefn(message):
            # Prefix informational messages like the other CLI output.
            click.echo("[cellxgene] " + message)

        # Use a default secret if one is not provided
        if not server_config.app__flask_secret_key:
            app_config.update_server_config(
                app__flask_secret_key="SparkleAndShine")

        app_config.complete_config(messagefn)

    except (ConfigurationError, DatasetAccessError) as e:
        # Re-raise configuration and dataset-access failures as a click error.
        raise click.ClickException(e)

    handle_scripts(scripts)

    # create the server
    server = CliLaunchServer(app_config)

    # Unless verbose, only ERROR and above from the "werkzeug" logger is shown.
    if not server_config.app__verbose:
        log = logging.getLogger("werkzeug")
        log.setLevel(logging.ERROR)

    cellxgene_url = f"http://{app_config.server_config.app__host}:{app_config.server_config.app__port}"
    if server_config.app__open_browser:
        click.echo(
            f"[cellxgene] Launching! Opening your browser to {cellxgene_url} now."
        )
        webbrowser.open(cellxgene_url)
    else:
        click.echo(
            f"[cellxgene] Launching! Please go to {cellxgene_url} in your browser."
        )

    click.echo("[cellxgene] Type CTRL-C at any time to exit.")

    # Unless verbose, sys.stdout is rebound to the null device from here on.
    # The handle is not closed anywhere in this function.
    if not server_config.app__verbose:
        f = open(os.devnull, "w")
        sys.stdout = f

    try:
        # Threading is turned off in debug mode; the debugger and the
        # reloader are always disabled.
        server.app.run(
            host=server_config.app__host,
            debug=server_config.app__debug,
            port=server_config.app__port,
            threaded=not server_config.app__debug,
            use_debugger=False,
            use_reloader=False,
        )
    except OSError as e:
        # Give a friendly hint when the port is taken; any other OSError is
        # re-raised unchanged.
        if e.errno == errno.EADDRINUSE:
            raise click.ClickException(
                "Port is in use, please specify an open port using the --port flag."
            ) from e
        raise
Esempio n. 9
0
    def test_multi_dataset(self):
        """Exercise multi-dataroot configuration: base_url validation and per-dataroot overrides."""
        config = AppConfig()
        example_root = f"{PROJECT_ROOT}/example-dataset"

        # Illegal base_url values must be rejected when the config is completed.
        for bad_url in ("../b", "!$*", "\\n", "", "(bad)"):
            config.update_server_config(
                app__flask_secret_key="secret",
                multi_dataset__dataroot={"tag": {"base_url": bad_url, "dataroot": example_root}},
            )
            with self.assertRaises(ConfigurationError):
                config.complete_config()

        # Legal base_url values must complete without raising.
        for good_url in ("d", "this.is-okay_", "a/b"):
            config.update_server_config(
                app__flask_secret_key="secret",
                multi_dataset__dataroot={"tag": {"base_url": good_url, "dataroot": example_root}},
            )
            config.complete_config()

        # End-to-end: three dataroots served under three different base urls.
        dataroots = {
            "s1": {"dataroot": example_root, "base_url": "set1/1/2"},
            "s2": {"dataroot": f"{FIXTURES_ROOT}", "base_url": "set2"},
            "s3": {"dataroot": f"{FIXTURES_ROOT}", "base_url": "set3"},
        }
        config.update_server_config(app__flask_secret_key="secret", multi_dataset__dataroot=dataroots)

        # Change this default to test if the dataroot overrides below work.
        config.update_default_dataset_config(app__about_legal_tos="tos_default.html")

        # s1 and s2 get specialised settings; s3 gets none and therefore
        # falls back to the default dataset config.
        config.add_dataroot_config(
            "s1", user_annotations__enable=False, diffexp__enable=True, app__about_legal_tos="tos_set1.html"
        )
        config.add_dataroot_config(
            "s2", user_annotations__enable=True, diffexp__enable=False, app__about_legal_tos="tos_set2.html"
        )
        config.complete_config()

        with test_server(app_config=config) as server:
            session = requests.Session()

            def dataset_config(url):
                # Fetch a dataset's config endpoint and return its "config" section.
                return session.get(url).json()["config"]

            set1 = dataset_config(f"{server}/set1/1/2/pbmc3k.h5ad/api/v0.2/config")
            assert set1["displayNames"]["dataset"] == "pbmc3k"
            assert set1["parameters"]["annotations"] is False
            assert set1["parameters"]["disable-diffexp"] is False
            assert set1["parameters"]["about_legal_tos"] == "tos_set1.html"

            set2 = dataset_config(f"{server}/set2/pbmc3k.cxg/api/v0.2/config")
            assert set2["displayNames"]["dataset"] == "pbmc3k"
            assert set2["parameters"]["annotations"] is True
            assert set2["parameters"]["about_legal_tos"] == "tos_set2.html"

            set3 = dataset_config(f"{server}/set3/pbmc3k.cxg/api/v0.2/config")
            assert set3["displayNames"]["dataset"] == "pbmc3k"
            assert set3["parameters"]["annotations"] is True
            assert set3["parameters"]["disable-diffexp"] is False
            assert set3["parameters"]["about_legal_tos"] == "tos_default.html"

            health = session.get(f"{server}/health")
            assert health.json()["status"] == "pass"
Esempio n. 10
0
                    has_config = True
            else:
                logging.critical(f"Configuration file not found {config_file}")
                sys.exit(1)

    if not has_config:
        logging.critical("No config file found")
        sys.exit(1)

    dataroot = os.getenv("CXG_DATAROOT")
    if dataroot:
        logging.info("Configuration from CXG_DATAROOT")
        app_config.update_server_config(multi_dataset__dataroot=dataroot)

    # overwrite configuration for the eb app
    app_config.update_default_dataset_config(embeddings__enable_reembedding=False,)
    app_config.update_server_config(multi_dataset__allowed_matrix_types=["cxg"],)

    # complete config
    app_config.complete_config(logging.info)

    server = WSGIServer(app_config)
    debug = False
    application = server.app

except Exception:
    logging.critical("Caught exception during initialization", exc_info=True)
    sys.exit(1)

if app_config.is_multi_dataset():
    logging.info(f"starting server with multi_dataset__dataroot={app_config.server_config.multi_dataset__dataroot}")
Esempio n. 11
0
class TestServerConfig(ConfigTests):
    def setUp(self):
        """Create a completed baseline config and a message-collecting context for each test."""
        # The config file is named after the running test method.
        test_method_name = unittest.TestCase.id(self).split(".")[-1]
        self.config_file_name = f"{test_method_name}.yml"

        self.config = AppConfig()
        self.config.update_server_config(app__flask_secret_key="secret")
        self.config.update_server_config(multi_dataset__dataroot=FIXTURES_ROOT)
        self.server_config = self.config.server_config
        self.config.complete_config()

        collected_messages = []

        def record(message):
            # Keep every message so tests can inspect what was reported.
            collected_messages.append(message)

        self.context = dict(messagefn=record, messages=collected_messages)

    def get_config(self, **kwargs):
        """Write a custom config file (dataroot = fixtures) and load it into a new ``AppConfig``.

        Extra keyword arguments are forwarded to ``custom_app_config``. The
        returned config has not been completed.
        """
        yaml_path = self.custom_app_config(
            dataroot=f"{FIXTURES_ROOT}",
            config_file_name=self.config_file_name,
            **kwargs,
        )
        loaded = AppConfig()
        loaded.update_from_config_file(yaml_path)
        return loaded

    def test_init_raises_error_if_default_config_is_invalid(self):
        """A non-numeric port in the config file must fail ``complete_config``."""
        bad_port_config = self.get_config(port="not_valid")
        self.assertRaises(ConfigurationError, bad_port_config.complete_config)

    @patch("backend.czi_hosted.common.config.server_config.BaseConfig.validate_correct_type_of_configuration_attribute")
    def test_complete_config_checks_all_attr(self, mock_check_attrs):
        """Completing the server config should invoke the attribute type check 41 times."""
        # NOTE(review): the right-hand side CALLS the attribute (with no
        # arguments) instead of referencing it. If ``BaseConfig`` here is the
        # same class the patch targets, that call goes to the mock itself --
        # confirm whether the expected count below relies on that.
        mock_check_attrs.side_effect = BaseConfig.validate_correct_type_of_configuration_attribute()
        self.server_config.complete_config(self.context)
        # Hard-coded expectation; presumably tied to the number of server
        # config attributes -- verify when attributes are added or removed.
        self.assertEqual(mock_check_attrs.call_count, 41)

    def test_handle_app__throws_error_if_port_doesnt_exist(self):
        """``handle_app`` must reject the port value 99999999."""
        huge_port_config = self.get_config(port=99999999)
        self.assertRaises(ConfigurationError, huge_port_config.server_config.handle_app, self.context)

    @patch("backend.czi_hosted.common.config.server_config.discover_s3_region_name")
    def test_handle_data_locator_works_for_default_types(self, mock_discover_region_name):
        """Check the S3 region for the default config, a file-based config, and a failing dataroot."""
        mock_discover_region_name.return_value = None

        # The config built in setUp carries no region name.
        self.assertEqual(self.config.server_config.data_locator__s3__region_name, None)

        # The generated config file carries a hard-coded region.
        from_file = self.get_config()
        self.assertEqual(from_file.server_config.data_locator__s3__region_name, "us-east-1")

        # With region discovery returning None, this dataroot must be rejected.
        undiscoverable_dataroot = {
            "d1": {"base_url": "set1", "dataroot": "/path/to/set1_datasets/"},
            "d2": {"base_url": "set2/subdir", "dataroot": "s3://shouldnt/work"},
        }
        yaml_path = self.custom_app_config(
            dataroot=undiscoverable_dataroot,
            config_file_name=self.config_file_name,
            data_locater_region_name="true",
        )
        broken = AppConfig()
        broken.update_from_config_file(yaml_path)
        with self.assertRaises(ConfigurationError):
            broken.server_config.handle_data_locator()

    @patch("backend.czi_hosted.common.config.server_config.discover_s3_region_name")
    def test_handle_data_locator_can_read_from_dataroot(self, mock_discover_region_name):
        """The S3 region is taken from region discovery on the s3 dataroot."""
        discovered_region = "us-west-2"
        mock_discover_region_name.return_value = discovered_region

        mixed_dataroot = {
            "d1": {"base_url": "set1", "dataroot": "/path/to/set1_datasets/"},
            "d2": {"base_url": "set2/subdir", "dataroot": "s3://hosted-cellxgene-dev"},
        }
        yaml_path = self.custom_app_config(
            dataroot=mixed_dataroot,
            config_file_name=self.config_file_name,
            data_locater_region_name="true",
        )
        loaded = AppConfig()
        loaded.update_from_config_file(yaml_path)
        loaded.server_config.handle_data_locator()

        self.assertEqual(loaded.server_config.data_locator__s3__region_name, discovered_region)
        # Discovery is asked exactly once, and only about the s3 dataroot.
        mock_discover_region_name.assert_called_once_with("s3://hosted-cellxgene-dev")

    def test_handle_app___can_use_envar_port(self):
        """The port comes from the config file, or from ``CXG_SERVER_PORT`` when the file sets none."""
        config = self.get_config(port=24)
        self.assertEqual(config.server_config.app__port, 24)

        # Note if the port is set in the config file it will NOT be overwritten by a different envvar
        # patch.dict restores os.environ when the block exits, so the variable
        # cannot leak into other tests even if an assertion below fails.
        with patch.dict(os.environ, {"CXG_SERVER_PORT": "4008"}):
            self.config = AppConfig()
            self.config.update_server_config(app__flask_secret_key="secret")
            self.config.server_config.handle_app(self.context)
            self.assertEqual(self.config.server_config.app__port, 4008)

    def test_handle_app__can_get_secret_key_from_envvar_or_config_file_with_envvar_given_preference(self):
        """The flask secret key from ``CXG_SECRET_KEY`` overrides the one in the config file."""
        config = self.get_config(flask_secret_key="KEY_FROM_FILE")
        self.assertEqual(config.server_config.app__flask_secret_key, "KEY_FROM_FILE")

        # patch.dict restores os.environ when the block exits; previously the
        # variable stayed set for the rest of the test run.
        with patch.dict(os.environ, {"CXG_SECRET_KEY": "KEY_FROM_ENV"}):
            config.external_config.handle_environment(self.context)
            self.assertEqual(config.server_config.app__flask_secret_key, "KEY_FROM_ENV")

    def test_handle_app__sets_web_base_url(self):
        """A ``web_base_url`` written to the config file is readable from the server config."""
        expected_url = "anything.com"
        loaded = self.get_config(web_base_url=expected_url)
        self.assertEqual(loaded.server_config.app__web_base_url, expected_url)

    def test_handle_auth__gets_client_secret_from_envvars_or_config_with_envvars_given_preference(self):
        """The OAuth client secret from ``CXG_OAUTH_CLIENT_SECRET`` overrides the config file value."""
        config = self.get_config(client_secret="KEY_FROM_FILE")
        config.server_config.handle_authentication()
        self.assertEqual(config.server_config.authentication__params_oauth__client_secret, "KEY_FROM_FILE")

        # patch.dict restores os.environ when the block exits; previously the
        # variable stayed set for the rest of the test run.
        with patch.dict(os.environ, {"CXG_OAUTH_CLIENT_SECRET": "KEY_FROM_ENV"}):
            config.external_config.handle_environment(self.context)
            self.assertEqual(config.server_config.authentication__params_oauth__client_secret, "KEY_FROM_ENV")

    def test_handle_data_source__errors_when_passed_zero_or_two_dataroots(self):
        """``handle_data_source`` must fail with both a dataroot and a datapath, and with neither."""
        config_file_variants = (
            # Both a dataroot and a single-dataset path.
            dict(
                dataroot=f"{FIXTURES_ROOT}",
                config_file_name="two_data_roots.yml",
                dataset_datapath=f"{FIXTURES_ROOT}/pbmc3k-CSC-gz.h5ad",
            ),
            # Neither a dataroot nor a dataset path.
            dict(config_file_name="zero_roots.yml"),
        )
        for variant in config_file_variants:
            yaml_path = self.custom_app_config(**variant)
            loaded = AppConfig()
            loaded.update_from_config_file(yaml_path)
            with self.assertRaises(ConfigurationError):
                loaded.server_config.handle_data_source()

    def test_get_api_base_url_works(self):

        # test the api_base_url feature, and that it can contain a path
        config = AppConfig()
        backend_port = find_available_port("localhost", 10000)
        config.update_server_config(
            app__flask_secret_key="secret",
            app__api_base_url=f"http://*****:*****@patch("backend.czi_hosted.common.config.server_config.diffexp_tiledb.set_config")
    def test_handle_diffexp(self, mock_tiledb_config):
        """``handle_diffexp`` forwards the worker count and target workunit to the tiledb config."""
        diffexp_settings = dict(
            dataroot=f"{FIXTURES_ROOT}",
            cpu_multiplier=3,
            diffexp_max_workers=1,
            target_workunit=4,
            config_file_name=self.config_file_name,
        )
        yaml_path = self.custom_app_config(**diffexp_settings)
        loaded = AppConfig()
        loaded.update_from_config_file(yaml_path)
        loaded.server_config.handle_diffexp()
        # called with the min of diffexp_max_workers and cpus*cpu_multiplier
        mock_tiledb_config.assert_called_once_with(1, 4)

    @patch("backend.czi_hosted.data_cxg.cxg_adaptor.CxgAdaptor.set_tiledb_context")
    def test_handle_adaptor(self, mock_tiledb_context):
        """``handle_adaptor`` passes the configured cache size, reader threads and region to tiledb."""
        yaml_path = self.custom_app_config(
            dataroot=f"{FIXTURES_ROOT}",
            cxg_tile_cache_size=10,
            cxg_num_reader_threads=2,
        )
        loaded = AppConfig()
        loaded.update_from_config_file(yaml_path)
        loaded.server_config.handle_adaptor()

        expected_context = {
            "sm.tile_cache_size": 10,
            "sm.num_reader_threads": 2,
            "vfs.s3.region": "us-east-1",
        }
        mock_tiledb_context.assert_called_once_with(expected_context)

    def test_test_auth_only_in_insecure(self):
        """The "test" auth type is rejected unless the insecure test environment flag is set."""
        test_auth_config = self.get_config(auth_type="test")

        # Without the insecure flag, completing the config must fail.
        self.assertRaises(ConfigurationError, test_auth_config.complete_config)

        # With the flag set, the same config completes without raising.
        test_auth_config.update_server_config(authentication__insecure_test_environment=True)
        test_auth_config.complete_config()