def test_run(self):
    """Build the ``server/eb`` bundle in a scratch directory and smoke-test it.

    A completed config is written to a temporary directory created under
    ``{PROJECT_ROOT}/server``, the git-tracked files of ``server/eb`` are
    copied next to it, ``make build`` is run there, and the resulting app is
    started through ``run_eb_app``. The test passes when the config endpoint
    of the ``pbmc3k.cxg`` dataset reports the display name ``pbmc3k``.
    """
    # Use the temporary directory as a context manager so it is removed
    # deterministically, even when a build step or an assertion fails.
    with tempfile.TemporaryDirectory(dir=f"{PROJECT_ROOT}/server") as tempdirname:
        c = AppConfig()
        # test that eb works
        c.update_server_config(
            multi_dataset__dataroot=f"{PROJECT_ROOT}/server/test/test_datasets",
            app__flask_secret_key="open sesame",
        )
        c.complete_config()
        c.write_config(f"{tempdirname}/config.yaml")

        # Pipe the list of tracked files into cpio to copy them into the
        # scratch directory; this needs a shell because of the pipe.
        subprocess.check_call(
            f"git ls-files . | cpio -pdm {tempdirname}",
            cwd=f"{PROJECT_ROOT}/server/eb",
            shell=True,
        )
        subprocess.check_call(["make", "build"], cwd=tempdirname)

        with run_eb_app(tempdirname) as server:
            session = requests.Session()
            r = session.get(f"{server}/d/pbmc3k.cxg/api/v0.2/config")
            data_config = r.json()
            assert data_config["config"]["displayNames"]["dataset"] == "pbmc3k"
def data_with_tmp_tiledb_annotations(ext: MatrixDataType):
    """Open the pbmc3k dataset together with hosted-TileDB annotations.

    A fresh temporary directory is created with ``tempfile.mkdtemp`` and used
    as the hosted file directory for the annotations; it is not removed here.

    Args:
        ext: Which on-disk flavour of pbmc3k to load (H5AD or CXG).

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple: the opened data adaptor, the
        path of the temporary directory, and the ``AnnotationsHostedTileDB``.

    Raises:
        KeyError: If ``ext`` is neither ``MatrixDataType.H5AD`` nor
            ``MatrixDataType.CXG``.
    """
    tmp_dir = tempfile.mkdtemp()
    db_uri = "postgresql://*****:*****@localhost:5432"

    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }
    data_locator = DataLocator(dataset_by_type[ext])

    config = AppConfig()
    server_settings = {
        "multi_dataset__dataroot": data_locator.path,
        "authentication__type": "test",
    }
    dataset_settings = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "diffexp__lfc_cutoff": 0.01,
        "user_annotations__type": "hosted_tiledb_array",
        "user_annotations__hosted_tiledb_array__db_uri": db_uri,
        "user_annotations__hosted_tiledb_array__hosted_file_directory": tmp_dir,
    }
    config.update_server_config(**server_settings)
    config.update_default_dataset_config(**dataset_settings)
    config.complete_config()

    loader = MatrixDataLoader(data_locator.abspath())
    data = loader.open(config)
    annotations = AnnotationsHostedTileDB(tmp_dir, DbUtils(db_uri))
    return data, tmp_dir, annotations
def test_auth_test(self):
    """Exercise the "test" authentication type across two dataroots.

    Dataroot ``a1`` (served under ``auth``) has authentication and
    annotations enabled; ``a2`` (served under ``no-auth``) has both disabled.
    The test walks through the logged-out, logged-in and logged-out-again
    states of the ``auth`` dataset, then checks the ``no-auth`` dataset.
    """
    app_config = AppConfig()
    app_config.update_server_config(authentication__type="test")
    app_config.update_server_config(
        multi_dataset__dataroot={
            "a1": {"dataroot": self.dataset_dataroot, "base_url": "auth"},
            "a2": {"dataroot": self.dataset_dataroot, "base_url": "no-auth"},
        }
    )

    # specialize the configs
    app_config.add_dataroot_config("a1", app__authentication_enable=True, user_annotations__enable=True)
    app_config.add_dataroot_config("a2", app__authentication_enable=False, user_annotations__enable=False)
    app_config.complete_config()

    with test_server(app_config=app_config) as server:
        session = requests.Session()
        auth_api = f"{server}/auth/pbmc3k.cxg/api/v0.2"
        noauth_api = f"{server}/no-auth/pbmc3k.cxg/api/v0.2"
        landing_url = f"{server}/auth/pbmc3k.cxg/"

        def fetch(api_root):
            # Request config first, then userinfo, and decode both as JSON.
            config_json = session.get(f"{api_root}/config").json()
            userinfo_json = session.get(f"{api_root}/userinfo").json()
            return config_json, userinfo_json

        # auth datasets
        config, userinfo = fetch(auth_api)
        self.assertFalse(userinfo["userinfo"]["is_authenticated"])
        self.assertIsNone(userinfo["userinfo"]["username"])
        self.assertTrue(config["config"]["authentication"]["requires_client_login"])
        self.assertTrue(config["config"]["parameters"]["annotations"])

        auth_section = config["config"]["authentication"]
        login_uri = auth_section["login"]
        logout_uri = auth_section["logout"]
        self.assertEqual(login_uri, "/login?dataset=auth/pbmc3k.cxg")
        self.assertEqual(logout_uri, "/logout?dataset=auth/pbmc3k.cxg")

        # check that the login redirect worked
        r = session.get(f"{server}/{login_uri}")
        self.assertEqual(r.history[0].status_code, 302)
        self.assertEqual(r.url, landing_url)

        config, userinfo = fetch(auth_api)
        self.assertTrue(userinfo["userinfo"]["is_authenticated"])
        self.assertEqual(userinfo["userinfo"]["username"], "test_account")
        self.assertTrue(config["config"]["parameters"]["annotations"])

        # check that the logout redirect worked
        r = session.get(f"{server}/{logout_uri}")
        self.assertEqual(r.history[0].status_code, 302)
        self.assertEqual(r.url, landing_url)

        config, userinfo = fetch(auth_api)
        self.assertFalse(userinfo["userinfo"]["is_authenticated"])
        self.assertIsNone(userinfo["userinfo"]["username"])
        self.assertTrue(config["config"]["parameters"]["annotations"])

        # no-auth datasets
        config, userinfo = fetch(noauth_api)
        self.assertIsNone(userinfo)
        self.assertFalse(config["config"]["parameters"]["annotations"])
def test_update(self):
    """Check that ``changes_from_default`` reports exactly the overridden settings.

    Each reported change is a ``(key, new_value, default_value)`` triple.
    Script lists passed as tuples are expected to be reported as lists, and
    empty script collections must not count as a change.
    """
    c = AppConfig()
    c.update_server_config(app__verbose=True, multi_dataset__dataroot="datadir")
    v = c.server_config.changes_from_default()
    self.assertCountEqual(v, [("app__verbose", True, False), ("multi_dataset__dataroot", "datadir", None)])

    c = AppConfig()
    c.update_default_dataset_config(app__scripts=(), app__inline_scripts=())
    # A dataset-level update must leave the server config untouched ...
    v = c.server_config.changes_from_default()
    self.assertCountEqual(v, [])
    # ... and the empty tuples must not register as a change on the config
    # that was actually updated (previously only server_config was checked,
    # which made this case vacuous).
    v = c.default_dataset_config.changes_from_default()
    self.assertCountEqual(v, [])

    c = AppConfig()
    c.update_default_dataset_config(app__scripts=[], app__inline_scripts=[])
    v = c.default_dataset_config.changes_from_default()
    self.assertCountEqual(v, [])

    c = AppConfig()
    c.update_default_dataset_config(app__scripts=("a", "b"), app__inline_scripts=["c", "d"])
    v = c.default_dataset_config.changes_from_default()
    self.assertCountEqual(v, [("app__scripts", ["a", "b"], []), ("app__inline_scripts", ["c", "d"], [])])
def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    """Open the pbmc3k dataset together with local-file (CSV) annotations.

    A fresh temporary directory is created with ``tempfile.mkdtemp``; the
    annotations file lives inside it as ``test_annotations.csv``. The
    directory is not removed here.

    Args:
        ext: Which on-disk flavour of pbmc3k to load (H5AD or CXG).
        annotations_fixture: When true, the annotations file is pre-populated
            by copying the ``pbmc3k-annotations.csv`` fixture.

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple: the opened data adaptor, the
        path of the temporary directory, and the ``AnnotationsLocalFile``.

    Raises:
        KeyError: If ``ext`` is neither ``MatrixDataType.H5AD`` nor
            ``MatrixDataType.CXG``.
    """
    tmp_dir = tempfile.mkdtemp()
    annotations_file = path.join(tmp_dir, "test_annotations.csv")
    if annotations_fixture:
        fixture_csv = f"{PROJECT_ROOT}/server/test/fixtures/pbmc3k-annotations.csv"
        shutil.copyfile(fixture_csv, annotations_file)

    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }
    data_locator = DataLocator(dataset_by_type[ext])

    config = AppConfig()
    server_settings = {
        "single_dataset__obs_names": None,
        "single_dataset__var_names": None,
        "single_dataset__datapath": data_locator.path,
    }
    dataset_settings = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "diffexp__lfc_cutoff": 0.01,
    }
    config.update_server_config(**server_settings)
    config.update_default_dataset_config(**dataset_settings)
    config.complete_config()

    loader = MatrixDataLoader(data_locator.abspath())
    data = loader.open(config)
    annotations = AnnotationsLocalFile(None, annotations_file)
    return data, tmp_dir, annotations
def setUp(self):
    """Open the example ``pbmc3k.h5ad`` dataset through an ``AnndataAdaptor``.

    Stores the locator on ``self.data_file`` and the adaptor on ``self.data``.
    """
    locator = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
    self.data_file = locator

    app_config = AppConfig()
    app_config.update_server_config(single_dataset__datapath=locator.path)
    app_config.complete_config()

    self.data = AnndataAdaptor(locator, app_config)
def get_basic_config(self):
    """Return an ``AppConfig`` preloaded with the common test settings.

    The returned config has not been completed; callers add their own
    settings and call ``complete_config`` themselves.
    """
    server_settings = {
        "single_dataset__obs_names": None,
        "single_dataset__var_names": None,
    }
    dataset_settings = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "diffexp__lfc_cutoff": 0.01,
    }

    basic = AppConfig()
    basic.update_server_config(**server_settings)
    basic.update_default_dataset_config(**dataset_settings)
    return basic
def test_auth_none(self):
    """With no authentication type, config has no auth section and userinfo is null."""
    app_config = AppConfig()
    app_config.update_server_config(
        authentication__type=None,
        multi_dataset__dataroot=self.dataset_dataroot,
    )
    app_config.update_default_dataset_config(user_annotations__enable=False)
    app_config.complete_config()

    with test_server(app_config=app_config) as server:
        http = requests.Session()
        api_root = f"{server}/d/pbmc3k.cxg/api/v0.2"

        config_payload = http.get(f"{api_root}/config").json()
        userinfo_payload = http.get(f"{api_root}/userinfo").json()

        self.assertNotIn("authentication", config_payload["config"])
        self.assertIsNone(userinfo_payload)
def test_auth_session(self):
    """Session auth needs no client login and reports an authenticated "anonymous" user."""
    app_config = AppConfig()
    app_config.update_server_config(
        authentication__type="session",
        multi_dataset__dataroot=self.dataset_dataroot,
    )
    app_config.update_default_dataset_config(user_annotations__enable=True)
    app_config.complete_config()

    with test_server(app_config=app_config) as server:
        http = requests.Session()
        api_root = f"{server}/d/pbmc3k.cxg/api/v0.2"

        config_payload = http.get(f"{api_root}/config").json()
        userinfo_payload = http.get(f"{api_root}/userinfo").json()

        self.assertFalse(config_payload["config"]["authentication"]["requires_client_login"])
        self.assertTrue(userinfo_payload["userinfo"]["is_authenticated"])
        self.assertEqual(userinfo_payload["userinfo"]["username"], "anonymous")
def test_auth_oauth_session(self):
    """Run the shared OAuth flow with the session-cookie option switched on."""
    # test with session cookies
    app_config = AppConfig()
    oauth_settings = {
        "authentication__type": "oauth",
        "authentication__params_oauth__api_base_url": f"http://localhost:{PORT}",
        "authentication__params_oauth__client_id": "mock_client_id",
        "authentication__params_oauth__client_secret": "mock_client_secret",
        "authentication__params_oauth__session_cookie": True,
    }
    app_config.update_server_config(**oauth_settings)
    app_config.update_server_config(multi_dataset__dataroot=self.dataset_dataroot)
    app_config.complete_config()

    self.auth_flow(app_config)
def test_auth_test_single(self):
    """Exercise the "test" authentication type against a single dataset.

    Walks through the logged-out, logged-in and logged-out-again states,
    checking the userinfo payload, the login/logout URIs and their redirects.
    """
    app_config = AppConfig()
    app_config.update_server_config(
        authentication__type="test",
        single_dataset__datapath=f"{self.dataset_dataroot}/pbmc3k.cxg",
    )
    app_config.complete_config()

    with test_server(app_config=app_config) as server:
        session = requests.Session()
        api_root = f"{server}/api/v0.2"
        landing_url = f"{server}/"

        def fetch():
            # Request config first, then userinfo, and decode both as JSON.
            config_json = session.get(f"{api_root}/config").json()
            userinfo_json = session.get(f"{api_root}/userinfo").json()
            return config_json, userinfo_json

        config, userinfo = fetch()
        self.assertFalse(userinfo["userinfo"]["is_authenticated"])
        self.assertIsNone(userinfo["userinfo"]["username"])
        self.assertTrue(config["config"]["authentication"]["requires_client_login"])
        self.assertTrue(config["config"]["parameters"]["annotations"])

        auth_section = config["config"]["authentication"]
        login_uri = auth_section["login"]
        logout_uri = auth_section["logout"]
        self.assertEqual(login_uri, "/login")
        self.assertEqual(logout_uri, "/logout")

        # check that the login redirect worked
        r = session.get(f"{server}/{login_uri}")
        self.assertEqual(r.history[0].status_code, 302)
        self.assertEqual(r.url, landing_url)

        config, userinfo = fetch()
        self.assertTrue(userinfo["userinfo"]["is_authenticated"])
        self.assertEqual(userinfo["userinfo"]["username"], "test_account")
        self.assertTrue(config["config"]["parameters"]["annotations"])

        # check that the logout redirect worked
        r = session.get(f"{server}/{logout_uri}")
        self.assertEqual(r.history[0].status_code, 302)
        self.assertEqual(r.url, landing_url)

        config, userinfo = fetch()
        self.assertFalse(userinfo["userinfo"]["is_authenticated"])
        self.assertIsNone(userinfo["userinfo"]["username"])
        self.assertTrue(config["config"]["parameters"]["annotations"])
def test_auth_oauth_cookie(self):
    """Run the shared OAuth flow using an explicitly configured cookie."""
    # test with specified cookie
    app_config = AppConfig()
    cookie_spec = {"key": "test_cxguser", "httponly": True, "max_age": 60}
    oauth_settings = {
        "authentication__type": "oauth",
        "authentication__params_oauth__api_base_url": f"http://localhost:{PORT}",
        "authentication__params_oauth__client_id": "mock_client_id",
        "authentication__params_oauth__client_secret": "mock_client_secret",
        "authentication__params_oauth__session_cookie": False,
        "authentication__params_oauth__cookie": cookie_spec,
    }
    app_config.update_server_config(**oauth_settings)
    app_config.update_server_config(multi_dataset__dataroot=self.dataset_dataroot)
    app_config.complete_config()

    self.auth_flow(app_config, "test_cxguser")
def app_config(data_locator, backed=False, extra_server_config=None, extra_dataset_config=None):
    """Build a completed ``AppConfig`` for a single dataset.

    Args:
        data_locator: Value used for ``single_dataset__datapath``.
        backed: Value used for ``adaptor__anndata_adaptor__backed``.
        extra_server_config: Optional mapping of additional server settings,
            applied after the defaults below so it can override them.
        extra_dataset_config: Optional mapping of additional default-dataset
            settings, applied after the defaults below.

    Returns:
        The ``AppConfig`` after ``complete_config()`` has been called.
    """
    # None sentinels instead of ``{}`` defaults: a mutable default is shared
    # between calls and is a well-known source of cross-call leakage.
    server_overrides = {} if extra_server_config is None else extra_server_config
    dataset_overrides = {} if extra_dataset_config is None else extra_dataset_config

    config = AppConfig()
    config.update_server_config(
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        adaptor__anndata_adaptor__backed=backed,
        single_dataset__datapath=data_locator,
        limits__diffexp_cellcount_max=None,
        limits__column_request_max=None,
    )
    config.update_default_dataset_config(
        embeddings__names=["umap", "tsne", "pca"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
    )
    # Caller-supplied settings go last so they take precedence.
    config.update_server_config(**server_overrides)
    config.update_default_dataset_config(**dataset_overrides)
    config.complete_config()
    return config
with config_location.local_handle() as lh: logging.info(f"Configuration from {config_file}") app_config.update_from_config_file(lh) has_config = True else: logging.critical(f"Configuration file not found {config_file}") sys.exit(1) if not has_config: logging.critical("No config file found") sys.exit(1) dataroot = os.getenv("CXG_DATAROOT") if dataroot: logging.info("Configuration from CXG_DATAROOT") app_config.update_server_config(multi_dataset__dataroot=dataroot) secret_name = os.getenv("CXG_AWS_SECRET_NAME") if secret_name: # need to find the secret manager region. # 1. from CXG_AWS_SECRET_REGION_NAME # 2. discover from dataroot location (if on s3) # 3. discover from config file location (if on s3) secret_region_name = os.getenv("CXG_AWS_SECRET_REGION_NAME") if secret_region_name is None: secret_region_name = discover_s3_region_name( app_config.multi_dataset__dataroot) if not secret_region_name: secret_region_name = discover_s3_region_name(config_file) if not secret_region_name: logging.error("Could not determine the AWS Secret Manager region")
def test_multi_dataset(self):
    """Validate dataroot ``base_url`` checking and per-dataroot config overrides.

    First checks that illegal ``base_url`` values make ``complete_config``
    raise ``ConfigurationError`` while legal ones are accepted. Then serves
    three dataroots (two with specialised settings, one using the defaults)
    and verifies the config each one reports, plus the ``/health`` endpoint.
    """
    c = AppConfig()
    # The f-prefix was missing here, so the literal text "{PROJECT_ROOT}" was
    # being used as the dataroot instead of the real project path.
    example_dataroot = f"{PROJECT_ROOT}/example-dataset"

    # test for illegal url_dataroots
    for illegal in ("../b", "!$*", "\\n", "", "(bad)"):
        c.update_server_config(
            multi_dataset__dataroot={"tag": {"base_url": illegal, "dataroot": example_dataroot}}
        )
        with self.assertRaises(ConfigurationError):
            c.complete_config()

    # test for legal url_dataroots
    for legal in ("d", "this.is-okay_", "a/b"):
        c.update_server_config(
            multi_dataset__dataroot={"tag": {"base_url": legal, "dataroot": example_dataroot}}
        )
        c.complete_config()

    # test that multi dataroots work end to end
    c.update_server_config(
        multi_dataset__dataroot=dict(
            s1=dict(dataroot=f"{PROJECT_ROOT}/example-dataset", base_url="set1/1/2"),
            s2=dict(dataroot=f"{PROJECT_ROOT}/server/test/test_datasets", base_url="set2"),
            s3=dict(dataroot=f"{PROJECT_ROOT}/server/test/test_datasets", base_url="set3"),
        )
    )

    # Change this default to test if the dataroot overrides below work.
    c.update_default_dataset_config(app__about_legal_tos="tos_default.html")

    # specialize the configs for set1
    c.add_dataroot_config(
        "s1", user_annotations__enable=False, diffexp__enable=True, app__about_legal_tos="tos_set1.html"
    )

    # specialize the configs for set2
    c.add_dataroot_config(
        "s2", user_annotations__enable=True, diffexp__enable=False, app__about_legal_tos="tos_set2.html"
    )

    # no specializations for set3 (they get the default dataset config)
    c.complete_config()

    with test_server(app_config=c) as server:
        session = requests.Session()

        r = session.get(f"{server}/set1/1/2/pbmc3k.h5ad/api/v0.2/config")
        data_config = r.json()
        assert data_config["config"]["displayNames"]["dataset"] == "pbmc3k"
        assert data_config["config"]["parameters"]["annotations"] is False
        assert data_config["config"]["parameters"]["disable-diffexp"] is False
        assert data_config["config"]["parameters"]["about_legal_tos"] == "tos_set1.html"

        r = session.get(f"{server}/set2/pbmc3k.cxg/api/v0.2/config")
        data_config = r.json()
        assert data_config["config"]["displayNames"]["dataset"] == "pbmc3k"
        assert data_config["config"]["parameters"]["annotations"] is True
        assert data_config["config"]["parameters"]["about_legal_tos"] == "tos_set2.html"

        r = session.get(f"{server}/set3/pbmc3k.cxg/api/v0.2/config")
        data_config = r.json()
        assert data_config["config"]["displayNames"]["dataset"] == "pbmc3k"
        assert data_config["config"]["parameters"]["annotations"] is True
        assert data_config["config"]["parameters"]["disable-diffexp"] is False
        assert data_config["config"]["parameters"]["about_legal_tos"] == "tos_default.html"

        r = session.get(f"{server}/health")
        assert r.json()["status"] == "pass"
def main():
    """Command-line driver that times differential expression on a dataset.

    Two groups of rows (A and B) are chosen either as random samples of a
    given size (``--numA`` / ``--numB``) or from an ``obs`` selector of the
    form ``variable:value`` (``--varA`` / ``--varB``). The selected diffexp
    algorithm is then run ``--trials`` times and the wall-clock time of each
    run is printed. With ``--show`` the results of the last trial are printed.

    Exits with status 1 when a group is not specified, or when the ``cxg``
    algorithm is requested for a dataset that is not opened by ``CxgAdaptor``.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which garbles the usage line.
    parser = argparse.ArgumentParser(description="A command to test diffexp")
    parser.add_argument("dataset", help="name of a dataset to load")
    parser.add_argument("-na", "--numA", type=int, help="number of rows in group A")
    parser.add_argument("-nb", "--numB", type=int, help="number of rows in group B")
    parser.add_argument("-va", "--varA", help="obs variable:value to use for group A")
    parser.add_argument("-vb", "--varB", help="obs variable:value to use for group B")
    parser.add_argument("-t", "--trials", default=1, type=int, help="number of trials")
    parser.add_argument(
        "-a", "--alg", choices=("default", "generic", "cxg"), default="default", help="algorithm to use"
    )
    parser.add_argument("-s", "--show", default=False, action="store_true", help="show the results")
    parser.add_argument(
        "-n", "--new-selection", default=False, action="store_true", help="change the selection between each trial"
    )
    parser.add_argument("--seed", default=1, type=int, help="set the random seed")

    args = parser.parse_args()

    app_config = AppConfig()
    app_config.update_server_config(single_dataset__datapath=args.dataset)
    app_config.update_server_config(app__verbose=True)
    app_config.complete_config()

    loader = MatrixDataLoader(args.dataset)
    adaptor = loader.open(app_config)

    if args.show:
        if isinstance(adaptor, CxgAdaptor):
            adaptor.open_array("X").schema.dump()

    # Seed both generators so the random row selections are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    rows = adaptor.get_shape()[0]

    if args.numA:
        filterA = random.sample(range(rows), args.numA)
    elif args.varA:
        # maxsplit=1 keeps values that themselves contain ":" intact.
        vname, vval = args.varA.split(":", 1)
        filterA = get_filter_from_obs(adaptor, vname, vval)
    else:
        print("must supply numA or varA")
        sys.exit(1)

    if args.numB:
        filterB = random.sample(range(rows), args.numB)
    elif args.varB:
        vname, vval = args.varB.split(":", 1)
        filterB = get_filter_from_obs(adaptor, vname, vval)
    else:
        print("must supply numB or varB")
        sys.exit(1)

    # Pre-bind so that "--trials 0 --show" prints nothing instead of raising
    # NameError on an unbound variable.
    results = []

    for _ in range(args.trials):
        if args.new_selection:
            # Only random selections can be redrawn; obs-based ones are fixed.
            if args.numA:
                filterA = random.sample(range(rows), args.numA)
            if args.numB:
                filterB = random.sample(range(rows), args.numB)

        maskA = np.zeros(rows, dtype=bool)
        maskA[filterA] = True
        maskB = np.zeros(rows, dtype=bool)
        maskB[filterB] = True

        t1 = time.time()
        if args.alg == "default":
            results = adaptor.compute_diffexp_ttest(maskA, maskB)
        elif args.alg == "generic":
            results = diffexp_generic.diffexp_ttest(adaptor, maskA, maskB)
        elif args.alg == "cxg":
            if not isinstance(adaptor, CxgAdaptor):
                print("cxg only works with CxgAdaptor")
                sys.exit(1)
            results = diffexp_cxg.diffexp_ttest(adaptor, maskA, maskB)

        t2 = time.time()
        print("TIME=", t2 - t1)

    if args.show:
        for res in results:
            print(res)
def launch(
    datapath,
    dataroot,
    verbose,
    debug,
    open_browser,
    port,
    host,
    embedding,
    obs_names,
    var_names,
    max_category_items,
    disable_custom_colors,
    diffexp_lfc_cutoff,
    title,
    scripts,
    about,
    disable_annotations,
    annotations_file,
    annotations_dir,
    backed,
    disable_diffexp,
    experimental_annotations_ontology,
    experimental_annotations_ontology_obo,
    experimental_enable_reembedding,
    config_file,
    dump_default_config,
):
    """Launch the cellxgene data viewer.
    This web app lets you explore single-cell expression data.
    Data must be in a format that cellxgene expects.
    Read the "getting started" guide to learn more:
    https://chanzuckerberg.github.io/cellxgene/getting-started.html

    Examples:

    > cellxgene launch example-dataset/pbmc3k.h5ad --title pbmc3k

    > cellxgene launch <your data file> --title <your title>

    > cellxgene launch <url>"""

    # NOTE(review): the docstring above appears to double as the user-facing
    # CLI help text (the function uses click) - confirm before rewording it.

    # TODO Examples to provide when "--dataroot" is unhidden
    # > cellxgene launch --dataroot example-dataset/
    #
    # > cellxgene launch --dataroot <url>

    # Printing the default config is a standalone action: print and exit
    # before any configuration is built.
    if dump_default_config:
        print(default_config)
        sys.exit(0)

    # Startup message
    click.echo("[cellxgene] Starting the CLI...")

    # app config
    app_config = AppConfig()
    # Reference taken before the updates below; the later reads of
    # server_config assume update_* mutates this same object in place.
    server_config = app_config.server_config

    try:
        # Layer 1: settings from the config file, when one was given.
        if config_file:
            app_config.update_from_config_file(config_file)

        # Determine which config options were given on the command line.
        # Those will override the ones provided in the config file (if provided).
        # A scratch AppConfig receives every CLI value; only the entries that
        # differ from the defaults are then copied onto app_config.
        cli_config = AppConfig()
        cli_config.update_server_config(
            app__verbose=verbose,
            app__debug=debug,
            app__host=host,
            app__port=port,
            app__open_browser=open_browser,
            single_dataset__datapath=datapath,
            single_dataset__title=title,
            single_dataset__about=about,
            single_dataset__obs_names=obs_names,
            single_dataset__var_names=var_names,
            multi_dataset__dataroot=dataroot,
            adaptor__anndata_adaptor__backed=backed,
        )
        # The CLI exposes "disable_*" flags while the config stores "enable"
        # style values, hence the negations below.
        cli_config.update_default_dataset_config(
            app__scripts=scripts,
            user_annotations__enable=not disable_annotations,
            user_annotations__local_file_csv__file=annotations_file,
            user_annotations__local_file_csv__directory=annotations_dir,
            user_annotations__ontology__enable=experimental_annotations_ontology,
            user_annotations__ontology__obo_location=experimental_annotations_ontology_obo,
            presentation__max_categories=max_category_items,
            presentation__custom_colors=not disable_custom_colors,
            embeddings__names=embedding,
            embeddings__enable_reembedding=experimental_enable_reembedding,
            diffexp__enable=not disable_diffexp,
            diffexp__lfc_cutoff=diffexp_lfc_cutoff,
        )

        # Layer 2: each diff entry unpacks to (key, value, _); apply only the
        # key/value pairs that changed, first for the server config ...
        diff = cli_config.server_config.changes_from_default()
        changes = {key: val for key, val, _ in diff}
        app_config.update_server_config(**changes)

        # ... then for the default dataset config.
        diff = cli_config.default_dataset_config.changes_from_default()
        changes = {key: val for key, val, _ in diff}
        app_config.update_default_dataset_config(**changes)

        # process the configuration
        # any errors will be thrown as an exception.
        # any info messages will be passed to the messagefn function.

        def messagefn(message):
            # Prefix messages with the same tag used by the other CLI output.
            click.echo("[cellxgene] " + message)

        # Use a default secret if one is not provided
        if not server_config.app__flask_secret_key:
            app_config.update_server_config(app__flask_secret_key="SparkleAndShine")

        app_config.complete_config(messagefn)

    except (ConfigurationError, DatasetAccessError) as e:
        # Re-raise configuration problems as a ClickException.
        raise click.ClickException(e)

    # NOTE(review): handle_scripts is defined elsewhere; what it does with the
    # script list is not visible from this block.
    handle_scripts(scripts)

    # create the server
    server = CliLaunchServer(app_config)

    # Unless verbose, only let werkzeug log errors.
    if not server_config.app__verbose:
        log = logging.getLogger("werkzeug")
        log.setLevel(logging.ERROR)

    cellxgene_url = f"http://{app_config.server_config.app__host}:{app_config.server_config.app__port}"
    if server_config.app__open_browser:
        click.echo(f"[cellxgene] Launching! Opening your browser to {cellxgene_url} now.")
        webbrowser.open(cellxgene_url)
    else:
        click.echo(f"[cellxgene] Launching! Please go to {cellxgene_url} in your browser.")

    click.echo("[cellxgene] Type CTRL-C at any time to exit.")

    # Unless verbose, send everything printed to stdout from here on to
    # devnull. The handle is never closed in this function.
    if not server_config.app__verbose:
        f = open(devnull, "w")
        sys.stdout = f

    try:
        server.app.run(
            host=server_config.app__host,
            debug=server_config.app__debug,
            port=server_config.app__port,
            # threaded is the inverse of the debug flag
            threaded=not server_config.app__debug,
            use_debugger=False,
            use_reloader=False,
        )
    except OSError as e:
        # Turn "address already in use" into a friendly CLI error; any other
        # OSError propagates unchanged.
        if e.errno == errno.EADDRINUSE:
            raise click.ClickException("Port is in use, please specify an open port using the --port flag.") from e
        raise
with config_location.local_handle() as lh: logging.info(f"Configuration from {config_file}") app_config.update_from_config_file(lh) has_config = True else: logging.critical(f"Configuration file not found {config_file}") sys.exit(1) if not has_config: logging.critical("No config file found") sys.exit(1) dataroot = os.getenv("CXG_DATAROOT") if dataroot: logging.info("Configuration from CXG_DATAROOT") app_config.update_server_config(multi_dataset__dataroot=dataroot) # update from secret manager try: handle_config_from_secret(app_config) except SecretKeyRetrievalError: sys.exit(1) # features are unsupported in the current hosted server app_config.update_default_dataset_config( embeddings__enable_reembedding=False, ) app_config.update_server_config( multi_dataset__allowed_matrix_types=["cxg"], ) app_config.complete_config(logging.info) if not app_config.server_config.app__flask_secret_key: