def data_with_tmp_tiledb_annotations(ext: MatrixDataType):
    """Open the pbmc3k dataset with hosted-TileDB user annotations.

    A fresh temporary directory is created and used as the hosted file
    directory for the annotations. The directory is not removed here.

    :param ext: MatrixDataType.H5AD or MatrixDataType.CXG; selects which
        copy of the dataset is opened.
    :return: tuple of (data, tmp_dir, annotations)
    """
    db_uri = "postgresql://*****:*****@localhost:5432"

    tmp_dir = tempfile.mkdtemp()
    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }
    locator = DataLocator(dataset_by_type[ext])

    app_cfg = AppConfig()
    app_cfg.update_server_config(
        app__flask_secret_key="secret",
        multi_dataset__dataroot=locator.path,
        authentication__type="test",
        authentication__insecure_test_environment=True,
    )
    app_cfg.update_default_dataset_config(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
        user_annotations__type="hosted_tiledb_array",
        user_annotations__hosted_tiledb_array__db_uri=db_uri,
        user_annotations__hosted_tiledb_array__hosted_file_directory=tmp_dir,
    )
    app_cfg.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    data = loader.open(app_cfg)
    annotations = AnnotationsHostedTileDB(tmp_dir, DbUtils(db_uri))
    return data, tmp_dir, annotations
def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    """Open the pbmc3k dataset with local-file annotations in a temp directory.

    :param ext: MatrixDataType.H5AD or MatrixDataType.CXG; selects which
        copy of the dataset is opened.
    :param annotations_fixture: when true, the pbmc3k annotations fixture
        CSV is copied into the temporary directory before loading.
    :return: tuple of (data, tmp_dir, annotations). The temporary directory
        is not removed here.
    """
    tmp_dir = tempfile.mkdtemp()
    annotations_file = path.join(tmp_dir, "test_annotations.csv")
    if annotations_fixture:
        fixture_csv = f"{PROJECT_ROOT}/server/test/fixtures/pbmc3k-annotations.csv"
        shutil.copyfile(fixture_csv, annotations_file)

    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }
    locator = DataLocator(dataset_by_type[ext])

    app_cfg = AppConfig()
    app_cfg.update_server_config(
        app__flask_secret_key="secret",
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        single_dataset__datapath=locator.path,
    )
    app_cfg.update_default_dataset_config(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
    )
    app_cfg.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    data = loader.open(app_cfg)
    annotations = AnnotationsLocalFile(None, annotations_file)
    return data, tmp_dir, annotations
def __init__(self, location, matrix_data_type=None, app_config=None):
    """Resolve the dataset location and pick the adaptor class for it.

    :param location: a string or a DataLocator.
    :param matrix_data_type: optional MatrixDataType enum value; when None
        it is determined from the location.
    :param app_config: optional application config; when given, its S3
        region name is passed to the DataLocator and it is consulted for
        the allowed matrix types.
    :raises DatasetAccessError: if the location does not exist
        (HTTPStatus.NOT_FOUND) or the matrix type is not allowed.
    """
    if app_config is None:
        region_name = None
    else:
        region_name = app_config.server_config.data_locator__s3__region_name

    self.location = DataLocator(location, region_name=region_name)
    if not self.location.exists():
        raise DatasetAccessError("Dataset does not exist.", HTTPStatus.NOT_FOUND)

    # matrix_data_type is an enum value of type MatrixDataType
    self.matrix_data_type = matrix_data_type
    # matrix_type is a DataAdaptor type, which corresponds to the matrix_data_type
    self.matrix_type = None

    if matrix_data_type is None:
        self.matrix_data_type = self.__matrix_data_type()

    if not self.__matrix_data_type_allowed(app_config):
        raise DatasetAccessError("Dataset does not have an allowed type.")

    # Adaptor modules are imported lazily, only for the selected type.
    selected_type = self.matrix_data_type
    if selected_type == MatrixDataType.H5AD:
        from server.data_anndata.anndata_adaptor import AnndataAdaptor

        self.matrix_type = AnndataAdaptor
    elif selected_type == MatrixDataType.CXG:
        from server.data_cxg.cxg_adaptor import CxgAdaptor

        self.matrix_type = CxgAdaptor
def test_posix_file(self):
    """Load the example h5ad through a relative POSIX path and run the standard asserts."""
    h5ad_locator = DataLocator("../example-dataset/pbmc3k.h5ad")

    cfg = self.get_basic_config()
    cfg.update_server_config(single_dataset__datapath=h5ad_locator.path)
    cfg.complete_config()

    adaptor = AnndataAdaptor(h5ad_locator, cfg)
    self.stdAsserts(adaptor)
def dataroot_test_index():
    """Render a plain HTML index of the datasets found under each dataroot.

    The page is meant for testing/debugging purposes. Every entry listed by
    a dataroot's DataLocator is probed with MatrixDataLoader; entries that
    raise DatasetAccessError are left out of the listing.

    Dataset names and base URLs are HTML-escaped and the ``href`` attribute
    is quoted, so names containing spaces or markup characters produce valid
    links instead of broken or injected HTML.

    :return: the response built by ``make_response`` from the HTML page.
    """
    from html import escape

    config = current_app.app_config
    server_config = config.server_config

    datasets = []
    for dataroot_dict in server_config.multi_dataset__dataroot.values():
        dataroot = dataroot_dict["dataroot"]
        url_dataroot = dataroot_dict["base_url"]
        locator = DataLocator(dataroot, region_name=server_config.data_locator__s3__region_name)
        for fname in locator.ls():
            location = path_join(dataroot, fname)
            try:
                MatrixDataLoader(location, app_config=config)
            except DatasetAccessError:
                # skip over invalid datasets
                continue
            datasets.append((url_dataroot, fname))
    datasets.sort()

    parts = [
        '<!doctype html><html lang="en">',
        "<head><title>Hosted Cellxgene</title></head>",
        "<body><H1>Welcome to cellxgene</H1>",
        "<br/>Select one of these datasets...<br/>",
        "<ul>",
    ]
    for url_dataroot, dataset in datasets:
        # Escape both the link target and the label: the names come from a
        # directory listing and are not guaranteed to be HTML-safe.
        href = escape(f"{url_dataroot}/{dataset}", quote=True)
        label = escape(f"{dataset}")
        parts.append(f'<li><a href="{href}">{label}</a></li>')
    parts.append("</ul>")
    parts.append("</body></html>")

    return make_response("".join(parts))
def test_url_http(self):
    """Load the example h5ad over HTTP and run the standard asserts."""
    remote_h5ad = "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/master/example-dataset/pbmc3k.h5ad"
    remote_locator = DataLocator(remote_h5ad)

    cfg = AppConfig()
    cfg.update(**self.args)

    adaptor = AnndataAdaptor(remote_locator, cfg)
    self.stdAsserts(adaptor)
def setUp(self):
    """Create an AnndataAdaptor over the example pbmc3k.h5ad for each test."""
    h5ad_locator = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
    self.data_file = h5ad_locator

    cfg = AppConfig()
    cfg.update_server_config(single_dataset__datapath=h5ad_locator.path)
    cfg.update_server_config(app__flask_secret_key="secret")
    cfg.complete_config()

    self.data = AnndataAdaptor(h5ad_locator, cfg)
def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    """Open the pbmc3k dataset with local-file annotations in a temp directory.

    :param ext: MatrixDataType.H5AD or MatrixDataType.CXG; selects which
        copy of the dataset is opened.
    :param annotations_fixture: when true, the pbmc3k annotations CSV from
        the test datasets is copied into the temporary directory first.
    :return: tuple of (data, tmp_dir, annotations). The temporary directory
        is not removed here.
    """
    tmp_dir = tempfile.mkdtemp()
    annotations_file = path.join(tmp_dir, "test_annotations.csv")
    if annotations_fixture:
        fixture_csv = f"{PROJECT_ROOT}/server/test/test_datasets/pbmc3k-annotations.csv"
        shutil.copyfile(fixture_csv, annotations_file)

    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/test_datasets/pbmc3k.cxg",
    }
    locator = DataLocator(dataset_by_type[ext])

    app_cfg = AppConfig()
    app_cfg.update(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        diffexp__lfc_cutoff=0.01,
    )
    app_cfg.update(single_dataset__datapath=locator.path)
    app_cfg.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    data = loader.open(app_cfg)
    annotations = AnnotationsLocalFile(None, annotations_file)
    return data, tmp_dir, annotations
def convert_pbmc3k(self, **kwargs):
    """Write pbmc3k.h5ad out as a CXG container under /tmp and open it.

    The container path is appended to ``self.fixtures``. Extra keyword
    arguments are forwarded to ``write_cxg``.

    :return: a CxgAdaptor over the newly written container.
    """
    suffix = random_string(8)
    cxg_path = f"/tmp/test_{suffix}.cxg"
    self.fixtures.append(cxg_path)

    adata = anndata.read_h5ad(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
    write_cxg(adata=adata, container=cxg_path, title="pbmc3k", **kwargs)

    config = app_config(cxg_path)
    cxg_locator = DataLocator(cxg_path)
    return CxgAdaptor(cxg_locator, config)
def setUp(self):
    """Open the nan.h5ad test dataset and build its schema, ignoring UserWarning."""
    nan_locator = DataLocator(f"{PROJECT_ROOT}/server/test/test_datasets/nan.h5ad")
    self.data_locator = nan_locator
    self.config = app_config(nan_locator.path)

    # UserWarning is suppressed while the adaptor is created and its schema built.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        adaptor = AnndataAdaptor(self.data_locator, self.config)
        self.data = adaptor
        adaptor._create_schema()
def _is_accessible(path, config):
    """Report whether ``path`` can be reached through a DataLocator.

    :param path: location to probe; ``None`` is treated as accessible.
    :param config: object providing ``data_locator__s3__region_name``.
    :return: True when path is None or the locator reports it exists;
        False when it does not exist or a RuntimeError is raised while
        checking.
    """
    if path is None:
        return True

    try:
        locator = DataLocator(path, region_name=config.data_locator__s3__region_name)
        found = locator.exists()
    except RuntimeError:
        return False
    return found
def convert_pbmc3k(self, **kwargs):
    """Write pbmc3k.h5ad out as a CXG container under /tmp and open it.

    The container name carries eight random ASCII letters, and its path is
    appended to ``self.fixtures``. Extra keyword arguments are forwarded to
    ``write_cxg``.

    :return: a CxgAdaptor over the newly written container.
    """
    letters = [random.choice(string.ascii_letters) for _ in range(8)]
    suffix = "".join(letters)
    cxg_path = f"/tmp/test_{suffix}.cxg"
    self.fixtures.append(cxg_path)

    adata = anndata.read_h5ad(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
    write_cxg(adata=adata, container=cxg_path, title="pbmc3k", **kwargs)

    config = app_config(cxg_path)
    cxg_locator = DataLocator(cxg_path)
    return CxgAdaptor(cxg_locator, config)
return hashes @staticmethod def get_csp_hashes(app, app_config): script_hashes, style_hashes = WSGIServer.load_static_csp_hashes(app) script_hashes += WSGIServer.compute_inline_scp_hashes(app, app_config) return (script_hashes, style_hashes) try: app_config = AppConfig() has_config = False # config file: look first for "config.yaml" in the current working directory config_file = "config.yaml" config_location = DataLocator(config_file) if config_location.exists(): with config_location.local_handle() as lh: logging.info(f"Configuration from {config_file}") app_config.update_from_config_file(lh) has_config = True else: # config file: second, use the CXG_CONFIG_FILE config_file = os.getenv("CXG_CONFIG_FILE") if config_file: region_name = discover_s3_region_name(config_file) config_location = DataLocator(config_file, region_name) if config_location.exists(): with config_location.local_handle() as lh: logging.info(f"Configuration from {config_file}")
def setUp(self):
    """Create an AnndataAdaptor from ``self.data_locator`` and ``self.backed``."""
    cfg = app_config(self.data_locator, self.backed)
    locator = DataLocator(self.data_locator)
    self.data = AnndataAdaptor(locator, cfg)
def get_data(self, fixture):
    """Open the named fixture from the server test datasets as a CxgAdaptor.

    :param fixture: file or directory name under server/test/test_datasets.
    :return: a CxgAdaptor for that fixture.
    """
    fixture_path = f"{PROJECT_ROOT}/server/test/test_datasets/{fixture}"
    cfg = app_config(fixture_path)
    locator = DataLocator(fixture_path)
    return CxgAdaptor(locator, cfg)
def get_data(self, fixture):
    """Open the named fixture under FIXTURES_ROOT as a CxgAdaptor.

    :param fixture: file or directory name under FIXTURES_ROOT.
    :return: a CxgAdaptor for that fixture.
    """
    fixture_path = f"{FIXTURES_ROOT}/{fixture}"
    cfg = app_config(fixture_path)
    locator = DataLocator(fixture_path)
    return CxgAdaptor(locator, cfg)
def test_url_http(self):
    """Load the example h5ad over HTTP and run the standard asserts."""
    remote_h5ad = "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad"
    remote_locator = DataLocator(remote_h5ad)
    cfg = self.get_basic_config()

    adaptor = AnndataAdaptor(remote_locator, cfg)
    self.stdAsserts(adaptor)
def setUp(self):
    """Create a CxgAdaptor over the pbmc3k.cxg test dataset for each test."""
    cxg_path = f"{PROJECT_ROOT}/server/test/test_datasets/pbmc3k.cxg"
    cfg = app_config(cxg_path)
    locator = DataLocator(cxg_path)
    self.data = CxgAdaptor(locator, cfg)