def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    """Open the pbmc3k example dataset alongside a scratch annotations file.

    A new temporary directory is created with ``tempfile.mkdtemp`` and an
    annotations path ``test_annotations.csv`` is placed inside it.  When
    ``annotations_fixture`` is truthy, the pbmc3k annotations fixture is
    copied to that path first.  The temporary directory is not removed here.

    :param ext: matrix data type selecting the dataset file; only
        ``MatrixDataType.H5AD`` is mapped, any other value raises ``KeyError``.
    :param annotations_fixture: copy the fixture CSV into the scratch file.
    :return: tuple ``(data, tmp_dir, annotations)``.
    """
    scratch_dir = tempfile.mkdtemp()
    csv_path = path.join(scratch_dir, "test_annotations.csv")
    if annotations_fixture:
        fixture_csv = f"{PROJECT_ROOT}/local_server/test/fixtures/pbmc3k-annotations.csv"
        shutil.copyfile(fixture_csv, csv_path)

    # Map each supported matrix type to its example dataset.
    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
    }
    locator = DataLocator(dataset_by_type[ext])

    app_cfg = AppConfig()
    app_cfg.update_server_config(
        app__flask_secret_key="secret",
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        single_dataset__datapath=locator.path,
    )
    app_cfg.update_dataset_config(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
    )
    app_cfg.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    data = loader.open(app_cfg)
    annotations = AnnotationsLocalFile(None, csv_path)
    return data, scratch_dir, annotations
def _is_accessible(path, config): if path is None: return True try: dl = DataLocator(path, region_name=config.data_locator__s3__region_name) return dl.exists() except RuntimeError: return False
def test_posix_file(self):
    # Load the example dataset through a relative POSIX path and run the
    # shared assertions against the resulting adaptor.
    h5ad = DataLocator("../example-dataset/pbmc3k.h5ad")

    cfg = self.get_basic_config()
    cfg.update_server_config(single_dataset__datapath=h5ad.path)
    cfg.complete_config()

    adaptor = AnndataAdaptor(h5ad, cfg)
    self.stdAsserts(adaptor)
def setUp(self):
    """Build an ``AnndataAdaptor`` over the pbmc3k example dataset.

    Stores the locator on ``self.data_file`` and the adaptor on ``self.data``.
    """
    dataset = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
    self.data_file = dataset

    app_cfg = AppConfig()
    # Two separate server-config updates, applied in this order.
    app_cfg.update_server_config(single_dataset__datapath=dataset.path)
    app_cfg.update_server_config(app__flask_secret_key="secret")
    app_cfg.complete_config()

    self.data = AnndataAdaptor(dataset, app_cfg)
def setUp(self):
    """Load the ``nan.h5ad`` fixture into an adaptor and create its schema.

    ``UserWarning`` is ignored while the adaptor is constructed and while
    ``_create_schema()`` runs.
    """
    locator = DataLocator(f"{FIXTURES_ROOT}/nan.h5ad")
    self.data_locator = locator
    self.config = app_config(locator.path)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        adaptor = AnndataAdaptor(locator, self.config)
        self.data = adaptor
        adaptor._create_schema()
def __init__(self, location, matrix_data_type=None, app_config=None):
    """Resolve ``location`` and pick the adaptor class for its matrix type.

    :param location: a string or a ``DataLocator``.
    :param matrix_data_type: ``MatrixDataType`` enum value; when ``None`` it
        is determined by ``self.__matrix_data_type()``.
    :param app_config: optional application config; when given, its
        ``server_config.data_locator__s3__region_name`` is passed to the
        locator as ``region_name``.
    :raises DatasetAccessError: if the location does not exist (with
        ``HTTPStatus.NOT_FOUND``) or the matrix data type is not allowed.
    """
    if app_config is None:
        region_name = None
    else:
        region_name = app_config.server_config.data_locator__s3__region_name

    self.location = DataLocator(location, region_name=region_name)
    if not self.location.exists():
        raise DatasetAccessError("Dataset does not exist.", HTTPStatus.NOT_FOUND)

    # Enum value of type MatrixDataType describing the dataset format.
    self.matrix_data_type = matrix_data_type
    # DataAdaptor type corresponding to matrix_data_type; set further down.
    self.matrix_type = None

    if self.matrix_data_type is None:
        self.matrix_data_type = self.__matrix_data_type()

    if not self.__matrix_data_type_allowed(app_config):
        raise DatasetAccessError("Dataset does not have an allowed type.")

    if self.matrix_data_type == MatrixDataType.H5AD:
        # Imported here, inside the method, rather than at module level.
        from local_server.data_anndata.anndata_adaptor import AnndataAdaptor

        self.matrix_type = AnndataAdaptor
class MatrixDataLoader:
    """Locate a dataset, determine its matrix type, and delegate to the adaptor.

    Attributes set by ``__init__``:
      * ``location`` - ``DataLocator`` built from the given location.
      * ``matrix_data_type`` - ``MatrixDataType`` enum value.
      * ``matrix_type`` - adaptor class for that type (``AnndataAdaptor`` for
        H5AD), or ``None`` if no adaptor was selected.
    """

    def __init__(self, location, matrix_data_type=None, app_config=None):
        """Resolve ``location`` and pick the adaptor class for its matrix type.

        :param location: a string or a ``DataLocator``.
        :param matrix_data_type: ``MatrixDataType`` enum value; when ``None``
            it is derived from the location's path suffix.
        :param app_config: optional application config; when given, its
            ``server_config.data_locator__s3__region_name`` is passed to the
            locator as ``region_name``.
        :raises DatasetAccessError: if the location does not exist (with
            ``HTTPStatus.NOT_FOUND``) or the matrix data type is not allowed.
        """
        if app_config is None:
            region_name = None
        else:
            region_name = app_config.server_config.data_locator__s3__region_name

        self.location = DataLocator(location, region_name=region_name)
        if not self.location.exists():
            raise DatasetAccessError("Dataset does not exist.", HTTPStatus.NOT_FOUND)

        # Enum value of type MatrixDataType describing the dataset format.
        self.matrix_data_type = matrix_data_type
        # DataAdaptor type corresponding to matrix_data_type; set further down.
        self.matrix_type = None

        if self.matrix_data_type is None:
            self.matrix_data_type = self.__matrix_data_type()

        if not self.__matrix_data_type_allowed(app_config):
            raise DatasetAccessError("Dataset does not have an allowed type.")

        if self.matrix_data_type == MatrixDataType.H5AD:
            # Imported here, inside the method, rather than at module level.
            from local_server.data_anndata.anndata_adaptor import AnndataAdaptor

            self.matrix_type = AnndataAdaptor

    def __matrix_data_type(self):
        """Return H5AD when the location path ends in ``.h5ad``, else UNKNOWN."""
        is_h5ad = self.location.path.endswith(".h5ad")
        return MatrixDataType.H5AD if is_h5ad else MatrixDataType.UNKNOWN

    def __matrix_data_type_allowed(self, app_config):
        """Return True unless the matrix data type is UNKNOWN.

        ``app_config`` is accepted but not consulted.
        """
        return self.matrix_data_type != MatrixDataType.UNKNOWN

    def pre_load_validation(self):
        """Run the adaptor's pre-load validation on this location.

        :raises DatasetAccessError: if the matrix data type is UNKNOWN.
        """
        if self.matrix_data_type == MatrixDataType.UNKNOWN:
            raise DatasetAccessError("Dataset does not have a recognized type: .h5ad")
        self.matrix_type.pre_load_validation(self.location)

    def file_size(self):
        """Return whatever the adaptor's ``file_size`` reports for this location."""
        adaptor_cls = self.matrix_type
        return adaptor_cls.file_size(self.location)

    def open(self, app_config, dataset_config=None):
        """Create and return a DataAdaptor object via the adaptor's ``open``."""
        adaptor_cls = self.matrix_type
        return adaptor_cls.open(self.location, app_config, dataset_config)
def test_url_http(self):
    # Build an adaptor from an http:// URL and run the shared assertions.
    remote = DataLocator(
        "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad"
    )
    cfg = self.get_basic_config()
    self.stdAsserts(AnndataAdaptor(remote, cfg))
def setUp(self):
    """Create ``self.data`` from the ``data_locator`` and ``backed`` attributes."""
    cfg = app_config(self.data_locator, self.backed)
    locator = DataLocator(self.data_locator)
    self.data = AnndataAdaptor(locator, cfg)