Exemple #1
0
def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    """Open the example dataset next to a temporary annotations CSV path.

    Args:
        ext: matrix data type used to pick the dataset file. Only
            ``MatrixDataType.H5AD`` has an entry; any other value raises
            ``KeyError`` from the lookup.
        annotations_fixture: when truthy, the pbmc3k annotations fixture is
            copied to the temporary annotations path before loading.

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple: the object returned by
        ``MatrixDataLoader(...).open(config)``, the temporary directory path,
        and an ``AnnotationsLocalFile`` built on the CSV path. The temporary
        directory is not removed here; cleanup is left to the caller.
    """
    tmp_dir = tempfile.mkdtemp()
    annotations_file = path.join(tmp_dir, "test_annotations.csv")
    if annotations_fixture:
        fixture_csv = f"{PROJECT_ROOT}/local_server/test/fixtures/pbmc3k-annotations.csv"
        shutil.copyfile(fixture_csv, annotations_file)

    # Map each supported matrix type to its example dataset.
    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
    }
    data_locator = DataLocator(dataset_by_type[ext])

    config = AppConfig()
    server_settings = {
        "app__flask_secret_key": "secret",
        "single_dataset__obs_names": None,
        "single_dataset__var_names": None,
        "single_dataset__datapath": data_locator.path,
    }
    config.update_server_config(**server_settings)
    dataset_settings = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "diffexp__lfc_cutoff": 0.01,
    }
    config.update_dataset_config(**dataset_settings)
    config.complete_config()

    loader = MatrixDataLoader(data_locator.abspath())
    data = loader.open(config)
    annotations = AnnotationsLocalFile(None, annotations_file)
    return data, tmp_dir, annotations
Exemple #2
0
def _is_accessible(path, config):
    """Report whether ``path`` can be reached through a ``DataLocator``.

    A ``None`` path is treated as accessible. Otherwise a ``DataLocator`` is
    built with the S3 region taken from ``config`` and its ``exists()`` result
    is returned; a ``RuntimeError`` raised while doing so yields ``False``.
    """
    if path is not None:
        try:
            region = config.data_locator__s3__region_name
            return DataLocator(path, region_name=region).exists()
        except RuntimeError:
            return False
    return True
 def test_posix_file(self):
     """Load the example dataset via a relative POSIX path and run the standard assertions."""
     dataset = DataLocator("../example-dataset/pbmc3k.h5ad")
     app_cfg = self.get_basic_config()
     app_cfg.update_server_config(single_dataset__datapath=dataset.path)
     app_cfg.complete_config()
     self.stdAsserts(AnndataAdaptor(dataset, app_cfg))
 def setUp(self):
     """Build an ``AnndataAdaptor`` over the pbmc3k example dataset for each test."""
     dataset = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
     self.data_file = dataset

     app_cfg = AppConfig()
     # Two separate updates, matching the original call sequence.
     app_cfg.update_server_config(single_dataset__datapath=dataset.path)
     app_cfg.update_server_config(app__flask_secret_key="secret")
     app_cfg.complete_config()

     self.data = AnndataAdaptor(dataset, app_cfg)
Exemple #5
0
    def setUp(self):
        """Load the ``nan.h5ad`` fixture and create its schema with ``UserWarning`` silenced."""
        locator = DataLocator(f"{FIXTURES_ROOT}/nan.h5ad")
        self.data_locator = locator
        self.config = app_config(locator.path)

        # UserWarning is ignored only while the adaptor is built and its schema created.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            adaptor = AnndataAdaptor(locator, self.config)
            self.data = adaptor
            adaptor._create_schema()
Exemple #6
0
    def __init__(self, location, matrix_data_type=None, app_config=None):
        """Resolve ``location`` and select the adaptor type for it.

        Args:
            location: a string or ``DataLocator`` identifying the dataset.
            matrix_data_type: optional ``MatrixDataType`` enum value; when
                ``None`` it is determined by ``__matrix_data_type()``.
            app_config: optional application config; when given, its
                ``server_config.data_locator__s3__region_name`` is passed to
                the ``DataLocator``.

        Raises:
            DatasetAccessError: if the location does not exist (with
                ``HTTPStatus.NOT_FOUND``) or the data type is not allowed.
        """
        if app_config is None:
            s3_region = None
        else:
            s3_region = app_config.server_config.data_locator__s3__region_name

        self.location = DataLocator(location, region_name=s3_region)
        if not self.location.exists():
            raise DatasetAccessError("Dataset does not exist.", HTTPStatus.NOT_FOUND)

        # Enum value of type MatrixDataType.
        self.matrix_data_type = matrix_data_type
        # DataAdaptor type corresponding to matrix_data_type; set below.
        self.matrix_type = None

        if matrix_data_type is None:
            self.matrix_data_type = self.__matrix_data_type()

        type_allowed = self.__matrix_data_type_allowed(app_config)
        if not type_allowed:
            raise DatasetAccessError("Dataset does not have an allowed type.")

        if self.matrix_data_type == MatrixDataType.H5AD:
            # Imported here rather than at module level, as in the original.
            from local_server.data_anndata.anndata_adaptor import AnndataAdaptor

            self.matrix_type = AnndataAdaptor
Exemple #7
0
class MatrixDataLoader:
    """Resolve a dataset location to the DataAdaptor type used to load it."""

    def __init__(self, location, matrix_data_type=None, app_config=None):
        """Resolve ``location`` and select the adaptor type for it.

        Args:
            location: a string or ``DataLocator`` identifying the dataset.
            matrix_data_type: optional ``MatrixDataType`` enum value; when
                ``None`` it is derived from the location's path.
            app_config: optional application config; when given, its
                ``server_config.data_locator__s3__region_name`` is passed to
                the ``DataLocator``.

        Raises:
            DatasetAccessError: if the location does not exist (with
                ``HTTPStatus.NOT_FOUND``) or the data type is not allowed.
        """
        if app_config is None:
            s3_region = None
        else:
            s3_region = app_config.server_config.data_locator__s3__region_name

        self.location = DataLocator(location, region_name=s3_region)
        if not self.location.exists():
            raise DatasetAccessError("Dataset does not exist.", HTTPStatus.NOT_FOUND)

        # Enum value of type MatrixDataType.
        self.matrix_data_type = matrix_data_type
        # DataAdaptor type corresponding to matrix_data_type; set below.
        self.matrix_type = None

        if matrix_data_type is None:
            self.matrix_data_type = self.__matrix_data_type()

        type_allowed = self.__matrix_data_type_allowed(app_config)
        if not type_allowed:
            raise DatasetAccessError("Dataset does not have an allowed type.")

        if self.matrix_data_type == MatrixDataType.H5AD:
            # Imported here rather than at module level, as in the original.
            from local_server.data_anndata.anndata_adaptor import AnndataAdaptor

            self.matrix_type = AnndataAdaptor

    def __matrix_data_type(self):
        """Derive the matrix data type from the location's path suffix."""
        is_h5ad = self.location.path.endswith(".h5ad")
        return MatrixDataType.H5AD if is_h5ad else MatrixDataType.UNKNOWN

    def __matrix_data_type_allowed(self, app_config):
        """Return True unless the data type is ``UNKNOWN``; ``app_config`` is not consulted."""
        current_type = self.matrix_data_type
        return current_type != MatrixDataType.UNKNOWN

    def pre_load_validation(self):
        """Reject unknown data types, then delegate validation to the adaptor type.

        Raises:
            DatasetAccessError: if the data type is ``MatrixDataType.UNKNOWN``.
        """
        if self.matrix_data_type == MatrixDataType.UNKNOWN:
            message = "Dataset does not have a recognized type: .h5ad"
            raise DatasetAccessError(message)
        self.matrix_type.pre_load_validation(self.location)

    def file_size(self):
        """Return the adaptor type's ``file_size`` result for this location."""
        adaptor_type = self.matrix_type
        return adaptor_type.file_size(self.location)

    def open(self, app_config, dataset_config=None):
        """Create and return a DataAdaptor via the adaptor type's ``open``."""
        adaptor_type = self.matrix_type
        return adaptor_type.open(self.location, app_config, dataset_config)
 def test_url_http(self):
     """Load the example dataset from an HTTP URL and run the standard assertions."""
     remote = DataLocator(
         "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad"
     )
     app_cfg = self.get_basic_config()
     self.stdAsserts(AnndataAdaptor(remote, app_cfg))
 def setUp(self):
     """Build the adaptor under test from ``self.data_locator`` and ``self.backed``."""
     app_cfg = app_config(self.data_locator, self.backed)
     locator = DataLocator(self.data_locator)
     self.data = AnndataAdaptor(locator, app_cfg)