Example 1
def data_with_tmp_tiledb_annotations(ext: MatrixDataType):
    tmp_dir = tempfile.mkdtemp()
    fname = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }[ext]
    data_locator = DataLocator(fname)
    config = AppConfig()
    config.update_server_config(
        app__flask_secret_key="secret",
        multi_dataset__dataroot=data_locator.path,
        authentication__type="test",
        authentication__insecure_test_environment=True,
    )
    config.update_default_dataset_config(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
        user_annotations__type="hosted_tiledb_array",
        user_annotations__hosted_tiledb_array__db_uri="postgresql://*****:*****@localhost:5432",
        user_annotations__hosted_tiledb_array__hosted_file_directory=tmp_dir,
    )

    config.complete_config()

    data = MatrixDataLoader(data_locator.abspath()).open(config)
    annotations = AnnotationsHostedTileDB(
        tmp_dir,
        DbUtils("postgresql://*****:*****@localhost:5432"),
    )
    return data, tmp_dir, annotations
Example 2
def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    tmp_dir = tempfile.mkdtemp()
    annotations_file = path.join(tmp_dir, "test_annotations.csv")
    if annotations_fixture:
        shutil.copyfile(
            f"{PROJECT_ROOT}/server/test/test_datasets/pbmc3k-annotations.csv",
            annotations_file)
    args = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "single_dataset__obs_names": None,
        "single_dataset__var_names": None,
        "diffexp__lfc_cutoff": 0.01,
    }
    fname = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/test_datasets/pbmc3k.cxg",
    }[ext]
    data_locator = DataLocator(fname)
    config = AppConfig()
    config.update(**args)
    config.update(single_dataset__datapath=data_locator.path)
    config.complete_config()
    data = MatrixDataLoader(data_locator.abspath()).open(config)
    annotations = AnnotationsLocalFile(None, annotations_file)
    return data, tmp_dir, annotations
Example 3
def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    tmp_dir = tempfile.mkdtemp()
    annotations_file = path.join(tmp_dir, "test_annotations.csv")
    if annotations_fixture:
        shutil.copyfile(
            f"{PROJECT_ROOT}/server/test/fixtures/pbmc3k-annotations.csv",
            annotations_file)
    fname = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }[ext]
    data_locator = DataLocator(fname)
    config = AppConfig()
    config.update_server_config(
        app__flask_secret_key="secret",
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        single_dataset__datapath=data_locator.path,
    )
    config.update_default_dataset_config(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
    )

    config.complete_config()
    data = MatrixDataLoader(data_locator.abspath()).open(config)
    annotations = AnnotationsLocalFile(None, annotations_file)
    return data, tmp_dir, annotations
Example 4
def dataroot_test_index():
    # the following index page is meant for testing/debugging purposes
    data = '<!doctype html><html lang="en">'
    data += "<head><title>Hosted Cellxgene</title></head>"
    data += "<body><H1>Welcome to cellxgene</H1>"

    config = current_app.app_config
    server_config = config.server_config
    datasets = []
    for dataroot_dict in server_config.multi_dataset__dataroot.values():
        dataroot = dataroot_dict["dataroot"]
        url_dataroot = dataroot_dict["base_url"]
        locator = DataLocator(
            dataroot, region_name=server_config.data_locator__s3__region_name)
        for fname in locator.ls():
            location = path_join(dataroot, fname)
            try:
                MatrixDataLoader(location, app_config=config)
                datasets.append((url_dataroot, fname))
            except DatasetAccessError:
                # skip over invalid datasets
                pass

    data += "<br/>Select one of these datasets...<br/>"
    data += "<ul>"
    datasets.sort()
    for url_dataroot, dataset in datasets:
        data += f"<li><a href={url_dataroot}/{dataset}>{dataset}</a></li>"
    data += "</ul>"
    data += "</body></html>"

    return make_response(data)
Example 5
    def __init__(self, location, matrix_data_type=None, app_config=None):
        """ location can be a string or DataLocator """
        region_name = None if app_config is None else app_config.server_config.data_locator__s3__region_name
        self.location = DataLocator(location, region_name=region_name)
        if not self.location.exists():
            raise DatasetAccessError("Dataset does not exist.",
                                     HTTPStatus.NOT_FOUND)

        # matrix_data_type is an enum value of type MatrixDataType
        self.matrix_data_type = matrix_data_type
        # matrix_type is a DataAdaptor type, which corresponds to the matrix_data_type
        self.matrix_type = None

        if matrix_data_type is None:
            self.matrix_data_type = self.__matrix_data_type()

        if not self.__matrix_data_type_allowed(app_config):
            raise DatasetAccessError("Dataset does not have an allowed type.")

        if self.matrix_data_type == MatrixDataType.H5AD:
            from server.data_anndata.anndata_adaptor import AnndataAdaptor

            self.matrix_type = AnndataAdaptor
        elif self.matrix_data_type == MatrixDataType.CXG:
            from server.data_cxg.cxg_adaptor import CxgAdaptor

            self.matrix_type = CxgAdaptor
Example 6
def _is_accessible(path, config):
    if path is None:
        return True

    try:
        dl = DataLocator(path,
                         region_name=config.data_locator__s3__region_name)
        return dl.exists()
    except RuntimeError:
        return False
Example 7
 def test_url_http(self):
     url = "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/master/example-dataset/pbmc3k.h5ad"
     locator = DataLocator(url)
     config = AppConfig()
     config.update(**self.args)
     data = AnndataAdaptor(locator, config)
     self.stdAsserts(data)
Example 8
 def setUp(self):
     self.data_file = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
     config = AppConfig()
     config.update_server_config(single_dataset__datapath=self.data_file.path)
     config.update_server_config(app__flask_secret_key="secret")
     config.complete_config()
     self.data = AnndataAdaptor(self.data_file, config)
Example 9
 def test_posix_file(self):
     locator = DataLocator("../example-dataset/pbmc3k.h5ad")
     config = self.get_basic_config()
     config.update_server_config(single_dataset__datapath=locator.path)
     config.complete_config()
     data = AnndataAdaptor(locator, config)
     self.stdAsserts(data)
Example 10
 def convert_pbmc3k(self, **kwargs):
     rand_str = random_string(8)
     data_locator = f"/tmp/test_{rand_str}.cxg"
     self.fixtures.append(data_locator)
     source_h5ad = anndata.read_h5ad(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
     write_cxg(adata=source_h5ad, container=data_locator, title="pbmc3k", **kwargs)
     config = app_config(data_locator)
     return CxgAdaptor(DataLocator(data_locator), config)
Example 11
    def setUp(self):
        self.data_locator = DataLocator(f"{PROJECT_ROOT}/server/test/test_datasets/nan.h5ad")
        self.config = app_config(self.data_locator.path)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            self.data = AnndataAdaptor(self.data_locator, self.config)
            self.data._create_schema()
Example 12
 def convert_pbmc3k(self, **kwargs):
     random_string = "".join(
         random.choice(string.ascii_letters) for _ in range(8))
     data_locator = f"/tmp/test_{random_string}.cxg"
     self.fixtures.append(data_locator)
     source_h5ad = anndata.read_h5ad(
         f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
     write_cxg(adata=source_h5ad,
               container=data_locator,
               title="pbmc3k",
               **kwargs)
     config = app_config(data_locator)
     return CxgAdaptor(DataLocator(data_locator), config)
Example 13
 def setUp(self):
     data_locator = f"{PROJECT_ROOT}/server/test/test_datasets/pbmc3k.cxg"
     config = app_config(data_locator)
     self.data = CxgAdaptor(DataLocator(data_locator), config)
Example 14
        return hashes

    @staticmethod
    def get_csp_hashes(app, app_config):
        script_hashes, style_hashes = WSGIServer.load_static_csp_hashes(app)
        script_hashes += WSGIServer.compute_inline_scp_hashes(app, app_config)
        return (script_hashes, style_hashes)


try:
    app_config = AppConfig()

    has_config = False
    # config file: look first for "config.yaml" in the current working directory
    config_file = "config.yaml"
    config_location = DataLocator(config_file)
    if config_location.exists():
        with config_location.local_handle() as lh:
            logging.info(f"Configuration from {config_file}")
            app_config.update_from_config_file(lh)
            has_config = True

    else:
        # config file: second, use the CXG_CONFIG_FILE
        config_file = os.getenv("CXG_CONFIG_FILE")
        if config_file:
            region_name = discover_s3_region_name(config_file)
            config_location = DataLocator(config_file, region_name)
            if config_location.exists():
                with config_location.local_handle() as lh:
                    logging.info(f"Configuration from {config_file}")
Example 15
 def get_data(self, fixture):
     data_locator = f"{FIXTURES_ROOT}/{fixture}"
     config = app_config(data_locator)
     return CxgAdaptor(DataLocator(data_locator), config)
Example 16
class MatrixDataLoader(object):
    def __init__(self, location, matrix_data_type=None, app_config=None):
        """ location can be a string or DataLocator """
        region_name = None if app_config is None else app_config.server_config.data_locator__s3__region_name
        self.location = DataLocator(location, region_name=region_name)
        if not self.location.exists():
            raise DatasetAccessError("Dataset does not exist.",
                                     HTTPStatus.NOT_FOUND)

        # matrix_data_type is an enum value of type MatrixDataType
        self.matrix_data_type = matrix_data_type
        # matrix_type is a DataAdaptor type, which corresponds to the matrix_data_type
        self.matrix_type = None

        if matrix_data_type is None:
            self.matrix_data_type = self.__matrix_data_type()

        if not self.__matrix_data_type_allowed(app_config):
            raise DatasetAccessError("Dataset does not have an allowed type.")

        if self.matrix_data_type == MatrixDataType.H5AD:
            from server.data_anndata.anndata_adaptor import AnndataAdaptor

            self.matrix_type = AnndataAdaptor
        elif self.matrix_data_type == MatrixDataType.CXG:
            from server.data_cxg.cxg_adaptor import CxgAdaptor

            self.matrix_type = CxgAdaptor

    def __matrix_data_type(self):
        if self.location.path.endswith(".h5ad"):
            return MatrixDataType.H5AD
        elif ".cxg" in self.location.path:
            return MatrixDataType.CXG
        else:
            return MatrixDataType.UNKNOWN

    def __matrix_data_type_allowed(self, app_config):
        if self.matrix_data_type == MatrixDataType.UNKNOWN:
            return False

        if not app_config:
            return True
        if not app_config.is_multi_dataset():
            return True
        if len(app_config.server_config.multi_dataset__allowed_matrix_types) == 0:
            return True

        for val in app_config.server_config.multi_dataset__allowed_matrix_types:
            try:
                if self.matrix_data_type == MatrixDataType(val):
                    return True
            except ValueError:
                # multi_dataset__allowed_matrix_types contains a value that is not a
                # valid MatrixDataType.  TODO: add a feature to check the AppConfig
                # for errors on startup.
                return False

        return False

    def pre_load_validation(self):
        if self.matrix_data_type == MatrixDataType.UNKNOWN:
            raise DatasetAccessError(
                "Dataset does not have a recognized type: .h5ad or .cxg")
        self.matrix_type.pre_load_validation(self.location)

    def file_size(self):
        return self.matrix_type.file_size(self.location)

    def open(self, app_config, dataset_config=None):
        # create and return a DataAdaptor object
        return self.matrix_type.open(self.location, app_config, dataset_config)
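Taken together with Examples 1-3, MatrixDataLoader is driven in two steps: construct it from a path string (or a DataLocator), then call open() with a completed AppConfig. The sketch below is a minimal illustration; the import paths and the pbmc3k dataset path are assumptions drawn from the snippets above, not verified against the library.

# Usage sketch; import paths and the pbmc3k path are assumptions.
from server.common.app_config import AppConfig                  # import path assumed
from server.data_common.matrix_loader import MatrixDataLoader   # import path assumed

config = AppConfig()
config.update_server_config(
    app__flask_secret_key="secret",
    single_dataset__datapath=f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
)
config.complete_config()

loader = MatrixDataLoader(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad", app_config=config)
loader.pre_load_validation()   # raises DatasetAccessError for unrecognized types
data = loader.open(config)     # returns an AnndataAdaptor for .h5ad input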
Example 17
 def setUp(self):
     config = app_config(self.data_locator, self.backed)
     self.data = AnndataAdaptor(DataLocator(self.data_locator), config)
Example 18
 def get_data(self, fixture):
     data_locator = f"{PROJECT_ROOT}/server/test/test_datasets/{fixture}"
     config = app_config(data_locator)
     return CxgAdaptor(DataLocator(data_locator), config)
 def test_url_http(self):
     url = "http://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad"
     locator = DataLocator(url)
     config = self.get_basic_config()
     data = AnndataAdaptor(locator, config)
     self.stdAsserts(data)
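Across all of these examples, DataLocator is the common entry point: it wraps a local path, an S3 URI, or an HTTP URL behind one interface. The recap below simply collects the calls exercised above; the sample paths are illustrative, not fixtures.

# Recap sketch of the DataLocator calls seen in the examples above.
locator = DataLocator(f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad")
locator.exists()     # existence check, as in _is_accessible() (Example 6)
locator.path         # raw path string, passed to single_dataset__datapath
locator.abspath()    # absolute path handed to MatrixDataLoader (Examples 1-3)
# S3-backed locations take a region: DataLocator("s3://bucket/data.cxg", region_name="us-west-2")
# Dataroot-style locations expose ls() for listing datasets (Example 4),
# and local_handle() yields a local copy of remote files (Example 14).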