def data_with_tmp_tiledb_annotations(ext: MatrixDataType):
    """Open the pbmc3k fixture with hosted-TileDB annotations in a temp directory.

    A fresh temporary directory is created and used as the hosted file
    directory for the annotations. The dataset fixture is picked by ``ext``
    (H5AD or CXG); any other value raises ``KeyError`` from the lookup.

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple: the opened data adaptor, the
        path of the temporary directory, and the ``AnnotationsHostedTileDB``
        instance writing into that directory.
    """
    # The same URI is used for the dataset config and for the DbUtils handle.
    db_uri = "postgresql://*****:*****@localhost:5432"

    tmp_dir = tempfile.mkdtemp()
    fixture_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }
    data_locator = DataLocator(fixture_by_type[ext])

    config = AppConfig()
    server_settings = dict(
        app__flask_secret_key="secret",
        multi_dataset__dataroot=data_locator.path,
        authentication__type="test",
        authentication__insecure_test_environment=True,
    )
    config.update_server_config(**server_settings)

    dataset_settings = dict(
        embeddings__names=["umap"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
        user_annotations__type="hosted_tiledb_array",
        user_annotations__hosted_tiledb_array__db_uri=db_uri,
        user_annotations__hosted_tiledb_array__hosted_file_directory=tmp_dir,
    )
    config.update_default_dataset_config(**dataset_settings)
    config.complete_config()

    loader = MatrixDataLoader(data_locator.abspath())
    data = loader.open(config)
    annotations = AnnotationsHostedTileDB(tmp_dir, DbUtils(db_uri))
    return data, tmp_dir, annotations
def __init__(self):
    """Create the local database, attach a ``DbUtils`` handle, and seed it.

    After the database exists, the two ``_populate_*`` helpers are invoked
    in order: ``_populate_test_data`` first, then
    ``_populate_test_data_many``.
    """
    uri = "postgresql://*****:*****@localhost:5432"

    # The database must be created before a DbUtils handle is opened on it.
    create_db(uri)
    self.db = DbUtils(uri)

    self._populate_test_data()
    self._populate_test_data_many()
class TestDatabase:
    """Create the local test database and seed it with fixture rows.

    Instantiating the class creates the database, opens a ``DbUtils`` handle
    on it (stored as ``self.db``) and inserts two kinds of rows: a small set
    with fixed, well-known identifiers, and a batch with random identifiers.
    """

    def __init__(self):
        db_uri = "postgresql://*****:*****@localhost:5432"
        create_db(db_uri)
        self.db = DbUtils(db_uri)
        self._populate_test_data()
        self._populate_test_data_many()

    def _populate_test_data(self):
        """Insert the fixed rows: users, then a dataset, then an annotation."""
        self._create_test_user()
        self._create_test_dataset()
        self._create_test_annotation()

    def _populate_test_data_many(self):
        """Insert the randomly named rows: users, datasets, annotations."""
        self._create_test_users()
        self._create_test_datasets()
        self._create_test_annotations()

    def _create_test_user(self):
        """Add the two fixed users ("test_user_id" and '1234') and commit."""
        first = CellxGeneUser(id="test_user_id")
        second = CellxGeneUser(id='1234')
        self.db.session.add(first)
        self.db.session.add(second)
        self.db.session.commit()

    def _create_test_dataset(self):
        """Add the fixed dataset named "test_dataset" and commit."""
        self.db.session.add(CellxGeneDataset(name="test_dataset"))
        self.db.session.commit()

    def _create_test_annotation(self):
        """Add one fixed annotation linking the fixed user and dataset."""
        matches = self.db.query(
            [CellxGeneDataset],
            [CellxGeneDataset.name == "test_dataset"],
        )
        test_dataset = matches[0]
        self.db.session.add(
            Annotation(
                tiledb_uri="tiledb_uri",
                user_id="test_user_id",
                dataset_id=str(test_dataset.id),
            )
        )
        self.db.session.commit()

    @staticmethod
    def get_random_string():
        """Return 12 random lowercase ASCII letters."""
        alphabet = string.ascii_lowercase
        return ''.join(random.choice(alphabet) for _ in range(12))

    def _create_test_users(self, user_count: int = 10):
        """Add ``user_count`` users with random ids in a single commit."""
        new_users = [
            CellxGeneUser(id=self.get_random_string())
            for _ in range(user_count)
        ]
        self.db.session.add_all(new_users)
        self.db.session.commit()

    def _create_test_datasets(self, dataset_count: int = 10):
        """Add ``dataset_count`` datasets with random names in a single commit."""
        new_datasets = [
            CellxGeneDataset(name=self.get_random_string())
            for _ in range(dataset_count)
        ]
        self.db.session.add_all(new_datasets)
        self.db.session.commit()

    def order_by_random(self, table: Base):
        """Return the first row of ``table`` under a random ordering."""
        shuffled = self.db.session.query(table).order_by(func.random())
        return shuffled.first()

    def _create_test_annotations(self, annotation_count: int = 10):
        """Add ``annotation_count`` annotations on randomly picked rows.

        Each annotation gets a random ``tiledb_uri`` and points at a randomly
        selected existing dataset and user.
        """

        def random_annotation():
            # Pick the dataset before the user, matching the insertion order
            # used for the fixed rows.
            picked_dataset = self.order_by_random(CellxGeneDataset)
            picked_user = self.order_by_random(CellxGeneUser)
            return Annotation(
                tiledb_uri=self.get_random_string(),
                user_id=picked_user.id,
                dataset_id=str(picked_dataset.id),
            )

        new_annotations = [random_annotation() for _ in range(annotation_count)]
        self.db.session.add_all(new_annotations)
        self.db.session.commit()
def handle_user_annotations(self, context):
    """Validate the user-annotation settings and set up the annotation backend.

    Every ``user_annotations__*`` setting is first passed to ``check_attr``
    together with the type(s) it is expected to have.

    When annotations are enabled, authentication must be enabled and of a
    compatible type; ``self.user_annotations`` is then set to an
    ``AnnotationsLocalFile`` ("local_file_csv") or an
    ``AnnotationsHostedTileDB`` ("hosted_tiledb_array").

    When annotations are disabled, nothing is constructed; a warning is sent
    through ``context["messagefn"]`` for each annotation option that was
    supplied and is therefore ignored.

    Args:
        context: mapping providing a ``"messagefn"`` callable that accepts a
            single message string.

    Raises:
        ConfigurationError: if annotations are enabled and authentication is
            disabled or incompatible, both an annotations file and directory
            are given, the file extension is not ``.csv``, the annotations
            directory cannot be created, the ontology fails to load, or the
            annotation type is unknown.
    """
    self.check_attr("user_annotations__enable", bool)
    self.check_attr("user_annotations__type", str)
    self.check_attr("user_annotations__local_file_csv__directory", (type(None), str))
    self.check_attr("user_annotations__local_file_csv__file", (type(None), str))
    self.check_attr("user_annotations__ontology__enable", bool)
    self.check_attr("user_annotations__ontology__obo_location", (type(None), str))
    self.check_attr("user_annotations__hosted_tiledb_array__db_uri", (type(None), str))
    self.check_attr(
        "user_annotations__hosted_tiledb_array__hosted_file_directory",
        (type(None), str),
    )

    if self.user_annotations__enable:
        server_config = self.app_config.server_config
        if not self.app__authentication_enable:
            raise ConfigurationError(
                "user annotations requires authentication to be enabled")
        if not server_config.auth.is_valid_authentication_type():
            auth_type = server_config.authentication__type
            raise ConfigurationError(
                f"authentication method {auth_type} is not compatible with user annotations"
            )

        # TODO: replace this with a factory pattern. Two annotation types are
        # handled here: local_file_csv and hosted_tiledb_array.
        if self.user_annotations__type == "local_file_csv":
            dirname = self.user_annotations__local_file_csv__directory
            filename = self.user_annotations__local_file_csv__file

            if filename is not None and dirname is not None:
                raise ConfigurationError(
                    "'annotations-file' and 'annotations-dir' may not be used together."
                )

            if filename is not None:
                # A file name without any extension is accepted as-is.
                _, lf_ext = splitext(filename)
                if lf_ext and lf_ext != ".csv":
                    raise ConfigurationError(
                        f"annotation file type must be .csv: {filename}")

            if dirname is not None and not isdir(dirname):
                try:
                    os.mkdir(dirname)
                except OSError as e:
                    raise ConfigurationError(
                        "Unable to create directory specified by --annotations-dir"
                    ) from e

            self.user_annotations = AnnotationsLocalFile(dirname, filename)

            # If the user has specified a fixed label file, go ahead and
            # validate it so that errors surface early in the process.
            if server_config.single_dataset__datapath and self.user_annotations__local_file_csv__file:
                with server_config.matrix_data_cache_manager.data_adaptor(
                        self.tag, server_config.single_dataset__datapath,
                        self.app_config) as data_adaptor:
                    data_adaptor.check_new_labels(
                        self.user_annotations.read_labels(data_adaptor))

            if self.user_annotations__ontology__enable or self.user_annotations__ontology__obo_location:
                try:
                    self.user_annotations.load_ontology(
                        self.user_annotations__ontology__obo_location)
                except OntologyLoadFailure as e:
                    raise ConfigurationError(
                        "Unable to load ontology terms\n" + str(e)) from e

        elif self.user_annotations__type == "hosted_tiledb_array":
            # Re-check with ``str`` only: unlike the checks at the top of the
            # method, None is not among the accepted types for this backend.
            self.check_attr("user_annotations__hosted_tiledb_array__db_uri", str)
            self.check_attr(
                "user_annotations__hosted_tiledb_array__hosted_file_directory", str)
            self.user_annotations = AnnotationsHostedTileDB(
                directory_path=self.user_annotations__hosted_tiledb_array__hosted_file_directory,
                db=DbUtils(self.user_annotations__hosted_tiledb_array__db_uri),
            )
        else:
            raise ConfigurationError(
                'The only annotation type support is "local_file_csv" or "hosted_tiledb_array'
            )
    else:
        # Annotations are disabled: warn about every option that was supplied
        # and will have no effect.
        if self.user_annotations__type == "local_file_csv":
            dirname = self.user_annotations__local_file_csv__directory
            filename = self.user_annotations__local_file_csv__file
            if filename is not None:
                # The key was previously misspelled "messsagefn", which raised
                # KeyError here instead of emitting the warning.
                context["messagefn"](
                    "Warning: --annotations-file ignored as annotations are disabled."
                )
            if dirname is not None:
                context["messagefn"](
                    "Warning: --annotations-dir ignored as annotations are disabled."
                )

        if self.user_annotations__ontology__enable:
            context["messagefn"](
                "Warning: --experimental-annotations-ontology"
                " ignored as annotations are disabled.")
        if self.user_annotations__ontology__obo_location is not None:
            context["messagefn"](
                "Warning: --experimental-annotations-ontology-obo"
                " ignored as annotations are disabled.")
class DatabaseTest(unittest.TestCase):
    """Checks the rows seeded by ``TestDatabase`` and the most-recent query.

    The ``DbUtils`` handle is a class attribute, so it is created when the
    class body is executed and removed again in ``tearDownClass``.
    """

    db = DbUtils("postgresql://*****:*****@localhost:5432")

    @classmethod
    def setUpClass(cls) -> None:
        # Instantiating the fixture class creates and seeds the database.
        TestDatabase()

    @classmethod
    def tearDownClass(cls) -> None:
        del cls.db

    def test_user_creation(self):
        """The fixed user exists and more than 10 users are present."""
        fetched = self.db.get(table=CellxGeneUser, entity_id='test_user_id')
        self.assertEqual(fetched.id, 'test_user_id')

        total_users = self.db.session.query(CellxGeneUser).count()
        self.assertGreater(total_users, 10)

    def test_dataset_creation(self):
        """The fixed dataset exists and more than 10 datasets are present."""
        matches = self.db.query(
            table_args=[CellxGeneDataset],
            filter_args=[CellxGeneDataset.name == 'test_dataset'],
        )
        self.assertEqual(matches[0].name, 'test_dataset')

        total_datasets = self.db.session.query(CellxGeneDataset).count()
        self.assertGreater(total_datasets, 10)

    def test_annotation_creation(self):
        """The fixed annotation exists and more than 10 annotations are present."""
        matches = self.db.query(
            table_args=[Annotation],
            filter_args=[Annotation.tiledb_uri == 'tiledb_uri'],
        )
        first_match = matches[0]
        self.assertEqual(first_match.tiledb_uri, 'tiledb_uri')

        total_annotations = self.db.session.query(Annotation).count()
        self.assertGreater(total_annotations, 10)

    def test_get_most_recent_annotation_for_user_dataset(self):
        """``query_for_most_recent`` returns the last annotation committed."""
        dataset_rows = self.db.query(
            table_args=[CellxGeneDataset],
            filter_args=[CellxGeneDataset.name == 'test_dataset'],
        )
        dataset_id = str(dataset_rows[0].id)

        # Each row gets its own commit because the created_at time is written
        # on the db server.
        uris_in_commit_order = (
            'tiledb_uri_0',
            'tiledb_uri_1',
            'tiledb_uri_2',
            'tiledb_uri_3',
            'tiledb_uri_4',
        )
        for uri in uris_in_commit_order:
            self.db.session.add(
                Annotation(dataset_id=dataset_id,
                           user_id='test_user_id',
                           tiledb_uri=uri))
            self.db.session.commit()

        filters = [
            Annotation.dataset_id == dataset_id,
            Annotation.user_id == 'test_user_id',
        ]
        newest = self.db.query_for_most_recent(Annotation, filters)
        self.assertEqual(newest.tiledb_uri, 'tiledb_uri_4')