Ejemplo n.º 1
0
    def test_get_most_recent_annotation_for_user_dataset(self):
        """Verify query_for_most_recent returns the latest annotation.

        Five annotations are stored for the same user and dataset, one
        commit each, and the lookup is expected to return the one whose
        tiledb_uri is ``tiledb_uri_4`` (the last one committed).
        """
        matching_datasets = self.db.query(
            table_args=[CellxGeneDataset],
            filter_args=[CellxGeneDataset.name == 'test_dataset'])
        dataset_id = str(matching_datasets[0].id)

        uris_in_insert_order = (
            'tiledb_uri_0',
            'tiledb_uri_1',
            'tiledb_uri_2',
            'tiledb_uri_3',
            'tiledb_uri_4',
        )
        for uri in uris_in_insert_order:
            # One commit per row: the created_at time is written on the db
            # server, so the rows have to be committed separately.
            row = Annotation(dataset_id=dataset_id,
                             user_id='test_user_id',
                             tiledb_uri=uri)
            self.db.session.add(row)
            self.db.session.commit()

        filters = [
            Annotation.dataset_id == dataset_id,
            Annotation.user_id == 'test_user_id',
        ]
        latest = self.db.query_for_most_recent(Annotation, filters)

        self.assertEqual(latest.tiledb_uri, 'tiledb_uri_4')
Ejemplo n.º 2
0
 def _create_test_annotation(self):
     """Add and commit one annotation for the ``test_dataset`` dataset.

     The first dataset returned by the name query is used; the annotation
     is stored with fixed user id and tiledb uri values.
     """
     matching_datasets = self.db.query(
         [CellxGeneDataset],
         [CellxGeneDataset.name == "test_dataset"],
     )
     test_dataset = matching_datasets[0]

     new_row = Annotation(
         tiledb_uri="tiledb_uri",
         user_id="test_user_id",
         dataset_id=str(test_dataset.id),
     )
     session = self.db.session
     session.add(new_row)
     session.commit()
Ejemplo n.º 3
0
 def _create_test_annotations(self, annotation_count: int = 10):
     """Add ``annotation_count`` annotations and commit them together.

     Each annotation takes its dataset and user from
     ``self.order_by_random`` and its tiledb uri from
     ``self.get_random_string``. All rows are added with a single
     ``add_all`` call followed by one commit.
     """

     def build_annotation():
         # Look up the dataset first, then the user, then generate the uri.
         picked_dataset = self.order_by_random(CellxGeneDataset)
         picked_user = self.order_by_random(CellxGeneUser)
         return Annotation(tiledb_uri=self.get_random_string(),
                           user_id=picked_user.id,
                           dataset_id=str(picked_dataset.id))

     new_rows = [build_annotation() for _ in range(annotation_count)]
     self.db.session.add_all(new_rows)
     self.db.session.commit()
Ejemplo n.º 4
0
    def write_labels(self, df, data_adaptor):
        """Store the label dataframe ``df`` and record it as an Annotation.

        A non-empty ``df`` is checked with ``self.check_category_names``,
        has each column cast to the dtype given by ``get_dtype_of_array``
        (the columns of ``df`` itself are reassigned) and is written with
        ``tiledb.from_pandas(..., sparse=True)``. An empty ``df`` writes
        nothing and the annotation is recorded with an empty tiledb uri.
        In both cases an Annotation row is added and committed.

        NOTE: The uri contains the dataset name, user name and a timestamp
        as a convenience for debugging purposes. People may have the same
        name and time.time() can be server dependent.
        See - https://docs.python.org/2/library/time.html#time.time

        The annotation objects in the database should be used as the source
        of truth about who an annotation belongs to (for authorization
        purposes) and what time it was created (for garbage collection).
        """
        auth_user_id = self.get_user_id()
        user_name = self.get_user_name()
        timestamp = time.time()
        location = data_adaptor.get_location()
        dataset_id = self.db.get_or_create_dataset(location)
        dataset_name = data_adaptor.get_title()
        user_id = self.db.get_or_create_user(auth_user_id)

        uri = f"{self.directory_path}{dataset_name}/{user_name}/{timestamp}"
        # s3:// uris are skipped; any other uri is created as a directory.
        if not uri.startswith("s3://"):
            os.makedirs(uri, exist_ok=True)

        # The schema hints are collected before the columns are re-cast.
        _, schema_hints = get_dtypes_and_schemas_of_dataframe(df)

        if df.empty:
            # Nothing to write; the row is stored with an empty uri.
            # NOTE(review): for a non-s3 uri the directory above has already
            # been created at this point - confirm that is intended.
            uri = ""
        else:
            self.check_category_names(df)
            # convert to tiledb datatypes
            for column in df:
                df[column] = df[column].astype(get_dtype_of_array(df[column]))
            tiledb.from_pandas(uri, df, sparse=True)

        new_row = Annotation(
            tiledb_uri=uri,
            user_id=user_id,
            dataset_id=str(dataset_id),
            schema_hints=json.dumps(schema_hints),
        )
        session = self.db.session
        session.add(new_row)
        session.commit()