def create_dataset():
    """Create a new dataset in the project's datastore from the request body.

    Columns and rows are mandatory elements of the request body; annotations
    are optional. The request body is expected to have the following format:

    {
      "columns": [
        {
          "id": 0,
          "name": "string",
          "type": "string"
        }
      ],
      "rows": [
        {
          "id": 0,
          "values": [
            "string"
          ]
        }
      ],
      "annotations": [
        {
          "columnId": 0,
          "rowId": 0,
          "key": "string",
          "value": "string"
        }
      ]
    }

    Each annotation is sorted into one of three lists: annotations without a
    column id are treated as row annotations (this includes annotations that
    have neither a column id nor a row id), annotations with a column id but
    without a row id are column annotations, and all others are cell
    annotations.

    A ValueError raised while the dataset is created or serialized is
    re-raised as srv.InvalidRequest carrying the same message.
    """
    # Validate the request
    payload = srv.validate_json_request(
        request,
        required=[labels.COLUMNS, labels.ROWS],
        optional=[labels.ANNOTATIONS]
    )
    columns = deserialize.DATASET_COLUMNS(payload[labels.COLUMNS])
    rows = [deserialize.DATASET_ROW(doc) for doc in payload[labels.ROWS]]
    # Annotations remain None if the request does not contain any
    annotations = None
    if labels.ANNOTATIONS in payload:
        annotations = DatasetMetadata()
        for doc in payload[labels.ANNOTATIONS]:
            anno = deserialize.ANNOTATION(doc)
            # Pick the target list based on which identifiers are present
            if anno.column_id is None:
                target = annotations.rows
            elif anno.row_id is None:
                target = annotations.columns
            else:
                target = annotations.cells
            target.append(anno)
    try:
        dataset = api.datasets.create_dataset(
            project_id=config.project_id,
            columns=columns,
            rows=rows,
            annotations=annotations
        )
        return jsonify(dataset)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
def __init__(
    self, identifier, columns=None, row_count=None, annotations=None,
    name=None
):
    """Initialize the dataset handle.

    The identifier, columns, row count, and name are passed on to the
    initializer of the super class. If no annotations are given an empty
    DatasetMetadata object is used.

    Documented to raise ValueError if dataset columns or rows do not have
    unique identifiers. No such check is visible in this method itself; it
    presumably happens in the super class initializer (TODO confirm).

    Parameters
    ----------
    identifier: string
        Unique dataset identifier.
    columns: list(DatasetColumn), optional
        List of columns. It is expected that each column has a unique
        identifier.
    row_count: int, optional
        Number of rows in the dataset
    annotations: vizier.datastore.annotation.dataset.DatasetMetadata, optional
        Annotations for dataset components
    name: optional
        Passed through unchanged to the super class initializer.
    """
    super(DatasetHandle, self).__init__(
        identifier=identifier,
        columns=columns,
        row_count=row_count,
        name=name
    )
    # Fall back to an empty annotation set if the caller provided none
    if annotations is None:
        annotations = DatasetMetadata()
    self.annotations = annotations
def test_deduplicate_annotations(self):
    """Ensure that duplicate annotations are gone after a dataset has been
    created and read back from the file system datastore.
    """
    store = FileSystemDatastore(STORE_DIR)
    schema = [
        DatasetColumn(identifier=0, name='A'),
        DatasetColumn(identifier=1, name='B')
    ]
    data = [DatasetRow(identifier=0, values=['a', 'b'])]
    # Cell annotations contain repeated entries.
    # NOTE(review): five distinct cell annotations are supplied but only
    # four are expected below; presumably the one for row 1 (which is not
    # part of the dataset) is dropped by the datastore -- confirm.
    cell_annos = [
        DatasetAnnotation(column_id=0, row_id=0, key='X', value=1),
        DatasetAnnotation(column_id=0, row_id=0, key='X', value=2),
        DatasetAnnotation(column_id=1, row_id=0, key='X', value=3),
        DatasetAnnotation(column_id=1, row_id=1, key='X', value=3),
        DatasetAnnotation(column_id=0, row_id=0, key='Y', value=1),
        DatasetAnnotation(column_id=0, row_id=0, key='X', value=1),
        DatasetAnnotation(column_id=0, row_id=0, key='X', value=2),
        DatasetAnnotation(column_id=1, row_id=0, key='X', value=3),
        DatasetAnnotation(column_id=1, row_id=1, key='X', value=3),
    ]
    # Eight column annotations alternating between column 0 and column 1
    column_annos = [
        DatasetAnnotation(column_id=i % 2, key='A', value='x')
        for i in range(8)
    ]
    # The same row annotation twice
    row_annos = [
        DatasetAnnotation(row_id=0, key='E', value=100) for _ in range(2)
    ]
    ds = store.create_dataset(
        columns=schema,
        rows=data,
        annotations=DatasetMetadata(
            cells=cell_annos,
            columns=column_annos,
            rows=row_annos
        )
    )
    # Read the dataset back from the store before checking the annotations
    ds = store.get_dataset(ds.identifier)
    metadata = ds.annotations
    self.assertEqual(len(metadata.cells), 4)
    self.assertEqual(len(metadata.columns), 2)
    self.assertEqual(len(metadata.rows), 1)
    annos = metadata.for_cell(column_id=0, row_id=0)
    self.assertEqual(len(annos), 3)
    cell_values = [a.value for a in annos]
    self.assertTrue(1 in cell_values)
    self.assertTrue(2 in cell_values)
    self.assertFalse(3 in cell_values)
    self.assertEqual(len(metadata.find_all(values=annos, key='X')), 2)
    # Two annotations share key 'X', so looking up a single one must fail
    with self.assertRaises(ValueError):
        metadata.find_one(values=annos, key='X')
    self.assertEqual(len(metadata.for_column(column_id=0)), 1)
    self.assertEqual(len(metadata.for_row(row_id=0)), 1)
    filtered = metadata.filter(columns=[1])
    self.assertEqual(len(filtered.cells), 1)
    self.assertEqual(len(filtered.columns), 1)
    self.assertEqual(len(filtered.rows), 1)
def get_annotations(self, identifier, column_id=None, row_id=None):
    """Get the annotations for a resource of a given dataset.

    Which annotations are returned depends on the identifiers provided:

    - neither column nor row identifier: all annotations of the dataset
    - only the row identifier: annotations for the specified row
    - only the column identifier: annotations for the specified column
    - both identifiers: annotations for the specified cell

    The result is None if no directory exists for the given dataset
    identifier.

    Parameters
    ----------
    identifier: string
        Unique dataset identifier
    column_id: int, optional
        Unique column identifier
    row_id: int, optional
        Unique row identifier

    Returns
    -------
    vizier.datastore.annotation.dataset.DatasetMetadata
    """
    # A missing dataset directory signals an unknown dataset
    if not os.path.isdir(self.get_dataset_dir(identifier)):
        return None
    metadata = DatasetMetadata.from_file(
        self.get_metadata_filename(identifier)
    )
    has_column = column_id is not None
    has_row = row_id is not None
    if has_column and has_row:
        # Cell annotations only
        cell_annos = DatasetMetadata(cells=metadata.cells)
        return cell_annos.filter(columns=[column_id], rows=[row_id])
    if has_column:
        # Column annotations only
        column_annos = DatasetMetadata(columns=metadata.columns)
        return column_annos.filter(columns=[column_id])
    if has_row:
        # Row annotations only
        row_annos = DatasetMetadata(rows=metadata.rows)
        return row_annos.filter(rows=[row_id])
    # No identifier given: return everything that was read from file
    return metadata
def DATASET_ANNOTATIONS(obj):
    """Create a dataset metadata object from its dictionary serialization.

    The dictionary is expected to contain the three elements 'columns',
    'rows', and 'cells'. Each element is a list of serialized annotations
    that are converted individually using ANNOTATION.

    Parameters
    ----------
    obj: dict
        Default serialization for dataset metadata

    Returns
    -------
    vizier.datastore.annotation.dataset.DatasetMetadata
    """
    column_annos = [ANNOTATION(doc) for doc in obj['columns']]
    row_annos = [ANNOTATION(doc) for doc in obj['rows']]
    cell_annos = [ANNOTATION(doc) for doc in obj['cells']]
    return DatasetMetadata(
        columns=column_annos,
        rows=row_annos,
        cells=cell_annos
    )
def test_query_annotations(self):
    """Query the datastore for column, row, and cell annotations of a
    dataset and verify the number of annotations in each result.
    """
    store = FileSystemDatastore(STORE_DIR)
    schema = [
        DatasetColumn(identifier=0, name='A'),
        DatasetColumn(identifier=1, name='B')
    ]
    data = [DatasetRow(identifier=0, values=['a', 'b'])]
    metadata = DatasetMetadata(
        cells=[
            DatasetAnnotation(column_id=0, row_id=0, key='X', value=1),
            DatasetAnnotation(column_id=0, row_id=0, key='X', value=2),
            DatasetAnnotation(column_id=1, row_id=0, key='X', value=3),
            DatasetAnnotation(column_id=0, row_id=0, key='Y', value=1)
        ],
        columns=[
            DatasetAnnotation(column_id=0, key='A', value='x'),
            DatasetAnnotation(column_id=1, key='A', value='x')
        ],
        rows=[
            DatasetAnnotation(row_id=0, key='E', value=100)
        ]
    )
    ds = store.create_dataset(columns=schema, rows=data, annotations=metadata)
    # Each entry pairs the query arguments with the expected number of
    # (column, row, cell) annotations in the query result.
    expectations = [
        ({'column_id': 1}, (1, 0, 0)),
        ({'column_id': 0}, (1, 0, 0)),
        ({'row_id': 0}, (0, 1, 0)),
        ({'column_id': 1, 'row_id': 0}, (0, 0, 1)),
        ({'column_id': 0, 'row_id': 0}, (0, 0, 3)),
    ]
    for query, (column_count, row_count, cell_count) in expectations:
        annos = store.get_annotations(ds.identifier, **query)
        self.assertEqual(len(annos.columns), column_count)
        self.assertEqual(len(annos.rows), row_count)
        self.assertEqual(len(annos.cells), cell_count)
def test_add_and_delete_metadata(self):
    """Add column, row, and cell annotations to a metadata object and
    verify what remains after removing some of them again.
    """
    annotations = DatasetMetadata()
    # -- Column annotations: (column id, value), all with key 'A'
    for col, val in [(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)]:
        annotations.add(column_id=col, key='A', value=val)
    self.assertEqual(len(annotations.columns), 5)
    annotations.remove(column_id=0, value=1)
    self.assertEqual(len(annotations.columns), 4)
    remaining = annotations.for_column(column_id=0)
    self.assertEqual(len(remaining), 2)
    for anno in remaining:
        self.assertEqual(anno.key, 'A')
        self.assertEqual(anno.value, 0)
    # -- Row annotations: (row id, key, value)
    row_entries = [
        (0, 'A', 0),
        (0, 'B', 1),
        (0, 'A', 0),
        (1, 'A', 0),
        (1, 'A', 1)
    ]
    for row, key, val in row_entries:
        annotations.add(row_id=row, key=key, value=val)
    self.assertEqual(len(annotations.rows), 5)
    annotations.remove(row_id=0, key='A')
    self.assertEqual(len(annotations.rows), 3)
    # Removing row annotations must leave the column annotations untouched
    self.assertEqual(len(annotations.columns), 4)
    remaining = annotations.for_row(row_id=0)
    self.assertEqual(len(remaining), 1)
    self.assertEqual(remaining[0].key, 'B')
    self.assertEqual(remaining[0].value, 1)
    # -- Cell annotations: (column id, row id, key, value)
    cell_entries = [
        (0, 0, 'A', 0),
        (1, 0, 'B', 1),
        (1, 0, 'A', 0),
        (1, 1, 'A', 0),
        (1, 0, 'A', 1)
    ]
    for col, row, key, val in cell_entries:
        annotations.add(column_id=col, row_id=row, key=key, value=val)
    self.assertEqual(len(annotations.cells), 5)
    annotations.remove(row_id=0, column_id=1)
    self.assertEqual(len(annotations.cells), 2)
def __init__(self, dataset=None):
    """Initialize the client for a given dataset.

    Without a dataset handle the client represents a new dataset that has
    no identifier, no columns, no rows, and an empty set of annotations.
    With a handle, the identifier and columns are copied from the handle
    while rows and annotations are left unset (None) so that they can be
    fetched at a later point.

    NOTE(review): the previous documentation stated that a ValueError is
    raised if dataset columns or rows do not have unique identifiers. No
    such check is performed in this method -- confirm whether that still
    applies.

    Parameters
    ----------
    dataset: vizier.datastore.base.DatasetHandle, optional
        Handle to the dataset for which this is a client. If None this is a
        new dataset.
    """
    self.dataset = dataset
    if dataset is None:
        # New dataset: start from an empty schema and empty content
        self.identifier = None
        self.columns = []
        self._annotations = DatasetMetadata()
        self._rows = []
        return
    self.identifier = dataset.identifier
    self.columns = dataset.columns
    # Delay fetching rows and dataset annotations for now
    self._annotations = None
    self._rows = None
print(line) store = DatastoreClient( urls=DatastoreClientUrlFactory( urls=UrlFactory( base_url='http://localhost:5000/vizier-db/api/v1' ), project_id=PROJECT_ID ) ) ds = store.create_dataset( columns=[DatasetColumn(identifier=0, name='Name'), DatasetColumn(identifier=1, name='Age')], rows=[DatasetRow(identifier=0, values=['Alice', 32]), DatasetRow(identifier=1, values=['Bob', 23])], annotations=DatasetMetadata(rows=[DatasetAnnotation(row_id=1, key='user:comment', value='Needs cleaning')]) ) print(ds) print([col.identifier for col in ds.columns]) print([col.name for col in ds.columns]) dh = store.get_dataset(ds.identifier) for row in dh.fetch_rows(): print([row.identifier] + row.values) annotations = dh.get_annotations() print_annotations(annotations) store.update_annotation( identifier=dh.identifier,
def test_update_annotations(self):
    """Insert, update, and delete annotations of a dataset through the
    datastore and verify the result of each operation.
    """
    store = FileSystemDatastore(STORE_DIR)
    schema = [
        DatasetColumn(identifier=0, name='A'),
        DatasetColumn(identifier=1, name='B')
    ]
    data = [DatasetRow(identifier=0, values=['a', 'b'])]
    metadata = DatasetMetadata(
        cells=[
            DatasetAnnotation(column_id=0, row_id=0, key='X', value=1),
            DatasetAnnotation(column_id=0, row_id=0, key='X', value=2),
            DatasetAnnotation(column_id=1, row_id=0, key='X', value=3),
            DatasetAnnotation(column_id=0, row_id=0, key='Y', value=1)
        ],
        columns=[
            DatasetAnnotation(column_id=0, key='A', value='x'),
            DatasetAnnotation(column_id=1, key='A', value='x')
        ],
        rows=[
            DatasetAnnotation(row_id=0, key='E', value=100)
        ]
    )
    ds = store.create_dataset(columns=schema, rows=data, annotations=metadata)
    dataset_id = ds.identifier
    # INSERT row annotation: only a new value is given
    store.update_annotation(dataset_id, key='D', row_id=0, new_value=200)
    annos = store.get_annotations(dataset_id, row_id=0)
    self.assertEqual(len(annos.rows), 2)
    row_keys = [a.key for a in annos.rows]
    for key in ['D', 'E']:
        self.assertTrue(key in row_keys)
    row_values = [a.value for a in annos.rows]
    for val in [100, 200]:
        self.assertTrue(val in row_values)
    # UPDATE column annotation: both old and new value are given
    store.update_annotation(
        dataset_id,
        key='A',
        column_id=1,
        old_value='x',
        new_value='y'
    )
    annos = store.get_annotations(dataset_id, column_id=1)
    first = annos.columns[0]
    self.assertEqual(first.key, 'A')
    self.assertEqual(first.value, 'y')
    # DELETE cell annotation: only the old value is given
    store.update_annotation(
        dataset_id,
        key='X',
        column_id=0,
        row_id=0,
        old_value=2
    )
    annos = store.get_annotations(dataset_id, column_id=0, row_id=0)
    self.assertEqual(len(annos.cells), 2)
    for anno in annos.cells:
        self.assertNotEqual(anno.value, 2)
    # Delete the only annotation for cell (1, 0) and check the result flag
    result = store.update_annotation(
        dataset_id,
        key='X',
        column_id=1,
        row_id=0,
        old_value=3
    )
    self.assertTrue(result)
    annos = store.get_annotations(dataset_id, column_id=1, row_id=0)
    self.assertEqual(len(annos.cells), 0)