def __init__(self, db: PostgresDb) -> None:
    self._db = db

    self.users = UserResource(db)
    self.metadata_types = MetadataTypeResource(db)
    self.products = ProductResource(db, self.metadata_types)
    self.datasets = DatasetResource(db, self.products)

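# A sketch of how this constructor might be wired up. The enclosing class
# is not shown in this excerpt, so the name `Index` is an assumption;
# PostgresDb.from_config and CONFIG are used the same way further below.
db = PostgresDb.from_config(CONFIG)
index = Index(db)

# The resources are layered: products resolve their metadata types and
# datasets resolve their products, mirroring the wiring above.
product = index.products.get_by_name('ls5_nbar_scene')  # placeholder name
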
from copy import deepcopy

import pytest
# (DocumentMismatchError and the _EXAMPLE_* fixtures come from the full
#  test module, which is not part of this excerpt)


def test_index_dataset():
    mock_db = MockDb()
    mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE)
    datasets = DatasetResource(mock_db, mock_types)
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)

    ids = {d.id for d in mock_db.dataset.values()}
    assert ids == {_nbar_uuid, _ortho_uuid, _telemetry_uuid}

    # Three datasets (ours and the two embedded source datasets)
    assert len(mock_db.dataset) == 3

    # Our three datasets should be linked together
    # Nbar -> Ortho -> Telemetry
    assert len(mock_db.dataset_source) == 2
    assert mock_db.dataset_source == {
        ('ortho', _nbar_uuid, _ortho_uuid),
        ('satellite_telemetry_data', _ortho_uuid, _telemetry_uuid)
    }

    # Adding the same dataset again is a no-op: nothing new is indexed.
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)
    assert len(mock_db.dataset) == 3
    assert len(mock_db.dataset_source) == 2

    # Adding a dataset with the same id but different content should fail.
    ds2 = deepcopy(_EXAMPLE_NBAR_DATASET)
    ds2.metadata_doc['product_type'] = 'zzzz'
    with pytest.raises(DocumentMismatchError):
        dataset = datasets.add(ds2)

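# These tests rely on test doubles that are not part of this excerpt.
# Below is a minimal sketch of what they could look like for the version
# of test_index_dataset above; the method names and signatures that
# DatasetResource calls on the db are assumptions, not a confirmed API.
from contextlib import contextmanager
from types import SimpleNamespace


class MockDb:
    """In-memory stand-in for PostgresDb (sketch only)."""

    def __init__(self):
        self.dataset = {}              # id -> stored dataset record
        self.dataset_source = set()    # (classifier, dataset_id, source_id)
        self.already_ingested = set()  # ids to report as already indexed

    @contextmanager
    def begin(self):
        # Stand-in for a transaction context manager (assumed interface).
        yield self

    def contains_dataset(self, dataset_id):
        return dataset_id in self.already_ingested or dataset_id in self.dataset

    def insert_dataset(self, metadata_doc, dataset_id, dataset_type_id):
        self.dataset[dataset_id] = SimpleNamespace(id=dataset_id,
                                                   metadata_doc=metadata_doc,
                                                   type_id=dataset_type_id)
        return True

    def insert_dataset_source(self, classifier, dataset_id, source_dataset_id):
        self.dataset_source.add((classifier, dataset_id, source_dataset_id))


class MockTypesResource:
    """Always resolves to the single dataset type it was given."""

    def __init__(self, type_):
        self.type = type_

    def get(self, *args, **kwargs):
        return self.type

    def get_by_name(self, *args, **kwargs):
        return self.type
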
def test_index_already_ingested_dataset():
    mock_db = MockDb()
    mock_db.already_ingested = {_ortho_uuid, _telemetry_uuid, _nbar_uuid}
    mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE)
    datasets = DatasetResource(mock_db, mock_types)
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)

    # Nothing ingested, because we reported the first as already ingested.
    assert len(mock_db.dataset) == 0
    assert len(mock_db.dataset_source) == 0

def test_index_already_ingested_source_dataset():
    mock_db = MockDb()
    mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE)
    datasets = DatasetResource(mock_db, mock_types)

    # Index the ortho source first: it and its telemetry source are added.
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET.sources['ortho'])
    assert len(mock_db.dataset) == 2
    assert len(mock_db.dataset_source) == 1

    # Adding the nbar dataset then indexes only the one remaining
    # dataset and link.
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)
    assert len(mock_db.dataset) == 3
    assert len(mock_db.dataset_source) == 2

def test_index_two_levels_already_ingested():
    mock_db = MockDb()
    mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE)
    datasets = DatasetResource(mock_db, mock_types)

    # Index the deepest source (telemetry) first.
    dataset = datasets.add(
        _EXAMPLE_NBAR_DATASET.sources['ortho'].sources['satellite_telemetry_data'])
    assert len(mock_db.dataset) == 1
    assert len(mock_db.dataset_source) == 0

    # Adding the nbar dataset indexes the remaining two datasets and both links.
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)
    assert len(mock_db.dataset) == 3
    assert len(mock_db.dataset_source) == 2

def test_index_already_ingested_source_dataset():
    mock_db = MockDb()
    mock_db.already_ingested = {_ortho_uuid, _telemetry_uuid}
    mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE)
    datasets = DatasetResource(mock_db, mock_types)
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)

    # Only the first dataset is ingested.
    assert len(mock_db.dataset) == 1
    assert mock_db.dataset[0][1] == _nbar_uuid

    # It should have been linked to the ortho.
    assert len(mock_db.dataset_source) == 1
    assert mock_db.dataset_source == {
        ('ortho', _nbar_uuid, _ortho_uuid),
    }

def test_index_dataset():
    mock_db = MockDb()
    mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE)
    datasets = DatasetResource(mock_db, mock_types)
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)

    ids = {d[0]['id'] for d in mock_db.dataset}
    assert ids == {_nbar_uuid, _ortho_uuid, _telemetry_uuid}

    # Three datasets (ours and the two embedded source datasets)
    assert len(mock_db.dataset) == 3

    # Our three datasets should be linked together
    # Nbar -> Ortho -> Telemetry
    assert len(mock_db.dataset_source) == 2
    assert mock_db.dataset_source == {
        ('ortho', _nbar_uuid, _ortho_uuid),
        ('satellite_telemetry_data', _ortho_uuid, _telemetry_uuid)
    }

def test_index_two_levels_already_ingested():
    mock_db = MockDb()
    # RAW was already ingested.
    mock_db.already_ingested = {_telemetry_uuid}
    mock_types = MockTypesResource(_EXAMPLE_DATASET_TYPE)
    datasets = DatasetResource(mock_db, mock_types)
    dataset = datasets.add(_EXAMPLE_NBAR_DATASET)

    ids = {d[0]['id'] for d in mock_db.dataset}
    assert ids == {_nbar_uuid, _ortho_uuid}

    # Only two datasets inserted (the telemetry dataset was already ingested).
    assert len(mock_db.dataset) == 2

    # All three datasets should still be linked together:
    # Nbar -> Ortho -> Telemetry
    assert len(mock_db.dataset_source) == 2
    assert mock_db.dataset_source == {
        ('ortho', _nbar_uuid, _ortho_uuid),
        ('satellite_telemetry_data', _ortho_uuid, _telemetry_uuid)
    }

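# Note that the three tests above read mock_db.dataset positionally
# (d[0]['id'], dataset[0][1]), so they assume a list-of-tuples store
# rather than the dict used by the first test_index_dataset. A sketch of
# that variant (the subclass name and signature are assumptions):
class MockDbListStore(MockDb):
    """Variant of MockDb whose `dataset` is a list of tuples (sketch)."""

    def __init__(self):
        super().__init__()
        self.dataset = []

    def insert_dataset(self, metadata_doc, dataset_id, dataset_type_id):
        # Append a tuple so d[0]['id'] is the document's id field and
        # d[1] is the dataset uuid.
        self.dataset.append((metadata_doc, dataset_id, dataset_type_id))
        return True
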
def add_dataset(pr, dt, metadict, file):
    """Add a dataset to the datacube database.

    The dataset is written to two tables:

    - dataset: holds all the metadata
    - dataset_location: holds the location of the indexed file

    Args:
        pr (ProductResource): A ProductResource object, as returned by
            ``add_product``
        dt (DatasetType): A DatasetType object, as returned by
            ``add_product``
        metadict (dict): Dictionary containing dataset metadata, generally
            generated by ``metadict_from_netcdf``
        file (str): Path of the file to add to the index

    Returns:
        None. The function is used for its side effect of adding a dataset
        to the datacube.
    """
    db = PostgresDb.from_config(CONFIG)
    dataset_resource = DatasetResource(db, pr)
    dataset = Dataset(dt, metadict, sources={})
    dataset_resource.add(dataset)
    uid = metadict['id']
    dataset_resource.add_location(uid, file)

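# A usage sketch for add_dataset. The helper names come from the docstring
# (`add_product`, `metadict_from_netcdf`); their signatures and the tuple
# return of add_product are assumptions, and the paths are placeholders.
pr, dt = add_product('dem_product_definition.yaml')
metadict = metadict_from_netcdf('/data/srtm_dem.nc', dt)
add_dataset(pr, dt, metadict, '/data/srtm_dem.nc')
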
# ... (start of product_def_dict not shown)
         'dtype': 'float64',
         'nodata': -9999},
        {'name': 'elevation',
         'units': 'meter',
         'dtype': 'int16',
         'nodata': -32768},
        {'name': 'slope',
         'units': 'degree',
         'dtype': 'float32',
         'nodata': -9999},
        {'name': 'aspect',
         'units': 'degree',
         'dtype': 'float32',
         'nodata': -9999}]}

# Product add
db = PostgresDb.from_config()
meta_resource = MetadataTypeResource(db)
product_resource = ProductResource(db, meta_resource)
dataset_type = product_resource.add_document(product_def_dict)
print(product_resource)
print(dataset_type)

# Prepare metadata
# (not shown here: build `metadict`, the metadata document for the file
#  being indexed, e.g. with a helper like metadict_from_netcdf above)

# Dataset add
dataset_resource = DatasetResource(db, product_resource)
# A bare Dataset() raises a TypeError: it needs at least a dataset type
# and a metadata document, as in add_dataset() above.
dataset = Dataset(dataset_type, metadict, sources={})
print(dataset_resource)

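# One way the elided "Prepare metadata" step might look. Every key and
# value below is a placeholder (the original fragment does not show how
# metadict is produced); the add/add_location calls follow the pattern of
# add_dataset() above.
metadict = {
    'id': '00000000-0000-0000-0000-000000000000',  # placeholder dataset UUID
    'product_type': 'dem',                         # placeholder
    'format': {'name': 'NETCDF'},                  # placeholder
    # ... whatever else the product's metadata type requires
}

dataset = Dataset(dataset_type, metadict, sources={})
dataset_resource.add(dataset)
dataset_resource.add_location(metadict['id'], '/path/to/terrain.nc')  # placeholder path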