Example #1
def _find_uri_mismatches(index_url: str, uri: str, validate_data=True) -> Iterable[Mismatch]:
    """
    Compare the index and filesystem contents for the given uri,
    yielding a Mismatch for each difference found.
    """

    # pylint: disable=protected-access
    index = Index(PostgresDb(PostgresDb._create_engine(index_url)))

    def ids(datasets):
        return [d.id for d in datasets]

    path = uri_to_local_path(uri)
    log = _LOG.bind(path=path)
    log.debug("index.get_dataset_ids_for_uri")
    indexed_datasets = set(get_datasets_for_uri(index, uri))

    datasets_in_file = set()  # type: Set[DatasetLite]
    if path.exists():
        try:
            datasets_in_file = set(map(DatasetLite, paths.get_path_dataset_ids(path)))
        except InvalidDocException as e:
            # Should we do something with indexed_datasets here? If there's none, we're more willing to trash.
            log.info("invalid_path", error_args=e.args)
            yield UnreadableDataset(None, uri)
            return

        log.info("dataset_ids",
                 indexed_dataset_ids=ids(indexed_datasets),
                 file_ids=ids(datasets_in_file))

        if validate_data:
            validation_success = validate.validate_dataset(path, log=log)
            if not validation_success:
                yield InvalidDataset(None, uri)
                return

    for indexed_dataset in indexed_datasets:
        # Does the dataset exist in the file?
        if indexed_dataset in datasets_in_file:
            if indexed_dataset.is_archived:
                yield ArchivedDatasetOnDisk(indexed_dataset, uri)
        else:
            yield LocationMissingOnDisk(indexed_dataset, uri)

    # For all file ids not in the index.
    file_ds_not_in_index = datasets_in_file.difference(indexed_datasets)

    if not file_ds_not_in_index:
        log.info("no mismatch found (dataset already indexed)")

    for dataset in file_ds_not_in_index:
        # If it's already indexed, we just need to add the location.
        indexed_dataset = index.datasets.get(dataset.id)
        if indexed_dataset:
            log.info("location_not_indexed", indexed_dataset=indexed_dataset)
            yield LocationNotIndexed(DatasetLite.from_agdc(indexed_dataset), uri)
        else:
            log.info("dataset_not_index", dataset=dataset, uri=uri)
            yield DatasetNotIndexed(dataset, uri)
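
A minimal driver for the generator above might look like the following sketch; the connection string and dataset URI are placeholders, not values from the original project.

# Hypothetical driver: report every mismatch found for a single dataset URI.
index_url = 'postgresql://localhost/datacube'          # placeholder connection string
uri = 'file:///data/scenes/example/ga-metadata.yaml'   # placeholder dataset location

for mismatch in _find_uri_mismatches(index_url, uri, validate_data=False):
    # Each yielded object is one of the Mismatch subclasses used above,
    # e.g. LocationMissingOnDisk or DatasetNotIndexed.
    print(type(mismatch).__name__, mismatch)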
Example #2
def test_index_duplicate_dataset(index: Index, initialised_postgres_db: PostgresDb,
                                 local_config,
                                 default_metadata_type) -> None:
    dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type)
    assert not index.datasets.has(_telemetry_uuid)

    with initialised_postgres_db.begin() as transaction:
        was_inserted = transaction.insert_dataset(
            _telemetry_dataset,
            _telemetry_uuid,
            dataset_type.id
        )

    assert was_inserted
    assert index.datasets.has(_telemetry_uuid)

    # Insert again.
    with initialised_postgres_db.connect() as connection:
        was_inserted = connection.insert_dataset(
            _telemetry_dataset,
            _telemetry_uuid,
            dataset_type.id
        )
        assert was_inserted is False

    assert index.datasets.has(_telemetry_uuid)
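
The duplicate-insert behaviour exercised above suggests a simple guard; the sketch below is illustrative and reuses only calls that already appear in this example.

# Hypothetical guard: only insert the dataset if the index does not know it yet.
if not index.datasets.has(_telemetry_uuid):
    with initialised_postgres_db.connect() as connection:
        connection.insert_dataset(_telemetry_dataset, _telemetry_uuid, dataset_type.id)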
Example #3
def pseudo_ls8_dataset4(index: Index, initialised_postgres_db: PostgresDb,
                        pseudo_ls8_type: DatasetType,
                        pseudo_ls8_dataset2: Dataset) -> Dataset:
    # Same as 2, but a different path/row
    id_ = str(uuid.uuid4())
    dataset_doc = copy.deepcopy(pseudo_ls8_dataset2.metadata_doc)
    dataset_doc['id'] = id_
    dataset_doc['image'] = {
        'satellite_ref_point_start': {
            'x': 116,
            'y': 85
        },
        'satellite_ref_point_end': {
            'x': 116,
            'y': 87
        },
    }

    with initialised_postgres_db.connect() as connection:
        was_inserted = connection.insert_dataset(dataset_doc, id_,
                                                 pseudo_ls8_type.id)
        assert was_inserted
        d = index.datasets.get(id_)
        # The dataset should have been matched to the pseudo-LS8 type.
        assert d.type.id == pseudo_ls8_type.id
        return d
Example #4
@classmethod
def from_config(cls,
                config,
                application_name=None,
                validate_connection=True):
    db = PostgresDb.from_config(config,
                                application_name=application_name,
                                validate_connection=validate_connection)
    return cls(db)
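
A hedged usage sketch for the wrapper above, assuming the enclosing class is an Index-like wrapper (named IndexWrapper here purely for illustration) and that a datacube LocalConfig is available.

# Hypothetical usage of the from_config classmethod shown above.
from datacube.config import LocalConfig

config = LocalConfig.find()                  # load the default datacube configuration
index = IndexWrapper.from_config(config,     # IndexWrapper stands in for the enclosing class
                                 application_name='example-app',
                                 validate_connection=True)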
Example #5
def telemetry_dataset(index: Index, initialised_postgres_db: PostgresDb,
                      default_metadata_type) -> Dataset:
    dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type)
    assert not index.datasets.has(_telemetry_uuid)

    with initialised_postgres_db.begin() as transaction:
        was_inserted = transaction.insert_dataset(_telemetry_dataset,
                                                  _telemetry_uuid,
                                                  dataset_type.id)
    assert was_inserted

    return index.datasets.get(_telemetry_uuid)
Example #6
def db_fixture_instance(request):
    local_config: LocalConfig = request.getfixturevalue(
        config_fixture_name)
    db = PostgresDb.from_config(local_config,
                                application_name='dea-test-run',
                                validate_connection=False)
    # Drop and recreate tables so our tests have a clean db.
    with db.connect() as connection:
        _core.drop_db(connection._connection)
    remove_dynamic_indexes()
    # Disable informational messages since we're doing this on every test run.
    with _increase_logging(_core._LOG) as _:
        _core.ensure_db(db._engine)
    # We don't need informational create/drop messages for every config change.
    _dynamic._LOG.setLevel(logging.WARN)
    yield db
    db.close()
Example #7
def test_transactions(index: Index, initialised_postgres_db: PostgresDb,
                      local_config, default_metadata_type) -> None:
    assert not index.datasets.has(_telemetry_uuid)

    dataset_type = index.products.add_document(_pseudo_telemetry_dataset_type)
    with initialised_postgres_db.begin() as transaction:
        was_inserted = transaction.insert_dataset(_telemetry_dataset,
                                                  _telemetry_uuid,
                                                  dataset_type.id)
        assert was_inserted
        assert transaction.contains_dataset(_telemetry_uuid)
        # Normal DB uses a separate connection: No dataset visible yet.
        assert not index.datasets.has(_telemetry_uuid)

        transaction.rollback()

    # Should have been rolled back.
    assert not index.datasets.has(_telemetry_uuid)
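
For contrast with the rollback path above, the commit path is implicit: leaving the begin() block without rolling back persists the insert, as Example #5 also shows. A compact sketch using only calls from these examples:

# Hypothetical commit path: exiting the begin() block without rollback keeps the dataset.
with initialised_postgres_db.begin() as transaction:
    transaction.insert_dataset(_telemetry_dataset, _telemetry_uuid, dataset_type.id)
assert index.datasets.has(_telemetry_uuid)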
Example #8
def uninitialised_postgres_db(local_config, request):
    """
    Return a connection to an empty PostgreSQL database
    """
    timezone = request.param

    db = PostgresDb.from_config(local_config,
                                application_name='test-run',
                                validate_connection=False)

    # Drop tables so our tests have a clean db.
    # with db.begin() as c:  # Creates a new PostgresDbAPI, by passing a new connection to it
    _core.drop_db(db._engine)
    db._engine.execute('alter database %s set timezone = %r' % (local_config['db_database'], timezone))

    # We need to run this as well, probably because SQLAlchemy has pulled the dynamic
    # indexes into its MetaData and would attempt to recreate them. TODO: fix this.
    remove_dynamic_indexes()

    yield db
    # with db.begin() as c:  # Drop SCHEMA
    _core.drop_db(db._engine)
    db.close()
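
A sketch of how a test might consume this fixture, assuming it is registered as a parametrised pytest fixture (the decorator is not shown in the excerpt); the timezone value and test body are illustrative only.

# Hypothetical test: pytest injects the fixture by argument name and feeds
# the timezone through request.param via indirect parametrization.
import pytest

@pytest.mark.parametrize('uninitialised_postgres_db', ['UTC'], indirect=True)
def test_connects_to_empty_db(uninitialised_postgres_db):
    # The fixture yields a PostgresDb whose datacube tables have been dropped.
    with uninitialised_postgres_db.connect() as connection:
        pass  # a real test would initialise the schema and index datasets here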
Example #9
@classmethod
def get_dataset_fields(cls, doc):
    return PostgresDb.get_dataset_fields(doc)