Example 1
    def can_update(self, dataset, updates_allowed=None):
        """
        Check if dataset can be updated. Return bool,safe_changes,unsafe_changes

        :param datacube.model.Dataset dataset: Dataset to update
        :param dict updates_allowed: Allowed updates
        :rtype: bool,list[change],list[change]
        """
        existing = self.get(dataset.id, include_sources=True)
        if not existing:
            raise ValueError(
                'Unknown dataset %s, cannot update – did you intend to add it?'
                % dataset.id)

        if dataset.type.name != existing.type.name:
            raise ValueError(
                'Changing product is not supported. From %s to %s in %s' %
                (existing.type.name, dataset.type.name, dataset.id))

        # TODO: figure out (un)safe changes from metadata type?
        allowed = {
            # can always add more metadata
            tuple(): changes.allow_extension,
        }
        allowed.update(updates_allowed or {})

        doc_changes = get_doc_changes(existing.metadata_doc,
                                      jsonify_document(dataset.metadata_doc))
        good_changes, bad_changes = changes.classify_changes(
            doc_changes, allowed)

        return not bad_changes, good_changes, bad_changes
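A minimal usage sketch of the check-then-update flow above (the names index and edited_dataset are hypothetical, and it is assumed that a matching update() method accepts the same dataset object):

# Hypothetical usage: classify the changes first, apply only if all are safe.
can_apply, safe_changes, unsafe_changes = index.datasets.can_update(edited_dataset)
if can_apply:
    index.datasets.update(edited_dataset)
else:
    for offset, old_val, new_val in unsafe_changes:
        print('Unsafe change at %s: %r -> %r' % (offset, old_val, new_val))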
Example 2
def run(
    verbose: bool,
    odc_metadata_files: Iterable[Path],
    stac_base_url: str,
    explorer_base_url: str,
    validate: bool,
):
    for input_metadata in odc_metadata_files:
        dataset = serialise.from_path(input_metadata)

        name = input_metadata.stem.replace(".odc-metadata", "")
        output_path = input_metadata.with_name(f"{name}.stac-item.json")

        # Create STAC dict
        item_doc = dc_to_stac(
            dataset,
            input_metadata,
            output_path,
            stac_base_url,
            explorer_base_url,
            do_validate=False,
        )

        if validate:
            eo3stac.validate_item(item_doc)

        with output_path.open("w") as f:
            json.dump(jsonify_document(item_doc),
                      f,
                      indent=4,
                      default=json_fallback)

        if verbose:
            echo(f'Wrote {style(output_path.as_posix(), "green")}')
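The jsonify_document-plus-json.dump pattern used above can be exercised on its own; a small sketch, assuming the usual datacube.utils import location and that jsonify_document converts values such as datetimes and UUIDs into JSON-friendly forms (default=str is kept as a fallback either way):

import json
import uuid
from datetime import datetime, timezone

from datacube.utils import jsonify_document  # assumed import path

doc = {
    'id': uuid.uuid4(),
    'created': datetime.now(timezone.utc),
    'label': 'example-item',
}

# jsonify_document returns a document whose awkward values have been
# normalised, so a plain json.dumps call can finish the job.
print(json.dumps(jsonify_document(doc), indent=4, default=str))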
Example 3
    def add(self, type_):
        """
        Add a Product.

        :param datacube.model.DatasetType type_: Product to add
        :rtype: datacube.model.DatasetType
        """
        DatasetType.validate(type_.definition)

        existing = self.get_by_name(type_.name)
        if existing:
            check_doc_unchanged(existing.definition,
                                jsonify_document(type_.definition),
                                'Metadata Type {}'.format(type_.name))
        else:
            metadata_type = self.metadata_type_resource.get_by_name(
                type_.metadata_type.name)
            if metadata_type is None:
                _LOG.warning('Adding metadata_type "%s" as it doesn\'t exist.',
                             type_.metadata_type.name)
                metadata_type = self.metadata_type_resource.add(
                    type_.metadata_type)
            with self._db.connect() as connection:
                connection.add_dataset_type(
                    name=type_.name,
                    metadata=type_.metadata_doc,
                    metadata_type_id=metadata_type.id,
                    search_fields=metadata_type.dataset_fields,
                    definition=type_.definition)
        return self.get_by_name(type_.name)
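A usage note as a sketch (index and my_product are hypothetical names): because of the check_doc_unchanged branch, calling add() again with an unchanged definition is effectively a no-op, while a changed definition raises.

# Hypothetical: the second call only verifies the stored definition.
product = index.products.add(my_product)
product = index.products.add(my_product)   # no-op if the definition matches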
Example 4
    def add(self, metadata_type, allow_table_lock=False):
        """
        :param datacube.model.MetadataType metadata_type:
        :param allow_table_lock:
            Allow an exclusive lock to be taken on the table while creating the indexes.
            This will halt other users' requests until completed.

            If false, creation will be slightly slower and cannot be done in a transaction.
        :rtype: datacube.model.MetadataType
        """
        # This column duplication is getting out of hand:
        MetadataType.validate(metadata_type.definition)

        existing = self.get_by_name(metadata_type.name)
        if existing:
            # They've passed us the same one again. Make sure it matches what is stored.
            check_doc_unchanged(existing.definition,
                                jsonify_document(metadata_type.definition),
                                'Metadata Type {}'.format(metadata_type.name))
        else:
            with self._db.connect() as connection:
                connection.add_metadata_type(
                    name=metadata_type.name,
                    definition=metadata_type.definition,
                    concurrently=not allow_table_lock)
        return self.get_by_name(metadata_type.name)
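A sketch of the allow_table_lock trade-off described in the docstring (index and new_md_type are hypothetical names):

# Hypothetical: exclusive lock -> faster index creation, but blocks other users.
index.metadata_types.add(new_md_type, allow_table_lock=True)

# Default: concurrent (slightly slower) creation that does not block others.
index.metadata_types.add(new_md_type)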
Example 5
    def add(self, type_):
        """
        Add a Product

        :param datacube.model.DatasetType type_: Product to add
        :rtype: datacube.model.DatasetType
        """
        DatasetType.validate(type_.definition)

        existing = self._db.get_dataset_type_by_name(type_.name)
        if existing:
            # TODO: Support for adding/updating match rules?
            # They've passed us the same collection again. Make sure it matches what is stored.
            check_doc_unchanged(
                existing.definition,
                jsonify_document(type_.definition),
                'Dataset type {}'.format(type_.name)
            )
        else:
            self._db.add_dataset_type(
                name=type_.name,
                metadata=type_.metadata_doc,
                metadata_type_id=type_.metadata_type.id,
                definition=type_.definition
            )
        return self.get_by_name(type_.name)
Example 6
    def can_update(self, product, allow_unsafe_updates=False):
        """
        Check if product can be updated. Return bool,safe_changes,unsafe_changes

        (An unsafe change is anything that may potentially make the product
        incompatible with existing datasets of that type)

        :param datacube.model.DatasetType product: Product to update
        :param bool allow_unsafe_updates: Allow unsafe changes. Use with caution.
        :rtype: bool,list[change],list[change]
        """
        DatasetType.validate(product.definition)

        existing = self.get_by_name(product.name)
        if not existing:
            raise ValueError('Unknown product %s, cannot update – did you intend to add it?' % product.name)

        updates_allowed = {
            ('description',): changes.allow_any,
            ('metadata_type',): changes.allow_any,

            # You can safely make the match rules looser but not tighter.
            # Tightening them could exclude datasets already matched to the product.
            # (which would make search results wrong)
            ('metadata',): changes.allow_truncation
        }

        doc_changes = get_doc_changes(existing.definition, jsonify_document(product.definition))
        good_changes, bad_changes = changes.classify_changes(doc_changes, updates_allowed)

        return allow_unsafe_updates or not bad_changes, good_changes, bad_changes
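A possible calling pattern (a sketch; edited_product is hypothetical, and it is assumed that the matching update() method also takes allow_unsafe_updates):

# Hypothetical flow: only apply the new definition when every change is safe.
ok, safe, unsafe = index.products.can_update(edited_product)
if ok:
    index.products.update(edited_product, allow_unsafe_updates=False)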
Example 7
    def can_update(self, metadata_type, allow_unsafe_updates=False):
        """
        Check if metadata type can be updated. Return bool,safe_changes,unsafe_changes

        Safe updates currently allow new search fields to be added, description to be changed.

        :param datacube.model.MetadataType metadata_type: updated MetadataType
        :param bool allow_unsafe_updates: Allow unsafe changes. Use with caution.
        :rtype: bool,list[change],list[change]
        """
        MetadataType.validate(metadata_type.definition)

        existing = self.get_by_name(metadata_type.name)
        if not existing:
            raise ValueError(
                'Unknown metadata type %s, cannot update – did you intend to add it?'
                % metadata_type.name)

        updates_allowed = {
            ('description',): changes.allow_any,
            # You can add new fields safely but not modify existing ones.
            ('dataset',): changes.allow_extension,
            ('dataset', 'search_fields'): changes.allow_extension,
        }

        doc_changes = get_doc_changes(
            existing.definition, jsonify_document(metadata_type.definition))
        good_changes, bad_changes = changes.classify_changes(
            doc_changes, updates_allowed)

        return allow_unsafe_updates or not bad_changes, good_changes, bad_changes
Example 8
    def resolve(main_ds, uri):
        try:
            main_ds = SimpleDocNav(dedup_lineage(main_ds))
        except InvalidDocException as e:
            return None, e

        main_uuid = main_ds.id

        ds_by_uuid = toolz.valmap(toolz.first, flatten_datasets(main_ds))
        all_uuid = list(ds_by_uuid)
        db_dss = {str(ds.id): ds for ds in index.datasets.bulk_get(all_uuid)}

        lineage_uuids = set(filter(lambda x: x != main_uuid, all_uuid))
        missing_lineage = lineage_uuids - set(db_dss)

        if missing_lineage and fail_on_missing_lineage:
            return None, "Following lineage datasets are missing from DB: %s" % (','.join(missing_lineage))

        if verify_lineage and not is_doc_eo3(main_ds.doc):
            bad_lineage = []

            for uuid in lineage_uuids:
                if uuid in db_dss:
                    ok, err = check_consistent(jsonify_document(ds_by_uuid[uuid].doc_without_lineage_sources),
                                               db_dss[uuid].metadata_doc)
                    if not ok:
                        bad_lineage.append((uuid, err))

            if len(bad_lineage) > 0:
                error_report = '\n'.join('Inconsistent lineage dataset {}:\n> {}'.format(uuid, err)
                                         for uuid, err in bad_lineage)
                return None, error_report

        def with_cache(v, k, cache):
            cache[k] = v
            return v

        def resolve_ds(ds, sources, cache=None):
            cached = cache.get(ds.id)
            if cached is not None:
                return cached

            uris = [uri] if ds.id == main_uuid else []

            doc = ds.doc

            db_ds = db_dss.get(ds.id)
            if db_ds:
                product = db_ds.type
            else:
                product = match_product(doc)

            return with_cache(Dataset(product, doc, uris=uris, sources=sources), ds.id, cache)

        try:
            return remap_lineage_doc(main_ds, resolve_ds, cache={}), None
        except BadMatch as e:
            return None, e
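The lineage-consistency check in the middle of resolve() can also be used on its own; a sketch assuming check_consistent returns an (ok, error_message) pair as it does above (local_ds and indexed_ds are hypothetical):

# Hypothetical: compare a freshly parsed lineage document against the copy
# already in the index, after the same jsonify_document normalisation.
ok, err = check_consistent(
    jsonify_document(local_ds.doc_without_lineage_sources),
    indexed_ds.metadata_doc)
if not ok:
    print('Inconsistent lineage dataset %s:\n> %s' % (local_ds.id, err))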
Example 9
def as_rich_json(o):
    """
    Use datacube's method of simplifying objects before serialising to json

    (Primarily useful for serialising datacube models reliably)

    Much slower than as_json()
    """
    return as_json(jsonify_document(o))
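For example (a sketch; dataset is a hypothetical datacube.model.Dataset):

# Hypothetical: metadata documents carry datetimes, UUIDs and floats that
# plain JSON serialisers reject; as_rich_json() normalises them first.
payload = as_rich_json(dataset.metadata_doc)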
Example 10
    def can_update(self, product, allow_unsafe_updates=False):
        """
        Check if product can be updated. Return bool,safe_changes,unsafe_changes

        (An unsafe change is anything that may potentially make the product
        incompatible with existing datasets of that type)

        :param DatasetType product: Product to update
        :param bool allow_unsafe_updates: Allow unsafe changes. Use with caution.
        :rtype: bool,list[change],list[change]
        """
        DatasetType.validate(product.definition)

        existing = self.get_by_name(product.name)
        if not existing:
            raise ValueError(
                'Unknown product %s, cannot update – did you intend to add it?'
                % product.name)

        updates_allowed = {
            ('description',): changes.allow_any,
            ('license',): changes.allow_any,
            ('metadata_type',): changes.allow_any,

            # You can safely make the match rules looser but not tighter.
            # Tightening them could exclude datasets already matched to the product.
            # (which would make search results wrong)
            ('metadata',): changes.allow_truncation,

            # Some old storage fields should not be in the product definition any more: allow removal.
            ('storage', 'chunking'): changes.allow_removal,
            ('storage', 'driver'): changes.allow_removal,
            ('storage', 'dimension_order'): changes.allow_removal,
        }

        doc_changes = get_doc_changes(existing.definition,
                                      jsonify_document(product.definition))
        good_changes, bad_changes = changes.classify_changes(
            doc_changes, updates_allowed)

        for offset, old_val, new_val in good_changes:
            _LOG.info("Safe change in %s from %r to %r",
                      _readable_offset(offset), old_val, new_val)

        for offset, old_val, new_val in bad_changes:
            _LOG.warning("Unsafe change in %s from %r to %r",
                         _readable_offset(offset), old_val, new_val)

        return allow_unsafe_updates or not bad_changes, good_changes, bad_changes
Example 11
    def add(self, dataset, skip_sources=False):
        """
        Ensure a dataset is in the index. Add it if not present.

        :param datacube.model.Dataset dataset: dataset to add
        :param bool skip_sources: don't attempt to index source datasets (use when sources are already indexed)
        :rtype: datacube.model.Dataset
        """
        if not skip_sources:
            for source in dataset.sources.values():
                self.add(source)

        was_inserted = False
        sources_tmp = dataset.type.dataset_reader(dataset.metadata_doc).sources
        dataset.type.dataset_reader(dataset.metadata_doc).sources = {}
        try:
            _LOG.info('Indexing %s', dataset.id)
            with self._db.begin() as transaction:
                try:
                    was_inserted = transaction.insert_dataset(
                        dataset.metadata_doc, dataset.id, dataset.type.id)
                    for classifier, source_dataset in dataset.sources.items():
                        transaction.insert_dataset_source(
                            classifier, dataset.id, source_dataset.id)

                    # try to update location in the same transaction as insertion.
                    # if insertion fails we'll try updating location later
                    # if insertion succeeds the location bit can't possibly fail
                    if dataset.local_uri:
                        transaction.ensure_dataset_location(
                            dataset.id, dataset.local_uri)
                except DuplicateRecordError as e:
                    _LOG.warning(str(e))

            if not was_inserted:
                existing = self.get(dataset.id)
                if existing:
                    check_doc_unchanged(existing.metadata_doc,
                                        jsonify_document(dataset.metadata_doc),
                                        'Dataset {}'.format(dataset.id))

                # reinsert attempt? try updating the location
                if dataset.local_uri:
                    try:
                        self._db.ensure_dataset_location(
                            dataset.id, dataset.local_uri)
                    except DuplicateRecordError as e:
                        _LOG.warning(str(e))
        finally:
            dataset.type.dataset_reader(
                dataset.metadata_doc).sources = sources_tmp

        return dataset
Example 12
    def add(self, dataset, sources_policy='verify', **kwargs):
        """
        Add ``dataset`` to the index. No-op if it is already present.

        :param Dataset dataset: dataset to add
        :param str sources_policy: how should source datasets included in this dataset be handled:

                ``verify``
                    Verify that each source exists in the index, and that they are identical.

                ``ensure``
                    Add source datasets to the index if they don't exist.

                ``skip``
                    don't attempt to index source datasets (use when sources are already indexed)

        :rtype: Dataset
        """
        if 'skip_sources' in kwargs and kwargs['skip_sources']:
            warnings.warn(
                '"skip_sources" is deprecated, use "sources_policy=\'skip\'"',
                DeprecationWarning)
            sources_policy = 'skip'
        self._add_sources(dataset, sources_policy)

        sources_tmp = dataset.type.dataset_reader(dataset.metadata_doc).sources
        dataset.type.dataset_reader(dataset.metadata_doc).sources = {}
        try:
            _LOG.info('Indexing %s', dataset.id)

            if not self._try_add(dataset):
                existing = self.get(dataset.id)
                if existing:
                    check_doc_unchanged(existing.metadata_doc,
                                        jsonify_document(dataset.metadata_doc),
                                        'Dataset {}'.format(dataset.id))

                # reinsert attempt? try updating the location
                if dataset.uris:
                    try:
                        with self._db.begin() as transaction:
                            transaction.ensure_dataset_locations(
                                dataset.id, dataset.uris)
                    except DuplicateRecordError as e:
                        _LOG.warning(str(e))
        finally:
            dataset.type.dataset_reader(
                dataset.metadata_doc).sources = sources_tmp

        return dataset
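Typical calls under the three policies (a sketch; index and dataset are hypothetical):

# Hypothetical: the common sources_policy cases.
index.datasets.add(dataset)                           # 'verify' is the default
index.datasets.add(dataset, sources_policy='ensure')  # index any missing sources
index.datasets.add(dataset, sources_policy='skip')    # sources already indexed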
Example 13
    def add(self, dataset, skip_sources=False):
        """
        Ensure a dataset is in the index. Add it if not present.

        :param datacube.model.Dataset dataset: dataset to add
        :param bool skip_sources: don't attempt to index source datasets (use when sources are already indexed)
        :rtype: datacube.model.Dataset
        """
        if not skip_sources:
            for source in dataset.sources.values():
                self.add(source)

        was_inserted = False
        sources_tmp = dataset.type.dataset_reader(dataset.metadata_doc).sources
        dataset.type.dataset_reader(dataset.metadata_doc).sources = {}
        try:
            _LOG.info('Indexing %s', dataset.id)
            with self._db.begin() as transaction:
                try:
                    was_inserted = transaction.insert_dataset(dataset.metadata_doc, dataset.id, dataset.type.id)
                    for classifier, source_dataset in dataset.sources.items():
                        transaction.insert_dataset_source(classifier, dataset.id, source_dataset.id)

                    # try to update location in the same transaction as insertion.
                    # if insertion fails we'll try updating location later
                    # if insertion succeeds the location bit can't possibly fail
                    if dataset.local_uri:
                        transaction.ensure_dataset_location(dataset.id, dataset.local_uri)
                except DuplicateRecordError as e:
                    _LOG.warning(str(e))

            if not was_inserted:
                existing = self.get(dataset.id)
                if existing:
                    check_doc_unchanged(
                        existing.metadata_doc,
                        jsonify_document(dataset.metadata_doc),
                        'Dataset {}'.format(dataset.id)
                    )

                # reinsert attempt? try updating the location
                if dataset.local_uri:
                    try:
                        self._db.ensure_dataset_location(dataset.id, dataset.local_uri)
                    except DuplicateRecordError as e:
                        _LOG.warning(str(e))
        finally:
            dataset.type.dataset_reader(dataset.metadata_doc).sources = sources_tmp

        return dataset
Example 14
    def add(self, dataset, skip_sources=False):
        """
        Ensure a dataset is in the index. Add it if not present.

        :param datacube.model.Dataset dataset: dataset to add
        :param bool skip_sources: don't attempt to index source datasets (use when sources are already indexed)
        :rtype: datacube.model.Dataset
        """
        if skip_sources:
            for source in dataset.sources.values():
                if not self.has(source.id):
                    self.add(source, skip_sources=skip_sources)
        else:
            for source in dataset.sources.values():
                self.add(source, skip_sources=skip_sources)

        sources_tmp = dataset.type.dataset_reader(dataset.metadata_doc).sources
        dataset.type.dataset_reader(dataset.metadata_doc).sources = {}
        try:
            _LOG.info('Indexing %s', dataset.id)

            if not self._try_add(dataset):
                existing = self.get(dataset.id)
                if existing:
                    check_doc_unchanged(existing.metadata_doc,
                                        jsonify_document(dataset.metadata_doc),
                                        'Dataset {}'.format(dataset.id))

                # reinsert attempt? try updating the location
                if dataset.local_uri:
                    try:
                        with self._db.connect() as connection:
                            connection.ensure_dataset_location(
                                dataset.id, dataset.local_uri)
                    except DuplicateRecordError as e:
                        _LOG.warning(str(e))
        finally:
            dataset.type.dataset_reader(
                dataset.metadata_doc).sources = sources_tmp

        return dataset
Example 15
    def add(self, dataset, skip_sources=False, sources_policy='verify'):
        """
        Ensure a dataset is in the index. Add it if not present.

        :param datacube.model.Dataset dataset: dataset to add
        :param str sources_policy: one of 'verify' (verify source metadata matches), 'ensure' (add sources if they don't exist) or 'skip' (don't index sources)
        :param bool skip_sources: don't attempt to index source datasets (use when sources are already indexed)
        :rtype: datacube.model.Dataset
        """
        if skip_sources:
            warnings.warn('"skip_sources" is deprecated, use "sources_policy"',
                          DeprecationWarning)
            sources_policy = 'skip'
        self._add_sources(dataset, sources_policy)

        sources_tmp = dataset.type.dataset_reader(dataset.metadata_doc).sources
        dataset.type.dataset_reader(dataset.metadata_doc).sources = {}
        try:
            _LOG.info('Indexing %s', dataset.id)

            if not self._try_add(dataset):
                existing = self.get(dataset.id)
                if existing:
                    check_doc_unchanged(existing.metadata_doc,
                                        jsonify_document(dataset.metadata_doc),
                                        'Dataset {}'.format(dataset.id))

                # reinsert attempt? try updating the location
                if dataset.local_uri:
                    try:
                        with self._db.connect() as connection:
                            connection.ensure_dataset_location(
                                dataset.id, dataset.local_uri)
                    except DuplicateRecordError as e:
                        _LOG.warning(str(e))
        finally:
            dataset.type.dataset_reader(
                dataset.metadata_doc).sources = sources_tmp

        return dataset
Example 16
    def add(self, type_):
        """
        Add a Product

        :param datacube.model.DatasetType type_: Product to add
        :rtype: datacube.model.DatasetType
        """
        DatasetType.validate(type_.definition)

        existing = self._db.get_dataset_type_by_name(type_.name)
        if existing:
            # TODO: Support for adding/updating match rules?
            # They've passed us the same collection again. Make sure it matches what is stored.
            check_doc_unchanged(existing.definition,
                                jsonify_document(type_.definition),
                                'Dataset type {}'.format(type_.name))
        else:
            self._db.add_dataset_type(name=type_.name,
                                      metadata=type_.metadata_doc,
                                      metadata_type_id=type_.metadata_type.id,
                                      definition=type_.definition)
        return self.get_by_name(type_.name)
Example 17
    def add(self, type_, allow_table_lock=False):
        """
        Add a Product.

        :param allow_table_lock:
            Allow an exclusive lock to be taken on the table while creating the indexes.
            This will halt other users' requests until completed.

            If false, creation will be slightly slower and cannot be done in a transaction.
        :param datacube.model.DatasetType type_: Product to add
        :rtype: datacube.model.DatasetType
        """
        DatasetType.validate(type_.definition)

        existing = self.get_by_name(type_.name)
        if existing:
            check_doc_unchanged(existing.definition,
                                jsonify_document(type_.definition),
                                'Metadata Type {}'.format(type_.name))
        else:
            metadata_type = self.metadata_type_resource.get_by_name(
                type_.metadata_type.name)
            if metadata_type is None:
                _LOG.warning('Adding metadata_type "%s" as it doesn\'t exist.',
                             type_.metadata_type.name)
                metadata_type = self.metadata_type_resource.add(
                    type_.metadata_type, allow_table_lock=allow_table_lock)
            with self._db.connect() as connection:
                connection.add_dataset_type(
                    name=type_.name,
                    metadata=type_.metadata_doc,
                    metadata_type_id=metadata_type.id,
                    search_fields=metadata_type.dataset_fields,
                    definition=type_.definition,
                    concurrently=not allow_table_lock,
                )
        return self.get_by_name(type_.name)
Example 18
def as_json(o):
    return jsonify(jsonify_document(o), indent=4)
Example 19
def _to_json(o):
    # Postgres <=9.5 doesn't support NaN and Infinity
    fixedup = jsonify_document(o)
    return json.dumps(fixedup, default=_json_fallback)
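To see why the extra pass matters for older Postgres, a standalone sketch (the exact replacement jsonify_document performs on NaN is an assumption, not shown above):

import json

from datacube.utils import jsonify_document  # assumed import path

doc = {'mean': float('nan')}

# Plain json.dumps emits the bare token NaN, which Postgres <=9.5 JSON
# columns reject; jsonify_document is assumed to replace it with a
# serialisable value first.
print(json.dumps(doc))                    # -> {"mean": NaN}
print(json.dumps(jsonify_document(doc)))  # -> valid JSON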