コード例 #1
0
ファイル: dataset.py プロジェクト: waynedou/datacube-core
def load_datasets_for_update(doc_stream, index):
    """Pair each incoming dataset document with its already-indexed dataset.

    Consumes a stream of dataset documents and looks each one up in the index
    by its id. A document is logged as an error and skipped when:

    * it has no id,
    * no dataset with that id exists in the index, or
    * the rebuilt dataset fails ``check_dataset_consistent``.

    Lineage information is not loaded: the new dataset is built from
    ``doc_without_lineage_sources``.

    :param doc_stream: iterable of ``(uri, doc)`` pairs; each ``doc`` must
        expose ``id`` and ``doc_without_lineage_sources``.
    :param index: object whose ``datasets.get(id)`` returns the existing
        dataset, or ``None`` when the id is unknown.
    :return: generator of ``(new_dataset, existing_dataset)`` tuples.
    """

    def mk_dataset(ds, uri):
        # Returns (new_dataset, existing_dataset, error_message); exactly one
        # of new_dataset / error_message is None.
        uuid = ds.id

        if uuid is None:
            return None, None, "Metadata document is missing id field"

        existing = index.datasets.get(uuid)
        if existing is None:
            return None, None, "No such dataset in the database: {}".format(uuid)

        # The new dataset takes its type from the indexed dataset rather than
        # from the incoming document.
        updated = Dataset(existing.type,
                          ds.doc_without_lineage_sources,
                          uris=[uri])
        return updated, existing, None

    for uri, doc in doc_stream:
        dataset, existing, error_msg = mk_dataset(doc, uri)

        if dataset is None:
            _LOG.error("Failure while processing: %s\n > Reason: %s", uri, error_msg)
            continue

        is_consistent, reason = check_dataset_consistent(dataset)
        if not is_consistent:
            _LOG.error("Dataset %s inconsistency: %s", dataset.id, reason)
            continue

        yield dataset, existing
コード例 #2
0
    def load_datasets(path, ds_resolve):
        """Yield every dataset under ``path`` that resolves and is consistent.

        Each ``(uri, document)`` pair produced by ``ui_path_doc_stream(path)``
        is passed to ``ds_resolve(document, uri)``, which must return a
        ``(dataset, error)`` pair. Entries whose dataset is ``None`` or that
        fail ``check_dataset_consistent`` are logged and skipped.

        NOTE(review): the logging calls pass keyword arguments, which suggests
        ``_LOG`` is a structured logger rather than a stdlib one; confirm.
        """
        for location, document in ui_path_doc_stream(path):
            resolved, failure = ds_resolve(document, location)

            if resolved is not None:
                consistent, why = check_dataset_consistent(resolved)
                if consistent:
                    yield resolved
                else:
                    _LOG.error("dataset inconsistency", dataset=resolved.id, reason=str(why))
            else:
                _LOG.error('dataset is empty', error=str(failure))