def _path_dataset_ids(path: Path) -> Iterable[uuid.UUID]:
    """
    Yield the dataset id of every metadata document found at the given path.

    :raises InvalidDocException: when a document is empty or has no 'id' field.
    """
    for _, doc in read_documents(path):
        # A document that failed to parse comes back as None.
        if doc is None:
            raise InvalidDocException("Empty document from path {}".format(path))

        if 'id' not in doc:
            raise InvalidDocException("No id in path metadata: {}".format(path))

        yield uuid.UUID(doc['id'])
def from_doc(self, definition):
    """
    Create a Product from its definitions

    :param dict definition: product definition document
    :rtype: datacube.model.DatasetType
    :raises InvalidDocException: if the referenced metadata type is unknown
    """
    # This column duplication is getting out of hand:
    DatasetType.validate(definition)

    metadata_type = definition['metadata_type']

    # They either specified the name of a metadata type, or specified a metadata type.
    # Is it a name?
    if isinstance(metadata_type, compat.string_types):
        metadata_type = self.metadata_type_resource.get_by_name(metadata_type)
    else:
        # Otherwise they embedded a document, add it if needed:
        metadata_type = self.metadata_type_resource.from_doc(metadata_type)

    # BUG FIX: this guard must run before we dereference `metadata_type.name`
    # below. Previously the dereference came first, so an unknown metadata type
    # raised AttributeError instead of the intended InvalidDocException.
    if not metadata_type:
        raise InvalidDocException('Unknown metadata type: %r' % definition['metadata_type'])

    # Copy before mutating so the caller's document is left untouched.
    definition = definition.copy()
    definition['metadata_type'] = metadata_type.name

    return DatasetType(metadata_type, definition)
def from_doc(self, definition):
    """
    :type definition: dict
    :rtype: datacube.model.DatasetType
    """
    # This column duplication is getting out of hand:
    DatasetType.validate(definition)

    mdt_spec = definition['metadata_type']

    # The definition refers to a metadata type either by name or by an
    # embedded document.
    if isinstance(mdt_spec, compat.string_types):
        # A name: look it up.
        resolved_type = self.metadata_type_resource.get_by_name(mdt_spec)
    else:
        # An embedded document: register it if it isn't known yet.
        resolved_type = self.metadata_type_resource.add(mdt_spec, allow_table_lock=False)

    if not resolved_type:
        raise InvalidDocException('Unknown metadata type: %r' % definition['metadata_type'])

    return DatasetType(resolved_type, definition)
def mk_node(ds, sources, cache, sources_path):
    """
    Build the lineage-expanded document for a dataset, re-using (and
    consistency-checking) a cached result when the same dataset id was
    seen before.

    :raises InvalidDocException: when a repeated dataset id carries
        different lineage sources or different metadata than before.
    """
    doc = ds.doc_without_lineage_sources
    cached = cache.get(ds.id, None)

    if cached is not None:
        prev_ds, prev_doc, prev_sources = cached
        # A repeated id must agree with what we saw the first time.
        if not check_sources(sources, prev_sources):
            raise InvalidDocException('Inconsistent lineage for repeated dataset with _id: {}'.format(ds.id))
        if prev_doc != doc:
            raise InvalidDocException('Inconsistent metadata for repeated dataset with _id: {}'.format(ds.id))
        return prev_ds

    # First sighting: splice the resolved sources into the document and cache it.
    out_ds = toolz.assoc_in(doc, sources_path, sources)
    cache[ds.id] = (out_ds, doc, sources)
    return out_ds