def simplify_provenance(proxy: EntityProxy) -> EntityProxy:
    """Reduce each provenance date property to a single value.

    ``modifiedAt`` and ``retrievedAt`` keep the maximum of their current
    values; ``createdAt``, ``authoredAt`` and ``publishedAt`` keep the
    minimum. A property is skipped when ``proxy.has(name, quiet=True)`` is
    false. The proxy is updated in place and also returned.
    """
    # Pair the builtin that selects the surviving value with the properties
    # it applies to; the order matches the order the properties are rewritten.
    reducers = (
        (max, ("modifiedAt", "retrievedAt")),
        (min, ("createdAt", "authoredAt", "publishedAt")),
    )
    for choose, field_names in reducers:
        for field_name in field_names:
            if not proxy.has(field_name, quiet=True):
                continue
            proxy.set(field_name, choose(proxy.get(field_name)))
    return proxy
def remove_prefix_dates(entity: EntityProxy) -> EntityProxy:
    """Strip date values that are only prefixes of more precise ones.

    Example: a Person carrying both ``1990`` and ``1990-05-01`` as
    birthDate should end up with just ``1990-05-01``. Every property whose
    type equals ``registry.date`` has its values passed through
    ``remove_prefix_date_values`` and written back. The entity is changed
    in place and returned.
    """
    # Lazy generator: each property is filtered and rewritten before the
    # next one is pulled from iterprops().
    date_props = (
        candidate
        for candidate in entity.iterprops()
        if candidate.type == registry.date
    )
    for date_prop in date_props:
        pruned = remove_prefix_date_values(entity.get(date_prop))
        entity.set(date_prop, pruned)
    return entity
def entity_filename(
    proxy: EntityProxy, base_name: Optional[str] = None, extension: Optional[str] = None
) -> Optional[str]:
    """Build a file name for the entity via ``safe_filename``.

    Caller-supplied ``base_name`` and ``extension`` take precedence. For
    entities whose schema is a ``Document``, missing parts are filled in
    from, in order: the ``extension`` property, the stem and suffix of each
    ``fileName`` value, and finally an extension guessed from ``mimeType``.
    When no base name is found, ``proxy.id`` is used instead.

    Returns whatever ``safe_filename(base_name, extension=extension)``
    returns.
    """
    if proxy.schema.is_a("Document"):
        # 1) Explicit extension property: take the first value, then stop.
        for declared in proxy.get("extension", quiet=True):
            if extension is None:
                extension = declared
            else:
                break

        # 2) File names can supply both the stem and the suffix; the first
        #    non-empty one of each wins.
        for candidate in proxy.get("fileName", quiet=True):
            stem, suffix = splitext(candidate)
            if stem and base_name is None:
                base_name = stem
            if suffix and extension is None:
                extension = suffix

        # 3) Last resort: guess from MIME types. A failed guess (None)
        #    moves on to the next MIME type.
        for mime in proxy.get("mimeType", quiet=True):
            if extension is None:
                extension = guess_extension(mime)
            else:
                break

    if not base_name:
        base_name = proxy.id
    return safe_filename(base_name, extension=extension)
def name_entity(entity: EntityProxy) -> EntityProxy:
    """Keep one ``name`` on the entity and move the rest to ``alias``.

    Only applies to entities whose schema is a ``Thing`` and which have more
    than one name. The name to keep is chosen by ``registry.name.pick``; the
    remaining names are added as aliases. This is awkward given that names
    are not special and may not always be the caption. The entity is
    modified in place and returned.
    """
    if not entity.schema.is_a("Thing"):
        return entity

    names = entity.get("name")
    if len(names) <= 1:
        # Zero or one name: nothing to choose between.
        return entity

    primary = registry.name.pick(names)
    # Drop the chosen name from the list so that it is not also recorded
    # as an alias of itself.
    if primary in names:
        names.remove(primary)
    entity.set("name", primary)
    entity.add("alias", names)
    return entity