def make_id(self, *parts: Any) -> Optional[str]:
    """Derive an ID from ``parts`` and store it on this object.

    The components are handed to ``make_entity_id`` together with this
    object's :attr:`~key_prefix`; the result is assigned to ``self.id``.

    :param parts: components the ID is composed of.
    :return: the value of ``self.id`` after the assignment.
    """
    generated = make_entity_id(*parts, key_prefix=self.key_prefix)
    self.id = generated
    return self.id
def make_address(
    context: Context,
    full=None,
    remarks=None,
    summary=None,
    po_box=None,
    street=None,
    street2=None,
    street3=None,
    city=None,
    place=None,
    postal_code=None,
    state=None,
    region=None,
    country=None,
    country_code=None,
    key=None,
):
    """Build an ``Address`` entity from individual address components.

    The entity is created through ``context.make("Address")`` and filled
    with the supplied values. If the entity has no ``full`` value after
    that, a one-line address is produced by the formatter returned from
    ``get_formatter()`` and added as ``full``. Whenever a full address text
    is available, the entity ID is set to ``addr-<hash>``, where the hash is
    computed by ``make_entity_id`` from the first country value, the
    slugified full text and ``key``.

    ``place`` and ``city`` are joined into the city value; ``street``,
    ``street2`` and ``street3`` are joined into the street value. Both
    ``country`` and ``country_code`` are added to the ``country`` property.

    :return: the address entity; its ``id`` is only assigned here when a
        full address text exists and the hash is not ``None``.
    """
    city_text = jointext(place, city, sep=", ")
    street_text = jointext(street, street2, street3, sep=", ")

    address = context.make("Address")
    plain_values = (
        ("full", full),
        ("remarks", remarks),
        ("summary", summary),
        ("postOfficeBox", po_box),
        ("street", street_text),
        ("city", city_text),
        ("postalCode", postal_code),
        ("region", region),
    )
    for prop_name, prop_value in plain_values:
        address.add(prop_name, prop_value)
    address.add("state", state, quiet=True)
    for country_value in (country, country_code):
        address.add("country", country_value)

    # From here on, use whichever country value the entity reports first.
    first_country = address.first("country")

    full_text = full
    if not address.has("full"):
        # The country is passed to the formatter as a separate argument
        # rather than as one of the fields.
        fields = {
            "attention": summary,
            "house": po_box,
            "road": street_text,
            "postcode": postal_code,
            "city": city_text,
            "state": jointext(region, state, sep=", "),
        }
        full_text = get_formatter().one_line(fields, country=first_country)
        address.add("full", full_text)

    if not full_text:
        return address

    digest = make_entity_id(first_country, slugify(full_text), key)
    if digest is None:
        return address
    address.id = f"addr-{digest}"
    return address
def flush(self):
    """Apply the aggregated extraction results and write them out.

    The results of ``self.aggregator_entities`` and
    ``self.aggregator_patterns`` are combined. For every result, a label is
    added to ``self.entity`` under the result's property. If
    ``self.MENTIONS`` has a schema for that property and the entity's schema
    is a ``DOCUMENT``, a ``Mention`` entity is also built and put on the
    bulk writer. Finally the entity itself is put on the writer and the
    writer is flushed.

    :return: the set of mention IDs generated during this call.
    """
    writer = self.dataset.bulk()
    results = list(
        chain(self.aggregator_entities.results(), self.aggregator_patterns.results())
    )

    # Keys of all country-typed results; attached to every mention below.
    countries = {
        key for key, prop, _values in results if prop.type == registry.country
    }

    mention_ids = set()
    for key, prop, values in results:
        # Default label is the first value; names get a picked label instead.
        label = values[0]
        if prop.type == registry.name:
            label = registry.name.pick(values)

        schema = self.MENTIONS.get(prop)
        if schema is not None and self.entity.schema.is_a(DOCUMENT):
            mention = model.make_entity("Mention")
            mention.make_id("mention", self.entity.id, prop, key)
            # The ID is recorded before the namespace is applied.
            mention_ids.add(mention.id)
            mention.add("resolved", make_entity_id(key))
            mention.add("document", self.entity.id)
            mention.add("name", values)
            mention.add("detectedSchema", schema)
            mention.add("contextCountry", countries)
            writer.put(self.ns.apply(mention))

        self.entity.add(prop, label, cleaned=True, quiet=True)

    if results:
        log.debug(
            "Extracted %d prop values, %d mentions [%s]: %s",
            len(results),
            len(mention_ids),
            self.entity.schema.name,
            self.entity.id,
        )

    writer.put(self.entity)
    writer.flush()
    return mention_ids
def make_id(
    self, *parts: Optional[str], dataset: Optional[str] = None
) -> Optional[str]:
    """Hash ``parts`` into an ID and turn it into a slug.

    The components are hashed by ``make_entity_id`` using the name of
    ``self.dataset`` as key prefix; the hash is then passed to
    ``self.make_slug`` along with the ``dataset`` argument.

    :param parts: components the ID is composed of.
    :param dataset: forwarded unchanged to ``self.make_slug``.
    :return: whatever ``self.make_slug`` returns for the hashed value.
    """
    prefix = self.dataset.name
    digest = make_entity_id(*parts, key_prefix=prefix)
    return self.make_slug(digest, dataset=dataset)
def make_id(self, *parts):
    """Compose an ID from ``parts`` and remember it as ``self.id``.

    The ID is produced by ``make_entity_id`` from the given components and
    the ``key_prefix`` defined on this object.

    :param parts: components the ID is composed of.
    :return: the value of ``self.id`` after the assignment.
    """
    prefix = self.key_prefix
    self.id = make_entity_id(*parts, key_prefix=prefix)
    return self.id