Example #1
0
 def make_id(self, *parts: Any) -> Optional[str]:
     """Derive an ID from ``parts`` and store it on this proxy.

     The components are handed to ``make_entity_id`` together with the
     proxy's :attr:`~key_prefix`. The outcome is assigned to ``self.id``
     and that attribute is returned.
     """
     generated = make_entity_id(*parts, key_prefix=self.key_prefix)
     self.id = generated
     return self.id
Example #2
0
def make_address(
    context: Context,
    full=None,
    remarks=None,
    summary=None,
    po_box=None,
    street=None,
    street2=None,
    street3=None,
    city=None,
    place=None,
    postal_code=None,
    state=None,
    region=None,
    country=None,
    country_code=None,
    key=None,
):
    """Build an ``Address`` entity from individual address components.

    ``place`` and ``city`` are joined into a single city value, and the
    three street lines into a single street value. If the entity holds no
    ``full`` value after the components were added, a one-line address is
    rendered by the formatter and stored as ``full``.

    When a full address string is available, the entity ID is set to
    ``addr-<hash>``, the hash being computed from the first country value
    on the entity, the slugified full address and ``key``. Otherwise no ID
    is assigned here.
    """
    locality = jointext(place, city, sep=", ")
    street_line = jointext(street, street2, street3, sep=", ")

    address = context.make("Address")
    plain_values = (
        ("full", full),
        ("remarks", remarks),
        ("summary", summary),
        ("postOfficeBox", po_box),
        ("street", street_line),
        ("city", locality),
        ("postalCode", postal_code),
        ("region", region),
    )
    for prop_name, value in plain_values:
        address.add(prop_name, value)
    address.add("state", state, quiet=True)
    # Both the country and the country code feed the same property; the
    # order decides which one `first("country")` can pick up below.
    address.add("country", country)
    address.add("country", country_code)

    primary_country = address.first("country")
    if not address.has("full"):
        components = {
            "attention": summary,
            "house": po_box,
            "road": street_line,
            "postcode": postal_code,
            "city": locality,
            "state": jointext(region, state, sep=", "),
        }
        formatter = get_formatter()
        full = formatter.one_line(components, country=primary_country)
        address.add("full", full)

    if not full:
        return address

    hash_id = make_entity_id(primary_country, slugify(full), key)
    if hash_id is not None:
        address.id = f"addr-{hash_id}"
    return address
Example #3
0
    def flush(self):
        """Apply the aggregated results to the entity and write them out.

        Every aggregated ``(key, prop, values)`` result is added to
        ``self.entity``. For properties listed in ``self.MENTIONS``, and
        only when the entity's schema is a ``DOCUMENT``, a ``Mention``
        entity is created as well and put to the bulk writer. If there was
        at least one result, the entity itself is written and the writer
        is flushed.

        Returns the set of IDs of the mentions that were created.
        """
        writer = self.dataset.bulk()
        entity_results = self.aggregator_entities.results()
        pattern_results = self.aggregator_patterns.results()
        results = list(chain(entity_results, pattern_results))

        # Keys of all country-typed results; attached to each mention.
        countries = {
            key for key, prop, _ in results if prop.type == registry.country
        }

        mention_ids = set()
        for key, prop, values in results:
            # The first value is the default label; names get a picked one.
            label = values[0]
            if prop.type == registry.name:
                label = registry.name.pick(values)

            detected = self.MENTIONS.get(prop)
            if detected is not None and self.entity.schema.is_a(DOCUMENT):
                mention = model.make_entity("Mention")
                mention.make_id("mention", self.entity.id, prop, key)
                mention_ids.add(mention.id)
                mention_values = (
                    ("resolved", make_entity_id(key)),
                    ("document", self.entity.id),
                    ("name", values),
                    ("detectedSchema", detected),
                    ("contextCountry", countries),
                )
                for field, value in mention_values:
                    mention.add(field, value)
                writer.put(self.ns.apply(mention))

            self.entity.add(prop, label, cleaned=True, quiet=True)

        if not results:
            # Nothing was extracted, so there is nothing to write.
            return mention_ids

        log.debug(
            "Extracted %d prop values, %d mentions [%s]: %s",
            len(results),
            len(mention_ids),
            self.entity.schema.name,
            self.entity.id,
        )
        writer.put(self.entity)
        writer.flush()
        return mention_ids
Example #4
0
 def make_id(self,
             *parts: Optional[str],
             dataset: Optional[str] = None) -> Optional[str]:
     """Hash ``parts`` into an entity ID and pass it through ``make_slug``.

     The hash comes from ``make_entity_id``, using the name of
     ``self.dataset`` as the key prefix. The ``dataset`` argument is only
     forwarded to :meth:`make_slug`, whose result is returned.
     """
     prefix = self.dataset.name
     entity_hash = make_entity_id(*parts, key_prefix=prefix)
     return self.make_slug(entity_hash, dataset=dataset)
Example #5
0
 def make_id(self, *parts):
     """Compute an ID from ``parts`` and remember it on the proxy.

     ``make_entity_id`` receives the components along with the proxy's
     ``key_prefix``. Its result becomes ``self.id``, which is returned.
     """
     new_id = make_entity_id(*parts, key_prefix=self.key_prefix)
     self.id = new_id
     return self.id