def clean_date(date):
    """Extract every date found in a raw date value.

    The value may be a float (converted to its integer text form), a
    ``datetime`` (reduced to its ISO calendar date) or text. Bracketed
    fragments are removed with ``remove_bracketed``, newlines become spaces,
    and the text is cut on enumeration markers ("a)" .. "i)"), connectives
    (" or ", " to ", " and "), "alt DOB" labels, ";" and ">>". Each non-empty
    piece is handed to ``h.parse_date`` with the module's ``FORMATS``.

    Returns:
        A set holding everything ``h.parse_date`` produced for the pieces
        (presumably normalised date strings -- confirm against the helper).
        The set is empty when the input is ``None``, is a non-finite float,
        or when ``remove_bracketed`` yields ``None``.
    """
    dates = set()
    if isinstance(date, float):
        try:
            date = str(int(date))
        except (ValueError, OverflowError):
            # NaN raises ValueError and +/-infinity raises OverflowError in
            # int(); such a value carries no date, so treat it like a
            # missing value instead of aborting the whole run.
            return dates
    elif isinstance(date, datetime):
        date = date.date().isoformat()
    date = remove_bracketed(date)
    if date is None:
        return dates
    splits = [
        "a)",
        "b)",
        "c)",
        "d)",
        "e)",
        "f)",
        "g)",
        "h)",
        "i)",
        " or ",
        " to ",
        " and ",
        "alt DOB:",
        "alt DOB",
        ";",
        ">>",
    ]
    date = date.replace("\n", " ")
    for part in multi_split(date, splits):
        part = part.strip().strip(",")
        if not part:
            # Nothing left once whitespace and stray commas are removed.
            continue
        dates.update(h.parse_date(part, FORMATS))
    return dates
def parse_entry(context: Context, entry):
    """Turn one list entry element into an entity, its sanction and passports.

    ``entry`` is queried through ``findtext``/``findall`` (presumably an
    ElementTree-style element -- confirm against the caller). A
    ``type-entry`` of "2" produces a ``Person``; every other value produces
    a ``LegalEntity``. Passports are emitted as they are built; the entity
    (as a target) and the sanction are emitted last.
    """
    entity = context.make("LegalEntity")
    if entry.findtext("./type-entry") == "2":
        entity = context.make("Person")
    entity.id = context.make_slug(entry.findtext("number-entry"))

    sanction = h.make_sanction(context, entity)
    sanction.add("program", entry.findtext("./program-entry"))

    raw_listed = entry.findtext("./date-entry")
    if raw_listed:
        # The listing date is written as a compact YYYYMMDD stamp.
        listed_on = datetime.strptime(raw_listed, "%Y%m%d").date()
        entity.add("createdAt", listed_on)
        sanction.add("listingDate", listed_on)
        sanction.add("startDate", listed_on)

    for aka_node in entry.findall("./aka-list"):
        part1 = aka_node.findtext("./aka-name1")
        part2 = aka_node.findtext("./aka-name2")
        part3 = aka_node.findtext("./aka-name3")
        part4 = aka_node.findtext("./aka-name4")
        # Any type other than "N" is treated as an alias; quality "2" marks
        # the name as weak.
        is_alias = aka_node.findtext("type-aka") != "N"
        weak = aka_node.findtext("./quality-aka") == "2"
        h.apply_name(
            entity,
            name1=part1,
            name2=part2,
            name3=part3,
            tail_name=part4,
            alias=is_alias,
            is_weak=weak,
            quiet=True,
        )

    for title_node in entry.findall("./title-list"):
        entity.add("title", title_node.text, quiet=True)

    for doc_node in entry.findall("./document-list"):
        doc_reg = doc_node.findtext("./document-reg")
        doc_number = doc_node.findtext("./document-id")
        doc_country = doc_node.findtext("./document-country")
        passport = context.make("Passport")
        passport.id = context.make_id(
            "Passport", entity.id, doc_reg, doc_number, doc_country
        )
        passport.add("holder", entity)
        passport.add("passportNumber", doc_number)
        passport.add("summary", doc_reg)
        passport.add("country", doc_country)
        context.emit(passport)

    for id_node in entry.findall("./id-number-list"):
        entity.add("idNumber", id_node.text)

    for address_node in entry.findall("./address-list"):
        full_address = address_node.findtext("./address")
        address = h.make_address(context, full=full_address)
        h.apply_address(context, entity, address)

    for birth_place in entry.findall("./place-of-birth-list"):
        entity.add_cast("Person", "birthPlace", birth_place.text)

    for birth_date in entry.findall("./date-of-birth-list"):
        entity.add_cast("Person", "birthDate", parse_date(birth_date.text))

    for nationality in entry.findall("./nationality-list"):
        # One element may list several countries separated by ";" or ",".
        for raw_country in multi_split(nationality.text, [";", ","]):
            entity.add("nationality", remove_bracketed(raw_country), quiet=True)

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
def parse_countries(text):
    """Split ``text`` on ``COUNTRY_SPLIT`` and strip bracketed fragments.

    Returns a set with the result of ``remove_bracketed`` for every piece
    that ``multi_split`` yields.
    """
    return {
        remove_bracketed(piece) for piece in multi_split(text, COUNTRY_SPLIT)
    }
def parse_entry(context, entry):
    """Turn one list entry element into an entity, its sanction and passports.

    ``entry`` is queried through ``findtext``/``findall`` (presumably an
    ElementTree-style element -- confirm against the caller). A
    ``type-entry`` of "2" produces a ``Person``; every other value produces
    a ``LegalEntity``. Passports are emitted as they are built; the entity
    (as a unique target) and the sanction are emitted last.
    """
    entity = context.make("LegalEntity")
    if entry.findtext("./type-entry") == "2":
        entity = context.make("Person")
    entity.id = context.make_slug(entry.findtext("number-entry"))

    sanction = h.make_sanction(context, entity)
    sanction.add("program", entry.findtext("./program-entry"))

    raw_listed = entry.findtext("./date-entry")
    if raw_listed:
        # The listing date is written as a compact YYYYMMDD stamp.
        listed_at = datetime.strptime(raw_listed, "%Y%m%d")
        entity.context["created_at"] = listed_at.isoformat()
        sanction.add("startDate", listed_at.date())

    # Property that receives each positional name part of an aka record.
    name_fields = (
        ("firstName", "./aka-name1"),
        ("secondName", "./aka-name2"),
        ("middleName", "./aka-name3"),
        ("lastName", "./aka-name4"),
    )
    for aka_node in entry.findall("./aka-list"):
        pieces = []
        for prop, path in name_fields:
            value = aka_node.findtext(path)
            entity.add(prop, value, quiet=True)
            pieces.append(value)
        full_name = jointext(*pieces)

        # Type "N" is stored as the name; everything else is an alias,
        # stored as a weak alias when its quality is "2".
        if aka_node.findtext("type-aka") == "N":
            name_prop = "name"
        elif aka_node.findtext("./quality-aka") == "2":
            name_prop = "weakAlias"
        else:
            name_prop = "alias"
        entity.add(name_prop, full_name)

    for title_node in entry.findall("./title-list"):
        entity.add("title", title_node.text, quiet=True)

    for doc_node in entry.findall("./document-list"):
        doc_reg = doc_node.findtext("./document-reg")
        doc_number = doc_node.findtext("./document-id")
        doc_country = doc_node.findtext("./document-country")
        passport = context.make("Passport")
        passport.id = context.make_id(
            "Passport", entity.id, doc_reg, doc_number, doc_country
        )
        passport.add("holder", entity)
        passport.add("passportNumber", doc_number)
        passport.add("summary", doc_reg)
        passport.add("country", doc_country)
        context.emit(passport)

    for id_node in entry.findall("./id-number-list"):
        entity.add("idNumber", id_node.text)

    for address_node in entry.findall("./address-list"):
        full_address = address_node.findtext("./address")
        address = h.make_address(context, full=full_address)
        h.apply_address(context, entity, address)

    for birth_place in entry.findall("./place-of-birth-list"):
        entity.add_cast("Person", "birthPlace", birth_place.text)

    for birth_date in entry.findall("./date-of-birth-list"):
        entity.add_cast("Person", "birthDate", parse_date(birth_date.text))

    for nationality in entry.findall("./nationality-list"):
        # One element may list several countries separated by ";" or ",".
        for raw_country in multi_split(nationality.text, [";", ","]):
            entity.add("nationality", remove_bracketed(raw_country), quiet=True)

    entity.add("topics", "sanction")
    context.emit(entity, target=True, unique=True)
    context.emit(sanction)