def parse_alias(party, parts, alias):
    """Apply every ``DocumentedName`` found under ``alias`` to ``party``.

    Args:
        party: Object exposing ``add(prop, value)``; it is also passed to
            ``h.apply_name``.
        parts: Mapping from ``NamePartGroupID`` attribute values to name-part
            labels such as ``"First Name"`` or ``"Entity Name"``.
        alias: Element-like node providing ``get`` and ``findall``.

    Note:
        The lookup in ``ALIAS_TYPES`` uses subscription, so an unknown alias
        type fails loudly (presumably ``KeyError`` -- confirm the table type).
    """
    # The alias' "Primary" attribute is not read here.
    weak = as_bool(alias.get("LowQuality"))
    kind = ref_value("AliasType", alias.get("AliasTypeID"))
    target_prop = ALIAS_TYPES[kind]

    for documented in alias.findall("./DocumentedName"):
        # Values that share a part label are concatenated with single spaces.
        collected = defaultdict(str)
        for part_value in documented.findall("./DocumentedNamePart/NamePartValue"):
            label = parts.get(part_value.get("NamePartGroupID"))
            joined = " ".join((collected[label], part_value.text))
            collected[label] = joined.strip()

        entity_name = collected.pop("Entity Name", None)
        h.apply_name(party, full=entity_name, name_prop=target_prop, is_weak=weak)

        for prop, label in (
            ("name", "Vessel Name"),
            ("weakAlias", "Nickname"),
            ("registrationNumber", "Aircraft Name"),
        ):
            party.add(prop, collected.pop(label, None))

        person_parts = {
            keyword: collected.pop(label, None)
            for keyword, label in (
                ("first_name", "First Name"),
                ("middle_name", "Middle Name"),
                ("maiden_name", "Maiden Name"),
                ("last_name", "Last Name"),
                ("matronymic", "Matronymic"),
                ("patronymic", "Patronymic"),
            )
        }
        h.apply_name(party, **person_parts, is_weak=weak, name_prop=target_prop)

        # Whatever is left was not consumed above; hand it to the audit helper.
        h.audit_data(collected)
def parse_name(entity, node):
    """Apply the name parts of a name node, per spelling variant, to ``entity``.

    The parts are grouped by spelling: key ``None`` holds the text of each
    part's ``./value`` child, and every ``./spelling-variant`` child adds an
    entry under its ``(lang, script)`` attribute pair. Each group is then
    applied to the entity independently.

    Args:
        entity: Object exposing ``add``; it is also passed to ``h.apply_name``.
        node: Element-like node with ``name-type`` and ``quality`` attributes
            and ``name-part`` children.

    Note:
        ``NAME_TYPE`` and ``NAME_QUALITY_WEAK`` are subscripted directly, so an
        unmapped attribute value is not tolerated.
    """
    name_prop = NAME_TYPE[node.get("name-type")]
    is_weak = NAME_QUALITY_WEAK[node.get("quality")]

    variants = defaultdict(dict)
    for part in node.findall("./name-part"):
        part_type = part.get("name-part-type")
        variants[None][part_type] = part.findtext("./value")

        for spelling in part.findall("./spelling-variant"):
            key = (spelling.get("lang"), spelling.get("script"))
            variants[key][part_type] = spelling.text

    # Only the grouped values are needed here; using a distinct loop name
    # avoids rebinding the mapping that is being iterated.
    for name_parts in variants.values():
        entity.add("title", name_parts.pop("title", None), quiet=True)
        entity.add("title", name_parts.pop("suffix", None), quiet=True)
        entity.add("weakAlias", name_parts.pop("other", None), quiet=True)
        entity.add("weakAlias", name_parts.pop("tribal-name", None), quiet=True)
        entity.add("fatherName", name_parts.pop("grand-father-name", None), quiet=True)
        h.apply_name(
            entity,
            full=name_parts.pop("whole-name", None),
            given_name=name_parts.pop("given-name", None),
            second_name=name_parts.pop("further-given-name", None),
            patronymic=name_parts.pop("father-name", None),
            last_name=name_parts.pop("family-name", None),
            maiden_name=name_parts.pop("maiden-name", None),
            is_weak=is_weak,
            name_prop=name_prop,
            quiet=True,
        )
        # Any part type not consumed above is reported by the audit helper.
        h.audit_data(name_parts)
def crawl(context: Context):
    """Parse the HTML source tables and emit one entity plus sanction per row.

    Within each table the first row supplies the (slugified) column names.
    A later row with exactly one cell switches the schema used for the rows
    that follow it, via ``TYPES``. Every other row is treated as a record.
    """
    path = context.fetch_resource("source.html", context.dataset.data.url)
    context.export_resource(path, HTML, title=context.SOURCE_TITLE)
    with open(path, "r", encoding="utf-8") as fh:
        doc = html.fromstring(fh.read())

    for table in doc.findall('.//div[@class="editor-content"]//table'):
        headers = None
        schema = None
        for tr in table.findall(".//tr"):
            cells = [collapse_spaces(td.text_content()) for td in tr.findall("./td")]
            if headers is None:
                headers = [slugify(cell, sep="_") for cell in cells]
                continue
            if len(cells) == 1:
                schema = TYPES[cells[0]]
                continue

            record = dict(zip(headers, cells))
            entity = context.make(schema)

            name = record.pop("imie_i_nazwisko_nazwa_podmiotu")
            entity.id = context.make_slug(name)
            # Text before the first "(" is the name; each following
            # parenthesised chunk is stored as an alias.
            primary, *bracketed = name.split("(")
            entity.add("name", primary)
            for chunk in bracketed:
                entity.add("alias", chunk.split(")")[0])

            entity.add("notes", record.pop("uzasadnienie_wpisu_na_liste"))

            # Peel known markers off the end of the details text, in the
            # order given by CHOPSKA.
            details = record.pop("dane_identyfikacyjne_osoby_podmiotu")
            for marker, target in CHOPSKA:
                pieces = details.rsplit(marker, 1)
                details = pieces[0]
                if len(pieces) < 2:
                    continue
                if target == "address":
                    addr = h.make_address(context, full=pieces[1])
                    h.apply_address(context, entity, addr)
                else:
                    entity.add(target, pieces[1])

            if details.strip():
                result = context.lookup("details", details)
                if result is None:
                    context.log.warning("Unhandled details", details=details)
                else:
                    for prop, value in result.props.items():
                        entity.add(prop, value)

            sanction = h.make_sanction(context, entity)
            sanction.add("provisions", record.pop("zastosowane_srodki_sankcyjne"))
            listed = record.pop("data_umieszczenia_na_liscie").replace(" r.", "")
            sanction.add("startDate", h.parse_date(listed, ["%d.%m.%Y"]))

            h.audit_data(record)
            context.emit(entity, target=True)
            context.emit(sanction)
def parse_alias(entity: Entity, alias: Dict[str, str]):
    """Apply one alias record to ``entity`` and audit its leftover keys.

    Args:
        entity: Entity receiving the alias name.
        alias: Alias record; ``QUALITY`` and ``ALIAS_NAME`` are popped from
            it, so the caller's dict is modified.
    """
    quality = alias.pop("QUALITY", None)
    # NAME_QUALITY is subscripted directly: an unmapped quality is an error.
    target_prop = NAME_QUALITY[quality]
    alias_name = alias.pop("ALIAS_NAME", None)
    h.apply_name(entity, full=alias_name, quiet=True, name_prop=target_prop)
    h.audit_data(alias, ignore=["NOTE"])
def crawl(context: Context):
    """Load the JSON source and emit one ``CryptoWallet`` per result entry.

    Entries are read from the top-level ``"result"`` list (an absent key is
    treated as an empty list). Except for ``address`` when building the id,
    fields are popped without a default, so an entry missing one of them
    raises ``KeyError``.
    """
    path = context.fetch_resource("source.json", context.dataset.data.url)
    context.export_resource(path, JSON, title=context.SOURCE_TITLE)
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, which differs between platforms.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)

    for entry in data.get("result", []):
        wallet = context.make("CryptoWallet", target=True)
        wallet.id = context.make_slug(entry.get("address"))
        wallet.add("publicKey", entry.pop("address"))
        wallet.add("topics", "crime.theft")
        wallet.add("createdAt", entry.pop("createdAt"))
        wallet.add("modifiedAt", entry.pop("updatedAt"))
        wallet.add("alias", entry.pop("family"))
        wallet.add("balance", format_number(entry.pop("balance")))
        wallet.add("amountUsd", format_number(entry.pop("balanceUSD")))
        wallet.add("currency", entry.pop("blockchain"))
        # "transactions" is knowingly left unprocessed.
        h.audit_data(entry, ignore=["transactions"])
        context.emit(wallet)
def crawl_entities(context: Context):
    """Emit one ``Organization`` target per record of the "entidades" feed.

    The birth-style date fields of the source are stored as
    ``incorporationDate``. The record-level ``CITY_OF_BIRTH`` is popped and
    dropped. On aliases ``CITY_OF_BIRTH`` is left in place, so it reaches
    ``parse_alias`` unconsumed.
    """
    for data in fetch(context, "entidades"):
        entity = crawl_common(context, data, "entidades", "Organization")
        for key in ("DATE_OF_BIRTH", "YEAR"):
            entity.add("incorporationDate", data.pop(key, None))
        data.pop("CITY_OF_BIRTH", None)
        entity.add("country", data.pop("COUNTRY_OF_BIRTH", None))

        for raw_address in data.pop("ENTITY_ADDRESS", []):
            h.apply_address(context, entity, parse_address(context, raw_address))

        for alias in data.pop("ENTITY_ALIAS", []):
            for key in ("DATE_OF_BIRTH", "YEAR"):
                entity.add("incorporationDate", alias.pop(key, None))
            entity.add("country", alias.pop("COUNTRY_OF_BIRTH", None))
            parse_alias(entity, alias)

        h.audit_data(data, ["VERSIONNUM"])
        context.emit(entity, target=True)
def crawl_individuals(context: Context):
    """Emit a ``Person`` and a sanction for each row of the individuals sheet.

    Rows without ``internal_seq_id`` are skipped, as are rows for which
    ``context.make_id`` yields ``None`` from the three name columns.
    """
    path = context.fetch_resource("individuals.xlsx", PEOPLE_URL)
    context.export_resource(path, XLSX, title=context.SOURCE_TITLE)

    for record in excel_records(path):
        seq_id = record.pop("internal_seq_id", None)
        if seq_id is None:
            continue

        name_en, name_he, name_ar = (
            record.pop(column, None)
            for column in (
                "name_of_individual_english",
                "name_of_individual_hebrew",
                "name_of_individual_arabic",
            )
        )
        person = context.make("Person")
        person.id = context.make_id(name_en, name_he, name_ar)
        if person.id is None:
            continue

        # First available spelling becomes the name; the Hebrew and Arabic
        # spellings are always added as aliases as well.
        person.add("name", name_en or name_he or name_ar)
        person.add("alias", name_he)
        person.add("alias", name_ar)
        person.add("topics", "crime.terror")
        person.add("birthDate", parse_date(record.pop("d_o_b", None)))
        person.add("nationality", record.pop("nationality_residency", None))
        person.add("idNumber", record.pop("individual_id", None))

        sanction = h.make_sanction(context, person)
        sanction.add("recordId", seq_id)
        sanction.add("recordId", record.pop("foreign_designation_id", None))
        sanction.add("program", record.pop("designation", None))
        sanction.add("program", record.pop("foreign_designation", None))
        sanction.add("authority", lang_pick(record, "designated_by"))

        # Called for its effect on ``record`` only; the results are unused.
        lang_pick(record, "designated_by_abroad")
        record.pop("date_of_foreign_designation_date", None)

        parse_interval(sanction, record.pop("date_of_designation_in_israel", None))

        context.emit(person, target=True)
        context.emit(sanction)
        h.audit_data(record)
def crawl_organizations(context: Context):
    """Emit organizations, their sanctions and links between linked records.

    Each spreadsheet row becomes an ``Organization`` plus a sanction. Rows
    for which ``context.make_id`` yields ``None`` are skipped. After all rows
    are read, an ``UnknownLink`` is emitted for every recorded pair of
    sequence ids where both ids map to an emitted organization.
    """
    path = context.fetch_resource("organizations.xlsx", ORG_URL)
    context.export_resource(path, XLSX, title=context.SOURCE_TITLE)
    seq_ids = {}
    links = []
    for record in excel_records(path):
        seq_id = record.pop("internal_seq_id", None)
        name_en = record.pop("organization_name_english", None)
        name_he = record.pop("organization_name_hebrew", None)
        entity = context.make("Organization")
        entity.id = context.make_id(name_en, name_he)
        if entity.id is None:
            continue
        if seq_id is not None:
            seq_ids[seq_id] = entity.id
        entity.add("name", name_en)
        entity.add("name", name_he)
        entity.add("topics", "crime.terror")
        entity.add("notes", h.clean_note(lang_pick(record, "comments")))
        entity.add("notes", h.clean_note(record.pop("column_42", None)))
        entity.add("email", record.pop("email", None))
        entity.add("country", record.pop("country_hebrew", None))
        entity.add("country", record.pop("country_english", None))
        entity.add("registrationNumber", record.pop("corporation_id", None))
        entity.add("legalForm", lang_pick(record, "corporation_type"))
        entity.add("jurisdiction", lang_pick(record, "location_of_formation"))
        date = parse_date(record.pop("date_of_corporation", None))
        entity.add("incorporationDate", date)

        # Iterate over a snapshot of the keys because matching fields are
        # popped from ``record`` inside the loop.
        for field in list(record.keys()):
            if field.startswith("organization_name_"):
                entity.add("alias", record.pop(field, None))
            if field.startswith("telephone"):
                entity.add("phone", record.pop(field, None))
            if field.startswith("website"):
                entity.add("website", record.pop(field, None))
        entity.add("phone", record.pop("column_70", None))
        entity.add("website", record.pop("column_73", None))

        sanction = h.make_sanction(context, entity)
        sanction.add("recordId", seq_id)
        sanction.add("recordId", record.pop("seq_num_in_other_countries", None))
        sanction.add("program", record.pop("designation_type", None))
        sanction.add("reason", lang_pick(record, "designation_justification"))
        sanction.add("authority", lang_pick(record, "designated_by"))
        sanction.add("publisher", record.pop("public_records_references", None))

        # Called for its effect on ``record`` only; the results are unused.
        lang_pick(record, "designated_by_abroad")
        record.pop("date_designated_in_other_countries", None)

        # A link needs both ends. A present-but-empty cell (None) or a row
        # without a sequence id would otherwise raise on ``.split`` or on the
        # ``max``/``min`` comparison. Assumes both values are strings when
        # present -- TODO confirm against ``excel_records``.
        linked = record.pop("linked_to_internal_seq_id", None)
        if linked and seq_id is not None:
            for link in linked.split(";"):
                if not link:
                    continue
                # Store each pair in one canonical order so that A->B and
                # B->A produce the same tuple.
                links.append((max(link, seq_id), min(link, seq_id)))

        street = lang_pick(record, "street")
        city = lang_pick(record, "city_village")
        if street or city:
            address = h.make_address(
                context,
                street=street,
                city=city,
                country_code=entity.first("country"),
            )
            h.apply_address(context, entity, address)

        for field in (
            "date_of_temporary_designation",
            "date_of_permenant_designation",
            "date_designation_in_west_bank",
        ):
            parse_interval(sanction, record.pop(field, None))

        context.emit(entity, target=True)
        context.emit(sanction)
        h.audit_data(record)

    for subject_seq, object_seq in links:
        subject_id = seq_ids.get(subject_seq)
        object_id = seq_ids.get(object_seq)
        # Skip pairs that point at a sequence id with no emitted organization.
        if subject_id is None or object_id is None:
            continue
        relation = context.make("UnknownLink")
        relation.id = context.make_id(subject_id, object_id)
        relation.add("subject", subject_id)
        relation.add("object", object_id)
        context.emit(relation)
def crawl_persons(context: Context):
    """Emit one ``Person`` target per record of the "personas" feed.

    Besides the person itself, an identification entity is emitted for every
    document whose type resolves through the ``doc_types`` lookup. Documents
    with an unknown type are logged and skipped.
    """
    for data in fetch(context, "personas"):
        entity = crawl_common(context, data, "personas", "Person")
        entity.add("title", values(data.pop("TITLE", None)))
        entity.add("nationality", values(data.pop("NATIONALITY", None)))
        entity.add("position", values(data.pop("DESIGNATION", None)))
        entity.add("gender", data.pop("GENDER", None))
        for key in ("DATE_OF_BIRTH", "YEAR"):
            entity.add("birthDate", data.pop(key, None))
        entity.add("birthPlace", data.pop("CITY_OF_BIRTH", None))
        entity.add("country", data.pop("COUNTRY_OF_BIRTH", None))

        for dob in data.pop("INDIVIDUAL_DATE_OF_BIRTH", []):
            # These two fields go through parse_date; the year fields below
            # are added as they are.
            for key in ("DATE", "TYPE_OF_DATE"):
                entity.add("birthDate", parse_date(dob.pop(key, None)))
            for key in ("YEAR", "FROM_YEAR", "TO_YEAR"):
                entity.add("birthDate", dob.pop(key, None))
            h.audit_data(dob, ignore=["NOTE"])

        for doc in data.pop("INDIVIDUAL_DOCUMENT", []):
            doc_type = doc.pop("TYPE_OF_DOCUMENT", None)
            number = doc.pop("NUMBER", None)
            schema = context.lookup_value("doc_types", doc_type)
            if schema is None:
                context.log.warning("Unknown document type", type=doc_type)
                continue

            identification = context.make(schema)
            identification.id = context.make_id("ID", entity.id, number)
            identification.add("holder", entity)
            identification.add("type", doc_type)
            identification.add("number", number)
            identification.add("type", doc.pop("TYPE_OF_DOCUMENT2", None))
            issued = parse_date(doc.pop("DATE_OF_ISSUE", None))
            identification.add("startDate", issued)
            for key in ("ISSUING_COUNTRY", "COUNTRY_OF_ISSUE"):
                identification.add("country", doc.pop(key, None))
            identification.add("summary", doc.pop("NOTE", None))
            context.emit(identification)
            h.audit_data(doc, ignore=["CITY_OF_ISSUE"])

        for raw_address in data.pop("INDIVIDUAL_ADDRESS", []):
            h.apply_address(context, entity, parse_address(context, raw_address))

        for raw_place in data.pop("INDIVIDUAL_PLACE_OF_BIRTH", []):
            place = parse_address(context, raw_place)
            if place is None:
                continue
            entity.add("birthPlace", place.get("full"))
            entity.add("country", place.get("country"))

        for alias in data.pop("INDIVIDUAL_ALIAS", []):
            for key in ("DATE_OF_BIRTH", "YEAR"):
                entity.add("birthDate", alias.pop(key, None))
            entity.add("birthPlace", alias.pop("CITY_OF_BIRTH", None))
            entity.add("country", alias.pop("COUNTRY_OF_BIRTH", None))
            parse_alias(entity, alias)

        h.audit_data(data, ["VERSIONNUM"])
        context.emit(entity, target=True)
def crawl_company(context: Context, data: Dict[str, Any]):
    """Emit an organization and its relations to persons from one record.

    Args:
        context: Crawler context used to create, look up and emit entities.
        data: Company record; consumed keys are popped from it.

    The entity starts as ``Organization``. Its schema is switched to
    ``Company`` when a person relation resolves to schema ``"Organization"``
    with ``from_prop == "asset"``. Entries under ``related_companies`` are
    consumed but produce no output.
    """
    entity = context.make("Organization")
    entity.id = company_id(context, data.pop("id"))
    entity.add("sourceUrl", data.pop("url_en", None))
    data.pop("url_ru", None)

    for prop, key in (
        ("name", "name_en"),
        ("name", "name_ru"),
        ("name", "name_suggest_output_ru"),
        ("alias", "also_known_as"),
        ("alias", "short_name_en"),
        ("alias", "short_name_ru"),
    ):
        entity.add(prop, data.pop(key, None))

    entity.add("incorporationDate", parse_date(data.pop("founded", None)))
    entity.add("dissolutionDate", parse_date(data.pop("closed", None)))

    # Both status keys are always consumed; the Russian one is the fallback.
    status_ru = data.pop("status_ru", None)
    entity.add("status", data.pop("status_en", status_ru))
    entity.add("status", data.pop("status", None))
    entity.add_cast("Company", "ogrnCode", data.pop("ogrn_code", None))
    entity.add("registrationNumber", data.pop("edrpou", None))

    for country_data in data.pop("related_countries", []):
        rel_type = country_data.pop("relationship_type")
        country_name = country_data.pop("to_country_en", None)
        if not country_name:
            country_name = country_data.pop("to_country_ru")
        res = context.lookup("country_links", rel_type)
        if res is None:
            context.log.warn(
                "Unknown country link",
                rel_type=rel_type,
                entity=entity,
                country=country_name,
            )
            continue
        if res.prop is not None:
            entity.add(res.prop, country_name)

    for rel_data in data.pop("related_persons", []):
        other_wdid = clean_wdid(rel_data.pop("person_wikidata_id"))
        other_id = person_id(context, rel_data.pop("person_id"), other_wdid)
        rel_type_en = rel_data.pop("relationship_type_en", None)
        rel_type_ru = rel_data.pop("relationship_type_ru", None)
        rel_type = rel_type_en or rel_type_ru

        res = context.lookup("person_relations", rel_type)
        if res is None:
            context.log.info(
                "Unknown company/person relation type",
                rel_type=rel_type,
                entity=entity,
                other=other_id,
            )
            continue
        if res.schema is None:
            continue

        if res.schema == "Organization" and res.from_prop == "asset":
            entity.schema = model.get("Company")

        rel = context.make(res.schema)
        short_a = short_id(context, entity.id)
        short_b = short_id(context, other_id)
        rel.id = context.make_slug(short_a, res.schema, short_b)
        rel.add(res.from_prop, entity.id)
        rel.add(res.to_prop, other_id)
        rel.add(res.desc_prop, rel_type)
        for prop, key in (
            ("modifiedAt", "date_confirmed"),
            ("startDate", "date_established"),
            ("endDate", "date_finished"),
        ):
            rel.add(prop, parse_date(rel_data.pop(key)))
        context.emit(rel)

    # Company-to-company relations are not emitted. A disabled draft handled
    # them like the person relations above, using the "company_relations"
    # lookup; for now the entries are only consumed.
    for rel_data in data.pop("related_companies", []):
        pass

    street = data.pop("street", None)
    city = data.pop("city", None)
    address = h.make_address(context, street=street, city=city)
    h.apply_address(context, entity, address)

    if data.pop("state_company", False):
        entity.add("topics", "gov.soe")

    ignore = [
        "wiki",
        "bank_name",
        "other_founders",
        "other_owners",
        "other_managers",
        "other_recipient",
    ]
    h.audit_data(data, ignore=ignore)
    context.emit(entity)
def crawl_row(context: Context, data: Dict[str, str]):
    """Emit a sanctioned entity and its sanction from one flat source row.

    The row may use either of two column naming styles (for example
    ``INDIVIDUAL_Id`` / ``IndividualID`` or ``NATIONALITY`` / ``Nationality``);
    both spellings are consumed. Person-specific properties are set through
    ``add_cast``.

    Args:
        context: Crawler context used to create and emit entities.
        data: Source row; consumed keys are popped from it.

    Raises:
        AssertionError: If no entity id could be derived from the row.
    """
    entity = context.make("LegalEntity")
    # Pop the fallback key with a default of its own. The previous nested
    # ``pop`` without one ran eagerly and raised KeyError for rows that only
    # carry ``INDIVIDUAL_Id``.
    fallback_id = data.pop("IndividualID", None)
    ind_id = data.pop("INDIVIDUAL_Id", fallback_id)
    entity.id = context.make_slug(ind_id)
    assert entity.id, data

    for key in ("COMMENTS", "Comments", "NOTE", "NOTE1", "NOTE2", "NOTE3"):
        entity.add("notes", h.clean_note(data.pop(key, None)))

    entity.add_cast("Person", "nationality", data.pop("NATIONALITY", None))
    entity.add_cast("Person", "nationality", data.pop("Nationality", None))
    entity.add_cast("Person", "title", data.pop("TITLE", None))
    entity.add_cast("Person", "title", data.pop("Title", None))
    entity.add_cast("Person", "position", data.pop("DESIGNATION", None))
    entity.add_cast("Person", "position", data.pop("Designation", None))
    entity.add_cast("Person", "birthPlace", data.pop("PLACEOFBIRTH", None))
    entity.add_cast("Person", "birthPlace", data.pop("IndividualPlaceOfBirth", None))
    entity.add_cast("Person", "birthPlace", data.pop("CITY_OF_BIRTH", None))
    entity.add_cast("Person", "birthDate", data.pop("YEAR", None))
    entity.add_cast("Person", "gender", data.pop("GENDER", None))
    entity.add_cast("Person", "birthDate", parse_date(data.pop("DATE", None)))
    entity.add_cast("Person", "birthDate", parse_date(data.pop("DATE_OF_BIRTH", None)))
    dob = parse_date(data.pop("IndividualDateOfBirth", None))
    entity.add_cast("Person", "birthDate", dob)

    # Birthplace city and state are consumed but not stored.
    data.pop("BIRTHPLACE_x0020_CITY", None)
    data.pop("BIRTHPLACE_x0020_STATE_PROVINCE", None)
    entity.add("country", data.pop("BIRTHPLACE_x0020_COUNTRY", None))
    entity.add("country", data.pop("COUNTRY_OF_BIRTH", None))
    entity.add_cast("Person", "birthPlace", data.pop("BIRTHPLACE_x0020_NOTE", None))

    h.apply_name(
        entity,
        full=data.pop("FullName", None),
        given_name=data.pop("FIRST_NAME", None),
        second_name=data.pop("SECOND_NAME", None),
        name3=data.pop("THIRD_NAME", None),
        name4=data.pop("FOURTH_NAME", None),
        quiet=True,
    )

    # An original-script name containing "?" is not stored as an alias.
    alias = data.pop("NAME_ORIGINAL_SCRIPT", None)
    if alias is not None and "?" not in alias:
        entity.add("alias", alias)
    entity.add("alias", data.pop("SORT_KEY", None))
    data.pop("IndividualAlias", None)

    entity.add_cast("Person", "passportNumber", data.pop("PASSPORT", None))
    entity.add_cast("Person", "passportNumber", data.pop("IndividualDocument", None))
    data.pop("DATE_OF_ISSUE", None)
    data.pop("CITY_OF_ISSUE", None)
    entity.add("country", data.pop("COUNTRY_OF_ISSUE", None))
    entity.add_cast("Person", "idNumber", data.pop("IDNUMBER", None))

    address = h.make_address(
        context,
        full=data.pop("IndividualAddress", None),
        street=data.pop("STREET", None),
        city=data.pop("CITY", None),
        region=data.pop("STATE_PROVINCE", None),
        postal_code=data.pop("ZIP_CODE", None),
        country=data.pop("COUNTRY", None),
    )
    h.apply_address(context, entity, address)

    sanction = h.make_sanction(context, entity)
    inserted_at = parse_date(data.pop("DateInserted", None))
    listed_on = data.pop("ListedON", data.pop("ListedOn", None))
    listed_at = parse_date(listed_on)
    # Each property prefers its own date and falls back to the other one.
    entity.add("createdAt", inserted_at or listed_at)
    sanction.add("listingDate", listed_at or inserted_at)
    sanction.add("startDate", data.pop("FROM_YEAR", None))
    sanction.add("endDate", data.pop("TO_YEAR", None))
    sanction.add("program", data.pop("UN_LIST_TYPE", None))
    sanction.add("unscId", data.pop("REFERENCE_NUMBER", None))
    sanction.add("unscId", data.pop("ReferenceNumber", None))
    sanction.add("authority", data.pop("SUBMITTED_BY", None))

    entity.add("topics", "sanction")
    h.audit_data(data, ignore=["VERSIONNUM", "TYPE_OF_DATE", "ApplicationStatus"])
    context.emit(entity, target=True)
    context.emit(sanction)
def parse_row(context: Context, row):
    """Turn one row of the source list into an entity and a sanction.

    The schema is chosen from ``GroupTypeDescription`` via ``TYPES``. The
    function returns early, without emitting the target entity or its
    sanction, when the group type, the alias type or the alias quality is
    unknown. A ship owner and its ownership may already have been emitted
    when the two later checks fail.

    Args:
        context: Crawler context used to create, log and emit.
        row: Mapping of column name to value; consumed keys are popped.
    """
    group_type = row.pop("GroupTypeDescription")
    schema = TYPES.get(group_type)
    if schema is None:
        context.log.error("Unknown group type", group_type=group_type)
        return

    entity = context.make(schema)
    entity.id = context.make_slug(row.pop("GroupID"))

    sanction = h.make_sanction(context, entity)
    sanction.add("program", row.pop("RegimeName"))
    sanction.add("authority", row.pop("ListingType", None))
    listed_date = h.parse_date(row.pop("DateListed"), FORMATS)
    sanction.add("listingDate", listed_date)
    designated_date = h.parse_date(row.pop("DateDesignated"), FORMATS)
    sanction.add("startDate", designated_date)

    # Fall back to the designation date if the listing date left the
    # property unset.
    entity.add("createdAt", listed_date)
    if not entity.has("createdAt"):
        entity.add("createdAt", designated_date)

    for prop, column in (
        ("authorityId", "UKSanctionsListRef"),
        ("unscId", "UNRef"),
        ("status", "GroupStatus"),
        ("reason", "UKStatementOfReasons"),
    ):
        sanction.add(prop, row.pop(column, None))

    last_updated = h.parse_date(row.pop("LastUpdated"), FORMATS)
    sanction.add("modifiedAt", last_updated)
    entity.add("modifiedAt", last_updated)

    # TODO: derive topics and schema from this??
    entity.add_cast("LegalEntity", "legalForm", row.pop("Entity_Type", None))
    entity.add_cast(
        "LegalEntity",
        "registrationNumber",
        row.pop("Entity_BusinessRegNumber", None),
    )

    row.pop("Ship_Length", None)
    entity.add_cast("Vessel", "flag", row.pop("Ship_Flag", None))
    past_flags = split_new(row.pop("Ship_PreviousFlags", None))
    entity.add_cast("Vessel", "pastFlags", past_flags)
    for prop, column in (
        ("type", "Ship_Type"),
        ("tonnage", "Ship_Tonnage"),
        ("buildDate", "Ship_YearBuilt"),
        ("imoNumber", "Ship_IMONumber"),
    ):
        entity.add_cast("Vessel", prop, row.pop(column, None))

    ship_owner = row.pop("Ship_CurrentOwners", None)
    if ship_owner is not None:
        owner = context.make("LegalEntity")
        owner.id = context.make_slug("named", ship_owner)
        owner.add("name", ship_owner)
        context.emit(owner)

        owner_link = context.make("Ownership")
        owner_link.id = context.make_id(entity.id, "owns", owner.id)
        owner_link.add("owner", owner)
        owner_link.add("asset", entity)
        context.emit(owner_link)

    countries = parse_countries(row.pop("Country", None))
    entity.add("country", countries)

    entity.add("title", split_items(row.pop("Title", None)), quiet=True)

    birth_places = split_items(row.pop("Individual_TownOfBirth", None))
    entity.add_cast("Person", "birthPlace", birth_places)
    birth_date = h.parse_date(row.pop("Individual_DateOfBirth", None), FORMATS)
    entity.add_cast("Person", "birthDate", birth_date)
    birth_countries = parse_countries(row.pop("Individual_CountryOfBirth", None))
    entity.add_cast("Person", "country", birth_countries)
    nationalities = parse_countries(row.pop("Individual_Nationality", None))
    entity.add_cast("Person", "nationality", nationalities)
    positions = split_items(row.pop("Individual_Position", None))
    entity.add_cast("Person", "position", positions)
    entity.add_cast("Person", "gender", row.pop("Individual_Gender", None))

    name_type = row.pop("AliasType", None)
    name_prop = NAME_TYPES.get(name_type)
    if name_prop is None:
        context.log.warning("Unknown name type", type=name_type)
        return

    name_quality = row.pop("AliasQuality", None)
    is_weak = WEAK_QUALITY.get(name_quality)
    if is_weak is None:
        context.log.warning("Unknown name quality", quality=name_quality)
        return

    h.apply_name(
        entity,
        name1=row.pop("name1", None),
        name2=row.pop("name2", None),
        name3=row.pop("name3", None),
        name4=row.pop("name4", None),
        name5=row.pop("name5", None),
        tail_name=row.pop("Name6", None),
        name_prop=name_prop,
        is_weak=is_weak,
        quiet=True,
    )
    entity.add("alias", row.pop("NameNonLatinScript", None))

    address_lines = [
        row.pop(column, None)
        for column in (
            "Address1",
            "Address2",
            "Address3",
            "Address4",
            "Address5",
            "Address6",
        )
    ]
    full_address = join_text(*address_lines, sep=", ")
    address = h.make_address(
        context,
        full=full_address,
        postal_code=row.pop("PostCode", None),
        country=first(countries),
    )
    h.apply_address(context, entity, address)

    passport_numbers = split_items(row.pop("Individual_PassportNumber", None))
    entity.add_cast("Person", "passportNumber", passport_numbers)
    # TODO: the passport details have no target property yet; consumed only.
    row.pop("Individual_PassportDetails", None)

    ni_numbers = split_items(row.pop("Individual_NINumber", None))
    entity.add_cast("Person", "idNumber", ni_numbers)
    # TODO: the NI details have no target property yet; consumed only.
    row.pop("Individual_NIDetails", None)

    for prop, column in (
        ("phone", "PhoneNumber"),
        ("email", "EmailAddress"),
        ("website", "Website"),
    ):
        for item in split_new(row.pop(column, None)):
            entity.add_cast("LegalEntity", prop, item)

    parents = parse_companies(context, row.pop("Entity_ParentCompany", None))
    for parent_name in parents:
        parent = context.make("Organization")
        parent.id = context.make_slug("named", parent_name)
        parent.add("name", parent_name)
        context.emit(parent)

        parent_link = context.make("Ownership")
        parent_link.id = context.make_id(entity.id, "owns", parent.id)
        parent_link.add("owner", parent)
        parent_link.add("asset", entity)
        context.emit(parent_link)

    subsidiaries = parse_companies(context, row.pop("Entity_Subsidiaries", None))
    for subsidiary_name in subsidiaries:
        subsidiary = context.make("Company")
        subsidiary.id = context.make_slug("named", subsidiary_name)
        subsidiary.add("name", subsidiary_name)
        context.emit(subsidiary)

        subsidiary_link = context.make("Ownership")
        subsidiary_link.id = context.make_id(entity.id, "owns", subsidiary.id)
        subsidiary_link.add("owner", entity)
        subsidiary_link.add("asset", subsidiary)
        context.emit(subsidiary_link)

    grp_status = row.pop("GrpStatus", None)
    if grp_status != "A":
        context.log.warning("Unknown GrpStatus", value=grp_status)

    entity.add("notes", h.clean_note(row.pop("OtherInformation", None)))
    h.audit_data(row, ignore=["NonLatinScriptLanguage", "NonLatinScriptType"])

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
def parse_result(context: Context, result):
    """Emit an entity and its sanction from one result record.

    The schema comes from the ``type`` lookup; records with an unknown type
    are logged and skipped. When ``entity_number`` is present, the entity id
    is rebuilt from it under the ``us_ofac_sdn`` dataset.

    Args:
        context: Crawler context used to create, look up and emit.
        result: Result record; consumed keys are popped from it.

    Raises:
        AssertionError: If ``entity_number`` converts to zero, or if a record
            that is neither a Person nor a Vessel carries person-only fields
            (title, nationalities, citizenships, birth dates or places).
        KeyError: If ``id``, ``source_information_url``, ``source_list_url``
            or a required identifier field is missing.
    """
    type_ = result.pop("type", None)
    schema = context.lookup_value("type", type_)
    if schema is None:
        context.log.error("Unknown result type", type=type_)
        return
    entity = context.make(schema)
    entity.id = context.make_slug(result.pop("id"))

    entity_number = result.pop("entity_number", None)
    if entity_number is not None:
        assert int(entity_number)
        entity.id = context.make_slug(entity_number, dataset="us_ofac_sdn")

    # ``name`` is popped with a None default, so guard the clean-up instead
    # of failing with AttributeError on a record without a name.
    # NOTE(review): relies on ``entity.add`` ignoring None, as other call
    # sites here appear to assume -- confirm.
    name = result.pop("name", None)
    if name is not None:
        name = name.replace("and any successor, sub-unit, or subsidiary thereof", "")
    entity.add("name", name)

    for alias in ensure_list(result.pop("alt_names", "")):
        entity.add("alias", alias.split("; "))
    entity.add("notes", result.pop("remarks", None))
    entity.add("country", result.pop("country", None))

    if entity.schema.is_a("Person"):
        entity.add("position", result.pop("title", None))
        entity.add("nationality", result.pop("nationalities", None))
        entity.add("nationality", result.pop("citizenships", None))
        for dob in result.pop("dates_of_birth", []):
            entity.add("birthDate", h.parse_date(dob, FORMATS))
        entity.add("birthPlace", result.pop("places_of_birth", None))
    elif entity.schema.is_a("Vessel"):
        entity.add("flag", result.pop("vessel_flag", None))
        entity.add("callSign", result.pop("call_sign", None))
        entity.add("type", result.pop("vessel_type", None))
        grt = result.pop("gross_registered_tonnage", None)
        entity.add("grossRegisteredTonnage", grt)
        gt = result.pop("gross_tonnage", None)
        entity.add("tonnage", gt)

        # TODO: make adjacent owner entity
        result.pop("vessel_owner", None)

    # The Person branch has already consumed these keys. For every other
    # schema they are expected to be absent or empty.
    assert result.pop("title", None) is None
    assert not len(result.pop("nationalities", []))
    assert not len(result.pop("citizenships", []))
    assert not len(result.pop("dates_of_birth", []))
    assert not len(result.pop("places_of_birth", []))

    for address in result.pop("addresses", []):
        obj = h.make_address(
            context,
            street=address.get("address"),
            city=address.get("city"),
            postal_code=address.get("postal_code"),
            region=address.get("state"),
            country=address.get("country"),
        )
        h.apply_address(context, entity, obj)

    for ident in result.pop("ids", []):
        country = ident.pop("country")
        entity.add("country", country)
        h.apply_feature(
            context,
            entity,
            ident.pop("type"),
            ident.pop("number"),
            country=country,
            date_formats=FORMATS,
            start_date=ident.pop("issue_date", None),
            end_date=ident.pop("expiration_date", None),
        )

    sanction = context.make("Sanction")
    sanction.id = context.make_id(entity.id, "Sanction")
    sanction.add("entity", entity)
    sanction.add("program", result.pop("programs", []))
    sanction.add("provisions", result.pop("license_policy", []))
    sanction.add("reason", result.pop("license_requirement", []))
    sanction.add("authorityId", result.pop("federal_register_notice", None))
    sanction.add("startDate", result.pop("start_date", None))
    sanction.add("endDate", result.pop("end_date", None))
    sanction.add("country", "us")
    sanction.add("authority", result.pop("source", None))

    # TODO: deref
    source_url = deref_url(context, result.pop("source_information_url"))
    sanction.add("sourceUrl", source_url)
    result.pop("source_list_url")

    context.emit(sanction)
    context.emit(entity, target=True)
    h.audit_data(result, ignore=["standard_order"])