# Field codes whose payload is copied onto the entity unchanged:
# field code -> (entity property, key popped from the payload).
_ENTITY_TEXT_FIELDS = {
    "ALIAS": ("alias", "Alias"),
    "PRENOM": ("firstName", "Prenom"),
    "NATIONALITE": ("nationality", "Pays"),
    "TITRE": ("position", "Titre"),
    "SITE_INTERNET": ("website", "SiteInternet"),
    "TELEPHONE": ("phone", "Telephone"),
    "COURRIEL": ("email", "Courriel"),
    "NUMERO_OMI": ("imoNumber", "NumeroOMI"),
    "PASSEPORT": ("passportNumber", "NumeroPasseport"),
    "AUTRE_IDENTITE": ("idNumber", "NumeroCarte"),
}

# Field codes whose payload is copied onto the sanction unchanged:
# field code -> (sanction property, key popped from the payload).
_SANCTION_TEXT_FIELDS = {
    "REFERENCE_UE": ("program", "ReferenceUe"),
    "REFERENCE_ONU": ("program", "ReferenceOnu"),
    "FONDEMENT_JURIDIQUE": ("reason", "FondementJuridiqueLabel"),
    "MOTIFS": ("reason", "Motifs"),
}


def apply_prop(context, entity, sanction, field, value):
    """Apply one field/payload pair to ``entity`` or ``sanction``.

    ``field`` selects the handling; ``value`` is a mapping from which the
    keys relevant to that field are popped (so it is mutated).  A missing
    key raises ``KeyError``.  Field codes that are not recognised are
    ignored without any action.
    """
    plain_entity = _ENTITY_TEXT_FIELDS.get(field)
    if plain_entity is not None:
        prop, key = plain_entity
        entity.add(prop, value.pop(key))
        return

    plain_sanction = _SANCTION_TEXT_FIELDS.get(field)
    if plain_sanction is not None:
        prop, key = plain_sanction
        sanction.add(prop, value.pop(key))
        return

    if field == "SEXE":
        entity.add("gender", h.clean_gender(value.pop("Sexe")))
        return

    if field == "DATE_DE_NAISSANCE":
        year = value.pop("Annee")
        month = value.pop("Mois")
        day = value.pop("Jour")
        entity.add("birthDate", parse_parts(year, month, day))
        return

    if field == "ADRESSE_PM" or field == "ADRESSE_PP":
        full = value.pop("Adresse")
        country = value.pop("Pays")
        postal = h.make_address(context, full=full, country=country)
        h.apply_address(context, entity, postal)
        return

    if field == "LIEU_DE_NAISSANCE":
        entity.add("birthPlace", value.pop("Lieu"))
        entity.add("country", value.pop("Pays"))
        return

    if field == "IDENTIFICATION":
        comment = value.pop("Commentaire")
        content = value.pop("Identification")
        # The comment text is the key that decides which property the
        # identifier is stored under.
        match = context.lookup("identification", comment)
        if match is None:
            context.log.warning(
                "Unknown Identification type",
                comment=comment,
                content=content,
            )
            return
        target_schema = match.schema or entity.schema
        entity.add_cast(target_schema, match.prop, content)
        if match.prop == "notes":
            # For notes the comment is stored next to the content.
            entity.add(match.prop, comment)
def parse_identity(context: Context, entity, node, places):
    """Copy the identity details found under ``node`` onto ``entity``.

    Handles names, addresses, birth / incorporation dates, nationalities,
    places of birth and identification documents.  Each identification
    document is additionally emitted as its own ``Passport`` or
    ``Identification`` entity with ``entity`` as holder.

    Args:
        context: Provides ``make``, ``make_id`` and ``emit``.
        entity: The entity being enriched.
        node: XML element whose descendants are scanned.
        places: Mapping looked up by the ``place-id`` attribute of address
            and place-of-birth elements.
    """
    for name in node.findall(".//name"):
        parse_name(entity, name)

    for address_el in node.findall(".//address"):
        place = places.get(address_el.get("place-id"))
        address = compose_address(context, entity, place, address_el)
        h.apply_address(context, entity, address)

    for bday in node.findall(".//day-month-year"):
        bval = parse_parts(bday.get("year"), bday.get("month"), bday.get("day"))
        # The same element carries a birth date for persons and an
        # incorporation date for everything else.
        if entity.schema.is_a("Person"):
            entity.add("birthDate", bval)
        else:
            entity.add("incorporationDate", bval)

    for nationality in node.findall(".//nationality"):
        country = nationality.find("./country")
        if country is not None:
            entity.add("nationality", country.get("iso-code"))
            entity.add("nationality", country.text)

    for bplace in node.findall(".//place-of-birth"):
        place = places.get(bplace.get("place-id"))
        address = compose_address(context, entity, place, bplace)
        entity.add("birthPlace", address.get("full"))

    for doc in node.findall(".//identification-document"):
        issuer = doc.find("./issuer")
        # A document without an <issuer> child must not abort the entity.
        issuer_name = issuer.text if issuer is not None else None
        type_ = doc.get("document-type")
        number = doc.findtext("./number")
        entity.add("nationality", issuer_name, quiet=True)

        schema = "Identification"
        # Exact comparison: `type_ in ("id-card")` was a substring test on a
        # plain string and raised TypeError when the attribute was absent.
        if type_ == "id-card":
            entity.add("idNumber", number)
        elif type_ in ("passport", "diplomatic-passport"):
            entity.add("idNumber", number)
            schema = "Passport"

        passport = context.make(schema)
        passport.id = context.make_id(entity.id, type_, doc.get("ssid"))
        passport.add("holder", entity)
        passport.add("country", issuer_name)
        passport.add("number", number)
        passport.add("type", type_)
        passport.add("summary", doc.findtext("./remark"))
        passport.add("startDate", doc.findtext("./date-of-issue"))
        passport.add("endDate", doc.findtext("./expiry-date"))
        context.emit(passport)
def parse_row(context, row):
    """Turn one source row into a sanctioned entity plus its sanction.

    ``row`` is consumed destructively: every handled column is popped, and
    anything still left at the end is pretty-printed so that unmapped
    columns get noticed.  A row whose group type cannot be mapped to a
    schema is logged as an error and skipped.
    """
    group_type = row.pop("GroupTypeDescription")
    org_type = row.pop("OrgType", None)

    # Individuals are checked first, then rows carrying a vessel type, then
    # entities (whose schema comes from the org_type lookup).
    if group_type == "Individual":
        schema_name = "Person"
    elif row.get("TypeOfVessel") is not None:
        schema_name = "Vessel"
    elif group_type == "Entity":
        schema_name = context.lookup_value("org_type", org_type, "Organization")
    else:
        context.log.error("Unknown entity type", group_type=group_type)
        return

    entity = context.make(schema_name)
    entity.id = context.make_slug(row.pop("GroupID"))
    if org_type is not None:
        entity.add_cast("LegalEntity", "legalForm", split_items(org_type))

    sanction = h.make_sanction(context, entity)
    for notes_column in ("OtherInformation", "FurtherIdentifiyingInformation"):
        entity.add("notes", row.pop(notes_column, None), quiet=True)

    sanction.add("program", row.pop("RegimeName"))
    sanction.add("authority", row.pop("ListingType", None))
    sanction.add("startDate", h.parse_date(row.pop("DateListed"), FORMATS))
    sanction.add("recordId", row.pop("FCOId", None))
    sanction.add("status", row.pop("GroupStatus", None))
    sanction.add("reason", row.pop("UKStatementOfReasons", None))

    modified = h.parse_date(row.pop("LastUpdated"), FORMATS)
    if modified is not None:
        for proxy in (sanction, entity):
            proxy.add("modifiedAt", modified)
            proxy.context["updated_at"] = modified

    # DoB is sometimes a year only, so the combined column is discarded and
    # the date is rebuilt from its separate parts.
    row.pop("DateOfBirth", None)
    birth_year = row.pop("YearOfBirth", 0)
    birth_month = row.pop("MonthOfBirth", 0)
    birth_day = row.pop("DayOfBirth", 0)
    entity.add_cast(
        "Person", "birthDate", parse_parts(birth_year, birth_month, birth_day)
    )

    entity.add_cast("Person", "gender", h.clean_gender(row.pop("Gender", None)))
    entity.add_cast(
        "LegalEntity", "idNumber", split_items(row.pop("NationalIdNumber", None))
    )
    entity.add_cast(
        "Person", "passportNumber", split_items(row.pop("PassportDetails", None))
    )

    # Vessel columns that are passed through untouched.
    vessel_columns = (
        ("Vessel", "flag", "FlagOfVessel"),
        ("Vessel", "pastFlags", "PreviousFlags"),
        ("Vehicle", "buildDate", "YearBuilt"),
        ("Vehicle", "type", "TypeOfVessel"),
        ("Vessel", "imoNumber", "IMONumber"),
        ("Vessel", "tonnage", "TonnageOfVessel"),
    )
    for cast_schema, prop, column in vessel_columns:
        entity.add_cast(cast_schema, prop, row.pop(column, None))
    row.pop("LengthOfVessel", None)

    entity.add("title", split_items(row.pop("NameTitle", None)), quiet=True)
    name_columns = (
        ("firstName", "name1"),
        ("secondName", "name2"),
        ("middleName", "name3"),
        ("middleName", "name4"),
        ("middleName", "name5"),
    )
    for prop, column in name_columns:
        entity.add(prop, row.pop(column, None), quiet=True)
    last_name = row.pop("Name6", None)
    entity.add("lastName", last_name, quiet=True)

    # Without a FullName column the last name part doubles as full name.
    full_name = row.pop("FullName", last_name)
    row.pop("AliasTypeName")
    name_prop = "alias" if row.pop("AliasType") == "AKA" else "name"
    entity.add(name_prop, full_name)

    entity.add(
        "nationality", parse_countries(row.pop("Nationality", None)), quiet=True
    )
    entity.add("position", split_items(row.pop("Position", None)), quiet=True)
    entity.add(
        "country", parse_countries(row.pop("CountryOfBirth", None)), quiet=True
    )
    countries = parse_countries(row.pop("Country", None))
    entity.add("country", countries)
    entity.add("birthPlace", split_items(row.pop("TownOfBirth", None)), quiet=True)

    address_columns = (
        ("full", "FullAddress"),
        ("street", "address1"),
        ("street2", "address2"),
        ("street3", "address3"),
        ("city", "address4"),
        ("place", "address5"),
        ("region", "address6"),
        ("postal_code", "PostCode"),
    )
    address_parts = {
        keyword: row.pop(column, None) for keyword, column in address_columns
    }
    address_parts["country"] = first(countries)
    h.apply_address(context, entity, h.make_address(context, **address_parts))

    entity.add_cast(
        "LegalEntity", "registrationNumber", row.pop("BusinessRegNumber", None)
    )
    phones = h.clean_phones(split_items(row.pop("PhoneNumber", None), comma=True))
    entity.add_cast("LegalEntity", "phone", phones)
    entity.add_cast(
        "LegalEntity", "website", split_items(row.pop("Website", None), comma=True)
    )
    emails = h.clean_emails(split_items(row.pop("EmailAddress", None), comma=True))
    entity.add_cast("LegalEntity", "email", emails)

    # TODO: graph -- the first three columns describe relations between
    # entities; like the remaining ones they are dropped for now.
    ignored_columns = (
        "Subsidiaries",
        "ParentCompany",
        "CurrentOwners",
        "DateListedDay",
        "DateListedMonth",
        "DateListedYear",
        "LastUpdatedDay",
        "LastUpdatedMonth",
        "LastUpdatedYear",
        "GrpStatus",
        "ID",
        "DateOfBirthId",
    )
    for column in ignored_columns:
        row.pop(column, None)

    # Anything still present is a column this function does not know about.
    if len(row) > 0:
        pprint(row)

    entity.add("topics", "sanction")
    context.emit(entity, target=True, unique=True)
    context.emit(sanction)
def parse_date(el):
    """Return the ``text`` of the date assembled from ``el``'s parts.

    The ``Year``, ``Month`` and ``Day`` child elements of ``el`` are read
    with ``findtext`` (so a missing child yields ``None``) and handed to
    ``parse_parts``; the ``text`` attribute of its result is returned.
    """
    year = el.findtext("./Year")
    month = el.findtext("./Month")
    day = el.findtext("./Day")
    return parse_parts(year, month, day).text
def parse_entry(context: Context, entry: Element):
    """Convert one entry element into an entity and emit it as a target.

    Besides the entity itself, every ``identification`` child is emitted as
    a separate ``Passport`` or ``Identification`` entity held by it.  Names,
    addresses (with their contact info and remarks), birth dates and
    citizenships are copied onto the entity.  An entry whose subject type
    has no schema mapping is logged and skipped.

    Args:
        context: Provides lookups, logging, entity construction and emit.
        entry: The XML element describing one listed subject.
    """
    subject_type = entry.find("./subjectType")
    schema = context.lookup_value(
        "subject_type",
        subject_type.get("code"),
        dataset="eu_fsf",
    )
    if schema is None:
        context.log.warning("Unknown subject type", type=subject_type)
        return

    entity = context.make(schema)
    # Prefer the EU reference number as identifier; fall back to the
    # logical id when the entry has none.
    eu_ref = entry.get("euReferenceNumber")
    if eu_ref is not None:
        entity.id = context.make_slug(eu_ref, dataset="eu_fsf")
    else:
        entity.id = context.make_slug("logical", entry.get("logicalId"))
    entity.add("notes", h.clean_note(entry.findtext("./remark")))
    entity.add("topics", "sanction")
    parse_sanctions(context, entity, entry)

    for name in entry.findall("./nameAlias"):
        # Names not flagged as strong are recorded as weak.
        is_weak = not as_bool(name.get("strong"))
        h.apply_name(
            entity,
            full=name.get("wholeName"),
            first_name=name.get("firstName"),
            middle_name=name.get("middleName"),
            last_name=name.get("lastName"),
            is_weak=is_weak,
            quiet=True,
        )
        entity.add("title", name.get("title"), quiet=True)
        entity.add("position", name.get("function"), quiet=True)
        entity.add("gender", name.get("gender"), quiet=True)

    for node in entry.findall("./identification"):
        id_type = node.get("identificationTypeCode")
        id_schema = "Passport" if id_type == "passport" else "Identification"
        passport = context.make(id_schema)
        passport.id = context.make_id("ID", entity.id, node.get("logicalId"))
        passport.add("holder", entity)
        passport.add("authority", node.get("issuedBy"))
        passport.add("type", node.get("identificationTypeDescription"))
        passport.add("number", node.get("number"))
        passport.add("number", node.get("latinNumber"))
        # NOTE(review): this line used to appear twice verbatim; the copy
        # may have been meant to read a different attribute -- confirm
        # against the source schema.
        passport.add("startDate", node.get("issueDate"))
        passport.add("country", parse_country(node))
        passport.add("country", node.get("countryDescription"))
        for remark in node.findall("./remark"):
            passport.add("summary", remark.text)
        context.emit(passport)

    for node in entry.findall("./address"):
        address = parse_address(context, node)
        h.apply_address(context, entity, address)

        # Iterate the element directly: Element.getchildren() no longer
        # exists in the standard library's ElementTree.
        for child in node:
            # Exact comparison -- `in ("regulationSummary")` was a substring
            # test on a plain string, not tuple membership.
            if child.tag == "regulationSummary":
                continue
            elif child.tag == "remark":
                entity.add("notes", child.text)
            elif child.tag == "contactInfo":
                prop = context.lookup_value(
                    "contact_info",
                    child.get("key"),
                    dataset="eu_fsf",
                )
                if prop is None:
                    context.log.warning("Unknown contact info", node=child)
                else:
                    entity.add(prop, child.get("value"))
            else:
                context.log.warning("Unknown address component", node=child)

    for birth in entry.findall("./birthdate"):
        partial_birth = parse_parts(
            birth.get("year"), birth.get("month"), birth.get("day")
        )
        # Both the full date attribute and the date rebuilt from its parts
        # are recorded.
        entity.add("birthDate", birth.get("birthdate"))
        entity.add("birthDate", partial_birth)
        address = parse_address(context, birth)
        if address is not None:
            entity.add("birthPlace", address.get("full"))
            entity.add("country", address.get("country"))

    for node in entry.findall("./citizenship"):
        entity.add("nationality", parse_country(node), quiet=True)
        entity.add("nationality", node.get("countryDescription"), quiet=True)

    context.emit(entity, target=True)