def parse_entry(emitter, entry):
    """Turn one source entry into an entity, its passports and a sanction.

    The entity is a ``Person`` when the entry's ``Type`` attribute is ``P``
    and a ``LegalEntity`` otherwise. Passports are emitted as they are
    built; the entity and the sanction are emitted last.
    """
    # A LegalEntity is always made first and replaced for persons.
    entity = emitter.make('LegalEntity')
    if entry.get('Type') == 'P':
        entity = emitter.make('Person')
    entity.make_id(entry.get('Id'))
    pdf_link = entry.get('pdf_link')
    entity.add('sourceUrl', pdf_link)

    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id)
    sanction.add('entity', entity)
    sanction.add('authority', 'European External Action Service')
    sanction.add('sourceUrl', pdf_link)
    sanction.add(
        'program',
        jointext(entry.get('programme'), entry.get('legal_basis'), sep=' - '),
    )
    sanction.add('reason', entry.get('remark'))
    sanction.add('startDate', entry.get('reg_date'))

    # (property, child path) pairs copied verbatim from each NAME node.
    name_parts = (
        ('title', './TITLE'),
        ('firstName', './FIRSTNAME'),
        ('middleName', './MIDDLENAME'),
        ('lastName', './LASTNAME'),
        ('position', './FUNCTION'),
    )
    for name_node in entry.findall('./NAME'):
        # Until the entity has a name, WHOLENAME is stored as the name;
        # afterwards it is stored as an alias.
        whole_prop = 'alias' if entity.has('name') else 'name'
        entity.add(whole_prop, name_node.findtext('./WHOLENAME'))
        for prop, path in name_parts:
            entity.add(prop, name_node.findtext(path), quiet=True)
        entity.add(
            'gender',
            GENDERS.get(name_node.findtext('./GENDER')),
            quiet=True,
        )

    for passport_node in entry.findall('./PASSPORT'):
        number = passport_node.findtext('./NUMBER')
        passport = emitter.make('Passport')
        passport.make_id('Passport', entity.id, number)
        passport.add('holder', entity)
        passport.add('passportNumber', number)
        passport.add('country', passport_node.findtext('./COUNTRY'))
        emitter.emit(passport)

    for address_node in entry.findall('./ADDRESS'):
        pieces = [
            address_node.findtext(path)
            for path in ('./STREET', './NUMBER', './CITY', './ZIPCODE')
        ]
        entity.add('address', jointext(*pieces))
        entity.add('country', address_node.findtext('./COUNTRY'))

    for birth_node in entry.findall('./BIRTH'):
        entity.add('birthDate', birth_node.findtext('./DATE'))
        entity.add('birthPlace', birth_node.findtext('./PLACE'))
        entity.add('country', birth_node.findtext('./COUNTRY'))

    for citizenship in entry.findall('./CITIZEN/COUNTRY'):
        entity.add('nationality', citizenship.text, quiet=True)

    emitter.emit(entity)
    emitter.emit(sanction)
def parse_individual(emitter, node):
    """Build a ``Person`` from an individual node and emit it.

    ``parse_common`` is called on the new person first and supplies the
    sanction. Each ``INDIVIDUAL_DOCUMENT`` with at least a number, issue
    date or type is emitted as a ``Passport``. The person and the sanction
    are emitted at the end.
    """
    person = emitter.make('Person')
    sanction = parse_common(emitter, person, node)
    person.add('title', values(node.find('./TITLE')))

    first = node.findtext('./FIRST_NAME')
    second = node.findtext('./SECOND_NAME')
    third = node.findtext('./THIRD_NAME')
    person.add('name', jointext(first, second, third))
    person.add('firstName', first)
    person.add('secondName', second)
    person.add('middleName', third)
    person.add('position', values(node.find('./DESIGNATION')))

    for alias_node in node.findall('./INDIVIDUAL_ALIAS'):
        parse_alias(person, alias_node)

    for address_node in node.findall('./INDIVIDUAL_ADDRESS'):
        parse_address(person, address_node)

    for doc in node.findall('./INDIVIDUAL_DOCUMENT'):
        passport = emitter.make('Passport')
        number = doc.findtext('./NUMBER')
        issued = doc.findtext('./DATE_OF_ISSUE')
        doc_type = doc.findtext('./TYPE_OF_DOCUMENT')
        # Skip documents that carry none of the identifying fields.
        if all(item is None for item in (number, issued, doc_type)):
            continue
        passport.make_id(person.id, number, issued, doc_type)
        passport.add('holder', person)
        passport.add('passportNumber', number)
        passport.add('startDate', issued)
        passport.add('type', doc_type)
        passport.add('type', doc.findtext('./TYPE_OF_DOCUMENT2'))
        passport.add('summary', doc.findtext('./NOTE'))
        # ISSUING_COUNTRY is only consulted when COUNTRY_OF_ISSUE is empty.
        issuer = (
            doc.findtext('./COUNTRY_OF_ISSUE')
            or doc.findtext('./ISSUING_COUNTRY')
        )
        passport.add('country', normalize_country(issuer))
        emitter.emit(passport)

    for nationality in node.findall('./NATIONALITY/VALUE'):
        person.add('nationality', normalize_country(nationality.text))

    for birth in node.findall('./INDIVIDUAL_DATE_OF_BIRTH'):
        # Fall back to the bare year when no full date is given.
        person.add(
            'birthDate',
            birth.findtext('./DATE') or birth.findtext('./YEAR'),
        )

    for birth_place in node.findall('./INDIVIDUAL_PLACE_OF_BIRTH'):
        person.add(
            'country',
            normalize_country(birth_place.findtext('./COUNTRY')),
        )
        pieces = [
            birth_place.findtext(path)
            for path in ('./CITY', './STATE_PROVINCE', './COUNTRY')
        ]
        person.add('birthPlace', jointext(*pieces, sep=', '))

    emitter.emit(person)
    emitter.emit(sanction)
def make_address(
    context: Context,
    full=None,
    remarks=None,
    summary=None,
    po_box=None,
    street=None,
    street2=None,
    street3=None,
    city=None,
    place=None,
    postal_code=None,
    state=None,
    region=None,
    country=None,
    country_code=None,
    key=None,
):
    """Generate an address schema object adjacent to the main entity.

    ``place``/``city`` and the three street arguments are each merged into
    a single comma-separated value. When no ``full`` text ends up on the
    address, one is produced by the formatter from the individual parts.
    If a full text is available, the address id is set to ``addr-<hash>``
    derived from the country, the slugified full text and ``key``.
    """
    city = jointext(place, city, sep=", ")
    street = jointext(street, street2, street3, sep=", ")

    address = context.make("Address")
    plain_values = (
        ("full", full),
        ("remarks", remarks),
        ("summary", summary),
        ("postOfficeBox", po_box),
        ("street", street),
        ("city", city),
        ("postalCode", postal_code),
        ("region", region),
    )
    for prop, value in plain_values:
        address.add(prop, value)
    address.add("state", state, quiet=True)
    address.add("country", country)
    address.add("country", country_code)
    # From here on, use whatever country value the address itself reports.
    country_code = address.first("country")

    if not address.has("full"):
        components = {
            "attention": summary,
            "house": po_box,
            "road": street,
            "postcode": postal_code,
            "city": city,
            "state": jointext(region, state, sep=", "),
        }
        full = get_formatter().one_line(components, country=country_code)
        address.add("full", full)

    if not full:
        # Without a full text there is nothing to derive an id from.
        return address

    hash_id = make_entity_id(country_code, slugify(full), key)
    if hash_id is not None:
        address.id = f"addr-{hash_id}"
    return address
def officer(context, data):
    """Fetch one officer record and emit it as a person with sanctions.

    The record is requested from ``API_URL`` using ``data["officer_id"]``.
    A non-200 response is logged and nothing is emitted. Otherwise one
    ``Sanction`` is emitted per disqualification, followed by the
    ``Person`` itself.
    """
    emitter = EntityEmitter(context)
    officer_id = data.get("officer_id")
    with context.http.get(API_URL % officer_id, auth=AUTH) as res:
        if res.status_code != 200:
            context.log.info("CoH error: %r", res.json)
            return
        record = res.json

        person = emitter.make("Person")
        person.make_id(officer_id)
        self_link = record.get("links", {}).get("self", "/")
        person.add("sourceUrl", urljoin(WEB_URL, self_link))

        surname = record.pop("surname", None)
        person.add("lastName", surname)
        forename = record.pop("forename", None)
        person.add("firstName", forename)
        other_forenames = record.pop("other_forenames", None)
        person.add("middleName", other_forenames)
        person.add("name", jointext(forename, other_forenames, surname))

        person.add("title", record.pop("title", None))
        person.add("nationality", record.pop("nationality", None))
        person.add("birthDate", record.pop("date_of_birth", None))
        person.add("topics", "crime")

        for disqual in record.pop("disqualifications", []):
            case = disqual.get("case_identifier")
            sanction = emitter.make("Sanction")
            sanction.make_id(person.id, case)
            sanction.add("entity", person)
            sanction.add("authority", "UK Companies House")
            sanction.add("program", case)
            start = disqual.pop("disqualified_from", None)
            # Each disqualification overwrites the previous created_at.
            person.context["created_at"] = start
            sanction.add("startDate", start)
            sanction.add("endDate", disqual.pop("disqualified_until", None))
            emitter.emit(sanction)

            # The address attached to the disqualification goes on the person.
            location = disqual.pop("address", {})
            locality = jointext(
                location.get("locality"), location.get("postal_code")
            )
            street = jointext(
                location.get("address_line_1"), location.get("premises")
            )
            person.add(
                "address",
                jointext(
                    street,
                    location.get("address_line_2"),
                    locality,
                    location.get("region"),
                    sep=", ",
                ),
            )

        emitter.emit(person)
def parse_alias(party, doc, alias):
    """Add the name carried by an alias element to ``party``.

    Individual name parts are added under the property that ``NAMES`` maps
    their part type to, unless the alias is flagged ``LowQuality`` or the
    property is ``name``. The joined name is stored as ``name`` for a
    primary alias, ``previousName`` for an ``F.K.A.`` alias and ``alias``
    in every other case.
    """
    is_primary = alias.get('Primary') == 'true'
    is_weak = alias.get('LowQuality') == 'true'
    alias_type = deref(doc, 'AliasType', alias.get('AliasTypeID'))

    collected = {}
    for part in alias.findall(qpath('DocumentedNamePart')):
        value = part.find(qpath('NamePartValue'))
        # Resolve group id -> part type id -> part type -> property name.
        group_id = value.get('NamePartGroupID')
        type_id = deref(doc, 'NamePartGroup', group_id, 'NamePartTypeID')
        field = NAMES.get(deref(doc, 'NamePartType', type_id))
        collected[field] = value.text
        if not (field == 'name' or is_weak):
            party.add(field, value.text)

    order = ('firstName', 'middleName', 'fatherName', 'lastName', 'name')
    full_name = jointext(*[collected.get(key) for key in order])

    if is_primary:
        target = 'name'
    elif alias_type == 'F.K.A.':
        target = 'previousName'
    else:
        target = 'alias'
    party.add(target, full_name)
def parse_alias(party, parts, alias):
    """Add every documented name of an alias element to ``party``.

    ``parts`` maps a ``NamePartGroupID`` to a part type, which ``NAMES``
    in turn maps to a property name. Name parts are added individually
    unless the alias is flagged ``LowQuality`` or the property is ``name``.
    Each joined name is stored as ``name`` (primary alias),
    ``previousName`` (``F.K.A.`` alias) or ``alias``.
    """
    is_primary = alias.get("Primary") == "true"
    is_weak = alias.get("LowQuality") == "true"
    alias_type = ref_value("AliasType", alias.get("AliasTypeID"))

    if is_primary:
        target = "name"
    elif alias_type == "F.K.A.":
        target = "previousName"
    else:
        target = "alias"

    order = ("firstName", "middleName", "fatherName", "lastName", "name")
    for documented in alias.findall("./DocumentedName"):
        collected = {}
        for part in documented.findall("./DocumentedNamePart"):
            value = part.find("./NamePartValue")
            part_type = parts.get(value.get("NamePartGroupID"))
            field = NAMES[part_type]
            collected[field] = value.text
            if not (field == "name" or is_weak):
                party.add(field, value.text)
        party.add(target, jointext(*[collected.get(key) for key in order]))
def parse_individual(emitter, data):
    """Emit a ``Person`` and its ``Sanction`` from one row of the list.

    ``data`` is indexed by the column headings used below. The person id
    is derived from the joined name and the reason for inclusion.
    """
    first = data["Name"]
    middle = data["Middle Name"]
    last = data["Last Name"]
    full_name = jointext(first, middle, last)

    person = emitter.make('Person')
    person.make_id(full_name, data["Reason for inclusion"])
    person.add('name', full_name)
    person.add('lastName', last)
    person.add('firstName', first)
    person.add('fatherName', middle)

    # Some records have multiple dobs, separated by whitespace.
    birth_dates = data["Date of birth"]
    if birth_dates is not None:
        for token in birth_dates.split():
            person.add('birthDate', parse_date(token))
    person.add('birthPlace', data["Place of birth"])

    sanction = emitter.make('Sanction')
    sanction.make_id('Sanction', person.id)
    sanction.add('entity', person)
    sanction.add('authority', 'Kyrgyz Financial Intelligence Unit')
    sanction.add('reason', data["Reason for inclusion"])
    sanction.add('program', data["Category of entity"])
    sanction.add('startDate', parse_date(data["Date of inclusion"]))

    emitter.emit(person)
    emitter.emit(sanction)
def parse_entry(context, node):
    """Emit the entity and sanction described by one list entry.

    An entry with an ``Entity`` child becomes a ``LegalEntity`` whose name
    is split on ``/``; any other entry becomes a ``Person`` built from the
    given and last name. The entity id is a slug of country, schedule and
    item.
    """
    entity_name = node.findtext("./Entity")
    if entity_name is None:
        entity = context.make("Person")
        given_name = node.findtext("./GivenName")
        entity.add("firstName", given_name)
        last_name = node.findtext("./LastName")
        entity.add("lastName", last_name)
        entity.add("name", jointext(given_name, last_name))
        entity.add("birthDate", node.findtext("./DateOfBirth"))
    else:
        entity = context.make("LegalEntity")
        entity.add("name", entity_name.split("/"))

    # ids are per country and entry type (individual/entity)
    item = node.findtext("./Item")
    schedule = node.findtext("./Schedule")
    country = node.findtext("./Country")
    if "/" in country:
        # Only the part before the slash is kept.
        country, _ = country.split("/")
    entity.id = context.make_slug(country, schedule, item, strict=False)
    entity.add("country", country)

    sanction = h.make_sanction(context, entity)
    sanction.add("program", schedule)

    aliases = node.findtext("./Aliases")
    if aliases is not None:
        for alias in aliases.split(", "):
            entity.add("alias", collapse_spaces(alias))

    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
def parse_row(emitter, row):
    """Emit a ``LegalEntity`` and its ``Sanction`` for one source row.

    ``row`` is a mapping keyed by the column names used below. The entity
    is emitted before the sanction is built. The sanction id combines the
    entity id with the ``FR_Citation`` value.
    """
    entity = emitter.make("LegalEntity")
    entity.make_id("USBIS", row.get("Effective_Date"), row.get("Name"))
    entity.add("name", row.get("Name"))
    entity.add("notes", row.get("Action"))
    entity.add("country", row.get("Country"))
    entity.add("modifiedAt", row.get("Last_Update"))
    entity.context["updated_at"] = row.get("Last_Update")
    address = jointext(
        row.get("Street_Address"),
        row.get("Postal_Code"),
        row.get("City"),
        row.get("State"),
        sep=", ",
    )
    entity.add("address", address)
    emitter.emit(entity)

    sanction = emitter.make("Sanction")
    sanction.make_id(entity.id, row.get("FR_Citation"))
    sanction.add("entity", entity)
    sanction.add("program", row.get("FR_Citation"))
    sanction.add("authority", "US Bureau of Industry and Security")
    sanction.add("country", "us")
    sanction.add("startDate", parse_date(row.get("Effective_Date")))
    sanction.add("endDate", parse_date(row.get("Expiration_Date")))
    # The debugging pprint(row) that used to sit here was removed: it
    # dumped every row to stdout on each run.
    emitter.emit(sanction)
def load_locations(doc):
    """Index all ``Location`` elements of ``doc`` by their ``ID``.

    Returns a dict mapping each location id to an ``(address, countries)``
    tuple: ``address`` is the comma-joined location parts and ``countries``
    is a set holding the ``Unknown`` part value plus the ``ISO2`` codes
    referenced through ``LocationAreaCode`` and ``LocationCountry``
    children.
    """
    locations = {}
    for location in doc.findall("./Locations/Location"):
        location_id = location.get("ID")

        # Collect the location parts keyed by their resolved part type.
        parts = {}
        for part in location.findall("./LocationPart"):
            type_ = ref_value("LocPartType", part.get("LocPartTypeID"))
            parts[type_] = part.findtext("./LocationPartValue/Value")

        # ADDRESS2 used to be listed twice here, which duplicated it in
        # the output and dropped any ADDRESS3 part entirely.
        address = jointext(
            parts.get("Unknown"),
            parts.get("ADDRESS1"),
            parts.get("ADDRESS2"),
            parts.get("ADDRESS3"),
            parts.get("CITY"),
            parts.get("POSTAL CODE"),
            parts.get("REGION"),
            parts.get("STATE/PROVINCE"),
            sep=", ",
        )

        # NOTE(review): the set contains None when there is no "Unknown"
        # part; kept as-is so existing callers see the same values.
        countries = {parts.get("Unknown")}
        for area in location.findall("./LocationAreaCode"):
            area_code = ref_get("AreaCode", area.get("AreaCodeID"))
            country = ref_get("Country", area_code.get("CountryID"))
            countries.add(country.get("ISO2"))
        for country_ref in location.findall("./LocationCountry"):
            country = ref_get("Country", country_ref.get("CountryID"))
            countries.add(country.get("ISO2"))

        locations[location_id] = (address, countries)
    return locations
def parse(context, data):
    """Fetch one notice as JSON and emit it as a ``Person``.

    The URL is taken from ``data["url"]``. Nothing is emitted when the
    joined name is ``None`` or ``Identity unknown``. The arrest warrants
    are folded into a single summary with one ``[country] charge`` line
    per warrant.
    """
    emitter = EntityEmitter(context)
    url = data.get('url')
    context.log.info("Interpol Red Notices URL: %s", url)
    with context.http.get(url) as result:
        context.log.info("result.text=%s", result.text)
        # Previously bound to the name ``dict``, shadowing the builtin.
        notice = json.loads(result.text)
        name = jointext(notice['forename'], notice['name'])
        if name is None or name == 'Identity unknown':
            return

        entity = emitter.make('Person')
        entity.make_id(url)
        entity.add('name', name)
        entity.add('sourceUrl', url)
        entity.add('description', notice['distinguishing_marks'])
        entity.add('keywords', 'REDNOTICE')
        entity.add('keywords', 'CRIME')
        if ', ' in name:
            # "Last, First" is additionally recorded as "First Last".
            last, first = name.split(', ', 1)
            entity.add('alias', jointext(first, last))

        summary = ''
        for warrant in notice['arrest_warrants']:
            issuer = jointext(
                '[', warrant['issuing_country_id'], ']', sep=''
            )
            charge = jointext(issuer, warrant['charge'])
            summary = jointext(summary, charge, sep='\r\n')
        entity.add('summary', summary)

        entity.add('lastName', notice['name'])
        entity.add('firstName', notice['forename'])
        entity.add('nationality', notice['nationalities'])
        # .get() keeps a notice with an unmapped sex_id from aborting the
        # whole record with a KeyError; the gender is simply left unset.
        entity.add('gender', SEXES.get(notice['sex_id']))
        entity.add('birthDate', notice['date_of_birth'])
        entity.add('birthPlace', notice['place_of_birth'])
        emitter.emit(entity)
def officer(context, data):
    """Fetch one officer record and emit it as a person with sanctions.

    The record is requested from ``API_URL`` using ``data['officer_id']``.
    A non-200 response ends the function silently. Otherwise one
    ``Sanction`` is emitted per disqualification, followed by the
    ``Person``.
    """
    emitter = EntityEmitter(context)
    officer_id = data.get('officer_id')
    with context.http.get(API_URL % officer_id, auth=AUTH) as res:
        if res.status_code != 200:
            return
        record = res.json

        person = emitter.make('Person')
        person.make_id(officer_id)
        self_link = record.get('links', {}).get('self', '/')
        person.add('sourceUrl', urljoin(WEB_URL, self_link))

        surname = record.pop('surname', None)
        person.add('lastName', surname)
        forename = record.pop('forename', None)
        person.add('firstName', forename)
        other_forenames = record.pop('other_forenames', None)
        person.add('middleName', other_forenames)
        person.add('name', jointext(forename, other_forenames, surname))

        person.add('title', record.pop('title', None))
        person.add(
            'nationality',
            normalize_country(record.pop('nationality', None)),
        )
        person.add('birthDate', record.pop('date_of_birth', None))

        for disqual in record.pop('disqualifications', []):
            case = disqual.get('case_identifier')
            sanction = emitter.make('Sanction')
            sanction.make_id(person.id, case)
            sanction.add('entity', person)
            sanction.add('authority', 'UK Companies House')
            sanction.add('program', case)
            sanction.add('startDate', disqual.pop('disqualified_from', None))
            sanction.add('endDate', disqual.pop('disqualified_until', None))
            emitter.emit(sanction)

            # The address attached to the disqualification goes on the person.
            location = disqual.pop('address', {})
            locality = jointext(
                location.get('locality'), location.get('postal_code')
            )
            street = jointext(
                location.get('address_line_1'), location.get('premises')
            )
            person.add(
                'address',
                jointext(
                    street,
                    location.get('address_line_2'),
                    locality,
                    location.get('region'),
                    sep=', ',
                ),
            )

        emitter.emit(person)
def make_address(address):
    """Join the address components into one comma-separated string.

    ``address`` is a mapping; the keys below are read in this order and
    handed to ``jointext`` with ``', '`` as the separator.
    """
    keys = (
        'remarks',
        'co',
        'location',
        'address-details',
        'p-o-box',
        'zip-code',
        'area',
    )
    components = [address.get(key) for key in keys]
    return jointext(*components, sep=', ')
def sanction_text(self, sanction):
    """Return a one-line description of ``sanction``.

    All values of the program, reason, status, start date and end date
    properties are joined, in that order, with ``" - "``.
    """
    props = ("program", "reason", "status", "startDate", "endDate")
    pieces = [value for prop in props for value in sanction.get(prop)]
    return jointext(*pieces, sep=" - ")
def whole_name(parts):
    """Join the name components in ``parts`` into one space-separated name.

    ``parts`` is a mapping; the keys below are read in this order and
    passed to ``jointext``.
    """
    keys = (
        'given-name',
        'further-given-name',
        'father-name',
        'family-name',
        'grand-father-name',
        'tribal-name',
        'whole-name',
        'other',
    )
    components = [parts.get(key) for key in keys]
    return jointext(*components, sep=' ')
def make_address(address):
    """Build a single comma-separated address line from ``address``.

    The mapping is read key by key in the order listed below and the
    values are combined by ``jointext``.
    """
    ordered_keys = [
        "remarks",
        "co",
        "location",
        "address-details",
        "p-o-box",
        "zip-code",
        "area",
    ]
    return jointext(*map(address.get, ordered_keys), sep=", ")
def parse_individual(context, node):
    """Build a ``Person`` from an individual node and emit it.

    ``parse_common`` is called on the new person first and supplies the
    sanction. Each ``INDIVIDUAL_DOCUMENT`` with at least a number, issue
    date or type is emitted as a ``Passport``. The person and the sanction
    are emitted at the end.
    """
    person = context.make("Person")
    sanction = parse_common(context, person, node)

    person.add("title", values(node.find("./TITLE")))
    person.add("firstName", node.findtext("./FIRST_NAME"))
    person.add("secondName", node.findtext("./SECOND_NAME"))
    person.add("middleName", node.findtext("./THIRD_NAME"))
    person.add("position", values(node.find("./DESIGNATION")))

    for alias_node in node.findall("./INDIVIDUAL_ALIAS"):
        parse_alias(person, alias_node)

    for address_node in node.findall("./INDIVIDUAL_ADDRESS"):
        parse_address(person, address_node)

    for doc in node.findall("./INDIVIDUAL_DOCUMENT"):
        passport = context.make("Passport")
        number = doc.findtext("./NUMBER")
        issued = doc.findtext("./DATE_OF_ISSUE")
        doc_type = doc.findtext("./TYPE_OF_DOCUMENT")
        # Skip documents that carry none of the identifying fields.
        if all(item is None for item in (number, issued, doc_type)):
            continue
        passport.make_id(person.id, number, issued, doc_type)
        passport.add("holder", person)
        passport.add("passportNumber", number)
        passport.add("startDate", issued)
        passport.add("type", doc_type)
        passport.add("type", doc.findtext("./TYPE_OF_DOCUMENT2"))
        passport.add("summary", doc.findtext("./NOTE"))
        # ISSUING_COUNTRY is only consulted when COUNTRY_OF_ISSUE is empty.
        issuer = (
            doc.findtext("./COUNTRY_OF_ISSUE")
            or doc.findtext("./ISSUING_COUNTRY")
        )
        passport.add("country", issuer)
        context.emit(passport)

    for nationality in node.findall("./NATIONALITY/VALUE"):
        person.add("nationality", nationality.text)

    for birth in node.findall("./INDIVIDUAL_DATE_OF_BIRTH"):
        # Fall back to the bare year when no full date is given.
        person.add(
            "birthDate",
            birth.findtext("./DATE") or birth.findtext("./YEAR"),
        )

    for birth_place in node.findall("./INDIVIDUAL_PLACE_OF_BIRTH"):
        person.add("country", birth_place.findtext("./COUNTRY"))
        pieces = [
            birth_place.findtext(path)
            for path in ("./CITY", "./STATE_PROVINCE", "./COUNTRY")
        ]
        person.add("birthPlace", jointext(*pieces, sep=", "))

    context.emit(person)
    context.emit(sanction)
def whole_name(parts):
    """Assemble a full name from the components found in ``parts``.

    The mapping is read key by key in the order listed below and the
    values are combined by ``jointext`` with a single space.
    """
    ordered_keys = [
        "given-name",
        "further-given-name",
        "father-name",
        "family-name",
        "grand-father-name",
        "tribal-name",
        "whole-name",
        "other",
    ]
    return jointext(*map(parts.get, ordered_keys), sep=" ")
def parse_person(emitter, node):
    """Build a ``Person`` from ``node`` and hand it to ``parse_common``.

    The full name is joined as first name, patronymic, surname. Nothing
    is emitted here directly; the entity is passed on to ``parse_common``.
    """
    surname = node.findtext("./Surname")
    given = node.findtext("./Name")
    patronymic = node.findtext("./Patronomic")

    entity = emitter.make("Person")
    entity.add("lastName", surname)
    entity.add("firstName", given)
    entity.add("fatherName", patronymic)
    entity.add("name", jointext(given, patronymic, surname))
    entity.add("birthDate", parse_date(node.findtext("./DataBirth")))
    entity.add("birthPlace", node.findtext("./PlaceBirth"))
    parse_common(emitter, node, entity)
def parse_address(entity, addr):
    """Add the address and country found in ``addr`` to ``entity``.

    Address nodes whose combined text is empty after stripping are
    ignored. The country is included in the address text and also added,
    normalized, as the entity's country.
    """
    if not addr.xpath('string()').strip():
        return
    country = addr.findtext('./COUNTRY')
    pieces = [
        addr.findtext(path)
        for path in ('./NOTE', './STREET', './CITY', './STATE_PROVINCE')
    ]
    entity.add('address', jointext(*pieces, country, sep=', '))
    entity.add('country', normalize_country(country))
def parse_address(entity, addr):
    """Copy the address text and country of ``addr`` onto ``entity``.

    Nothing is added when the node's combined text is empty after
    stripping. The country appears both inside the joined address and as
    a separate ``country`` value.
    """
    has_text = bool(addr.xpath("string()").strip())
    if not has_text:
        return
    country = addr.findtext("./COUNTRY")
    note = addr.findtext("./NOTE")
    street = addr.findtext("./STREET")
    city = addr.findtext("./CITY")
    province = addr.findtext("./STATE_PROVINCE")
    entity.add(
        "address",
        jointext(note, street, city, province, country, sep=", "),
    )
    entity.add("country", country)
def parse_feature(doc, feature):
    """Resolve a feature element to a plain value.

    Returns, in order of precedence:

    * the result of ``parse_date_period`` when a ``DatePeriod`` is present;
    * an ``(address, country_code)`` tuple when a ``VersionLocation`` is
      present (``country_code`` is ``None`` if no country is referenced);
    * the dereferenced ``DetailReference``, or else the text, of a
      ``VersionDetail``;
    * ``None`` when none of these children exist.
    """
    detail = feature.find(qpath('VersionDetail'))

    period = feature.find(qpath('DatePeriod'))
    if period is not None:
        return parse_date_period(period)

    vlocation = feature.find(qpath('VersionLocation'))
    if vlocation is not None:
        location = deref(doc, 'Location', vlocation.get('LocationID'),
                         element=True)
        country_code = None

        # Collect the location parts keyed by their resolved part type.
        parts = {}
        for part in location.findall(qpath('LocationPart')):
            type_ = deref(doc, 'LocPartType', part.get('LocPartTypeID'))
            parts[type_] = part.findtext(qpath('Value'))

        # ADDRESS2 used to be listed twice here, which duplicated it in
        # the output and dropped any ADDRESS3 part entirely.
        address = jointext(parts.get('Unknown'),
                           parts.get('ADDRESS1'),
                           parts.get('ADDRESS2'),
                           parts.get('ADDRESS3'),
                           parts.get('CITY'),
                           parts.get('POSTAL CODE'),
                           parts.get('REGION'),
                           parts.get('STATE/PROVINCE'),
                           sep=', ')

        # The last country reference encountered wins.
        for area in location.findall(qpath('LocationAreaCode')):
            country_id = deref(doc, 'AreaCode', area.get('AreaCodeID'),
                               'CountryID')
            country_code = deref(doc, 'Country', country_id, 'ISO2')
        for country in location.findall(qpath('LocationCountry')):
            country_id = country.get('CountryID')
            country_code = deref(doc, 'Country', country_id, 'ISO2')
        return (address, country_code)

    if detail is not None:
        reference_id = detail.get('DetailReferenceID')
        if reference_id is not None:
            return deref(doc, 'DetailReference', reference_id)
        return detail.text
    return None
def parse(context, data):
    """Parse one notice page and emit it as a ``Person``.

    The page is re-read through ``context.http.rehash``. Nothing is
    emitted (and the emitter is not finalized) when the name is missing
    or ``Identity unknown``. Detail rows are mapped to properties by the
    slugified text of their first cell.
    """
    # Rows whose value is copied unchanged into a single property.
    direct = {
        'charges': 'summary',
        'present_family_name': 'lastName',
        'forename': 'firstName',
        'place_of_birth': 'birthPlace',
    }
    emitter = EntityEmitter(context)
    with context.http.rehash(data) as result:
        doc = result.html
        name = element_text(doc.find('.//div[@class="nom_fugitif_wanted"]'))
        if name is None or name == 'Identity unknown':
            return

        url = data.get('url')
        entity = emitter.make('Person')
        entity.make_id(url)
        entity.add('name', name)
        entity.add('sourceUrl', url)
        wanted_node = doc.find('.//span[@class="nom_fugitif_wanted_small"]')
        entity.add('program', element_text(wanted_node))
        entity.add('keywords', 'REDNOTICE')
        entity.add('keywords', 'CRIME')

        if ', ' in name:
            # "Last, First" is additionally recorded as "First Last".
            last, first = name.split(', ', 1)
            entity.add('alias', jointext(first, last))

        for row in doc.findall('.//div[@class="bloc_detail"]//tr'):
            title_cell, value_cell = row.findall('./td')
            key = slugify(element_text(title_cell), sep='_')
            value = element_text(value_cell)
            if value is None:
                continue
            if key in direct:
                entity.add(direct[key], value)
            elif key == 'nationality':
                for country in value.split(', '):
                    entity.add('nationality', country)
            elif key == 'sex':
                entity.add('gender', SEXES[value])
            elif key == 'date_of_birth':
                # Keep only the text in front of any parenthesis.
                entity.add('birthDate', value.split('(')[0])

        emitter.emit(entity)
    emitter.finalize()
def parse_entry(emitter, node):
    """Emit the entity and sanction described by one list entry.

    The entity is a ``LegalEntity`` when the node has an ``Entity`` child
    and a ``Person`` otherwise. Both the entity and the sanction are
    always emitted, whether or not the entry lists any aliases.
    """
    # ids are per country and entry type (individual/entity)
    country = node.findtext('./Country')
    if ' / ' in country:
        # Keep only the part before the first separator; maxsplit keeps
        # the unpacking valid when the separator occurs more than once.
        country, _ = country.split(' / ', 1)
    country_code = normalize_country(country)
    entity_name = node.findtext('./Entity')
    item = node.findtext('.//Item')

    entity = emitter.make('LegalEntity')
    if entity_name is None:
        entity = emitter.make('Person')
    entity.make_id(country, entity_name, item)
    entity.add('name', entity_name)
    entity.add('country', country_code)

    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id)
    sanction.add('entity', entity)
    sanction.add('authority', 'Canadian international sanctions')
    sanction.add('program', node.findtext('.//Schedule'))

    given_name = node.findtext('.//GivenName')
    entity.add('firstName', given_name, quiet=True)
    last_name = node.findtext('.//LastName')
    entity.add('lastName', last_name, quiet=True)
    entity.add('name', jointext(given_name, last_name))

    dob = node.findtext('.//DateOfBirth')
    if dob is not None:
        # Reverse the slash-separated parts and re-join them with dashes.
        dob = '-'.join(reversed(dob.split('/')))
        entity.add('birthDate', dob, quiet=True)

    # An entry without aliases used to return here, so its entity and
    # sanction were never emitted.
    names = node.findtext('.//Aliases')
    if names is not None:
        for name in names.split(', '):
            entity.add('alias', collapse_spaces(name))

    emitter.emit(entity)
    emitter.emit(sanction)
def parse_entry(context, node):
    """Emit the entity and sanction described by one list entry.

    The entity is a ``LegalEntity`` when the node has an ``Entity`` child
    and a ``Person`` otherwise. Its id is derived from ``CASEMA``, the
    country, the entity name and the item.
    """
    # ids are per country and entry type (individual/entity)
    country = node.findtext("./Country")
    if " / " in country:
        # Only the part before the separator is kept.
        country, _ = country.split(" / ")
    entity_name = node.findtext("./Entity")
    item = node.findtext(".//Item")

    entity = context.make("LegalEntity")
    if entity_name is None:
        entity = context.make("Person")
    entity.make_id("CASEMA", country, entity_name, item)
    entity.add("name", entity_name)
    entity.add("country", country)

    sanction = context.make("Sanction")
    sanction.make_id("Sanction", entity.id)
    sanction.add("entity", entity)
    sanction.add("authority", "Canadian international sanctions")
    sanction.add("program", node.findtext(".//Schedule"))

    given = node.findtext(".//GivenName")
    entity.add("firstName", given, quiet=True)
    surname = node.findtext(".//LastName")
    entity.add("lastName", surname, quiet=True)
    entity.add("name", jointext(given, surname))

    birth_date = node.findtext(".//DateOfBirth")
    if birth_date is not None:
        # Reverse the slash-separated parts and re-join them with dashes.
        pieces = birth_date.split("/")
        pieces.reverse()
        entity.add("birthDate", "-".join(pieces), quiet=True)

    aliases = node.findtext(".//Aliases")
    if aliases is not None:
        for alias in aliases.split(", "):
            entity.add("alias", collapse_spaces(alias))

    context.emit(entity)
    context.emit(sanction)
def parse_row(emitter, row):
    """Emit a ``LegalEntity`` and its ``Sanction`` for one source row.

    ``row`` is a mapping keyed by the column names used below. The entity
    is emitted before the sanction is built.
    """
    name = row.get('Name')
    effective = row.get('Effective_Date')
    citation = row.get('FR_Citation')

    entity = emitter.make('LegalEntity')
    entity.make_id(effective, name)
    entity.add('name', name)
    entity.add('notes', row.get('Action'))
    entity.add('country', normalize_country(row.get('Country')))
    address_fields = ('Street_Address', 'Postal_Code', 'City', 'State')
    entity.add(
        'address',
        jointext(*[row.get(field) for field in address_fields], sep=', '),
    )
    emitter.emit(entity)

    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id, citation)
    sanction.add('entity', entity)
    sanction.add('program', citation)
    sanction.add('authority', 'US Bureau of Industry and Security')
    sanction.add('country', 'us')
    sanction.add('startDate', effective)
    emitter.emit(sanction)
def parse_entry(emitter, entry):
    """Turn one source entry into an entity, its passports and a sanction.

    The entity is a ``Person`` when the entry's ``Type`` attribute is ``P``
    and a ``LegalEntity`` otherwise; its id is ``eeas-<Id>``. Passports
    are emitted as they are built; the entity and the sanction are emitted
    last.
    """
    reg_date = entry.get("reg_date")
    pdf_link = entry.get("pdf_link")

    # A LegalEntity is always made first and replaced for persons.
    entity = emitter.make("LegalEntity")
    if entry.get("Type") == "P":
        entity = emitter.make("Person")
    entity.id = "eeas-%s" % entry.get("Id")
    entity.add("sourceUrl", pdf_link)
    entity.add("modifiedAt", reg_date)
    entity.context["created_at"] = reg_date

    sanction = emitter.make("Sanction")
    sanction.make_id(entity.id)
    sanction.add("entity", entity)
    sanction.add("authority", "European External Action Service")
    sanction.add("sourceUrl", pdf_link)
    sanction.add("program", entry.get("programme"))
    sanction.add("program", entry.get("legal_basis"))
    sanction.add("reason", entry.get("remark"))
    sanction.add("startDate", reg_date)

    # (property, child path) pairs copied verbatim from each NAME node.
    name_parts = (
        ("title", "./TITLE"),
        ("firstName", "./FIRSTNAME"),
        ("middleName", "./MIDDLENAME"),
        ("lastName", "./LASTNAME"),
        ("position", "./FUNCTION"),
    )
    for name_node in entry.findall("./NAME"):
        # Until the entity has a name, WHOLENAME is stored as the name;
        # afterwards it is stored as an alias.
        whole_prop = "alias" if entity.has("name") else "name"
        entity.add(whole_prop, name_node.findtext("./WHOLENAME"))
        for prop, path in name_parts:
            entity.add(prop, name_node.findtext(path), quiet=True)
        entity.add(
            "gender",
            GENDERS.get(name_node.findtext("./GENDER")),
            quiet=True,
        )

    for passport_node in entry.findall("./PASSPORT"):
        number = passport_node.findtext("./NUMBER")
        passport = emitter.make("Passport")
        passport.make_id("Passport", entity.id, number)
        passport.add("holder", entity)
        passport.add("passportNumber", number)
        passport.add("country", passport_node.findtext("./COUNTRY"))
        emitter.emit(passport)

    for address_node in entry.findall("./ADDRESS"):
        pieces = [
            address_node.findtext(path)
            for path in ("./STREET", "./NUMBER", "./CITY", "./ZIPCODE")
        ]
        entity.add("address", jointext(*pieces))
        entity.add("country", address_node.findtext("./COUNTRY"))

    for birth_node in entry.findall("./BIRTH"):
        entity.add("birthDate", birth_node.findtext("./DATE"))
        entity.add("birthPlace", birth_node.findtext("./PLACE"))
        entity.add("country", birth_node.findtext("./COUNTRY"))

    for citizenship in entry.findall("./CITIZEN/COUNTRY"):
        entity.add("nationality", citizenship.text, quiet=True)

    emitter.emit(entity)
    emitter.emit(sanction)
def parse_entry(emitter, group, rows):
    """Merge all rows of one group into a single entity and sanction.

    ``rows`` is an iterable of mutable mappings; values are popped from
    each row as they are consumed. The entity starts as a ``LegalEntity``
    and is switched to ``Person`` when a row's ``Group Type`` is
    ``Individual``. The first joined name becomes the entity name, later
    ones aliases. The entity and the sanction are emitted once, after all
    rows have been processed.
    """
    entity = emitter.make('LegalEntity')
    entity.make_id(group)
    sanction = emitter.make('Sanction')
    sanction.make_id(entity.id, 'Sanction')
    sanction.add('entity', entity)
    sanction.add('authority', 'HM Treasury Financial sanctions targets')
    sanction.add('country', 'gb')

    for row in rows:
        if row.pop('Group Type') == 'Individual':
            entity.schema = model.get('Person')
        row.pop('Alias Type', None)

        name1 = row.pop('Name 1')
        entity.add('firstName', name1, quiet=True)
        name2 = row.pop('Name 2')
        name3 = row.pop('Name 3')
        name4 = row.pop('Name 4')
        name5 = row.pop('Name 5')
        name6 = row.pop('Name 6')
        entity.add('lastName', name6, quiet=True)
        name = jointext(name1, name2, name3, name4, name5, name6)
        if not entity.has('name'):
            entity.add('name', name)
        else:
            entity.add('alias', name)

        entity.add('title', row.pop('Title'), quiet=True)
        sanction.add('program', row.pop('Regime'))
        last_updated = parse_date(row.pop('Last Updated'))
        sanction.add('modifiedAt', last_updated)
        sanction.add('startDate', parse_date(row.pop('Listed On')))
        entity.add('modifiedAt', last_updated)
        entity.add('position', row.pop('Position'), quiet=True)
        entity.add('notes', row.pop('Other Information'), quiet=True)
        entity.add('birthDate', parse_date(row.pop('DOB')), quiet=True)

        nationality = normalize_country(row.pop('Nationality', None))
        entity.add('nationality', nationality, quiet=True)

        country = row.pop('Country', None)
        entity.add('country', normalize_country(country))
        # The raw country text is also the last component of the address.
        address = jointext(row.pop('Address 1', None),
                           row.pop('Address 2', None),
                           row.pop('Address 3', None),
                           row.pop('Address 4', None),
                           row.pop('Address 5', None),
                           row.pop('Address 6', None),
                           row.pop('Post/Zip Code', None),
                           country)
        entity.add('address', address, quiet=True)

        passport = row.pop('Passport Details', None)
        entity.add('passportNumber', passport, quiet=True)

        national_id = row.pop('NI Number', None)
        entity.add('nationalId', national_id, quiet=True)

        # The write-only ``country_of_birth`` list was dropped, and the
        # loop variable no longer shadows ``country`` above.
        for birth_country in split_items(row.pop('Country of Birth')):
            entity.add('country', normalize_country(birth_country))

        for town in split_items(row.pop('Town of Birth', None)):
            entity.add('birthPlace', town)

    emitter.emit(entity)
    emitter.emit(sanction)
def parse_entry(emitter, group, rows):
    """Merge all rows of one group into a single entity and sanction.

    ``rows`` is an iterable of mutable mappings; values are popped from
    each row as they are consumed. The entity id is ``gbhmt-<group>``. The
    entity starts as a ``LegalEntity`` and is switched to ``Person`` when
    a row's ``Group Type`` is ``Individual``. The entity and the sanction
    are emitted once, after all rows have been processed.
    """
    entity = emitter.make("LegalEntity")
    entity.id = "gbhmt-%s" % group
    sanction = emitter.make("Sanction")
    sanction.make_id(entity.id, "Sanction")
    sanction.add("entity", entity)
    sanction.add("authority", "HM Treasury Financial sanctions targets")
    sanction.add("country", "gb")

    for row in rows:
        if row.pop("Group Type") == "Individual":
            entity.schema = model.get("Person")
        row.pop("Alias Type", None)

        first = row.pop("Name 1")
        entity.add("firstName", first, quiet=True)
        second = row.pop("Name 2")
        third = row.pop("Name 3")
        fourth = row.pop("Name 4")
        fifth = row.pop("Name 5")
        sixth = row.pop("Name 6")
        entity.add("lastName", sixth, quiet=True)
        full_name = jointext(first, second, third, fourth, fifth, sixth)
        # The first joined name is the entity name, later ones are aliases.
        name_prop = "alias" if entity.has("name") else "name"
        entity.add(name_prop, full_name)

        entity.add("position", row.pop("Position"), quiet=True)
        entity.add("notes", row.pop("Other Information"), quiet=True)
        entity.add("birthDate", parse_date(row.pop("DOB")), quiet=True)
        entity.add("nationality", row.pop("Nationality", None), quiet=True)
        entity.add("title", row.pop("Title"), quiet=True)

        sanction.add("program", row.pop("Regime"))
        sanction.add("startDate", parse_date(row.pop("Listed On")))

        updated = parse_date(row.pop("Last Updated"))
        if updated is not None:
            sanction.add("modifiedAt", updated)
            sanction.context["updated_at"] = updated
            entity.add("modifiedAt", updated)
            entity.context["updated_at"] = updated

        country = row.pop("Country", None)
        entity.add("country", country)

        # Address lines 1-6 and the post code, popped in this order.
        address_keys = ["Address %d" % index for index in range(1, 7)]
        address_keys.append("Post/Zip Code")
        address_parts = [row.pop(key, None) for key in address_keys]
        entity.add(
            "address", jointext(*address_parts, country), quiet=True
        )

        entity.add(
            "passportNumber", row.pop("Passport Details", None), quiet=True
        )
        entity.add("nationalId", row.pop("NI Number", None), quiet=True)

        for birth_country in split_items(row.pop("Country of Birth")):
            entity.add("country", birth_country)

        for town in split_items(row.pop("Town of Birth", None)):
            entity.add("birthPlace", town)

    emitter.emit(entity)
    emitter.emit(sanction)
def parse_entry(emitter, entry):
    """Turn one list entry into an entity, its passports and a sanction.

    The entity is a ``Person`` when ``type-entry`` is ``2`` and a
    ``LegalEntity`` otherwise. Every ``document-list`` child is emitted as
    a ``Passport``; the entity and the sanction are emitted last.
    """
    # A LegalEntity is always made first and replaced for persons.
    entity = emitter.make("LegalEntity")
    if entry.findtext("./type-entry") == "2":
        entity = emitter.make("Person")
    entity.make_id(entry.findtext("number-entry"))

    sanction = emitter.make("Sanction")
    sanction.make_id("Sanction", entity.id)
    sanction.add("entity", entity)
    sanction.add("authority", "State Financial Monitoring Service of Ukraine")
    source_url = "http://www.sdfm.gov.ua/articles.php?cat_id=87&lang=en"
    sanction.add("sourceUrl", source_url)
    sanction.add("program", entry.findtext("./program-entry"))

    listed = entry.findtext("./date-entry")
    if listed:
        # The entry date is given as YYYYMMDD.
        listed_at = datetime.strptime(listed, "%Y%m%d")
        entity.context["created_at"] = listed_at.isoformat()
        sanction.add("startDate", listed_at.date())

    # (property, child path) pairs; the same order is used for the name.
    aka_fields = (
        ("firstName", "./aka-name1"),
        ("secondName", "./aka-name2"),
        ("middleName", "./aka-name3"),
        ("lastName", "./aka-name4"),
    )
    for aka in entry.findall("./aka-list"):
        pieces = []
        for prop, path in aka_fields:
            text = aka.findtext(path)
            entity.add(prop, text, quiet=True)
            pieces.append(text)
        full_name = jointext(*pieces)
        if aka.findtext("type-aka") == "N":
            entity.add("name", full_name)
        elif aka.findtext("./quality-aka") == "2":
            entity.add("weakAlias", full_name)
        else:
            entity.add("alias", full_name)

    for title_node in entry.findall("./title-list"):
        entity.add("title", title_node.text, quiet=True)

    for doc in entry.findall("./document-list"):
        registration = doc.findtext("./document-reg")
        number = doc.findtext("./document-id")
        issuer = doc.findtext("./document-country")
        passport = emitter.make("Passport")
        passport.make_id("Passport", entity.id, registration, number, issuer)
        passport.add("holder", entity)
        passport.add("passportNumber", number)
        passport.add("summary", registration)
        passport.add("country", issuer)
        emitter.emit(passport)

    for id_node in entry.findall("./id-number-list"):
        entity.add("idNumber", id_node.text)

    for address_node in entry.findall("./address-list"):
        entity.add("address", address_node.findtext("./address"))

    for birth_place in entry.findall("./place-of-birth-list"):
        entity.add("birthPlace", birth_place.text, quiet=True)

    for birth_date in entry.findall("./date-of-birth-list"):
        entity.add("birthDate", parse_date(birth_date.text), quiet=True)

    for nationality in entry.findall("./nationality-list"):
        entity.add("nationality", nationality.text, quiet=True)

    emitter.emit(entity)
    emitter.emit(sanction)