def parse_case(case):
    """Convert one case dict into a record and hand it to ``source.emit``.

    The record is built from the ``url``, ``name``, ``last_updated``,
    ``reason``, ``place_of_birth``, ``sex``, ``forename``,
    ``present_family_name`` and ``nationality`` keys of ``case`` and then
    overlaid with the module-level ``SOURCE`` mapping.  Nothing is returned.

    ``date_of_birth`` is set only when the first space-separated token of the
    raw value parses as ``%Y`` or ``%d/%m/%Y``.  A case without a date of
    birth is emitted without the field instead of raising; a value that
    matches neither format is logged and the record is still emitted.
    """
    url = case.get('url')
    # The raw name is split on ', ' and the pieces are passed to combine_name
    # in reverse order.
    name = combine_name(*reversed(case.get('name').split(', ')))
    updated = dateutil_parse(case.get('last_updated'))
    record = {
        # The last path segment of the URL serves as the source-local id.
        'uid': make_id('interpol', url.split('/')[-1]),
        'source_url': url,
        'name': name,
        'summary': case.get('reason'),
        'updated_at': updated.date().isoformat(),
        'place_of_birth': case.get('place_of_birth'),
        'gender': case.get('sex', '').lower(),
        'first_name': case.get('forename'),
        'last_name': case.get('present_family_name'),
        'nationality': normalize_country(case.get('nationality')),
        'identities': [],
        'addresses': [],
        'other_names': []
    }
    record.update(SOURCE)

    raw_birth = case.get('date_of_birth')
    if raw_birth is not None:
        # Only the first token is a date; anything after a space is ignored.
        birth = raw_birth.split(' ')[0]
        parsed = None
        try:
            parsed = datetime.strptime(birth, '%Y')
        except ValueError:
            try:
                parsed = datetime.strptime(birth, '%d/%m/%Y')
            except ValueError as ex:
                # Neither format matched: keep the record, drop the field.
                log.exception(ex)
        if parsed is not None:
            record['date_of_birth'] = parsed.date().isoformat()

    source.emit(record)
def parse_common(node, type_):
    """Build the record fields shared by all entry types from an XML node.

    :param node: element exposing ``findtext``; must contain
        ``UN_LIST_TYPE`` and ``REFERENCE_NUMBER`` (both are stripped).
    :param type_: value stored under the record's ``type`` key.
    :returns: the record dict, overlaid with the module-level ``BASE``
        mapping.  Nothing is emitted here.

    ``name`` is replaced by ``NAME_ORIGINAL_SCRIPT`` when that element exists,
    and ``updated_at`` by ``LAST_DAY_UPDATED/VALUE`` when that element exists.
    """
    program_ref = '%s (%s)' % (node.findtext('./UN_LIST_TYPE').strip(),
                               node.findtext('./REFERENCE_NUMBER').strip())
    record = {
        'uid': make_id('un', 'sc', node.findtext('./DATAID')),
        'type': type_,
        'program': program_ref,
        'summary': node.findtext('./COMMENTS1'),
        'name': combine_name(node.findtext('./FIRST_NAME'),
                             node.findtext('./SECOND_NAME'),
                             node.findtext('./THIRD_NAME')),
        'function': node.findtext('./DESIGNATION/VALUE'),
        'updated_at': node.findtext('./LISTED_ON'),
        'nationality': normalize_country(node.findtext('./NATIONALITY/VALUE')),
        'other_names': [],
        'addresses': [],
        'identities': []
    }
    record.update(BASE)

    orig = node.findtext('./NAME_ORIGINAL_SCRIPT')
    if orig is not None:
        record['name'] = orig

    last_updated = node.findtext('./LAST_DAY_UPDATED/VALUE')
    if last_updated is not None:
        record['updated_at'] = last_updated

    # findtext() yields None when the element is absent, so both date sources
    # may be missing; guard before the substring test to avoid a TypeError.
    updated_at = record['updated_at']
    if updated_at is not None and ':' in updated_at:
        # Values containing ':' lose everything after their last '-'
        # (presumably a trailing UTC offset such as "-04:00" -- TODO confirm).
        record['updated_at'] = updated_at.rsplit('-', 1)[0]
    return record
def get_names(aka):
    """Collect the ``aka-name1`` .. ``aka-name4`` parts of an alias element.

    :param aka: element exposing ``findtext``.
    :returns: dict that always carries ``other_name`` (all present parts
        passed to ``combine_name``).  When at least one part is present the
        last one becomes ``last_name`` and the preceding ones fill
        ``first_name``, ``second_name`` and ``middle_name`` in that order.
    """
    paths = ('./aka-name1', './aka-name2', './aka-name3', './aka-name4')
    names = [text for text in (aka.findtext(path) for path in paths)
             if text is not None]
    data = {'other_name': combine_name(*names)}
    if not names:
        return data

    # Detach the last part by position.  list.remove() deletes the first
    # *equal* value, which drops the wrong element when an earlier part
    # repeats the last one (e.g. "Ali", "Hassan", "Ali").
    data['last_name'] = names.pop()
    for key, value in zip(('first_name', 'second_name', 'middle_name'), names):
        data[key] = value
    return data
def get_names(aka):
    """Split the ``aka-name1`` .. ``aka-name4`` parts of an alias into fields.

    :param aka: element exposing ``findtext``.
    :returns: dict with ``other_name`` (result of ``combine_name`` over every
        part that is present) and, if any part is present, ``last_name`` (the
        final part) plus ``first_name`` / ``second_name`` / ``middle_name``
        for the parts before it, in order.
    """
    names = [
        aka.findtext('./aka-name1'),
        aka.findtext('./aka-name2'),
        aka.findtext('./aka-name3'),
        aka.findtext('./aka-name4')
    ]
    names = [n for n in names if n is not None]
    data = {'other_name': combine_name(*names)}
    if not names:
        return data

    data['last_name'] = names[-1]
    # Slice by position rather than names.remove(names[-1]): remove() deletes
    # the first matching value, so a part equal to the last one would be
    # dropped from the front and shift every given name.
    given = names[:-1]
    if len(given) > 0:
        data['first_name'] = given[0]
    if len(given) > 1:
        data['second_name'] = given[1]
    if len(given) > 2:
        data['middle_name'] = given[2]
    return data
def parse_common(node, type_):
    """Build the record fields shared by all entry types from an XML node.

    :param node: element exposing ``findtext``; must contain
        ``UN_LIST_TYPE`` and ``REFERENCE_NUMBER`` (both are stripped).
    :param type_: value stored under the record's ``type`` key.
    :returns: the record dict, overlaid with the module-level ``BASE``
        mapping.  Nothing is emitted here.

    ``name`` is replaced by ``NAME_ORIGINAL_SCRIPT`` when that element exists,
    and ``updated_at`` by ``LAST_DAY_UPDATED/VALUE`` when that element exists.
    """
    program_ref = '%s (%s)' % (node.findtext('./UN_LIST_TYPE').strip(),
                               node.findtext('./REFERENCE_NUMBER').strip())
    record = {
        'uid': make_id('un', 'sc', node.findtext('./DATAID')),
        'type': type_,
        'program': program_ref,
        'summary': node.findtext('./COMMENTS1'),
        'name': combine_name(node.findtext('./FIRST_NAME'),
                             node.findtext('./SECOND_NAME'),
                             node.findtext('./THIRD_NAME')),
        'function': node.findtext('./DESIGNATION/VALUE'),
        'updated_at': node.findtext('./LISTED_ON'),
        'nationality': source.normalize_country(
            node.findtext('./NATIONALITY/VALUE')),
        'other_names': [],
        'addresses': [],
        'identities': []
    }
    record.update(BASE)

    orig = node.findtext('./NAME_ORIGINAL_SCRIPT')
    if orig is not None:
        record['name'] = orig

    last_updated = node.findtext('./LAST_DAY_UPDATED/VALUE')
    if last_updated is not None:
        record['updated_at'] = last_updated

    # findtext() yields None when the element is absent, so both date sources
    # may be missing; guard before the substring test to avoid a TypeError.
    updated_at = record['updated_at']
    if updated_at is not None and ':' in updated_at:
        # Values containing ':' lose everything after their last '-'
        # (presumably a trailing UTC offset such as "-04:00" -- TODO confirm).
        record['updated_at'] = updated_at.rsplit('-', 1)[0]
    return record
def get_name_data(names):
    """Turn ``(name_part, value)`` pairs into a dict of name fields.

    :param names: iterable of ``(name_part, value)`` pairs.  ``name_part``
        must provide ``get`` (for ``name-part-type`` and ``order``) and
        ``getparent``; ``value`` is the text of that part.
    :returns: dict with ``quality`` (taken from the parent of the last pair
        seen) and any of ``other_name``, ``last_name``, ``first_name``,
        ``second_name``.  An empty ``names`` yields an empty dict.

    Values that are empty or just ``-`` are ignored.  If no ``whole-name``
    part supplied ``other_name``, it is built by passing the usable values,
    sorted by their integer ``order`` attribute, to ``combine_name``.
    """
    type_to_field = {
        'whole-name': 'other_name',
        'family-name': 'last_name',
        'given-name': 'first_name',
        'further-given-name': 'second_name',
    }
    data = {}
    parts = []
    for name_part, value in names:
        np_type = name_part.get('name-part-type')
        data['quality'] = name_part.getparent().get('quality')
        if not value or value.strip() == '-':
            continue
        field = type_to_field.get(np_type)
        if field is not None:
            data[field] = value
        parts.append((value, int(name_part.get('order'))))

    if 'other_name' not in data and parts:
        # Index-based key instead of ``lambda (a, b): b``: tuple parameters
        # are a syntax error on Python 3, while this form runs on 2 and 3.
        ordered = sorted(parts, key=lambda part: part[1])
        data['other_name'] = combine_name(*[text for text, _ in ordered])
    return data
def parse_entry(group, rows):
    """Fold every row of one group into a single record and emit it.

    :param group: identifier passed to ``make_id`` to form the record uid.
    :param rows: iterable of dict-like rows.  Rows are consumed
        destructively: most columns are removed with ``pop`` as they are read.

    Record-level fields are overwritten by each row in turn.  The first row
    supplies the primary name; each later row is appended to ``other_names``.
    Addresses and identity documents accumulate across rows.  The finished
    record is handed to ``source.emit``; nothing is returned.
    """
    name_columns = ('Title', 'Name 1', 'Name 2', 'Name 3',
                    'Name 4', 'Name 5', 'Name 6')
    address_columns = ('Address 1', 'Address 2', 'Address 3',
                       'Address 4', 'Address 5', 'Address 6')

    record = SOURCE.copy()
    record['uid'] = make_id('gb', 'hmt', group)
    record['identities'] = []
    record['addresses'] = []
    record['other_names'] = []

    for row in rows:
        # Read every column first, in a fixed order, then store them together.
        entity_type = row.pop('Group Type').lower()
        birth_date = parse_date(row.pop('DOB'))
        birth_town = row.pop('Town of Birth')
        birth_country = source.normalize_country(row.pop('Country of Birth'))
        # 'Nationality' is only peeked at: the passport entry below reuses it.
        nationality = source.normalize_country(row.get('Nationality'))
        regime = row.pop('Regime')
        remarks = row.pop('Other Information')
        updated = parse_date(row.pop('Last Updated'))
        position = row.pop('Position')

        record['type'] = entity_type
        record['date_of_birth'] = birth_date
        record['place_of_birth'] = birth_town
        record['country_of_birth'] = birth_country
        record['nationality'] = nationality
        record['program'] = regime
        record['summary'] = remarks
        record['updated_at'] = updated
        record['function'] = position

        # Structured name parts must be captured before the columns are
        # popped for the combined display name.
        name_fields = {
            'first_name': row.get('Name 1'),
            'second_name': row.get('Name 2'),
            'middle_name': row.get('Name 3'),
            'last_name': row.get('Name 6')
        }
        full_name = combine_name(*[row.pop(col) for col in name_columns])

        if 'name' in record:
            # A primary name already exists, so this row describes an alias.
            name_fields['other_name'] = full_name
            name_fields['type'] = row.pop('Alias Type')
            record['other_names'].append(name_fields)
        else:
            record['name'] = full_name
            record.update(name_fields)

        address_text = combine_name(*[row.pop(col) for col in address_columns])
        if len(address_text) > 0:
            record['addresses'].append({
                'text': address_text,
                'postal_code': row.pop('Post/Zip Code')
            })

        if row.get('Passport Details'):
            record['identities'].append({
                'type': 'Passport',
                'number': row.pop('Passport Details'),
                'country': source.normalize_country(row.get('Nationality'))
            })

        if row.get('NI Number'):
            record['identities'].append({
                'type': 'NI',
                'number': row.pop('NI Number'),
                'country': source.normalize_country(row.get('Country'))
            })

    source.emit(record)
def parse_entry(source, record, entry):
    """Fill ``record`` from one XML ``entry`` and pass it to ``source.emit``.

    :param source: object providing ``normalize_country`` and ``emit``.
    :param record: dict updated in place with the parsed fields.
    :param entry: element exposing ``findtext`` / ``findall``.

    An entry whose ``sdnType`` is anything other than ``Individual`` is
    typed ``entity`` and carries no ``last_name`` (on the record or on its
    aliases).  Nothing is returned.
    """
    uid = make_id('us', 'ofac', entry.findtext('uid'))
    program = entry.findtext('./programList/program')
    remarks = entry.findtext('./remarks')
    first_name = entry.findtext('./firstName')
    last_name = entry.findtext('./lastName')
    record.update({
        'uid': uid,
        'type': 'individual',
        'program': program,
        'summary': remarks,
        'first_name': first_name,
        'last_name': last_name,
        'name': combine_name(first_name, last_name)
    })

    is_entity = entry.findtext('./sdnType') != 'Individual'
    if is_entity:
        record['type'] = 'entity'
        # Nothing below sets 'last_name' on the record again, so removing it
        # once here is sufficient.
        record.pop('last_name', None)

    record['other_names'] = []
    for aka in entry.findall('./akaList/aka'):
        alias_first = aka.findtext('./firstName')
        alias_last = aka.findtext('./lastName')
        alias = {
            'type': aka.findtext('./type'),
            'quality': aka.findtext('./category'),
            'first_name': alias_first
        }
        if not is_entity:
            alias['last_name'] = alias_last
        alias['other_name'] = combine_name(alias_first, alias_last)
        record['other_names'].append(alias)

    record['identities'] = []
    for ident in entry.findall('./idList/id'):
        record['identities'].append({
            'type': ident.findtext('./idType'),
            'number': ident.findtext('./idNumber'),
            'country': source.normalize_country(ident.findtext('./idCountry'))
        })

    record['addresses'] = []
    for address in entry.findall('./addressList/address'):
        record['addresses'].append({
            'address1': address.findtext('./address1'),
            'address2': address.findtext('./address2'),
            'city': address.findtext('./city'),
            'country': source.normalize_country(address.findtext('./country'))
        })

    # Only items flagged as the main entry are used; a later one overwrites
    # an earlier one.
    for birth_place in entry.findall('./placeOfBirthList/placeOfBirthItem'):
        if birth_place.findtext('./mainEntry') == 'true':
            record['place_of_birth'] = birth_place.findtext('./placeOfBirth')

    for birth_date in entry.findall('./dateOfBirthList/dateOfBirthItem'):
        if birth_date.findtext('./mainEntry') == 'true':
            record['date_of_birth'] = parse_date(
                birth_date.findtext('./dateOfBirth'))

    source.emit(record)
def parse_entry(source, record, entry):
    """Fill ``record`` from one XML ``entry`` and pass it to ``source.emit``.

    :param source: object whose ``emit`` receives the finished record.
    :param record: dict updated in place with the parsed fields.
    :param entry: element exposing ``findtext`` / ``findall``.

    Countries go through the module-level ``normalize_country``.  An entry
    whose ``sdnType`` is anything other than ``Individual`` is typed
    ``entity`` and carries no ``last_name`` (on the record or on its
    aliases).  Nothing is returned.
    """
    uid = make_id('us', 'ofac', entry.findtext('uid'))
    program = entry.findtext('./programList/program')
    remarks = entry.findtext('./remarks')
    first_name = entry.findtext('./firstName')
    last_name = entry.findtext('./lastName')
    record.update({
        'uid': uid,
        'type': 'individual',
        'program': program,
        'summary': remarks,
        'first_name': first_name,
        'last_name': last_name,
        'name': combine_name(first_name, last_name)
    })

    is_entity = entry.findtext('./sdnType') != 'Individual'
    if is_entity:
        record['type'] = 'entity'
        # Nothing below sets 'last_name' on the record again, so removing it
        # once here is sufficient.
        record.pop('last_name', None)

    record['other_names'] = []
    for aka in entry.findall('./akaList/aka'):
        alias_first = aka.findtext('./firstName')
        alias_last = aka.findtext('./lastName')
        alias = {
            'type': aka.findtext('./type'),
            'quality': aka.findtext('./category'),
            'first_name': alias_first
        }
        if not is_entity:
            alias['last_name'] = alias_last
        alias['other_name'] = combine_name(alias_first, alias_last)
        record['other_names'].append(alias)

    record['identities'] = []
    for ident in entry.findall('./idList/id'):
        record['identities'].append({
            'type': ident.findtext('./idType'),
            'number': ident.findtext('./idNumber'),
            'country': normalize_country(ident.findtext('./idCountry'))
        })

    record['addresses'] = []
    for address in entry.findall('./addressList/address'):
        record['addresses'].append({
            'address1': address.findtext('./address1'),
            'address2': address.findtext('./address2'),
            'city': address.findtext('./city'),
            'country': normalize_country(address.findtext('./country'))
        })

    # Only items flagged as the main entry are used; a later one overwrites
    # an earlier one.
    for birth_place in entry.findall('./placeOfBirthList/placeOfBirthItem'):
        if birth_place.findtext('./mainEntry') == 'true':
            record['place_of_birth'] = birth_place.findtext('./placeOfBirth')

    for birth_date in entry.findall('./dateOfBirthList/dateOfBirthItem'):
        if birth_date.findtext('./mainEntry') == 'true':
            record['date_of_birth'] = parse_date(
                birth_date.findtext('./dateOfBirth'))

    source.emit(record)
def parse_entry(group, rows):
    """Fold every row of one group into a single record and emit it.

    :param group: identifier passed to ``make_id`` to form the record uid.
    :param rows: iterable of dict-like rows.  Rows are consumed
        destructively: most columns are removed with ``pop`` as they are read.

    Record-level fields are overwritten by each row in turn.  The first row
    supplies the primary name; each later row is appended to ``other_names``.
    Addresses and identity documents accumulate across rows.  Countries go
    through the module-level ``normalize_country``.  The finished record is
    handed to ``source.emit``; nothing is returned.
    """
    name_columns = ('Title', 'Name 1', 'Name 2', 'Name 3',
                    'Name 4', 'Name 5', 'Name 6')
    address_columns = ('Address 1', 'Address 2', 'Address 3',
                       'Address 4', 'Address 5', 'Address 6')

    record = SOURCE.copy()
    record['uid'] = make_id('gb', 'hmt', group)
    record['identities'] = []
    record['addresses'] = []
    record['other_names'] = []

    for row in rows:
        # Read every column first, in a fixed order, then store them together.
        entity_type = row.pop('Group Type').lower()
        birth_date = parse_date(row.pop('DOB'))
        birth_town = row.pop('Town of Birth')
        birth_country = normalize_country(row.pop('Country of Birth'))
        # 'Nationality' is only peeked at: the passport entry below reuses it.
        nationality = normalize_country(row.get('Nationality'))
        regime = row.pop('Regime')
        remarks = row.pop('Other Information')
        updated = parse_date(row.pop('Last Updated'))
        position = row.pop('Position')

        record['type'] = entity_type
        record['date_of_birth'] = birth_date
        record['place_of_birth'] = birth_town
        record['country_of_birth'] = birth_country
        record['nationality'] = nationality
        record['program'] = regime
        record['summary'] = remarks
        record['updated_at'] = updated
        record['function'] = position

        # Structured name parts must be captured before the columns are
        # popped for the combined display name.
        name_fields = {
            'first_name': row.get('Name 1'),
            'second_name': row.get('Name 2'),
            'middle_name': row.get('Name 3'),
            'last_name': row.get('Name 6')
        }
        full_name = combine_name(*[row.pop(col) for col in name_columns])

        if 'name' in record:
            # A primary name already exists, so this row describes an alias.
            name_fields['other_name'] = full_name
            name_fields['type'] = row.pop('Alias Type')
            record['other_names'].append(name_fields)
        else:
            record['name'] = full_name
            record.update(name_fields)

        address_text = combine_name(*[row.pop(col) for col in address_columns])
        if len(address_text) > 0:
            record['addresses'].append({
                'text': address_text,
                'postal_code': row.pop('Post/Zip Code')
            })

        if row.get('Passport Details'):
            record['identities'].append({
                'type': 'Passport',
                'number': row.pop('Passport Details'),
                'country': normalize_country(row.get('Nationality'))
            })

        if row.get('NI Number'):
            record['identities'].append({
                'type': 'NI',
                'number': row.pop('NI Number'),
                'country': normalize_country(row.get('Country'))
            })

    source.emit(record)