def parse_common(node, type_):
    """Build the record fields common to every entry read from ``node``.

    Args:
        node: XML element for one list entry; only ``findtext`` is used.
        type_: value stored under the record's ``type`` key.

    Returns:
        dict: the parsed fields with ``BASE`` merged on top, plus empty
        ``other_names``, ``addresses`` and ``identities`` lists for the
        caller to fill.

    Raises:
        AttributeError: if ``UN_LIST_TYPE`` or ``REFERENCE_NUMBER`` is
            absent (``findtext`` returns ``None``, which has no ``strip``).
    """
    program_ref = '%s (%s)' % (
        node.findtext('./UN_LIST_TYPE').strip(),
        node.findtext('./REFERENCE_NUMBER').strip(),
    )
    record = {
        'uid': make_id('un', 'sc', node.findtext('./DATAID')),
        'type': type_,
        'program': program_ref,
        'summary': node.findtext('./COMMENTS1'),
        'name': combine_name(node.findtext('./FIRST_NAME'),
                             node.findtext('./SECOND_NAME'),
                             node.findtext('./THIRD_NAME')),
        'function': node.findtext('./DESIGNATION/VALUE'),
        'updated_at': node.findtext('./LISTED_ON'),
        'nationality': normalize_country(
            node.findtext('./NATIONALITY/VALUE')),
        'other_names': [],
        'addresses': [],
        'identities': [],
    }
    record.update(BASE)

    # The original-script spelling, when given, replaces the combined name.
    orig = node.findtext('./NAME_ORIGINAL_SCRIPT')
    if orig is not None:
        record['name'] = orig

    # The last-update date, when given, takes precedence over LISTED_ON.
    last_updated = node.findtext('./LAST_DAY_UPDATED/VALUE')
    if last_updated is not None:
        record['updated_at'] = last_updated

    # Both date elements may be absent; guard so that ``in`` is never applied
    # to None. Values containing ':' lose everything after their last '-'
    # (presumably a trailing UTC offset -- confirm against the feed).
    updated_at = record['updated_at']
    if updated_at is not None and ':' in updated_at:
        record['updated_at'] = updated_at.rsplit('-', 1)[0]
    return record
def parse_case(emit, case):
    """Turn one scraped case mapping into a record and emit it.

    Args:
        emit: object whose ``entity(record)`` method receives the result.
        case: mapping read with ``get``; the keys used are ``url``, ``name``,
            ``last_updated``, ``reason``, ``place_of_birth``, ``sex``,
            ``forename``, ``present_family_name``, ``nationality`` and
            ``date_of_birth``.

    Raises:
        AttributeError: if ``url`` or ``name`` is missing from ``case``.
    """
    url = case.get('url')
    # ``name`` is split on ', ' and the pieces are passed in reverse order.
    name = combine_name(*reversed(case.get('name').split(', ')))
    updated = dateutil_parse(case.get('last_updated'))
    record = {
        'uid': make_id('interpol', url.split('/')[-1]),
        'source_url': url,
        'name': name,
        'summary': case.get('reason'),
        'updated_at': updated.date().isoformat(),
        'place_of_birth': case.get('place_of_birth'),
        'gender': case.get('sex', '').lower(),
        'first_name': case.get('forename'),
        'last_name': case.get('present_family_name'),
        'nationality': normalize_country(case.get('nationality')),
        'identities': [],
        'addresses': [],
        'other_names': [],
    }
    record.update(SOURCE)

    # Only the text before the first space is parsed. A missing date of
    # birth is handled like an unparseable one (logged, record still
    # emitted) instead of raising AttributeError on None.
    birth = (case.get('date_of_birth') or '').split(' ')[0]
    try:
        parsed = datetime.strptime(birth, '%Y')
    except ValueError:
        # Not a bare year; fall back to day/month/year.
        try:
            parsed = datetime.strptime(birth, '%d/%m/%Y')
        except ValueError as ex:
            log.exception(ex)
            parsed = None
    if parsed is not None:
        record['date_of_birth'] = parsed.date().isoformat()
    emit.entity(record)
def get_names(aka):
    """Split the name parts of an alias element into named fields.

    Args:
        aka: element read with ``findtext``; the children ``aka-name1`` to
            ``aka-name4`` are used and missing ones are skipped.

    Returns:
        dict: always contains ``other_name`` (all present parts passed to
        ``combine_name``). When at least one part is present, the final part
        becomes ``last_name`` and the parts before it fill ``first_name``,
        ``second_name`` and ``middle_name`` in that order.
    """
    paths = ('./aka-name1', './aka-name2', './aka-name3', './aka-name4')
    names = [text for text in (aka.findtext(path) for path in paths)
             if text is not None]
    data = {'other_name': combine_name(*names)}
    if not names:
        return data
    # pop() removes the final element itself. list.remove(value) would delete
    # the FIRST part equal to the last name, shifting the remaining parts
    # whenever a name part is repeated.
    data['last_name'] = names.pop()
    for key, value in zip(('first_name', 'second_name', 'middle_name'),
                          names):
        data[key] = value
    return data
def parse_common(node, type_):
    """Build the record fields common to every entry read from ``node``.

    Args:
        node: XML element for one list entry; only ``findtext`` is used.
        type_: value stored under the record's ``type`` key.

    Returns:
        dict: the parsed fields with ``BASE`` merged on top, plus empty
        ``other_names``, ``addresses`` and ``identities`` lists for the
        caller to fill.

    Raises:
        AttributeError: if ``UN_LIST_TYPE`` or ``REFERENCE_NUMBER`` is
            absent (``findtext`` returns ``None``, which has no ``strip``).
    """
    program_ref = '%s (%s)' % (
        node.findtext('./UN_LIST_TYPE').strip(),
        node.findtext('./REFERENCE_NUMBER').strip(),
    )
    record = {
        'uid': make_id('un', 'sc', node.findtext('./DATAID')),
        'type': type_,
        'program': program_ref,
        'summary': node.findtext('./COMMENTS1'),
        'name': combine_name(node.findtext('./FIRST_NAME'),
                             node.findtext('./LAST_NAME')),
        'function': node.findtext('./DESIGNATION/VALUE'),
        'updated_at': node.findtext('./LISTED_ON'),
        'nationality': normalize_country(
            node.findtext('./NATIONALITY/VALUE')),
        'other_names': [],
        'addresses': [],
        'identities': [],
    }
    record.update(BASE)

    # The original-script spelling, when given, replaces the combined name.
    orig = node.findtext('./NAME_ORIGINAL_SCRIPT')
    if orig is not None:
        record['name'] = orig

    # The last-update date, when given, takes precedence over LISTED_ON.
    last_updated = node.findtext('./LAST_DAY_UPDATED/VALUE')
    if last_updated is not None:
        record['updated_at'] = last_updated

    # Both date elements may be absent; guard so that ``in`` is never applied
    # to None. Values containing ':' lose everything after their last '-'
    # (presumably a trailing UTC offset -- confirm against the feed).
    updated_at = record['updated_at']
    if updated_at is not None and ':' in updated_at:
        record['updated_at'] = updated_at.rsplit('-', 1)[0]
    return record
def get_name_data(names):
    """Collect name fields from ``(name_part, value)`` pairs.

    Args:
        names: iterable of ``(name_part, value)`` pairs. Each ``name_part``
            must offer ``get('name-part-type')``, ``get('order')`` and
            ``getparent()``; ``value`` is the text belonging to that part.

    Returns:
        dict: ``quality`` (read from the parent of the last pair seen) plus
        whichever of ``other_name``, ``last_name``, ``first_name`` and
        ``second_name`` had a usable value. When no ``whole-name`` part
        supplied ``other_name``, it is built by ``combine_name`` from the
        usable values in ascending ``order``.
    """
    field_by_type = {
        'whole-name': 'other_name',
        'family-name': 'last_name',
        'given-name': 'first_name',
        'further-given-name': 'second_name',
    }
    data = {}
    parts = []
    for name_part, value in names:
        np_type = name_part.get('name-part-type')
        data['quality'] = name_part.getparent().get('quality')
        # Skip missing/empty values and values that are just a lone '-'.
        if not value or value.strip() == '-':
            continue
        field = field_by_type.get(np_type)
        if field is not None:
            data[field] = value
        parts.append((value, int(name_part.get('order'))))
    if 'other_name' not in data and parts:
        # Index-based key instead of ``lambda (a, b): b``: tuple parameters
        # are a SyntaxError on Python 3, this form runs on both 2 and 3.
        ordered = sorted(parts, key=lambda part: part[1])
        data['other_name'] = combine_name(*[text for text, _ in ordered])
    return data
def get_name_data(names):
    """Collect name fields from ``(name_part, value)`` pairs.

    Args:
        names: iterable of ``(name_part, value)`` pairs. Each ``name_part``
            must offer ``get('name-part-type')``, ``get('order')`` and
            ``getparent()``; ``value`` is the text belonging to that part.

    Returns:
        dict: ``quality`` (read from the parent of the last pair seen) plus
        whichever of ``other_name``, ``last_name``, ``first_name`` and
        ``second_name`` had a usable value. When no ``whole-name`` part
        supplied ``other_name``, it is built by ``combine_name`` from the
        usable values in ascending ``order``.
    """
    field_by_type = {
        'whole-name': 'other_name',
        'family-name': 'last_name',
        'given-name': 'first_name',
        'further-given-name': 'second_name',
    }
    data = {}
    parts = []
    for name_part, value in names:
        np_type = name_part.get('name-part-type')
        data['quality'] = name_part.getparent().get('quality')
        # Skip missing/empty values and values that are just a lone '-'.
        if not value or value.strip() == '-':
            continue
        field = field_by_type.get(np_type)
        if field is not None:
            data[field] = value
        parts.append((value, int(name_part.get('order'))))
    if 'other_name' not in data and parts:
        # Index-based key instead of ``lambda (a, b): b``: tuple parameters
        # are a SyntaxError on Python 3, this form runs on both 2 and 3.
        ordered = sorted(parts, key=lambda part: part[1])
        data['other_name'] = combine_name(*[text for text, _ in ordered])
    return data
def parse_entry(emit, group, rows):
    """Merge every row of one group into a single record and emit it.

    Args:
        emit: object whose ``entity(record)`` method receives the result.
        group: identifier passed to ``make_id`` for the record's ``uid``.
        rows: iterable of dict rows for this group. Rows are consumed
            destructively: most columns are removed with ``pop``.

    The first row provides the primary name; each later row contributes an
    entry in ``other_names``. Scalar fields are overwritten row by row, so
    the last row's values are the ones emitted.
    """
    name_columns = ('Title', 'Name 1', 'Name 2', 'Name 3',
                    'Name 4', 'Name 5', 'Name 6')
    address_columns = ('Address 1', 'Address 2', 'Address 3',
                       'Address 4', 'Address 5', 'Address 6')

    entity = SOURCE.copy()
    entity.update({
        'uid': make_id('gb', 'hmt', group),
        'identities': [],
        'addresses': [],
        'other_names': []
    })

    for row in rows:
        scalars = {
            'type': row.pop('Group Type').lower(),
            'date_of_birth': parse_date(row.pop('DOB')),
            'place_of_birth': row.pop('Town of Birth'),
            'country_of_birth': normalize_country(
                row.pop('Country of Birth')),
            'nationality': normalize_country(row.get('Nationality')),
            'program': row.pop('Regime'),
            'summary': row.pop('Other Information'),
            'updated_at': parse_date(row.pop('Last Updated')),
            'function': row.pop('Position')
        }
        entity.update(scalars)

        # Read the individual name columns before they are popped below.
        name_fields = {
            'first_name': row.get('Name 1'),
            'second_name': row.get('Name 2'),
            'middle_name': row.get('Name 3'),
            'last_name': row.get('Name 6')
        }
        full_name = combine_name(*[row.pop(col) for col in name_columns])

        if 'name' in entity:
            # A primary name already exists: record this row as an alias.
            name_fields['other_name'] = full_name
            name_fields['type'] = row.pop('Alias Type')
            entity['other_names'].append(name_fields)
        else:
            entity['name'] = full_name
            entity.update(name_fields)

        address_text = combine_name(
            *[row.pop(col) for col in address_columns])
        if len(address_text):
            entity['addresses'].append({
                'text': address_text,
                'postal_code': row.pop('Post/Zip Code')
            })

        if row.get('Passport Details'):
            entity['identities'].append({
                'type': 'Passport',
                'number': row.pop('Passport Details'),
                'country': normalize_country(row.get('Nationality'))
            })

        if row.get('NI Number'):
            entity['identities'].append({
                'type': 'NI',
                'number': row.pop('NI Number'),
                'country': normalize_country(row.get('Country'))
            })

    emit.entity(entity)
def parse_entry(emit, record, entry):
    """Populate ``record`` from one list entry element and emit it.

    Args:
        emit: object whose ``entity(record)`` method receives the result.
        record: dict updated in place with the parsed fields.
        entry: XML element read with ``findtext`` / ``findall``.

    Entries whose ``sdnType`` is not ``'Individual'`` are typed ``'entity'``
    and carry no ``last_name``, neither on the record nor on its aliases.
    """
    given = entry.findtext('./firstName')
    family = entry.findtext('./lastName')
    is_entity = entry.findtext('./sdnType') != 'Individual'

    record.update({
        'uid': make_id('us', 'ofac', entry.findtext('uid')),
        'type': 'entity' if is_entity else 'individual',
        'program': entry.findtext('./programList/program'),
        'summary': entry.findtext('./remarks'),
        'first_name': given,
        'last_name': family,
        'name': combine_name(given, family)
    })
    if is_entity:
        record.pop('last_name', None)

    aliases = record['other_names'] = []
    for aka in entry.findall('./akaList/aka'):
        aka_given = aka.findtext('./firstName')
        aka_family = aka.findtext('./lastName')
        alias = {
            'type': aka.findtext('./type'),
            'quality': aka.findtext('./category'),
            'first_name': aka_given,
            'last_name': aka_family,
            'other_name': combine_name(aka_given, aka_family)
        }
        if is_entity:
            alias.pop('last_name', None)
        aliases.append(alias)

    identities = record['identities'] = []
    for ident in entry.findall('./idList/id'):
        identities.append({
            'type': ident.findtext('./idType'),
            'number': ident.findtext('./idNumber'),
            'country': normalize_country(ident.findtext('./idCountry'))
        })

    addresses = record['addresses'] = []
    for address in entry.findall('./addressList/address'):
        addresses.append({
            'address1': address.findtext('./address1'),
            'address2': address.findtext('./address2'),
            'city': address.findtext('./city'),
            'country': normalize_country(address.findtext('./country'))
        })

    # Only items flagged as the main entry are used for place/date of birth;
    # if several are flagged, the last one wins.
    for place in entry.findall('./placeOfBirthList/placeOfBirthItem'):
        if place.findtext('./mainEntry') != 'true':
            continue
        record['place_of_birth'] = place.findtext('./placeOfBirth')

    for birth in entry.findall('./dateOfBirthList/dateOfBirthItem'):
        if birth.findtext('./mainEntry') != 'true':
            continue
        record['date_of_birth'] = parse_date(birth.findtext('./dateOfBirth'))

    if is_entity:
        record.pop('last_name', None)
    emit.entity(record)
def parse_entry(emit, group, rows):
    """Merge every row of one group into a single record and emit it.

    Args:
        emit: object whose ``entity(record)`` method receives the result.
        group: identifier passed to ``make_id`` for the record's ``uid``.
        rows: iterable of dict rows for this group. Rows are consumed
            destructively: most columns are removed with ``pop``.

    The first row provides the primary name; each later row contributes an
    entry in ``other_names``. Scalar fields are overwritten row by row, so
    the last row's values are the ones emitted.
    """
    name_columns = ('Title', 'Name 1', 'Name 2', 'Name 3',
                    'Name 4', 'Name 5', 'Name 6')
    address_columns = ('Address 1', 'Address 2', 'Address 3',
                       'Address 4', 'Address 5', 'Address 6')

    entity = SOURCE.copy()
    entity.update({
        'uid': make_id('gb', 'hmt', group),
        'identities': [],
        'addresses': [],
        'other_names': []
    })

    for row in rows:
        scalars = {
            'type': row.pop('Group Type').lower(),
            'date_of_birth': parse_date(row.pop('DOB')),
            'place_of_birth': row.pop('Town of Birth'),
            'country_of_birth': normalize_country(
                row.pop('Country of Birth')),
            'nationality': normalize_country(row.get('Nationality')),
            'program': row.pop('Regime'),
            'summary': row.pop('Other Information'),
            'updated_at': parse_date(row.pop('Last Updated')),
            'function': row.pop('Position')
        }
        entity.update(scalars)

        # Read the individual name columns before they are popped below.
        name_fields = {
            'first_name': row.get('Name 1'),
            'second_name': row.get('Name 2'),
            'middle_name': row.get('Name 3'),
            'last_name': row.get('Name 6')
        }
        full_name = combine_name(*[row.pop(col) for col in name_columns])

        if 'name' in entity:
            # A primary name already exists: record this row as an alias.
            name_fields['other_name'] = full_name
            name_fields['type'] = row.pop('Alias Type')
            entity['other_names'].append(name_fields)
        else:
            entity['name'] = full_name
            entity.update(name_fields)

        address_text = combine_name(
            *[row.pop(col) for col in address_columns])
        if len(address_text):
            entity['addresses'].append({
                'text': address_text,
                'postal_code': row.pop('Post/Zip Code')
            })

        if row.get('Passport Details'):
            entity['identities'].append({
                'type': 'Passport',
                'number': row.pop('Passport Details'),
                'country': normalize_country(row.get('Nationality'))
            })

        if row.get('NI Number'):
            entity['identities'].append({
                'type': 'NI',
                'number': row.pop('NI Number'),
                'country': normalize_country(row.get('Country'))
            })

    emit.entity(entity)
def parse_entry(emit, record, entry):
    """Populate ``record`` from one list entry element and emit it.

    Args:
        emit: object whose ``entity(record)`` method receives the result.
        record: dict updated in place with the parsed fields.
        entry: XML element read with ``findtext`` / ``findall``.

    Entries whose ``sdnType`` is not ``'Individual'`` are typed ``'entity'``
    and carry no ``last_name``, neither on the record nor on its aliases.
    """
    given = entry.findtext('./firstName')
    family = entry.findtext('./lastName')
    is_entity = entry.findtext('./sdnType') != 'Individual'

    record.update({
        'uid': make_id('us', 'ofac', entry.findtext('uid')),
        'type': 'entity' if is_entity else 'individual',
        'program': entry.findtext('./programList/program'),
        'summary': entry.findtext('./remarks'),
        'first_name': given,
        'last_name': family,
        'name': combine_name(given, family)
    })
    if is_entity:
        record.pop('last_name', None)

    aliases = record['other_names'] = []
    for aka in entry.findall('./akaList/aka'):
        aka_given = aka.findtext('./firstName')
        aka_family = aka.findtext('./lastName')
        alias = {
            'type': aka.findtext('./type'),
            'quality': aka.findtext('./category'),
            'first_name': aka_given,
            'last_name': aka_family,
            'other_name': combine_name(aka_given, aka_family)
        }
        if is_entity:
            alias.pop('last_name', None)
        aliases.append(alias)

    identities = record['identities'] = []
    for ident in entry.findall('./idList/id'):
        identities.append({
            'type': ident.findtext('./idType'),
            'number': ident.findtext('./idNumber'),
            'country': normalize_country(ident.findtext('./idCountry'))
        })

    addresses = record['addresses'] = []
    for address in entry.findall('./addressList/address'):
        addresses.append({
            'address1': address.findtext('./address1'),
            'address2': address.findtext('./address2'),
            'city': address.findtext('./city'),
            'country': normalize_country(address.findtext('./country'))
        })

    # Only items flagged as the main entry are used for place/date of birth;
    # if several are flagged, the last one wins.
    for place in entry.findall('./placeOfBirthList/placeOfBirthItem'):
        if place.findtext('./mainEntry') != 'true':
            continue
        record['place_of_birth'] = place.findtext('./placeOfBirth')

    for birth in entry.findall('./dateOfBirthList/dateOfBirthItem'):
        if birth.findtext('./mainEntry') != 'true':
            continue
        record['date_of_birth'] = parse_date(birth.findtext('./dateOfBirth'))

    if is_entity:
        record.pop('last_name', None)
    emit.entity(record)