コード例 #1
0
ファイル: interpol.py プロジェクト: 01-/opennames
def parse_case(emit, case):
    """Convert one scraped case mapping into a record and emit it.

    ``case`` is read with ``.get``; the keys used are ``url``, ``name``,
    ``last_updated``, ``reason``, ``place_of_birth``, ``sex``,
    ``forename``, ``present_family_name``, ``nationality`` and
    ``date_of_birth``. The finished record is passed to ``emit.entity``.

    ``date_of_birth`` is optional: when it is absent, empty or in neither
    of the two accepted formats (``%Y`` or ``%d/%m/%Y``) the record is
    still emitted, just without that field.
    """
    url = case.get('url')
    # The name is split on ', ' and the pieces are handed to combine_name
    # in reverse order.
    name = combine_name(*reversed(case.get('name').split(', ')))
    updated = dateutil_parse(case.get('last_updated'))
    record = {
        # The last path segment of the URL is used as the identifier.
        'uid': make_id('interpol', url.split('/')[-1]),
        'source_url': url,
        'name': name,
        'summary': case.get('reason'),
        'updated_at': updated.date().isoformat(),
        'place_of_birth': case.get('place_of_birth'),
        # ``or ''`` also covers a key that is present but set to None.
        'gender': (case.get('sex') or '').lower(),
        'first_name': case.get('forename'),
        'last_name': case.get('present_family_name'),
        'nationality': normalize_country(case.get('nationality')),
        'identities': [],
        'addresses': [],
        'other_names': []
    }
    # Applied last, so keys from SOURCE win over the ones set above.
    record.update(SOURCE)

    # Only the first space-separated token of the field is parsed.
    birth = (case.get('date_of_birth') or '').split(' ')[0]
    if birth:
        for fmt in ('%Y', '%d/%m/%Y'):
            try:
                parsed = datetime.strptime(birth, fmt)
            except ValueError:
                continue
            record['date_of_birth'] = parsed.date().isoformat()
            break
        else:
            log.warning('Cannot parse date of birth: %r', birth)

    emit.entity(record)
コード例 #2
0
ファイル: unsc.py プロジェクト: 01-/opennames
def parse_common(node, type_):
    """Build the record fields read from one list-entry XML node.

    ``node`` is queried with ``findtext``; ``type_`` is stored unchanged
    under ``'type'``. The returned dict carries empty ``other_names``,
    ``addresses`` and ``identities`` lists for the caller to fill in.

    ``UN_LIST_TYPE`` and ``REFERENCE_NUMBER`` must be present (their text
    is stripped unconditionally). ``updated_at`` may end up as ``None``
    when neither ``LISTED_ON`` nor ``LAST_DAY_UPDATED/VALUE`` exists.
    """
    program_ref = '%s (%s)' % (node.findtext('./UN_LIST_TYPE').strip(),
                               node.findtext('./REFERENCE_NUMBER').strip())
    record = {
        'uid': make_id('un', 'sc', node.findtext('./DATAID')),
        'type': type_,
        'program': program_ref,
        'summary': node.findtext('./COMMENTS1'),
        'name': combine_name(node.findtext('./FIRST_NAME'),
                             node.findtext('./SECOND_NAME'),
                             node.findtext('./THIRD_NAME')),
        'function': node.findtext('./DESIGNATION/VALUE'),
        'updated_at': node.findtext('./LISTED_ON'),
        'nationality': normalize_country(node.findtext('./NATIONALITY/VALUE')),
        'other_names': [],
        'addresses': [],
        'identities': []
    }
    record.update(BASE)

    # A name in its original script, when given, replaces the combined one.
    orig = node.findtext('./NAME_ORIGINAL_SCRIPT')
    if orig is not None:
        record['name'] = orig

    # The last-update date takes precedence over the listing date.
    last_updated = node.findtext('./LAST_DAY_UPDATED/VALUE')
    if last_updated is not None:
        record['updated_at'] = last_updated

    updated_at = record['updated_at']
    # Guard against None: both date elements may be missing, and the
    # membership test would raise TypeError on None.
    if updated_at is not None and ':' in updated_at:
        # Drop everything from the last '-' on -- presumably a trailing
        # UTC offset such as '-04:00'; confirm against the feed.
        record['updated_at'] = updated_at.rsplit('-', 1)[0]

    return record
コード例 #3
0
def parse_case(emit, case):
    """Turn a single case mapping into a record and hand it to ``emit``.

    Values are pulled from ``case`` with ``.get``. ``url``, ``name`` and
    ``last_updated`` are required by the code below; ``date_of_birth`` is
    optional. A birth date that is missing, empty, or matches neither
    ``%Y`` nor ``%d/%m/%Y`` leaves the ``date_of_birth`` field out of the
    record instead of aborting the case.
    """
    url = case.get('url')
    # Split the name on ', ' and pass the parts to combine_name reversed.
    name_parts = case.get('name').split(', ')
    name = combine_name(*reversed(name_parts))
    updated = dateutil_parse(case.get('last_updated'))
    record = {
        # Identifier is derived from the final path segment of the URL.
        'uid': make_id('interpol', url.split('/')[-1]),
        'source_url': url,
        'name': name,
        'summary': case.get('reason'),
        'updated_at': updated.date().isoformat(),
        'place_of_birth': case.get('place_of_birth'),
        # Tolerates both a missing key and an explicit None value.
        'gender': (case.get('sex') or '').lower(),
        'first_name': case.get('forename'),
        'last_name': case.get('present_family_name'),
        'nationality': normalize_country(case.get('nationality')),
        'identities': [],
        'addresses': [],
        'other_names': []
    }
    # SOURCE is merged after the literal, so it overrides on key clashes.
    record.update(SOURCE)

    raw_birth = case.get('date_of_birth')
    # Keep just the first space-separated token of the raw value.
    birth = raw_birth.split(' ')[0] if raw_birth else ''
    if birth:
        for date_format in ('%Y', '%d/%m/%Y'):
            try:
                parsed = datetime.strptime(birth, date_format)
            except ValueError:
                # Not this format; try the next one.
                continue
            record['date_of_birth'] = parsed.date().isoformat()
            break
        else:
            log.warning('Cannot parse date of birth: %r', birth)

    emit.entity(record)
コード例 #4
0
ファイル: wbdeb.py プロジェクト: fxcebx/opennames
def wbdeb_parse(emit, html_file):
    """Parse an HTML listing of debarred parties and emit one record per row.

    Only tables whose ``summary`` attribute contains ``'List of Debarred'``
    are read, and within them only rows with exactly six ``<td>`` cells.
    Each record is passed to ``emit.entity``.
    """
    doc = html.parse(html_file)
    for table in doc.findall('//table'):
        summary = table.get('summary', '')
        if 'List of Debarred' not in summary:
            continue
        rows = table.findall('.//tr')
        # Progress goes through the module logger instead of a Python 2
        # ``print`` statement.
        log.info('%s: %d rows', summary, len(rows))
        for row in rows:
            tds = row.findall('./td')
            if len(tds) != 6:
                continue
            values = [clean_value(td) for td in tds]

            # The identifier is the first ten hex digits of the SHA-1 over
            # all cell values of the row.
            digest = sha1()
            for value in values:
                digest.update(value.encode('utf-8'))
            uid = digest.hexdigest()[:10]

            names = clean_name(values[0])
            if not names:
                log.warning("No name: %r", values)
                continue

            # Shared by the nationality and the address of the record.
            country = normalize_country(values[2])
            record = {
                'uid': make_id('wb', 'debarred', uid),
                'name': values[0],
                'nationality': country,
                'program': values[5],
                'addresses': [{
                    'text': values[1],
                    'country': country
                }],
                # Every cleaned name after the first is kept as an alias.
                'other_names': [{'other_name': name} for name in names[1:]],
                'updated_at': dateutil_parse(values[3]).date().isoformat()
            }
            record.update(SOURCE)
            emit.entity(record)
コード例 #5
0
def parse_row(emit, row):
    """Build a record from one input row and pass it to ``emit.entity``.

    The record starts as a copy of ``SOURCE`` and is then filled from the
    row's columns, all of which are read with ``row.get``. The ``Country``
    column is normalized twice: once for the nationality and once for the
    record's single address.
    """
    record = SOURCE.copy()
    record['uid'] = make_id('us', 'bis',
                            row.get('Effective_Date'), row.get('Name'))
    record['name'] = row.get('Name')
    record['program'] = row.get('FR_Citation')
    record['summary'] = row.get('Action')
    record['updated_at'] = row.get('Last Update')
    record['nationality'] = normalize_country(row.get('Country'))

    address = {}
    address['address1'] = row.get('Street_Address')
    address['postal_code'] = row.get('Postal_Code')
    address['region'] = row.get('State')
    address['city'] = row.get('City')
    address['country'] = normalize_country(row.get('Country'))
    record['addresses'] = [address]

    emit.entity(record)
コード例 #6
0
ファイル: every_politician.py プロジェクト: 01-/opennames
def everypolitician_parse(emit, json_file):
    """Load a JSON file of politicians and emit one entity for each entry.

    The file must contain an object whose ``politicians`` value is
    iterable. Each entity is completed with the keys of ``PUBLISHER``
    before it is passed to ``emit.entity``.
    """
    with open(json_file, 'r') as handle:
        payload = json.load(handle)

    for person in payload.get('politicians'):
        # TODO: add politician
        country = normalize_country(person.get('country_code'))
        # Only the part of the id after its last '-' goes into the uid.
        short_id = person.get('id').rsplit('-', 1)[-1]
        entity = dict(
            uid=make_id('evpo', short_id),
            name=person.get('name'),
            type='individual',
            addresses=[dict(country=country)],
            updated_at=parse_ts(person.get('legislature_lastmod')),
            source_url=person.get('source_url')
        )
        entity.update(PUBLISHER)
        emit.entity(entity)
コード例 #7
0
ファイル: wbdeb.py プロジェクト: fxcebx/opennames
def wbdeb_parse(emit, html_file):
    """Read debarment tables from an HTML file and emit a record per row.

    A table is processed only if its ``summary`` attribute contains
    ``'List of Debarred'``; a row is processed only if it has exactly six
    ``<td>`` cells. Rows for which ``clean_name`` yields nothing are
    logged and skipped. Records are passed to ``emit.entity``.
    """
    doc = html.parse(html_file)
    for table in doc.findall('//table'):
        summary = table.get('summary', '')
        if 'List of Debarred' not in summary:
            continue
        rows = table.findall('.//tr')
        # Report progress via the logger rather than a Python 2 ``print``
        # statement.
        log.info('%s: %d rows', summary, len(rows))
        for row in rows:
            tds = row.findall('./td')
            if len(tds) != 6:
                continue
            values = [clean_value(td) for td in tds]

            # Hash all six cell values; the first ten hex digits of the
            # SHA-1 become the row identifier.
            hasher = sha1()
            for value in values:
                hasher.update(value.encode('utf-8'))
            uid = hasher.hexdigest()[:10]

            names = clean_name(values[0])
            if not names:
                log.warning("No name: %r", values)
                continue

            # Normalized once, used for nationality and address alike.
            country = normalize_country(values[2])
            record = {
                'uid': make_id('wb', 'debarred', uid),
                'name': values[0],
                'nationality': country,
                'program': values[5],
                'addresses': [{
                    'text': values[1],
                    'country': country
                }],
                'other_names': [],
                'updated_at': dateutil_parse(values[3]).date().isoformat()
            }

            # The first cleaned name is skipped; the rest become aliases.
            for name in names[1:]:
                record['other_names'].append({'other_name': name})
            record.update(SOURCE)
            emit.entity(record)
コード例 #8
0
ファイル: seco.py プロジェクト: 01-/opennames
def parse_entry(emit, doc, target, sanctions, places):
    """Build a record for one ``target`` element and emit it.

    The target's ``individual`` child is used when present, otherwise its
    ``entity`` child. ``sanctions`` is looked up by the target's
    ``sanctions-set-id`` attribute, and ``places`` is forwarded untouched
    to ``parse_identity`` for every ``identity`` child.
    """
    # Prefer <individual>; fall back to <entity>.
    for path in ('./individual', './entity'):
        node = target.find(path)
        if node is not None:
            break

    record = {}
    record['uid'] = make_id('ch', 'seco', target.get('ssid'))
    record['type'] = node.tag
    # The date comes from the document root, not from the entry.
    record['updated_at'] = doc.getroot().get('date')
    record['program'] = sanctions.get(target.get('sanctions-set-id'))
    record['function'] = node.findtext('./other-information')
    record['summary'] = node.findtext('./justification')
    for list_key in ('other_names', 'addresses', 'identities'):
        record[list_key] = []
    record.update(PUBLISHER)

    for identity in node.findall('./identity'):
        parse_identity(record, identity, places)

    emit.entity(record)
コード例 #9
0
ファイル: unsc.py プロジェクト: fxcebx/opennames
def parse_common(node, type_):
    """Collect the record fields read from one list-entry XML node.

    All values come from ``node.findtext``; ``type_`` is stored as given
    under ``'type'``. The result includes empty ``other_names``,
    ``addresses`` and ``identities`` lists for the caller to populate.

    ``UN_LIST_TYPE`` and ``REFERENCE_NUMBER`` are required (their text is
    stripped unconditionally). ``updated_at`` stays ``None`` when neither
    ``LISTED_ON`` nor ``LAST_DAY_UPDATED/VALUE`` is present.
    """
    program_ref = '%s (%s)' % (node.findtext('./UN_LIST_TYPE').strip(),
                               node.findtext('./REFERENCE_NUMBER').strip())
    record = {
        'uid': make_id('un', 'sc', node.findtext('./DATAID')),
        'type': type_,
        'program': program_ref,
        'summary': node.findtext('./COMMENTS1'),
        'name': combine_name(node.findtext('./FIRST_NAME'),
                             node.findtext('./LAST_NAME')),
        'function': node.findtext('./DESIGNATION/VALUE'),
        'updated_at': node.findtext('./LISTED_ON'),
        'nationality': normalize_country(node.findtext('./NATIONALITY/VALUE')),
        'other_names': [],
        'addresses': [],
        'identities': []
    }
    record.update(BASE)

    # Prefer the original-script name over the combined one when given.
    orig = node.findtext('./NAME_ORIGINAL_SCRIPT')
    if orig is not None:
        record['name'] = orig

    # A last-update date overrides the listing date.
    last_updated = node.findtext('./LAST_DAY_UPDATED/VALUE')
    if last_updated is not None:
        record['updated_at'] = last_updated

    updated_at = record['updated_at']
    # Both date elements can be missing; ``':' in None`` would raise
    # TypeError, so None is left untouched.
    if updated_at is not None and ':' in updated_at:
        # Cut at the last '-' -- presumably removes a trailing UTC offset
        # like '-04:00'; confirm against the feed.
        record['updated_at'] = updated_at.rsplit('-', 1)[0]

    return record
コード例 #10
0
ファイル: seco.py プロジェクト: fxcebx/opennames
def parse_entry(emit, doc, target, sanctions, places):
    """Create the record for a ``target`` element and pass it to ``emit``.

    Data is read from the target's ``individual`` child or, if there is
    none, from its ``entity`` child; the child's tag name becomes the
    record type. Every ``identity`` child is handed to ``parse_identity``
    together with the record and ``places``.
    """
    individual = target.find('./individual')
    # Explicit None test: the fallback is used only when the lookup
    # found nothing.
    node = target.find('./entity') if individual is None else individual

    record = dict(
        uid=make_id('ch', 'seco', target.get('ssid')),
        type=node.tag,
        updated_at=doc.getroot().get('date'),
        program=sanctions.get(target.get('sanctions-set-id')),
        function=node.findtext('./other-information'),
        summary=node.findtext('./justification'),
        other_names=[],
        addresses=[],
        identities=[]
    )
    # PUBLISHER keys are merged after the entry-specific ones.
    record.update(PUBLISHER)

    for identity_node in node.findall('./identity'):
        parse_identity(record, identity_node, places)

    emit.entity(record)
コード例 #11
0
def everypolitician_parse(emit, json_file):
    """Emit an entity for every item under ``politicians`` in a JSON file.

    ``json_file`` is opened for reading and parsed with ``json.load``.
    Each entity gets the keys of ``PUBLISHER`` merged in last and is then
    passed to ``emit.entity``.
    """
    with open(json_file, 'r') as source:
        document = json.load(source)

    for item in document.get('politicians'):
        # TODO: add politician
        country = normalize_country(item.get('country_code'))
        # The uid uses only the final '-'-separated piece of the id.
        id_pieces = item.get('id').split('-')

        entity = {}
        entity['uid'] = make_id('evpo', id_pieces[-1])
        entity['name'] = item.get('name')
        entity['type'] = 'individual'
        entity['addresses'] = [{'country': country}]
        entity['updated_at'] = parse_ts(item.get('legislature_lastmod'))
        entity['source_url'] = item.get('source_url')
        entity.update(PUBLISHER)
        emit.entity(entity)
コード例 #12
0
ファイル: cia_world_leaders.py プロジェクト: 01-/opennames
def worldleaders_parse(emit, json_file):
    """Read a JSON file of leaders and emit one entity per named leader.

    The file must hold an object with an iterable ``leaders`` value.
    Entries whose ``name`` is missing, ``None`` or empty are logged with a
    warning and skipped. Every other entry becomes an ``individual``
    entity, completed with the keys of ``PUBLISHER`` and passed to
    ``emit.entity``.
    """
    with open(json_file, 'r') as fh:
        data = json.load(fh)

    for leader in data.get('leaders'):
        name = leader.get('name')
        # Truthiness check instead of len(): a missing name is None, and
        # len(None) would raise TypeError before the warning is logged.
        if not name:
            log.warning('No name on entity: %(title)s (%(country)s)', leader)
            continue
        country = normalize_country(leader.get('country'))
        summary = leader.get('title')
        # Append the component in parentheses when one is given.
        if leader.get('component'):
            summary = '%s (%s)' % (summary, leader.get('component'))
        entity = {
            'uid': make_id('us', 'cia', 'worldleaders', country, name),
            'name': name,
            'type': 'individual',
            'summary': summary,
            'addresses': [{'country': country}],
            'updated_at': parse_date(leader.get('last_update')),
            'source_url': leader.get('url')
        }
        entity.update(PUBLISHER)
        emit.entity(entity)
コード例 #13
0
def worldleaders_parse(emit, json_file):
    """Emit an entity for each leader with a name in the given JSON file.

    ``json_file`` is parsed with ``json.load`` and its ``leaders`` value
    is iterated. A leader without a usable ``name`` (absent, ``None`` or
    empty) is reported through ``log.warning`` and skipped; the rest are
    emitted as ``individual`` entities via ``emit.entity``.
    """
    with open(json_file, 'r') as fh:
        data = json.load(fh)

    for leader in data.get('leaders'):
        name = leader.get('name')
        # ``not name`` also covers None, which len() cannot handle.
        if not name:
            log.warning('No name on entity: %(title)s (%(country)s)', leader)
            continue

        country = normalize_country(leader.get('country'))
        summary = leader.get('title')
        component = leader.get('component')
        if component:
            # The title is followed by the component in parentheses.
            summary = '%s (%s)' % (summary, component)

        entity = {
            'uid': make_id('us', 'cia', 'worldleaders', country, name),
            'name': name,
            'type': 'individual',
            'summary': summary,
            'addresses': [{'country': country}],
            'updated_at': parse_date(leader.get('last_update')),
            'source_url': leader.get('url')
        }
        entity.update(PUBLISHER)
        emit.entity(entity)
コード例 #14
0
ファイル: ofac.py プロジェクト: 01-/opennames
def parse_entry(emit, record, entry):
    """Fill ``record`` from one XML ``entry`` element and emit it.

    ``record`` is updated in place. The entry counts as an entity unless
    its ``sdnType`` text is exactly ``'Individual'``; for entities the
    ``last_name`` key is removed from the record and from every alias.
    Aliases, identity documents and addresses are collected into lists,
    and only birth places/dates flagged ``mainEntry == 'true'`` are kept.
    """
    first_name = entry.findtext('./firstName')
    last_name = entry.findtext('./lastName')
    is_entity = entry.findtext('./sdnType') != 'Individual'

    record.update({
        'uid': make_id('us', 'ofac', entry.findtext('uid')),
        'type': 'entity' if is_entity else 'individual',
        'program': entry.findtext('./programList/program'),
        'summary': entry.findtext('./remarks'),
        'first_name': first_name,
        'last_name': last_name,
        'name': combine_name(first_name, last_name)
    })
    if is_entity:
        record.pop('last_name', None)

    aliases = []
    for aka in entry.findall('./akaList/aka'):
        aka_first = aka.findtext('./firstName')
        aka_last = aka.findtext('./lastName')
        alias = {
            'type': aka.findtext('./type'),
            'quality': aka.findtext('./category'),
            'first_name': aka_first,
            'other_name': combine_name(aka_first, aka_last)
        }
        # Entities carry no last_name on their aliases.
        if not is_entity:
            alias['last_name'] = aka_last
        aliases.append(alias)
    record['other_names'] = aliases

    record['identities'] = [
        {
            'type': ident.findtext('./idType'),
            'number': ident.findtext('./idNumber'),
            'country': normalize_country(ident.findtext('./idCountry'))
        }
        for ident in entry.findall('./idList/id')
    ]

    record['addresses'] = [
        {
            'address1': address.findtext('./address1'),
            'address2': address.findtext('./address2'),
            'city': address.findtext('./city'),
            'country': normalize_country(address.findtext('./country'))
        }
        for address in entry.findall('./addressList/address')
    ]

    # If several items are flagged as main entry, the last one wins.
    for place in entry.findall('./placeOfBirthList/placeOfBirthItem'):
        if place.findtext('./mainEntry') != 'true':
            continue
        record['place_of_birth'] = place.findtext('./placeOfBirth')

    for birth in entry.findall('./dateOfBirthList/dateOfBirthItem'):
        if birth.findtext('./mainEntry') != 'true':
            continue
        record['date_of_birth'] = parse_date(birth.findtext('./dateOfBirth'))

    # Nothing above re-adds the key; this second removal is retained
    # from the original code.
    if is_entity:
        record.pop('last_name', None)
    emit.entity(record)
コード例 #15
0
ファイル: eeas.py プロジェクト: fxcebx/opennames
def parse_entry(emit, record, entry):
    """Populate ``record`` from one XML ``entry`` element and emit it.

    ``record`` is modified in place. The first ``NAME`` child supplies the
    primary name, unless the record already has a ``name`` key; every
    further one becomes an alias in ``other_names``. For ``function`` and
    the three birth fields, the longest non-empty value seen wins.
    """
    def keep_longest(key, value):
        # Store ``value`` only if it is non-empty and longer than what the
        # record currently holds under ``key``.
        if value and len(value) > len(record.get(key, '')):
            record[key] = value

    record.update({
        'uid': make_id('eu', 'eeas', entry.get('Id')),
        # Anything other than Type 'P' is treated as an entity.
        'type': 'individual' if entry.get('Type') == 'P' else 'entity',
        'updated_at': entry.get('reg_date'),
        'source_url': entry.get('pdf_link'),
        'program': entry.get('programme'),
        'summary': entry.get('remark')
    })

    gender_labels = {'M': 'male', 'F': 'female'}
    record['other_names'] = []
    for aka in entry.findall('./NAME'):
        data = {
            'first_name': aka.findtext('./FIRSTNAME'),
            'last_name': aka.findtext('./LASTNAME'),
            'middle_name': aka.findtext('./MIDDLENAME'),
            'other_name': aka.findtext('./WHOLENAME')
        }

        keep_longest('function', aka.findtext('./FUNCTION'))

        # Codes other than 'M' and 'F' leave the gender unset.
        label = gender_labels.get(aka.findtext('./GENDER'))
        if label is not None:
            data['gender'] = label

        if 'name' in record:
            record['other_names'].append(data)
        else:
            # The whole name becomes the primary name and the remaining
            # fields are merged directly into the record.
            record['name'] = data.pop('other_name')
            record.update(data)

    record['identities'] = [
        {
            'type': 'Passport',
            'number': passport.findtext('./NUMBER'),
            'country': normalize_country(passport.findtext('./COUNTRY'))
        }
        for passport in entry.findall('./PASSPORT')
    ]

    record['addresses'] = [
        {
            'address1': address.findtext('./STREET'),
            'address2': address.findtext('./NUMBER'),
            'city': address.findtext('./CITY'),
            'postal_code': address.findtext('./ZIPCODE'),
            'country': normalize_country(address.findtext('./COUNTRY'))
        }
        for address in entry.findall('./ADDRESS')
    ]

    for birth in entry.findall('./BIRTH'):
        keep_longest('place_of_birth', birth.findtext('./PLACE'))
        keep_longest('date_of_birth', parse_date(birth.findtext('./DATE')))
        keep_longest('country_of_birth',
                     normalize_country(birth.findtext('./COUNTRY')))

    emit.entity(record)
コード例 #16
0
ファイル: eeas.py プロジェクト: fxcebx/opennames
def parse_entry(emit, record, entry):
    """Complete ``record`` with the data of one XML ``entry`` and emit it.

    The caller's ``record`` is mutated. An entry whose ``Type`` attribute
    is not ``'P'`` is marked as an entity. ``NAME`` children provide the
    primary name (if the record has none yet) and the aliases; ``PASSPORT``,
    ``ADDRESS`` and ``BIRTH`` children fill the identities, the addresses
    and the birth fields.
    """
    record['uid'] = make_id('eu', 'eeas', entry.get('Id'))
    record['type'] = 'individual'
    record['updated_at'] = entry.get('reg_date')
    record['source_url'] = entry.get('pdf_link')
    record['program'] = entry.get('programme')
    record['summary'] = entry.get('remark')
    if entry.get('Type') != 'P':
        record['type'] = 'entity'

    aliases = record['other_names'] = []
    for name_node in entry.findall('./NAME'):
        text = name_node.findtext
        fields = {
            'first_name': text('./FIRSTNAME'),
            'last_name': text('./LASTNAME'),
            'middle_name': text('./MIDDLENAME'),
            'other_name': text('./WHOLENAME')
        }

        # Keep the longest function description seen so far.
        function = text('./FUNCTION')
        if function:
            known = record.get('function', '')
            if len(function) > len(known):
                record['function'] = function

        gender = text('./GENDER')
        if gender == 'M':
            fields['gender'] = 'male'
        elif gender == 'F':
            fields['gender'] = 'female'

        if 'name' in record:
            aliases.append(fields)
            continue
        # First name seen: promote it to the record itself.
        record['name'] = fields.pop('other_name')
        record.update(fields)

    identities = record['identities'] = []
    for passport in entry.findall('./PASSPORT'):
        identities.append({
            'type': 'Passport',
            'number': passport.findtext('./NUMBER'),
            'country': normalize_country(passport.findtext('./COUNTRY'))
        })

    addresses = record['addresses'] = []
    for address in entry.findall('./ADDRESS'):
        addresses.append({
            'address1': address.findtext('./STREET'),
            'address2': address.findtext('./NUMBER'),
            'city': address.findtext('./CITY'),
            'postal_code': address.findtext('./ZIPCODE'),
            'country': normalize_country(address.findtext('./COUNTRY'))
        })

    # For each birth field the longest non-empty value is kept.
    for birth in entry.findall('./BIRTH'):
        place = birth.findtext('./PLACE')
        if place:
            if len(place) > len(record.get('place_of_birth', '')):
                record['place_of_birth'] = place

        born = parse_date(birth.findtext('./DATE'))
        if born:
            if len(born) > len(record.get('date_of_birth', '')):
                record['date_of_birth'] = born

        country = normalize_country(birth.findtext('./COUNTRY'))
        if country:
            if len(country) > len(record.get('country_of_birth', '')):
                record['country_of_birth'] = country

    emit.entity(record)
コード例 #17
0
def parse_entry(emit, group, rows):
    """Merge all ``rows`` of one group into a single record and emit it.

    The record starts as a copy of ``SOURCE``. Each row overwrites the
    scalar fields (so the last row wins), while names, addresses and
    identity documents accumulate. Columns are consumed from the rows
    with ``pop``, so the row mappings are modified.
    """
    record = SOURCE.copy()
    record['uid'] = make_id('gb', 'hmt', group)
    for list_key in ('identities', 'addresses', 'other_names'):
        record[list_key] = []

    name_columns = ('Title', 'Name 1', 'Name 2', 'Name 3',
                    'Name 4', 'Name 5', 'Name 6')
    address_columns = ('Address 1', 'Address 2', 'Address 3',
                       'Address 4', 'Address 5', 'Address 6')

    for row in rows:
        record['type'] = row.pop('Group Type').lower()
        record['date_of_birth'] = parse_date(row.pop('DOB'))
        record['place_of_birth'] = row.pop('Town of Birth')
        record['country_of_birth'] = normalize_country(
            row.pop('Country of Birth'))
        # Nationality is read without popping; it is needed again below.
        record['nationality'] = normalize_country(row.get('Nationality'))
        record['program'] = row.pop('Regime')
        record['summary'] = row.pop('Other Information')
        record['updated_at'] = parse_date(row.pop('Last Updated'))
        record['function'] = row.pop('Position')

        # Must be read before the name columns are popped just below.
        names = {
            'first_name': row.get('Name 1'),
            'second_name': row.get('Name 2'),
            'middle_name': row.get('Name 3'),
            'last_name': row.get('Name 6')
        }
        name = combine_name(*[row.pop(column) for column in name_columns])

        if 'name' in record:
            # Any later row describes an alias of the same group.
            names['other_name'] = name
            names['type'] = row.pop('Alias Type')
            record['other_names'].append(names)
        else:
            record['name'] = name
            record.update(names)

        addr = combine_name(*[row.pop(column) for column in address_columns])
        if len(addr) > 0:
            record['addresses'].append({
                'text': addr,
                'postal_code': row.pop('Post/Zip Code')
            })

        if row.get('Passport Details'):
            passport = {'type': 'Passport'}
            passport['number'] = row.pop('Passport Details')
            passport['country'] = normalize_country(row.get('Nationality'))
            record['identities'].append(passport)

        if row.get('NI Number'):
            ni_number = {'type': 'NI'}
            ni_number['number'] = row.pop('NI Number')
            ni_number['country'] = normalize_country(row.get('Country'))
            record['identities'].append(ni_number)

    emit.entity(record)
コード例 #18
0
def parse_entry(emit, record, entry):
    """Populate ``record`` from an XML ``entry`` element, then emit it.

    The passed-in ``record`` is mutated. Its type is ``'individual'`` only
    when the entry's ``sdnType`` text equals ``'Individual'``; otherwise
    it is ``'entity'`` and ``last_name`` is dropped from the record and
    from each alias. Place and date of birth are taken only from items
    whose ``mainEntry`` text is ``'true'``.
    """
    text = entry.findtext
    given, family = text('./firstName'), text('./lastName')

    record['uid'] = make_id('us', 'ofac', text('uid'))
    record['type'] = 'individual'
    record['program'] = text('./programList/program')
    record['summary'] = text('./remarks')
    record['first_name'] = given
    record['last_name'] = family
    record['name'] = combine_name(given, family)

    is_entity = text('./sdnType') != 'Individual'
    if is_entity:
        record['type'] = 'entity'
        record.pop('last_name', None)

    other_names = record['other_names'] = []
    for aka in entry.findall('./akaList/aka'):
        aka_given = aka.findtext('./firstName')
        aka_family = aka.findtext('./lastName')
        alias = dict(
            type=aka.findtext('./type'),
            quality=aka.findtext('./category'),
            first_name=aka_given,
            last_name=aka_family,
            other_name=combine_name(aka_given, aka_family)
        )
        if is_entity:
            # The key was set just above, so a plain delete is safe.
            del alias['last_name']
        other_names.append(alias)

    identities = record['identities'] = []
    for document in entry.findall('./idList/id'):
        identities.append(dict(
            type=document.findtext('./idType'),
            number=document.findtext('./idNumber'),
            country=normalize_country(document.findtext('./idCountry'))
        ))

    addresses = record['addresses'] = []
    for location in entry.findall('./addressList/address'):
        addresses.append(dict(
            address1=location.findtext('./address1'),
            address2=location.findtext('./address2'),
            city=location.findtext('./city'),
            country=normalize_country(location.findtext('./country'))
        ))

    # Later main entries overwrite earlier ones.
    for item in entry.findall('./placeOfBirthList/placeOfBirthItem'):
        is_main = item.findtext('./mainEntry') == 'true'
        if is_main:
            record['place_of_birth'] = item.findtext('./placeOfBirth')

    for item in entry.findall('./dateOfBirthList/dateOfBirthItem'):
        is_main = item.findtext('./mainEntry') == 'true'
        if is_main:
            raw_date = item.findtext('./dateOfBirth')
            record['date_of_birth'] = parse_date(raw_date)

    # Retained from the original: a second removal just before emitting.
    if is_entity:
        record.pop('last_name', None)
    emit.entity(record)
コード例 #19
0
ファイル: hmt.py プロジェクト: fxcebx/opennames
def parse_entry(emit, group, rows):
    """Combine the ``rows`` belonging to ``group`` into one emitted record.

    A copy of ``SOURCE`` is the starting point. Scalar fields are
    overwritten by every row, the first row supplies the primary name
    (unless the record already has a ``name`` key), and later rows add
    aliases. Addresses and identity documents are appended per row. Most
    columns are removed from the row mapping as they are read.
    """
    record = SOURCE.copy()
    record.update(
        uid=make_id('gb', 'hmt', group),
        identities=[],
        addresses=[],
        other_names=[]
    )

    for row in rows:
        take = row.pop
        record.update(dict(
            type=take('Group Type').lower(),
            date_of_birth=parse_date(take('DOB')),
            place_of_birth=take('Town of Birth'),
            country_of_birth=normalize_country(take('Country of Birth')),
            # Left in the row: the passport entry below reads it again.
            nationality=normalize_country(row.get('Nationality')),
            program=take('Regime'),
            summary=take('Other Information'),
            updated_at=parse_date(take('Last Updated')),
            function=take('Position')
        ))

        # Snapshot the individual name parts before they are popped.
        names = dict(
            first_name=row.get('Name 1'),
            second_name=row.get('Name 2'),
            middle_name=row.get('Name 3'),
            last_name=row.get('Name 6')
        )

        name = combine_name(
            take('Title'),
            take('Name 1'),
            take('Name 2'),
            take('Name 3'),
            take('Name 4'),
            take('Name 5'),
            take('Name 6')
        )

        if 'name' not in record:
            record['name'] = name
            record.update(names)
        else:
            names['other_name'] = name
            names['type'] = take('Alias Type')
            record['other_names'].append(names)

        addr = combine_name(
            take('Address 1'),
            take('Address 2'),
            take('Address 3'),
            take('Address 4'),
            take('Address 5'),
            take('Address 6')
        )
        # The postal code is only consumed when there is address text.
        if len(addr) != 0:
            record['addresses'].append(dict(
                text=addr,
                postal_code=take('Post/Zip Code')
            ))

        if row.get('Passport Details'):
            record['identities'].append(dict(
                type='Passport',
                number=take('Passport Details'),
                country=normalize_country(row.get('Nationality'))
            ))

        if row.get('NI Number'):
            record['identities'].append(dict(
                type='NI',
                number=take('NI Number'),
                country=normalize_country(row.get('Country'))
            ))

    emit.entity(record)
コード例 #20
0
def parse_entry(emit, entry):
    """Build a record from one XML ``entry`` element and emit it.

    The entry is an individual when its ``type-entry`` text is ``'1'`` and
    an entity otherwise. An ``aka-list`` child of type ``'N'`` provides
    the primary name; all other ``aka-list`` children become aliases in
    ``other_names``. Documents, id numbers and addresses are collected
    into lists. When several places or dates of birth are listed, the
    last one is kept and a debug message is logged.
    """
    uid = entry.findtext('number-entry')
    record = {
        'uid': make_id('ua', 'sdfm', uid),
        'type': 'individual',
        'publisher': 'State Financial Monitoring Service of Ukraine',
        'publisher_url': 'http://www.sdfm.gov.ua/',
        'source_id': 'UA-SDFM-SANC',
        'source_url': 'http://www.sdfm.gov.ua/articles.php?cat_id=87&lang=en',
        'program': entry.findtext('./program-entry'),
        'summary': entry.findtext('./comments')
    }
    date_entry = entry.findtext('./date-entry')
    if date_entry:
        date_entry = datetime.strptime(date_entry, '%Y%m%d')
        record['updated_at'] = date_entry.date().isoformat()

    is_entity = entry.findtext('./type-entry') != '1'
    if is_entity:
        record['type'] = 'entity'

    # Quality codes are mapped to labels; unknown codes pass through.
    quality_labels = {'1': 'strong', '2': 'weak'}
    record['other_names'] = []
    for aka in entry.findall('./aka-list'):
        data = get_names(aka)
        aka_type = aka.findtext('./type-aka')
        if aka_type == 'N':
            # Type 'N' marks the primary name: merge it into the record.
            record['name'] = data.pop('other_name', None)
            record.update(data)
            continue
        quality = aka.findtext('./quality-aka')
        data['type'] = aka_type
        data['quality'] = quality_labels.get(quality, quality)
        record['other_names'].append(data)

    record['identities'] = []
    for doc in entry.findall('./document-list'):
        data = {
            'text': doc.findtext('./document-reg'),
            'number': doc.findtext('./document-id'),
            'country': normalize_country(doc.findtext('./document-country'))
        }
        record['identities'].append(data)

    for doc in entry.findall('./id-number-list'):
        # ``.text`` is None for an empty element; treat it as ''.
        record['identities'].append({'text': (doc.text or '').strip()})

    record['addresses'] = []
    for address in entry.findall('./address-list'):
        record['addresses'].append({'text': address.findtext('./address')})

    # FIXME: handle multiple -- only the last place of birth is kept.
    for pob in entry.findall('./place-of-birth-list'):
        if 'place_of_birth' in record:
            log.debug('Multiple places of birth: %(name)s', record)
        record['place_of_birth'] = (pob.text or '').strip()

    # FIXME: handle multiple -- only the last date of birth is kept.
    for dob in entry.findall('./date-of-birth-list'):
        # Test the date key here; testing 'place_of_birth' would log for
        # any entry that merely has a place of birth.
        if 'date_of_birth' in record:
            log.debug('Multiple dates of birth: %(name)s', record)
        record['date_of_birth'] = parse_date(dob.text)

    emit.entity(record)