Example #1
0
    def parse_parl(self, data):
        """Merge one parliament.gov.za member record into this person.

        ``data`` is a mapping that must provide ``id``, ``given_names``,
        ``family_name``, ``party_id`` and ``party_name``; ``email`` may be
        missing or empty.  When ``self.data`` is still empty it is seeded from
        ``data`` (minus the party and e-mail columns) and given a fresh person
        id; otherwise the stored name must equal the record's name.

        The party named by ``party_name`` must already exist in
        ``self.organizations`` (KeyError otherwise).  Its parliament identifier
        and this person's party membership are created if absent and
        cross-checked if present.  Finally the member's page is downloaded
        into ``self.text`` and passed to the other ``parse_*`` methods.

        Raises AssertionError when the record contradicts stored data.
        """
        name = '%(given_names)s %(family_name)s' % data
        if not self.data:
            self.data.update(data)
            del self.data['party_id']
            del self.data['party_name']
            # The e-mail is optional (see the data.get('email') check below),
            # so do not fail when the column is absent.
            self.data.pop('email', None)
            self.data['id'] = idFactory.new('person')
            self.data['name'] = name
        assert self.data['name'] == name

        # The parliament identifier always goes first in the list.
        parliament_identifier = {
            'scheme': 'za.gov.parliament/person',
            'identifier': '%(id)s' % data,
        }
        self.data.setdefault('identifiers', []).insert(0, parliament_identifier)

        email = data.get('email')
        if email:
            contacts = self.data.setdefault('contact_details', [])
            known_emails = [c['value'] for c in contacts if c['type'] == 'email']
            if known_emails:
                assert email == known_emails[0]
            else:
                # Covers both "no contact details yet" and "contact details
                # without an e-mail entry" (the latter used to IndexError).
                contacts.append({'type': 'email', 'value': email})

        party = self.organizations[data['party_name']]
        if 'identifiers' in party:
            assert party['identifiers'][0]['identifier'] == data['party_id']
        else:
            party['identifiers'] = [{
                'scheme': 'za.gov.parliament/party',
                'identifier': data['party_id'],
            }]

        existing_party = [
            m for m in self.data.get('memberships', [])
            if 'party' in m['organization_id']
        ]
        if existing_party:
            assert party['id'] == existing_party[0]['organization_id'], party['id']
        else:
            add_membership(self.data, {
                'person_id': self.data['id'],
                'organization_id': party['id'],
            })

        # NOTE(review): no timeout is passed, so a stalled server blocks this
        # call indefinitely.
        self.text = requests.get('http://www.parliament.gov.za/live/content.php?Item_ID=184&MemberID=%(id)s' % data).text
        self.parse_honorific()
        self.parse_table()
        self.parse_photo()
        self.parse_committees()
Example #2
0
def parse(data):
    """Add PMG committees and committee memberships to ``data``.

    Reads ``committees.csv`` and ``committee-members.csv`` from ``data_path``.
    Every committee not yet in ``data['organizations']`` (keyed by name) is
    added with a new ``committee_pmg`` id.  Each member row is matched to
    exactly one entry of ``data['persons']`` by family name (and initials when
    the family name is ambiguous), the row's party is checked against the
    person's party membership, and a committee membership is added.

    Returns the same ``data`` object, mutated in place.
    Raises AssertionError when a row does not match exactly one person or the
    party disagrees.
    """
    # Built before any committee is added; only used to look up parties.
    orgs_by_id = dict((org['id'], org) for org in data['organizations'].values())

    # Known misspellings of family names in the members CSV.
    # TODO: Use the person's other_names field, and get these misspellings in there.
    family_name_fixes = {
        'Khorai': 'Khoarai',
        'Hoosan': 'Hoosen',
        'Jeffrey': 'Jeffery',
        'Hill-Lews': 'Hill-Lewis',
    }

    # TODO: Perhaps check old/new committees, then stop using parl.py
    # committees. Or just assume these new ones are accurate.
    with open(data_path + 'committees.csv') as committees_file:
        for row in csv.DictReader(committees_file):
            if row['Name'] not in data['organizations']:
                data['organizations'][row['Name']] = {
                    'id': idFactory.new('committee_pmg'),
                    'name': row['Name'],
                    'slug': row['Name'].lower().replace(' ', '-'),
                    'classification': row['Type'],
                }

    with open(data_path + 'committee-members.csv') as members_file:
        for row in csv.DictReader(members_file):
            # Move a title that ended up before the comma to after it.
            row['Name'] = re.sub('^([^,]*) Mr, (.*)$', r'\1, Mr \2', row['Name'])

            family_name, initials = row['Name'].split(',')
            # Drop the leading title so only the initials remain.
            initials = re.sub(r'^\s*(Mr|Ms|Dr|Nkosi|Prof|Adv|Prince)\s+', '', initials)

            family_name = family_name_fixes.get(family_name, family_name)
            if family_name == 'Koornhof' and initials == 'NC':
                initials = 'NJJVR'

            persons = data['persons'].values()
            matches = [p for p in persons if asciify(p['family_name']) == family_name]
            if len(matches) > 1:
                # NOTE(review): the first pass compares the asciified family
                # name but these refinements compare the raw one; confirm
                # that is intended.
                matches = [
                    p for p in persons
                    if p['family_name'] == family_name
                    and initialise(p['given_names']) == initials
                ]
                if not matches:
                    # Fall back to a prefix match on the initials.
                    matches = [
                        p for p in persons
                        if p['family_name'] == family_name
                        and initialise(p['given_names'])[0:len(initials)] == initials
                    ]

            # With the current data, we now always have one result
            assert len(matches) == 1
            person = matches[0]

            party = [
                m for m in person['memberships'] if 'party' in m['organization_id']
            ][0]['organization_id']
            assert row['Party'] == orgs_by_id[party]['name'], row['Party'] + orgs_by_id[party]['name']

            mship = {
                'organization_id': data['organizations'][row['Committee']]['id']
            }
            # Chairperson wins if a row is flagged as both.
            if row['IsAlternative?'] == 'True':
                mship['role'] = 'Alternate Member'
            if row['IsChairperson?'] == 'True':
                mship['role'] = 'Chairperson'
            add_membership(person, mship)

    return data
Example #3
0
 def parse_committees(self):
     """Register the committees listed on the member page in ``self.text``.

     Finds the table following the "Committees represented on:" label,
     extracts every ``(CommitteeID, name)`` link from it, makes sure each
     committee exists in ``self.organizations`` (keyed by name) and adds a
     membership for this person.

     Raises AssertionError if a committee name is already known under a
     different id.  NOTE(review): raises AttributeError when the page has no
     "Committees represented on:" section, because the search result is used
     unchecked.
     """
     # re.S replaces the trailing "(?s)" the pattern used to carry: a global
     # inline flag that is not at the start of the pattern is deprecated
     # since Python 3.6 and rejected outright from 3.11.
     m = re.search(r'<td[^>]*><b[^>]*>Committees represented on: *</b></td>.*?<table[^>]*>(.*?)</table>', self.text, re.S)
     committees = dict(re.findall(r'<a href="content.php\?Item_ID=\d+&CommitteeID=(\d+)">(.*?)</a>', m.group(1)))
     # list() keeps reversed() working where dict views are not reversible.
     for committee_id, name in reversed(list(committees.items())):
         org_id = 'org.mysociety.za/committee/' + committee_id
         if name in self.organizations:
             assert self.organizations[name]['id'] == org_id
         else:
             self.organizations[name] = {
                 'id': org_id,
                 'name': name,
                 'identifiers': [ { 'scheme': 'za.gov.parliament/committee', 'identifier': committee_id } ],
                 'slug': name.lower().replace(' ','-'),
                 'classification': 'committee'
             }
         add_membership(self.data, { 'person_id': self.data['id'], 'organization_id': self.organizations[name]['id'] } )
Example #4
0
def _ncop_delegate_membership(entry):
    """Build the NCOP 'Delegate' membership dict for one manual entry.

    ``entry`` must contain ``province`` and ``end_date``; ``start_date``
    defaults to '2009-05-07' and ``end_reason`` to 'Resigned'.
    """
    province = entry['province']
    return {
        'organization_id': 'org.mysociety.za/house/ncop',
        'label': 'Delegate for %s' % province,
        'role': 'Delegate',
        'area': {
            'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[province],
            'name': province,
        },
        'start_date': entry.get('start_date', '2009-05-07'),
        'end_date': entry['end_date'],
        'end_reason': entry.get('end_reason', 'Resigned'),
    }


def parse(data):
    """Add slugs and house membership dates to every person in ``data``.

    Each person gets a ``slug`` derived from their name.  NCOP 'Delegate'
    memberships get a start date (default '2009-05-07') and National Assembly
    'Member' memberships get one if they lack it (default '2009-05-06'); the
    hard-coded manual tables below override the defaults and supply end dates
    and reasons.  Manual NCOP entries for people without an NCOP membership
    add one, and entries whose person is missing altogether create the person.

    Returns the same ``data`` object, mutated in place.
    Raises AssertionError when the data breaks an expectation checked below,
    and Exception when a manual National Assembly entry names a person who
    has no 'Member' membership there.
    """
    persons = data['persons']
    for person in persons.values():
        person['slug'] = person['name'].lower().replace(' ', '-')

    # There are three non-Assembly/NCOP people in the executive
    no_house = sum(
        1 for p in persons.values()
        if not any('house' in m['organization_id'] for m in p['memberships']))
    assert no_house == 3

    # Manual National Assembly corrections, keyed by full name.
    na_manual = {
        'Cassel Charlie Mathale': {'start_date': '2013-07-15'},
        'Wayne Maxim Thring': {'start_date': '2013-06-21'},
        'Masenyani Richard Baloyi': {'end_date': '2013-07-10'},
        'Letlapa Moroatshoge Mphahlele': {
            'end_date': '2013-07-11',
            'end_reason': 'Ceased to be a member under section 47(3)(c) of the Constitution (changed party)',
        },
        # 'Mpethi': { 'start_date': ? },
        'Ntopile Marcel Kganyago': {'end_date': '2013-07-17', 'end_reason': 'Died'},
        'Nqabayomzi Lawrence Kwankwa': {'start_date': '2013-08-06'},
        'Loretta Jacobus': {'end_date': '2013-08-01'},
    }

    # Manual NCOP corrections, keyed by full name.
    ncop_manual = {
        'Rory Dean MacPherson': {'party': 'DA', 'end_date': '2009-05-29', 'province': 'KwaZulu-Natal'},
        'Robert Alfred Lees': {'start_date': '2009-06-11'},
        'Sheery Su-Huei Cheng': {'party': 'DA', 'end_date': '2010-09-30', 'province': 'Gauteng'},
        'Beverley Lynette Abrahams': {'start_date': '2010-10-01'},
        'Timothy Duncan Harris': {'party': 'DA', 'end_date': '2010-09-09', 'province': 'Western Cape'},
        'Theodorus Barnardus Beyleveldt': {
            'party': 'DA',
            'start_date': '2010-10-12',
            'end_date': '2011-07-10',
            'end_reason': 'Died',
            'province': 'Western Cape',
        },
        'Denis Joseph': {'start_date': '2011-10-20'},
        'Armiston Watson': {'party': 'DA', 'end_date': '2011-11-07', 'province': 'Mpumalanga'},
        'Velly Makasana Manzini': {'start_date': '2011-11-08'},
        'Tlhalefi Andries Mashamaite': {'party': 'ANC', 'end_date': '2012-05-08', 'province': 'Limpopo'},
        'Thabo Lucas Makunyane': {'start_date': '2012-05-22'},
        'Zukisa Cheryl Faku': {'start_date': '2013-04-25'},
        'Mokoane Collen Maine': {'end_date': '2013-08-01'},  # XXX
    }

    for person in persons.values():
        name = person['name']
        mships = person['memberships']

        ncop_mships = [
            x for x in mships
            if 'ncop' in x['organization_id'] and x['role'] == 'Delegate'
        ]
        if ncop_mships:
            # Present, and has NCOP membership entry. Set a start and possibly end date.
            mship = ncop_mships[0]
            assert 'start_date' not in mship
            n = ncop_manual.pop(name, {})
            mship['start_date'] = n.get('start_date', '2009-05-07')
            if 'end_date' in n and 'end_date' not in mship:
                mship['end_date'] = n['end_date']
        elif name in ncop_manual:
            # Present, but has no NCOP membership entry
            add_membership(person,
                           _ncop_delegate_membership(ncop_manual.pop(name)))

        na_mships = [
            x for x in mships
            if 'house/na' in x['organization_id'] and x['role'] == 'Member'
        ]
        if na_mships:
            mship = na_mships[0]
            n = na_manual.pop(name, {})
            # Always consume the manual start date, even when the membership
            # already has one (which then wins).  Leaving it behind made the
            # "anything left is an end date" check below fail with a KeyError.
            start_date = n.pop('start_date', '2009-05-06')
            mship.setdefault('start_date', start_date)
            if n:
                assert 'end_date' not in mship
                mship['end_date'] = n['end_date']
                mship['end_reason'] = n.get('end_reason', 'Resigned')
        elif name in na_manual:
            raise Exception(
                'Manual National Assembly entry for %s, who has no Member '
                'membership there' % name)

    # The ones left have no person entry at all.
    for name, d in ncop_manual.items():
        person_id = idFactory.new('person')
        given_names, family_name = name.rsplit(None, 1)
        person = {
            'id': person_id,
            'name': name,
            'given_names': given_names,
            'family_name': family_name,
            'slug': name.lower().replace(' ', '-'),
        }
        add_membership(
            person,
            {'organization_id': data['organizations'][d['party']]['id']})
        add_membership(person, _ncop_delegate_membership(d))
        persons[name] = person

    return data
Example #5
0
def parse(data):
    """Add slugs and house membership dates to every person in ``data``.

    Every person gets a ``slug`` built from their name.  NCOP 'Delegate'
    memberships receive a start date (default '2009-05-07'); National
    Assembly 'Member' memberships receive one when missing (default
    '2009-05-06').  The two hard-coded tables below override those defaults
    and provide end dates and reasons.  A manual NCOP entry for a person
    without an NCOP membership adds that membership, and an entry whose
    person does not exist creates the person.

    Returns the same ``data`` object, mutated in place.
    Raises AssertionError when a data expectation checked below does not
    hold, and Exception when a manual National Assembly entry names someone
    without a 'Member' membership there.
    """
    def ncop_delegate(entry):
        # Membership dict shared by "person exists without an NCOP
        # membership" and "person does not exist at all".
        province = entry['province']
        return {
            'organization_id': 'org.mysociety.za/house/ncop',
            'label': 'Delegate for %s' % province,
            'role': 'Delegate',
            'area': { 'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[province], 'name': province },
            'start_date': entry.get('start_date', '2009-05-07'),
            'end_date': entry['end_date'],
            'end_reason': entry.get('end_reason', 'Resigned'),
        }

    for person in data['persons'].values():
        person['slug'] = person['name'].lower().replace(' ', '-')

    # There are three non-Assembly/NCOP people in the executive
    no_house = 0
    for p in data['persons'].values():
        if not any('house' in y['organization_id'] for y in p['memberships']):
            no_house += 1
    assert no_house == 3

    # Manual National Assembly corrections, keyed by full name.
    na_manual = {
        'Cassel Charlie Mathale': { 'start_date': '2013-07-15' },
        'Wayne Maxim Thring': { 'start_date': '2013-06-21' },
        'Masenyani Richard Baloyi': { 'end_date': '2013-07-10' },
        'Letlapa Moroatshoge Mphahlele': { 'end_date': '2013-07-11', 'end_reason': 'Ceased to be a member under section 47(3)(c) of the Constitution (changed party)' },
        # 'Mpethi': { 'start_date': ? },
        'Ntopile Marcel Kganyago': { 'end_date': '2013-07-17', 'end_reason': 'Died' },
        'Nqabayomzi Lawrence Kwankwa': { 'start_date': '2013-08-06' },
        'Loretta Jacobus': { 'end_date': '2013-08-01' },
    }

    # Manual NCOP corrections, keyed by full name.
    ncop_manual = {
        'Rory Dean MacPherson': { 'party': 'DA', 'end_date': '2009-05-29', 'province': 'KwaZulu-Natal' },
        'Robert Alfred Lees': { 'start_date': '2009-06-11' },
        'Sheery Su-Huei Cheng': { 'party': 'DA', 'end_date': '2010-09-30', 'province': 'Gauteng' },
        'Beverley Lynette Abrahams': { 'start_date': '2010-10-01' },
        'Timothy Duncan Harris': { 'party': 'DA', 'end_date': '2010-09-09', 'province': 'Western Cape' },
        'Theodorus Barnardus Beyleveldt': { 'party': 'DA', 'start_date': '2010-10-12', 'end_date': '2011-07-10', 'end_reason': 'Died', 'province': 'Western Cape' },
        'Denis Joseph': { 'start_date': '2011-10-20' },
        'Armiston Watson': { 'party': 'DA', 'end_date': '2011-11-07', 'province': 'Mpumalanga' },
        'Velly Makasana Manzini': { 'start_date': '2011-11-08' },
        'Tlhalefi Andries Mashamaite': { 'party': 'ANC', 'end_date': '2012-05-08', 'province': 'Limpopo' },
        'Thabo Lucas Makunyane': { 'start_date': '2012-05-22' },
        'Zukisa Cheryl Faku': { 'start_date': '2013-04-25' },
        'Mokoane Collen Maine': { 'end_date': '2013-08-01' }, # XXX
    }

    for person in data['persons'].values():
        name = person['name']
        mships = person['memberships']
        delegate = [ x for x in mships if 'ncop' in x['organization_id'] and x['role'] == 'Delegate' ]
        if delegate:
            # Present, and has NCOP membership entry. Set a start and possibly end date.
            mship = delegate[0]
            assert 'start_date' not in mship
            n = ncop_manual.pop(name, {})
            mship['start_date'] = n.get('start_date', '2009-05-07')
            if 'end_date' in n and 'end_date' not in mship:
                mship['end_date'] = n['end_date']
        elif name in ncop_manual:
            # Present, but has no NCOP membership entry
            add_membership(person, ncop_delegate(ncop_manual.pop(name)))

        member = [ x for x in mships if 'house/na' in x['organization_id'] and x['role'] == 'Member' ]
        if member:
            mship = member[0]
            n = na_manual.pop(name, {})
            # Consume the manual start date unconditionally; an existing
            # start date on the membership still takes precedence.  If it
            # were left in ``n`` the block below would look up a missing
            # 'end_date' and raise KeyError.
            manual_start = n.pop('start_date', '2009-05-06')
            if 'start_date' not in mship:
                mship['start_date'] = manual_start
            if n:
                assert 'end_date' not in mship
                mship['end_date'] = n['end_date']
                mship['end_reason'] = n.get('end_reason', 'Resigned')
        elif name in na_manual:
            raise Exception('Manual National Assembly entry for %s, who has no Member membership there' % name)

    # The ones left have no person entry at all.
    for name, d in ncop_manual.items():
        new_id = idFactory.new('person')
        given_names, family_name = name.rsplit(None, 1)
        person = {
            'id': new_id,
            'name': name,
            'given_names': given_names,
            'family_name': family_name,
            'slug': name.lower().replace(' ', '-'),
        }
        add_membership(person, { 'organization_id': data['organizations'][d['party']]['id'] })
        add_membership(person, ncop_delegate(d))
        data['persons'][name] = person

    return data
Example #6
0
def _load_export_row(row):
    """Fold one row of the executive CSV export into PEOPLE and ORGANIZATIONS."""
    # Keep only non-empty columns, renamed through col_map().
    person_bits = dict(
        (col_map(k), v) for k, v in row.items()
        if k in ('first_name', 'last_name', 'initials_alt', 'other_names',
                 'title', 'email') and v)
    position_bits = dict(
        (col_map(k), v) for k, v in row.items()
        if k in ('start_date', 'end_date', 'end_reason', 'organisation',
                 'position', 'region')
        and v and v != 'Member' and v != 'National')
    # An end reason is dropped when there is no end date or when it is '0'.
    # pop()/get() tolerate a row whose end_reason column is empty, matching
    # the "'end_reason' in position_bits" check further down.
    if 'end_date' not in position_bits or position_bits.get('end_reason') == '0':
        position_bits.pop('end_reason', None)

    if person_bits['given_names'] == 'Tlhalefi Andries':
        return  # Comes in elsewhere
    # Manual fixes of file
    fix_person_bits(person_bits)
    fix_end_reason(position_bits, person_bits)

    name = '%(given_names)s %(family_name)s' % person_bits

    person_bits['name'] = name
    if person_bits.get('email'):
        person_bits['contact_details'] = [{
            'type': 'email',
            'value': person_bits.pop('email')
        }]
    if 'other_names' in person_bits:
        person_bits['other_names'] = [{'name': person_bits['other_names']}]

    # Organisations not seen before are registered as parties.
    org_name = position_bits['organisation']
    if org_name not in ORGANIZATIONS:
        ORGANIZATIONS[org_name] = {
            'id': 'org.mysociety.za/party/' + org_name.lower(),
            'name': org_name,
            'slug': org_name.lower(),
            'classification': 'party'
        }
    org_id = ORGANIZATIONS[org_name]['id']
    position_bits['organization_id'] = org_id
    del position_bits['organisation']

    if org_id == 'org.mysociety.za/house/national-assembly' and 'role' not in position_bits:
        position_bits['label'] = position_bits['role'] = 'Member'
    elif org_id == 'org.mysociety.za/house/ncop':
        position_bits['label'] = position_bits['role'] = 'Delegate'
    if 'end_reason' in position_bits:
        position_bits['end_reason'] = REASONS[position_bits['end_reason']]
    if position_bits.get('region'):
        r = position_bits['region']
        position_bits['area'] = {
            'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[r],
            'name': r
        }
        # NOTE(review): requires a label set above; a region on any other
        # kind of position raises KeyError here.
        position_bits['label'] += ' for ' + r
        del position_bits['region']

    if name in PEOPLE:
        # A repeated person must carry identical personal details.
        person_bits['id'] = PEOPLE[name]['person']['id']
        assert PEOPLE[name]['person'] == person_bits
    else:
        person_bits['id'] = idFactory.new('person')
        PEOPLE[name] = {'id': person_bits['id'], 'person': person_bits}
    add_membership(PEOPLE[name], position_bits)


def parse():
    """Build the persons/organizations data set from the MyReps files.

    Reads, from ``data_path``: the executive CSV export, the National
    Assembly XML, the saved National Assembly HTML page (for past members)
    and the NCOP XML.  People and their memberships are accumulated in the
    module-level ``PEOPLE`` and ``ORGANIZATIONS`` dicts, and MyReps
    identifiers are attached to each person.

    Returns ``{'persons': PEOPLE, 'organizations': ORGANIZATIONS}``, where
    each ``PEOPLE`` value has been flattened to the person dict with its
    ``memberships`` list merged in.

    Raises KeyError when an XML/HTML name has no counterpart in ``PEOPLE``,
    and AssertionError when a CSV person reappears with different details.
    """
    with open(data_path + 'myreps_na_executive_export.csv') as export_file:
        for row in FixingDictReader(export_file):
            _load_export_row(row)

    # National Assembly MyReps site data
    # To fetch myreps ID and PERSON_ID

    with open(data_path + 'myreps-na.xml') as na_file:
        na = na_file.read()
    cols_xml = [
        'id', 'person_id', 'person_first_name', 'person_last_name',
        'person_paries'
    ]

    for person in ET.fromstring(na).iter('Members'):
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        # Rename a few people so they match the names built from the CSV.
        if row['person_first_name'] == 'Nomaindiya Cathleen':
            row['person_first_name'] = 'NomaIndiya Cathleen'
        if row['person_first_name'] == 'Alpheus' and row['person_last_name'] == 'Mokabhe':
            row.update(person_first_name='Alpheus Mokabhe',
                       person_last_name='Maziya')
        if row['person_first_name'] == 'Ximbi':
            row.update(person_first_name='Dumsani Livingstone',
                       person_last_name='Ximbi')
        name = '%(person_first_name)s %(person_last_name)s' % row
        name = fix_bad_encoding(name.encode('utf-8'))
        PEOPLE[name]['person']['identifiers'] = [
            {
                'identifier': row['person_id'],
                'scheme': 'myreps_person_id'
            },
        ]
        if row['id']:
            PEOPLE[name]['person']['identifiers'].append({
                'identifier': row['id'],
                'scheme': 'myreps_id'
            })

    with open(data_path + 'myreps-national-assembly.html') as na_prev_file:
        na_prev = na_prev_file.read()
    # re.S replaces the trailing "(?s)" the pattern used to carry: a global
    # inline flag that is not at the start of the pattern is deprecated since
    # Python 3.6 and rejected outright from 3.11.
    na_prev = re.search(r'<div[^>]*id="past"[^>]*>.*?</div>', na_prev,
                        re.S).group(0)
    for person in re.findall(
            '<li><a href="/people/view/(.*?)">([^<]*) ([^<]*?)</a> until .*?</li>',
            na_prev):
        row = dict(zip(cols_xml, ['', person[0], person[1], person[2], '']))
        # The regex splits at the last space, so repair multi-word surnames.
        if row['person_first_name'] == 'Patricia de':
            row.update(person_first_name='Patricia',
                       person_last_name='de Lille')
        if row['person_first_name'] == 'D van der':
            row.update(person_first_name='D', person_last_name='van der Walt')
        name = '%(person_first_name)s %(person_last_name)s' % row
        PEOPLE[name]['person']['identifiers'] = [
            {
                'identifier': row['person_id'],
                'scheme': 'myreps_person_id'
            },
        ]

    # NCOP MyReps site data

    with open(data_path + 'myreps-ncop.xml') as ncop_file:
        ncop = ncop_file.read()
    for person in ET.fromstring(ncop).iter('Members'):
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        # Change couple of names to match parliament data
        if row['person_first_name'] == 'Arthur':
            row['person_first_name'] = 'Robert Alfred'
        elif row['person_first_name'] == 'Buoang Lemias':
            row['person_first_name'] = 'Budang Lemias'
        name = '%(person_first_name)s %(person_last_name)s' % row
        person_id = idFactory.new('person')
        # Replaces any existing PEOPLE entry with the same name.
        PEOPLE[name] = {
            'id': person_id,
            'person': {
                'id': person_id,
                'given_names': row['person_first_name'],
                'family_name': row['person_last_name'],
                'name': name,
                'identifiers': [
                    {
                        'identifier': row['id'],
                        'scheme': 'myreps_id'
                    },
                    {
                        'identifier': row['person_id'],
                        'scheme': 'myreps_person_id'
                    },
                ]
            },
        }
        add_membership(
            PEOPLE[name], {
                'organization_id': 'org.mysociety.za/house/ncop',
                'label': 'Delegate',
                'role': 'Delegate'
            })
        if row['id'] == '7852':
            # Special case of one person resigned since data
            PEOPLE[name]['memberships'][0].update(
                end_date='2013-03-27',
                end_reason='Resigned',
                label='Delegate for Eastern Cape',
                area={
                    'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES['Eastern Cape'],
                    'name': 'Eastern Cape'
                })
        if row['person_paries']:
            add_membership(
                PEOPLE[name],
                {'organization_id': ORGANIZATIONS[row['person_paries']]['id']})

    # Flatten each entry to the person dict with its memberships merged in.
    # Iterate over a snapshot of the keys because values are replaced.
    for name in list(PEOPLE):
        PEOPLE[name]['person'].update(memberships=PEOPLE[name]['memberships'])
        PEOPLE[name] = PEOPLE[name]['person']

    return {
        'persons': PEOPLE,
        'organizations': ORGANIZATIONS,
    }
Example #7
0
def _match_committee_member(persons, family_name, initials):
    """Return the entries of ``persons`` matching a CSV family name/initials.

    The first pass compares the asciified family name.  Only when that is
    ambiguous are the initials used: first as an exact match, then as a
    prefix of the person's initials.
    """
    matches = [
        p for p in persons if asciify(p['family_name']) == family_name
    ]
    if len(matches) <= 1:
        return matches

    # NOTE(review): the refinements compare the raw family name, whereas the
    # first pass compares the asciified one; confirm that is intended.
    exact = [
        p for p in persons
        if p['family_name'] == family_name
        and initialise(p['given_names']) == initials
    ]
    if exact:
        return exact
    return [
        p for p in persons
        if p['family_name'] == family_name
        and initialise(p['given_names'])[0:len(initials)] == initials
    ]


def parse(data):
    """Add PMG committees and committee memberships to ``data``.

    Reads ``committees.csv`` and ``committee-members.csv`` from ``data_path``.
    Committees missing from ``data['organizations']`` (keyed by name) are
    added with a new ``committee_pmg`` id.  Each member row must match exactly
    one person in ``data['persons']`` and agree with that person's party; a
    membership of the row's committee is then added to the person.

    Returns the same ``data`` object, mutated in place.
    Raises AssertionError when a row does not match exactly one person or the
    party disagrees.
    """
    # Built before any committee is added; only used to look up parties.
    orgs_by_id = dict((x['id'], x) for x in data['organizations'].values())

    # TODO: Perhaps check old/new committees, then stop using parl.py
    # committees. Or just assume these new ones are accurate.
    with open(data_path + 'committees.csv') as committees_file:
        for row in csv.DictReader(committees_file):
            if row['Name'] not in data['organizations']:
                data['organizations'][row['Name']] = {
                    'id': idFactory.new('committee_pmg'),
                    'name': row['Name'],
                    'slug': row['Name'].lower().replace(' ', '-'),
                    'classification': row['Type']
                }

    with open(data_path + 'committee-members.csv') as members_file:
        for row in csv.DictReader(members_file):
            # Move a title that ended up before the comma to after it.
            row['Name'] = re.sub('^([^,]*) Mr, (.*)$', r'\1, Mr \2',
                                 row['Name'])

            family_name, initials = row['Name'].split(',')
            # Drop the leading title so only the initials remain.
            initials = re.sub(r'^\s*(Mr|Ms|Dr|Nkosi|Prof|Adv|Prince)\s+', '',
                              initials)

            # TODO: Use the person's other_names field, and get these misspellings in there.
            if family_name == 'Khorai': family_name = 'Khoarai'
            if family_name == 'Hoosan': family_name = 'Hoosen'
            if family_name == 'Jeffrey': family_name = 'Jeffery'
            if family_name == 'Hill-Lews': family_name = 'Hill-Lewis'
            if family_name == 'Koornhof' and initials == 'NC': initials = 'NJJVR'

            matches = _match_committee_member(data['persons'].values(),
                                              family_name, initials)

            # With the current data, we now always have one result
            assert len(matches) == 1
            person = matches[0]

            party = [
                x for x in person['memberships']
                if 'party' in x['organization_id']
            ][0]['organization_id']
            assert row['Party'] == orgs_by_id[party][
                'name'], row['Party'] + orgs_by_id[party]['name']

            mship = {
                'organization_id':
                data['organizations'][row['Committee']]['id']
            }
            # Chairperson wins if a row is flagged as both.
            if row['IsAlternative?'] == 'True':
                mship['role'] = 'Alternate Member'
            if row['IsChairperson?'] == 'True':
                mship['role'] = 'Chairperson'
            add_membership(person, mship)

    return data
Example #8
0
def parse():
    """Build the persons/organizations dataset from the MyReps exports.

    Reads, in order, the executive CSV export, the National Assembly XML, the
    ``id="past"`` section of the National Assembly HTML page and the NCOP XML
    (all located under ``data_path``). People are accumulated in the
    module-level ``PEOPLE`` dict and parties in ``ORGANIZATIONS``.

    Returns:
        dict: ``{"persons": PEOPLE, "organizations": ORGANIZATIONS}``. Before
        returning, every ``PEOPLE`` value is replaced by its ``"person"``
        record with the collected ``"memberships"`` attached.

    Raises:
        AssertionError: if a person seen twice in the CSV does not compare
            equal to the record already stored under the same name.
        KeyError: if a name from the National Assembly XML/HTML data has no
            entry in ``PEOPLE``, or a lookup in ``ORGANIZATIONS``, ``REASONS``
            or ``PROVINCES`` fails.
    """
    person_columns = ("first_name", "last_name", "initials_alt", "other_names", "title", "email")
    position_columns = ("start_date", "end_date", "end_reason", "organisation", "position", "region")

    with open(data_path + "myreps_na_executive_export.csv") as export_file:
        for row in FixingDictReader(export_file):
            person_bits = {col_map(k): v for k, v in row.items() if k in person_columns and v}
            position_bits = {
                col_map(k): v
                for k, v in row.items()
                if k in position_columns and v and v != "Member" and v != "National"
            }
            # Drop the end reason when there is no end date, or when it is the
            # literal "0". Tolerates rows where the column was empty (and so
            # never made it into position_bits) instead of raising KeyError.
            if "end_date" not in position_bits or position_bits.get("end_reason") == "0":
                position_bits.pop("end_reason", None)

            if person_bits["given_names"] == "Tlhalefi Andries":
                continue  # Comes in elsewhere
            # Manual fixes of file
            fix_person_bits(person_bits)
            fix_end_reason(position_bits, person_bits)

            name = "%(given_names)s %(family_name)s" % person_bits
            person_bits["name"] = name
            if person_bits.get("email"):
                person_bits["contact_details"] = [{"type": "email", "value": person_bits.pop("email")}]
            if "other_names" in person_bits:
                person_bits["other_names"] = [{"name": person_bits["other_names"]}]

            # Unknown organisations are registered on the fly as parties.
            organisation = position_bits.pop("organisation")
            if organisation not in ORGANIZATIONS:
                ORGANIZATIONS[organisation] = {
                    "id": "org.mysociety.za/party/" + organisation.lower(),
                    "name": organisation,
                    "slug": organisation.lower(),
                    "classification": "party",
                }
            organization_id = ORGANIZATIONS[organisation]["id"]
            position_bits["organization_id"] = organization_id

            if organization_id == "org.mysociety.za/house/national-assembly" and "role" not in position_bits:
                position_bits["label"] = position_bits["role"] = "Member"
            elif organization_id == "org.mysociety.za/house/ncop":
                position_bits["label"] = position_bits["role"] = "Delegate"
            if "end_reason" in position_bits:
                position_bits["end_reason"] = REASONS[position_bits["end_reason"]]
            if position_bits.get("region"):
                region = position_bits.pop("region")
                position_bits["area"] = {"id": "org.mysociety.za/mapit/code/p/" + PROVINCES[region], "name": region}
                # NOTE(review): assumes a label was set above (or mapped in by
                # col_map); otherwise this raises KeyError -- confirm.
                position_bits["label"] += " for " + region

            if name in PEOPLE:
                person_bits["id"] = PEOPLE[name]["person"]["id"]
                assert PEOPLE[name]["person"] == person_bits
            else:
                person_bits["id"] = idFactory.new("person")
                PEOPLE[name] = {"id": person_bits["id"], "person": person_bits}
            add_membership(PEOPLE[name], position_bits)

    # National Assembly MyReps site data
    # To fetch myreps ID and PERSON_ID

    cols_xml = ["id", "person_id", "person_first_name", "person_last_name", "person_paries"]

    with open(data_path + "myreps-na.xml") as na_file:
        na = na_file.read()
    for person in ET.fromstring(na).iter("Members"):
        row = {col: person.find(col).text for col in cols_xml}
        # Align names with the spellings used as PEOPLE keys.
        if row["person_first_name"] == "Nomaindiya Cathleen":
            row["person_first_name"] = "NomaIndiya Cathleen"
        if row["person_first_name"] == "Alpheus" and row["person_last_name"] == "Mokabhe":
            row.update(person_first_name="Alpheus Mokabhe", person_last_name="Maziya")
        if row["person_first_name"] == "Ximbi":
            row.update(person_first_name="Dumsani Livingstone", person_last_name="Ximbi")
        name = "%(person_first_name)s %(person_last_name)s" % row
        name = fix_bad_encoding(name.encode("utf-8"))
        identifiers = [{"identifier": row["person_id"], "scheme": "myreps_person_id"}]
        if row["id"]:
            identifiers.append({"identifier": row["id"], "scheme": "myreps_id"})
        PEOPLE[name]["person"]["identifiers"] = identifiers

    with open(data_path + "myreps-national-assembly.html") as na_prev_file:
        na_prev = na_prev_file.read()
    # DOTALL is passed as a flag argument: an inline "(?s)" at the end of the
    # pattern is deprecated since Python 3.6 and an error from 3.11.
    na_prev = re.search(r'<div[^>]*id="past"[^>]*>.*?</div>', na_prev, re.DOTALL).group(0)
    past_members = re.findall('<li><a href="/people/view/(.*?)">([^<]*) ([^<]*?)</a> until .*?</li>', na_prev)
    for myreps_person_id, first_name, last_name in past_members:
        # The pattern splits on the last space, so multi-word surnames need
        # to be put back together by hand.
        if first_name == "Patricia de":
            first_name, last_name = "Patricia", "de Lille"
        if first_name == "D van der":
            first_name, last_name = "D", "van der Walt"
        name = "%s %s" % (first_name, last_name)
        PEOPLE[name]["person"]["identifiers"] = [{"identifier": myreps_person_id, "scheme": "myreps_person_id"}]

    # NCOP MyReps site data

    with open(data_path + "myreps-ncop.xml") as ncop_file:
        ncop = ncop_file.read()
    for person in ET.fromstring(ncop).iter("Members"):
        row = {col: person.find(col).text for col in cols_xml}
        # Change couple of names to match parliament data
        if row["person_first_name"] == "Arthur":
            row["person_first_name"] = "Robert Alfred"
        elif row["person_first_name"] == "Buoang Lemias":
            row["person_first_name"] = "Budang Lemias"
        name = "%(person_first_name)s %(person_last_name)s" % row
        new_id = idFactory.new("person")
        PEOPLE[name] = {
            "id": new_id,
            "person": {
                "id": new_id,
                "given_names": row["person_first_name"],
                "family_name": row["person_last_name"],
                "name": name,
                "identifiers": [
                    {"identifier": row["id"], "scheme": "myreps_id"},
                    {"identifier": row["person_id"], "scheme": "myreps_person_id"},
                ],
            },
        }
        add_membership(
            PEOPLE[name], {"organization_id": "org.mysociety.za/house/ncop", "label": "Delegate", "role": "Delegate"}
        )
        if row["id"] == "7852":
            # Special case of one person resigned since data
            PEOPLE[name]["memberships"][0].update(
                end_date="2013-03-27",
                end_reason="Resigned",
                label="Delegate for Eastern Cape",
                area={"id": "org.mysociety.za/mapit/code/p/" + PROVINCES["Eastern Cape"], "name": "Eastern Cape"},
            )
        if row["person_paries"]:
            add_membership(PEOPLE[name], {"organization_id": ORGANIZATIONS[row["person_paries"]]["id"]})

    # Flatten each entry to the person record, carrying its memberships along.
    # Iterate over a snapshot of the keys because values are rebound in place.
    for name in list(PEOPLE):
        entry = PEOPLE[name]
        entry["person"].update(memberships=entry["memberships"])
        PEOPLE[name] = entry["person"]

    return {"persons": PEOPLE, "organizations": ORGANIZATIONS}
Example #9
0
    def parse_table(self):
        """Read the label/value table in ``self.text`` into ``self.data``.

        Rows whose value is ``-`` or an empty mailto link are ignored. Known
        contact-detail rows are appended to ``self.data['contact_details']``.
        The ``House`` row is resolved through ``self.organizations`` and either
        checked against an existing house membership (filling in its area and
        label when a province is given) or added as a new membership. Entries
        of the ``Position(s)`` row that are not filtered out are added as
        further memberships of that house.

        Raises:
            AssertionError: if the page disagrees with the house or area
                already recorded on an existing house membership.
            KeyError: if the ``House`` row is missing, or the house/province
                is unknown to ``self.organizations`` / ``PROVINCES``.
        """
        # Raw strings keep "\s" a regex escape, and DOTALL is passed as a flag
        # because a trailing inline "(?s)" is an error on Python 3.11+.
        m = re.findall(
            r'<td height="25" valign="middle" class="pad"><b>(.*?):</b></td>\s*'
            r'<td width="70%" valign="middle" class="pad">(.*?)</td>',
            self.text,
            re.DOTALL,
        )
        m = dict((k, v) for k, v in m if v not in ('-', '<a href = mailto:></a>'))

        contact_rows = (
            'Constituency Fax Number', 'Session Fax Number', 'Cell Phone Number',
            'Constituency Phone Number', 'Session Phone Number',
            'Constituency Postal Address', 'Constituency Street Address',
        )
        for contact_detail in contact_rows:
            if contact_detail not in m:
                continue
            if '<a target' in m[contact_detail]:
                continue
            # Order matters: fax and cell rows also contain "Number".
            if 'Fax' in contact_detail:
                kind = 'fax'
            elif 'Cell' in contact_detail:
                kind = 'cell'
            elif 'Number' in contact_detail:
                kind = 'voice'
            elif 'Address' in contact_detail:
                kind = 'address'
            self.data.setdefault('contact_details', []).append(
                {'type': kind, 'value': m.pop(contact_detail), 'note': contact_detail}
            )

        house = self.organizations[m.pop('House')]['id']
        province = None
        if 'Delegate of Province' in m:
            province = m.pop('Delegate of Province')
            label = 'Delegate'
        elif 'Province' in m:
            province = m.pop('Province')
            label = 'Member'

        # NOTE(review): label is only bound when a province row exists or the
        # name is listed here; any other page reaching the "new membership"
        # branch below raises NameError.
        if self.data['name'] in ('Nqabayomzi Lawrence Kwankwa', 'Cassel Charlie Mathale', 'Wayne Maxim Thring'):
            label = 'Member'

        existing_house = [x for x in self.data['memberships'] if 'house' in x['organization_id']]
        if existing_house:
            current = existing_house[0]
            assert current['organization_id'] == house
            if province:
                if 'area' in current:
                    assert current['area']['name'] == province
                else:
                    current['area'] = {
                        'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[province], 'name': province
                    }
                    current['label'] = label + ' for ' + province
        else:
            # Faku
            dat = {
                'person_id': self.data['id'],
                'organization_id': house,
                'role': label,
                'label': label
            }
            if province:
                dat['label'] = label + ' for ' + province
                dat['area'] = {'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[province], 'name': province}
            add_membership(self.data, dat)

        if 'Position(s)' in m:
            posns = [{'role': x.strip()} for x in m.pop('Position(s)').split('<br />') if x.strip() != 'Delegate']
            # Expand portfolio names that the page lists without their title.
            if len(posns) == 1 and posns[0]['role'] == 'Correctional Services':
                posns[0]['role'] = 'Minister of ' + posns[0]['role']
            elif len(posns) == 1 and posns[0]['role'] == 'Public Service and Administration':
                posns[0]['role'] = 'Minister for the ' + posns[0]['role']
            elif (
                len(posns) == 2
                and posns[0]['role'] == 'Minister in The Presidency'
                and posns[1]['role'] == 'Performance Monitoring and Evaluation as well as Administration in the Presidency'
            ):
                posns = [{'role': 'Minister in The Presidency: Performance, Monitoring and Evaluation as well as Administration'}]
            # These come from step 1
            skipped_roles = (
                'Deputy President', 'The Chief Whip of the Opposition', 'House Chairperson',
                'Leader Of Opposition', 'Chief Whip of the Opposition',
                'Deputy Speaker of the National Assembly', 'Speaker of the National Assembly',
            )
            posns = [x for x in posns if 'Minister' not in x['role'] and x['role'] not in skipped_roles]
            for p in posns:
                p.update(person_id=self.data['id'], organization_id=house)
                add_membership(self.data, p)