Example #1
0
    def parse_parl(self, data):
        """Merge a parliament-website member record into ``self.data``.

        When ``self.data`` is still empty it is seeded from ``data`` (minus
        the ``party_id``, ``party_name`` and ``email`` keys) and given a
        fresh person id and a ``name`` built from the given and family names.
        The parliament identifier is then put first in ``identifiers``, the
        e-mail address and party are recorded or checked against what is
        already stored, and finally the member's page is downloaded into
        ``self.text`` and the ``parse_honorific``, ``parse_table``,
        ``parse_photo`` and ``parse_committees`` steps are run.

        Any mismatch with previously stored values trips an ``assert``.
        """
        person = self.data
        full_name = '%(given_names)s %(family_name)s' % data

        if not person:
            # Nothing known yet: start from the raw record and drop the
            # fields that are stored in a different shape further down.
            person.update(data)
            for transient_key in ('party_id', 'party_name', 'email'):
                del person[transient_key]
            person['id'] = idFactory.new('person')
            person['name'] = full_name
        assert person['name'] == full_name

        # The parliament identifier always goes to the front of the list.
        identifiers = person.setdefault('identifiers', [])
        identifiers.insert(0, {
            'scheme': 'za.gov.parliament/person',
            'identifier': '%(id)s' % data,
        })

        if data.get('email'):
            if 'contact_details' not in person:
                person['contact_details'] = [
                    {'type': 'email', 'value': data['email']},
                ]
            else:
                # An address is already on file; it has to be the same one.
                assert data['email'] == [
                    detail for detail in person['contact_details']
                    if detail['type'] == 'email'
                ][0]['value']

        party = self.organizations[data['party_name']]
        if 'identifiers' not in party:
            party['identifiers'] = [
                {'scheme': 'za.gov.parliament/party',
                 'identifier': data['party_id']},
            ]
        else:
            assert party['identifiers'][0]['identifier'] == data['party_id']

        party_memberships = [
            membership for membership in person.get('memberships', [])
            if 'party' in membership['organization_id']
        ]
        if not party_memberships:
            add_membership(person, {
                'person_id': person['id'],
                'organization_id': party['id'],
            })
        else:
            assert party['id'] == party_memberships[0]['organization_id'], party['id']

        member_page = 'http://www.parliament.gov.za/live/content.php?Item_ID=184&MemberID=%(id)s' % data
        self.text = requests.get(member_page).text
        self.parse_honorific()
        self.parse_table()
        self.parse_photo()
        self.parse_committees()
Example #2
0
def parse(data):
    """Add committees and committee memberships from the CSV exports.

    Reads ``committees.csv`` and ``committee-members.csv`` from ``data_path``.
    Committees whose name is not yet a key of ``data['organizations']`` are
    added; every member row is matched to exactly one entry of
    ``data['persons']`` and given a membership of the row's committee via
    ``add_membership``.

    Returns the same ``data`` mapping, modified in place.  An ``assert``
    fails when a row does not resolve to exactly one person, or when the
    row's party differs from the party recorded for that person.
    """
    orgs_by_id = dict((org['id'], org) for org in data['organizations'].values())

    # TODO: Perhaps check old/new committees, then stop using parl.py
    # committees. Or just assume these new ones are accurate.
    with open(data_path + 'committees.csv') as committees_file:
        for row in csv.DictReader(committees_file):
            if row['Name'] not in data['organizations']:
                data['organizations'][row['Name']] = {
                    'id': idFactory.new('committee_pmg'),
                    'name': row['Name'],
                    'slug': row['Name'].lower().replace(' ', '-'),
                    'classification': row['Type'],
                }

    # TODO: Use the person's other_names field, and get these misspellings in there.
    family_name_fixes = {
        'Khorai': 'Khoarai',
        'Hoosan': 'Hoosen',
        'Jeffrey': 'Jeffery',
        'Hill-Lews': 'Hill-Lewis',
    }
    persons = data['persons'].values()

    with open(data_path + 'committee-members.csv') as members_file:
        for row in csv.DictReader(members_file):
            # Move a "Mr" that was written before the comma to after it.
            row['Name'] = re.sub('^([^,]*) Mr, (.*)$', r'\1, Mr \2', row['Name'])

            family_name, initials = row['Name'].split(',')
            initials = re.sub(r'^\s*(Mr|Ms|Dr|Nkosi|Prof|Adv|Prince)\s+', '', initials)

            family_name = family_name_fixes.get(family_name, family_name)
            if family_name == 'Koornhof' and initials == 'NC':
                initials = 'NJJVR'

            matches = [x for x in persons if asciify(x['family_name']) == family_name]
            if len(matches) > 1:
                # Several people share the family name: narrow down by exact
                # initials first, then by initials prefix.
                # NOTE(review): these two passes compare the raw family_name,
                # not the asciify()'d one used above - confirm that is intended.
                matches = [
                    x for x in persons
                    if x['family_name'] == family_name
                    and initialise(x['given_names']) == initials
                ]
                if not matches:
                    matches = [
                        x for x in persons
                        if x['family_name'] == family_name
                        and initialise(x['given_names'])[0:len(initials)] == initials
                    ]

            # With the current data, we now always have one result
            assert len(matches) == 1, '%d matches for %r' % (len(matches), row['Name'])
            person = matches[0]

            party = [
                x for x in person['memberships'] if 'party' in x['organization_id']
            ][0]['organization_id']
            assert row['Party'] == orgs_by_id[party]['name'], row['Party'] + orgs_by_id[party]['name']

            mship = {
                'organization_id': data['organizations'][row['Committee']]['id'],
            }
            # Chairperson is checked last so that it wins over Alternate Member.
            if row['IsAlternative?'] == 'True':
                mship['role'] = 'Alternate Member'
            if row['IsChairperson?'] == 'True':
                mship['role'] = 'Chairperson'
            add_membership(person, mship)

    return data
Example #3
0
def parse(data):
    """Add slugs and National Assembly / NCOP membership dates to persons.

    Every person in ``data['persons']`` gets a ``slug`` derived from the
    name.  The hand-maintained ``na_manual`` and ``ncop_manual`` tables then
    supply start and end dates for the people they name; memberships not
    covered by a table entry get the default start date (``2009-05-06`` for
    National Assembly members without one, ``2009-05-07`` for NCOP
    delegates).  People named in ``ncop_manual`` who have no person entry at
    all are created, with a party membership and an NCOP membership.

    Returns the same ``data`` mapping, modified in place.

    Raises ``AssertionError`` when the data does not look as expected (for
    example when the number of people without a house membership is not
    three) and ``Exception`` when somebody listed in ``na_manual`` has no
    National Assembly membership.
    """

    def ncop_delegate_membership(entry):
        """Build the NCOP delegate membership described by a manual entry."""
        province = entry['province']
        return {
            'organization_id': 'org.mysociety.za/house/ncop',
            'label': 'Delegate for %s' % province,
            'role': 'Delegate',
            'area': {
                'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[province],
                'name': province,
            },
            'start_date': entry.get('start_date', '2009-05-07'),
            'end_date': entry['end_date'],
            'end_reason': entry.get('end_reason', 'Resigned'),
        }

    for person in data['persons'].values():
        person['slug'] = person['name'].lower().replace(' ', '-')

    # There are three non-Assembly/NCOP people in the executive
    no_house = sum(
        1 for p in data['persons'].values()
        if not any('house' in y['organization_id'] for y in p['memberships']))
    assert no_house == 3

    na_manual = {
        'Cassel Charlie Mathale': {
            'start_date': '2013-07-15'
        },
        'Wayne Maxim Thring': {
            'start_date': '2013-06-21'
        },
        'Masenyani Richard Baloyi': {
            'end_date': '2013-07-10'
        },
        'Letlapa Moroatshoge Mphahlele': {
            'end_date':
            '2013-07-11',
            'end_reason':
            'Ceased to be a member under section 47(3)(c) of the Constitution (changed party)'
        },
        # 'Mpethi': { 'start_date': ? },
        'Ntopile Marcel Kganyago': {
            'end_date': '2013-07-17',
            'end_reason': 'Died'
        },
        'Nqabayomzi Lawrence Kwankwa': {
            'start_date': '2013-08-06'
        },
        'Loretta Jacobus': {
            'end_date': '2013-08-01'
        },
    }

    ncop_manual = {
        'Rory Dean MacPherson': {
            'party': 'DA',
            'end_date': '2009-05-29',
            'province': 'KwaZulu-Natal'
        },
        'Robert Alfred Lees': {
            'start_date': '2009-06-11'
        },
        'Sheery Su-Huei Cheng': {
            'party': 'DA',
            'end_date': '2010-09-30',
            'province': 'Gauteng'
        },
        'Beverley Lynette Abrahams': {
            'start_date': '2010-10-01'
        },
        'Timothy Duncan Harris': {
            'party': 'DA',
            'end_date': '2010-09-09',
            'province': 'Western Cape'
        },
        'Theodorus Barnardus Beyleveldt': {
            'party': 'DA',
            'start_date': '2010-10-12',
            'end_date': '2011-07-10',
            'end_reason': 'Died',
            'province': 'Western Cape'
        },
        'Denis Joseph': {
            'start_date': '2011-10-20'
        },
        'Armiston Watson': {
            'party': 'DA',
            'end_date': '2011-11-07',
            'province': 'Mpumalanga'
        },
        'Velly Makasana Manzini': {
            'start_date': '2011-11-08'
        },
        'Tlhalefi Andries Mashamaite': {
            'party': 'ANC',
            'end_date': '2012-05-08',
            'province': 'Limpopo'
        },
        'Thabo Lucas Makunyane': {
            'start_date': '2012-05-22'
        },
        'Zukisa Cheryl Faku': {
            'start_date': '2013-04-25'
        },
        'Mokoane Collen Maine': {
            'end_date': '2013-08-01'
        },  # XXX
    }

    for person in data['persons'].values():
        name = person['name']
        mships = person['memberships']

        # 'role' is only looked at once the organisation matches: other
        # memberships are not required to carry a role.
        mship = [
            x for x in mships
            if 'ncop' in x['organization_id'] and x['role'] == 'Delegate'
        ]
        if mship:
            # Present, and has NCOP membership entry. Set a start and possibly end date.
            mship = mship[0]
            assert 'start_date' not in mship
            n = ncop_manual.pop(name, {})
            mship['start_date'] = n.get('start_date', '2009-05-07')
            if 'end_date' in n and 'end_date' not in mship:
                mship['end_date'] = n['end_date']
        elif name in ncop_manual:
            # Present, but has no NCOP membership entry
            add_membership(person,
                           ncop_delegate_membership(ncop_manual.pop(name)))

        mship = [
            x for x in mships
            if 'house/na' in x['organization_id'] and x['role'] == 'Member'
        ]
        if mship:
            mship = mship[0]
            n = na_manual.pop(name, {})
            if 'start_date' not in mship:
                mship['start_date'] = n.pop('start_date', '2009-05-06')
            if n:
                # Whatever is left in the manual entry describes the end of
                # the membership.
                assert 'end_date' not in mship
                mship['end_date'] = n['end_date']
                mship['end_reason'] = n.get('end_reason', 'Resigned')
        elif name in na_manual:
            raise Exception(
                '%s is listed in na_manual but has no National Assembly '
                'membership' % name)

    # The ones left have no person entry at all.
    for name, d in ncop_manual.items():
        person_id = idFactory.new('person')
        given_names, family_name = name.rsplit(None, 1)
        person = {
            'id': person_id,
            'name': name,
            'given_names': given_names,
            'family_name': family_name,
            'slug': name.lower().replace(' ', '-'),
        }
        add_membership(
            person,
            {'organization_id': data['organizations'][d['party']]['id']})
        add_membership(person, ncop_delegate_membership(d))
        data['persons'][name] = person

    return data
Example #4
0
def parse(data):
    """Set slugs and house membership dates on the people in ``data``.

    Each entry of ``data['persons']`` receives a ``slug`` made from its
    name.  NCOP delegate and National Assembly member memberships are then
    dated: the ``ncop_manual`` / ``na_manual`` tables below give the dates
    for the people they list, everything else falls back to ``2009-05-07``
    (NCOP) or ``2009-05-06`` (National Assembly, only when no start date is
    present yet).  Names still left in ``ncop_manual`` afterwards have no
    person entry; one is created for each, with a party membership and an
    NCOP membership.

    Returns ``data`` itself, modified in place.  Unexpected data trips an
    ``assert``; a person listed in ``na_manual`` without a National Assembly
    membership raises ``Exception``.
    """
    def delegate_membership(manual):
        """Return the NCOP delegate membership for one ``ncop_manual`` entry."""
        return { 'organization_id': 'org.mysociety.za/house/ncop',
            'label': 'Delegate for %s' % manual['province'], 'role': 'Delegate',
            'area': { 'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[manual['province']], 'name': manual['province'] },
            'start_date': manual.get('start_date', '2009-05-07'),
            'end_date': manual['end_date'],
            'end_reason': manual.get('end_reason', 'Resigned'),
        }

    for person in data['persons'].values():
        person['slug'] = person['name'].lower().replace(' ', '-')

    # There are three non-Assembly/NCOP people in the executive
    no_house = len([ p for p in data['persons'].values()
        if not [ y for y in p['memberships'] if 'house' in y['organization_id'] ] ])
    assert no_house == 3

    na_manual = {
        'Cassel Charlie Mathale': { 'start_date': '2013-07-15' },
        'Wayne Maxim Thring': { 'start_date': '2013-06-21' },
        'Masenyani Richard Baloyi': { 'end_date': '2013-07-10' },
        'Letlapa Moroatshoge Mphahlele': { 'end_date': '2013-07-11', 'end_reason': 'Ceased to be a member under section 47(3)(c) of the Constitution (changed party)' },
        # 'Mpethi': { 'start_date': ? },
        'Ntopile Marcel Kganyago': { 'end_date': '2013-07-17', 'end_reason': 'Died' },
        'Nqabayomzi Lawrence Kwankwa': { 'start_date': '2013-08-06' },
        'Loretta Jacobus': { 'end_date': '2013-08-01' },
    }

    ncop_manual = {
        'Rory Dean MacPherson': { 'party': 'DA', 'end_date': '2009-05-29', 'province': 'KwaZulu-Natal' },
            'Robert Alfred Lees': { 'start_date': '2009-06-11' },
        'Sheery Su-Huei Cheng': { 'party': 'DA', 'end_date': '2010-09-30', 'province': 'Gauteng' },
            'Beverley Lynette Abrahams': { 'start_date': '2010-10-01' },
        'Timothy Duncan Harris': { 'party': 'DA', 'end_date': '2010-09-09', 'province': 'Western Cape' },
            'Theodorus Barnardus Beyleveldt': { 'party': 'DA', 'start_date': '2010-10-12', 'end_date': '2011-07-10', 'end_reason': 'Died', 'province': 'Western Cape' },
            'Denis Joseph': { 'start_date': '2011-10-20' },
        'Armiston Watson': { 'party': 'DA', 'end_date': '2011-11-07', 'province': 'Mpumalanga' },
            'Velly Makasana Manzini': { 'start_date': '2011-11-08' },
        'Tlhalefi Andries Mashamaite': { 'party': 'ANC', 'end_date': '2012-05-08', 'province': 'Limpopo' },
            'Thabo Lucas Makunyane': { 'start_date': '2012-05-22' },
        'Zukisa Cheryl Faku': { 'start_date': '2013-04-25' },
        'Mokoane Collen Maine': { 'end_date': '2013-08-01' }, # XXX
    }

    for person in data['persons'].values():
        name = person['name']
        mships = person['memberships']

        # The role is only read once the organisation matches, so memberships
        # of other organisations need not have one.
        mship = [ x for x in mships if 'ncop' in x['organization_id'] and x['role'] == 'Delegate' ]
        if mship:
            # Present, and has NCOP membership entry. Set a start and possibly end date.
            mship = mship[0]
            assert 'start_date' not in mship
            n = ncop_manual.pop(name, {})
            mship['start_date'] = n.get('start_date', '2009-05-07')
            if 'end_date' in n and 'end_date' not in mship:
                mship['end_date'] = n['end_date']
        elif name in ncop_manual:
            # Present, but has no NCOP membership entry
            add_membership(person, delegate_membership(ncop_manual.pop(name)))

        mship = [ x for x in mships if 'house/na' in x['organization_id'] and x['role'] == 'Member' ]
        if mship:
            mship = mship[0]
            n = na_manual.pop(name, {})
            if 'start_date' not in mship:
                mship['start_date'] = n.pop('start_date', '2009-05-06')
            if n:
                # What remains of the manual entry is about the membership's end.
                assert 'end_date' not in mship
                mship['end_date'] = n['end_date']
                mship['end_reason'] = n.get('end_reason', 'Resigned')
        elif name in na_manual:
            raise Exception('No National Assembly membership found for %s, who is listed in na_manual' % name)

    # The ones left have no person entry at all.
    for name, d in ncop_manual.items():
        new_id = idFactory.new('person')
        given_names, family_name = name.rsplit(None, 1)
        person = {
            'id': new_id,
            'name': name,
            'given_names': given_names,
            'family_name': family_name,
            'slug': name.lower().replace(' ', '-'),
        }
        add_membership(person, { 'organization_id': data['organizations'][d['party']]['id'] })
        add_membership(person, delegate_membership(d))
        data['persons'][name] = person

    return data
Example #5
0
def parse():
    """Build person and organization records from the MyReps data files.

    Reads four files from ``data_path``:

    * ``myreps_na_executive_export.csv`` - people and the positions they
      hold; fills the module-level ``PEOPLE`` and ``ORGANIZATIONS``.
    * ``myreps-na.xml`` and ``myreps-national-assembly.html`` - MyReps
      identifiers, attached to entries looked up in ``PEOPLE`` by name.
    * ``myreps-ncop.xml`` - NCOP delegates, each stored in ``PEOPLE`` as a
      new entry with an NCOP membership.

    Returns a dict with ``'persons'`` (``PEOPLE``, each value reduced to the
    person record with its ``memberships`` attached) and
    ``'organizations'`` (``ORGANIZATIONS``).
    """
    person_columns = ('first_name', 'last_name', 'initials_alt',
                      'other_names', 'title', 'email')
    position_columns = ('start_date', 'end_date', 'end_reason',
                        'organisation', 'position', 'region')

    with open(data_path + 'myreps_na_executive_export.csv') as export_file:
        for row in FixingDictReader(export_file):
            person_bits = dict((col_map(k), v) for k, v in row.items()
                               if k in person_columns and v)
            position_bits = dict(
                (col_map(k), v) for k, v in row.items()
                if k in position_columns and v and v != 'Member'
                and v != 'National')

            # An end reason only means something together with an end date,
            # and '0' stands for "no reason given".  Rows whose end_reason
            # column was empty never got the key, so tolerate its absence.
            if 'end_date' not in position_bits:
                position_bits.pop('end_reason', None)
            elif position_bits.get('end_reason') == '0':
                del position_bits['end_reason']

            if person_bits['given_names'] == 'Tlhalefi Andries':
                continue  # Comes in elsewhere
            # Manual fixes of file
            fix_person_bits(person_bits)
            fix_end_reason(position_bits, person_bits)

            name = '%(given_names)s %(family_name)s' % person_bits

            person_bits['name'] = name
            if person_bits.get('email'):
                person_bits['contact_details'] = [{
                    'type': 'email',
                    'value': person_bits.pop('email')
                }]
            if 'other_names' in person_bits:
                person_bits['other_names'] = [{
                    'name': person_bits['other_names']
                }]

            # Organisations not known yet are registered as parties.
            organisation = position_bits['organisation']
            if organisation not in ORGANIZATIONS:
                ORGANIZATIONS.setdefault(
                    organisation, {
                        'id': 'org.mysociety.za/party/' + organisation.lower(),
                        'name': organisation,
                        'slug': organisation.lower(),
                        'classification': 'party'
                    })
            position_bits['organization_id'] = ORGANIZATIONS[organisation]['id']
            del position_bits['organisation']

            organization_id = position_bits['organization_id']
            if (organization_id == 'org.mysociety.za/house/national-assembly'
                    and 'role' not in position_bits):
                position_bits['label'] = position_bits['role'] = 'Member'
            elif organization_id == 'org.mysociety.za/house/ncop':
                position_bits['label'] = position_bits['role'] = 'Delegate'
            if 'end_reason' in position_bits:
                position_bits['end_reason'] = REASONS[
                    position_bits['end_reason']]
            if position_bits.get('region'):
                r = position_bits['region']
                position_bits['area'] = {
                    'id': 'org.mysociety.za/mapit/code/p/' + PROVINCES[r],
                    'name': r
                }
                position_bits['label'] += ' for ' + r
                del position_bits['region']

            if name in PEOPLE:
                # Seen on an earlier row: the personal details must agree.
                person_bits['id'] = PEOPLE[name]['person']['id']
                assert PEOPLE[name]['person'] == person_bits
            else:
                person_bits['id'] = idFactory.new('person')
                PEOPLE[name] = {
                    'id': person_bits['id'],
                    'person': person_bits
                }
            add_membership(PEOPLE[name], position_bits)

    # National Assembly MyReps site data
    # To fetch myreps ID and PERSON_ID

    with open(data_path + 'myreps-na.xml') as na_file:
        na = na_file.read()
    people = ET.fromstring(na).iter('Members')
    cols_xml = [
        'id', 'person_id', 'person_first_name', 'person_last_name',
        'person_paries'
    ]

    for person in people:
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        if row['person_first_name'] == 'Nomaindiya Cathleen':
            row['person_first_name'] = 'NomaIndiya Cathleen'
        if (row['person_first_name'] == 'Alpheus'
                and row['person_last_name'] == 'Mokabhe'):
            row.update(person_first_name='Alpheus Mokabhe',
                       person_last_name='Maziya')
        if row['person_first_name'] == 'Ximbi':
            row.update(person_first_name='Dumsani Livingstone',
                       person_last_name='Ximbi')
        name = '%(person_first_name)s %(person_last_name)s' % row
        name = fix_bad_encoding(name.encode('utf-8'))
        PEOPLE[name]['person']['identifiers'] = [
            {
                'identifier': row['person_id'],
                'scheme': 'myreps_person_id'
            },
        ]
        if row['id']:
            PEOPLE[name]['person']['identifiers'].append({
                'identifier': row['id'],
                'scheme': 'myreps_id'
            })

    with open(data_path + 'myreps-national-assembly.html') as na_prev_file:
        na_prev = na_prev_file.read()
    # Only the <div id="past"> section is wanted.  DOTALL is passed as a flag
    # because an inline "(?s)" is only accepted at the very start of a
    # pattern by current Python versions.
    na_prev = re.search(r'<div[^>]*id="past"[^>]*>.*?</div>', na_prev,
                        re.DOTALL).group(0)
    for person in re.findall(
            '<li><a href="/people/view/(.*?)">([^<]*) ([^<]*?)</a> until .*?</li>',
            na_prev):
        row = dict(zip(cols_xml, ['', person[0], person[1], person[2], '']))
        if row['person_first_name'] == 'Patricia de':
            row.update(person_first_name='Patricia',
                       person_last_name='de Lille')
        if row['person_first_name'] == 'D van der':
            row.update(person_first_name='D', person_last_name='van der Walt')
        name = '%(person_first_name)s %(person_last_name)s' % row
        PEOPLE[name]['person']['identifiers'] = [
            {
                'identifier': row['person_id'],
                'scheme': 'myreps_person_id'
            },
        ]

    # NCOP MyReps site data

    with open(data_path + 'myreps-ncop.xml') as ncop_file:
        ncop = ncop_file.read()
    people = ET.fromstring(ncop).iter('Members')
    for person in people:
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        # Change couple of names to match parliament data
        if row['person_first_name'] == 'Arthur':
            row['person_first_name'] = 'Robert Alfred'
        elif row['person_first_name'] == 'Buoang Lemias':
            row['person_first_name'] = 'Budang Lemias'
        name = '%(person_first_name)s %(person_last_name)s' % row
        person_id = idFactory.new('person')
        PEOPLE[name] = {
            'id': person_id,
            'person': {
                'id': person_id,
                'given_names': row['person_first_name'],
                'family_name': row['person_last_name'],
                'name': name,
                'identifiers': [
                    {
                        'identifier': row['id'],
                        'scheme': 'myreps_id'
                    },
                    {
                        'identifier': row['person_id'],
                        'scheme': 'myreps_person_id'
                    },
                ]
            },
        }
        add_membership(
            PEOPLE[name], {
                'organization_id': 'org.mysociety.za/house/ncop',
                'label': 'Delegate',
                'role': 'Delegate'
            })
        if row['id'] == '7852':
            # Special case of one person resigned since data
            PEOPLE[name]['memberships'][0].update(
                end_date='2013-03-27',
                end_reason='Resigned',
                label='Delegate for Eastern Cape',
                area={
                    'id': 'org.mysociety.za/mapit/code/p/' +
                    PROVINCES['Eastern Cape'],
                    'name': 'Eastern Cape'
                })
        if row['person_paries']:
            add_membership(
                PEOPLE[name],
                {'organization_id': ORGANIZATIONS[row['person_paries']]['id']})

    # Collapse each {'id', 'person', 'memberships'} wrapper into the person
    # record itself.  The keys are snapshotted because values are replaced
    # while looping.
    for name in list(PEOPLE.keys()):
        PEOPLE[name]['person'].update(memberships=PEOPLE[name]['memberships'])
        PEOPLE[name] = PEOPLE[name]['person']

    return {
        'persons': PEOPLE,
        'organizations': ORGANIZATIONS,
    }
Example #6
0
def parse(data):
    """Load PMG committees and their members into ``data``.

    ``committees.csv`` (under ``data_path``) contributes one organization per
    committee name that is not already a key of ``data['organizations']``.
    ``committee-members.csv`` is then read row by row: the member's name is
    resolved to exactly one entry of ``data['persons']`` and that person is
    given a membership of the row's committee through ``add_membership``.

    Returns ``data`` itself, modified in place.  An ``assert`` fails if a
    member row does not resolve to exactly one person or if the row's party
    is not the party recorded for that person.
    """
    orgs_by_id = dict((x['id'], x) for x in data['organizations'].values())

    # TODO: Perhaps check old/new committees, then stop using parl.py
    # committees. Or just assume these new ones are accurate.
    with open(data_path + 'committees.csv') as committees_csv:
        for row in csv.DictReader(committees_csv):
            if row['Name'] not in data['organizations']:
                data['organizations'][row['Name']] = {
                    'id': idFactory.new('committee_pmg'),
                    'name': row['Name'],
                    'slug': row['Name'].lower().replace(' ', '-'),
                    'classification': row['Type']
                }

    # TODO: Use the person's other_names field, and get these misspellings in there.
    misspelt_family_names = {
        'Khorai': 'Khoarai',
        'Hoosan': 'Hoosen',
        'Jeffrey': 'Jeffery',
        'Hill-Lews': 'Hill-Lewis',
    }

    with open(data_path + 'committee-members.csv') as members_csv:
        for row in csv.DictReader(members_csv):
            # "Surname Mr, AB" -> "Surname, Mr AB"
            row['Name'] = re.sub('^([^,]*) Mr, (.*)$', r'\1, Mr \2',
                                 row['Name'])

            family_name, initials = row['Name'].split(',')
            # Raw string: "\s" is not a valid escape in a plain literal.
            initials = re.sub(r'^\s*(Mr|Ms|Dr|Nkosi|Prof|Adv|Prince)\s+', '',
                              initials)

            family_name = misspelt_family_names.get(family_name, family_name)
            if family_name == 'Koornhof' and initials == 'NC':
                initials = 'NJJVR'

            matches = [
                x for x in data['persons'].values()
                if asciify(x['family_name']) == family_name
            ]
            if len(matches) > 1:
                # Shared family name: try the exact initials, and failing
                # that, people whose initials start with the given ones.
                # NOTE(review): unlike the first pass, these compare the
                # family name without asciify() - confirm that is intended.
                matches = [
                    x for x in data['persons'].values()
                    if x['family_name'] == family_name
                    and initialise(x['given_names']) == initials
                ]
                if not matches:
                    matches = [
                        x for x in data['persons'].values()
                        if x['family_name'] == family_name and initialise(
                            x['given_names'])[0:len(initials)] == initials
                    ]

            # With the current data, we now always have one result
            assert len(matches) == 1, 'expected one person for %r, found %d' % (
                row['Name'], len(matches))
            person = matches[0]

            party = [
                x for x in person['memberships']
                if 'party' in x['organization_id']
            ][0]['organization_id']
            assert row['Party'] == orgs_by_id[party][
                'name'], row['Party'] + orgs_by_id[party]['name']

            mship = {
                'organization_id':
                data['organizations'][row['Committee']]['id']
            }
            # Checked in this order so that Chairperson overrides
            # Alternate Member when both flags are set.
            if row['IsAlternative?'] == 'True':
                mship['role'] = 'Alternate Member'
            if row['IsChairperson?'] == 'True':
                mship['role'] = 'Chairperson'
            add_membership(person, mship)

    return data
Example #7
0
def parse():
    """Assemble persons and organizations from the MyReps data files.

    Four files under ``data_path`` are read in turn:

    * ``myreps_na_executive_export.csv``: one row per position held; people
      go into the module-level ``PEOPLE`` and unknown organisations into
      ``ORGANIZATIONS``.
    * ``myreps-na.xml``: MyReps identifiers for entries looked up in
      ``PEOPLE`` by name.
    * ``myreps-national-assembly.html``: MyReps person ids taken from the
      list items inside the ``<div id="past">`` section.
    * ``myreps-ncop.xml``: NCOP delegates, each stored in ``PEOPLE`` as a new
      entry with an NCOP membership.

    Returns ``{"persons": PEOPLE, "organizations": ORGANIZATIONS}``, where
    every value of ``PEOPLE`` has been reduced to the person record with its
    ``memberships`` attached.
    """
    person_columns = ("first_name", "last_name", "initials_alt", "other_names", "title", "email")
    position_columns = ("start_date", "end_date", "end_reason", "organisation", "position", "region")

    with open(data_path + "myreps_na_executive_export.csv") as export_file:
        for row in FixingDictReader(export_file):
            person_bits = dict((col_map(k), v) for k, v in row.items() if k in person_columns and v)
            position_bits = dict(
                (col_map(k), v)
                for k, v in row.items()
                if k in position_columns and v and v != "Member" and v != "National"
            )

            # Keep an end reason only alongside an end date, and treat "0" as
            # "none given".  The key is missing when the column was empty, so
            # its absence must not raise.
            if "end_date" not in position_bits:
                position_bits.pop("end_reason", None)
            elif position_bits.get("end_reason") == "0":
                del position_bits["end_reason"]

            if person_bits["given_names"] == "Tlhalefi Andries":
                continue  # Comes in elsewhere
            # Manual fixes of file
            fix_person_bits(person_bits)
            fix_end_reason(position_bits, person_bits)

            name = "%(given_names)s %(family_name)s" % person_bits

            person_bits["name"] = name
            if person_bits.get("email"):
                person_bits["contact_details"] = [{"type": "email", "value": person_bits.pop("email")}]
            if "other_names" in person_bits:
                person_bits["other_names"] = [{"name": person_bits["other_names"]}]

            # Anything not in ORGANIZATIONS yet is registered as a party.
            organisation = position_bits["organisation"]
            if organisation not in ORGANIZATIONS:
                ORGANIZATIONS.setdefault(
                    organisation,
                    {
                        "id": "org.mysociety.za/party/" + organisation.lower(),
                        "name": organisation,
                        "slug": organisation.lower(),
                        "classification": "party",
                    },
                )
            position_bits["organization_id"] = ORGANIZATIONS[organisation]["id"]
            del position_bits["organisation"]

            organization_id = position_bits["organization_id"]
            if organization_id == "org.mysociety.za/house/national-assembly" and "role" not in position_bits:
                position_bits["label"] = position_bits["role"] = "Member"
            elif organization_id == "org.mysociety.za/house/ncop":
                position_bits["label"] = position_bits["role"] = "Delegate"
            if "end_reason" in position_bits:
                position_bits["end_reason"] = REASONS[position_bits["end_reason"]]
            if position_bits.get("region"):
                r = position_bits["region"]
                position_bits["area"] = {"id": "org.mysociety.za/mapit/code/p/" + PROVINCES[r], "name": r}
                position_bits["label"] += " for " + r
                del position_bits["region"]

            if name in PEOPLE:
                # Already seen on an earlier row: personal details must match.
                person_bits["id"] = PEOPLE[name]["person"]["id"]
                assert PEOPLE[name]["person"] == person_bits
            else:
                person_bits["id"] = idFactory.new("person")
                PEOPLE[name] = {"id": person_bits["id"], "person": person_bits}
            add_membership(PEOPLE[name], position_bits)

    # National Assembly MyReps site data
    # To fetch myreps ID and PERSON_ID

    with open(data_path + "myreps-na.xml") as na_file:
        na = na_file.read()
    people = ET.fromstring(na).iter("Members")
    cols_xml = ["id", "person_id", "person_first_name", "person_last_name", "person_paries"]

    for person in people:
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        if row["person_first_name"] == "Nomaindiya Cathleen":
            row["person_first_name"] = "NomaIndiya Cathleen"
        if row["person_first_name"] == "Alpheus" and row["person_last_name"] == "Mokabhe":
            row.update(person_first_name="Alpheus Mokabhe", person_last_name="Maziya")
        if row["person_first_name"] == "Ximbi":
            row.update(person_first_name="Dumsani Livingstone", person_last_name="Ximbi")
        name = "%(person_first_name)s %(person_last_name)s" % row
        name = fix_bad_encoding(name.encode("utf-8"))
        PEOPLE[name]["person"]["identifiers"] = [{"identifier": row["person_id"], "scheme": "myreps_person_id"}]
        if row["id"]:
            PEOPLE[name]["person"]["identifiers"].append({"identifier": row["id"], "scheme": "myreps_id"})

    with open(data_path + "myreps-national-assembly.html") as na_prev_file:
        na_prev = na_prev_file.read()
    # DOTALL is given as a flag argument: an inline "(?s)" anywhere but the
    # start of the pattern is rejected by current Python versions.
    na_prev = re.search(r'<div[^>]*id="past"[^>]*>.*?</div>', na_prev, re.DOTALL).group(0)
    for person in re.findall('<li><a href="/people/view/(.*?)">([^<]*) ([^<]*?)</a> until .*?</li>', na_prev):
        row = dict(zip(cols_xml, ["", person[0], person[1], person[2], ""]))
        if row["person_first_name"] == "Patricia de":
            row.update(person_first_name="Patricia", person_last_name="de Lille")
        if row["person_first_name"] == "D van der":
            row.update(person_first_name="D", person_last_name="van der Walt")
        name = "%(person_first_name)s %(person_last_name)s" % row
        PEOPLE[name]["person"]["identifiers"] = [{"identifier": row["person_id"], "scheme": "myreps_person_id"}]

    # NCOP MyReps site data

    with open(data_path + "myreps-ncop.xml") as ncop_file:
        ncop = ncop_file.read()
    people = ET.fromstring(ncop).iter("Members")
    for person in people:
        row = dict(zip(cols_xml, [person.find(x).text for x in cols_xml]))
        # Change couple of names to match parliament data
        if row["person_first_name"] == "Arthur":
            row["person_first_name"] = "Robert Alfred"
        elif row["person_first_name"] == "Buoang Lemias":
            row["person_first_name"] = "Budang Lemias"
        name = "%(person_first_name)s %(person_last_name)s" % row
        person_id = idFactory.new("person")
        PEOPLE[name] = {
            "id": person_id,
            "person": {
                "id": person_id,
                "given_names": row["person_first_name"],
                "family_name": row["person_last_name"],
                "name": name,
                "identifiers": [
                    {"identifier": row["id"], "scheme": "myreps_id"},
                    {"identifier": row["person_id"], "scheme": "myreps_person_id"},
                ],
            },
        }
        add_membership(
            PEOPLE[name], {"organization_id": "org.mysociety.za/house/ncop", "label": "Delegate", "role": "Delegate"}
        )
        if row["id"] == "7852":
            # Special case of one person resigned since data
            PEOPLE[name]["memberships"][0].update(
                end_date="2013-03-27",
                end_reason="Resigned",
                label="Delegate for Eastern Cape",
                area={"id": "org.mysociety.za/mapit/code/p/" + PROVINCES["Eastern Cape"], "name": "Eastern Cape"},
            )
        if row["person_paries"]:
            add_membership(PEOPLE[name], {"organization_id": ORGANIZATIONS[row["person_paries"]]["id"]})

    # Replace each {"id", "person", "memberships"} wrapper by the person
    # record itself; iterate over a snapshot of the keys since the values
    # are swapped while looping.
    for name in list(PEOPLE.keys()):
        PEOPLE[name]["person"].update(memberships=PEOPLE[name]["memberships"])
        PEOPLE[name] = PEOPLE[name]["person"]

    return {"persons": PEOPLE, "organizations": ORGANIZATIONS}