Example #1
0
    def person_data(self, representative, division_id, division_name, role,
                    organization_name):
        """Build a ``Person`` from one scraped representative element.

        Args:
            representative: Element queried via ``xpath`` for the name
                (``a/b/text()``), a text node containing "Phone" and an
                optional ``mailto:`` link.
            division_id: Division identifier; the segment after its last
                ``:`` is interpolated into the boundary URL.
            division_name: Used as the person's district.
            role: Role handed to ``Person``.
            organization_name: Handed to ``Person`` as ``primary_org_name``.

        Returns:
            The populated ``Person``.

        Raises:
            IndexError: If the name XPath matches nothing, or if
                ``division_id`` contains no ``:``.
        """
        # Hard-coded fixes for known quirks in the scraped data.
        shared_names = {'Colleen Evans', 'Kim Watt-Senner'}
        name_fixes = {
            'Claire l Moglove': 'Claire Moglove',
            'KSenya Dorwart': 'Ksenya Dorwart',
        }
        email_fixes = {'sharrison@qualicumbeach,com': '*****@*****.**'}

        # Name: first match, trimmed, runs of spaces collapsed, then corrected.
        raw_name = str(representative.xpath('a/b/text()')[0])
        full_name = re.sub(' +', ' ', raw_name.strip())
        full_name = name_fixes.get(full_name, full_name)

        # Phone: the slice is applied to the string form of the whole XPath
        # result list (not to a single node), then hyphens are removed.
        phone_matches = representative.xpath('text()[contains(., "Phone")]')
        phone = str(phone_matches)[12:-2].replace('-', '')

        # Email: optional; only the first mailto link text is considered.
        email_matches = representative.xpath(
            'a[contains(@href,"mailto:")]/text()')
        email = None
        if email_matches:
            email = email_fixes.get(email_matches[0], email_matches[0])

        person = Person(
            primary_org='government',
            primary_org_name=organization_name,
            name=full_name,
            district=division_name,
            role=role,
        )
        person.add_source(LIST_PAGE)

        # People sharing a name each get a distinct placeholder birth_date
        # taken from a counter kept on ``self``.
        if full_name in shared_names:
            person.birth_date = str(self.birth_date)
            self.birth_date += 1

        if email_matches:
            person.add_contact('email', email)
        # Only values that are exactly ten characters long are recorded.
        if len(phone) == 10:
            person.add_contact('voice', phone, 'legislature')

        subdivision_code = division_id.rsplit(':', 1)[1]
        boundary_url = '/boundaries/census-subdivisions/{}/'.format(
            subdivision_code)
        person._related[0].extras['boundary_url'] = boundary_url

        return person
Example #2
0
    def person_data(self, representative, division_id, division_name, role, organization_name):
        """Build a ``Person`` from one scraped representative element.

        Args:
            representative: Element queried via ``xpath`` for the name (``a/b/text()``), a text
                node containing "Phone" and an optional ``mailto:`` link.
            division_id: Division identifier; the segment after its last ``:`` is interpolated
                into the boundary URL.
            division_name: Used as the person's district.
            role: Role handed to ``Person``.
            organization_name: Handed to ``Person`` as ``primary_org_name``.

        Returns:
            The populated ``Person``.

        Raises:
            IndexError: If the name XPath matches nothing, or if ``division_id`` contains no ``:``.
        """
        # Corrections and tweaks.
        duplicate_names = {
            'Colleen Evans',
            'Kim Watt-Senner',
        }
        name_corrections = {
            'Claire l Moglove': 'Claire Moglove',
            'KSenya Dorwart': 'Ksenya Dorwart',
        }
        email_corrections = {
            'sharrison@qualicumbeach,com': '*****@*****.**'
        }

        # Get name: first match, trimmed, with runs of spaces collapsed.
        representative_name = re.sub(' +', ' ', str(representative.xpath('a/b/text()')[0]).strip())
        representative_name = name_corrections.get(representative_name, representative_name)

        # Get phone. The slice is applied to the string form of the whole XPath result list.
        representative_phone = str(representative.xpath('text()[contains(., "Phone")]'))[12:-2].replace('-', '')

        # Get email. The mailto link is optional, so check the XPath result before indexing it;
        # indexing an empty result would raise IndexError.
        representative_email = None
        email_scrape = representative.xpath('a[contains(@href,"mailto:")]/text()')
        if email_scrape:
            representative_email = email_corrections.get(email_scrape[0], email_scrape[0])

        # Create record and append contact data.
        p = Person(primary_org='government', primary_org_name=organization_name, name=representative_name, district=division_name, role=role)
        p.add_source(LIST_PAGE)

        # Handle duplicate names: each gets a distinct placeholder birth_date from a counter on self.
        if representative_name in duplicate_names:
            p.birth_date = str(self.birth_date)
            self.birth_date += 1
        if representative_email:
            p.add_contact('email', representative_email)
        # Only values that are exactly ten characters long are recorded.
        if representative_phone and len(representative_phone) == 10:
            p.add_contact('voice', representative_phone, 'legislature')

        p._related[0].extras['boundary_url'] = '/boundaries/census-subdivisions/{}/'.format(division_id.rsplit(':', 1)[1])

        return p
Example #3
0
    def scrape(self):
        """Yield organizations, posts and people read from the CSV at ``self.csv_url``.

        The CSV is opened through ``self.csv_reader`` and its header names are
        passed through ``self.header_converter``. For each row accepted by
        ``self.is_valid_row``, the values are first adjusted via
        ``self.corrections``, then:

        * an ``Organization`` is yielded the first time its (lower-cased)
          name is seen,
        * a ``Post`` is yielded for the row's role,
        * a ``Person`` is built, validated and yielded.

        Any exception raised while handling a row is printed and the row is
        skipped. Because the ``Organization`` and ``Post`` are yielded before
        the ``Person`` is built, they may already have been emitted for a row
        that is later skipped.
        """
        # Cache of Organization objects keyed by lower-cased name.
        organizations = {}
        # role -> district -> number of seats handed out so far.
        seat_numbers = defaultdict(lambda: defaultdict(int))

        reader = self.csv_reader(self.csv_url,
                                 delimiter=self.delimiter,
                                 header=True,
                                 encoding=self.encoding,
                                 skip_rows=self.skip_rows)
        reader.fieldnames = [
            self.header_converter(field) for field in reader.fieldnames
        ]
        for row in reader:

            try:
                if self.is_valid_row(row):
                    # A correction is either a mapping of bad -> good values
                    # or a callable applied to the cell.
                    for key, corrections in self.corrections.items():
                        if not isinstance(corrections, dict):
                            row[key] = corrections(row[key])
                        elif row[key] in corrections:
                            row[key] = corrections[row[key]]

                    organization_classification = 'legislature'

                    organization_name = row['organization']
                    organization_key = organization_name.lower()
                    if organization_key in organizations:
                        organization = organizations[organization_key]
                    else:
                        organization = Organization(
                            organization_name,
                            classification=organization_classification)
                        organization.add_source(self.csv_url)
                        yield organization
                        organizations[organization_key] = organization

                    # A blank role defaults to 'Councillor'.
                    if not row['primary role']:
                        row['primary role'] = 'Councillor'

                    role = row['primary role']

                    post = Post(role=role,
                                label=organization_name,
                                organization_id=organization._id)
                    yield post

                    name = row['name'].strip(' .,')

                    district = row['district name']

                    # When several people share a district, number the seats
                    # per role and district; roles in unique_roles are exempt.
                    if self.many_posts_per_area and role not in self.unique_roles:
                        seat_numbers[role][district] += 1
                        district = '{} (seat {})'.format(
                            district, seat_numbers[role][district])

                    p = Person(primary_org=organization_classification,
                               name=name,
                               district=district,
                               role=role,
                               party=row.get('party name'))
                    p.add_source(self.csv_url)

                    if row.get('gender'):
                        p.gender = row['gender']
                    if row.get('photo url'):
                        p.image = row['photo url']

                    if row.get('source url'):
                        p.add_source(row['source url'].strip(' .,'))

                    if row.get('website'):
                        p.add_link(row['website'], note='web site')
                    if row.get('facebook'):
                        # Drop everything from the first '#' or '?' onward.
                        p.add_link(re.sub(r'[#?].+', '', row['facebook']))
                    if row.get('twitter'):
                        p.add_link(row['twitter'])

                    # NOTE: 'email' and 'address' are indexed directly, unlike
                    # the optional columns read with .get(); a missing column
                    # raises KeyError, which the handler below turns into a
                    # skipped row.
                    if row['email']:
                        p.add_contact('email', row['email'].strip(' .,'))
                    if row['address']:
                        p.add_contact('address', row['address'], 'legislature')
                    if row.get('phone'):
                        p.add_contact('voice', row['phone'], 'legislature')
                    if row.get('fax'):
                        p.add_contact('fax', row['fax'], 'legislature')
                    if row.get('cell'):
                        p.add_contact('cell', row['cell'], 'legislature')
                    if row.get('birth date'):
                        p.birth_date = row['birth date']

                    if row.get('incumbent'):
                        p.extras['incumbent'] = row['incumbent']

                    if name in self.other_names:
                        for other_name in self.other_names[name]:
                            p.add_name(other_name)

                    # Validate person entity so that we can catch the exception if needed.
                    p.validate()

                    yield p
            except Exception as e:
                # Best-effort: report the problem and move on to the next row.
                print(repr(e))
                continue
Example #4
0
    def scrape(self):
        """Yield a ``Person`` for each usable CSV row, then every ``Organization``.

        The CSV at ``COUNCIL_PAGE`` is read in full before processing. Rows that are entirely
        blank, have an empty or 'vacant' name, or belong to an excluded district are skipped.
        Each remaining row is tied to a division, either through its 'district id' column or by
        looking up its 'district name'. One ``Organization`` is created per division.

        Yields:
            ``Person`` objects as rows are processed, followed by the ``Organization`` objects.

        Raises:
            AssertionError: If the CSV has no rows.
            Exception: If a district name maps to more than one division, or a row's role is
                not in ``expected_roles``.
            KeyError: If a district name or a division classification is unknown.
        """
        exclude_divisions = {}  # empty: no division is excluded by id
        exclude_districts = {
            'Capital',
            'Capital F',
            'Capital G',
            'Capital H',
            'Central Coast B',
            'Central Okanagan East',
            'Central Okanagan West',
            'Comox Valley B',
            'Comox Valley C',
            'Islands Trust',
            'Kitimat-Stikine C',
            'Kootenay Boundary B',
            'Kootenay Boundary C',
            'Kootenay Boundary D',
            'Kootenay Boundary E',
            'Metro Vancouver A',
            'North Coast A',
            'North Coast C',
            'North Coast D',
            'North Coast E',
            'Okanagan-Similkameen I',
            'Okanagan-Similkameen Olalla Local Community Commission',
            'Qathet A',
            'Qathet B',
            'Qathet C',
            'Qathet D',
            'Qathet E',
        }
        expected_roles = {'candidate'}
        # Division classification code -> word used in the organization name.
        infixes = {
            'CY': 'City',
            'DM': 'District',
            'IGD': 'District',
            'IM': 'Municipal',
            'RGM': 'Regional',
            'T': 'Town',
            'VL': 'Village',
            'RDA': 'District',
        }
        duplicate_names = {'Rick Smith', 'Sung Y Wong', 'Elizabeth Taylor'}

        # Map division names to ids for ids beginning with '59', ignoring 'IRI' classifications.
        # A name seen twice is ambiguous and is stored as None so it can be rejected later.
        names_to_ids = {}
        for candidate_division in Division.get('ocd-division/country:ca').children('csd'):
            if not candidate_division.id.rsplit(':', 1)[1].startswith('59'):
                continue
            if candidate_division.attrs['classification'] == 'IRI':
                continue
            if candidate_division.name in names_to_ids:
                names_to_ids[candidate_division.name] = None
            else:
                names_to_ids[candidate_division.name] = candidate_division.id

        reader = self.csv_reader(COUNCIL_PAGE, header=True)
        reader.fieldnames = [column.lower() for column in reader.fieldnames]

        organizations = {}
        seen = set()
        birth_date = 1900  # counter for placeholder birth dates of duplicate names

        rows = list(reader)
        assert rows, 'No councillors found'
        for row in rows:
            name = row['full name']
            district_name = row['district name']

            if not any(row.values()):
                continue
            if name.lower() in ('', 'vacant'):
                continue
            if district_name in exclude_districts:
                continue

            # An explicit district id wins over a lookup by district name.
            explicit_id = row['district id']
            if explicit_id:
                division_id = 'ocd-division/country:ca/csd:{}'.format(explicit_id)
            else:
                division_id = names_to_ids[district_name]

            if division_id in exclude_divisions:
                continue
            if not division_id:
                raise Exception('unhandled collision: {}'.format(district_name))

            division = Division.get(division_id)
            division_name = division.name
            infix = infixes[division.attrs['classification']]
            organization_name = '{} {} Council'.format(division_name, infix)

            if division_id not in seen:
                seen.add(division_id)
                organizations[division_id] = Organization(name=organization_name, classification='government')
                organizations[division_id].add_source(COUNCIL_PAGE)
            organization = organizations[division_id]

            role = row['primary role']
            if role not in expected_roles:
                raise Exception('unexpected role: {}'.format(role))

            # Rows with an explicit id are labelled by the division id, others by the name.
            district = format(division_id) if explicit_id else division_name

            organization.add_post(role=role, label=district, division_id=division_id)

            person = Person(
                primary_org='government',
                primary_org_name=organization_name,
                name=name,
                district=district,
                role=role,
            )
            person.add_source(COUNCIL_PAGE)
            if row['source url']:
                person.add_source(row['source url'])

            # People sharing a name each get a distinct placeholder birth_date.
            if name in duplicate_names:
                person.birth_date = str(birth_date)
                birth_date += 1

            if row['email']:
                person.add_contact('email', row['email'])
            if row['phone']:
                person.add_contact('voice', row['phone'], 'legislature')
            if row['twitter']:
                person.add_link(row['twitter'])

            boundary_url = '/boundaries/census-subdivisions/{}/'.format(division_id.rsplit(':', 1)[1])
            person._related[0].extras['boundary_url'] = boundary_url

            yield person

        for created_organization in organizations.values():
            yield created_organization
Example #5
0
    def scrape(self):
        """Scrape municipal councils and yield their people and organizations.

        Starting from ``COUNCIL_PAGE``, follows each sidebar list link to a
        listing page and each listing entry to a municipality detail page.
        For every municipality that is neither in ``unknown_names`` nor in
        ``exclude_divisions``, yields one ``Person`` per listed name that
        does not contain 'vacant', followed by the municipality's
        ``Organization``. The municipality's address, phone, fax, email and
        link are attached to each of its people.

        Yields:
            ``Person`` and ``Organization`` objects.

        Raises:
            AssertionError: If a page has no list links, municipalities,
                role groups or councillors.
            Exception: On a duplicate division name or id, or a role outside
                ``expected_roles``.
            KeyError: If a scraped municipality name has no known division,
                or a list link's text is not in ``classifications``.
        """
        exclude_divisions = {
            'ocd-division/country:ca/csd:1301006',  # Saint John
            'ocd-division/country:ca/csd:1307022',  # Moncton
            'ocd-division/country:ca/csd:1310032',  # Fredericton
        }
        expected_roles = {
            'Mayor',
            'Councillor',
        }
        unique_roles = {
            'Mayor',
        }
        # List link text -> word used in the organization name.
        classifications = {
            'Cities': 'City',
            'Towns': 'Town',
            'Villages': 'Village',
            'Rural Communities': 'Community',
            'Regional Municipality': 'Regional',
        }
        # Scraped municipality name -> name used in the division data.
        corrections = {
            'Beaubassin-est/East': 'Beaubassin East',
            'Lac-Baker': 'Lac Baker',
            'Saint-François-de-Madawaska': 'Saint-François de Madawaska',
            'Saint-Hilaire': 'Saint Hilaire',
        }
        unknown_names = {
            'Haut-Madawaska',  # incorporated after Census 2016
        }
        duplicate_names = {
            'Denis Savoie',
            'Josée Levesque',
            'Luc Levesque',
        }

        # Map division names to ids for ids beginning with '13', ignoring
        # classification 'P'. Name collisions are not tolerated here.
        names_to_ids = {}
        for division in Division.get('ocd-division/country:ca').children(
                'csd'):
            type_id = division.id.rsplit(':', 1)[1]
            if type_id.startswith('13'):
                if division.attrs['classification'] == 'P':
                    continue
                if division.name in names_to_ids:
                    raise Exception('unhandled collision: {}'.format(
                        division.name))
                else:
                    names_to_ids[division.name] = division.id

        page = self.lxmlize(COUNCIL_PAGE)
        list_links = page.xpath(
            '//div[@id="sidebar"]//div[contains(@class, "list")][1]//a')

        # Counter for placeholder birth dates given to duplicate names.
        birth_date = 1900
        seen = set()

        assert len(list_links), 'No list items found'
        for list_link in list_links:
            page = self.lxmlize(list_link.attrib['href'])
            detail_urls = page.xpath('//td[1]//@href')

            assert len(detail_urls), 'No municipalities found'
            for detail_url in detail_urls:
                page = self.lxmlize(detail_url, encoding='utf-8')
                # Expand a leading "St"/"St." to "Saint" in the part of the
                # heading after " - ".
                division_name = re.sub(
                    r'\ASt\b\.?', 'Saint',
                    page.xpath('//h1/text()')[0].split(' - ', 1)[1])
                division_name = corrections.get(division_name, division_name)

                if division_name in unknown_names:
                    continue
                division_id = names_to_ids[division_name]
                if division_id in exclude_divisions:
                    continue
                if division_id in seen:
                    raise Exception(
                        'unhandled collision: {}'.format(division_id))

                seen.add(division_id)
                division_name = Division.get(division_id).name
                organization_name = '{} {} Council'.format(
                    division_name, classifications[list_link.text])
                organization = Organization(name=organization_name,
                                            classification='government')
                organization.add_source(detail_url)

                address = ', '.join(
                    page.xpath('//div[@class="left_contents"]/p[1]/text()'))

                contacts = page.xpath(
                    '//div[@class="left_contents"]/p[contains(., "Contact")]/text()'
                )
                phone = contacts[0].split(':')[1]
                # Reset for every municipality: a page that lists only a
                # phone number must not reuse the previous municipality's
                # fax, nor leave the name unbound on the first page.
                fax = None
                if len(contacts) > 1:
                    fax = contacts[1].split(':')[1]
                email = self.get_email(page,
                                       '//div[@class="left_contents"]',
                                       error=False)

                # First non-mailto link, or an empty (falsy) list if none.
                url = page.xpath(
                    '//div[@class="left_contents"]//@href[not(contains(., "mailto:"))]'
                )
                if url:
                    url = url[0]

                groups = page.xpath(
                    '//div[contains(@class, "right_contents")]/p')
                assert len(groups), 'No groups found'
                # ``group`` is kept distinct from the Person ``p`` built
                # below so the loop variable is not shadowed.
                for group in groups:
                    # Headings are plural ("Councillors"); drop trailing "s".
                    role = group.xpath('./b/text()')[0].rstrip('s')
                    if role not in expected_roles:
                        raise Exception('unexpected role: {}'.format(role))

                    councillors = group.xpath('./text()')
                    assert len(councillors), 'No councillors found'
                    for seat_number, name in enumerate(councillors, 1):
                        # Vacant entries still consume a seat number.
                        if 'vacant' in name.lower():
                            continue

                        if role in unique_roles:
                            district = division_name
                        else:
                            district = '{} (seat {})'.format(
                                division_name, seat_number)

                        organization.add_post(role=role,
                                              label=district,
                                              division_id=division_id)

                        p = Person(primary_org='government',
                                   primary_org_name=organization_name,
                                   name=name,
                                   district=district,
                                   role=role)
                        p.add_source(COUNCIL_PAGE)
                        p.add_source(list_link.attrib['href'])
                        p.add_source(detail_url)

                        if name in duplicate_names:
                            p.birth_date = str(birth_date)
                            birth_date += 1

                        p.add_contact('address', address, 'legislature')
                        # @see https://en.wikipedia.org/wiki/Area_code_506
                        if phone:
                            p.add_contact('voice',
                                          phone,
                                          'legislature',
                                          area_code=506)
                        if fax:
                            p.add_contact('fax',
                                          fax,
                                          'legislature',
                                          area_code=506)
                        if email:
                            p.add_contact('email', email)
                        if url:
                            p.add_link(url)

                        p._related[0].extras[
                            'boundary_url'] = '/boundaries/census-subdivisions/{}/'.format(
                                division_id.rsplit(':', 1)[1])

                        yield p

                yield organization
Example #6
0
    def scrape(self):
        """Scrape municipal councils and yield their people and organizations.

        Starting from ``COUNCIL_PAGE``, follows each sidebar list link to a listing page and each
        listing entry to a municipality detail page. For every municipality that is neither in
        ``unknown_names`` nor in ``exclude_divisions``, yields one ``Person`` per listed name that
        does not contain 'vacant', followed by the municipality's ``Organization``. The
        municipality's address, phone, fax, email and link are attached to each of its people.

        Yields:
            ``Person`` and ``Organization`` objects.

        Raises:
            AssertionError: If a page has no list links, municipalities, role groups or councillors.
            Exception: On a duplicate division name or id, or a role outside ``expected_roles``.
            KeyError: If a scraped municipality name has no known division, or a list link's text
                is not in ``classifications``.
        """
        exclude_divisions = {
            'ocd-division/country:ca/csd:1301006',  # Saint John
            'ocd-division/country:ca/csd:1307022',  # Moncton
            'ocd-division/country:ca/csd:1310032',  # Fredericton
        }
        expected_roles = {
            'Mayor',
            'Councillor',
        }
        unique_roles = {
            'Mayor',
        }
        # List link text -> word used in the organization name.
        classifications = {
            'Cities': 'City',
            'Towns': 'Town',
            'Villages': 'Village',
            'Rural Communities': 'Community',
            'Regional Municipality': 'Regional',
        }
        # Scraped municipality name -> name used in the division data.
        corrections = {
            'Beaubassin-est/East': 'Beaubassin East',
            'Lac-Baker': 'Lac Baker',
            'Saint-François-de-Madawaska': 'Saint-François de Madawaska',
            'Saint-Hilaire': 'Saint Hilaire',
        }
        unknown_names = {
            'Haut-Madawaska',  # incorporated after Census 2016
        }
        duplicate_names = {
            'Denis Savoie',
            'Josée Levesque',
            'Luc Levesque',
        }

        # Map division names to ids for ids beginning with '13', ignoring classification 'P'.
        # Name collisions are not tolerated here.
        names_to_ids = {}
        for division in Division.get('ocd-division/country:ca').children('csd'):
            type_id = division.id.rsplit(':', 1)[1]
            if type_id.startswith('13'):
                if division.attrs['classification'] == 'P':
                    continue
                if division.name in names_to_ids:
                    raise Exception('unhandled collision: {}'.format(division.name))
                else:
                    names_to_ids[division.name] = division.id

        page = self.lxmlize(COUNCIL_PAGE)
        list_links = page.xpath('//div[@id="sidebar"]//div[contains(@class, "list")][1]//a')

        # Counter for placeholder birth dates given to duplicate names.
        birth_date = 1900
        seen = set()

        assert len(list_links), 'No list items found'
        for list_link in list_links:
            page = self.lxmlize(list_link.attrib['href'])
            detail_urls = page.xpath('//td[1]//@href')

            assert len(detail_urls), 'No municipalities found'
            for detail_url in detail_urls:
                page = self.lxmlize(detail_url, encoding='utf-8')
                # Expand a leading "St"/"St." to "Saint" in the part of the heading after " - ".
                division_name = re.sub(r'\ASt\b\.?', 'Saint', page.xpath('//h1/text()')[0].split(' - ', 1)[1])
                division_name = corrections.get(division_name, division_name)

                if division_name in unknown_names:
                    continue
                division_id = names_to_ids[division_name]
                if division_id in exclude_divisions:
                    continue
                if division_id in seen:
                    raise Exception('unhandled collision: {}'.format(division_id))

                seen.add(division_id)
                division_name = Division.get(division_id).name
                organization_name = '{} {} Council'.format(division_name, classifications[list_link.text])
                organization = Organization(name=organization_name, classification='government')
                organization.add_source(detail_url)

                address = ', '.join(page.xpath('//div[@class="left_contents"]/p[1]/text()'))

                contacts = page.xpath('//div[@class="left_contents"]/p[contains(., "Contact")]/text()')
                phone = contacts[0].split(':')[1]
                # Reset for every municipality: a page that lists only a phone number must not reuse
                # the previous municipality's fax, nor leave the name unbound on the first page.
                fax = contacts[1].split(':')[1] if len(contacts) > 1 else None
                email = self.get_email(page, '//div[@class="left_contents"]', error=False)

                # First non-mailto link, or an empty (falsy) list if there is none.
                url = page.xpath('//div[@class="left_contents"]//@href[not(contains(., "mailto:"))]')
                if url:
                    url = url[0]

                groups = page.xpath('//div[contains(@class, "right_contents")]/p')
                assert len(groups), 'No groups found'
                # ``group`` is kept distinct from the Person ``p`` built below so the loop
                # variable is not shadowed.
                for group in groups:
                    # Headings are plural ("Councillors"); drop the trailing "s".
                    role = group.xpath('./b/text()')[0].rstrip('s')
                    if role not in expected_roles:
                        raise Exception('unexpected role: {}'.format(role))

                    councillors = group.xpath('./text()')
                    assert len(councillors), 'No councillors found'
                    for seat_number, name in enumerate(councillors, 1):
                        # Vacant entries still consume a seat number.
                        if 'vacant' in name.lower():
                            continue

                        if role in unique_roles:
                            district = division_name
                        else:
                            district = '{} (seat {})'.format(division_name, seat_number)

                        organization.add_post(role=role, label=district, division_id=division_id)

                        p = Person(primary_org='government', primary_org_name=organization_name, name=name, district=district, role=role)
                        p.add_source(COUNCIL_PAGE)
                        p.add_source(list_link.attrib['href'])
                        p.add_source(detail_url)

                        if name in duplicate_names:
                            p.birth_date = str(birth_date)
                            birth_date += 1

                        p.add_contact('address', address, 'legislature')
                        # @see https://en.wikipedia.org/wiki/Area_code_506
                        if phone:
                            p.add_contact('voice', phone, 'legislature', area_code=506)
                        if fax:
                            p.add_contact('fax', fax, 'legislature', area_code=506)
                        if email:
                            p.add_contact('email', email)
                        if url:
                            p.add_link(url)

                        p._related[0].extras['boundary_url'] = '/boundaries/census-subdivisions/{}/'.format(division_id.rsplit(':', 1)[1])

                        yield p

                yield organization
Example #7
0
    def scrape(self):
        """Yield a ``Person`` per usable CSV row, then every ``Organization``.

        Rows of the CSV at ``COUNCIL_PAGE`` are consumed one at a time. Rows
        that are entirely blank, have an empty or 'vacant' name, or belong
        to an excluded district are skipped. Each remaining row is tied to a
        division, either through its 'district id' column or by looking up
        its 'district name'. One ``Organization`` is created per division.

        Yields:
            ``Person`` objects as rows are processed, followed by the
            ``Organization`` objects.

        Raises:
            Exception: If a district name maps to more than one division, or
                a row's role is not in ``expected_roles``.
            KeyError: If a district name or a division classification is
                unknown.
        """
        exclude_divisions = {}  # empty: no division is excluded by id
        exclude_districts = {
            'Capital',
            'Capital F',
            'Capital G',
            'Capital H',
            'Central Coast B',
            'Central Okanagan East',
            'Central Okanagan West',
            'Comox Valley B',
            'Comox Valley C',
            'Islands Trust',
            'Kitimat-Stikine C',
            'Kootenay Boundary B',
            'Kootenay Boundary C',
            'Kootenay Boundary D',
            'Kootenay Boundary E',
            'Metro Vancouver A',
            'North Coast A',
            'North Coast C',
            'North Coast D',
            'North Coast E',
            'Okanagan-Similkameen I',
            'Okanagan-Similkameen Olalla Local Community Commission',
            'Qathet A',
            'Qathet B',
            'Qathet C',
            'Qathet D',
            'Qathet E',
        }
        expected_roles = {'candidate'}
        # Division classification code -> word used in the organization name.
        infixes = {
            'CY': 'City',
            'DM': 'District',
            'IGD': 'District',
            'IM': 'Municipal',
            'RGM': 'Regional',
            'T': 'Town',
            'VL': 'Village',
            'RDA': 'District',
        }
        duplicate_names = {'Rick Smith', 'Sung Y Wong', 'Elizabeth Taylor'}

        # Map division names to ids for ids beginning with '59', ignoring
        # 'IRI' classifications. A name seen twice is ambiguous and is
        # stored as None so it can be rejected later.
        names_to_ids = {}
        root = Division.get('ocd-division/country:ca')
        for candidate_division in root.children('csd'):
            code = candidate_division.id.rsplit(':', 1)[1]
            if not code.startswith('59'):
                continue
            if candidate_division.attrs['classification'] == 'IRI':
                continue
            if candidate_division.name in names_to_ids:
                names_to_ids[candidate_division.name] = None
            else:
                names_to_ids[
                    candidate_division.name] = candidate_division.id

        reader = self.csv_reader(COUNCIL_PAGE, header=True)
        reader.fieldnames = [column.lower() for column in reader.fieldnames]

        organizations = {}
        seen = set()
        # Counter for placeholder birth dates given to duplicate names.
        birth_date = 1900

        for row in reader:
            name = row['full name']
            district_name = row['district name']

            if not any(row.values()):
                continue
            if name.lower() in ('', 'vacant'):
                continue
            if district_name in exclude_districts:
                continue

            # An explicit district id wins over a lookup by district name.
            explicit_id = row['district id']
            if explicit_id:
                division_id = 'ocd-division/country:ca/csd:{}'.format(
                    explicit_id)
            else:
                division_id = names_to_ids[district_name]

            if division_id in exclude_divisions:
                continue
            if not division_id:
                raise Exception(
                    'unhandled collision: {}'.format(district_name))

            division = Division.get(division_id)
            division_name = division.name
            infix = infixes[division.attrs['classification']]
            organization_name = '{} {} Council'.format(division_name, infix)

            if division_id not in seen:
                seen.add(division_id)
                organizations[division_id] = Organization(
                    name=organization_name, classification='government')
                organizations[division_id].add_source(COUNCIL_PAGE)
            organization = organizations[division_id]

            role = row['primary role']
            if role not in expected_roles:
                raise Exception('unexpected role: {}'.format(role))

            # Rows with an explicit id are labelled by the division id,
            # others by the division name.
            district = format(division_id) if explicit_id else division_name

            organization.add_post(
                role=role, label=district, division_id=division_id)

            person = Person(
                primary_org='government',
                primary_org_name=organization_name,
                name=name,
                district=district,
                role=role,
            )
            person.add_source(COUNCIL_PAGE)
            if row['source url']:
                person.add_source(row['source url'])

            # People sharing a name each get a distinct placeholder
            # birth_date.
            if name in duplicate_names:
                person.birth_date = str(birth_date)
                birth_date += 1

            if row['email']:
                person.add_contact('email', row['email'])
            if row['phone']:
                person.add_contact('voice', row['phone'], 'legislature')
            if row['twitter']:
                person.add_link(row['twitter'])

            boundary_url = '/boundaries/census-subdivisions/{}/'.format(
                division_id.rsplit(':', 1)[1])
            person._related[0].extras['boundary_url'] = boundary_url

            yield person

        for created_organization in organizations.values():
            yield created_organization
Example #8
0
    def scrape(self):
        """Yield a ``Person`` per usable CSV row, then every ``Organization``.

        Rows of the CSV at ``COUNCIL_PAGE`` are consumed one at a time. Rows
        that are entirely blank or whose name contains 'vacant' are skipped,
        as are rows for divisions in ``exclude_divisions``. Each remaining
        row is tied to a division, either through its 'district id' column
        or by looking up its 'district name'. One ``Organization`` is
        created per division. Roles outside ``unique_roles`` get a
        per-division "(seat N)" suffix on their district label.

        Yields:
            ``Person`` objects as rows are processed, followed by the
            ``Organization`` objects.

        Raises:
            Exception: If a district name maps to more than one division, or
                a row's role is not in ``expected_roles``.
            KeyError: If a district name or a division classification is
                unknown.
        """
        exclude_divisions = {
            'ocd-division/country:ca/csd:5909052',  # Abbotsford
            'ocd-division/country:ca/csd:5915001',  # Langley (DM)
            'ocd-division/country:ca/csd:5915004',  # Surrey
            'ocd-division/country:ca/csd:5915015',  # Richmond
            'ocd-division/country:ca/csd:5915022',  # Vancouver
            'ocd-division/country:ca/csd:5915025',  # Burnaby
            'ocd-division/country:ca/csd:5915034',  # Coquitlam
            'ocd-division/country:ca/csd:5917021',  # Saanich
            'ocd-division/country:ca/csd:5917034',  # Victoria
            'ocd-division/country:ca/csd:5935010',  # Kelowna
        }
        expected_roles = {
            'Mayor',
            'Councillor',
        }
        unique_roles = {
            'Mayor',
        }
        # Division classification code -> word used in the organization name.
        infixes = {
            'CY': 'City',
            'DM': 'District',
            'IGD': 'District',
            'IM': 'Municipal',
            'RGM': 'Regional',
            'T': 'Town',
            'VL': 'Village',
        }
        duplicate_names = {
            'Colleen Evans',
        }

        # Map division names to ids for ids beginning with '59', ignoring
        # 'IRI' classifications. A name seen twice is ambiguous and is
        # stored as None so it can be rejected later.
        names_to_ids = {}
        for division in Division.get('ocd-division/country:ca').children(
                'csd'):
            type_id = division.id.rsplit(':', 1)[1]
            if type_id.startswith('59'):
                if division.attrs['classification'] == 'IRI':
                    continue
                if division.name in names_to_ids:
                    names_to_ids[division.name] = None
                else:
                    names_to_ids[division.name] = division.id

        reader = self.csv_reader(COUNCIL_PAGE, header=True)
        reader.fieldnames = [field.lower() for field in reader.fieldnames]

        organizations = {}
        # division id -> number of non-unique seats handed out so far.
        seat_numbers = defaultdict(int)

        # Counter for placeholder birth dates given to duplicate names.
        birth_date = 1900
        seen = set()

        for row in reader:
            name = row['full name']

            if not any(row.values()) or 'vacant' in name.lower():
                continue

            # An explicit district id wins over a lookup by district name.
            if row['district id']:
                division_id = 'ocd-division/country:ca/csd:{}'.format(
                    row['district id'])
            else:
                division_id = names_to_ids[row['district name']]

            if division_id in exclude_divisions:
                continue
            if not division_id:
                raise Exception('unhandled collision: {}'.format(
                    row['district name']))

            division = Division.get(division_id)

            division_name = division.name
            organization_name = '{} {} Council'.format(
                division_name, infixes[division.attrs['classification']])

            if division_id not in seen:
                seen.add(division_id)
                organizations[division_id] = Organization(
                    name=organization_name, classification='government')
                organizations[division_id].add_source(COUNCIL_PAGE)

            organization = organizations[division_id]

            role = row['primary role']
            if role not in expected_roles:
                raise Exception('unexpected role: {}'.format(role))

            if role in unique_roles:
                district = division_name
            else:
                seat_numbers[division_id] += 1
                district = '{} (seat {})'.format(division_name,
                                                 seat_numbers[division_id])
            # Rows with an explicit id carry the division id in the label.
            if row['district id']:
                district += ' ({})'.format(division_id)

            organization.add_post(role=role,
                                  label=district,
                                  division_id=division_id)

            p = Person(primary_org='government',
                       primary_org_name=organization_name,
                       name=name,
                       district=district,
                       role=role)
            p.add_source(COUNCIL_PAGE)
            if row['source url']:
                p.add_source(row['source url'])

            if name in duplicate_names:
                p.birth_date = str(birth_date)
                birth_date += 1

            # Blank CSV cells must not become empty contact entries, so only
            # record the values that are actually present.
            if row['email']:
                p.add_contact('email', row['email'])
            if row['phone']:
                p.add_contact('voice', row['phone'], 'legislature')

            p._related[0].extras[
                'boundary_url'] = '/boundaries/census-subdivisions/{}/'.format(
                    division_id.rsplit(':', 1)[1])

            yield p

        for organization in organizations.values():
            yield organization