Esempi in Python per Division.all, esempi in Python per opencivicdata.divisions.Division.all

Esempio n. 1

0

Mostra file

File: tasks.py Progetto: anukat2015/scrapers-ca

def province_and_territory_codes():
    """
    Returns the memoized mapping from the `sgc` attribute of each province and
    territory to its OCD division identifier, filling it on first use.
    """
    codes = province_and_territory_codes_memo
    if codes:
        return codes

    for entry in Division.all('ca'):
        if entry._type not in ('province', 'territory'):
            continue
        codes[entry.attrs['sgc']] = entry.id
    return codes

Esempio n. 2

0

Mostra file

def province_or_territory_abbreviation(code):
    """
    Returns the upper-cased type ID of the province or territory whose `sgc`
    attribute equals the first two characters of the type ID of `code`.
    """
    lookup = province_or_territory_abbreviation_memo
    if not lookup:
        # First call: fill the shared memo from the divisions CSV.
        for entry in Division.all('ca', from_csv=ocd_division_csv):
            if entry._type not in ('province', 'territory'):
                continue
            lookup[entry.attrs['sgc']] = type_id(entry.id).upper()
    prefix = type_id(code)[:2]
    return lookup[prefix]

Esempio n. 3

0

Mostra file

def province_or_territory_abbreviations():
    """
    Returns the memoized mapping from each province's and territory's name and
    `name_fr` attribute (plus the 'PEI' alias) to its upper-cased abbreviation.
    """
    memo = province_or_territory_abbreviation_memo
    if memo:
        return memo

    memo['PEI'] = 'PE'
    for entry in Division.all('ca'):
        if entry._type not in ('province', 'territory'):
            continue
        # The abbreviation is whatever follows the last colon of the identifier.
        code = entry.id.rsplit(':', 1)[1].upper()
        memo[entry.name] = code
        memo[entry.attrs['name_fr']] = code
    return memo

Esempio n. 4

0

Mostra file

def divisions_with_boroughs():
    """
    Returns the memoized collection of OCD identifiers of the parents of
    borough divisions, filling it on first use.
    """
    memo = divisions_with_boroughs_memo
    if memo:
        return memo

    for entry in Division.all('ca', from_csv=ocd_division_csv):
        if entry._type != 'borough':
            continue
        memo.add(entry.parent.id)
    return memo

Esempio n. 5

0

Mostra file

File: utils.py Progetto: opencivicdata/scrapers-ca

def province_or_territory_abbreviations():
    """
    Returns the shared memo that maps province and territory names (the
    division name, its `name_fr` attribute, and the 'PEI' alias) to upper-cased
    abbreviations. The memo is built on the first call only.
    """
    abbreviations = province_or_territory_abbreviation_memo
    if not abbreviations:
        abbreviations['PEI'] = 'PE'
        provinces_and_territories = (
            candidate for candidate in Division.all('ca')
            if candidate._type in ('province', 'territory')
        )
        for candidate in provinces_and_territories:
            # Last colon-separated piece of the identifier, upper-cased.
            short = candidate.id.rsplit(':', 1)[1].upper()
            abbreviations[candidate.name] = short
            abbreviations[candidate.attrs['name_fr']] = short
    return abbreviations

Esempio n. 6

0

Mostra file

File: tasks.py Progetto: opennorth/represent-canada-data

def divisions_with_boroughs():
    """
    Returns the OCD identifiers for divisions with boroughs.

    The identifiers are those of the parents of divisions of type 'borough';
    they are collected once and memoized.
    """
    parents = divisions_with_boroughs_memo
    if not parents:
        boroughs = (
            candidate for candidate in Division.all('ca', from_csv=ocd_division_csv)
            if candidate._type == 'borough'
        )
        for borough in boroughs:
            parents.add(borough.parent.id)
    return parents

Esempio n. 7

0

Mostra file

File: checkmappings.py Progetto: johnfelipe/scrapers_ca_app

 def handle(self, *args, **options):
     """
     Checks the boundary mappings against the known OCD division identifiers.

     For each entry of `settings.IMAGO_BOUNDARY_MAPPINGS`, fetches the boundaries
     of that boundary set from Represent and logs a warning for every boundary
     whose expected identifier is not among those from `Division.all('ca')`.
     """
     ids = {division.id for division in Division.all('ca')}
     for slug, data in settings.IMAGO_BOUNDARY_MAPPINGS.items():
         url = 'https://represent.opennorth.ca/boundaries/{}/?limit=0'.format(
             slug)
         for obj in requests.get(url).json()['objects']:
             # The boundary key is either a callable applied to the boundary
             # or the name of one of the boundary's fields.
             if callable(data['boundary_key']):
                 expected = data['prefix'] + data['boundary_key'](obj)
             else:
                 expected = data['prefix'] + obj[data['boundary_key']]
             if expected not in ids:
                 # `warn` is a deprecated alias of `warning` on standard
                 # loggers (assumes `log` is a `logging.Logger` — confirm).
                 log.warning('No match for {} from {}'.format(expected, url))

Esempio n. 8

0

Mostra file

File: tasks.py Progetto: opencivicdata/scrapers-ca

def validate_spreadsheet(url, identifier_header, geographic_name_header):
    """
    Validates the identifiers, geographic names and geographic types in a spreadsheet.

    Each row's identifier is expanded to an OCD identifier according to its
    length (2, 4 or 7 characters; any other length is used as is), and a
    message is printed when the row's geographic name differs from the name of
    the division with that identifier.
    """
    sgc_to_id = {
        entry.attrs['sgc']: entry.id
        for entry in Division.all('ca', from_csv=ocd_division_csv)
    }
    # Identifier length -> template of the OCD identifier.
    templates = {
        4: 'ocd-division/country:ca/cd:{}',
        7: 'ocd-division/country:ca/csd:{}',
    }

    for row in csv_dict_reader(url):
        raw = row[identifier_header]
        size = len(raw)

        if size == 2:
            identifier = sgc_to_id[raw]
        elif size in templates:
            identifier = templates[size].format(raw)
        else:
            identifier = raw

        division = Division.get(identifier)
        given_name = row[geographic_name_header]
        if given_name == division.name:
            continue
        print('{}: name: {} not {}'.format(identifier, division.name, given_name))

Esempio n. 9

0

Mostra file

def validate_spreadsheet(url, identifier_header, geographic_name_header):
    """
    Validates the identifiers, geographic names and geographic types in a spreadsheet.

    Prints a message for every row whose geographic name is not the name of
    the division that the row's identifier resolves to.
    """
    sgc_to_id = {}
    for candidate in Division.all('ca', from_csv=ocd_division_csv):
        sgc_to_id[candidate.attrs['sgc']] = candidate.id

    def resolve(value):
        # 2 characters: looked up by `sgc`; 4: census division; 7: census
        # subdivision; anything else is used unchanged.
        length = len(value)
        if length == 2:
            return sgc_to_id[value]
        if length == 4:
            return 'ocd-division/country:ca/cd:{}'.format(value)
        if length == 7:
            return 'ocd-division/country:ca/csd:{}'.format(value)
        return value

    for row in csv_dict_reader(url):
        identifier = resolve(row[identifier_header])
        division = Division.get(identifier)
        if row[geographic_name_header] != division.name:
            message = '{}: name: {} not {}'.format(
                identifier, division.name, row[geographic_name_header])
            print(message)

Esempio n. 10

0

Mostra file

File: mappings.py Progetto: johnfelipe/scrapers_ca_app

    def handle(self, *args, **options):
        """
        Generates `mappings.py`, which maps Represent boundary sets to OCD division identifiers.

        For each boundary set listed by Represent (census divisions and subdivisions excepted), determines the
        prefix of the OCD identifiers of its boundaries and the boundary key (a field name, or the name of one of
        the `lower` and `matcher` functions written to the file) that yields the rest of the identifier. Boundary
        sets for which these cannot be determined are logged and left out of the file.
        """
        mappings = {}

        divisions = list(Division.all('ca'))  # cache all divisions
        for obj in requests.get('https://represent.opennorth.ca/boundary-sets/?limit=0').json()['objects']:
            slug = obj['url'].split('/')[2]
            if obj['url'] in ('/boundary-sets/census-divisions/', '/boundary-sets/census-subdivisions/'):
                continue
            if obj['url'] in ('/boundary-sets/federal-electoral-districts/', '/boundary-sets/federal-electoral-districts-next-election/'):
                prefix = 'ocd-division/country:ca/ed:'
                boundary_key = 'external_id'
            else:
                url = 'https://represent.opennorth.ca{}'.format(obj['url'])
                boundary_set = requests.get(url).json()

                if boundary_set['extra'].get('ocd_division'):
                    division_id = boundary_set['extra']['ocd_division']
                elif boundary_set['extra'].get('geographic_code'):
                    geographic_code = boundary_set['extra']['geographic_code']
                    geographic_code_length = len(geographic_code)
                    if geographic_code_length == 7:
                        division_id = 'ocd-division/country:ca/csd:{}'.format(geographic_code)
                    elif geographic_code_length == 4:
                        division_id = 'ocd-division/country:ca/cd:{}'.format(geographic_code)
                    elif geographic_code_length == 2:
                        match = next((division for division in divisions if division.attrs['sgc'] == geographic_code), None)
                        # A code that matches no division is reported like any other unrecognized code, instead of
                        # failing on `None.id`.
                        if match is None:
                            log.error('Unrecognized geographic_code {}'.format(geographic_code))
                            continue
                        division_id = match.id
                    else:
                        log.error('Unrecognized geographic_code {}'.format(geographic_code))
                        continue
                else:
                    # Without either key, `division_id` would be unbound or left over from the previous boundary set.
                    log.error('No ocd_division or geographic_code for {}'.format(url))
                    continue

                try:
                    division = Division.get(division_id)
                    if division._type == 'borough':
                        division = division.parent
                        division_id = division.id
                        exclude = ('place', 'borough')
                    elif 'boroughs' in obj['name']:
                        exclude = ('place', 'district')
                    elif 'districts' in obj['name']:
                        exclude = ('place', 'borough')
                    else:
                        exclude = ('place',)

                    subtypes = {child._type for child in division.children() if child._type not in exclude}
                    if not subtypes:
                        log.warning('No subtypes for {}'.format(division_id))
                        continue
                    elif len(subtypes) > 1:
                        log.warning('>1 subtypes for {}: {}'.format(division_id, list(subtypes)))
                        continue
                    else:
                        prefix = '{}/{}:'.format(division_id, subtypes.pop())

                    # Use the external ID unless some child's type ID fails the numeric pattern.
                    boundary_key = 'external_id'
                    for child in division.children():
                        if child._type not in exclude:
                            type_id = child.id.rsplit(':', 1)[1]
                            if not numeric_re.search(type_id):
                                if len(type_id) in (1, 3):  # Lunenburg 1-letter identifiers, BC uses 3-letter identifiers
                                    boundary_key = 'lower'
                                else:
                                    boundary_key = 'matcher'
                                break
                except KeyError:
                    log.warning('No division for {}'.format(url))
                    # Skip the boundary set: `prefix` and `boundary_key` may be unbound or belong to another one.
                    continue

            mappings[slug] = {
                'key': 'id',
                'prefix': prefix,
                'boundary_key': boundary_key,
            }

        with open(os.path.join(settings.BASE_DIR, 'mappings.py'), 'w') as f:
            f.write("# DO NOT EDIT THIS AUTO-GENERATED FILE\n")
            f.write("import re\n\n")

            f.write("from django.template.defaultfilters import slugify\n\n")

            f.write("leading_zero_re = re.compile(r'^0+')\n")
            # The backslash is doubled so that this literal has no invalid escape; the text written is unchanged.
            f.write("invalid_re = re.compile(r'[^a-z\\d._~-]')\n")
            f.write("leading_district_re = re.compile(r'^District ')\n")
            f.write("lower = lambda boundary: boundary['external_id'].lower()\n\n")
            f.write("matcher = lambda boundary: leading_district_re.sub('', leading_zero_re.sub('', invalid_re.sub('~', boundary['name'].lower().replace(' ', '_'))))\n\n")

            f.write('IMAGO_BOUNDARY_MAPPINGS = {\n')
            for slug, data in sorted(mappings.items()):
                f.write("    '{}': {{\n".format(slug))
                for key, value in sorted(data.items()):
                    # Function names are written bare so that they refer to the functions defined above.
                    if key == 'boundary_key' and value in ('lower', 'matcher'):
                        f.write("        '{}': {},\n".format(key, value))
                    else:
                        f.write("        '{}': '{}',\n".format(key, value))
                f.write("    },\n")
            f.write('}\n')

Esempio n. 11

0

Mostra file

File: tasks.py Progetto: opennorth/represent-canada-data

def province_or_territory_abbreviation(code):
    """
    Returns the upper-cased type ID of the province or territory whose `sgc`
    attribute matches the first two characters of the type ID of `code`.
    """
    memo = province_or_territory_abbreviation_memo
    if not memo:
        # Built once from the divisions CSV and reused by later calls.
        provinces_and_territories = (
            candidate for candidate in Division.all('ca', from_csv=ocd_division_csv)
            if candidate._type in ('province', 'territory')
        )
        for candidate in provinces_and_territories:
            memo[candidate.attrs['sgc']] = type_id(candidate.id).upper()
    sgc = type_id(code)[:2]
    return memo[sgc]

Esempio n. 12

0

Mostra file

File: tasks.py Progetto: opennorth/represent-canada-data

def spreadsheet(base='.', private_base='../represent-canada-private-data'):
    """
    Validate the spreadsheet for tracking progress on data collection.

    Builds the expected row for Canada, for each province and territory and for each census subdivision, merges in
    what the boundary definitions under `base` and `private_base` say about the data received, and writes to
    standard error every difference from the published spreadsheet, every expected row that it lacks and every row
    that it should no longer have.
    """
    expecteds = OrderedDict()

    # Append to `municipal_subdivisions` from `constants.py`.
    for division in Division.all('ca', from_csv=ocd_division_csv):
        if division.attrs['has_children']:
            municipal_subdivisions[type_id(division.id)] = division.attrs['has_children']

    expecteds['ocd-division/country:ca'] = default_expectation.copy()
    expecteds['ocd-division/country:ca'].update({
        'OCD': 'ocd-division/country:ca',
        'Geographic name': 'Canada',
    })

    # Create expectations for provinces and territories.
    for division in Division.all('ca', from_csv=ocd_division_csv):
        if division._type in ('province', 'territory'):
            expected = default_expectation.copy()
            expected.update({
                'OCD': division.id,
                'Geographic name': division.name,
                'Province or territory': type_id(division.id).upper(),
            })

            expecteds[division.id] = expected

    # Create expectations for census subdivisions.
    reader = csv_dict_reader('http://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/CompFile.cfm?Lang=Eng&T=301&OFT=FULLCSV', 'ISO-8859-1')
    for row in reader:
        code = row['Geographic code']

        # The data rows end where the notes begin.
        if code == 'Note:':
            break

        division = Division.get('ocd-division/country:ca/csd:%s' % code, from_csv=ocd_division_csv)

        expected = default_expectation.copy()
        expected.update({
            'OCD': division.id,
            'Geographic name': division.name,
            'Province or territory': province_or_territory_abbreviation(division.id),
            'Geographic type': division.attrs['classification'],
            'Population': row['Population, 2016'],
        })

        if code in municipal_subdivisions:
            if municipal_subdivisions[code] == 'N':
                expected['Shapefile?'] = 'N/A'
                expected['Contact'] = 'N/A'
                expected['Received via'] = 'N/A'
                expected['Last boundary'] = 'N/A'
                expected['Permission to distribute'] = 'N/A'
            elif municipal_subdivisions[code] == 'Y':
                expected['Shapefile?'] = 'Request'

        expecteds[division.id] = expected

    # Merge information from received data.
    for directory, permission_to_distribute in [(base, 'Y'), (private_base, 'N')]:
        boundaries.registry = {}
        for slug, config in registry(directory).items():
            if 'extra' in config:
                division_id = config['extra']['division_id']
                if division_id in expecteds:
                    license_path = os.path.join(dirname(config['file']), 'LICENSE.txt')
                    license_text = ''
                    if os.path.exists(license_path):
                        with open(license_path) as f:
                            license_text = f.read().rstrip('\n')

                    expected = expecteds[division_id]
                    expected['Shapefile?'] = 'Y'
                    expected['Permission to distribute'] = permission_to_distribute

                    if 'data_url' in config:
                        expected['Last boundary'] = 'N/A'
                    elif 'last_updated' in config:
                        # NOTE(review): the `%-m` and `%-d` directives are not supported on every platform.
                        expected['Last boundary'] = config['last_updated'].strftime('%-m/%-d/%Y')

                    if 'source_url' in config:
                        expected['Contact'] = 'N/A'
                        expected['Received via'] = 'online'
                    elif expected['Province or territory'] == 'SK' and expected['Geographic type'] == 'RM':
                        # NOTE(review): the end of this string looks damaged by e-mail obfuscation; restore the
                        # address from the original source.
                        expected['Contact'] = 'Mikael Nagel\nMapping Technician\nTel: 1-306-569-2988 x205\nToll free: 1-800-663-6864\[email protected]'
                        expected['Received via'] = 'purchase'
                    else:
                        match = all_rights_reserved_terms_re.search(license_text)
                        if match:
                            expected['Contact'] = match.group(1)
                        expected['Received via'] = 'email'
                # The spreadsheet doesn't track borough boundaries.
                elif '/borough:' not in division_id:
                    sys.stderr.write('%-25s no record\n' % division_id)
            else:
                sys.stderr.write('%-25s no extra\n' % slug)

    reader = csv_dict_reader('https://docs.google.com/spreadsheets/d/1ihCIDc9EtvxF7kzPg3Yk6e928DN7JzaycH92IBYr0QU/pub?gid=25&single=true&output=csv', 'utf-8')

    actuals = set()
    for actual in reader:
        actuals.add(actual['OCD'])

        # A row without an expectation is reported for removal below, instead of ending the run with a `KeyError`.
        expected = expecteds.get(actual['OCD'])
        if expected is None:
            continue

        for key in actual.keys() - ('Population', 'URL', 'Request notes', 'Next boundary', 'Response notes'):
            e = expected[key]
            a = actual[key]

            # Note: Some of the following conditions are repetitive for readability.
            if e != a:
                # Change expectations for in-progress requests.
                if expected['Shapefile?'] == 'Request' and actual['Shapefile?'] == 'Requested' and (
                   # Request sent.
                   (key == 'Shapefile?' and e == 'Request' and a == 'Requested') or
                   # Contact found.
                   (key == 'Contact' and not e and a)):
                    continue

                # Some provinces and territories have missing expectations, in which case we defer to the spreadsheet.
                elif actual['Province or territory'] in ('ON', 'MB') and not expected['Shapefile?'] and (
                   # We determined that we needed to request boundaries.
                   (key == 'Shapefile?' and not e and a in ('Request', 'Requested')) or
                   # We determined that we needed to request boundaries, and did so.
                   (actual['Shapefile?'] == 'Requested' and key == 'Contact' and not e and a)):
                    continue

                # Contacts for private data are only stored in the spreadsheet.
                elif ((expected['Permission to distribute'] == 'N' and key == 'Contact' and not e and a) or
                      # MFIPPA receptions are only stored in the spreadsheet.
                      (key == 'Received via' and e == 'email' and a == 'MFIPPA')):
                    continue

                sys.stderr.write('%-25s %s: expected "%s" got "%s"\n' % (key, actual['OCD'], e, a))

    # Report the expected rows that the spreadsheet lacks, unless no shapefile is applicable.
    for division_id in expecteds.keys() - actuals:
        record = expecteds[division_id]
        if record['Shapefile?'] != 'N/A':
            sys.stderr.write('%s\t%s\t%s\t%s\t%s\n' % (division_id, record['Geographic name'], record['Province or territory'], record['Geographic type'], record['Population']))

    # Report the spreadsheet rows that have no expectation.
    for division_id in actuals - expecteds.keys():
        sys.stderr.write('Remove %s\n' % division_id)

Esempio n. 13

0

Mostra file

def _census_type_names_from_abbr(url):
    """
    Maps the type codes in the reference table at `url` to their names.

    The code is the text of each `abbr` element in the first header cell of a table row, and the name is the
    element's `title` with its ` /…` suffix, if any, removed.
    """
    type_names = {}
    document = lxml.html.fromstring(requests.get(url).content)
    for abbr in document.xpath('//table/tbody/tr/th[1]/abbr'):
        # A raw string keeps `\Z` from being read as an (invalid) string escape.
        type_names[abbr.text_content()] = re.sub(r' ?/.+\Z', '', abbr.attrib['title'])
    return type_names


def get_definition(division_id, aggregation=False):
    """
    Returns the expected configuration for the division with the given OCD identifier.

    The result has the keys `module_name`, `name`, `class_name`, `url` and `division_name`. With `aggregation`,
    'Municipalities' is appended to the class name and, for a province or territory, the module name and name are
    those of its municipalities.

    Raises an `Exception` if the type in the last section of `division_id` is not recognized.
    """
    if not ocdid_to_type_name_map:
        # Map census division type codes to names.
        census_division_type_names = _census_type_names_from_abbr('https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-4-eng.cfm')

        # Map census subdivision type codes to names.
        census_subdivision_type_names = _census_type_names_from_abbr('https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-5-eng.cfm')

        # Map OCD identifiers to census types.
        for division in Division.all('ca'):
            if division._type == 'cd':
                ocdid_to_type_name_map[division.id] = census_division_type_names[division.attrs['classification']]
            elif division._type == 'csd':
                ocdid_to_type_name_map[division.id] = census_subdivision_type_names[division.attrs['classification']]

    codes = province_and_territory_codes()
    division = Division.get(division_id)

    expected = {}
    # Initial letters before which "de" is elided to "d'".
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')

    sections = division_id.split('/')
    ocd_type, ocd_type_id = sections[-1].split(':')

    # Determine the module name, name and classification.
    if ocd_type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'
    elif ocd_type in ('province', 'territory'):
        pattern = 'ca_{}_municipalities' if aggregation else 'ca_{}'
        expected['module_name'] = pattern.format(ocd_type_id)
        if aggregation:
            expected['name'] = '{} Municipalities'.format(division.name)
        elif ocd_type_id in ('nl', 'ns'):
            expected['name'] = '{} House of Assembly'.format(division.name)
        elif ocd_type_id == 'qc':
            expected['name'] = 'Assemblée nationale du Québec'
        else:
            expected['name'] = 'Legislative Assembly of {}'.format(division.name)
    elif ocd_type == 'cd':
        # The first two characters of the type ID are the key of the province or territory in `codes`.
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        name_infix = ocdid_to_type_name_map[division_id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'csd':
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        # Type IDs starting with '24' get a French name.
        if ocd_type_id[:2] == '24':
            if division.name[0] in vowels:
                expected['name'] = "Conseil municipal d'{}".format(division.name)
            else:
                expected['name'] = "Conseil municipal de {}".format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division_id]
            if name_infix in ('Municipality', 'Specialized municipality'):
                name_infix = 'Municipal'
            elif name_infix == 'District municipality':
                name_infix = 'District'
            elif name_infix == 'Regional municipality':
                name_infix = 'Regional'
            expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'arrondissement':
        # The second-to-last section identifies the census subdivision that contains the arrondissement.
        census_subdivision_type_id = sections[-2].split(':')[-1]
        province_or_territory_type_id = codes[census_subdivision_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}_{}'.format(province_or_territory_type_id, slug(Division.get('/'.join(sections[:-1])).name), slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name[:3] == 'Le ':
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)
    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division_id, ocd_type))

    # Determine the class name.
    class_name_parts = re.split('[ -]', re.sub("[—–]", '-', re.sub("['.]", '', division.name)))
    expected['class_name'] = unidecode(text_type(''.join(word if re.match('[A-Z]', word) else word.capitalize() for word in class_name_parts)))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected

Esempio n. 14

0

Mostra file

File: tasks.py Progetto: opencivicdata/scrapers-ca

def _census_type_names_from_headers(url):
    """
    Maps the type codes in the reference table at `url` to their names.

    Each text node of a table header cell with a `headers` attribute is read as a code and a name separated by an
    en dash; the name ends before its first slash, if any.
    """
    type_names = {}
    document = lxml.html.fromstring(requests.get(url).content)
    for text in document.xpath('//table//th[@headers]/text()'):
        # `\s` matches plain and non-breaking spaces alike, so the invisible difference between them in the page
        # (or in this file) cannot break the split.
        code, name = re.split(r'\s–\s', text, maxsplit=1)
        type_names[code] = re.split(r'\s/\s', name, maxsplit=1)[0]
    return type_names


def get_definition(division_id, aggregation=False):
    """
    Returns the expected configuration for a given division.

    The result has the keys `module_name`, `name`, `class_name`, `url` and `division_name`. With `aggregation`,
    'Municipalities' is appended to the class name and, for a province or territory, the module name and name are
    those of its municipalities.

    Raises an `Exception` if the type of the division is not recognized.
    """
    if not ocdid_to_type_name_map:
        # Map census division type codes to names.
        census_division_type_names = _census_type_names_from_headers('https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_4-eng.cfm')

        # Map census subdivision type codes to names.
        census_subdivision_type_names = _census_type_names_from_headers('https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_5-eng.cfm')

        # Map OCD identifiers to census types.
        for division in Division.all('ca', from_csv=ocd_division_csv):
            if division._type == 'cd':
                ocdid_to_type_name_map[division.id] = census_division_type_names[division.attrs['classification']]
            elif division._type == 'csd':
                ocdid_to_type_name_map[division.id] = census_subdivision_type_names[division.attrs['classification']]

    division = Division.get(division_id, from_csv=ocd_division_csv)
    ocd_type_id = type_id(division.id)

    expected = {}
    # Initial letters before which "de" is elided to "d'".
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')

    # Determine the module name, name and classification.
    if division._type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'

    elif division._type in ('province', 'territory'):
        pattern = 'ca_{}_municipalities' if aggregation else 'ca_{}'
        expected['module_name'] = pattern.format(ocd_type_id)
        if aggregation:
            expected['name'] = '{} Municipalities'.format(division.name)
        elif ocd_type_id in ('nl', 'ns'):
            expected['name'] = '{} House of Assembly'.format(division.name)
        elif ocd_type_id == 'qc':
            expected['name'] = 'Assemblée nationale du Québec'
        else:
            expected['name'] = 'Legislative Assembly of {}'.format(division.name)

    elif division._type == 'cd':
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_abbreviation(division.id), slug(division.name))
        name_infix = ocdid_to_type_name_map[division.id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'csd':
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_abbreviation(division.id), slug(division.name))
        # Type IDs starting with '24' get a French name.
        if ocd_type_id[:2] == '24':
            if division.name[0] in vowels:
                expected['name'] = "Conseil municipal d'{}".format(division.name)
            else:
                expected['name'] = "Conseil municipal de {}".format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division.id]
            if name_infix in ('Municipality', 'Specialized municipality'):
                name_infix = 'Municipal'
            elif name_infix == 'District municipality':
                name_infix = 'District'
            elif name_infix == 'Regional municipality':
                name_infix = 'Regional'
            expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'arrondissement':
        expected['module_name'] = 'ca_{}_{}_{}'.format(province_or_territory_abbreviation(division.parent.id), slug(division.parent.name), slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name[:3] == 'Le ':
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)

    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division.id, division._type))

    # Determine the class name.
    class_name_parts = re.split('[ -]', re.sub("[—–]", '-', re.sub("['.]", '', division.name)))
    expected['class_name'] = unidecode(str(''.join(word if re.match('[A-Z]', word) else word.capitalize() for word in class_name_parts)))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected

Esempio n. 15

0

Mostra file

def spreadsheet(base='.', private_base='../represent-canada-private-data'):
    """
    Validate the spreadsheet for tracking progress on data collection.

    Expectations are built from the OCD division CSV, a Statistics Canada
    population table and the boundary definitions registered under `base` and
    `private_base`. They are then compared with the published spreadsheet.
    Differences, missing rows and rows to remove are written to standard
    error; nothing is returned.

    `base` holds the definitions whose rows expect "Permission to distribute"
    to be "Y"; `private_base` holds those whose rows expect "N".
    """
    expecteds = OrderedDict()

    expecteds['ocd-division/country:ca'] = default_expectation.copy()
    expecteds['ocd-division/country:ca'].update({
        'OCD': 'ocd-division/country:ca',
        'Geographic name': 'Canada',
    })

    # A single pass over the divisions both appends to `municipal_subdivisions`
    # from `constants.py` and creates expectations for provinces and territories.
    for division in Division.all('ca', from_csv=ocd_division_csv):
        if division.attrs['has_children']:
            municipal_subdivisions[type_id(division.id)] = division.attrs['has_children']

        if division._type in ('province', 'territory'):
            expected = default_expectation.copy()
            expected.update({
                'OCD': division.id,
                'Geographic name': division.name,
                'Province or territory': type_id(division.id).upper(),
            })
            expecteds[division.id] = expected

    # Create expectations for census subdivisions.
    reader = csv_dict_reader(
        'http://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/CompFile.cfm?Lang=Eng&T=301&OFT=FULLCSV',
        'ISO-8859-1')
    for row in reader:
        code = row['Geographic code']

        # Stop at the "Note:" row (presumably the start of the file's footnotes).
        if code == 'Note:':
            break

        division = Division.get('ocd-division/country:ca/csd:%s' % code, from_csv=ocd_division_csv)

        expected = default_expectation.copy()
        expected.update({
            'OCD': division.id,
            'Geographic name': division.name,
            'Province or territory': province_or_territory_abbreviation(division.id),
            'Geographic type': division.attrs['classification'],
            'Population': row['Population, 2016'],
        })

        if code in municipal_subdivisions:
            if municipal_subdivisions[code] == 'N':
                expected['Shapefile?'] = 'N/A'
                expected['Contact'] = 'N/A'
                expected['Received via'] = 'N/A'
                expected['Last boundary'] = 'N/A'
                expected['Permission to distribute'] = 'N/A'
            elif municipal_subdivisions[code] == 'Y':
                expected['Shapefile?'] = 'Request'

        expecteds[division.id] = expected

    # Merge information from received data.
    for directory, permission_to_distribute in [(base, 'Y'), (private_base, 'N')]:
        # Empty the registry before loading the definitions of each directory.
        boundaries.registry = {}
        for slug, config in registry(directory).items():
            if 'extra' in config:
                division_id = config['extra']['division_id']
                if division_id in expecteds:
                    license_path = os.path.join(dirname(config['file']), 'LICENSE.txt')
                    license_text = ''
                    if os.path.exists(license_path):
                        with open(license_path) as f:
                            license_text = f.read().rstrip('\n')

                    expected = expecteds[division_id]
                    expected['Shapefile?'] = 'Y'
                    expected['Permission to distribute'] = permission_to_distribute

                    if 'data_url' in config:
                        expected['Last boundary'] = 'N/A'
                    elif 'last_updated' in config:
                        expected['Last boundary'] = config['last_updated'].strftime('%-m/%-d/%Y')

                    if 'source_url' in config:
                        expected['Contact'] = 'N/A'
                        expected['Received via'] = 'online'
                    elif expected['Province or territory'] == 'SK' and expected['Geographic type'] == 'RM':
                        # NOTE(review): the end of this literal reads "[email protected]",
                        # which looks like a redacted address - confirm the real value.
                        expected['Contact'] = 'Mikael Nagel\nMapping Technician\nTel: 1-306-569-2988 x205\nToll free: 1-800-663-6864\[email protected]'
                        expected['Received via'] = 'purchase'
                    else:
                        # The contact, if any, is read from the license text.
                        match = all_rights_reserved_terms_re.search(license_text)
                        if match:
                            expected['Contact'] = match.group(1)
                        expected['Received via'] = 'email'
                # The spreadsheet doesn't track borough boundaries.
                elif '/borough:' not in division_id:
                    sys.stderr.write('%-25s no record\n' % division_id)
            else:
                sys.stderr.write('%-25s no extra\n' % slug)

    reader = csv_dict_reader(
        'https://docs.google.com/spreadsheets/d/1ihCIDc9EtvxF7kzPg3Yk6e928DN7JzaycH92IBYr0QU/pub?gid=25&single=true&output=csv',
        'utf-8')

    # Columns that are not compared against the expectations.
    ignored_columns = ('Population', 'URL', 'Request notes', 'Next boundary', 'Response notes')

    actuals = set()
    for actual in reader:
        ocd_id = actual['OCD']
        actuals.add(ocd_id)

        # A row without an expectation is reported as "Remove" at the end,
        # instead of aborting the whole validation with a KeyError here.
        if ocd_id not in expecteds:
            continue
        expected = expecteds[ocd_id]

        # Compare in column order, so that the report is stable between runs.
        for key, a in actual.items():
            if key in ignored_columns:
                continue
            e = expected[key]

            # Note: Some of the following conditions are repetitive for readability.
            if e != a:
                # Change expectations for in-progress requests.
                if expected['Shapefile?'] == 'Request' and actual['Shapefile?'] == 'Requested' and (
                        # Request sent.
                        (key == 'Shapefile?' and e == 'Request' and a == 'Requested') or
                        # Contact found.
                        (key == 'Contact' and not e and a)):
                    continue

                # Some provinces and territories have missing expectations, in which case we defer to the spreadsheet.
                elif actual['Province or territory'] in ('ON', 'MB') and not expected['Shapefile?'] and (
                        # We determined that we needed to request boundaries.
                        (key == 'Shapefile?' and not e and a in ('Request', 'Requested')) or
                        # We determined that we needed to request boundaries, and did so.
                        (actual['Shapefile?'] == 'Requested' and key == 'Contact' and not e and a)):
                    continue

                # Contacts for private data are only stored in the spreadsheet.
                elif ((expected['Permission to distribute'] == 'N' and key == 'Contact' and not e and a) or
                      # MFIPPA receptions are only stored in the spreadsheet.
                      (key == 'Received via' and e == 'email' and a == 'MFIPPA')):
                    continue

                sys.stderr.write('%-25s %s: expected "%s" got "%s"\n' % (key, ocd_id, e, a))

    # Report the expected rows that are missing from the spreadsheet, as
    # tab-separated values, in the order in which the expectations were created.
    for division_id, record in expecteds.items():
        if division_id not in actuals and record['Shapefile?'] != 'N/A':
            sys.stderr.write('%s\t%s\t%s\t%s\t%s\n' %
                             (division_id, record['Geographic name'],
                              record['Province or territory'],
                              record['Geographic type'], record['Population']))

    # Report the spreadsheet rows that have no expectation.
    for division_id in sorted(actuals - expecteds.keys()):
        sys.stderr.write('Remove %s\n' % division_id)

Esempio n. 16

0

Mostra file

File: tasks.py Progetto: anukat2015/scrapers-ca

def province_and_territory_codes():
    """
    Returns the memoized dict from the `sgc` attribute of each province and
    territory to its OCD identifier, filling the memo on first use.
    """
    # A non-empty memo has already been filled by an earlier call.
    if province_and_territory_codes_memo:
        return province_and_territory_codes_memo

    province_and_territory_codes_memo.update(
        (candidate.attrs['sgc'], candidate.id)
        for candidate in Division.all('ca')
        if candidate._type in ('province', 'territory')
    )
    return province_and_territory_codes_memo

Esempio n. 17

0

Mostra file

def _census_type_names_from_headers(url):
    """
    Returns a dict from census type codes to type names, scraped from the
    header cells of the Statistics Canada reference table at `url`.
    """
    document = lxml.html.fromstring(requests.get(url).content)
    type_names = {}
    for text in document.xpath('//table//th[@headers]/text()'):
        # Each header is split into a code and a name at the first dash, and
        # the name is cut at the first slash. The surrounding spaces may be
        # non-breaking, so split on any whitespace rather than on literal
        # space characters that are indistinguishable in an editor.
        code, name = re.split(r'\s+–\s+', text, maxsplit=1)
        type_names[code] = re.split(r'\s+/\s+', name, maxsplit=1)[0]
    return type_names


def get_definition(division_id, aggregation=False):
    """
    Returns the expected configuration for a given division.

    The result is a dict with the keys `module_name`, `name`, `class_name`,
    `url` and `division_name`. If `aggregation` is true, the module name, name
    and class name of a province or territory describe its municipalities
    instead of its legislature.

    Raises an exception if the type of the division isn't recognized.
    """
    # The map is only filled once, as it requires two HTTP requests.
    if not ocdid_to_type_name_map:
        # Map census division type codes to names.
        census_division_type_names = _census_type_names_from_headers(
            'https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_4-eng.cfm')

        # Map census subdivision type codes to names.
        census_subdivision_type_names = _census_type_names_from_headers(
            'https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_5-eng.cfm')

        # Map OCD identifiers to census types.
        for division in Division.all('ca', from_csv=ocd_division_csv):
            if division._type == 'cd':
                ocdid_to_type_name_map[division.id] = census_division_type_names[division.attrs['classification']]
            elif division._type == 'csd':
                ocdid_to_type_name_map[division.id] = census_subdivision_type_names[division.attrs['classification']]

    division = Division.get(division_id, from_csv=ocd_division_csv)
    ocd_type_id = type_id(division.id)

    expected = {}
    # Initial letters before which "de" is elided to "d'" in French names.
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')

    # Determine the module name, name and classification.
    if division._type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'

    elif division._type in ('province', 'territory'):
        pattern = 'ca_{}_municipalities' if aggregation else 'ca_{}'
        expected['module_name'] = pattern.format(ocd_type_id)
        if aggregation:
            expected['name'] = '{} Municipalities'.format(division.name)
        elif ocd_type_id in ('nl', 'ns'):
            expected['name'] = '{} House of Assembly'.format(division.name)
        elif ocd_type_id == 'qc':
            expected['name'] = 'Assemblée nationale du Québec'
        else:
            expected['name'] = 'Legislative Assembly of {}'.format(division.name)

    elif division._type == 'cd':
        expected['module_name'] = 'ca_{}_{}'.format(
            province_or_territory_abbreviation(division.id), slug(division.name))
        name_infix = ocdid_to_type_name_map[division.id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'csd':
        expected['module_name'] = 'ca_{}_{}'.format(
            province_or_territory_abbreviation(division.id), slug(division.name))
        # Census subdivisions whose identifier starts with "24" get a French name.
        if ocd_type_id[:2] == '24':
            if division.name[0] in vowels:
                expected['name'] = "Conseil municipal d'{}".format(division.name)
            else:
                expected['name'] = "Conseil municipal de {}".format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division.id]
            if name_infix in ('Municipality', 'Specialized municipality'):
                name_infix = 'Municipal'
            elif name_infix == 'District municipality':
                name_infix = 'District'
            elif name_infix == 'Regional municipality':
                name_infix = 'Regional'
            expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'arrondissement':
        expected['module_name'] = 'ca_{}_{}_{}'.format(
            province_or_territory_abbreviation(division.parent.id),
            slug(division.parent.name), slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name[:3] == 'Le ':
            # "de Le" contracts to "du".
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)

    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division.id, division._type))

    # Determine the class name: remove apostrophes and periods, split on
    # spaces, hyphens and dashes, capitalize the words that don't start with
    # an ASCII capital, join them and transliterate the result.
    class_name_parts = re.split('[ -]', re.sub("[—–]", '-', re.sub("['.]", '', division.name)))
    expected['class_name'] = unidecode(
        ''.join(word if re.match('[A-Z]', word) else word.capitalize() for word in class_name_parts))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected

Esempio n. 18

0

Mostra file

File: mappings.py Progetto: johnfelipe/scrapers_ca_app

    def handle(self, *args, **options):
        """
        Generate `mappings.py` in `settings.BASE_DIR`, from the boundary sets
        listed by the Represent API.

        Each boundary set, except the census divisions and subdivisions, is
        mapped to a prefix for the OCD identifiers of its boundaries and to a
        boundary key, which is either the string 'external_id' or a reference
        to the `lower` or `matcher` function defined in the generated file.
        Boundary sets whose division can't be determined are logged and
        omitted from the generated file.
        """
        mappings = {}

        divisions = list(Division.all('ca'))  # cache all divisions
        for obj in requests.get('https://represent.opennorth.ca/boundary-sets/?limit=0').json()['objects']:
            slug = obj['url'].split('/')[2]
            if obj['url'] in ('/boundary-sets/census-divisions/', '/boundary-sets/census-subdivisions/'):
                continue
            if obj['url'] in (
                    '/boundary-sets/federal-electoral-districts/',
                    '/boundary-sets/federal-electoral-districts-next-election/'
            ):
                prefix = 'ocd-division/country:ca/ed:'
                boundary_key = 'external_id'
            else:
                url = 'https://represent.opennorth.ca{}'.format(obj['url'])
                boundary_set = requests.get(url).json()

                if boundary_set['extra'].get('ocd_division'):
                    division_id = boundary_set['extra']['ocd_division']
                elif boundary_set['extra'].get('geographic_code'):
                    geographic_code = boundary_set['extra']['geographic_code']
                    geographic_code_length = len(geographic_code)
                    if geographic_code_length == 7:
                        division_id = 'ocd-division/country:ca/csd:{}'.format(geographic_code)
                    elif geographic_code_length == 4:
                        division_id = 'ocd-division/country:ca/cd:{}'.format(geographic_code)
                    elif geographic_code_length == 2:
                        # Two-character codes are matched against the `sgc`
                        # attribute of the cached divisions.
                        matching_division = next(
                            (division for division in divisions
                             if division.attrs['sgc'] == geographic_code),
                            None)
                        # Without this check, an unknown code would raise an
                        # AttributeError on `None`.
                        if matching_division is None:
                            log.error('Unrecognized geographic_code {}'.format(geographic_code))
                            continue
                        division_id = matching_division.id
                    else:
                        log.error('Unrecognized geographic_code {}'.format(geographic_code))
                        continue
                else:
                    # Otherwise, `division_id` would be unbound or left over
                    # from the previous boundary set.
                    log.error('No ocd_division or geographic_code for {}'.format(url))
                    continue

                # NOTE(review): `log.warn` is kept as is; if `log` is a
                # standard `logging` logger, `log.warning` is the supported
                # spelling - confirm and update.
                try:
                    division = Division.get(division_id)
                    if division._type == 'borough':
                        # The children of the borough's parent are mapped instead.
                        division = division.parent
                        division_id = division.id
                        exclude = ('place', 'borough')
                    elif 'boroughs' in obj['name']:
                        exclude = ('place', 'district')
                    elif 'districts' in obj['name']:
                        exclude = ('place', 'borough')
                    else:
                        exclude = ('place', )

                    subtypes = set(child._type
                                   for child in division.children()
                                   if child._type not in exclude)
                    if len(subtypes) == 0:
                        log.warn('No subtypes for {}'.format(division_id))
                        continue
                    elif len(subtypes) > 1:
                        log.warn('>1 subtypes for {}: {}'.format(division_id, list(subtypes)))
                        continue
                    else:
                        prefix = '{}/{}:'.format(division_id, subtypes.pop())

                    # The first child whose identifier doesn't match
                    # `numeric_re` determines the boundary key.
                    boundary_key = 'external_id'
                    for child in division.children():
                        if child._type not in exclude:
                            type_id = child.id.rsplit(':', 1)[1]
                            if not numeric_re.search(type_id):
                                if len(type_id) in (1, 3):  # Lunenburg 1-letter identifiers, BC uses 3-letter identifiers
                                    boundary_key = 'lower'
                                else:
                                    boundary_key = 'matcher'
                                break
                except KeyError:
                    log.warn('No division for {}'.format(url))
                    # Otherwise, the mapping would be written with the prefix
                    # and boundary key of a previous boundary set.
                    continue

            mappings[slug] = {
                'key': 'id',
                'prefix': prefix,
                'boundary_key': boundary_key,
            }

        with open(os.path.join(settings.BASE_DIR, 'mappings.py'), 'w') as f:
            f.write("# DO NOT EDIT THIS AUTO-GENERATED FILE\n")
            f.write("import re\n\n")

            f.write("from django.template.defaultfilters import slugify\n\n")

            f.write("leading_zero_re = re.compile(r'^0+')\n")
            # The backslash is doubled to write a literal `\d` without relying
            # on an invalid escape sequence.
            f.write("invalid_re = re.compile(r'[^a-z\\d._~-]')\n")
            f.write("leading_district_re = re.compile(r'^District ')\n")
            f.write("lower = lambda boundary: boundary['external_id'].lower()\n\n")
            f.write(
                "matcher = lambda boundary: leading_district_re.sub('', leading_zero_re.sub('', invalid_re.sub('~', boundary['name'].lower().replace(' ', '_'))))\n\n"
            )

            f.write('IMAGO_BOUNDARY_MAPPINGS = {\n')
            for slug, data in sorted(mappings.items()):
                f.write("    '{}': {{\n".format(slug))
                for key, value in sorted(data.items()):
                    # `lower` and `matcher` are written unquoted, to refer to
                    # the functions written above.
                    if key == 'boundary_key' and value in ('lower', 'matcher'):
                        f.write("        '{}': {},\n".format(key, value))
                    else:
                        f.write("        '{}': '{}',\n".format(key, value))
                f.write("    },\n")
            f.write('}\n')

Esempio n. 19

0

Mostra file

File: tasks.py Progetto: anukat2015/scrapers-ca

def _census_type_names_from_abbrs(url):
    """
    Returns a dict from census type codes to type names, scraped from the
    `abbr` elements of the Statistics Canada reference table at `url`.
    """
    document = lxml.html.fromstring(requests.get(url).content)
    type_names = {}
    for abbr in document.xpath('//table/tbody/tr/th[1]/abbr'):
        # The code is the text of the element. The name is its title, cut at
        # the first " /". The pattern is a raw string, as `\Z` isn't a valid
        # escape sequence in a regular string.
        type_names[abbr.text_content()] = re.sub(r' /.+\Z', '', abbr.attrib['title'])
    return type_names


def get_definition(division_id, aggregation=False):
    """
    Returns the expected configuration for a given division.

    The result is a dict with the keys `module_name`, `name`, `class_name`,
    `url` and `division_name`. If `aggregation` is true, the module name, name
    and class name of a province or territory describe its municipalities
    instead of its legislature.

    Raises an exception if the type of the division isn't recognized.
    """
    # The map is only filled once, as it requires two HTTP requests.
    if not ocdid_to_type_name_map:
        # Map census division type codes to names.
        census_division_type_names = _census_type_names_from_abbrs(
            'https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-4-eng.cfm')

        # Map census subdivision type codes to names.
        census_subdivision_type_names = _census_type_names_from_abbrs(
            'https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-5-eng.cfm')

        # Map OCD identifiers to census types.
        for division in Division.all('ca'):
            if division._type == 'cd':
                ocdid_to_type_name_map[division.id] = census_division_type_names[division.attrs['classification']]
            elif division._type == 'csd':
                ocdid_to_type_name_map[division.id] = census_subdivision_type_names[division.attrs['classification']]

    codes = province_and_territory_codes()
    division = Division.get(division_id)

    expected = {}
    # Initial letters before which "de" is elided to "d'" in French names.
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')

    # The last section of the identifier holds the type and type identifier.
    sections = division_id.split('/')
    ocd_type, ocd_type_id = sections[-1].split(':')

    # Determine the module name, name and classification.
    if ocd_type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'
    elif ocd_type in ('province', 'territory'):
        pattern = 'ca_{}_municipalities' if aggregation else 'ca_{}'
        expected['module_name'] = pattern.format(ocd_type_id)
        if aggregation:
            expected['name'] = '{} Municipalities'.format(division.name)
        elif ocd_type_id in ('nl', 'ns'):
            expected['name'] = '{} House of Assembly'.format(division.name)
        elif ocd_type_id == 'qc':
            expected['name'] = 'Assemblée nationale du Québec'
        else:
            expected['name'] = 'Legislative Assembly of {}'.format(division.name)
    elif ocd_type == 'cd':
        # The first two characters of the type identifier are looked up in
        # `codes` to find the province or territory.
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        name_infix = ocdid_to_type_name_map[division_id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'csd':
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        # Census subdivisions whose identifier starts with "24" get a French name.
        if ocd_type_id[:2] == '24':
            if division.name[0] in vowels:
                expected['name'] = "Conseil municipal d'{}".format(division.name)
            else:
                expected['name'] = "Conseil municipal de {}".format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division_id]
            if name_infix in ('Municipality', 'Specialized municipality'):
                name_infix = 'Municipal'
            elif name_infix == 'District municipality':
                name_infix = 'District'
            elif name_infix == 'Regional municipality':
                name_infix = 'Regional'
            expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'arrondissement':
        # The second-to-last section is the one that the arrondissement is
        # nested under; its type identifier and name are used for the module name.
        census_subdivision_type_id = sections[-2].split(':')[-1]
        province_or_territory_type_id = codes[census_subdivision_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}_{}'.format(province_or_territory_type_id, slug(Division.get('/'.join(sections[:-1])).name), slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name[:3] == 'Le ':
            # "de Le" contracts to "du".
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)
    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division_id, ocd_type))

    # Determine the class name: remove apostrophes and periods, split on
    # spaces, hyphens and dashes, capitalize the words that don't start with
    # an ASCII capital, join them and transliterate the result.
    class_name_parts = re.split('[ -]', re.sub("[—–]", '-', re.sub("['.]", '', division.name)))
    expected['class_name'] = unidecode(text_type(''.join(word if re.match('[A-Z]', word) else word.capitalize() for word in class_name_parts)))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected