def province_and_territory_codes():
    """
    Returns a mapping from the `sgc` attribute of each Canadian province and
    territory to its OCD division identifier.

    The mapping is filled on the first call and kept in the module-level
    `province_and_territory_codes_memo`; later calls return that same object.
    """
    if province_and_territory_codes_memo:
        return province_and_territory_codes_memo

    for candidate in Division.all('ca'):
        if candidate._type not in ('province', 'territory'):
            continue
        province_and_territory_codes_memo[candidate.attrs['sgc']] = candidate.id

    return province_and_territory_codes_memo
def province_or_territory_abbreviation(code):
    """
    Returns the upper-cased type ID of the province or territory whose `sgc`
    attribute equals the first two characters of `type_id(code)`.

    The lookup table is filled on the first call from the divisions in
    `ocd_division_csv` and kept in `province_or_territory_abbreviation_memo`.

    Raises KeyError if no province or territory has that two-character code.
    """
    memo = province_or_territory_abbreviation_memo
    if not memo:
        for candidate in Division.all('ca', from_csv=ocd_division_csv):
            if candidate._type not in ('province', 'territory'):
                continue
            abbreviation = type_id(candidate.id).upper()
            memo[candidate.attrs['sgc']] = abbreviation

    sgc_prefix = type_id(code)[:2]
    return memo[sgc_prefix]
def province_or_territory_abbreviations():
    """
    Returns a mapping from province and territory names to abbreviations.

    Each province or territory contributes two keys, its `name` and its
    `name_fr` attribute, both mapped to the upper-cased text after the last
    colon of its OCD identifier. The alias 'PEI' is mapped to 'PE'. The mapping
    is filled on the first call and kept in
    `province_or_territory_abbreviation_memo`.
    """
    memo = province_or_territory_abbreviation_memo
    if memo:
        return memo

    memo['PEI'] = 'PE'
    for candidate in Division.all('ca'):
        if candidate._type not in ('province', 'territory'):
            continue
        short_form = candidate.id.rsplit(':', 1)[1].upper()
        memo[candidate.name] = short_form
        memo[candidate.attrs['name_fr']] = short_form

    return memo
def divisions_with_boroughs():
    """
    Returns the OCD identifiers for divisions with boroughs.

    The identifiers are the parents of every division of type 'borough' in
    `ocd_division_csv`. They are collected on the first call and kept in
    `divisions_with_boroughs_memo`.
    """
    if divisions_with_boroughs_memo:
        return divisions_with_boroughs_memo

    for candidate in Division.all('ca', from_csv=ocd_division_csv):
        if candidate._type != 'borough':
            continue
        divisions_with_boroughs_memo.add(candidate.parent.id)

    return divisions_with_boroughs_memo
def handle(self, *args, **options):
    """
    Checks that every boundary served by Represent matches a known OCD division.

    For each entry of `settings.IMAGO_BOUNDARY_MAPPINGS`, fetches the boundaries
    of that boundary set and builds the expected OCD identifier from the entry's
    `prefix` plus either `boundary_key(obj)` (when `boundary_key` is callable)
    or `obj[boundary_key]`. A warning is logged for each expected identifier
    that is not among the identifiers of `Division.all('ca')`.
    """
    known_ids = {division.id for division in Division.all('ca')}

    for slug, data in settings.IMAGO_BOUNDARY_MAPPINGS.items():
        url = 'https://represent.opennorth.ca/boundaries/{}/?limit=0'.format(slug)
        boundary_key = data['boundary_key']
        derive_from_callable = callable(boundary_key)

        for obj in requests.get(url).json()['objects']:
            if derive_from_callable:
                expected = data['prefix'] + boundary_key(obj)
            else:
                expected = data['prefix'] + obj[boundary_key]

            if expected not in known_ids:
                # `warn` is a deprecated alias of `warning`.
                log.warning('No match for {} from {}'.format(expected, url))
def validate_spreadsheet(url, identifier_header, geographic_name_header):
    """
    Validates the identifiers and geographic names in a spreadsheet.

    Each row's identifier is turned into an OCD identifier according to its
    length: 2 characters are looked up among the divisions' `sgc` attributes,
    4 characters become a `cd` identifier and 7 characters a `csd` identifier.
    Any other length is passed to `Division.get` unchanged. A line is printed
    for every row whose geographic name differs from the division's name.
    """
    sgc_to_id = {}
    for known in Division.all('ca', from_csv=ocd_division_csv):
        sgc_to_id[known.attrs['sgc']] = known.id

    templates_by_length = {
        4: 'ocd-division/country:ca/cd:{}',
        7: 'ocd-division/country:ca/csd:{}',
    }

    for row in csv_dict_reader(url):
        identifier = row[identifier_header]
        length = len(identifier)
        if length == 2:
            identifier = sgc_to_id[identifier]
        elif length in templates_by_length:
            identifier = templates_by_length[length].format(identifier)

        division = Division.get(identifier)
        if row[geographic_name_header] == division.name:
            continue
        print('{}: name: {} not {}'.format(identifier, division.name, row[geographic_name_header]))
def handle(self, *args, **options):
    """
    Regenerates `mappings.py` in `settings.BASE_DIR` from Represent's boundary sets.

    For each boundary set listed by the Represent API (except census divisions
    and census subdivisions), determines the OCD identifier prefix shared by
    its boundaries and how a boundary's type ID is derived: from `external_id`
    as-is, through the generated `lower` function, or through the generated
    `matcher` function. Boundary sets whose division cannot be resolved, or
    whose division has zero or several candidate subtypes, are logged and left
    out of the generated file.
    """
    skipped_urls = (
        '/boundary-sets/census-divisions/',
        '/boundary-sets/census-subdivisions/',
    )
    federal_urls = (
        '/boundary-sets/federal-electoral-districts/',
        '/boundary-sets/federal-electoral-districts-next-election/',
    )

    mappings = {}

    divisions = list(Division.all('ca'))  # cache all divisions
    for obj in requests.get('https://represent.opennorth.ca/boundary-sets/?limit=0').json()['objects']:
        slug = obj['url'].split('/')[2]
        if obj['url'] in skipped_urls:
            continue

        if obj['url'] in federal_urls:
            prefix = 'ocd-division/country:ca/ed:'
            boundary_key = 'external_id'
        else:
            url = 'https://represent.opennorth.ca{}'.format(obj['url'])
            extra = requests.get(url).json()['extra']

            if extra.get('ocd_division'):
                division_id = extra['ocd_division']
            elif extra.get('geographic_code'):
                geographic_code = extra['geographic_code']
                geographic_code_length = len(geographic_code)
                if geographic_code_length == 7:
                    division_id = 'ocd-division/country:ca/csd:{}'.format(geographic_code)
                elif geographic_code_length == 4:
                    division_id = 'ocd-division/country:ca/cd:{}'.format(geographic_code)
                elif geographic_code_length == 2:
                    match = next((division for division in divisions if division.attrs['sgc'] == geographic_code), None)
                    if match is None:
                        # No division carries this 2-character code.
                        log.error('Unrecognized geographic_code {}'.format(geographic_code))
                        continue
                    division_id = match.id
                else:
                    log.error('Unrecognized geographic_code {}'.format(geographic_code))
                    continue
            else:
                # Without either key there is nothing to resolve; do not reuse
                # the division of a previous iteration.
                log.error('No ocd_division or geographic_code for {}'.format(url))
                continue

            try:
                division = Division.get(division_id)
                if division._type == 'borough':
                    division = division.parent
                    division_id = division.id
                    exclude = ('place', 'borough')
                elif 'boroughs' in obj['name']:
                    exclude = ('place', 'district')
                elif 'districts' in obj['name']:
                    exclude = ('place', 'borough')
                else:
                    exclude = ('place',)

                subtypes = {child._type for child in division.children() if child._type not in exclude}
                if not subtypes:
                    log.warning('No subtypes for {}'.format(division_id))
                    continue
                if len(subtypes) > 1:
                    log.warning('>1 subtypes for {}: {}'.format(division_id, list(subtypes)))
                    continue
                prefix = '{}/{}:'.format(division_id, subtypes.pop())

                # The first child with a non-numeric type ID decides how
                # boundaries are matched.
                boundary_key = 'external_id'
                for child in division.children():
                    if child._type in exclude:
                        continue
                    child_type_id = child.id.rsplit(':', 1)[1]
                    if not numeric_re.search(child_type_id):
                        if len(child_type_id) in (1, 3):  # Lunenburg 1-letter identifiers, BC uses 3-letter identifiers
                            boundary_key = 'lower'
                        else:
                            boundary_key = 'matcher'
                        break
            except KeyError:
                log.warning('No division for {}'.format(url))
                # `prefix` and `boundary_key` were not determined for this set.
                continue

        mappings[slug] = {
            'key': 'id',
            'prefix': prefix,
            'boundary_key': boundary_key,
        }

    with open(os.path.join(settings.BASE_DIR, 'mappings.py'), 'w') as f:
        f.write("# DO NOT EDIT THIS AUTO-GENERATED FILE\n")
        f.write("import re\n\n")
        f.write("from django.template.defaultfilters import slugify\n\n")
        f.write("leading_zero_re = re.compile(r'^0+')\n")
        # The backslash is doubled so that the generated file contains `\d`.
        f.write("invalid_re = re.compile(r'[^a-z\\d._~-]')\n")
        f.write("leading_district_re = re.compile(r'^District ')\n")
        f.write("lower = lambda boundary: boundary['external_id'].lower()\n\n")
        f.write("matcher = lambda boundary: leading_district_re.sub('', leading_zero_re.sub('', invalid_re.sub('~', boundary['name'].lower().replace(' ', '_'))))\n\n")
        f.write('IMAGO_BOUNDARY_MAPPINGS = {\n')
        for slug, data in sorted(mappings.items()):
            f.write(" '{}': {{\n".format(slug))
            for key, value in sorted(data.items()):
                if key == 'boundary_key' and value in ('lower', 'matcher'):
                    # Written unquoted so that it refers to the generated function.
                    f.write(" '{}': {},\n".format(key, value))
                else:
                    f.write(" '{}': '{}',\n".format(key, value))
            f.write(" },\n")
        f.write('}\n')
def province_or_territory_abbreviation(code):
    """
    Returns the upper-cased type ID of the province or territory whose `sgc`
    attribute equals the first two characters of `type_id(code)`.

    The lookup table is filled on the first call from the divisions in
    `ocd_division_csv` and kept in `province_or_territory_abbreviation_memo`.

    Raises KeyError if no province or territory has that two-character code.
    """
    memo = province_or_territory_abbreviation_memo
    if not memo:
        for candidate in Division.all('ca', from_csv=ocd_division_csv):
            if candidate._type not in ('province', 'territory'):
                continue
            abbreviation = type_id(candidate.id).upper()
            memo[candidate.attrs['sgc']] = abbreviation

    sgc_prefix = type_id(code)[:2]
    return memo[sgc_prefix]
def spreadsheet(base='.', private_base='../represent-canada-private-data'):
    """
    Validate the spreadsheet for tracking progress on data collection.

    Builds the expected row for Canada, each province and territory, and each
    census subdivision in the Statistics Canada population file, then merges in
    what the boundary definitions under `base` (distributable) and
    `private_base` (not distributable) say has been received. The published
    spreadsheet is then compared against those expectations. All findings are
    written to standard error:

    * definitions without `extra`, or whose division has no expected row;
    * cells that differ from the expectation and are not a tolerated difference;
    * expected rows missing from the spreadsheet (tab-separated);
    * spreadsheet rows with no expected row (`Remove ...`).
    """
    expecteds = OrderedDict()

    # Append to `municipal_subdivisions` from `constants.py`.
    for division in Division.all('ca', from_csv=ocd_division_csv):
        if division.attrs['has_children']:
            municipal_subdivisions[type_id(division.id)] = division.attrs['has_children']

    expecteds['ocd-division/country:ca'] = default_expectation.copy()
    expecteds['ocd-division/country:ca'].update({
        'OCD': 'ocd-division/country:ca',
        'Geographic name': 'Canada',
    })

    # Create expectations for provinces and territories.
    for division in Division.all('ca', from_csv=ocd_division_csv):
        if division._type in ('province', 'territory'):
            expected = default_expectation.copy()
            expected.update({
                'OCD': division.id,
                'Geographic name': division.name,
                'Province or territory': type_id(division.id).upper(),
            })
            expecteds[division.id] = expected

    # Create expectations for census subdivisions.
    reader = csv_dict_reader('http://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/CompFile.cfm?Lang=Eng&T=301&OFT=FULLCSV', 'ISO-8859-1')
    for row in reader:
        code = row['Geographic code']
        # Stop at the notes that follow the data rows.
        if code == 'Note:':
            break

        division = Division.get('ocd-division/country:ca/csd:%s' % code, from_csv=ocd_division_csv)

        expected = default_expectation.copy()
        expected.update({
            'OCD': division.id,
            'Geographic name': division.name,
            'Province or territory': province_or_territory_abbreviation(division.id),
            'Geographic type': division.attrs['classification'],
            'Population': row['Population, 2016'],
        })

        if code in municipal_subdivisions:
            if municipal_subdivisions[code] == 'N':
                expected['Shapefile?'] = 'N/A'
                expected['Contact'] = 'N/A'
                expected['Received via'] = 'N/A'
                expected['Last boundary'] = 'N/A'
                expected['Permission to distribute'] = 'N/A'
            elif municipal_subdivisions[code] == 'Y':
                expected['Shapefile?'] = 'Request'

        expecteds[division.id] = expected

    # Merge information from received data.
    for directory, permission_to_distribute in [(base, 'Y'), (private_base, 'N')]:
        boundaries.registry = {}
        for slug, config in registry(directory).items():
            if 'extra' not in config:
                sys.stderr.write('%-25s no extra\n' % slug)
                continue

            division_id = config['extra']['division_id']
            if division_id not in expecteds:
                # The spreadsheet doesn't track borough boundaries.
                if '/borough:' not in division_id:
                    sys.stderr.write('%-25s no record\n' % division_id)
                continue

            license_path = os.path.join(dirname(config['file']), 'LICENSE.txt')
            license_text = ''
            if os.path.exists(license_path):
                with open(license_path) as f:
                    license_text = f.read().rstrip('\n')

            expected = expecteds[division_id]
            expected['Shapefile?'] = 'Y'
            expected['Permission to distribute'] = permission_to_distribute

            if 'data_url' in config:
                expected['Last boundary'] = 'N/A'
            elif 'last_updated' in config:
                expected['Last boundary'] = config['last_updated'].strftime('%-m/%-d/%Y')

            if 'source_url' in config:
                expected['Contact'] = 'N/A'
                expected['Received via'] = 'online'
            elif expected['Province or territory'] == 'SK' and expected['Geographic type'] == 'RM':
                # NOTE(review): the end of this literal reads like a redacted
                # email address; confirm against the spreadsheet.
                expected['Contact'] = 'Mikael Nagel\nMapping Technician\nTel: 1-306-569-2988 x205\nToll free: 1-800-663-6864\[email protected]'
                expected['Received via'] = 'purchase'
            else:
                match = all_rights_reserved_terms_re.search(license_text)
                if match:
                    expected['Contact'] = match.group(1)
                expected['Received via'] = 'email'

    reader = csv_dict_reader('https://docs.google.com/spreadsheets/d/1ihCIDc9EtvxF7kzPg3Yk6e928DN7JzaycH92IBYr0QU/pub?gid=25&single=true&output=csv', 'utf-8')
    ignored_keys = ('Population', 'URL', 'Request notes', 'Next boundary', 'Response notes')
    actuals = set()
    for actual in reader:
        actuals.add(actual['OCD'])
        expected = expecteds.get(actual['OCD'])
        if expected is None:
            # No expectation exists for this row; it is reported for removal
            # below instead of aborting the comparison with a KeyError.
            continue

        for key in actual.keys() - ignored_keys:
            e = expected[key]
            a = actual[key]
            if e != a and not _is_accepted_difference(key, e, a, expected, actual):
                sys.stderr.write('%-25s %s: expected "%s" got "%s"\n' % (key, actual['OCD'], e, a))

    for division_id in expecteds.keys() - actuals:
        record = expecteds[division_id]
        if record['Shapefile?'] != 'N/A':
            sys.stderr.write('%s\t%s\t%s\t%s\t%s\n' % (division_id, record['Geographic name'], record['Province or territory'], record['Geographic type'], record['Population']))

    for division_id in actuals - expecteds.keys():
        sys.stderr.write('Remove %s\n' % division_id)


def _is_accepted_difference(key, e, a, expected, actual):
    """
    Returns whether the spreadsheet value `a` may differ from the expected
    value `e` for column `key`, given the full expected and actual rows.
    """
    # Note: Some of the following conditions are repetitive for readability.

    # Change expectations for in-progress requests.
    if expected['Shapefile?'] == 'Request' and actual['Shapefile?'] == 'Requested' and (
            # Request sent.
            (key == 'Shapefile?' and e == 'Request' and a == 'Requested') or
            # Contact found.
            (key == 'Contact' and not e and a)):
        return True

    # Some provinces and territories have missing expectations, in which case we defer to the spreadsheet.
    if actual['Province or territory'] in ('ON', 'MB') and not expected['Shapefile?'] and (
            # We determined that we needed to request boundaries.
            (key == 'Shapefile?' and not e and a in ('Request', 'Requested')) or
            # We determined that we needed to request boundaries, and did so.
            (actual['Shapefile?'] == 'Requested' and key == 'Contact' and not e and a)):
        return True

    # Contacts for private data are only stored in the spreadsheet.
    if expected['Permission to distribute'] == 'N' and key == 'Contact' and not e and a:
        return True

    # MFIPPA receptions are only stored in the spreadsheet.
    return key == 'Received via' and e == 'email' and a == 'MFIPPA'
def get_definition(division_id, aggregation=False):
    """
    Returns the expected configuration for a given division.

    :param division_id: an OCD division identifier whose last section has the
        type `country`, `province`, `territory`, `cd`, `csd` or `arrondissement`
    :param aggregation: whether the configuration is for the aggregation of a
        province's or territory's municipalities (changes the module name,
        name and class name)
    :returns: a dict with the keys `module_name`, `name`, `class_name`, `url`
        and `division_name`
    :raises Exception: if the division has any other OCD type

    On the first call, the census division and census subdivision type names
    are scraped from Statistics Canada's website and cached in
    `ocdid_to_type_name_map`.
    """
    if not ocdid_to_type_name_map:
        def type_names(url):
            # Map the type codes in the table at `url` to names, keeping only
            # the part of each title before the first slash.
            document = lxml.html.fromstring(requests.get(url).content)
            return {
                abbr.text_content(): re.sub(r' ?/.+\Z', '', abbr.attrib['title'])
                for abbr in document.xpath('//table/tbody/tr/th[1]/abbr')
            }

        # Map census division type codes to names.
        census_division_type_names = type_names('https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-4-eng.cfm')

        # Map census subdivision type codes to names.
        census_subdivision_type_names = type_names('https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-5-eng.cfm')

        # Map OCD identifiers to census types.
        for division in Division.all('ca'):
            if division._type == 'cd':
                ocdid_to_type_name_map[division.id] = census_division_type_names[division.attrs['classification']]
            elif division._type == 'csd':
                ocdid_to_type_name_map[division.id] = census_subdivision_type_names[division.attrs['classification']]

    codes = province_and_territory_codes()
    division = Division.get(division_id)

    expected = {}
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')

    sections = division_id.split('/')
    ocd_type, ocd_type_id = sections[-1].split(':')

    # Determine the module name, name and classification.
    if ocd_type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'
    elif ocd_type in ('province', 'territory'):
        pattern = 'ca_{}_municipalities' if aggregation else 'ca_{}'
        expected['module_name'] = pattern.format(ocd_type_id)
        if aggregation:
            expected['name'] = '{} Municipalities'.format(division.name)
        elif ocd_type_id in ('nl', 'ns'):
            expected['name'] = '{} House of Assembly'.format(division.name)
        elif ocd_type_id == 'qc':
            expected['name'] = 'Assemblée nationale du Québec'
        else:
            expected['name'] = 'Legislative Assembly of {}'.format(division.name)
    elif ocd_type == 'cd':
        # The first two characters of the type ID select the province or territory.
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        name_infix = ocdid_to_type_name_map[division_id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'csd':
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        # Census subdivisions whose code starts with 24 get a French name,
        # with the preposition elided before a vowel.
        if ocd_type_id[:2] == '24':
            if division.name[0] in vowels:
                expected['name'] = "Conseil municipal d'{}".format(division.name)
            else:
                expected['name'] = "Conseil municipal de {}".format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division_id]
            if name_infix in ('Municipality', 'Specialized municipality'):
                name_infix = 'Municipal'
            elif name_infix == 'District municipality':
                name_infix = 'District'
            elif name_infix == 'Regional municipality':
                name_infix = 'Regional'
            expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'arrondissement':
        # The parent section of the identifier carries the census subdivision code.
        census_subdivision_type_id = sections[-2].split(':')[-1]
        province_or_territory_type_id = codes[census_subdivision_type_id[:2]].split(':')[-1]
        parent_name = Division.get('/'.join(sections[:-1])).name
        expected['module_name'] = 'ca_{}_{}_{}'.format(province_or_territory_type_id, slug(parent_name), slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name[:3] == 'Le ':
            # "de Le" contracts to "du".
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)
    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division_id, ocd_type))

    # Determine the class name: drop apostrophes and periods, split on spaces,
    # hyphens and dashes, and capitalize the words that do not already start
    # with an ASCII capital.
    class_name_parts = re.split('[ -]', re.sub("[—–]", '-', re.sub("['.]", '', division.name)))
    expected['class_name'] = unidecode(text_type(''.join(word if re.match('[A-Z]', word) else word.capitalize() for word in class_name_parts)))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected
def get_definition(division_id, aggregation=False):
    """
    Returns the expected configuration for a given division.

    The result is a dict with the keys `module_name`, `name`, `class_name`,
    `url` and `division_name`. An Exception is raised for a division whose type
    is not `country`, `province`, `territory`, `cd`, `csd` or `arrondissement`.
    The census type names are scraped on the first call and cached in
    `ocdid_to_type_name_map`.
    """
    if not ocdid_to_type_name_map:
        # Map census division type codes to names.
        cd_type_names = {}
        cd_table = lxml.html.fromstring(requests.get('https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_4-eng.cfm').content)
        for heading in cd_table.xpath('//table//th[@headers]/text()'):
            code, name = heading.split(' – ', 1)
            cd_type_names[code] = name.split(' / ', 1)[0]

        # Map census subdivision type codes to names.
        csd_type_names = {}
        csd_table = lxml.html.fromstring(requests.get('https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_5-eng.cfm').content)
        for heading in csd_table.xpath('//table//th[@headers]/text()'):
            code, name = heading.split(' – ', 1)  # non-breaking space
            csd_type_names[code] = name.split(' / ', 1)[0]

        # Map OCD identifiers to census types.
        type_names_by_division_type = {'cd': cd_type_names, 'csd': csd_type_names}
        for candidate in Division.all('ca', from_csv=ocd_division_csv):
            type_names = type_names_by_division_type.get(candidate._type)
            if type_names is not None:
                ocdid_to_type_name_map[candidate.id] = type_names[candidate.attrs['classification']]

    division = Division.get(division_id, from_csv=ocd_division_csv)
    ocd_type_id = type_id(division.id)

    expected = {}
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')
    # Shortened forms of census type names used in council names.
    council_infixes = {
        'Municipality': 'Municipal',
        'Specialized municipality': 'Municipal',
        'District municipality': 'District',
        'Regional municipality': 'Regional',
    }

    # Determine the module name, name and classification.
    if division._type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'

    elif division._type in ('province', 'territory'):
        if aggregation:
            expected['module_name'] = 'ca_{}_municipalities'.format(ocd_type_id)
            expected['name'] = '{} Municipalities'.format(division.name)
        else:
            expected['module_name'] = 'ca_{}'.format(ocd_type_id)
            if ocd_type_id in ('nl', 'ns'):
                expected['name'] = '{} House of Assembly'.format(division.name)
            elif ocd_type_id == 'qc':
                expected['name'] = 'Assemblée nationale du Québec'
            else:
                expected['name'] = 'Legislative Assembly of {}'.format(division.name)

    elif division._type == 'cd':
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_abbreviation(division.id), slug(division.name))
        name_infix = ocdid_to_type_name_map[division.id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'csd':
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_abbreviation(division.id), slug(division.name))
        if ocd_type_id[:2] == '24':
            # French names elide the preposition before a vowel.
            template = "Conseil municipal d'{}" if division.name[0] in vowels else "Conseil municipal de {}"
            expected['name'] = template.format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division.id]
            name_infix = council_infixes.get(name_infix, name_infix)
            expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'arrondissement':
        expected['module_name'] = 'ca_{}_{}_{}'.format(province_or_territory_abbreviation(division.parent.id), slug(division.parent.name), slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name.startswith('Le '):
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)

    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division.id, division._type))

    # Determine the class name.
    without_punctuation = re.sub("['.]", '', division.name)
    with_plain_hyphens = re.sub("[—–]", '-', without_punctuation)
    words = []
    for word in re.split('[ -]', with_plain_hyphens):
        if re.match('[A-Z]', word):
            words.append(word)
        else:
            words.append(word.capitalize())
    expected['class_name'] = unidecode(str(''.join(words)))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected
def spreadsheet(base='.', private_base='../represent-canada-private-data'):
    """
    Validate the spreadsheet for tracking progress on data collection.

    Builds the expected row for Canada, each province and territory, and each
    census subdivision in the Statistics Canada population file, then merges in
    what the boundary definitions under `base` (distributable) and
    `private_base` (not distributable) say has been received. The published
    spreadsheet is then compared against those expectations. All findings are
    written to standard error:

    * definitions without `extra`, or whose division has no expected row;
    * cells that differ from the expectation and are not a tolerated difference;
    * expected rows missing from the spreadsheet (tab-separated);
    * spreadsheet rows with no expected row (`Remove ...`).
    """
    expecteds = OrderedDict()

    # Append to `municipal_subdivisions` from `constants.py`.
    for division in Division.all('ca', from_csv=ocd_division_csv):
        if division.attrs['has_children']:
            municipal_subdivisions[type_id(division.id)] = division.attrs['has_children']

    expecteds['ocd-division/country:ca'] = default_expectation.copy()
    expecteds['ocd-division/country:ca'].update({
        'OCD': 'ocd-division/country:ca',
        'Geographic name': 'Canada',
    })

    # Create expectations for provinces and territories.
    for division in Division.all('ca', from_csv=ocd_division_csv):
        if division._type in ('province', 'territory'):
            expected = default_expectation.copy()
            expected.update({
                'OCD': division.id,
                'Geographic name': division.name,
                'Province or territory': type_id(division.id).upper(),
            })
            expecteds[division.id] = expected

    # Create expectations for census subdivisions.
    reader = csv_dict_reader('http://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/CompFile.cfm?Lang=Eng&T=301&OFT=FULLCSV', 'ISO-8859-1')
    for row in reader:
        code = row['Geographic code']
        # Stop at the notes that follow the data rows.
        if code == 'Note:':
            break

        division = Division.get('ocd-division/country:ca/csd:%s' % code, from_csv=ocd_division_csv)

        expected = default_expectation.copy()
        expected.update({
            'OCD': division.id,
            'Geographic name': division.name,
            'Province or territory': province_or_territory_abbreviation(division.id),
            'Geographic type': division.attrs['classification'],
            'Population': row['Population, 2016'],
        })

        if code in municipal_subdivisions:
            if municipal_subdivisions[code] == 'N':
                expected['Shapefile?'] = 'N/A'
                expected['Contact'] = 'N/A'
                expected['Received via'] = 'N/A'
                expected['Last boundary'] = 'N/A'
                expected['Permission to distribute'] = 'N/A'
            elif municipal_subdivisions[code] == 'Y':
                expected['Shapefile?'] = 'Request'

        expecteds[division.id] = expected

    # Merge information from received data.
    for directory, permission_to_distribute in [(base, 'Y'), (private_base, 'N')]:
        boundaries.registry = {}
        for slug, config in registry(directory).items():
            if 'extra' not in config:
                sys.stderr.write('%-25s no extra\n' % slug)
                continue

            division_id = config['extra']['division_id']
            if division_id not in expecteds:
                # The spreadsheet doesn't track borough boundaries.
                if '/borough:' not in division_id:
                    sys.stderr.write('%-25s no record\n' % division_id)
                continue

            license_path = os.path.join(dirname(config['file']), 'LICENSE.txt')
            license_text = ''
            if os.path.exists(license_path):
                with open(license_path) as f:
                    license_text = f.read().rstrip('\n')

            expected = expecteds[division_id]
            expected['Shapefile?'] = 'Y'
            expected['Permission to distribute'] = permission_to_distribute

            if 'data_url' in config:
                expected['Last boundary'] = 'N/A'
            elif 'last_updated' in config:
                expected['Last boundary'] = config['last_updated'].strftime('%-m/%-d/%Y')

            if 'source_url' in config:
                expected['Contact'] = 'N/A'
                expected['Received via'] = 'online'
            elif expected['Province or territory'] == 'SK' and expected['Geographic type'] == 'RM':
                # NOTE(review): the end of this literal reads like a redacted
                # email address; confirm against the spreadsheet.
                expected['Contact'] = 'Mikael Nagel\nMapping Technician\nTel: 1-306-569-2988 x205\nToll free: 1-800-663-6864\[email protected]'
                expected['Received via'] = 'purchase'
            else:
                match = all_rights_reserved_terms_re.search(license_text)
                if match:
                    expected['Contact'] = match.group(1)
                expected['Received via'] = 'email'

    reader = csv_dict_reader('https://docs.google.com/spreadsheets/d/1ihCIDc9EtvxF7kzPg3Yk6e928DN7JzaycH92IBYr0QU/pub?gid=25&single=true&output=csv', 'utf-8')
    ignored_keys = ('Population', 'URL', 'Request notes', 'Next boundary', 'Response notes')
    actuals = set()
    for actual in reader:
        actuals.add(actual['OCD'])
        expected = expecteds.get(actual['OCD'])
        if expected is None:
            # No expectation exists for this row; it is reported for removal
            # below instead of aborting the comparison with a KeyError.
            continue

        for key in actual.keys() - ignored_keys:
            e = expected[key]
            a = actual[key]
            if e != a and not _is_accepted_difference(key, e, a, expected, actual):
                sys.stderr.write('%-25s %s: expected "%s" got "%s"\n' % (key, actual['OCD'], e, a))

    for division_id in expecteds.keys() - actuals:
        record = expecteds[division_id]
        if record['Shapefile?'] != 'N/A':
            sys.stderr.write('%s\t%s\t%s\t%s\t%s\n' % (division_id, record['Geographic name'], record['Province or territory'], record['Geographic type'], record['Population']))

    for division_id in actuals - expecteds.keys():
        sys.stderr.write('Remove %s\n' % division_id)


def _is_accepted_difference(key, e, a, expected, actual):
    """
    Returns whether the spreadsheet value `a` may differ from the expected
    value `e` for column `key`, given the full expected and actual rows.
    """
    # Note: Some of the following conditions are repetitive for readability.

    # Change expectations for in-progress requests.
    if expected['Shapefile?'] == 'Request' and actual['Shapefile?'] == 'Requested' and (
            # Request sent.
            (key == 'Shapefile?' and e == 'Request' and a == 'Requested') or
            # Contact found.
            (key == 'Contact' and not e and a)):
        return True

    # Some provinces and territories have missing expectations, in which case we defer to the spreadsheet.
    if actual['Province or territory'] in ('ON', 'MB') and not expected['Shapefile?'] and (
            # We determined that we needed to request boundaries.
            (key == 'Shapefile?' and not e and a in ('Request', 'Requested')) or
            # We determined that we needed to request boundaries, and did so.
            (actual['Shapefile?'] == 'Requested' and key == 'Contact' and not e and a)):
        return True

    # Contacts for private data are only stored in the spreadsheet.
    if expected['Permission to distribute'] == 'N' and key == 'Contact' and not e and a:
        return True

    # MFIPPA receptions are only stored in the spreadsheet.
    return key == 'Received via' and e == 'email' and a == 'MFIPPA'
def province_and_territory_codes():
    """
    Returns a mapping from the `sgc` attribute of each Canadian province and
    territory to its OCD division identifier.

    The mapping is filled on the first call and kept in the module-level
    `province_and_territory_codes_memo`; later calls return that same object.
    """
    if province_and_territory_codes_memo:
        return province_and_territory_codes_memo

    for candidate in Division.all('ca'):
        if candidate._type not in ('province', 'territory'):
            continue
        province_and_territory_codes_memo[candidate.attrs['sgc']] = candidate.id

    return province_and_territory_codes_memo
def get_definition(division_id, aggregation=False):
    """
    Returns the expected configuration for a given division.

    The result is a dict with the keys `module_name`, `name`, `class_name`,
    `url` and `division_name`. An Exception is raised for a division whose type
    is not `country`, `province`, `territory`, `cd`, `csd` or `arrondissement`.
    The census type names are scraped on the first call and cached in
    `ocdid_to_type_name_map`.
    """
    if not ocdid_to_type_name_map:
        # Map census division type codes to names.
        cd_type_names = {}
        cd_table = lxml.html.fromstring(
            requests.get('https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_4-eng.cfm').content)
        for heading in cd_table.xpath('//table//th[@headers]/text()'):
            code, name = heading.split(' – ', 1)
            cd_type_names[code] = name.split(' / ', 1)[0]

        # Map census subdivision type codes to names.
        csd_type_names = {}
        csd_table = lxml.html.fromstring(
            requests.get('https://www12.statcan.gc.ca/census-recensement/2016/ref/dict/tab/t1_5-eng.cfm').content)
        for heading in csd_table.xpath('//table//th[@headers]/text()'):
            code, name = heading.split(' – ', 1)  # non-breaking space
            csd_type_names[code] = name.split(' / ', 1)[0]

        # Map OCD identifiers to census types.
        type_names_by_division_type = {'cd': cd_type_names, 'csd': csd_type_names}
        for candidate in Division.all('ca', from_csv=ocd_division_csv):
            type_names = type_names_by_division_type.get(candidate._type)
            if type_names is not None:
                ocdid_to_type_name_map[candidate.id] = type_names[candidate.attrs['classification']]

    division = Division.get(division_id, from_csv=ocd_division_csv)
    ocd_type_id = type_id(division.id)

    expected = {}
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')
    # Shortened forms of census type names used in council names.
    council_infixes = {
        'Municipality': 'Municipal',
        'Specialized municipality': 'Municipal',
        'District municipality': 'District',
        'Regional municipality': 'Regional',
    }

    # Determine the module name, name and classification.
    if division._type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'

    elif division._type in ('province', 'territory'):
        if aggregation:
            expected['module_name'] = 'ca_{}_municipalities'.format(ocd_type_id)
            expected['name'] = '{} Municipalities'.format(division.name)
        else:
            expected['module_name'] = 'ca_{}'.format(ocd_type_id)
            if ocd_type_id in ('nl', 'ns'):
                expected['name'] = '{} House of Assembly'.format(division.name)
            elif ocd_type_id == 'qc':
                expected['name'] = 'Assemblée nationale du Québec'
            else:
                expected['name'] = 'Legislative Assembly of {}'.format(division.name)

    elif division._type == 'cd':
        expected['module_name'] = 'ca_{}_{}'.format(
            province_or_territory_abbreviation(division.id), slug(division.name))
        name_infix = ocdid_to_type_name_map[division.id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'csd':
        expected['module_name'] = 'ca_{}_{}'.format(
            province_or_territory_abbreviation(division.id), slug(division.name))
        if ocd_type_id[:2] == '24':
            # French names elide the preposition before a vowel.
            template = "Conseil municipal d'{}" if division.name[0] in vowels else "Conseil municipal de {}"
            expected['name'] = template.format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division.id]
            name_infix = council_infixes.get(name_infix, name_infix)
            expected['name'] = '{} {} Council'.format(division.name, name_infix)

    elif division._type == 'arrondissement':
        expected['module_name'] = 'ca_{}_{}_{}'.format(
            province_or_territory_abbreviation(division.parent.id),
            slug(division.parent.name),
            slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name.startswith('Le '):
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)

    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division.id, division._type))

    # Determine the class name.
    without_punctuation = re.sub("['.]", '', division.name)
    with_plain_hyphens = re.sub("[—–]", '-', without_punctuation)
    words = []
    for word in re.split('[ -]', with_plain_hyphens):
        if re.match('[A-Z]', word):
            words.append(word)
        else:
            words.append(word.capitalize())
    expected['class_name'] = unidecode(str(''.join(words)))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected
def handle(self, *args, **options):
    """
    Generates mappings.py in settings.BASE_DIR.

    Fetches the list of boundary sets from represent.opennorth.ca and, for
    each one, determines the OCD division ID prefix shared by its boundaries
    and the key (a boundary field name, or the name of a function written to
    the generated file) used to derive the rest of the identifier. The result
    is written as the IMAGO_BOUNDARY_MAPPINGS dictionary.

    Boundary sets whose division cannot be determined are logged and left
    out of the generated file.
    """
    mappings = {}

    divisions = list(Division.all('ca'))  # cache all divisions

    for obj in requests.get('https://represent.opennorth.ca/boundary-sets/?limit=0').json()['objects']:
        slug = obj['url'].split('/')[2]

        # The census boundary sets are not mapped.
        if obj['url'] in ('/boundary-sets/census-divisions/', '/boundary-sets/census-subdivisions/'):
            continue

        if obj['url'] in ('/boundary-sets/federal-electoral-districts/', '/boundary-sets/federal-electoral-districts-next-election/'):
            prefix = 'ocd-division/country:ca/ed:'
            boundary_key = 'external_id'
        else:
            url = 'https://represent.opennorth.ca{}'.format(obj['url'])
            extra = requests.get(url).json()['extra']

            # Determine the division that the boundary set subdivides.
            if extra.get('ocd_division'):
                division_id = extra['ocd_division']
            elif extra.get('geographic_code'):
                geographic_code = extra['geographic_code']
                geographic_code_length = len(geographic_code)
                if geographic_code_length == 7:
                    division_id = 'ocd-division/country:ca/csd:{}'.format(geographic_code)
                elif geographic_code_length == 4:
                    division_id = 'ocd-division/country:ca/cd:{}'.format(geographic_code)
                elif geographic_code_length == 2:
                    match = next((division for division in divisions if division.attrs['sgc'] == geographic_code), None)
                    if match is None:
                        # No division carries this two-digit code.
                        log.error('Unrecognized geographic_code {}'.format(geographic_code))
                        continue
                    division_id = match.id
                else:
                    log.error('Unrecognized geographic_code {}'.format(geographic_code))
                    continue
            else:
                # Without either field there is nothing to look up; skip the
                # boundary set instead of reusing a previous division_id.
                log.error('No ocd_division or geographic_code for {}'.format(url))
                continue

            try:
                division = Division.get(division_id)

                # Choose which child types to ignore when looking for the
                # type of the boundary set's boundaries.
                if division._type == 'borough':
                    division = division.parent
                    division_id = division.id
                    exclude = ('place', 'borough')
                elif 'boroughs' in obj['name']:
                    exclude = ('place', 'district')
                elif 'districts' in obj['name']:
                    exclude = ('place', 'borough')
                else:
                    exclude = ('place',)

                children = [child for child in division.children() if child._type not in exclude]
                subtypes = {child._type for child in children}
                if not subtypes:
                    log.warn('No subtypes for {}'.format(division_id))
                    continue
                if len(subtypes) > 1:
                    log.warn('>1 subtypes for {}: {}'.format(division_id, list(subtypes)))
                    continue
                prefix = '{}/{}:'.format(division_id, subtypes.pop())

                # The first child whose type ID does not match numeric_re
                # decides how identifiers are derived from boundaries.
                boundary_key = 'external_id'
                for child in children:
                    child_type_id = child.id.rsplit(':', 1)[1]
                    if not numeric_re.search(child_type_id):
                        if len(child_type_id) in (1, 3):  # Lunenburg 1-letter identifiers, BC uses 3-letter identifiers
                            boundary_key = 'lower'
                        else:
                            boundary_key = 'matcher'
                        break
            except KeyError:
                log.warn('No division for {}'.format(url))
                # Do not record a mapping built from a previous iteration's
                # prefix and boundary_key.
                continue

        mappings[slug] = {
            'key': 'id',
            'prefix': prefix,
            'boundary_key': boundary_key,
        }

    with open(os.path.join(settings.BASE_DIR, 'mappings.py'), 'w') as f:
        f.write("# DO NOT EDIT THIS AUTO-GENERATED FILE\n")
        f.write("import re\n\n")
        f.write("from django.template.defaultfilters import slugify\n\n")
        f.write("leading_zero_re = re.compile(r'^0+')\n")
        # The backslash is doubled so that a literal \d reaches the file.
        f.write("invalid_re = re.compile(r'[^a-z\\d._~-]')\n")
        f.write("leading_district_re = re.compile(r'^District ')\n")
        f.write("lower = lambda boundary: boundary['external_id'].lower()\n\n")
        f.write("matcher = lambda boundary: leading_district_re.sub('', leading_zero_re.sub('', invalid_re.sub('~', boundary['name'].lower().replace(' ', '_'))))\n\n")
        f.write('IMAGO_BOUNDARY_MAPPINGS = {\n')
        for slug, data in sorted(mappings.items()):
            f.write(" '{}': {{\n".format(slug))
            for key, value in sorted(data.items()):
                if key == 'boundary_key' and value in ('lower', 'matcher'):
                    # Written unquoted: refers to a function defined above
                    # in the generated file.
                    f.write(" '{}': {},\n".format(key, value))
                else:
                    f.write(" '{}': '{}',\n".format(key, value))
            f.write(" },\n")
        f.write('}\n')
def get_definition(division_id, aggregation=False):
    """
    Returns the expected attributes of the scraper module for a division.

    On the first call (while ocdid_to_type_name_map is empty), fetches the
    census division and census subdivision type tables from statcan.gc.ca
    and records the type name of every 'cd' and 'csd' division.

    :param division_id: an OCD division identifier
    :param aggregation: whether the module covers the municipalities of a
        province or territory rather than its legislature
    :returns: a dictionary with the keys 'module_name', 'name',
        'class_name', 'url' and 'division_name'
    :raises Exception: if the type of the division is not recognized
    """
    if not ocdid_to_type_name_map:
        # Map census division and census subdivision type codes to names.
        type_names = {}
        for division_type, table_url in (
            ('cd', 'https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-4-eng.cfm'),
            ('csd', 'https://www12.statcan.gc.ca/census-recensement/2011/ref/dict/table-tableau/table-tableau-5-eng.cfm'),
        ):
            document = lxml.html.fromstring(requests.get(table_url).content)
            # Keep only the part of the title before " /".
            type_names[division_type] = {
                abbr.text_content(): re.sub(r' /.+\Z', '', abbr.attrib['title'])
                for abbr in document.xpath('//table/tbody/tr/th[1]/abbr')
            }

        # Map OCD identifiers to census types.
        for division in Division.all('ca'):
            if division._type in type_names:
                ocdid_to_type_name_map[division.id] = type_names[division._type][division.attrs['classification']]

    codes = province_and_territory_codes()
    division = Division.get(division_id)

    expected = {}
    vowels = ('A', 'À', 'E', 'É', 'I', 'Î', 'O', 'Ô', 'U')

    sections = division_id.split('/')
    ocd_type, ocd_type_id = sections[-1].split(':')

    # Determine the module name, name and classification.
    if ocd_type == 'country':
        expected['module_name'] = 'ca'
        expected['name'] = 'Parliament of Canada'
    elif ocd_type in ('province', 'territory'):
        pattern = 'ca_{}_municipalities' if aggregation else 'ca_{}'
        expected['module_name'] = pattern.format(ocd_type_id)
        if aggregation:
            expected['name'] = '{} Municipalities'.format(division.name)
        elif ocd_type_id in ('nl', 'ns'):
            expected['name'] = '{} House of Assembly'.format(division.name)
        elif ocd_type_id == 'qc':
            expected['name'] = 'Assemblée nationale du Québec'
        else:
            expected['name'] = 'Legislative Assembly of {}'.format(division.name)
    elif ocd_type == 'cd':
        # The first two digits of the type ID are the code of the province
        # or territory.
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        name_infix = ocdid_to_type_name_map[division_id]
        if name_infix == 'Regional municipality':
            name_infix = 'Regional'
        expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'csd':
        province_or_territory_type_id = codes[ocd_type_id[:2]].split(':')[-1]
        expected['module_name'] = 'ca_{}_{}'.format(province_or_territory_type_id, slug(division.name))
        if ocd_type_id[:2] == '24':
            # Census subdivisions under code 24 get French council names.
            if division.name[0] in vowels:
                expected['name'] = "Conseil municipal d'{}".format(division.name)
            else:
                expected['name'] = "Conseil municipal de {}".format(division.name)
        else:
            name_infix = ocdid_to_type_name_map[division_id]
            if name_infix in ('Municipality', 'Specialized municipality'):
                name_infix = 'Municipal'
            elif name_infix == 'District municipality':
                name_infix = 'District'
            elif name_infix == 'Regional municipality':
                name_infix = 'Regional'
            expected['name'] = '{} {} Council'.format(division.name, name_infix)
    elif ocd_type == 'arrondissement':
        # The parent section of the identifier is the census subdivision.
        census_subdivision_type_id = sections[-2].split(':')[-1]
        province_or_territory_type_id = codes[census_subdivision_type_id[:2]].split(':')[-1]
        parent_name = Division.get('/'.join(sections[:-1])).name
        expected['module_name'] = 'ca_{}_{}_{}'.format(province_or_territory_type_id, slug(parent_name), slug(division.name))
        if division.name[0] in vowels:
            expected['name'] = "Conseil d'arrondissement d'{}".format(division.name)
        elif division.name[:3] == 'Le ':
            # "de Le" contracts to "du".
            expected['name'] = "Conseil d'arrondissement du {}".format(division.name[3:])
        else:
            expected['name'] = "Conseil d'arrondissement de {}".format(division.name)
    else:
        raise Exception('{}: Unrecognized OCD type {}'.format(division_id, ocd_type))

    # Determine the class name: drop apostrophes and periods, split on
    # spaces and dashes, and capitalize words that do not start with an
    # ASCII capital letter.
    class_name_parts = re.split('[ -]', re.sub("[—–]", '-', re.sub("['.]", '', division.name)))
    expected['class_name'] = unidecode(text_type(''.join(word if re.match('[A-Z]', word) else word.capitalize() for word in class_name_parts)))
    if aggregation:
        expected['class_name'] += 'Municipalities'

    # Determine the url.
    expected['url'] = division.attrs['url']

    # Determine the division name.
    expected['division_name'] = division.name

    return expected