예제 #1
0
def export_ne0(simp: int):
    """Write one GeoJSON file per ne0 entry listed in ``ne012.json``.

    The country features for the requested simplification are loaded, their
    property names are lower-cased in place, and each ne0 entry's feature
    (looked up by ``ne_id`` and passed through ``fix_props``) is written to
    ``export_geojson_dir / simp_map[simp] / 'ne0'``.

    Args:
        simp: Simplification selector. A falsy value reads
            ``countries.geojson``, otherwise ``countries-<simp>.geojson``.
            It is also the key into ``simp_map`` for the output folder.

    Raises:
        KeyError: If an ``ne_id`` from ``ne012.json`` has no matching feature.
    """
    levels = read_json(id_dir / 'ne012.json')
    simp_str = f'-{simp}' if simp else ''
    countries = read_json(geojson_dir /
                          f'countries{simp_str}.geojson')['features']

    features_by_id = {}

    for feature in countries:
        prop = feature['properties']
        # Iterate over a snapshot of the keys: renaming keys while iterating
        # the dict itself raises RuntimeError on current CPython versions.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)
        features_by_id[prop['ne_id']] = feature

    # Every ne0 file goes into the same folder, so create it only once.
    export_subdir = export_geojson_dir / f'{simp_map[simp]}' / 'ne0'
    export_subdir.mkdir(exist_ok=True, parents=True)

    for ne0, ne0_data in levels.items():
        feature_data = features_by_id[ne0_data['ne_id']]
        fix_props(feature_data, ne0_data, ne0)

        # Drop the first four characters of the id (the level prefix).
        filename = ne0[4:].lower()
        write_json(export_subdir / f'{filename}.geojson', feature_data)

    print(f'{len(countries)} ne0 GeoJSONs exported, simplification: {simp}')
예제 #2
0
def export_ne3(simp: int):
    """Write one GeoJSON file per ne3 entry found in the ``ne3_dir`` JSON files.

    The state features for the requested simplification are loaded, their
    property names are lower-cased in place, and every ne3 entry of every
    ``<country>.json`` file in ``ne3_dir`` is written to
    ``export_geojson_dir / simp_map[simp] / 'ne3' / <country> / <state>.geojson``.

    Args:
        simp: Simplification selector. A falsy value reads
            ``states.geojson``, otherwise ``states-<simp>.geojson``.
            It is also the key into ``simp_map`` for the output folder.

    Raises:
        KeyError: If an ``ne_id`` has no matching feature.
        AssertionError: If the country part of an ne3 id does not match the
            stem of the JSON file it was read from.
    """
    simp_str = f'-{simp}' if simp else ''
    states = read_json(geojson_dir / f'states{simp_str}.geojson')['features']

    features_by_id = {}

    for feature in states:
        prop = feature['properties']
        # Iterate over a snapshot of the keys: renaming keys while iterating
        # the dict itself raises RuntimeError on current CPython versions.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)
        features_by_id[prop['ne_id']] = feature

    for country_json in ne3_dir.glob('*.json'):
        country_code = country_json.stem
        country_level_3 = read_json(country_json)

        for ne3, ne3_data in country_level_3.items():
            feature_data = features_by_id[ne3_data['ne_id']]
            fix_props(feature_data, ne3_data, ne3)

            # The id has the shape '<level>:<country>-<state>'.
            ne3_start, ne3_end = ne3.split(':')[1].split('-')
            assert ne3_start.lower() == country_code

            export_subdir = export_geojson_dir / f'{simp_map[simp]}' / 'ne3' / country_code
            export_path = export_subdir / f'{ne3_end.lower()}.geojson'

            export_subdir.mkdir(exist_ok=True, parents=True)
            write_json(export_path, feature_data)

    print(f'{len(states)} ne3 GeoJSONs exported, simplification: {simp}')
예제 #3
0
def generate_iso1_list():
    """Render ``iso1_list.md``, a Markdown table of every entry in ``iso1.json``.

    Rows are ordered by name. Each row links to the entry's q7 GeoJSON and,
    when ``iso2.json`` has keys whose prefix (before the first '-') equals the
    ISO1 code, to the per-country ISO2 page.
    """
    countries = read_json(export_dir / 'iso1.json')

    subdivisions = read_json(export_dir / 'iso2.json')
    codes_with_iso2 = set()
    for iso2_key in subdivisions:
        codes_with_iso2.add(iso2_key.split('-')[0])

    chunks = [
        '# ISO 3166-1 list\n',
        'Name | ISO1 | ISO2 | GeoJSON | OSM | Wikidata | Wikipedia | population \n',
        '--- | --- | --- | --- | --- | --- | --- | --: \n',
    ]

    by_name = list(countries.values())
    by_name.sort(key=lambda entry: entry['name'])

    for entry in by_name:
        row = build_row_data(entry)

        code = entry['iso1']
        path = entry['geojson_path']
        geojson_link = f'[GeoJSON](../export/geojson/q7/{path})'

        # Only countries that have subdivisions get a link to their ISO2 page.
        iso_2_link = f'[ISO2](iso2_list/{code}.md)' if code in codes_with_iso2 else ''

        chunks.append(
            f'{row["name"]} | {code} | {iso_2_link} | {geojson_link} | '
            f'{row["osm_link"]} | {row["wikidata_link"]} | '
            f'{row["wikipedia_link"]} | {row["population_str"]}'
            f'\n')

    write_file(docs_dir / 'iso1_list.md', ''.join(chunks))
예제 #4
0
def collect_iso():
    """Scan downloaded GeoJSON files and record their ISO1/ISO2 features.

    Rebuilds the module-level OSM lookup maps (``osm_iso1_map``,
    ``osm_iso2_map``, ``osm_wd_map``), overlays the manual entries from
    ``custom_osm.json``, then feeds the features of every ``*.GeoJson`` file
    under the download directory to ``add_iso`` together with the two output
    files. When the ``COUNTRY`` environment variable is set, only that
    sub-directory is scanned and the ``osm_missing`` features are skipped.

    Files that fail JSON decoding are reported and skipped.
    """
    global osm_iso1_map, osm_iso2_map, osm_wd_map

    osm_iso1_map = get_osm_iso1_map()
    osm_iso2_map = get_osm_iso2_map()
    osm_wd_map = get_osm_wd_map()

    # Manual overrides are keyed by OSM id (stored as strings in JSON).
    custom_osm = read_json(fixes_dir / 'custom_osm.json')
    custom_iso1 = {
        int(k): v['iso1']
        for k, v in custom_osm.items() if 'iso1' in v
    }
    custom_iso2 = {
        int(k): v['iso2']
        for k, v in custom_osm.items() if 'iso2' in v
    }
    custom_wd = {
        int(k): v['wikidata_id']
        for k, v in custom_osm.items() if 'wikidata_id' in v
    }
    osm_iso1_map.update(custom_iso1)
    osm_iso2_map.update(custom_iso2)
    osm_wd_map.update(custom_wd)

    # Read the optional country filter once and reuse it below.
    country = os.environ.get('COUNTRY')

    download_dir = wam_geojson_download_dir
    if country:
        download_dir = download_dir / country

    geojson_files = download_dir.glob(
        '**/*.GeoJson')  # strange capitalization inside zips

    collected_dir.mkdir(parents=True, exist_ok=True)

    # The context manager closes both output files even if processing raises.
    with open(iso1_collected_path, 'w') as iso1_found, \
            open(iso2_collected_path, 'w') as iso2_found:
        geojson_files_sorted = sorted(geojson_files, key=lambda p: p.stem)

        for f in geojson_files_sorted:
            print(f.parent.stem, f.stem)
            try:
                features = read_json(f)['features']
            except json.decoder.JSONDecodeError as e:
                print(f'  Error reading {f.stem} {e}')
                continue
            add_iso(features, iso1_found, iso2_found)

        # add features from osm_missing
        if not country:
            print('osm_missing_features')
            osm_missing_features = get_osm_missing_features()
            add_iso(osm_missing_features,
                    iso1_found,
                    iso2_found,
                    is_fixes=True)
예제 #5
0
def get_geometry_from_medium_high(geojson_path):
    """Return a feature's geometry, preferring the medium export over high.

    The medium GeoJSON is used when the file exists and its ``geometry`` is
    truthy; otherwise the geometry of the high GeoJSON is returned.

    Args:
        geojson_path: Path of the GeoJSON relative to a quality folder.
    """
    geojson_root = export_dir / 'geojson'
    medium_path = geojson_root / 'medium' / geojson_path
    high_path = geojson_root / 'high' / geojson_path

    if medium_path.is_file():
        medium_geometry = read_json(medium_path)['geometry']
        if medium_geometry:
            print('    using geometry from medium geojson')
            return medium_geometry

    # Fall back to the high-quality file.
    high_document = read_json(high_path)
    print('    using geometry from high geojson')
    return high_document['geometry']
예제 #6
0
def save_wam_population():
    """Fetch population data for all ISO1/ISO2 features and store it as JSON.

    Collects the ``wikidata_id`` of every feature in the 'high' ``iso1`` and
    ``iso2`` GeoJSON files, passes the combined ids to ``get_population`` and
    writes the result to ``wam_data_dir / 'population.json'``.
    """
    high_dir = simp_dir / 'high'

    iso1_features = read_json(high_dir / 'iso1.geojson')['features']
    iso1_ids = {feat['properties']['wikidata_id'] for feat in iso1_features}

    iso2_features = read_json(high_dir / 'iso2.geojson')['features']
    iso2_ids = {feat['properties']['wikidata_id'] for feat in iso2_features}

    # Each Wikidata id is requested once, even if it is in both files.
    population_data = get_population(list(iso1_ids | iso2_ids))

    wam_data_dir.mkdir(parents=True, exist_ok=True)
    target = wam_data_dir / 'population.json'
    write_json(target, population_data, indent=2, sort_keys=True)
예제 #7
0
def generate_fips_list():
    """Render ``fips_list.md``: one Markdown table of counties per state.

    Counties come from ``fips.json``. States appear in ascending
    ``state_code`` order and the counties inside each state are ordered by
    name.
    """
    fips_json = read_json(export_dir / 'fips.json')
    counties = sorted(fips_json.values(), key=lambda c: c['name'])
    state_by_code = get_state_codes()[0]

    # Group counties per state; dict insertion order keeps the states sorted.
    grouped = {}
    for county in sorted(counties, key=lambda c: c['state_code']):
        grouped.setdefault(county['state_code'], []).append(county)

    parts = [
        '# US county FIPS code list\n',
        '[GeoJSON for all counties](../export/geojson/q5/fips_all.geojson)',
    ]

    for state_code, members in grouped.items():
        parts.append(
            f'\n\n#### {state_by_code[state_code]}, state code: {state_code}\n')
        parts.append('Name | FIPS | GeoJSON | population \n')
        parts.append('--- | --- | --- | --: \n')

        for county in sorted(members, key=lambda c: c['name']):
            fips = county['fips']
            name = county['name']
            population = county['population']

            # Falsy populations (missing or zero) render as an empty cell.
            population_str = f'{population:,}' if population else ''

            # Files are grouped in folders named after the first two FIPS digits.
            geojson_link = f'[GeoJSON](../export/geojson/q8/fips/{fips[:2]}/{fips}.geojson)'

            parts.append(
                f'{name} | {fips} | {geojson_link} | {population_str}\n')

    write_file(docs_dir / 'fips_list.md', ''.join(parts))
예제 #8
0
def get_osm_wd_map():
    """Return a mapping of OSM relation id (int) to Wikidata id (str).

    The mapping is cached in ``wikidata_dir / 'osm_wd_map.json'``. When the
    cache file exists it is loaded; otherwise the Wikidata SPARQL endpoint is
    queried for every item that has an OSM relation id (P402) together with
    either P297 or P300, and the result is written to the cache file.

    Returns:
        dict[int, str]: OSM id -> Wikidata id, e.g. the last path segment of
        the item's URI.
    """
    file_path = wikidata_dir / 'osm_wd_map.json'
    if file_path.is_file():
        # JSON object keys are always strings; convert them back to int so
        # the cached map has the same key type as a freshly built one.
        return {int(osm): wd_id for osm, wd_id in read_json(file_path).items()}

    endpoint_url = "https://query.wikidata.org/sparql"

    query = """SELECT ?region ?osm WHERE {
          ?region wdt:P297 ?iso1;
            wdt:P402 ?osm.
        }"""

    iso1_results = get_results(endpoint_url, query)

    query = """SELECT ?region ?osm WHERE {
              ?region wdt:P300 ?iso2;
                wdt:P402 ?osm.
            }"""

    iso2_results = get_results(endpoint_url, query)

    osm_wd_map = {}

    for result in iso1_results["results"]["bindings"] + iso2_results["results"]["bindings"]:
        osm = int(result['osm']['value'])
        # The region value is a URI; its last path segment is the item id.
        wd_id = result['region']['value'].split('/')[-1]

        osm_wd_map[osm] = wd_id

    write_json(file_path, osm_wd_map, indent=2)
    return osm_wd_map
예제 #9
0
def generate_br_muni_list():
    """Render ``br_muni_list.md``: one Markdown table of municipalities per state.

    Municipalities come from ``br_muni.json``. States appear in ascending
    ``state_code`` order and the municipalities inside each state are ordered
    by ``name``.
    """
    br_muni_json = read_json(export_dir / 'br_muni.json')
    municipalities = sorted(br_muni_json.values(), key=lambda m: m['name'])

    # Group per state; dict insertion order keeps the states sorted.
    grouped = {}
    for muni in sorted(municipalities, key=lambda m: m['state_code']):
        grouped.setdefault(muni['state_code'], []).append(muni)

    parts = [
        '# Brazil municipality IBGE code list\n',
        '[GeoJSON for all municipalities](../export/geojson/q5/br_muni_all.geojson)',
    ]

    for state_code, members in grouped.items():
        parts.append(f'\n\n#### {state_code}\n')
        parts.append('Name | IBGE code | GeoJSON | population \n')
        parts.append('--- | --- | --- | --: \n')

        for muni in sorted(members, key=lambda m: m['name']):
            ibge_code = muni['ibge_code']
            name_long = muni['name_long']
            population = muni['population']
            geojson_path = muni['geojson_path']

            # Falsy populations (missing or zero) render as an empty cell.
            population_str = f'{population:,}' if population else ''
            geojson_link = f'[GeoJSON](../geojson/q8/{geojson_path})'

            parts.append(
                f'{name_long} | {ibge_code} | {geojson_link} | {population_str}\n')

    docs_dir.mkdir(parents=True, exist_ok=True)
    write_file(docs_dir / 'br_muni_list.md', ''.join(parts))
예제 #10
0
def generate_iso2_list_country(iso1):
    """Render ``iso2_list/<iso1>.md`` with the ISO 3166-2 entries of one country.

    Entries of ``iso2.json`` whose ``iso2`` code starts with ``iso1`` (the
    part before the first '-') are grouped by ``admin_level``; each level gets
    its own Markdown table with rows ordered by name.

    Args:
        iso1: Country code used both as filter and as output file name.
    """
    iso2_json = read_json(export_dir / 'iso2.json')

    selected = []
    for entry in iso2_json.values():
        if entry['iso2'].split('-')[0] == iso1:
            selected.append(entry)

    # Group per admin level; dict insertion order keeps the levels ascending.
    by_level = {}
    for entry in sorted(selected, key=lambda e: e['admin_level']):
        by_level.setdefault(entry['admin_level'], []).append(entry)

    parts = [f'# ISO 3166-2 list: {iso1}\n']

    for level, members in by_level.items():
        parts.append(f'\n\n#### Level {level}\n')
        parts.append(
            'Name | ISO2 | GeoJSON | OSM | Wikidata | Wikipedia | population \n'
            '--- | --- | --- | --- | --- | --- | --: \n')

        for entry in sorted(members, key=lambda e: e['name']):
            row = build_row_data(entry)

            iso2 = entry['iso2']
            path = entry['geojson_path']
            geojson_link = f'[GeoJSON](../../export/geojson/q8/{path})'

            parts.append(f'{row["name"]} | {iso2} | {geojson_link} | '
                         f'{row["osm_link"]} | {row["wikidata_link"]} | '
                         f'{row["wikipedia_link"]} | {row["population_str"]}'
                         f'\n')

    write_file(docs_dir / 'iso2_list' / f'{iso1}.md', ''.join(parts))
예제 #11
0
def generate_iso2_list():
    """Regenerate the per-country ISO 3166-2 Markdown pages from scratch.

    The ``iso2_list`` docs folder is removed and recreated, then one page is
    generated for every distinct country prefix among the keys of
    ``iso2.json``, in sorted order.
    """
    iso2_keys = read_json(export_dir / 'iso2.json')
    prefixes = set()
    for iso2_key in iso2_keys:
        prefixes.add(iso2_key.split('-')[0])

    # Start from an empty folder so pages of removed countries do not linger.
    target_dir = docs_dir / 'iso2_list'
    shutil.rmtree(target_dir, ignore_errors=True)
    target_dir.mkdir(parents=True)

    for prefix in sorted(prefixes):
        generate_iso2_list_country(prefix)
예제 #12
0
def export_ne2(simp: int):
    """Write one GeoJSON file per ne2 entry nested in ``ne012.json``.

    The subunit features for the requested simplification are loaded, their
    property names are lower-cased in place, and every ``sub2`` entry found
    under a ``sub1`` entry is written to
    ``export_geojson_dir / simp_map[simp] / 'ne2'``.

    Args:
        simp: Simplification selector. A falsy value reads
            ``subunits.geojson``, otherwise ``subunits-<simp>.geojson``.
            It is also the key into ``simp_map`` for the output folder.

    Raises:
        KeyError: If an ``ne_id`` has no matching feature.
    """
    levels = read_json(id_dir / 'ne012.json')
    simp_str = f'-{simp}' if simp else ''
    subunits = read_json(geojson_dir /
                         f'subunits{simp_str}.geojson')['features']

    features_by_id = {}

    for feature in subunits:
        prop = feature['properties']
        # Iterate over a snapshot of the keys: renaming keys while iterating
        # the dict itself raises RuntimeError on current CPython versions.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)
        features_by_id[prop['ne_id']] = feature

    # Every ne2 file goes into the same folder, so create it only once.
    export_subdir = export_geojson_dir / f'{simp_map[simp]}' / 'ne2'
    export_subdir.mkdir(exist_ok=True, parents=True)

    counter = 0
    for ne0_data in levels.values():
        if 'sub1' not in ne0_data:
            continue

        for ne1_data in ne0_data['sub1'].values():
            if 'sub2' not in ne1_data:
                continue

            for ne2, ne2_data in ne1_data['sub2'].items():
                feature_data = features_by_id[ne2_data['ne_id']]
                fix_props(feature_data, ne2_data, ne2)

                # Drop the first four characters of the id (the level prefix).
                filename = ne2[4:].lower()
                write_json(export_subdir / f'{filename}.geojson', feature_data)

                counter += 1

    print(f'{counter} ne2 GeoJSONs exported, simplification: {simp}')
예제 #13
0
def split_geojson(iso_level: int, simp_level: str):
    """Split one combined ISO GeoJSON into per-code features and write them.

    Features of ``simp_dir / simp_level / iso<iso_level>.geojson`` are
    processed in ascending ``admin_level`` order, validated, grouped by ISO
    code, de-duplicated and handed to ``write_json_and_geojsons``.

    Args:
        iso_level: 1 selects ISO1 validation; any other value uses ISO2
            validation.
        simp_level: Name of the simplification folder. For every level other
            than 'high' the exported ``iso1.json`` / ``iso2.json`` are
            (re)loaded into the module globals first.
    """
    global population_map, iso1_json, iso2_json

    # The population map is loaded once and then reused across calls.
    if not population_map:
        population_map = read_json(wam_data_dir / 'population.json')

    if simp_level != 'high':
        iso1_json = read_json(export_dir / 'iso1.json')
        iso2_json = read_json(export_dir / 'iso2.json')

    print(f'Splitting iso{iso_level} to level: {simp_level}')
    source_path = simp_dir / simp_level / f'iso{iso_level}.geojson'

    by_admin_level = sorted(
        read_json(source_path)['features'],
        key=lambda feat: feat['properties']['admin_level'])

    grouped = {}

    for raw_feature in by_admin_level:
        processed = process_feature_properties(raw_feature, iso_level,
                                               simp_level)
        if processed is None:
            continue

        cleaned = processed['feature']
        iso = processed['iso']

        # Pick the validator and the message label matching the ISO level.
        if iso_level == 1:
            is_valid, label = validate_iso1(iso), 'iso1'
        else:
            is_valid, label = validate_iso2(iso), 'iso2'

        if not is_valid:
            print(f'invalid {label}: {iso}')
            continue

        grouped.setdefault(iso, []).append(cleaned)

    write_json_and_geojsons(deduplicate_features_by_iso(grouped), iso_level,
                            simp_level)
예제 #14
0
def generate_country_list():
    """Render ``country_list.md``: a nested Markdown list of ne0/ne1/ne2 entries.

    Every level is ordered by name. Countries with a matching JSON file in
    ``ne3_dir`` also get a link to their states/provinces page.
    """
    levels = read_json(ne_id_dir / 'ne012.json')
    parts = ['# Country list']

    def by_name(pair):
        # Sort key for (id, data) pairs.
        return pair[1]['name']

    for ne0, ne0_data in sorted(levels.items(), key=by_name):
        country_name = ne0_data['name']
        country_code = ne0[4:].lower()

        parts.append(
            f'\n{country_name}{md_space}'
            f'code: **{ne0}**{md_space}'
            f'[view](../export/geojson/medium/ne0/{country_code}.geojson){md_space}')

        if (ne3_dir / f'{country_code}.json').is_file():
            parts.append(
                f'[states/provinces](country_list_ne3/{country_code}.md)')

        parts.append('\n\n')

        for ne1, ne1_data in sorted(ne0_data.get('sub1', {}).items(),
                                    key=by_name):
            unit_name = ne1_data['name']
            # First three characters are the level, the rest after ':' the code.
            unit_level = ne1[:3]
            unit_code = ne1[4:].lower()

            parts.append(
                f'  - {unit_name}{md_space}'
                f'code: **{ne1}**{md_space}'
                f'[view](../export/geojson/medium/{unit_level}/{unit_code}.geojson){md_space} '
                f'\n\n')

            for ne2, ne2_data in sorted(ne1_data.get('sub2', {}).items(),
                                        key=by_name):
                subunit_name = ne2_data['name']
                subunit_level = ne2[:3]
                subunit_code = ne2[4:].lower()

                parts.append(
                    f'    - {subunit_name}{md_space}'
                    f'code: **{ne2}**{md_space}'
                    f'[view](../export/geojson/medium/{subunit_level}/{subunit_code}.geojson){md_space}'
                    f'\n\n')

    write_file(docs_dir / 'country_list.md', ''.join(parts))
    print('country_list.md updated')
예제 #15
0
def process_ne3():
    """Build the per-country ne3 id files from the states GeoJSON.

    States are cleaned by ``build_states`` and ``clean_duplicate_states``,
    grouped by country ISO code, and each group is written to
    ``ne3_dir / '<country_iso lower-cased>.json'``.
    """
    countries = read_json(geojson_dir / 'countries.geojson')['features']
    states = read_json(geojson_dir / 'states.geojson')['features']
    print(f'{len(states)} states')

    processed_states = build_states(states, create_adm_iso_map(countries))
    clean_duplicate_states(processed_states)

    by_country = {}

    for state in processed_states:
        # build_states stores the normalised values under '_clean'.
        clean = state['properties']['_clean']
        state_iso, state_name = clean['state_iso'], clean['state_name']
        country_iso, country_name = clean['country_iso'], clean['country_name']
        ne_id, population = clean['ne_id'], clean['population']
        wikidata_id = clean['wikidata_id']

        ne3 = f'ne3:{state_iso}'
        print(f'{country_name}; {state_name}; {ne3}')

        by_country.setdefault(country_iso, {})[ne3] = {
            'name': state_name,
            'ne_id': ne_id,
            'wikidata_id': wikidata_id,
            'population': population,
        }

    for country_iso, country_states in by_country.items():
        ne3_dir.mkdir(exist_ok=True, parents=True)
        target = ne3_dir / f'{country_iso.lower()}.json'
        write_json(target, country_states, indent=2, sort_keys=True)
예제 #16
0
def download_all_regions():
    """Download every country listed in ``config.json``, the USA last.

    All countries except 'USA' are downloaded with arguments ``(2, 8)``; after
    each download that reports success the function sleeps for 60 seconds.
    'USA' is downloaded once at the end with arguments ``(2, 6)``.
    """
    config = read_json(wam_data_dir / 'config.json')

    for country_code, country_data in config.items():
        print(country_data['name'])

        # The USA is handled separately after the loop.
        if country_code != 'USA' and download_country(country_code, 2, 8):
            # Pause only when something was actually downloaded.
            time.sleep(60)

    download_country('USA', 2, 6)
예제 #17
0
def get_all_ids(
    get_countries: bool = True,
    get_units: bool = True,
    get_subunits: bool = True,
    get_states: bool = True,
):
    """Collect the Wikidata ids found in the selected GeoJSON files.

    Property names are lower-cased in place before the lookup, so the id is
    found regardless of the key's capitalisation in the source file.

    Args:
        get_countries: Include ``countries.geojson``.
        get_units: Include ``units.geojson``.
        get_subunits: Include ``subunits.geojson``.
        get_states: Include ``states.geojson``.

    Returns:
        Sorted list of the distinct, non-empty ``wikidataid`` values.
    """
    if get_countries:
        countries = read_json(geojson_dir / 'countries.geojson')['features']
    else:
        countries = []

    if get_units:
        units = read_json(geojson_dir / 'units.geojson')['features']
    else:
        units = []

    if get_subunits:
        subunits = read_json(geojson_dir / 'subunits.geojson')['features']
    else:
        subunits = []

    if get_states:
        states = read_json(geojson_dir / 'states.geojson')['features']
    else:
        states = []

    all_ids = set()

    for feature in countries + units + subunits + states:
        prop = feature['properties']
        # Iterate over a snapshot of the keys: renaming keys while iterating
        # the dict itself raises RuntimeError on current CPython versions.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)

        wikidata_id = prop.get('wikidataid')
        if wikidata_id:
            all_ids.add(wikidata_id)

    return sorted(all_ids)
예제 #18
0
def generate_ne3_md(country_iso):
    """Render the states/provinces Markdown page of one country.

    Reads the country's ne3 JSON from ``ne3_dir`` and its name from
    ``ne012.json``, then writes one entry per ne3 id, ordered by name, to
    ``docs_dir / 'country_list_ne3' / '<country_iso>.md'``.

    Args:
        country_iso: Country code; used lower-cased for file names and links
            and upper-cased for the ``ne0:`` lookup.

    Raises:
        AssertionError: If an ne3 id belongs to a different country.
    """
    lower_iso = country_iso.lower()
    level3 = read_json(ne3_dir / f'{lower_iso}.json')
    level012 = read_json(ne_id_dir / 'ne012.json')

    country_name = level012[f'ne0:{country_iso.upper()}']['name']

    parts = [f'# {country_name} states/provinces/counties']

    entries = sorted(level3.items(), key=lambda pair: pair[1]['name'])
    for ne3, ne3_data in entries:
        name = ne3_data['name']
        # The id has the shape '<level>:<country>-<state>'.
        ne3_country, ne3_state = ne3.split(':')[1].split('-')
        assert country_iso.lower() == ne3_country.lower()

        parts.append(
            f'\n{name}{md_space}'
            f'code: **{ne3}**{md_space}'
            f'[view](../../export/geojson/medium/ne3/{country_iso.lower()}/{ne3_state.lower()}.geojson){md_space}'
            f'\n\n')

    write_file(docs_dir / 'country_list_ne3' / f'{country_iso}.md',
               ''.join(parts))
예제 #19
0
def get_osm_missing_features():
    """Load the manually added features from ``fixes_dir / 'osm_missing'``.

    Every ``*.geojson`` file may hold a single ``Feature`` or a
    ``FeatureCollection``; each feature is passed through
    ``clean_tags_overpass`` before being collected.

    Returns:
        List of the cleaned feature dicts.
    """
    collected = []

    for path in (fixes_dir / 'osm_missing').glob('*.geojson'):
        document = read_json(path)

        # A bare feature is collected as-is.
        if document['type'] == 'Feature':
            clean_tags_overpass(document)
            collected.append(document)

        # A collection contributes each of its members.
        if document['type'] == 'FeatureCollection':
            members = document['features']
            for member in members:
                clean_tags_overpass(member)
            collected.extend(members)

    return collected
예제 #20
0
def generate_fips_list():
    """Render ``fips_list.md``: one Markdown table of counties per state.

    Counties come from ``fips.json``. States appear in ascending
    ``state_code_int`` order with their name and postal code in the heading;
    counties inside each state are ordered by ``name``.
    """
    fips_json = read_json(export_dir / 'fips.json')
    counties = sorted(fips_json.values(), key=lambda c: c['name'])
    states_by_int = get_state_data()[0]

    # Group counties per state; dict insertion order keeps the states sorted.
    grouped = {}
    for county in sorted(counties, key=lambda c: c['state_code_int']):
        grouped.setdefault(county['state_code_int'], []).append(county)

    parts = [
        '# US county FIPS code list\n',
        '[GeoJSON for all counties](../export/geojson/q5/fips_all.geojson)',
    ]

    for state_code_int, members in grouped.items():
        state_name = states_by_int[state_code_int]['name']
        state_code_postal = states_by_int[state_code_int]['postal_code']
        parts.append(
            f'\n\n#### {state_name} - {state_code_postal}, state code: {state_code_int}\n')
        parts.append('Name | FIPS | GeoJSON | population \n')
        parts.append('--- | --- | --- | --: \n')

        for county in sorted(members, key=lambda c: c['name']):
            fips = county['fips']
            name_long = county['name_long']
            population = county['population']
            geojson_path = county['geojson_path']

            # Falsy populations (missing or zero) render as an empty cell.
            population_str = f'{population:,}' if population else ''
            geojson_link = f'[GeoJSON](../geojson/q8/{geojson_path})'

            parts.append(
                f'{name_long} | {fips} | {geojson_link} | {population_str}\n')

    docs_dir.mkdir(parents=True, exist_ok=True)
    write_file(docs_dir / 'fips_list.md', ''.join(parts))
예제 #21
0
def get_osm_iso1_map():
    """Return a mapping of OSM relation id (int) to ISO1 code (str).

    The mapping is cached in ``wikidata_dir / 'osm_iso1_map.json'``. When the
    cache file exists it is loaded; otherwise the Wikidata SPARQL endpoint is
    queried for every item that has both P297 and an OSM relation id (P402),
    and the result is written to the cache file.

    Returns:
        dict[int, str]: OSM id -> value of the ``iso1`` binding.
    """
    file_path = wikidata_dir / 'osm_iso1_map.json'
    if file_path.is_file():
        # JSON object keys are always strings; convert them back to int so
        # the cached map has the same key type as a freshly built one.
        return {int(osm): iso1 for osm, iso1 in read_json(file_path).items()}

    endpoint_url = "https://query.wikidata.org/sparql"

    query = """SELECT ?region ?iso1 ?iso2 ?osm WHERE {
      ?region wdt:P297 ?iso1;
        wdt:P402 ?osm.
    }"""

    results = get_results(endpoint_url, query)

    osm_iso1_map = {}

    for result in results["results"]["bindings"]:
        iso1 = result['iso1']['value']
        osm = int(result['osm']['value'])

        osm_iso1_map[osm] = iso1

    write_json(file_path, osm_iso1_map, indent=2)
    return osm_iso1_map
예제 #22
0
import shutil
import sys

from country_levels_lib.fips import fips_utils
from country_levels_lib.config import export_dir, fixes_dir
from country_levels_lib.geo import calculate_centroid, find_timezone
from country_levels_lib.utils import read_json, osm_url, write_json, wikidata_url
from country_levels_lib.wam.wam_collect import validate_iso1, validate_iso2, simp_dir
from country_levels_lib.wam.wam_download import wam_data_dir
from area import area

# Contents of wam_data_dir/population.json; loaded on demand by
# split_geojson() the first time it runs.
population_map = None
# Manual overrides, read once at import time from the fixes directory.
population_fixes = read_json(fixes_dir / 'population.json')
timezone_fixes = read_json(fixes_dir / 'timezone.json')
# Second element returned by fips_utils.get_state_data(); presumably keyed by
# postal code, judging by the name -- TODO confirm against fips_utils.
us_states_by_postal = fips_utils.get_state_data()[1]
# Contents of export_dir/iso1.json and iso2.json; filled by split_geojson()
# for every simplification level other than 'high'.
iso1_json = None
iso2_json = None


def split_geojson(iso_level: int, simp_level: str):
    """Prepare module state and locate the combined GeoJSON to split.

    NOTE(review): this excerpt ends right after ``file_path`` is computed;
    the rest of the processing is not visible here.

    Args:
        iso_level: ISO level number; used to build the ``iso<level>.geojson``
            file name.
        simp_level: Name of the simplification sub-folder of ``simp_dir``.
    """
    global population_map, iso1_json, iso2_json
    # Load the population map only while the module global is still empty.
    if not population_map:
        population_map = read_json(wam_data_dir / 'population.json')

    # The exported id files are (re)loaded for every level except 'high'.
    if simp_level != 'high':
        iso1_json = read_json(export_dir / 'iso1.json')
        iso2_json = read_json(export_dir / 'iso2.json')

    print(f'Splitting iso{iso_level} to level: {simp_level}')
    file_path = simp_dir / simp_level / f'iso{iso_level}.geojson'
예제 #23
0
def process_ne012():
    """Build the nested ne0 / ne1 / ne2 id tree and write it to ``ne012.json``.

    Countries become ``ne0:<iso>`` entries, units are attached under their
    country's ``sub1`` as ``ne1:<gu_a3>`` and subunits under their unit's
    ``sub2`` as ``ne2:<su_a3>``. The first feature seen for an id wins
    (``setdefault``). The tree is then passed through ``cleanup_sub2``,
    ``cleanup_sub1`` and ``one_to_one_fix`` before being written to
    ``id_dir / 'ne012.json'``.

    Raises:
        KeyError: If a unit or subunit refers to a parent that was not created.
        AssertionError: If an ``ne_id`` is not an ``int``.
    """
    countries = read_json(geojson_dir / 'countries.geojson')['features']
    print(f'{len(countries)} countries')

    units = read_json(geojson_dir / 'units.geojson')['features']
    print(f'{len(units)} units')

    subunits = read_json(geojson_dir / 'subunits.geojson')['features']
    print(f'{len(subunits)} subunits')

    wikidata_population = read_json(wikidata_dir / 'population.json')

    adm_iso_map = create_adm_iso_map(countries)
    levels = dict()

    for feature in countries:
        prop = feature['properties']
        # Iterate over a snapshot of the keys: renaming keys while iterating
        # the dict itself raises RuntimeError on current CPython versions.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)

        country_name = prop['admin']
        country_iso = adm_iso_map[prop['adm0_a3']]
        validate_iso_012(country_iso)

        ne_id = prop['ne_id']
        assert type(ne_id) == int

        wikidata_id = prop.get('wikidataid')
        population = calculate_population(prop, wikidata_population)

        ne0 = f'ne0:{country_iso}'

        levels.setdefault(
            ne0,
            {
                'name': country_name,
                'ne_id': ne_id,
                'wikidata_id': wikidata_id,
                'population': population,
                'sub1': {},
            },
        )

    for feature in units:
        prop = feature['properties']
        # Snapshot of the keys, see the countries loop.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)

        country_iso = adm_iso_map[prop['adm0_a3']]
        validate_iso_012(country_iso)

        unit_name = prop['geounit']
        unit_iso = prop['gu_a3']
        validate_iso_012(unit_iso)

        ne_id = prop['ne_id']
        assert type(ne_id) == int

        wikidata_id = prop.get('wikidataid')
        population = calculate_population(prop, wikidata_population)

        ne0 = f'ne0:{country_iso}'
        ne1 = f'ne1:{unit_iso}'

        sub1 = levels[ne0]['sub1']
        sub1.setdefault(
            ne1,
            {
                'name': unit_name,
                'ne_id': ne_id,
                'wikidata_id': wikidata_id,
                'population': population,
                'sub2': {},
            },
        )

    for feature in subunits:
        prop = feature['properties']
        # Snapshot of the keys, see the countries loop.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)

        country_iso = adm_iso_map[prop['adm0_a3']]
        validate_iso_012(country_iso)

        unit_iso = prop['gu_a3']
        validate_iso_012(unit_iso)

        subunit_name = prop['subunit']
        subunit_iso = prop['su_a3']
        validate_iso_012(subunit_iso)

        ne_id = prop['ne_id']
        assert type(ne_id) == int

        wikidata_id = prop.get('wikidataid')
        population = calculate_population(prop, wikidata_population)

        ne0 = f'ne0:{country_iso}'
        ne1 = f'ne1:{unit_iso}'
        ne2 = f'ne2:{subunit_iso}'

        sub1 = levels[ne0]['sub1']
        sub2 = sub1[ne1]['sub2']
        sub2.setdefault(
            ne2,
            {
                'name': subunit_name,
                'ne_id': ne_id,
                'wikidata_id': wikidata_id,
                'population': population,
            },
        )

    cleanup_sub2(levels)
    cleanup_sub1(levels)
    one_to_one_fix(levels)

    id_dir.mkdir(exist_ok=True, parents=True)
    write_json(id_dir / 'ne012.json', levels, indent=2, sort_keys=True)
예제 #24
0
def process_fips_quality(quality):
    """Export the county GeoJSON of one quality level as per-county files.

    Each county feature gets a fresh ``properties`` dict (name, codes,
    population, id, plus the remaining census properties under
    ``census_data``) and is written to
    ``export_dir / 'geojson' / 'q<quality>' / 'fips' / <state> / <fips>.geojson``.
    All exported features are also written to ``fips_all.geojson``; for
    quality 5 a summary ``fips.json`` is written as well.

    Args:
        quality: One of 5, 7 or 8; also the key into ``quality_map``.

    Raises:
        AssertionError: For an unsupported quality, for a county whose codes
            disagree with the county data, or when the number of exported
            features differs from the number of known counties.
    """
    assert quality in (5, 7, 8)

    quality_name = quality_map[quality]
    print(f'Processing FIPS county GeoJSON {quality_name}')

    source_path = fips_geojson_dir / f'counties_{quality_name}.geojson'
    features = read_json(source_path)['features']

    counties_by_str = get_county_data()[1]
    states_by_code = get_state_codes()[0]

    quality_dir = export_dir / 'geojson' / f'q{quality}'
    geojson_export_dir = quality_dir / 'fips'
    # Start from an empty folder so stale files do not survive.
    shutil.rmtree(geojson_export_dir, ignore_errors=True)

    exported_features = []
    summaries = {}

    for feature in features:
        census = feature['properties']
        geoid = census['GEOID']
        state_code = int(census['STATEFP'])
        county_code = int(census['COUNTYFP'])

        # skip minor islands without state code found in 500k dataset
        if state_code not in states_by_code:
            continue

        record = counties_by_str[geoid]

        assert record['county_code'] == county_code
        assert record['state_code'] == state_code

        summary = {
            'name': record['name'],
            'fips': geoid,
            'state_code': state_code,
            'county_code': county_code,
            'population': record['population'],
            'countrylevel_id': f'fips:{geoid}',
        }

        # These raw fields are replaced by the cleaned values above.
        for raw_key in ('NAME', 'GEOID', 'STATEFP', 'COUNTYFP'):
            del census[raw_key]

        feature['properties'] = {**summary, 'census_data': census}
        exported_features.append(feature)

        # Files are grouped in folders named after the first two FIPS digits.
        state_prefix = geoid[:2]
        state_subdir = geojson_export_dir / state_prefix
        state_subdir.mkdir(parents=True, exist_ok=True)
        write_json(state_subdir / f'{geoid}.geojson', feature)

        summaries[geoid] = {
            **summary,
            'geojson_path': f'fips/{state_prefix}/{geoid}.geojson',
        }

    write_json(
        quality_dir / 'fips_all.geojson',
        {
            "type": "FeatureCollection",
            "features": exported_features
        },
    )

    if quality == 5:  # only write the file once
        write_json(export_dir / 'fips.json',
                   summaries,
                   indent=2,
                   sort_keys=True)

    count = len(exported_features)
    assert count == len(counties_by_str)
    print(f'  {count} GeoJSON processed')
예제 #25
0
def build_states(states: list, adm_iso_map: dict):
    """Normalise state features and attach a ``_clean`` summary to each.

    Property names are lower-cased in place. Minor islands, unnamed states
    and states whose ISO code starts with ``-99-`` are dropped. The state's
    ISO code is taken from the feature (after ``fix_iso_3_codes``), replaced
    by the Wikidata-derived code when one exists, and finally forced to carry
    the country's ISO prefix.

    Args:
        states: State feature dicts; their ``properties`` are modified in place.
        adm_iso_map: Lookup from a feature's ``adm0_a3`` value to the country
            ISO code.

    Returns:
        The kept features, each with ``properties['_clean']`` filled in.

    Raises:
        AssertionError: If an ``ne_id`` is not an ``int``.
    """
    wikidata_population = read_json(wikidata_dir / 'population.json')
    wikidata_iso_ne3 = read_json(wikidata_dir / 'iso_ne3.json')

    clean_states = list()

    for feature in states:
        prop = feature['properties']
        # Iterate over a snapshot of the keys: renaming keys while iterating
        # the dict itself raises RuntimeError on current CPython versions.
        for key in list(prop):
            prop[key.lower()] = prop.pop(key)

        country_name = prop['admin']
        country_iso = adm_iso_map[prop['adm0_a3']]
        validate_iso_012(country_iso)

        state_name = prop['name']
        state_iso = fix_iso_3_codes.get(prop['iso_3166_2'], prop['iso_3166_2'])

        wikidata_id = prop.get('wikidataid')
        population = wikidata_population.get(wikidata_id, 0)
        wikidata_iso = wikidata_iso_ne3.get(wikidata_id)

        # Prefer the ISO code recorded for the Wikidata item when it differs.
        if wikidata_iso is not None and wikidata_iso != state_iso:
            state_iso = wikidata_iso

        ne_id = prop['ne_id']
        assert type(ne_id) == int

        # skipping minor island
        if prop['featurecla'] == 'Admin-1 minor island':
            continue

        # skipping unnamed places (right now the same as minor islands)
        if state_name is None:
            continue

        if state_iso.startswith('-99-'):
            continue

        # check if state's iso code matches country's iso code; on mismatch
        # keep the state part and use the country's own prefix
        country_iso_from_state, country_code_from_state = state_iso.split('-')
        if country_iso_from_state != country_iso:
            state_iso = f'{country_iso}-{country_code_from_state}'

        # clean up state_iso
        state_iso = state_iso.replace('~', '')

        # regex check state_iso
        validate_iso_3(state_iso)

        prop['_clean'] = {
            'country_name': country_name,
            'country_iso': country_iso,
            'state_name': state_name,
            'state_iso': state_iso,
            'ne_id': ne_id,
            'population': population,
            'wikidata_id': wikidata_id,
        }

        clean_states.append(feature)

    return clean_states
예제 #26
0
def process_fips_quality(quality):
    """Export FIPS county GeoJSON files for one quality level.

    Reads ``counties_<quality_map[quality]>.geojson`` from ``fips_geojson_dir``,
    replaces every feature's properties with a cleaned dict and writes:

    * one GeoJSON file per county to
      ``geojson/<quality>/fips/<state>/<GEOID>.geojson``,
    * ``geojson/<quality>/fips_all.geojson`` holding all exported features,
    * ``fips.json`` with the per-county properties plus ``geojson_path``
      (only when ``quality == 'high'``).

    The ``fips`` export directory of this quality level is removed first.

    Args:
        quality: Key of ``quality_map``; also used verbatim as the name of the
            export sub-directory.

    Raises:
        AssertionError: If a feature's county/state code differs from the
            looked-up county data, or if the number of exported features
            differs from the number of known counties.
    """
    print(f'Processing FIPS county GeoJSON {quality_map[quality]}')

    features = read_json(fips_geojson_dir / f'counties_{quality_map[quality]}.geojson')['features']

    counties_by_str = get_county_data()[1]
    states_by_int = get_state_data()[0]

    geojson_export_dir = export_dir / 'geojson' / quality / 'fips'
    shutil.rmtree(geojson_export_dir, ignore_errors=True)

    new_features = []
    json_data = {}

    for feature in features:
        prop = feature['properties']
        full_code_str = prop['GEOID']
        state_code_int = int(prop['STATEFP'])
        county_code = int(prop['COUNTYFP'])

        # skip minor islands without state code found in 500k dataset
        if state_code_int not in states_by_int:
            continue

        centroid = calculate_centroid(feature)
        timezone = find_timezone(centroid['lon'], centroid['lat'])
        area_m2 = int(prop['ALAND'] + prop['AWATER'])

        county_data = counties_by_str[full_code_str]
        assert county_data['county_code'] == county_code
        assert county_data['state_code_int'] == state_code_int

        name = county_data['name']
        name_long = county_data['name_long']

        state_code_int = county_data['state_code_int']
        state_code_postal = county_data['state_code_postal']
        state_code_iso = county_data['state_code_iso']

        population = county_data['population']

        countrylevel_id = f'fips:{full_code_str}'

        # These values are re-exported under cleaned names below; whatever
        # remains in prop is kept as 'census_data'.
        for key in ['NAME', 'GEOID', 'STATEFP', 'COUNTYFP']:
            del prop[key]

        new_prop = {
            'name': name,
            'name_long': name_long,
            'fips': full_code_str,
            'state_code_int': state_code_int,
            'state_code_postal': state_code_postal,
            'state_code_iso': state_code_iso,
            'county_code': county_code,
            'population': population,
            'countrylevel_id': countrylevel_id,
            'census_data': prop,
            'center_lat': round(centroid['lat'], 2),
            'center_lon': round(centroid['lon'], 2),
            'area_m2': area_m2,
            'timezone': timezone,
        }
        feature['properties'] = new_prop
        new_features.append(feature)

        state_code_str = full_code_str[:2]
        state_subdir = geojson_export_dir / state_code_str
        state_subdir.mkdir(parents=True, exist_ok=True)
        write_json(state_subdir / f'{full_code_str}.geojson', feature)

        # Copy before adding geojson_path: new_prop is also feature['properties'],
        # so mutating it in place would add the path to the feature written to
        # fips_all.geojson while the per-county file above does not have it.
        json_data[full_code_str] = dict(new_prop)
        json_data[full_code_str]['geojson_path'] = f'fips/{state_code_str}/{full_code_str}.geojson'

    write_json(
        export_dir / 'geojson' / quality / 'fips_all.geojson',
        {"type": "FeatureCollection", "features": new_features},
    )

    if quality == 'high':  # only write the file once
        write_json(export_dir / 'fips.json', json_data, indent=2, sort_keys=True)

    # Every exported feature was appended to new_features exactly once.
    count = len(new_features)
    assert count == len(counties_by_str)
    print(f'  {count} GeoJSON processed')
예제 #27
0
def split_geojson(iso_level: int, simp_level, *, debug: bool = False):
    """Split an iso1/iso2 FeatureCollection into one GeoJSON file per ISO code.

    Reads ``iso<iso_level>-<simp_level>.geojson`` from ``wam_geojson_simp_dir``,
    rewrites each feature's properties into a cleaned dict (the remaining
    original properties are kept under ``osm_data``) and writes each feature
    below ``geojson/q<simp_level>/iso<iso_level>``. When ``simp_level == 5``
    the collected properties are also written to ``iso<iso_level>.json``.

    Args:
        iso_level: 1 or 2.
        simp_level: Simplification level used in the input file name and the
            export directory name.
        debug: When true, features with an already seen ISO code are processed
            as well and the duplicates are printed at the end.
    """
    assert iso_level in [1, 2]

    print(f'Splitting iso{iso_level} to level: q{simp_level}')
    file_path = wam_geojson_simp_dir / f'iso{iso_level}-{simp_level}.geojson'

    # Ascending admin_level, so the first feature seen for an ISO code is the
    # one with the lowest admin_level.
    features_sorted = sorted(
        read_json(file_path)['features'],
        key=lambda feat: feat['properties']['admin_level'],
    )

    level_subdir = export_dir / 'geojson' / f'q{simp_level}' / f'iso{iso_level}'
    level_subdir.mkdir(parents=True)

    population_map = read_json(wam_data_dir / 'population.json')

    # Keys removed from the feature properties / from its 'alltags' dict.
    dropped_prop_keys = ('boundary', 'note', 'rpath', 'srid', 'timestamp')
    dropped_tag_keys = (
        'ISO3166-1',
        'ISO3166-1:alpha2',
        'ISO3166-1:numeric',
        'ISO3166-2',
        'ISO3166-2:alpha2',
        'ISO3166-2:numeric',
        'land_area',
        'wikidata',
    )

    json_data = {}
    seen = {}

    for feature in features_sorted:
        osm_props = feature['properties']
        tags = osm_props['alltags']

        name = osm_props.pop('name')
        osm_id = int(osm_props.pop('id'))
        iso = osm_props.pop(f'iso{iso_level}')
        admin_level = int(osm_props.pop('admin_level'))
        wikidata_id = osm_props.pop('wikidata_id', None)
        countrylevel_id = f'iso{iso_level}:{iso}'
        population = population_map.get(wikidata_id)

        wiki_from_props = osm_props.pop('wikipedia', None)
        wiki_from_tags = tags.pop('wikipedia', None)
        if wiki_from_props and wiki_from_tags and wiki_from_props != wiki_from_tags:
            print(wiki_from_props, wiki_from_tags)
        # The value from the properties wins over the one from alltags.
        wikipedia_id = wiki_from_props or wiki_from_tags

        del feature['bbox']

        for key in dropped_prop_keys:
            osm_props.pop(key, None)
        for key in dropped_tag_keys:
            tags.pop(key, None)

        matches = seen.setdefault(iso, [])
        if matches and not debug:
            # duplicate ISO code, keep only the first one
            continue

        summary = {
            'name': name,
            f'iso{iso_level}': iso,
            'admin_level': admin_level,
            'osm_id': osm_id,
            'wikidata_id': wikidata_id,
            'wikipedia_id': wikipedia_id,
            'population': population,
            'countrylevel_id': countrylevel_id,
        }
        feature['properties'] = {**summary, 'osm_data': osm_props}

        matches.append(summary)
        json_data[iso] = summary

        if iso_level == 1:
            if not validate_iso1(iso):
                print(f'invalid iso1: {iso}')
                continue

            write_json(level_subdir / f'{iso}.geojson', feature)
            summary['geojson_path'] = f'iso1/{iso}.geojson'
        else:
            if not validate_iso2(iso):
                print(f'invalid iso2: {iso}')
                continue

            # iso2 files are grouped into one directory per code prefix
            iso2_start, _ = iso.split('-')
            iso2_subdir = level_subdir / iso2_start
            iso2_subdir.mkdir(exist_ok=True)

            write_json(iso2_subdir / f'{iso}.geojson', feature)
            summary['geojson_path'] = f'iso2/{iso2_start}/{iso}.geojson'

    if simp_level == 5:
        write_json(
            export_dir / f'iso{iso_level}.json',
            json_data,
            indent=2,
            sort_keys=True,
        )

    if not debug:
        return

    # debug duplicates, fixed by sorting by admin_level
    debug_dir = geojson_dir / 'wam' / 'debug' / f'iso{iso_level}'
    shutil.rmtree(debug_dir, ignore_errors=True)
    debug_dir.mkdir(parents=True)

    # list every ISO code that matched more than one feature, lowest admin level first
    for iso, iso_matches in seen.items():
        if len(iso_matches) == 1:
            continue

        by_admin_level = sorted(iso_matches, key=lambda entry: entry['admin_level'])

        print(f'duplicate iso{iso_level}: {iso}')
        for entry in by_admin_level:
            url = osm_url(entry['osm_id'])
            print(f"  {entry['name']} {entry['admin_level']} {url}")
예제 #28
0
def process_br_muni_quality(quality):
    """Export the br_muni GeoJSON features for one quality level.

    Reads ``br_muni/simp/simp-<quality>.geojson`` from ``geojson_dir``,
    replaces each feature's properties with a cleaned dict and writes one file
    per feature to ``geojson/q<quality>/br_muni/<state>/<ibge_code>.geojson``,
    plus ``br_muni_all.geojson`` with all features. When ``quality == 7`` the
    collected properties are also written to ``br_muni.json``.

    Args:
        quality: One of 5, 7 or 8.
    """
    assert quality in [5, 7, 8]

    print(f'Processing BR_Muni county GeoJSON {quality}')

    source_path = geojson_dir / 'br_muni' / 'simp' / f'simp-{quality}.geojson'
    features = read_json(source_path)['features']

    quality_dir = export_dir / 'geojson' / f'q{quality}'
    geojson_export_dir = quality_dir / 'br_muni'
    shutil.rmtree(geojson_export_dir, ignore_errors=True)

    json_data = {}

    for feature in features:
        raw = feature['properties']

        name = raw.pop('name')
        name_long = raw.pop('name_long')
        population = int(raw.pop('population'))
        state_code = raw.pop('state')
        ibge_code = raw.pop('ibge_code')

        # every original property must have been consumed above
        assert not raw

        countrylevel_id = f'br_muni:{ibge_code}'

        centroid = calculate_centroid(feature)
        timezone = find_timezone(centroid['lon'], centroid['lat'])

        new_prop = {
            'name': name,
            'name_long': name_long,
            'state_code': state_code,
            'state_code_iso': f'iso2:BR-{state_code}',
            'ibge_code': ibge_code,
            'population': population,
            'countrylevel_id': countrylevel_id,
            'center_lat': round(centroid['lat'], 2),
            'center_lon': round(centroid['lon'], 2),
            'timezone': timezone,
        }
        feature['properties'] = new_prop

        state_subdir = geojson_export_dir / state_code
        state_subdir.mkdir(parents=True, exist_ok=True)
        write_json(state_subdir / f'{ibge_code}.geojson', feature)

        # separate dict, so geojson_path does not end up in the feature itself
        json_data[ibge_code] = {
            **new_prop,
            'geojson_path': f'br_muni/{state_code}/{ibge_code}.geojson',
        }

    collection = {"type": "FeatureCollection", "features": features}
    write_json(quality_dir / 'br_muni_all.geojson', collection)

    if quality == 7:  # only write the file once
        write_json(export_dir / 'br_muni.json', json_data, indent=2, sort_keys=True)

    print(f'  {len(features)} GeoJSON processed')
예제 #29
0
    get_osm_iso1_map,
    get_osm_wd_map,
    get_osm_iso2_map,
)
from country_levels_lib.wikidata.wikidata_population import get_population

# Paths below geojson_dir / 'wam': the collected iso1/iso2 ndjson files and the
# 'simp' directory (presumably the simplified GeoJSONs - confirm).
collected_dir = geojson_dir / 'wam' / 'collected'
iso1_collected_path = collected_dir / 'iso1.ndjson'
iso2_collected_path = collected_dir / 'iso2.ndjson'
simp_dir = geojson_dir / 'wam' / 'simp'

# Module-level lookup tables; empty at import time and rebound by collect_iso().
osm_iso1_map = {}
osm_iso2_map = {}
osm_wd_map = {}

# Entries of skip_osm.json in fixes_dir, converted to int
# (presumably OSM ids of features to skip - confirm against the usage).
skip_osm_features = {int(i) for i in read_json(fixes_dir / 'skip_osm.json')}

# iso1: two uppercase letters; iso2: two uppercase letters, a dash, then 1-4
# uppercase letters or digits.
# NOTE(review): neither pattern is anchored, so whether a whole string is
# validated depends on the call used (match/search vs fullmatch) - confirm.
iso1_regex = re.compile('[A-Z]{2}')
iso2_regex = re.compile('[A-Z]{2}-[A-Z0-9]{1,4}')


def collect_iso():
    global osm_iso1_map, osm_iso2_map, osm_wd_map

    osm_iso1_map = get_osm_iso1_map()
    osm_iso2_map = get_osm_iso2_map()
    osm_wd_map = get_osm_wd_map()

    custom_osm = read_json(fixes_dir / 'custom_osm.json')
    custom_iso1 = {
        int(k): v['iso1']