# Code example #1
0
def main(args):
    assert len(args) == 2
    csv_reader = csv.reader(open(sys.argv[1], 'rb'))
    fb = freebase.Freebase()
    osm_fetcher = osm.OSM()

    for idx, row in enumerate(csv_reader):
        osm_type, osm_id, wiki_title, name = row
        if not is_english_wikititle(wiki_title):
            continue
        if wiki_title.startswith("en:"):
            wiki_title = wiki_title[3:]

        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(file(path))
        except (ValueError, IOError):
            continue

        props = get_feature_props(d, osm_type, osm_id)
        if not props:
            continue

        if not has_polygon(d):
            continue
        freebase_data = fb._get_from_cache(wiki_title)
        if not freebase_data:
            continue

        try:
            total_languages = len(
                freebase_data['property']
                ['/common/topic/topic_equivalent_webpage']['values'])
        except KeyError:
            total_languages = 0

        pass_condition = None
        if props.get('leisure') == 'park':
            if total_languages >= 10:
                pass_condition = ['park', str(total_languages)]
        admin_level = props.get('admin_level')
        if admin_level:
            admin_level = int(admin_level)
            if admin_level <= 4:
                continue  # no states, countries -- other sources cover these.
            # Counties are generally not that interesting, with a few
            # exceptions like NYC boroughs and Los Angeles.
            if 'County' in name:
                if name not in COUNTY_WHITELIST:
                    continue

            d = fetch_metadata.extract_freebase_metadata(
                '', wiki_title, freebase_data)
            if d.get('population', 0) >= 100000:
                pass_condition = [
                    'admin_pop',
                    str(admin_level),
                    str(d.get('population'))
                ]

        if pass_condition:
            print '\t'.join(row + pass_condition)
# Code example #2
0
#!/usr/bin/env python
'''Collect statistics about OSM features.'''

from collections import defaultdict
import glob
import json
import os
from data import geojson_util
from data import osm
from bs4 import BeautifulSoup

# Module-level fetcher; presumably shared so all helpers below use one
# cache_dir -- TODO(review): confirm osm.OSM() construction is cheap/pure.
osm_fetcher = osm.OSM()


def has_polygon(d):
    """Return True iff the GeoJSON object contains at least one
    Polygon or MultiPolygon feature."""
    num_polys, _ = polygon_stats(d)
    return num_polys > 0


def polygon_stats(d):
    """Count polygonal features in a GeoJSON object.

    Args:
        d: a GeoJSON dict -- either a single Feature or a
           FeatureCollection.

    Returns:
        (num_polygons, num_features): how many features have Polygon or
        MultiPolygon geometry, and how many features there are in total.
        Any other GeoJSON type returns None (unchanged from before).
    """
    if d['type'] == 'Feature':
        is_poly = d['geometry']['type'] in ('Polygon', 'MultiPolygon')
        return (1 if is_poly else 0), 1
    elif d['type'] == 'FeatureCollection':
        polys, total = 0, 0
        for feat in d['features']:
            # Distinct names so the recursion result does not shadow
            # the parameter d (the original reused `d` here).
            p, t = polygon_stats(feat)
            polys += p
            total += t
        # Bug fix: this return was missing, so FeatureCollections
        # silently yielded None and crashed callers that unpack a tuple.
        return polys, total
# Code example #3
0
def main(args):
    osm_fetcher = osm.OSM()
    fb = freebase.Freebase()
    features_out = []
    for line in fileinput.input():
        osm_type, osm_id, wiki_title, name = line.strip().split('\t')[:4]
        if wiki_title.startswith("en:"):
            wiki_title = wiki_title[3:]

        # GeoJSON data.
        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(file(path))
        except (ValueError, IOError):
            continue

        props = osm_filter.get_feature_props(d, osm_type, osm_id)
        if not props:
            continue
        freebase_data = fb._get_from_cache(wiki_title)
        assert (freebase_data)

        freebase_extract = fetch_metadata.extract_freebase_metadata(
            '', wiki_title, freebase_data)

        #land_json = path.replace('.xml.json', '.xml.land.simple.json')
        land_json = path.replace('.xml.json', '.xml.land.json')
        if os.path.exists(land_json):
            try:
                d = json.load(file(land_json))
            except (ValueError, IOError):
                continue
        else:
            sys.stderr.write('Could not find %s\n' % land_json)

        d['id'] = make_comparea_id(osm_type, osm_id)
        props = {
            'population': 0,
            'population_date': '',
            'population_source': '',
            'population_source_url': '',
            'area_km2_source_url': '#'
        }
        props.update(
            fetch_metadata.extract_freebase_metadata('', wiki_title,
                                                     freebase_data))
        if 'area_km2' in props: del props['area_km2']
        props.update({
            'name': wiki_title.replace('_', ' '),
        })

        only_polygons.remove_non_polygons(d)
        geojson_util.make_polygons_clockwise(d)

        apply_monkey_patches(d)
        area_km2 = geojson_util.get_area_of_feature(d) / 1e6
        if area_km2 == 0:
            sys.stderr.write('Discarding %s (no area)\n' % wiki_title)
            continue
        c_lon, c_lat = geojson_util.centroid_of_feature(d)
        props.update({
            'area_km2': area_km2,
            'area_km2_source': 'calculated',
            'centroid_lon': c_lon,
            'centroid_lat': c_lat
        })
        d['properties'] = props
        adjust_name(d, freebase_data)

        features_out.append(d)

    geojson_out = {'type': 'FeatureCollection', 'features': features_out}
    print json.dumps(geojson_out, indent=2)