import csv
import json
import os

# Project-local imports; the exact module paths for freebase and
# fetch_metadata are assumptions based on how they are used below.
import fetch_metadata
import freebase
from data import osm

# is_english_wikititle, get_feature_props, has_polygon and COUNTY_WHITELIST
# are defined elsewhere in this module.


def main(args):
    assert len(args) == 2
    csv_reader = csv.reader(open(args[1], 'rb'))
    fb = freebase.Freebase()
    osm_fetcher = osm.OSM()
    for idx, row in enumerate(csv_reader):
        osm_type, osm_id, wiki_title, name = row
        if not is_english_wikititle(wiki_title):
            continue
        if wiki_title.startswith("en:"):
            wiki_title = wiki_title[3:]

        # Load the cached GeoJSON for this OSM feature.
        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(open(path))
        except (ValueError, IOError):
            continue

        props = get_feature_props(d, osm_type, osm_id)
        if not props:
            continue
        if not has_polygon(d):
            continue

        freebase_data = fb._get_from_cache(wiki_title)
        if not freebase_data:
            continue
        try:
            # Number of equivalent web pages (roughly, Wikipedia language
            # editions) -- a crude proxy for notability.
            total_languages = len(
                freebase_data['property']
                ['/common/topic/topic_equivalent_webpage']['values'])
        except KeyError:
            total_languages = 0

        pass_condition = None
        if props.get('leisure') == 'park':
            if total_languages >= 10:
                pass_condition = ['park', str(total_languages)]

        admin_level = props.get('admin_level')
        if admin_level:
            admin_level = int(admin_level)
            if admin_level <= 4:
                continue  # no states, countries -- other sources cover these.

            # Counties are generally not that interesting, with a few
            # exceptions like NYC boroughs and Los Angeles.
            if 'County' in name and name not in COUNTY_WHITELIST:
                continue

            metadata = fetch_metadata.extract_freebase_metadata(
                '', wiki_title, freebase_data)
            if metadata.get('population', 0) >= 100000:
                pass_condition = [
                    'admin_pop', str(admin_level),
                    str(metadata.get('population'))
                ]

        if pass_condition:
            print '\t'.join(row + pass_condition)
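
# A minimal sketch of how this filter might be invoked (the entry point is
# not shown in this snippet, so this is an assumption):
#
#   if __name__ == '__main__':
#       import sys
#       main(sys.argv)
#
# Each input row names one OSM feature, e.g. (hypothetical values):
#
#   way,12345,en:Some_Park,Some Park
#
# Rows that pass are echoed with the pass reason appended, tab-separated.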
#!/usr/bin/env python
'''Collect statistics about OSM features.'''

from collections import defaultdict
import glob
import json
import os

from data import geojson_util
from data import osm
from bs4 import BeautifulSoup

osm_fetcher = osm.OSM()


def has_polygon(d):
    polys, total = polygon_stats(d)
    return polys > 0


def polygon_stats(d):
    '''Returns (number of polygonal features, total features) in d.'''
    if d['type'] == 'Feature':
        if d['geometry']['type'] in ['Polygon', 'MultiPolygon']:
            return 1, 1
        else:
            return 0, 1
    elif d['type'] == 'FeatureCollection':
        polys, total = 0, 0
        for feat in d['features']:
            # Recurse into each feature; avoid reusing 'd' here, which would
            # shadow the collection being iterated.
            p, t = polygon_stats(feat)
            polys += p
            total += t
        return polys, total
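
# A minimal sketch of what these helpers compute, on hand-built GeoJSON
# (illustrative data only, not from the OSM cache):
#
#   >>> fc = {'type': 'FeatureCollection', 'features': [
#   ...     {'type': 'Feature',
#   ...      'geometry': {'type': 'Polygon', 'coordinates': []}},
#   ...     {'type': 'Feature',
#   ...      'geometry': {'type': 'Point', 'coordinates': []}}]}
#   >>> polygon_stats(fc)
#   (1, 2)
#   >>> has_polygon(fc)
#   True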
import fileinput
import json
import os
import sys

# Project-local imports; the exact module paths are assumptions based on
# how they are used below.
import fetch_metadata
import freebase
import only_polygons
import osm_filter
from data import geojson_util
from data import osm

# make_comparea_id, apply_monkey_patches and adjust_name are defined
# elsewhere in this module.


def main(args):
    osm_fetcher = osm.OSM()
    fb = freebase.Freebase()
    features_out = []
    for line in fileinput.input():
        osm_type, osm_id, wiki_title, name = line.strip().split('\t')[:4]
        if wiki_title.startswith("en:"):
            wiki_title = wiki_title[3:]

        # GeoJSON data.
        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(open(path))
        except (ValueError, IOError):
            continue
        props = osm_filter.get_feature_props(d, osm_type, osm_id)
        if not props:
            continue

        freebase_data = fb._get_from_cache(wiki_title)
        assert freebase_data
        freebase_extract = fetch_metadata.extract_freebase_metadata(
            '', wiki_title, freebase_data)

        # Prefer the clipped-to-land version of the geometry if it exists.
        # land_json = path.replace('.xml.json', '.xml.land.simple.json')
        land_json = path.replace('.xml.json', '.xml.land.json')
        if os.path.exists(land_json):
            try:
                d = json.load(open(land_json))
            except (ValueError, IOError):
                continue
        else:
            sys.stderr.write('Could not find %s\n' % land_json)

        d['id'] = make_comparea_id(osm_type, osm_id)
        props = {
            'population': 0,
            'population_date': '',
            'population_source': '',
            'population_source_url': '',
            'area_km2_source_url': '#'
        }
        props.update(freebase_extract)
        # The area is always recomputed from the geometry below.
        if 'area_km2' in props:
            del props['area_km2']
        props.update({
            'name': wiki_title.replace('_', ' '),
        })

        only_polygons.remove_non_polygons(d)
        geojson_util.make_polygons_clockwise(d)
        apply_monkey_patches(d)

        area_km2 = geojson_util.get_area_of_feature(d) / 1e6
        if area_km2 == 0:
            sys.stderr.write('Discarding %s (no area)\n' % wiki_title)
            continue
        c_lon, c_lat = geojson_util.centroid_of_feature(d)
        props.update({
            'area_km2': area_km2,
            'area_km2_source': 'calculated',
            'centroid_lon': c_lon,
            'centroid_lat': c_lat
        })
        d['properties'] = props
        adjust_name(d, freebase_data)
        features_out.append(d)

    geojson_out = {'type': 'FeatureCollection', 'features': features_out}
    print json.dumps(geojson_out, indent=2)
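
# The emitted FeatureCollection looks roughly like this; all values are
# illustrative, and the id format depends on make_comparea_id:
#
#   {
#     "type": "FeatureCollection",
#     "features": [{
#       "type": "Feature",
#       "id": "...",
#       "geometry": {...},
#       "properties": {
#         "name": "...", "area_km2": ..., "area_km2_source": "calculated",
#         "centroid_lon": ..., "centroid_lat": ..., "population": ...
#       }
#     }]
#   }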