def run():
    freebase_api = freebase.Freebase()
    output = {}
    gj = json.load(open('comparea/static/data/comparea.geo.json'))

    for feature in gj['features']:
        key = feature['id']
        props = feature['properties']
        url = props['wikipedia_url']
        title = freebase.wiki_url_to_title(url)
        if not title:
            # title is empty at this point, so report the feature key.
            sys.stderr.write('ERROR %s has invalid wiki URL: %s\n' % (key, url))
            continue

        try:
            d = freebase_api.get_topic_json(title)
        except IOError:
            sys.stderr.write('ERROR unable to fetch %s\n' % title)
            continue
        if 'error' in d:
            sys.stderr.write('ERROR unable to fetch %s\n' % title)
            continue

        md = extract_freebase_metadata(key, title, d)
        try:
            md.update(cia.get_country_data(key))
        except KeyError:
            pass  # no CIA data
        try:
            md.update(spreadsheet.get_feature_data(key))
        except KeyError:
            pass  # no manual data
        output[key] = md

    print(json.dumps(output, indent=2, sort_keys=True))
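# wiki_url_to_title only has to invert the standard Wikipedia URL scheme.
# A minimal sketch of such a helper, assuming it strips the
# 'http://en.wikipedia.org/wiki/' prefix and unquotes the remainder -- the
# real implementation in the freebase module may differ:
import urllib


def wiki_url_to_title_sketch(url):
    # Hypothetical: return the article title for an English Wikipedia URL,
    # or None when the URL doesn't match the expected prefix.
    prefix = 'http://en.wikipedia.org/wiki/'
    if not url or not url.startswith(prefix):
        return None
    return urllib.unquote(url[len(prefix):])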
def setUp(self):
    self.freebase = freebase.Freebase(api_key=1234, use_cache=False)
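# A test against this fixture might look like the sketch below; the
# assertions are hypothetical and only exercise the module-level
# wiki_url_to_title helper, so the dummy api_key never hits the network.
def test_wiki_url_to_title(self):
    self.assertEqual(
        'New_York_City',
        freebase.wiki_url_to_title(
            'http://en.wikipedia.org/wiki/New_York_City'))
    # Non-Wikipedia URLs should come back falsy (None or '').
    self.assertFalse(freebase.wiki_url_to_title('http://example.com/'))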
def main(args):
    assert len(args) == 2, 'Usage: %s <osm_wiki.csv>' % args[0]
    csv_reader = csv.reader(open(args[1], 'rb'))
    fb = freebase.Freebase()
    osm_fetcher = osm.OSM()

    for row in csv_reader:
        osm_type, osm_id, wiki_title, name = row
        if not is_english_wikititle(wiki_title):
            continue
        if wiki_title.startswith('en:'):
            wiki_title = wiki_title[3:]

        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(open(path))
        except (ValueError, IOError):
            continue

        props = get_feature_props(d, osm_type, osm_id)
        if not props:
            continue
        if not has_polygon(d):
            continue

        freebase_data = fb._get_from_cache(wiki_title)
        if not freebase_data:
            continue
        # The number of language editions is a rough proxy for notability.
        try:
            total_languages = len(
                freebase_data['property']
                ['/common/topic/topic_equivalent_webpage']['values'])
        except KeyError:
            total_languages = 0

        pass_condition = None
        # Keep parks notable enough to have many language editions.
        if props.get('leisure') == 'park' and total_languages >= 10:
            pass_condition = ['park', str(total_languages)]

        admin_level = props.get('admin_level')
        if admin_level:
            admin_level = int(admin_level)
            if admin_level <= 4:
                continue  # no states, countries -- other sources cover these.
            # Counties are generally not that interesting, with a few
            # exceptions like NYC boroughs and Los Angeles.
            if 'County' in name and name not in COUNTY_WHITELIST:
                continue
            d = fetch_metadata.extract_freebase_metadata(
                '', wiki_title, freebase_data)
            # Keep populous administrative areas.
            if d.get('population', 0) >= 100000:
                pass_condition = [
                    'admin_pop', str(admin_level), str(d.get('population'))
                ]

        if pass_condition:
            print '\t'.join(row + pass_condition)
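# The is_english_wikititle check above isn't shown in this excerpt. A
# plausible sketch, assuming bare titles and explicit 'en:' prefixes count
# as English while any other 'xx:' language prefix does not (hypothetical;
# the real predicate may be stricter):
def is_english_wikititle_sketch(title):
    # 'Paris' and 'en:Paris' pass; 'fr:Paris' is filtered out.
    if title.startswith('en:'):
        return True
    return ':' not in title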
osm_type,osm_id,wikipedia_title,name

This doesn't output anything; it just populates the local freebase cache.
'''
import csv
import sys
import urllib2

from data import freebase

if __name__ == '__main__':
    csv_reader = csv.reader(open(sys.argv[1], 'rb'))
    fb = freebase.Freebase()
    for row in csv_reader:
        osm_type, osm_id, wiki_title, name = row[:4]
        if wiki_title.startswith('en:'):
            wiki_title = wiki_title[3:]
        if '://' in wiki_title:
            continue  # a full URL snuck into the title column; skip it
        try:
            fb.get_topic_json(wiki_title)
        except urllib2.HTTPError:
            sys.stderr.write('Failed to fetch fb data for %s\n' % wiki_title)
        except Exception:
            sys.stderr.write('Other error on %s\n' % wiki_title)
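# If the Freebase endpoint ever rate-limits these backfill runs, a small
# throttle is an easy retrofit. A sketch under that assumption -- the
# one-second delay is an arbitrary choice, not a documented quota:
import time


def fetch_politely(fb, wiki_title, delay_secs=1.0):
    # Hypothetical helper: space out API calls so a long CSV doesn't
    # hammer the endpoint; already-cached titles still pay the delay.
    result = fb.get_topic_json(wiki_title)
    time.sleep(delay_secs)
    return result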
def main(args):
    osm_fetcher = osm.OSM()
    fb = freebase.Freebase()
    features_out = []

    for line in fileinput.input():
        osm_type, osm_id, wiki_title, name = line.strip().split('\t')[:4]
        if wiki_title.startswith('en:'):
            wiki_title = wiki_title[3:]

        # GeoJSON data.
        path = os.path.join(osm_fetcher.cache_dir,
                            '%s%s.xml.json' % (osm_type, osm_id))
        try:
            d = json.load(open(path))
        except (ValueError, IOError):
            continue
        props = osm_filter.get_feature_props(d, osm_type, osm_id)
        if not props:
            continue

        freebase_data = fb._get_from_cache(wiki_title)
        assert freebase_data
        freebase_extract = fetch_metadata.extract_freebase_metadata(
            '', wiki_title, freebase_data)

        # Prefer the land-only geometry when it exists.
        land_json = path.replace('.xml.json', '.xml.land.json')
        if os.path.exists(land_json):
            try:
                d = json.load(open(land_json))
            except (ValueError, IOError):
                continue
        else:
            sys.stderr.write('Could not find %s\n' % land_json)

        d['id'] = make_comparea_id(osm_type, osm_id)
        # Rebuild properties from scratch; the OSM tags were only needed
        # for filtering.
        props = {
            'population': 0,
            'population_date': '',
            'population_source': '',
            'population_source_url': '',
            'area_km2_source_url': '#'
        }
        props.update(freebase_extract)
        # The area is recalculated from the geometry below.
        if 'area_km2' in props:
            del props['area_km2']
        props['name'] = wiki_title.replace('_', ' ')

        only_polygons.remove_non_polygons(d)
        geojson_util.make_polygons_clockwise(d)
        apply_monkey_patches(d)

        area_km2 = geojson_util.get_area_of_feature(d) / 1e6
        if area_km2 == 0:
            sys.stderr.write('Discarding %s (no area)\n' % wiki_title)
            continue
        c_lon, c_lat = geojson_util.centroid_of_feature(d)
        props.update({
            'area_km2': area_km2,
            'area_km2_source': 'calculated',
            'centroid_lon': c_lon,
            'centroid_lat': c_lat
        })
        d['properties'] = props
        adjust_name(d, freebase_data)
        features_out.append(d)

    geojson_out = {'type': 'FeatureCollection', 'features': features_out}
    print json.dumps(geojson_out, indent=2)
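# make_comparea_id only has to mint feature ids that can't collide with the
# hand-curated ids in comparea.geo.json. A plausible sketch -- the exact
# format is an assumption, not the repo's actual scheme:
def make_comparea_id_sketch(osm_type, osm_id):
    # Hypothetical: ('relation', '175905') -> 'osm_relation_175905'.
    return 'osm_%s_%s' % (osm_type, osm_id)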