import datetime

from shapely import wkt
from shapely.geometry import mapping

# tab_file, columns, the *_types_map lookups, transliterate, and the
# module-level `dump` writer are project helpers defined elsewhere; hedged
# sketches of a few assumed helpers appear after the functions that use them.


def extract_geojson(csvfile, uri_name, simplify_tolerance):
    features = tab_file(csvfile, columns["nrhp"])
    for feature in features:
        for k, v in feature.items():
            feature[k] = v.strip()
        # skip the header row, which repeats the column names
        if feature["main.REFNUM"] == "main.REFNUM":
            continue
        coords = transform_utm_to_wgs84(int(feature["UTMEASTING"]),
                                        int(feature["UTMNORTHIN"]),
                                        int(feature["UTMZONE"]))
        centroid = [coords[0], coords[1]]
        geometry = {"type": "Point", "coordinates": centroid}
        name = feature["RESNAME"]

        address = {}
        if feature["ADDRESS"] and len(feature["ADDRESS"]) > 3:
            address = {
                "number": "",
                "street": feature["ADDRESS"],
                "city": feature["CITY"],
                "state": feature["STATE"]
            }

        feature_code = "HSTS"  # historic site
        source = feature  # keep all fields in source
        timeframe = {}
        updated = datetime.datetime.utcnow().replace(second=0, microsecond=0).isoformat()
        uri = uri_name + str(feature["main.REFNUM"])

        alternates = []
        if feature["MULTNAME"] and len(feature["MULTNAME"]) > 3:
            alternates = [{"lang": "en", "name": feature["MULTNAME"]}]

        place = {
            "name": name,
            "centroid": centroid,
            "feature_code": feature_code,
            "geometry": geometry,
            "is_primary": True,
            "source": source,
            "alternate": alternates,
            "updated": updated,
            "uris": [uri],
            "relationships": [],
            "timeframe": timeframe,
            "admin": [],
            "address": address
        }
        dump.write(uri, place)
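# transform_utm_to_wgs84 is assumed above rather than defined here. A minimal
# sketch with pyproj, assuming northern-hemisphere UTM zones (EPSG:326xx),
# which holds for US NRHP records; the real helper may use another projection
# library entirely:
from pyproj import Transformer


def transform_utm_to_wgs84(easting, northing, zone):
    # WGS84 / UTM North zones are EPSG:32601-32660
    transformer = Transformer.from_crs("EPSG:326%02d" % zone, "EPSG:4326",
                                       always_xy=True)
    lon, lat = transformer.transform(easting, northing)
    return [lon, lat]  # [x, y] order, matching the centroid construction above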
def extract_shapefile(shapefile, uri_name, simplify_tolerance=None):
    features = tab_file(shapefile, columns["perris"])
    for feature in features:
        # skip the header row, which repeats the column names
        if feature["geom"] == "geom":
            continue
        del feature["user_id"]
        del feature["layer_id"]
        del feature["created_at"]
        del feature["updated_at"]
        for k, v in feature.items():
            feature[k] = v.strip()

        wkt_geom = feature["geom"]
        del feature["geom"]
        geom_obj = wkt.loads(wkt_geom)
        if simplify_tolerance:
            geom_obj = geom_obj.simplify(simplify_tolerance)
        try:
            centroid = [geom_obj.centroid.x, geom_obj.centroid.y]
        except AttributeError:
            print("Error: %s" % feature)
            continue
        geometry = mapping(geom_obj)

        # build a display name from the street address ("<number> <street>")
        number = feature["number"]
        street = ""
        if number:
            street = " "
        street = street + feature["street"]
        addr_name = number + street
        name = addr_name

        # alternate names: if the feature has a proper name, it becomes the
        # primary name and the address moves into the alternates
        alternates = []
        if feature["name"]:
            name = feature["name"]
            if addr_name:
                alternates = [{"lang": "en", "name": addr_name}]
        if feature["secondary_address_temp"]:
            alternates.append({"lang": "en", "name": feature["secondary_address_temp"]})

        address = {
            "number": feature["number"],
            "street": feature["street"],
            "city": "New York City",
            "state": "NY"
        }

        # feature code mapping; a mapped subtype overrides the broader use type
        feature_code = "BLDG"  # default code (building)
        if feature["use_type"]:
            feature_code = use_types_map[feature["use_type"]]
        if feature["use_subtype"]:
            try:
                feature_code = use_sub_types_map[feature["use_subtype"]]
            except KeyError:
                pass

        source = feature  # keep all fields in source

        # unique URI which internally gets converted to the place id.
        # Must be unique!
        uri = uri_name + "." + feature["FID"]

        timeframe = {"start": "1854-01-01", "start_range": 0,
                     "end": "1854-01-01", "end_range": 0}
        updated = "2012-10-01"

        place = {
            "name": name,
            "centroid": centroid,
            "feature_code": feature_code,
            "geometry": geometry,
            "is_primary": True,
            "source": source,
            "alternate": alternates,
            "updated": updated,
            "uris": [uri],
            "relationships": [],
            "timeframe": timeframe,
            "admin": [],
            "address": address
        }
        dump.write(uri, place)
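# tab_file and the columns map are likewise assumed. A minimal sketch of
# tab_file as a generator yielding one dict per tab-separated row, keyed by
# the supplied column names; the header-row checks in the extractors suggest
# the real helper does not skip the header itself. Yielding lazily also
# matters for extract_geonames below, which resumes iteration over the
# alternate-names rows across geonames:
import csv


def tab_file(path, column_names):
    with open(path, newline="") as f:
        for row in csv.reader(f, delimiter="\t"):
            yield dict(zip(column_names, row))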
def extract_geonames(data_path, dump_path):
    alt_names = tab_file(data_path + "/alternateNames.sorted.txt", columns["alternate"])
    geonames = tab_file(data_path + "/allCountries.sorted.txt", columns["geoname"])
    dump = Dump(dump_path + "/geonames/geonames.%04d.json.gz")

    # Both inputs are sorted by geoname_id, so alternate names are merged in
    # a single pass. When the alt_names cursor overshoots into the next
    # geoname's rows, the overshot row is stashed in extra_alt_name for the
    # next iteration.
    extra_alt_name = {}
    for geoname in geonames:
        alt_name_list = []
        if extra_alt_name.get("geoname_id") == geoname["geoname_id"]:
            alt_name_list.append(extra_alt_name)
            extra_alt_name = {}
        for alt_name in alt_names:
            if alt_name["geoname_id"] == geoname["geoname_id"]:
                alt_name_list.append(alt_name)
            else:
                extra_alt_name = alt_name
                break
        geoname["alternate_names"] = alt_name_list

        try:
            for col in ("latitude", "longitude"):
                geoname[col] = float(geoname[col])
        except ValueError:
            # busted coordinates
            continue
        centroid = [geoname["longitude"], geoname["latitude"]]

        population = None
        try:
            population = int(geoname["population"])
        except ValueError:
            pass

        uri = "http://geonames.org/" + geoname["geoname_id"]

        names = []
        # include the primary name among the alternates, flagged so the loop
        # below tags it as preferred
        alt_name_list.append({"name": geoname["name"], "lang": "", "is_preferred": True})
        for alt_name in alt_name_list:
            name_type = ""
            if alt_name.get("is_colloquial"):
                name_type = "colloquial"
            if alt_name.get("is_historic"):
                name_type = "historic"
            if alt_name.get("is_preferred"):
                name_type = "preferred"
            if alt_name.get("is_short"):
                name_type = "short"
            alt_name = {"lang": alt_name["lang"], "type": name_type, "name": alt_name["name"]}
            names.append(alt_name)
            ascii_name = transliterate(alt_name)
            if ascii_name:
                names.append(ascii_name)

        place = {
            "name": geoname["name"],
            "centroid": centroid,
            "feature_code": geoname["feature_code"],
            "geometry": {"type": "Point", "coordinates": centroid},
            "is_primary": True,
            "source": geoname,
            "alternate": names,
            "updated": geoname["changed_at"],
            "population": population,
            "uris": [uri],
            "relationships": [],
            "timeframe": {},
            "admin": []
        }
        dump.write(uri, place)
    dump.close()
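# Dump is assumed as well. A hedged sketch, assuming it batches places into
# numbered gzipped JSON-lines files via the "%04d" pattern in the path; the
# batch size, record layout, and rotation policy here are guesses, not the
# project's actual implementation:
import gzip
import json
import os


class Dump(object):
    def __init__(self, path_pattern, batch_size=10000):
        self.path_pattern = path_pattern  # e.g. ".../geonames.%04d.json.gz"
        self.batch_size = batch_size
        self.count = 0
        self.file_index = 0
        self.fh = None

    def _rotate(self):
        # close the current file and open the next numbered one
        if self.fh:
            self.fh.close()
        path = self.path_pattern % self.file_index
        dirname = os.path.dirname(path)
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)
        self.fh = gzip.open(path, "wt")
        self.file_index += 1

    def write(self, uri, place):
        if self.fh is None or self.count % self.batch_size == 0:
            self._rotate()
        self.fh.write(json.dumps({"uri": uri, "place": place}) + "\n")
        self.count += 1

    def close(self):
        if self.fh:
            self.fh.close()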
def extract_shapefile(shapefile, uri_name, simplify_tolerance=None):
    features = tab_file(shapefile, columns["nrhp"])
    for feature in features:
        for k, v in feature.items():
            feature[k] = v.strip()
        # skip the header row, which repeats the column names
        if feature["Historic_Place_Name"] == "Historic_Place_Name":
            continue
        centroid = [float(feature["Longitude"]), float(feature["Latitude"])]
        geometry = {"type": "Point", "coordinates": centroid}

        name = ""  # guard against rows with neither an address nor a historic name
        addr = ""
        if feature["Address"]:
            name = feature["Address"]
            addr = name

        # alternate names: if a historic name exists, it becomes the primary
        # name and the street address moves into the alternates
        alternates = []
        if feature["Historic_Place_Name"]:
            name = feature["Historic_Place_Name"]
            if addr:
                alternates = [{"lang": "en", "name": addr}]

        address = {
            "number": "",
            "street": feature["Address"],
            "city": feature["City"],
            "state": feature["State"]
        }

        # feature code mapping
        feature_code = "HSTS"  # historic site

        source = feature  # keep all fields anyhow

        # unique URI which internally gets converted to the place id.
        # Must be unique!
        uri = uri_name + "." + feature["NPS_Reference_Number"]

        timeframe = {}
        updated = "2009-06-23"

        place = {
            "name": name,
            "centroid": centroid,
            "feature_code": feature_code,
            "geometry": geometry,
            "is_primary": True,
            "source": source,
            "alternate": alternates,
            "updated": updated,
            "uris": [uri],
            "relationships": [],
            "timeframe": timeframe,
            "admin": [],
            "address": address
        }
        dump.write(uri, place)
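# A hedged sketch of how one of these extractors might be driven; the paths
# and URI prefix are hypothetical, and the module-level `dump` writer is
# wired up here only for illustration. (The two extract_shapefile variants
# above presumably live in separate per-source scripts in the project.)
if __name__ == "__main__":
    dump = Dump("dumps/nrhp/nrhp.%04d.json.gz")  # hypothetical output path
    extract_shapefile("data/nrhp.tsv", "http://example.org/nrhp")
    dump.close()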