Beispiel #1
0
def extract_geojson(csvfile, uri_name, simplify_tolerance):
    """Turn each row of an ``nrhp`` tab file into a point place and dump it.

    Rows are read with ``tab_file(csvfile, columns["nrhp"])``.  Every field is
    whitespace-stripped, the row's UTM easting/northing/zone are converted
    with ``transform_utm_to_wgs84``, and the resulting place dict is handed to
    ``dump.write(uri, place)``.  ``dump`` is not created here; it must already
    exist as a global when this function runs.

    The place carries an ``"address"`` entry only when the row's ``ADDRESS``
    field is longer than three characters.

    Args:
        csvfile: Passed unchanged to ``tab_file``.
        uri_name: Prefix of the place URI; the row's ``main.REFNUM`` is
            appended directly, with no separator.
        simplify_tolerance: Accepted but not used by this function.

    Raises:
        ValueError: If a UTM field cannot be parsed by ``int``.
    """
    features = tab_file(csvfile, columns["nrhp"])

    for feature in features:
        for key, value in list(feature.items()):
            feature[key] = value.strip()

        # Skip the row whose REFNUM field is the literal column name.
        if feature["main.REFNUM"] == "main.REFNUM":
            continue

        coords = transform_utm_to_wgs84(
            int(feature["UTMEASTING"]),
            int(feature["UTMNORTHIN"]),
            int(feature["UTMZONE"]),
        )
        centroid = [coords[0], coords[1]]
        geometry = {"type": "Point", "coordinates": centroid}

        # Reset for every row so an address can never leak over from an
        # earlier row that happened to have one.
        address = None
        if feature["ADDRESS"] and len(feature["ADDRESS"]) > 3:
            address = {
                "number": '',
                "street": feature["ADDRESS"],
                "city": feature["CITY"],
                "state": feature["STATE"],
            }

        alternates = []
        if feature["MULTNAME"] and len(feature["MULTNAME"]) > 3:
            alternates = [{"lang": "en", "name": feature["MULTNAME"]}]

        # Timestamp is truncated to the minute.
        updated = datetime.datetime.utcnow().replace(
            second=0, microsecond=0).isoformat()
        uri = uri_name + str(feature["main.REFNUM"])

        place = {
            "name": feature["RESNAME"],
            "centroid": centroid,
            "feature_code": "HSTS",
            "geometry": geometry,
            "is_primary": True,
            "source": feature,  # the whole stripped row is kept as the source
            "alternate": alternates,
            "updated": updated,
            "uris": [uri],
            "relationships": [],
            "timeframe": {},
            "admin": [],
        }
        # The address used to be built and then discarded; attach it so the
        # record actually carries the data that was assembled for it.
        if address is not None:
            place["address"] = address

        dump.write(uri, place)
def extract_shapefile(shapefile, uri_name, simplify_tolerance=None):
    """Turn each row of a ``perris`` tab file into a place and dump it.

    Rows are read with ``tab_file(shapefile, columns["perris"])``.  The row's
    ``geom`` field is parsed with ``wkt.loads`` and converted with
    ``mapping``; the remaining fields become the place's ``source``.  Each
    place is handed to ``dump.write(uri, place)``; ``dump`` is not created
    here and must already exist as a global.

    Args:
        shapefile: Passed unchanged to ``tab_file``.
        uri_name: URI prefix; ``"."`` and the row's ``FID`` are appended.
        simplify_tolerance: When truthy, passed to the geometry's
            ``simplify`` method before the centroid is taken.

    Raises:
        KeyError: If a row's non-empty ``use_type`` is missing from
            ``use_types_map``, or one of the deleted columns is absent.
    """
    features = tab_file(shapefile, columns["perris"])

    for feature in features:
        # Skip the first item: its "geom" field is the literal column name.
        if feature["geom"] == "geom":
            continue

        # These columns are removed before the row is stored as "source".
        for unwanted in ("user_id", "layer_id", "created_at", "updated_at"):
            del feature[unwanted]

        for key, value in list(feature.items()):
            feature[key] = value.strip()

        wkt_geom = feature["geom"]
        del feature["geom"]
        geom_obj = wkt.loads(wkt_geom)

        if simplify_tolerance:
            geom_obj = geom_obj.simplify(simplify_tolerance)

        try:
            centroid = [geom_obj.centroid.x, geom_obj.centroid.y]
        except AttributeError:
            # Call form works on Python 2 and 3.  The two spaces after the
            # colon keep the emitted text identical to what the Python 2
            # statement `print "Error: ", feature` produced.
            print("Error:  %s" % (feature,))
            continue
        geometry = mapping(geom_obj)

        # "<number> <street>", without the separating space if number is empty.
        number = feature["number"]
        separator = " " if number else ""
        addr_name = number + separator + feature["street"]

        # The address is the name unless the row has an explicit name, in
        # which case the address (if any) moves to the alternate names.
        name = addr_name
        alternates = []
        if feature["name"]:
            name = feature["name"]
            if addr_name:
                alternates = [{"lang": "en", "name": addr_name}]

        if feature["secondary_address_temp"]:
            alternates.append({
                "lang": "en",
                "name": feature["secondary_address_temp"],
            })

        address = {
            "number": feature["number"],
            "street": feature["street"],
            "city": "New York City",
            "state": "NY",
        }

        # Feature code mapping; "BLDG" is the default code (building).
        feature_code = "BLDG"
        if feature["use_type"]:
            # NOTE(review): an unmapped use_type raises KeyError here, while
            # an unmapped use_subtype is silently ignored below - confirm the
            # asymmetry is intended.
            feature_code = use_types_map[feature["use_type"]]
        if feature["use_subtype"]:
            try:
                feature_code = use_sub_types_map[feature["use_subtype"]]
            except KeyError:
                pass

        # Unique URI which internally gets converted to the place id.
        # Must be unique!
        uri = uri_name + "." + feature["FID"]

        place = {
            "name": name,
            "centroid": centroid,
            "feature_code": feature_code,
            "geometry": geometry,
            "is_primary": True,
            "source": feature,  # keep all remaining fields in source
            "alternate": alternates,
            "updated": "2012-10-01",
            "uris": [uri],
            "relationships": [],
            "timeframe": {
                "start": "1854-01-01",
                "start_range": 0,
                "end": "1854-01-01",
                "end_range": 0,
            },
            "admin": [],
            "address": address,
        }

        dump.write(uri, place)
def extract_shapefile(shapefile, uri_name, simplify_tolerance=None):
    """Convert the rows of a ``perris`` tab file into dumped place records.

    Each row from ``tab_file(shapefile, columns["perris"])`` has its ``geom``
    field parsed by ``wkt.loads`` and serialised by ``mapping``.  The fields
    left after removing ``geom`` and four other columns are stored as the
    place's ``source``.  Places are written with ``dump.write(uri, place)``;
    ``dump`` is a global that this function does not create.

    Args:
        shapefile: Forwarded to ``tab_file`` as is.
        uri_name: URI prefix, joined to the row's ``FID`` with a ``"."``.
        simplify_tolerance: If truthy, the geometry is replaced by
            ``geom_obj.simplify(simplify_tolerance)`` before use.

    Raises:
        KeyError: If a non-empty ``use_type`` has no entry in
            ``use_types_map``, or a column that is deleted is missing.
    """
    features = tab_file(shapefile, columns["perris"])

    for feature in features:

        if feature["geom"] == "geom":
            # skip first item (its "geom" value is the column name itself)
            continue

        # Dropped so they are not part of the stored source row.
        del feature["user_id"]
        del feature["layer_id"]
        del feature["created_at"]
        del feature["updated_at"]

        for key in list(feature.keys()):
            feature[key] = feature[key].strip()

        wkt_geom = feature["geom"]
        del feature["geom"]

        geom_obj = wkt.loads(wkt_geom)
        if simplify_tolerance:
            geom_obj = geom_obj.simplify(simplify_tolerance)

        try:
            centroid = [geom_obj.centroid.x, geom_obj.centroid.y]
        except AttributeError:
            # Written as a call so the block parses on Python 3 as well as
            # Python 2; the doubled space reproduces exactly what the
            # Python 2 statement `print "Error: ", feature` printed.
            print("Error:  %s" % (feature,))
            continue
        geometry = mapping(geom_obj)

        # House number and street, separated by a space only when there is
        # a number.
        number = feature["number"]
        if number:
            addr_name = number + " " + feature["street"]
        else:
            addr_name = number + feature["street"]

        # alternate names
        alternates = []
        if feature["name"]:
            name = feature["name"]
            # put address in alternate names
            if addr_name:
                alternates = [{"lang": "en", "name": addr_name}]
        else:
            name = addr_name

        if feature["secondary_address_temp"]:
            alternates.append(
                {"lang": "en", "name": feature["secondary_address_temp"]})

        address = {
            "number": feature["number"],
            "street": feature["street"],
            "city": "New York City",
            "state": "NY",
        }

        # feature code mapping, default code is "BLDG" (building)
        feature_code = "BLDG"
        if feature["use_type"]:
            # NOTE(review): unlike the subtype lookup below, a use_type with
            # no mapping raises KeyError and stops the run - verify that is
            # the desired behaviour.
            feature_code = use_types_map[feature["use_type"]]
        if feature["use_subtype"]:
            try:
                feature_code = use_sub_types_map[feature["use_subtype"]]
            except KeyError:
                pass

        source = feature  # keep all remaining fields in source

        # unique URI which internally gets converted to the place id
        # Must be unique!
        uri = uri_name + "." + feature["FID"]

        timeframe = {
            "start": "1854-01-01",
            "start_range": 0,
            "end": "1854-01-01",
            "end_range": 0,
        }

        updated = "2012-10-01"

        place = {
            "name": name,
            "centroid": centroid,
            "feature_code": feature_code,
            "geometry": geometry,
            "is_primary": True,
            "source": source,
            "alternate": alternates,
            "updated": updated,
            "uris": [uri],
            "relationships": [],
            "timeframe": timeframe,
            "admin": [],
            "address": address,
        }

        dump.write(uri, place)
def extract_geonames(data_path, dump_path):
    """Join geoname rows with their alternate names and dump place records.

    Reads ``<data_path>/allCountries.sorted.txt`` and
    ``<data_path>/alternateNames.sorted.txt`` through ``tab_file`` and writes
    one place per geoname row to a ``Dump`` built from
    ``<dump_path>/geonames/geonames.%04d.json.gz``.  Rows whose latitude or
    longitude cannot be parsed as a float are skipped.  The dump is closed
    even if processing a row raises.

    Args:
        data_path: Directory prefix of the two input files.
        dump_path: Directory prefix of the dump file name pattern.
    """
    alt_names = tab_file(data_path + "/alternateNames.sorted.txt",
                         columns["alternate"])
    geonames = tab_file(data_path + "/allCountries.sorted.txt",
                        columns["geoname"])
    dump = Dump(dump_path + "/geonames/geonames.%04d.json.gz")

    # Look-ahead row: the first alternate-name row that did not belong to
    # the geoname being processed when it was read.
    extra_alt_name = {}
    try:
        for geoname in geonames:
            alt_name_list = []
            if extra_alt_name.get("geoname_id") == geoname["geoname_id"]:
                alt_name_list.append(extra_alt_name)
                extra_alt_name = {}
            # alt_names is advanced across iterations of the outer loop, so
            # this relies on tab_file returning a single-pass iterator and on
            # both files being ordered by geoname_id - presumably what the
            # ".sorted" file names mean; verify.
            #
            # NOTE(review): if the look-ahead row did not match above (e.g.
            # this geoname has no alternate names), the loop below still reads
            # one more row and overwrites the look-ahead, so that row is
            # dropped.  Telling a row that is "ahead" from one that has no
            # geoname at all requires knowing the sort order of the files,
            # which is not visible here - left unchanged pending confirmation.
            for alt_name in alt_names:
                if alt_name["geoname_id"] == geoname["geoname_id"]:
                    alt_name_list.append(alt_name)
                else:
                    extra_alt_name = alt_name
                    break
            geoname["alternate_names"] = alt_name_list

            try:
                for col in ("latitude", "longitude"):
                    geoname[col] = float(geoname[col])
            except ValueError:
                # busted coordinates
                continue
            centroid = [geoname["longitude"], geoname["latitude"]]

            population = None
            try:
                population = int(geoname["population"])
            except ValueError:
                pass

            uri = "http://geonames.org/" + geoname["geoname_id"]

            # The geoname's own name is added as one more entry.  The list is
            # the same object stored in geoname["alternate_names"], so this
            # entry is visible in the place's source as well.
            alt_name_list.append({
                "name": geoname["name"],
                "lang": "",
                "type": "preferred"
            })

            names = []
            for alt_name in alt_name_list:
                # Start from an explicit "type" if the entry has one.  Within
                # this function only the entry appended above does; its
                # "preferred" type used to be discarded because the type was
                # rebuilt from the is_* flags alone.
                name_type = alt_name.get("type", "")
                # Later flags win when several are set.
                if alt_name.get("is_colloquial"):
                    name_type = "colloquial"
                if alt_name.get("is_historic"):
                    name_type = "historic"
                if alt_name.get("is_preferred"):
                    name_type = "preferred"
                if alt_name.get("is_short"):
                    name_type = "short"
                alt_name = {
                    "lang": alt_name["lang"],
                    "type": name_type,
                    "name": alt_name["name"]
                }
                names.append(alt_name)
                ascii_name = transliterate(alt_name)
                if ascii_name:
                    names.append(ascii_name)

            place = {
                "name": geoname["name"],
                "centroid": centroid,
                "feature_code": geoname["feature_code"],
                "geometry": {"type": "Point", "coordinates": centroid},
                "is_primary": True,
                "source": geoname,
                "alternate": names,
                "updated": geoname["changed_at"],
                "population": population,
                "uris": [uri],
                "relationships": [],
                "timeframe": {},
                "admin": []
            }
            dump.write(uri, place)
    finally:
        # Close the dump on every exit path, not only after a clean run.
        dump.close()
Beispiel #5
0
def extract_shapefile(shapefile, uri_name, simplify_tolerance=None):
    """Turn each row of an ``nrhp`` tab file into a point place and dump it.

    Rows come from ``tab_file(shapefile, columns["nrhp"])``; every field is
    whitespace-stripped before use.  The place is named after
    ``Historic_Place_Name`` when that is non-empty, otherwise after
    ``Address``; when both are non-empty the address becomes an English
    alternate name.  If both are empty the name is the empty string.  Each
    place is handed to ``dump.write(uri, place)``; ``dump`` is not created
    here and must already exist as a global.

    Args:
        shapefile: Passed unchanged to ``tab_file``.
        uri_name: URI prefix; ``"."`` and the row's ``NPS_Reference_Number``
            are appended to it.
        simplify_tolerance: Accepted but not used by this function.

    Raises:
        ValueError: If ``Longitude`` or ``Latitude`` cannot be parsed by
            ``float``.
    """
    features = tab_file(shapefile, columns["nrhp"])

    for feature in features:
        for key, value in list(feature.items()):
            feature[key] = value.strip()

        # Skip the row whose name field is the literal column name.
        if feature["Historic_Place_Name"] == "Historic_Place_Name":
            continue

        centroid = [float(feature["Longitude"]), float(feature["Latitude"])]
        geometry = {"type": "Point", "coordinates": centroid}

        addr = feature["Address"]
        historic_name = feature["Historic_Place_Name"]

        # Bind the name on every row.  It used to be assigned only inside
        # the two "if" branches, so a row with neither field either raised
        # NameError or silently reused the previous row's name.
        name = historic_name or addr

        alternates = []
        if historic_name and addr:
            alternates = [{"lang": "en", "name": addr}]

        address = {
            "number": '',
            "street": feature["Address"],
            "city": feature["City"],
            "state": feature["State"],
        }

        # unique URI which internally gets converted to the place id
        # Must be unique!
        uri = uri_name + "." + feature["NPS_Reference_Number"]

        place = {
            "name": name,
            "centroid": centroid,
            "feature_code": "HSTS",
            "geometry": geometry,
            "is_primary": True,
            "source": feature,  # keep all fields anyhow
            "alternate": alternates,
            "updated": "2009-06-23",
            "uris": [uri],
            "relationships": [],
            "timeframe": {},
            "admin": [],
            "address": address,
        }
        dump.write(uri, place)
def extract_shapefile(shapefile, uri_name, simplify_tolerance=None):
    """Convert the rows of an ``nrhp`` tab file into dumped point places.

    Every field of each row from ``tab_file(shapefile, columns["nrhp"])`` is
    whitespace-stripped.  A place takes its name from ``Historic_Place_Name``
    if that is non-empty, else from ``Address``, else it is ``""``.  When a
    historic name and an address are both present, the address is recorded
    as an English alternate name.  Places are written with
    ``dump.write(uri, place)``; ``dump`` is a global that this function does
    not create.

    Args:
        shapefile: Forwarded to ``tab_file`` as is.
        uri_name: URI prefix, joined to the row's ``NPS_Reference_Number``
            with a ``"."``.
        simplify_tolerance: Accepted but not used by this function.

    Raises:
        ValueError: If ``float`` cannot parse ``Longitude`` or ``Latitude``.
    """
    features = tab_file(shapefile, columns["nrhp"])

    for feature in features:
        for key in list(feature.keys()):
            feature[key] = feature[key].strip()

        # The row that repeats the column name in this field is skipped.
        if feature["Historic_Place_Name"] == "Historic_Place_Name":
            continue

        centroid = [float(feature["Longitude"]), float(feature["Latitude"])]
        geometry = {"type": "Point", "coordinates": centroid}

        # Start every row from a defined, empty name.  Without this a row
        # that has neither an address nor a historic name would inherit the
        # name of the row before it (or raise NameError on the first row).
        name = ""
        addr = ""
        if feature["Address"]:
            name = feature["Address"]
            addr = name

        alternates = []

        if feature["Historic_Place_Name"]:
            name = feature["Historic_Place_Name"]
            if addr:
                alternates = [{
                    "lang": "en",
                    "name": addr
                }]

        address = {
            "number": '',
            "street": feature["Address"],
            "city": feature["City"],
            "state": feature["State"]
        }
        # feature code mapping
        feature_code = "HSTS"

        source = feature  # keep all fields anyhow

        # unique URI which internally gets converted to the place id
        # Must be unique!
        uri = uri_name + "." + feature["NPS_Reference_Number"]

        timeframe = {}

        updated = "2009-06-23"

        place = {
            "name": name,
            "centroid": centroid,
            "feature_code": feature_code,
            "geometry": geometry,
            "is_primary": True,
            "source": source,
            "alternate": alternates,
            "updated": updated,
            "uris": [uri],
            "relationships": [],
            "timeframe": timeframe,
            "admin": [],
            "address": address
        }
        dump.write(uri, place)
Beispiel #7
0
def extract_geonames(data_path, dump_path):
    """Merge geoname rows with alternate-name rows and dump place records.

    The two inputs, ``<data_path>/allCountries.sorted.txt`` and
    ``<data_path>/alternateNames.sorted.txt``, are read through ``tab_file``.
    One place per geoname row is written to a ``Dump`` created from
    ``<dump_path>/geonames/geonames.%04d.json.gz``; rows whose latitude or
    longitude is not a valid float are skipped.  The dump is closed on every
    exit path, including when a row raises.

    Args:
        data_path: Directory prefix of the two input files.
        dump_path: Directory prefix of the dump file name pattern.
    """
    alt_names = tab_file(data_path + "/alternateNames.sorted.txt",
                         columns["alternate"])
    geonames = tab_file(data_path + "/allCountries.sorted.txt",
                        columns["geoname"])
    dump = Dump(dump_path + "/geonames/geonames.%04d.json.gz")

    # Row read from alt_names that did not match the geoname current at the
    # time; kept so it can be claimed by the next geoname.
    extra_alt_name = {}
    try:
        for geoname in geonames:
            geoname_id = geoname["geoname_id"]

            alt_name_list = []
            if extra_alt_name.get("geoname_id") == geoname_id:
                alt_name_list.append(extra_alt_name)
                extra_alt_name = {}
            # The inner loop continues where the previous geoname stopped,
            # which assumes tab_file yields a single-pass iterator and that
            # both files are ordered by geoname_id - presumably so, given the
            # ".sorted" file names; TODO confirm.
            #
            # NOTE(review): when the kept row was NOT claimed above, the loop
            # below nevertheless reads a further row and replaces the kept
            # one, losing it.  Fixing this safely needs the files' sort order
            # (to distinguish a row for a later geoname from one with no
            # geoname), which cannot be determined from this function.
            for alt_name in alt_names:
                if alt_name["geoname_id"] != geoname_id:
                    extra_alt_name = alt_name
                    break
                alt_name_list.append(alt_name)
            geoname["alternate_names"] = alt_name_list

            try:
                for col in ("latitude", "longitude"):
                    geoname[col] = float(geoname[col])
            except ValueError:
                # busted coordinates
                continue
            centroid = [geoname["longitude"], geoname["latitude"]]

            try:
                population = int(geoname["population"])
            except ValueError:
                population = None

            uri = "http://geonames.org/" + geoname["geoname_id"]

            # Add the geoname's own name as a final entry.  alt_name_list is
            # the very list stored under geoname["alternate_names"], so the
            # entry also shows up in the place's source.
            alt_name_list.append({
                "name": geoname["name"],
                "lang": "",
                "type": "preferred"
            })

            names = []
            for alt_name in alt_name_list:
                # Honour an explicit "type" (only the entry appended above
                # has one in this function); previously it was ignored and
                # the geoname's own name was emitted with an empty type.
                name_type = alt_name.get("type", "")
                # Checked in this order so that a later flag overrides an
                # earlier one when several are set.
                for flag, label in (("is_colloquial", "colloquial"),
                                    ("is_historic", "historic"),
                                    ("is_preferred", "preferred"),
                                    ("is_short", "short")):
                    if alt_name.get(flag):
                        name_type = label
                entry = {
                    "lang": alt_name["lang"],
                    "type": name_type,
                    "name": alt_name["name"]
                }
                names.append(entry)
                ascii_name = transliterate(entry)
                if ascii_name:
                    names.append(ascii_name)

            place = {
                "name": geoname["name"],
                "centroid": centroid,
                "feature_code": geoname["feature_code"],
                "geometry": {
                    "type": "Point",
                    "coordinates": centroid
                },
                "is_primary": True,
                "source": geoname,
                "alternate": names,
                "updated": geoname["changed_at"],
                "population": population,
                "uris": [uri],
                "relationships": [],
                "timeframe": {},
                "admin": []
            }
            dump.write(uri, place)
    finally:
        # Previously skipped whenever any row raised.
        dump.close()