コード例 #1
0
def extract_geonames(data_path, dump_path):
    """Convert the sorted GeoNames dumps into gazetteer place records.

    Reads ``alternateNames.sorted.txt`` and ``allCountries.sorted.txt`` from
    *data_path* through ``tab_file``, attaches to each geoname the alternate
    names sharing its ``geoname_id``, and writes one place document per
    geoname to a ``Dump`` created under ``dump_path + "/geonames/"``.

    Geonames whose latitude or longitude cannot be parsed as a float are
    skipped.  A population that cannot be parsed as an int is stored as
    ``None``.  The dump is closed even if processing fails part-way.

    NOTE: the join is a single forward pass over both inputs.  It assumes
    they list ``geoname_id`` values in the same order and that every
    alternate name belongs to a geoname present in the geonames file; an
    alternate name without a matching geoname is held as the look-ahead
    record and blocks the alternate names after it.
    """
    # iter() guarantees one shared cursor over the alternate names, so each
    # geoname resumes reading where the previous one stopped.
    alt_names = iter(tab_file(data_path + "/alternateNames.sorted.txt",
                              columns["alternate"]))
    geonames = tab_file(data_path + "/allCountries.sorted.txt",
                        columns["geoname"])
    dump = Dump(dump_path + "/geonames/geonames.%04d.json.gz")

    # Look-ahead record: the first alternate name read that did not belong
    # to the geoname being processed.  It is kept until a geoname claims it;
    # we never read past it, otherwise it would be lost whenever a geoname
    # has no alternate names at all.
    pending = None
    try:
        for geoname in geonames:
            geoname_id = geoname["geoname_id"]

            alt_name_list = []
            while True:
                if pending is None:
                    pending = next(alt_names, None)
                    if pending is None:
                        break  # alternate names exhausted
                if pending["geoname_id"] != geoname_id:
                    break  # belongs to another geoname; keep it pending
                alt_name_list.append(pending)
                pending = None
            geoname["alternate_names"] = alt_name_list

            try:
                for col in ("latitude", "longitude"):
                    geoname[col] = float(geoname[col])
            except ValueError:
                # Busted coordinates: no usable centroid, drop the record.
                continue
            centroid = [geoname["longitude"], geoname["latitude"]]

            try:
                population = int(geoname["population"])
            except ValueError:
                population = None

            uri = "http://geonames.org/" + geoname_id

            # The primary name is emitted as one more (last) name entry.
            # NOTE(review): alt_name_list is the same list object stored in
            # geoname["alternate_names"], so this synthetic entry also shows
            # up in the dumped "source" record -- confirm that is wanted.
            primary_name = {
                "name": geoname["name"],
                "lang": "",
                "type": "preferred"
            }
            alt_name_list.append(primary_name)

            names = []
            for alt_name in alt_name_list:
                # The primary name carries no is_* flags; without this
                # default its "preferred" type would be reset to "".
                name_type = "preferred" if alt_name is primary_name else ""
                # Later flags win: short > preferred > historic > colloquial.
                if alt_name.get("is_colloquial"):
                    name_type = "colloquial"
                if alt_name.get("is_historic"):
                    name_type = "historic"
                if alt_name.get("is_preferred"):
                    name_type = "preferred"
                if alt_name.get("is_short"):
                    name_type = "short"
                name = {
                    "lang": alt_name["lang"],
                    "type": name_type,
                    "name": alt_name["name"]
                }
                names.append(name)
                ascii_name = transliterate(name)
                if ascii_name:
                    names.append(ascii_name)

            place = {
                "name": geoname["name"],
                "centroid": centroid,
                "feature_code": geoname["feature_code"],
                "geometry": {"type": "Point", "coordinates": centroid},
                "is_primary": True,
                "source": geoname,
                "alternate": names,
                "updated": geoname["changed_at"],
                "population": population,
                "uris": [uri],
                "relationships": [],
                "timeframe": {},
                "admin": []
            }
            dump.write(uri, place)
    finally:
        dump.close()
コード例 #2
0
def extract_geonames(data_path, dump_path):
    """Convert the sorted GeoNames dumps into gazetteer place records.

    Reads ``alternateNames.sorted.txt`` and ``allCountries.sorted.txt`` from
    *data_path* through ``tab_file``, attaches to each geoname the alternate
    names sharing its ``geoname_id``, and writes one place document per
    geoname to a ``Dump`` created under ``dump_path + "/geonames/"``.

    Geonames whose latitude or longitude cannot be parsed as a float are
    skipped.  A population that cannot be parsed as an int is stored as
    ``None``.  The dump is closed even if processing fails part-way.

    NOTE: the join is a single forward pass over both inputs.  It assumes
    they list ``geoname_id`` values in the same order and that every
    alternate name belongs to a geoname present in the geonames file; an
    alternate name without a matching geoname is held as the look-ahead
    record and blocks the alternate names after it.
    """
    # iter() guarantees one shared cursor over the alternate names, so each
    # geoname resumes reading where the previous one stopped.
    alt_names = iter(tab_file(data_path + "/alternateNames.sorted.txt",
                              columns["alternate"]))
    geonames = tab_file(data_path + "/allCountries.sorted.txt",
                        columns["geoname"])
    dump = Dump(dump_path + "/geonames/geonames.%04d.json.gz")

    # Look-ahead record: the first alternate name read that did not belong
    # to the geoname being processed.  It is kept until a geoname claims it;
    # we never read past it, otherwise it would be lost whenever a geoname
    # has no alternate names at all.
    pending = None
    try:
        for geoname in geonames:
            geoname_id = geoname["geoname_id"]

            alt_name_list = []
            while True:
                if pending is None:
                    pending = next(alt_names, None)
                    if pending is None:
                        break  # alternate names exhausted
                if pending["geoname_id"] != geoname_id:
                    break  # belongs to another geoname; keep it pending
                alt_name_list.append(pending)
                pending = None
            geoname["alternate_names"] = alt_name_list

            try:
                for col in ("latitude", "longitude"):
                    geoname[col] = float(geoname[col])
            except ValueError:
                # Busted coordinates: no usable centroid, drop the record.
                continue
            centroid = [geoname["longitude"], geoname["latitude"]]

            try:
                population = int(geoname["population"])
            except ValueError:
                population = None

            uri = "http://geonames.org/" + geoname_id

            # The primary name is emitted as one more (last) name entry.
            # NOTE(review): alt_name_list is the same list object stored in
            # geoname["alternate_names"], so this synthetic entry also shows
            # up in the dumped "source" record -- confirm that is wanted.
            primary_name = {
                "name": geoname["name"],
                "lang": "",
                "type": "preferred"
            }
            alt_name_list.append(primary_name)

            names = []
            for alt_name in alt_name_list:
                # The primary name carries no is_* flags; without this
                # default its "preferred" type would be reset to "".
                name_type = "preferred" if alt_name is primary_name else ""
                # Later flags win: short > preferred > historic > colloquial.
                if alt_name.get("is_colloquial"):
                    name_type = "colloquial"
                if alt_name.get("is_historic"):
                    name_type = "historic"
                if alt_name.get("is_preferred"):
                    name_type = "preferred"
                if alt_name.get("is_short"):
                    name_type = "short"
                name = {
                    "lang": alt_name["lang"],
                    "type": name_type,
                    "name": alt_name["name"]
                }
                names.append(name)
                ascii_name = transliterate(name)
                if ascii_name:
                    names.append(ascii_name)

            place = {
                "name": geoname["name"],
                "centroid": centroid,
                "feature_code": geoname["feature_code"],
                "geometry": {
                    "type": "Point",
                    "coordinates": centroid
                },
                "is_primary": True,
                "source": geoname,
                "alternate": names,
                "updated": geoname["changed_at"],
                "population": population,
                "uris": [uri],
                "relationships": [],
                "timeframe": {},
                "admin": []
            }
            dump.write(uri, place)
    finally:
        dump.close()
コード例 #3
0
ファイル: osm.py プロジェクト: LibraryOfCongress/gazetteer
def extract_osm(database, table, osm_type, dump):
    """Export rows of an OSM PostGIS table as gazetteer place records.

    Connects to the PostgreSQL database *database*, selects every row of
    *table* whose centroid is a point, and hands one place document per row
    to ``dump.write(uri, place)``.  Rows without a usable name or without a
    feature code (looked up in ``admin_level_map``, then in
    ``feature_code_map`` via ``key_tags``) are skipped.

    *table* is interpolated into the SQL text (identifiers cannot be bound
    as query parameters), so it must come from a trusted caller.
    *osm_type* is the element type placed in the place URI.
    *dump* is only written to; closing it is left to the caller.  The
    database connection is closed before returning, also on error.
    """
    geom_col = "way"
    # Columns that are replaced by derived values and kept out of "source".
    derived_columns = ("geojson", "centroid_x", "centroid_y", "way")

    conn = psycopg2.connect("dbname=" + database)
    try:
        # Passing a name makes psycopg2 create a server-side cursor.
        cursor = conn.cursor(table + "_get_items")
        cursor.execute("""SELECT *, X(st_centroid(%s)) AS centroid_x,
                                Y(st_centroid(%s)) AS centroid_y,
                                ST_AsGeoJSON(%s) AS geojson FROM %s
                                WHERE ST_GeometryType(ST_Centroid(way)) = 'ST_Point'"""
                       % (geom_col, geom_col, geom_col, table))
        for row in Result(cursor):
            # Fall back to "name" also when "name:en" is present but empty.
            preferred_name = row.get("name:en") or row.get("name")
            feature_code = admin_level_map.get(str(row["admin_level"]))
            if not feature_code:
                for tag in key_tags:
                    if row.get(tag) and feature_code_map.get((tag, row[tag])):
                        feature_code = feature_code_map[tag, row[tag]]
                        break
            if not preferred_name or not feature_code:
                continue

            centroid = [row["centroid_x"], row["centroid_y"]]
            geometry = json.loads(row["geojson"])

            uri = "http://osm.org/browse/%s/%s" % (osm_type, row["osm_id"])
            if "way/-" in uri:
                # A negative id under "way" is rewritten to a relation URI;
                # a checksum of the geometry is added as fragment.
                # crc32 needs bytes on Python 3, so encode text first.
                geojson = row["geojson"]
                if not isinstance(geojson, bytes):
                    geojson = geojson.encode("utf-8")
                checksum = binascii.crc32(geojson) & 0xffffffff
                uri = uri.replace("way/-", "relation/") + ("#%d" % checksum)

            names = []
            if row["name"]:
                names.append({"name": row["name"], "lang": "", "type": "preferred"})
            if row["name:"]:
                # row["name:"] is treated as '"lang"=>"name"' pairs and
                # turned into a JSON object; unparseable values are ignored.
                try:
                    alt_names = json.loads(
                        "{" + row["name:"].replace("=>", ":") + "}")
                except (ValueError, UnicodeDecodeError):
                    alt_names = {}
                for lang, name in alt_names.items():
                    names.append({
                        "name": name,
                        "lang": lang,
                        "type": "preferred"
                    })

            # Iterate over a snapshot: the loop appends to names, and
            # growing a list while iterating it would also revisit the
            # freshly added transliterations.
            for alt_name in list(names):
                ascii_name = transliterate(alt_name)
                if ascii_name and ascii_name["name"] != alt_name["name"]:
                    names.append(ascii_name)

            # Keep only non-empty original columns.  Built as a new dict
            # because deleting keys while iterating fails on Python 3.
            source = {
                key: value for key, value in dict(row).items()
                if value and key not in derived_columns
            }

            place = {
                "name": preferred_name,
                "centroid": centroid,
                "feature_code": feature_code,
                "geometry": geometry,
                "is_primary": True,
                "source": source,
                "alternate": names,
                "updated": row["timestamp"],
                "uris": [uri],
                "relationships": [],
                "timeframe": {},
                "admin": []
            }
            dump.write(uri, place)
    finally:
        # Closing the connection also ends the server-side cursor.
        conn.close()
コード例 #4
0
ファイル: osm.py プロジェクト: senaiassefa/gazetteer
def extract_osm(database, table, osm_type, dump):
    """Export rows of an OSM PostGIS table as gazetteer place records.

    Connects to the PostgreSQL database *database*, selects every row of
    *table* whose centroid is a point, and hands one place document per row
    to ``dump.write(uri, place)``.  Rows without a usable name or without a
    feature code (looked up in ``admin_level_map``, then in
    ``feature_code_map`` via ``key_tags``) are skipped.

    *table* is interpolated into the SQL text (identifiers cannot be bound
    as query parameters), so it must come from a trusted caller.
    *osm_type* is the element type placed in the place URI.
    *dump* is only written to; closing it is left to the caller.  The
    database connection is closed before returning, also on error.
    """
    geom_col = "way"
    # Columns that are replaced by derived values and kept out of "source".
    derived_columns = ("geojson", "centroid_x", "centroid_y", "way")

    conn = psycopg2.connect("dbname=" + database)
    try:
        # Passing a name makes psycopg2 create a server-side cursor.
        cursor = conn.cursor(table + "_get_items")
        cursor.execute("""SELECT *, X(st_centroid(%s)) AS centroid_x,
                                Y(st_centroid(%s)) AS centroid_y,
                                ST_AsGeoJSON(%s) AS geojson FROM %s
                                WHERE ST_GeometryType(ST_Centroid(way)) = 'ST_Point'"""
                       % (geom_col, geom_col, geom_col, table))
        for row in Result(cursor):
            # Fall back to "name" also when "name:en" is present but empty.
            preferred_name = row.get("name:en") or row.get("name")
            feature_code = admin_level_map.get(str(row["admin_level"]))
            if not feature_code:
                for tag in key_tags:
                    if row.get(tag) and feature_code_map.get((tag, row[tag])):
                        feature_code = feature_code_map[tag, row[tag]]
                        break
            if not preferred_name or not feature_code:
                continue

            centroid = [row["centroid_x"], row["centroid_y"]]
            geometry = json.loads(row["geojson"])

            uri = "http://osm.org/browse/%s/%s" % (osm_type, row["osm_id"])
            if "way/-" in uri:
                # A negative id under "way" is rewritten to a relation URI;
                # a checksum of the geometry is added as fragment.
                # crc32 needs bytes on Python 3, so encode text first.
                geojson = row["geojson"]
                if not isinstance(geojson, bytes):
                    geojson = geojson.encode("utf-8")
                checksum = binascii.crc32(geojson) & 0xffffffff
                uri = uri.replace("way/-", "relation/") + ("#%d" % checksum)

            names = []
            if row["name"]:
                names.append({
                    "name": row["name"],
                    "lang": "",
                    "type": "preferred"
                })
            if row["name:"]:
                # row["name:"] is treated as '"lang"=>"name"' pairs and
                # turned into a JSON object; unparseable values are ignored.
                try:
                    alt_names = json.loads(
                        "{" + row["name:"].replace("=>", ":") + "}")
                except (ValueError, UnicodeDecodeError):
                    alt_names = {}
                for lang, name in alt_names.items():
                    names.append({
                        "name": name,
                        "lang": lang,
                        "type": "preferred"
                    })

            # Iterate over a snapshot: the loop appends to names, and
            # growing a list while iterating it would also revisit the
            # freshly added transliterations.
            for alt_name in list(names):
                ascii_name = transliterate(alt_name)
                if ascii_name and ascii_name["name"] != alt_name["name"]:
                    names.append(ascii_name)

            # Keep only non-empty original columns.  Built as a new dict
            # because deleting keys while iterating fails on Python 3.
            source = {
                key: value for key, value in dict(row).items()
                if value and key not in derived_columns
            }

            place = {
                "name": preferred_name,
                "centroid": centroid,
                "feature_code": feature_code,
                "geometry": geometry,
                "is_primary": True,
                "source": source,
                "alternate": names,
                "updated": row["timestamp"],
                "uris": [uri],
                "relationships": [],
                "timeframe": {},
                "admin": []
            }
            dump.write(uri, place)
    finally:
        # Closing the connection also ends the server-side cursor.
        conn.close()