Example #1
def saveOSMToMongo(rowslist,
                   logger,
                   db_server,
                   db_port,
                   db_database,
                   db_collection,
                   firstInsert=False):
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]

    CommonFunctions.addLogMessage(
        'Save ' + str(len(rowslist)) + ' elements in mongo database', logger,
        CommonFunctions.INFO_LOG)
    # note: initialize_unordered_bulk_op() was removed in PyMongo 4.0; this
    # excerpt assumes an older PyMongo (see the bulk_write sketch below)
    bulkop = collection.initialize_unordered_bulk_op()
    for row in rowslist:
        if "___remove___" in row:
            bulkop.find({
                'id': row["id"],
                'osm_type': row["osm_type"]
            }).remove()
        else:
            bulkop.find({
                'id': row["id"],
                'osm_type': row["osm_type"]
            }).upsert().update({'$set': row})
        if firstInsert:
            if (row["osm_type"] == 'node'):
                bulkop.find({
                    'nd.ref': row["id"],
                    'id': {
                        '$exists': True
                    }
                }).update({'$unset': {
                    'geometry': ""
                }})
            bulkop.find({
                'member.ref': row["id"],
                'member.type': row["osm_type"],
                'id': {
                    '$exists': True
                }
            }).update({'$unset': {
                'geometry': ""
            }})
    try:
        retval = bulkop.execute()
    except BulkWriteError as bwe:
        # report write errors instead of silently discarding them
        CommonFunctions.addLogMessage(
            'Bulk write errors: ' + str(bwe.details['writeErrors']), logger,
            CommonFunctions.INFO_LOG)
    CommonFunctions.addLogMessage(
        str(len(rowslist)) + ' elements saved in mongo database', logger,
        CommonFunctions.INFO_LOG)
    del rowslist
    client.close()
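The bulk API used above was removed in PyMongo 4.0. Below is a minimal sketch of the same upsert/remove logic with the current bulk_write interface, assuming the same id/osm_type document layout (save_rows_bulk is a hypothetical helper, not part of the original module):

from pymongo import MongoClient, UpdateOne, DeleteOne


def save_rows_bulk(rowslist, db_server, db_port, db_database, db_collection):
    # hypothetical equivalent of saveOSMToMongo for PyMongo 3.x/4.x
    client = MongoClient(db_server, int(db_port))
    collection = client[db_database][db_collection]
    ops = []
    for row in rowslist:
        flt = {'id': row['id'], 'osm_type': row['osm_type']}
        if '___remove___' in row:
            ops.append(DeleteOne(flt))
        else:
            ops.append(UpdateOne(flt, {'$set': row}, upsert=True))
    if ops:
        # unordered, like the original initialize_unordered_bulk_op()
        collection.bulk_write(ops, ordered=False)
    client.close()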
Example #2
def deleteOSMFromMongo(rowslist, logger, elemtype):
    CommonFunctions.addLogMessage(
        'Delete ' + str(len(rowslist)) + ' ' + elemtype +
        ' from mongo database', logger, CommonFunctions.INFO_LOG)
    # relies on the module-level 'collection' global initialised in main();
    # initialize_ordered_bulk_op() likewise requires an older PyMongo
    bulkop = collection.initialize_ordered_bulk_op()
    for row in rowslist:
        bulkop.find({'ID': row["id"], 'type': row["type"]}).remove()
    retval = bulkop.execute()
    CommonFunctions.addLogMessage(
        str(len(rowslist)) + ' ' + elemtype + ' removed from mongo database',
        logger, CommonFunctions.INFO_LOG)
def uncompressFile(file, logger, done_path, db_server, db_port, db_database,
                   db_collection):
    CommonFunctions.addLogMessage('Uncompress file: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    osmfilepath = os.path.splitext(file)[0]  # drop the trailing .bz2

    if (os.path.exists(osmfilepath)): os.remove(osmfilepath)
    with open(osmfilepath, 'wb') as new_file:
        with open(file, 'rb') as filebz2:
            decompressor = bz2.BZ2Decompressor()
            for block in iter(lambda: filebz2.read(900 * 1024), b''):
                new_file.write(decompressor.decompress(block))

    CommonFunctions.addLogMessage('Uncompress file finished: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    fname = os.path.basename(file.replace("\\", "/"))
    CommonFunctions.addLogMessage('Move file to done directory: ' + fname,
                                  logger, CommonFunctions.INFO_LOG)
    shutil.move(file.replace("\\", "/"), done_path + "/" + fname)
    processOSMFile(osmfilepath, logger, db_server, db_port, db_database,
                   db_collection)

    OSM_Commons.updateWays(db_server, int(db_port), db_database, db_collection,
                           logger, element_at_time)
    OSM_Commons.updateRelations(db_server, int(db_port), db_database,
                                db_collection, logger, element_at_time)

    os.remove(osmfilepath)
    return True
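uncompressFile above drives bz2.BZ2Decompressor by hand in 900 KB blocks. A shorter sketch of the same streaming decompression using bz2.open and shutil.copyfileobj (decompress_bz2 is a hypothetical helper with the same effect, assuming Python 3):

import bz2
import shutil


def decompress_bz2(src_path, dst_path, chunk_size=900 * 1024):
    # stream the archive in chunks instead of loading it all into memory
    with bz2.open(src_path, 'rb') as f_in, open(dst_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out, chunk_size)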
Example #4
def checkGeometry(docsearch, coords_list, geoms_list, logger):
    if docsearch['geometry']['type'] == 'LineString':
        coords_list = coords_list + docsearch['geometry']['coordinates']
        if len(coords_list) > 0:
            if coords_list[0] == coords_list[-1]:
                geoms_list.append(list(coords_list))
                del coords_list[:]
    elif docsearch['geometry']['type'] == 'Polygon':
        geoms_list.append(docsearch['geometry']['coordinates'][0])
    elif docsearch['geometry']['type'] == 'MultiLineString' or docsearch[
            'geometry']['type'] == 'MultiPolygon':
        for cds in docsearch['geometry']['coordinates']:
            geoms_list.append(cds)
    elif docsearch['geometry']['type'] == 'Point':
        coords_list.append(docsearch['geometry'])
    else:
        CommonFunctions.addLogMessage(
            "Unknown check geometry - " +
            str(docsearch['geometry']['type'] + ": " + str(docsearch)), logger,
            CommonFunctions.INFO_LOG)
        CommonFunctions.addLogMessage(
            "Unknown geoms_list - " + str(geoms_list), logger,
            CommonFunctions.INFO_LOG)
    return coords_list, geoms_list
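checkGeometry chains LineString members together and only emits a ring once the accumulated coordinates close on themselves. A standalone illustration of that closure test on plain GeoJSON coordinate lists (is_closed_ring is a hypothetical helper, not used by the code above):

def is_closed_ring(coords):
    # a chain of LineString coordinates becomes a ring once its first and
    # last positions coincide
    return len(coords) > 0 and coords[0] == coords[-1]


segment_a = [[9.18, 45.46], [9.19, 45.46], [9.19, 45.47]]
segment_b = [[9.19, 45.47], [9.18, 45.46]]
print(is_closed_ring(segment_a))              # False: still an open chain
print(is_closed_ring(segment_a + segment_b))  # True: the ring closes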
def main():
    gdb_path = arcpy.GetParameterAsText(0)
    input_feature = arcpy.GetParameter(1)
    all_the_world = bool(arcpy.GetParameter(2))
    to_clip = bool(arcpy.GetParameter(3))
    osm_scheme = arcpy.GetParameterAsText(4)
    layer_config_file = arcpy.GetParameterAsText(5)
    aprx_model = arcpy.GetParameterAsText(6)
    create_vtpk = bool(arcpy.GetParameter(7))

    pythonPath = os.path.dirname(os.path.realpath(sys.argv[0]))
    settings = ConfigParser()
    settings.read(pythonPath + "/settings.ini")

    db_server = CommonFunctions.readParameter(settings, "database",
                                              'db_server')
    db_port = CommonFunctions.readParameter(settings, "database", 'db_port')
    db_database = CommonFunctions.readParameter(settings, "database",
                                                'db_database')
    db_collection = CommonFunctions.readParameter(settings, "database",
                                                  'db_collection')
    done_path = CommonFunctions.readParameter(settings, "directories",
                                              'done_path')
    tiling_scheme = CommonFunctions.readParameter(settings, "models",
                                                  'tiling_scheme')
    global element_at_time
    element_at_time = int(
        CommonFunctions.readParameter(settings, "general", 'element_at_time'))

    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]
    collection.create_index([("id", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING)], background=True)
    collection.create_index([("geometry", GEOSPHERE)], background=True)
    collection.create_index([("geometry.type", ASCENDING)], background=True)
    collection.create_index([("nd.ref", ASCENDING)], background=True)
    collection.create_index([("member.ref", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING), ("geometry", ASCENDING)],
                            background=True)
    collection.create_index([("osm_type", ASCENDING), ("id", ASCENDING)],
                            background=True)

    geometries = []
    if not all_the_world:
        if (os.path.exists(os.path.join(done_path, "workjson.geojson"))):
            os.remove(os.path.join(done_path, "workjson.geojson"))
        arcpy.FeaturesToJSON_conversion(
            input_feature,
            os.path.join(done_path, "workjson.geojson").replace("\\", "/"),
            geoJSON="GEOJSON")

        time.sleep(1)
        content = ''
        with open(
                os.path.join(done_path, "workjson.geojson").replace("\\",
                                                                    "/")) as f:
            content = f.readlines()

        resultjson = ''.join(single.replace("\n", "") for single in content)
        if (os.path.exists(os.path.join(done_path, "workjson.geojson"))):
            os.remove(os.path.join(done_path, "workjson.geojson"))
        d = json.loads(resultjson)
        features = d['features']
        for feature in features:
            geometries.append(feature['geometry'])

        if to_clip:
            if (arcpy.Exists("in_memory/polygon_selection")):
                arcpy.Delete_management("in_memory/polygon_selection")
            arcpy.management.CreateFeatureclass(
                "in_memory",
                "polygon_selection",
                "POLYGON",
                "",
                "DISABLED",
                "DISABLED",
                spatial_reference=
                "GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]];-400 -400 11258999068426.2;-100000 10000;-100000 10000;8.98315284119521E-09;0.001;0.001;IsHighPrecision"
            )

            # Open an InsertCursor and insert the new geometry
            cursor = arcpy.da.InsertCursor('in_memory/polygon_selection',
                                           ['SHAPE@'])
            for feature in features:
                if (feature['geometry']['type'] == "Polygon"):
                    geom = feature["geometry"]["coordinates"][0]
                    array = arcpy.Array()
                    for g in geom:
                        array.append(arcpy.Point(g[0], g[1]))
                    # build and insert the polygon inside the if-branch so that
                    # non-polygon features do not reuse a stale 'array'
                    polygon = arcpy.Polygon(array)
                    cursor.insertRow([polygon])
            # Delete cursor object
            del cursor

    gdbname = gdb_path.replace("\\", "/")
    gdbname = gdbname.split("/")[-1]
    database_path = gdb_path.replace(gdbname, "")
    arcpy.AddMessage("Create Geodatabase: " + gdbname + " using " +
                     osm_scheme + " in directory " + database_path)
    arcpy.CreateFileGDB_management(database_path, gdbname)
    arcpy.ImportXMLWorkspaceDocument_management(gdb_path, osm_scheme)

    arcpy.AddMessage("Read layer config file")
    with open(layer_config_file) as f:
        content = f.readlines()

    for single in content:
        single = single.replace("\n", "")
        arcpy.AddMessage("Process " + single.split(",")[1] + ": " +
                         single.split(",")[0])
        readSingleLayer(collection, single, geometries,
                        os.path.join(database_path, gdbname), all_the_world,
                        to_clip)
    client.close()

    if aprx_model != "":
        arcpy.AddMessage('Rebuild aprx file from model')
        aprx = arcpy.mp.ArcGISProject(aprx_model)

        dbs = []
        m = aprx.listMaps()[0]
        arcpy.AddMessage("Update Model databases")
        for lyr in m.listLayers():
            if lyr.supports("connectionProperties"):
                if lyr.connectionProperties:
                    if lyr.connectionProperties['connection_info'][
                            'database'] not in dbs:
                        dbs.append(lyr.connectionProperties['connection_info']
                                   ['database'])

        for db in dbs:
            aprx.updateConnectionProperties(
                db, os.path.join(database_path, gdbname), True, False)

        absname = gdbname.split(".")[0]
        if (arcpy.Exists(os.path.join(database_path, absname + ".aprx"))):
            arcpy.Delete_management(
                os.path.join(database_path, absname + ".aprx"))
        aprx.saveACopy(os.path.join(database_path, absname + ".aprx"))

        if create_vtpk:
            for m in aprx.listMaps():
                arcpy.AddMessage("Tile index creation")
                if (arcpy.Exists(database_path + "/" + absname + "Index.gdb")):
                    arcpy.Delete_management(database_path + "/" + absname +
                                            "Index.gdb")
                arcpy.CreateFileGDB_management(database_path,
                                               absname + "Index.gdb")
                arcpy.management.CreateVectorTileIndex(
                    m, database_path + "/" + absname + "Index.gdb/osmIndex",
                    "EXISTING", tiling_scheme, 10000)

                arcpy.AddMessage("Vector tile map creation")
                if (arcpy.Exists(database_path + "/" + absname + ".vtpk")):
                    arcpy.Delete_management(database_path + "/" + absname +
                                            ".vtpk")
                arcpy.management.CreateVectorTilePackage(
                    m, database_path + "/" + absname + ".vtpk", "EXISTING",
                    tiling_scheme, "INDEXED", 73957190.9489637,
                    1128.49717634527,
                    database_path + "/" + absname + "Index.gdb/osmIndex",
                    "OSM", "World, Vector")
        del aprx

    arcpy.ClearWorkspaceCache_management()
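The layer config file read near the end of main() is treated as a plain comma-separated list (single.split(",")[0] and [1]). A minimal sketch of reading it with the csv module instead of readlines/split, assuming one layer per line (read_layer_config is a hypothetical helper):

import csv


def read_layer_config(path):
    # each row is expected to look like "<name>,<geometry_type>,..."
    layers = []
    with open(path, newline='') as f:
        for row in csv.reader(f):
            if row:
                layers.append(row)
    return layers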
Example #6
def updateWays(db_server,
               db_port,
               db_database,
               db_collection,
               logger,
               element_at_time,
               checkarray=[],
               number_of_core=1):
    save_packages = []
    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage("Update geometries", logger,
                                  CommonFunctions.INFO_LOG)
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]

    rowslist = []
    CommonFunctions.addLogMessage("Calculate ways", logger,
                                  CommonFunctions.INFO_LOG,
                                  len(checkarray) > 0)
    aggrfilter = [{
        '$match': {
            'osm_type': 'way',
            'geometry': {
                '$exists': False
            }
        }
    }, {
        '$graphLookup': {
            'from': db_collection,
            'startWith': "$nd.ref",
            'connectFromField': "nd.ref",
            'connectToField': "id",
            'as': "childs_geometries",
            'restrictSearchWithMatch': {
                "osm_type": "node"
            }
        }
    }]
    mongocursor = collection.aggregate(aggrfilter)
    mongocursor.batch_size(int(element_at_time / 10))
    for document in mongocursor:
        singlerow = {'id': document['id'], 'osm_type': 'way'}
        geom = {}
        coords = []
        for nds in document['nd']:
            for child in document['childs_geometries']:
                if child['id'] == nds['ref']:
                    if 'geometry' in child:
                        if 'coordinates' in child['geometry']:
                            coords.append(child['geometry']['coordinates'])
                    break
        if document['nd'][0] != document['nd'][-1] or \
                ("barrier" in document and ("area" not in document or str(document["area"]).lower() == 'no')) or \
                ("highway" in document and ("area" not in document or str(document["area"]).lower() == 'no')):
            geom['type'] = 'LineString'
            if len(coords) > 0:
                geom['coordinates'] = list(coords)
                del coords[:]
        else:
            geom['type'] = 'Polygon'
            if len(coords) > 0:
                # wrap the ring in an outer list to form GeoJSON Polygon coordinates
                geom['coordinates'] = [list(coords)]
                del coords[:]

        if geom != {}:
            singlerow['geometry'] = geom
            rowslist.append(singlerow)

        if len(rowslist) == int(element_at_time / 10):
            save_packages.append(list(rowslist))
            CommonFunctions.addLogMessage(
                "Package " + str(len(save_packages)) + " of " +
                str(number_of_processors) + " prepared", logger,
                CommonFunctions.INFO_LOG)
            if len(save_packages) == number_of_processors:
                processes = []
                for save_package in save_packages:
                    p = threading.Thread(target=saveOSMToMongo,
                                         args=(list(save_package), logger,
                                               db_server, db_port, db_database,
                                               db_collection))
                    p.start()
                    processes.append(p)
                    del save_package
                for process in processes:
                    process.join()
                del processes
                del save_packages[:]
            del rowslist[:]
            del geom

    if len(rowslist) > 0:
        save_packages.append(list(rowslist))
        processes = []
        for save_package in save_packages:
            p = threading.Thread(target=saveOSMToMongo,
                                 args=(list(save_package), logger, db_server,
                                       db_port, db_database, db_collection))
            p.start()
            processes.append(p)
            del save_package
        for process in processes:
            process.join()
        del processes
        del save_packages[:]

        del rowslist[:]

    del collection
    del db
    client.close()
    del client
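updateWays decides between LineString and Polygon from the way's node list and its barrier/highway/area tags. A compact restatement of that rule, assuming the same document layout (way_geometry_type is a hypothetical helper):

def way_geometry_type(document):
    # an open way, or a barrier/highway without area=yes, stays a LineString;
    # any other closed way becomes a Polygon
    nds = document['nd']
    is_open = nds[0] != nds[-1]
    forces_line = any(
        key in document and str(document.get('area', 'no')).lower() == 'no'
        for key in ('barrier', 'highway'))
    return 'LineString' if is_open or forces_line else 'Polygon'


print(way_geometry_type({'nd': [{'ref': 1}, {'ref': 2}, {'ref': 1}],
                         'highway': 'residential'}))  # LineString
print(way_geometry_type({'nd': [{'ref': 1}, {'ref': 2}, {'ref': 1}],
                         'building': 'yes'}))         # Polygon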
Example #7
def updateRelations(db_server, db_port, db_database, db_collection, logger,
                    element_at_time):
    unknown_types = []
    save_packages = []
    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage("Calculate relations", logger,
                                  CommonFunctions.INFO_LOG)
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]

    rowslist = []
    '''mongocursor = collection.aggregate([
        {'$match': {'osm_type': 'relation', 'geometry': {'$exists': False}, 'type': {'$in': ['multipolygon', 'boundary', 'multilinestring', 'osm']}}},
        {'$graphLookup':{'from': db_collection,'startWith': "$member.ref", 'connectFromField': "member.ref", 'connectToField': "id",'as': "childs_geometries",
                         'restrictSearchWithMatch': {'geometry': {'$exists': True}}}}
    ])'''
    #TODO Resume from A_Buiding, emptying all the subsequent tables and checking whether the batch numbers below work better
    mongocursor = collection.find(
        {
            'osm_type': 'relation',
            'geometry': {
                '$exists': False
            }
        },
        no_cursor_timeout=True)
    mongocursor.batch_size(int(element_at_time / 10))
    for document in mongocursor:
        outer_coords = []
        outer_polygons = []
        inner_coords = []
        inner_polygons = []
        singlerow = {'id': document['id'], 'osm_type': 'relation'}
        geom = {}
        for member in document['member']:
            mongocursor_child = collection.find({
                'osm_type': member['type'],
                'id': member['ref'],
                'geometry': {
                    '$exists': True
                }
            })
            for child in mongocursor_child:
                if 'geometry' in child and 'type' in document:
                    if document['type'] == 'multipolygon':
                        if member['role'] == 'outer':
                            outer_coords, outer_polygons = checkGeometry(
                                child, outer_coords, outer_polygons, logger)
                        else:
                            inner_coords, inner_polygons = checkGeometry(
                                child, inner_coords, inner_polygons, logger)
                    elif document['type'] in ('route', 'route_master', 'superroute',
                                              'restriction', 'site', 'associatedStreet',
                                              'public_transport', 'street', 'destination_sign',
                                              'waterway', 'enforcement', 'bridge', 'tunnel'):
                        if child['geometry']['type'] == 'GeometryCollection':
                            outer_coords = outer_coords + child['geometry'][
                                'geometries']
                        else:
                            outer_coords.append(child['geometry'])
                    elif document['type'] in ('boundary', 'multilinestring', 'osm'):
                        if member['role'] in ('outer', 'inner') or \
                                document['type'] == 'multilinestring':
                            if child['geometry']['type'] == 'LineString':
                                outer_coords.append(
                                    child['geometry']['coordinates'])
                            elif child['geometry']['type'] in ('Polygon', 'MultiLineString'):
                                for coordinate in child['geometry']['coordinates']:
                                    outer_coords.append(coordinate)
                            elif child['geometry']['type'] == 'GeometryCollection':
                                for sgeometry in child['geometry']['geometries']:
                                    if sgeometry['type'] == 'LineString':
                                        outer_coords.append(sgeometry['coordinates'])
                            elif child['geometry']['type'] == 'Point':
                                continue
                            else:
                                CommonFunctions.addLogMessage(
                                    "Unknown geometry - " +
                                    str(child['geometry']) + ": " +
                                    str(document), logger,
                                    CommonFunctions.INFO_LOG)
                    elif document['type'] == 'building':
                        if child['osm_type'] == 'way':
                            geom['type'] = 'Polygon'
                            geom['coordinates'] = child['geometry'][
                                'coordinates']
                    else:
                        if str(document['type']) not in unknown_types:
                            CommonFunctions.addLogMessage(
                                "Unknown type - " + str(document['type']),
                                logger, CommonFunctions.INFO_LOG)
                            unknown_types.append(str(document['type']))
                            #CommonFunctions.addLogMessage(str(document['member']), logger, CommonFunctions.INFO_LOG)
                break

        if 'type' in document:
            if document['type'] == 'multipolygon':
                if len(outer_polygons) > 0:
                    pols = []
                    for outer_polygon in outer_polygons:
                        vctrow = []
                        vctrow.append(outer_polygon)
                        if len(inner_polygons) > 0:
                            vctrow.append(inner_polygons[0])
                        pols.append(vctrow)
                    if len(pols) > 0:
                        geom['type'] = 'MultiPolygon'
                        geom['coordinates'] = pols
            elif document['type'] in ('route', 'route_master', 'superroute', 'restriction',
                                      'site', 'associatedStreet', 'public_transport', 'street',
                                      'destination_sign', 'waterway', 'enforcement', 'bridge',
                                      'tunnel', 'circuit', 'land_area', 'network', 'water',
                                      'collection', 'landarea', 'defaults'):
                if len(outer_coords) > 0:
                    geom['type'] = 'GeometryCollection'
                    geom['geometries'] = list(outer_coords)
                    del outer_coords[:]
            elif document['type'] in ('boundary', 'multilinestring'):
                if len(outer_coords) > 0:
                    geom['type'] = 'MultiLineString'
                    geom['coordinates'] = list(outer_coords)
                    del outer_coords[:]

        if geom != {}:
            singlerow['geometry'] = geom
            rowslist.append(singlerow)
        if len(rowslist) == int(element_at_time / 10):
            save_packages.append(list(rowslist))
            CommonFunctions.addLogMessage(
                "Package " + str(len(save_packages)) + " of " +
                str(number_of_processors) + " prepared", logger,
                CommonFunctions.INFO_LOG)
            if len(save_packages) == number_of_processors:
                processes = []
                for save_package in save_packages:
                    p = threading.Thread(target=saveOSMToMongo,
                                         args=(list(save_package), logger,
                                               db_server, db_port, db_database,
                                               db_collection))
                    p.start()
                    processes.append(p)
                    del save_package
                for process in processes:
                    process.join()
                del processes
                del save_packages[:]
            del rowslist[:]

    if len(rowslist) > 0:
        save_packages.append(list(rowslist))
        processes = []
        for save_package in save_packages:
            p = threading.Thread(target=saveOSMToMongo,
                                 args=(list(save_package), logger, db_server,
                                       db_port, db_database, db_collection))
            p.start()
            processes.append(p)
            del save_package
        for process in processes:
            process.join()
        del processes
        del save_packages[:]

        del rowslist[:]

    del collection
    del db
    client.close()
    del client
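The multipolygon branch of updateRelations pairs every outer ring with at most the first inner ring. A standalone sketch of that assembly step on plain coordinate lists (multipolygon_coordinates is a hypothetical helper mirroring the logic above, including its first-inner-ring-only behaviour):

def multipolygon_coordinates(outer_polygons, inner_polygons):
    # one polygon per outer ring; as in the original, only the first inner
    # ring is attached as a hole
    pols = []
    for outer in outer_polygons:
        rings = [outer]
        if inner_polygons:
            rings.append(inner_polygons[0])
        pols.append(rings)
    return pols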
def processOSMFile(file, logger, db_server, db_port, db_database,
                   db_collection):
    save_packages = []
    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage('Process file: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    client = MongoClient(db_server, db_port)
    db = client[db_database]
    collection = db[db_collection]
    CommonFunctions.addLogMessage('Read elements from: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    osm_nodes = etree.iterparse(file, events=('start', 'end'))
    rowslist = []
    singlerow = {}
    childs = []

    for event, elem in osm_nodes:
        if event == 'start':
            if elem.tag in ('node', 'way', 'relation'):
                singlerow = {}
                singlerow["osm_type"] = elem.tag
                childs = []
                for key in elem.attrib.keys():
                    singlerow[key] = elem.attrib[key]

            elif elem.tag == 'tag':
                if "k" in elem.attrib:
                    keyname = str(elem.attrib["k"]).replace(":", "_")
                    keyvalue = str(elem.attrib["v"])
                    singlerow[keyname] = keyvalue

            elif elem.tag == 'nd' or elem.tag == 'member':
                if elem.tag == 'member' and elem.attrib['type'] == 'relation':
                    insertDelay = True  # flag set here but not used further in this excerpt
                singletag = {}
                for key in elem.attrib.keys():
                    singletag[key] = elem.attrib[key]
                childs.append(singletag)
                if (len(childs) > 0): singlerow[elem.tag] = childs

        elif event == 'end':
            if elem.tag in ('node', 'way', 'relation'):
                if elem.tag == 'node':
                    geom = {}
                    geom['type'] = 'Point'
                    geom['coordinates'] = [
                        float(elem.attrib['lon']),
                        float(elem.attrib['lat'])
                    ]
                    singlerow['geometry'] = geom

                rowslist.append(singlerow)
                if len(rowslist) == element_at_time:
                    save_packages.append(list(rowslist))
                    CommonFunctions.addLogMessage(
                        "Package " + str(len(save_packages)) + " of " +
                        str(number_of_processors) + " prepared", logger,
                        CommonFunctions.INFO_LOG)
                    if len(save_packages) == number_of_processors:
                        processes = []
                        for save_package in save_packages:
                            p = threading.Thread(
                                target=OSM_Commons.saveOSMToMongo,
                                args=(list(save_package), logger, db_server,
                                      db_port, db_database, db_collection,
                                      True))
                            p.start()
                            processes.append(p)
                            del save_package
                        for process in processes:
                            process.join()
                        del processes
                        del save_packages[:]
                    del rowslist[:]
                del childs

            elem.clear()
            while elem.getprevious() is not None:
                del elem.getparent()[0]
    del osm_nodes
    if (len(rowslist) > 0):
        save_packages.append(list(rowslist))
        processes = []
        for save_package in save_packages:
            p = threading.Thread(target=OSM_Commons.saveOSMToMongo,
                                 args=(list(save_package), logger, db_server,
                                       db_port, db_database, db_collection,
                                       True))
            p.start()
            processes.append(p)
            del save_package
        for process in processes:
            process.join()
        del processes
        del save_packages[:]
    del rowslist
    del collection
    del db
    client.close()
    del client
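processOSMFile relies on lxml's iterparse and clears each element (plus already-processed siblings) as soon as its 'end' event fires, which keeps memory use bounded on large extracts. A minimal standalone version of that pattern (count_osm_elements is a hypothetical helper, assuming lxml is installed):

from lxml import etree


def count_osm_elements(osm_path):
    # count top-level OSM elements while keeping the parsed tree small
    counts = {'node': 0, 'way': 0, 'relation': 0}
    for event, elem in etree.iterparse(osm_path, events=('end',)):
        if elem.tag in counts:
            counts[elem.tag] += 1
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]
    return counts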
def main():
    pythonPath = os.path.dirname(os.path.realpath(sys.argv[0]))
    settings = ConfigParser()
    settings.read(pythonPath + "/settings.ini")

    if not os.path.exists("log"): os.mkdir("log")
    logger = CommonFunctions.getLogger("OSMIngestion", pythonPath, "log")

    global element_at_time
    element_at_time = int(
        CommonFunctions.readParameter(settings, "general", 'element_at_time'))

    input_path = CommonFunctions.readParameter(settings, "directories",
                                               'input_path')
    done_path = CommonFunctions.readParameter(settings, "directories",
                                              'done_path')

    db_server = CommonFunctions.readParameter(settings, "database",
                                              'db_server')
    db_port = CommonFunctions.readParameter(settings, "database", 'db_port')
    db_database = CommonFunctions.readParameter(settings, "database",
                                                'db_database')
    db_collection = CommonFunctions.readParameter(settings, "database",
                                                  'db_collection')

    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]
    collection.create_index([("id", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING)], background=True)
    collection.create_index([("geometry", GEOSPHERE)], background=True)
    collection.create_index([("geometry.type", ASCENDING)], background=True)
    collection.create_index([("nd.ref", ASCENDING)], background=True)
    collection.create_index([("member.ref", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING), ("geometry", ASCENDING)],
                            background=True)
    collection.create_index([("osm_type", ASCENDING), ("id", ASCENDING)],
                            background=True)

    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage(
        "Start OSM Ingestion with " + str(number_of_processors) +
        " processors", logger, CommonFunctions.INFO_LOG)

    #global tp
    #tp = ThreadPool()
    #tp.init(logger, number_of_processors)

    CommonFunctions.addLogMessage(
        'Check Openstreetmap data in folder: ' + input_path, logger,
        CommonFunctions.INFO_LOG)
    for root, dirs, files in os.walk(input_path):
        for file in files:
            if (file.lower().endswith('.bz2')):
                uncompressFile(os.path.join(root, file), logger, done_path,
                               db_server, int(db_port), db_database,
                               db_collection)

    OSM_Commons.updateWays(db_server, int(db_port), db_database, db_collection,
                           logger, element_at_time)
    # several passes so relations whose members are other relations also get
    # a geometry once their children have been resolved
    for i in range(6):
        OSM_Commons.updateRelations(db_server, int(db_port), db_database,
                                    db_collection, logger, element_at_time)

    client.close()
    del collection
    del db
    del client
Example #10
def main():
    pythonPath = os.path.dirname(os.path.realpath(sys.argv[0]))
    settings = ConfigParser()
    settings.read(pythonPath + "/settings.ini")

    if not os.path.exists("log"): os.mkdir("log")
    logger = CommonFunctions.getLogger("OSMIngestion", pythonPath, "log")

    global element_at_time
    element_at_time = int(
        CommonFunctions.readParameter(settings, "general", 'element_at_time'))
    number_of_processors = int(
        CommonFunctions.readParameter(settings, "general",
                                      'number_of_processors'))

    input_path = CommonFunctions.readParameter(settings, "directories",
                                               'input_path')
    done_path = CommonFunctions.readParameter(settings, "directories",
                                              'done_path')
    download_path = CommonFunctions.readParameter(settings, "directories",
                                                  'download_path')

    db_server = CommonFunctions.readParameter(settings, "database",
                                              'db_server')
    db_port = CommonFunctions.readParameter(settings, "database", 'db_port')
    db_database = CommonFunctions.readParameter(settings, "database",
                                                'db_database')
    db_uploads = CommonFunctions.readParameter(settings, "database",
                                               'db_uploads')
    db_collection = CommonFunctions.readParameter(settings, "database",
                                                  'db_collection')

    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    global uploads
    uploads = db[db_uploads]
    global collection
    collection = db[db_collection]
    collection.create_index([("ID", ASCENDING)])
    collection.create_index([("geometry", GEOSPHERE)])

    updates_file = settings.get('database', 'updates_file')

    content = ''
    with open(updates_file) as f:
        content = f.readlines()

    for single in content:
        single = single.replace("\n", "")
        singleparams = single.split(",")
        package_name = singleparams[0]
        mongocursor = uploads.find({'name': package_name})
        upload_url = ''
        last_page = 0
        last_package = 0
        for document in mongocursor:
            upload_url = document['upload_url']
            last_page = document['last_page']
            last_package = document['last_package']
        if upload_url == '':
            upload_url = singleparams[1]
            last_page = 0
            last_package = 0
            uploads.insert_one({
                'name': package_name,
                'upload_url': upload_url,
                'last_page': last_page,
                'last_package': last_package
            })

        last_page, last_package = checkUrl(upload_url, last_page, last_package,
                                           logger, download_path, package_name,
                                           db_server, db_port, db_database,
                                           db_collection)

    # 'processes' is assumed to be a module-level list populated elsewhere in this script
    for p in processes:
        p.join()

    OSM_Commons.updateWays(db_server, int(db_port), db_database, db_collection,
                           logger, element_at_time)
    # several passes so nested relations are resolved as well
    for i in range(6):
        OSM_Commons.updateRelations(db_server, int(db_port), db_database,
                                    db_collection, logger, element_at_time)

    del db
    client.close()
    del client
Example #11
def checkUrl(upload_url, last_page, last_package, logger, download_path,
             package_name, db_server, db_port, db_database, db_collection):
    CommonFunctions.addLogMessage('Check Url: ' + upload_url, logger,
                                  CommonFunctions.INFO_LOG)
    maxpage = last_page
    maxpackage = last_package
    with urllib.request.urlopen(upload_url) as conn:
        html_file = conn.read()

        checknumbers = str(html_file).split('/">')
        for checknumber in checknumbers:
            checkvalue = checknumber.split('href="')[-1]
            if CommonFunctions.RepresentsInt(checkvalue):
                pagechk, packagechk = checkUrl(upload_url + checkvalue + "/",
                                               last_page, last_package, logger,
                                               download_path, package_name,
                                               db_server, db_port, db_database,
                                               db_collection)
                if (pagechk * 10000) + packagechk > (maxpage *
                                                     10000) + maxpackage:
                    maxpage = pagechk
                    maxpackage = packagechk
        if '.osc.gz">' in str(html_file):
            pagenumber = int(upload_url.split("/")[-2])
            checklinks = str(html_file).split('.osc.gz">')
            for checklink in checklinks:
                checkvalue = checklink.split('href="')[-1]
                if CommonFunctions.RepresentsInt(checkvalue):
                    checkvalue = int(checkvalue)
                    if (pagenumber * 10000) + checkvalue > (
                            maxpage * 10000) + maxpackage:
                        package_url = upload_url + str(checkvalue) + ".osc.gz"
                        file_destination = os.path.join(
                            download_path,
                            str(checkvalue) + ".osc.gz").replace("\\", "/")
                        CommonFunctions.addLogMessage(
                            'Download package: ' + package_url, logger,
                            CommonFunctions.INFO_LOG)
                        if (os.path.exists(file_destination)):
                            os.remove(file_destination)
                        urllib.request.urlretrieve(package_url,
                                                   file_destination)
                        CommonFunctions.addLogMessage(
                            'Uncompress package: ' + file_destination, logger,
                            CommonFunctions.INFO_LOG)
                        oscfilepath = os.path.splitext(file_destination)[0]  # drop the .gz suffix
                        if (os.path.exists(oscfilepath)):
                            os.remove(oscfilepath)
                        with gzip.open(file_destination, 'rb') as f_in:
                            with open(oscfilepath, 'wb') as f_out:
                                shutil.copyfileobj(f_in, f_out)

                        processOSCFile(oscfilepath, logger, db_server, db_port,
                                       db_database, db_collection)
                        os.remove(oscfilepath)
                        os.remove(file_destination)

                        maxpage = pagenumber
                        maxpackage = checkvalue

                        result = uploads.update_one(
                            {'name': package_name},
                            {'$set': {
                                'last_page': maxpage,
                                'last_package': maxpackage
                            }},
                            upsert=True)

    return int(maxpage), int(maxpackage)
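checkUrl discovers numbered sub-directories and .osc.gz packages by splitting the raw HTML on href=" markers. A sketch of the same extraction with the standard-library HTMLParser, which is less fragile against markup changes (NumericLinkParser is a hypothetical alternative, assuming the listing uses relative numeric links):

from html.parser import HTMLParser


class NumericLinkParser(HTMLParser):
    # collects numeric directory links and numeric .osc.gz package links
    def __init__(self):
        super().__init__()
        self.dirs = []
        self.packages = []

    def handle_starttag(self, tag, attrs):
        if tag != 'a':
            return
        href = dict(attrs).get('href') or ''
        if href.endswith('/') and href[:-1].isdigit():
            self.dirs.append(int(href[:-1]))
        elif href.endswith('.osc.gz') and href[:-7].isdigit():
            self.packages.append(int(href[:-7]))


# usage: parser = NumericLinkParser(); parser.feed(str(html_file))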