def saveOSMToMongo(rowslist, logger, db_server, db_port, db_database,
                   db_collection, firstInsert=False):
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]
    CommonFunctions.addLogMessage(
        'Save ' + str(len(rowslist)) + ' elements in mongo database', logger,
        CommonFunctions.INFO_LOG)
    bulkop = collection.initialize_unordered_bulk_op()
    for row in rowslist:
        if "___remove___" in row:
            # Rows flagged with "___remove___" are deleted from the collection
            bulkop.find({
                'id': row["id"],
                'osm_type': row["osm_type"]
            }).remove()
        else:
            # All other rows are upserted, keyed by (id, osm_type)
            bulkop.find({
                'id': row["id"],
                'osm_type': row["osm_type"]
            }).upsert().update({'$set': row})
            if firstInsert:
                # Invalidate the cached geometry of any way or relation that
                # references this element, so it gets recomputed later
                if row["osm_type"] == 'node':
                    bulkop.find({
                        'nd.ref': row["id"],
                        'id': {'$exists': True}
                    }).update({'$unset': {'geometry': ""}})
                bulkop.find({
                    'member.ref': row["id"],
                    'member.type': row["osm_type"],
                    'id': {'$exists': True}
                }).update({'$unset': {'geometry': ""}})
    try:
        retval = bulkop.execute()
    except BulkWriteError as bwe:
        # Write errors are collected but intentionally not re-raised
        werrors = bwe.details['writeErrors']
    CommonFunctions.addLogMessage(
        str(len(rowslist)) + ' elements saved in mongo database', logger,
        CommonFunctions.INFO_LOG)
    del rowslist
    client.close()
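
# Usage sketch (illustrative only; the host, port and database names below are
# assumptions, not values taken from settings.ini). Rows carrying the
# "___remove___" key are deleted, all others are upserted by (id, osm_type):
#
#   rows = [
#       {'id': '1001', 'osm_type': 'node',
#        'geometry': {'type': 'Point', 'coordinates': [9.19, 45.46]}},
#       {'id': '2002', 'osm_type': 'way', '___remove___': True},
#   ]
#   saveOSMToMongo(rows, logger, 'localhost', 27017, 'osm', 'osm_data',
#                  firstInsert=True)
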
def deleteOSMFromMongo(rowslist, logger, elemtype):
    # NOTE: relies on a module-level 'collection' handle initialised elsewhere
    CommonFunctions.addLogMessage(
        'Delete ' + str(len(rowslist)) + ' ' + elemtype +
        ' from mongo database', logger, CommonFunctions.INFO_LOG)
    bulkop = collection.initialize_ordered_bulk_op()
    for row in rowslist:
        bulkop.find({'ID': row["id"], 'type': row["type"]}).remove()
    retval = bulkop.execute()
    CommonFunctions.addLogMessage(
        str(len(rowslist)) + ' ' + elemtype + ' removed from mongo database',
        logger, CommonFunctions.INFO_LOG)
def uncompressFile(file, logger, done_path, db_server, db_port, db_database,
                   db_collection):
    CommonFunctions.addLogMessage('Uncompress file: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    # Drop the trailing ".bz2" extension to obtain the output file path
    file_transformation = file.split(".")
    del file_transformation[len(file_transformation) - 1]
    osmfilepath = '.'.join(file_transformation)
    if os.path.exists(osmfilepath):
        os.remove(osmfilepath)
    # Decompress the bz2 archive in 900 KB blocks
    with open(osmfilepath, 'wb') as new_file:
        with open(file, 'rb') as filebz2:
            decompressor = bz2.BZ2Decompressor()
            for block in iter(lambda: filebz2.read(900 * 1024), b''):
                new_file.write(decompressor.decompress(block))
    CommonFunctions.addLogMessage('Uncompress file finished: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    fname = file.replace("\\", "/")
    fname = fname.split("/")[len(fname.split("/")) - 1]
    CommonFunctions.addLogMessage('Move file to done directory: ' + fname,
                                  logger, CommonFunctions.INFO_LOG)
    shutil.move(file.replace("\\", "/"), done_path + "/" + fname)
    processOSMFile(osmfilepath, logger, db_server, db_port, db_database,
                   db_collection)
    OSM_Commons.updateWays(db_server, int(db_port), db_database, db_collection,
                           logger, element_at_time)
    OSM_Commons.updateRelations(db_server, int(db_port), db_database,
                                db_collection, logger, element_at_time)
    os.remove(osmfilepath)
    return True
def checkGeometry(docsearch, coords_list, geoms_list, logger):
    if docsearch['geometry']['type'] == 'LineString':
        # Accumulate line segments; once the ring closes, move it to geoms_list
        coords_list = coords_list + docsearch['geometry']['coordinates']
        if len(coords_list) > 0:
            if coords_list[0] == coords_list[len(coords_list) - 1]:
                geoms_list.append(list(coords_list))
                del coords_list[:]
    elif docsearch['geometry']['type'] == 'Polygon':
        geoms_list.append(docsearch['geometry']['coordinates'][0])
    elif docsearch['geometry']['type'] == 'MultiLineString' or \
            docsearch['geometry']['type'] == 'MultiPolygon':
        for cds in docsearch['geometry']['coordinates']:
            geoms_list.append(cds)
    elif docsearch['geometry']['type'] == 'Point':
        coords_list.append(docsearch['geometry'])
    else:
        CommonFunctions.addLogMessage(
            "Unknown check geometry - " + str(docsearch['geometry']['type']) +
            ": " + str(docsearch), logger, CommonFunctions.INFO_LOG)
        CommonFunctions.addLogMessage(
            "Unknown geoms_list - " + str(geoms_list), logger,
            CommonFunctions.INFO_LOG)
    return coords_list, geoms_list
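
# Behaviour sketch (made-up coordinates): successive LineString members are
# concatenated into coords_list until the ring closes, at which point the ring
# is moved into geoms_list.
#
#   coords, geoms = [], []
#   seg1 = {'geometry': {'type': 'LineString',
#                        'coordinates': [[0, 0], [1, 0], [1, 1]]}}
#   seg2 = {'geometry': {'type': 'LineString',
#                        'coordinates': [[1, 1], [0, 1], [0, 0]]}}
#   coords, geoms = checkGeometry(seg1, coords, geoms, logger)
#   coords, geoms = checkGeometry(seg2, coords, geoms, logger)
#   # coords is now [] and geoms holds one closed ring of six points
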
def main():
    gdb_path = arcpy.GetParameterAsText(0)
    input_feature = arcpy.GetParameter(1)
    all_the_world = bool(arcpy.GetParameter(2))
    to_clip = bool(arcpy.GetParameter(3))
    osm_scheme = arcpy.GetParameterAsText(4)
    layer_config_file = arcpy.GetParameterAsText(5)
    aprx_model = arcpy.GetParameterAsText(6)
    create_vtpk = bool(arcpy.GetParameter(7))

    pythonPath = os.path.dirname(os.path.realpath(sys.argv[0]))
    settings = ConfigParser()
    settings.read(pythonPath + "/settings.ini")
    db_server = CommonFunctions.readParameter(settings, "database", 'db_server')
    db_port = CommonFunctions.readParameter(settings, "database", 'db_port')
    db_database = CommonFunctions.readParameter(settings, "database",
                                                'db_database')
    db_collection = CommonFunctions.readParameter(settings, "database",
                                                  'db_collection')
    done_path = CommonFunctions.readParameter(settings, "directories",
                                              'done_path')
    tiling_scheme = CommonFunctions.readParameter(settings, "models",
                                                  'tiling_scheme')
    global element_at_time
    element_at_time = int(
        CommonFunctions.readParameter(settings, "general", 'element_at_time'))

    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]
    collection.create_index([("id", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING)], background=True)
    collection.create_index([("geometry", GEOSPHERE)], background=True)
    collection.create_index([("geometry.type", ASCENDING)], background=True)
    collection.create_index([("nd.ref", ASCENDING)], background=True)
    collection.create_index([("member.ref", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING), ("geometry", ASCENDING)],
                            background=True)
    collection.create_index([("osm_type", ASCENDING), ("id", ASCENDING)],
                            background=True)

    geometries = []
    if not all_the_world:
        # Export the area-of-interest features to GeoJSON and collect their geometries
        if os.path.exists(os.path.join(done_path, "workjson.geojson")):
            os.remove(os.path.join(done_path, "workjson.geojson"))
        arcpy.FeaturesToJSON_conversion(
            input_feature,
            os.path.join(done_path, "workjson.geojson").replace("\\", "/"),
            geoJSON="GEOJSON")
        time.sleep(1)
        content = ''
        with open(
                os.path.join(done_path,
                             "workjson.geojson").replace("\\", "/")) as f:
            content = f.readlines()
        resultjson = ''
        for single in content:
            resultjson = resultjson + single.replace("\n", "")
        if os.path.exists(os.path.join(done_path, "workjson.geojson")):
            os.remove(os.path.join(done_path, "workjson.geojson"))
        d = json.loads(resultjson)
        features = d['features']
        for feature in features:
            geometries.append(feature['geometry'])
        if to_clip:
            # Build an in-memory polygon feature class used to clip the output layers
            if arcpy.Exists("in_memory/polygon_selection"):
                arcpy.Delete_management("in_memory/polygon_selection")
            arcpy.management.CreateFeatureclass(
                "in_memory", "polygon_selection", "POLYGON", "", "DISABLED",
                "DISABLED",
                spatial_reference=
                "GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]];-400 -400 11258999068426.2;-100000 10000;-100000 10000;8.98315284119521E-09;0.001;0.001;IsHighPrecision"
            )
            # Open an InsertCursor and insert the new geometry
            cursor = arcpy.da.InsertCursor('in_memory/polygon_selection',
                                           ['SHAPE@'])
            for feature in features:
                if feature['geometry']['type'] == "Polygon":
                    geom = feature["geometry"]["coordinates"][0]
                    array = arcpy.Array()
                    for g in geom:
                        array.append(arcpy.Point(g[0], g[1]))
                    polygon = arcpy.Polygon(array)
                    cursor.insertRow([polygon])
            # Delete cursor object
            del cursor

    gdbname = gdb_path.replace("\\", "/")
    gdbname = gdbname.split("/")[len(gdbname.split("/")) - 1]
    database_path = gdb_path.replace(gdbname, "")
    arcpy.AddMessage("Create Geodatabase: " + gdbname + " using " + osm_scheme +
                     " in directory " + database_path)
    arcpy.CreateFileGDB_management(database_path, gdbname)
    arcpy.ImportXMLWorkspaceDocument_management(gdb_path, osm_scheme)
    arcpy.AddMessage("Read layer config file")
    with open(layer_config_file) as f:
        content = f.readlines()
    for single in content:
        single = single.replace("\n", "")
        arcpy.AddMessage("Process " + single.split(",")[1] + ": " +
                         single.split(",")[0])
        readSingleLayer(collection, single, geometries,
                        os.path.join(database_path, gdbname), all_the_world,
                        to_clip)
    client.close()

    if aprx_model != "":
        arcpy.AddMessage('Rebuild aprx file from model')
        aprx = arcpy.mp.ArcGISProject(aprx_model)
        dbs = []
        m = aprx.listMaps()[0]
        arcpy.AddMessage("Update Model databases")
        for lyr in m.listLayers():
            if lyr.supports("connectionProperties"):
                if lyr.connectionProperties:
                    if lyr.connectionProperties['connection_info'][
                            'database'] not in dbs:
                        dbs.append(lyr.connectionProperties['connection_info']
                                   ['database'])
        for db in dbs:
            aprx.updateConnectionProperties(
                db, os.path.join(database_path, gdbname), True, False)
        absname = gdbname.split(".")[0]
        if arcpy.Exists(os.path.join(database_path, absname + ".aprx")):
            arcpy.Delete_management(
                os.path.join(database_path, absname + ".aprx"))
        aprx.saveACopy(os.path.join(database_path, absname + ".aprx"))
        if create_vtpk:
            for m in aprx.listMaps():
                arcpy.AddMessage("Tile index creation")
                if arcpy.Exists(database_path + "/" + absname + "Index.gdb"):
                    arcpy.Delete_management(database_path + "/" + absname +
                                            "Index.gdb")
                arcpy.CreateFileGDB_management(database_path,
                                               absname + "Index.gdb")
                arcpy.management.CreateVectorTileIndex(
                    m, database_path + "/" + absname + "Index.gdb/osmIndex",
                    "EXISTING", tiling_scheme, 10000)
                arcpy.AddMessage("Vector tile map creation")
                if arcpy.Exists(database_path + "/" + absname + ".vtpk"):
                    arcpy.Delete_management(database_path + "/" + absname +
                                            ".vtpk")
                arcpy.management.CreateVectorTilePackage(
                    m, database_path + "/" + absname + ".vtpk", "EXISTING",
                    tiling_scheme, "INDEXED", 73957190.9489637,
                    1128.49717634527,
                    database_path + "/" + absname + "Index.gdb/osmIndex",
                    "OSM", "World, Vector")
        del aprx
    arcpy.ClearWorkspaceCache_management()
def updateWays(db_server, db_port, db_database, db_collection, logger,
               element_at_time, checkarray=[], number_of_core=1):
    save_packages = []
    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage("Update geometries", logger,
                                  CommonFunctions.INFO_LOG)
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]
    rowslist = []
    CommonFunctions.addLogMessage("Calculate ways", logger,
                                  CommonFunctions.INFO_LOG,
                                  len(checkarray) > 0)
    # Join each way that still has no geometry to the nodes its 'nd' list references
    aggrfilter = [{
        '$match': {
            'osm_type': 'way',
            'geometry': {'$exists': False}
        }
    }, {
        '$graphLookup': {
            'from': db_collection,
            'startWith': "$nd.ref",
            'connectFromField': "nd.ref",
            'connectToField': "id",
            'as': "childs_geometries",
            'restrictSearchWithMatch': {"osm_type": "node"}
        }
    }]
    mongocursor = collection.aggregate(aggrfilter)
    mongocursor.batch_size(int(element_at_time / 10))
    for document in mongocursor:
        singlerow = {'id': document['id'], 'osm_type': 'way'}
        geom = {}
        coords = []
        # Rebuild the ordered coordinate list following the way's node references
        for nds in document['nd']:
            for child in document['childs_geometries']:
                if child['id'] == nds['ref']:
                    if 'geometry' in child:
                        if 'coordinates' in child['geometry']:
                            coords.append(child['geometry']['coordinates'])
                    break
        # Open ways, and barrier/highway ways without area=yes, become LineStrings;
        # closed ways become Polygons
        if document['nd'][0] != document['nd'][len(document['nd']) - 1] or \
                ("barrier" in document and ("area" not in document or str(document["area"]).lower() == 'no')) or \
                ("highway" in document and ("area" not in document or str(document["area"]).lower() == 'no')):
            geom['type'] = 'LineString'
            if len(coords) > 0:
                geom['coordinates'] = list(coords)
            del coords[:]
        else:
            geom['type'] = 'Polygon'
            coords = [coords]
            if len(coords) > 0:
                geom['coordinates'] = list(coords)
            del coords[:]
        if geom != {}:
            singlerow['geometry'] = geom
        rowslist.append(singlerow)
        if len(rowslist) == int(element_at_time / 10):
            save_packages.append(list(rowslist))
            CommonFunctions.addLogMessage(
                "Package " + str(len(save_packages)) + " of " +
                str(number_of_processors) + " prepared", logger,
                CommonFunctions.INFO_LOG)
            if len(save_packages) == number_of_processors:
                # One writer thread per prepared package
                processes = []
                for save_package in save_packages:
                    p = threading.Thread(target=saveOSMToMongo,
                                         args=(list(save_package), logger,
                                               db_server, db_port,
                                               db_database, db_collection))
                    p.start()
                    processes.append(p)
                    del save_package
                for process in processes:
                    process.join()
                del processes
                del save_packages[:]
            del rowslist[:]
        del geom
    # Flush any remaining rows
    if len(rowslist) > 0:
        save_packages.append(list(rowslist))
    processes = []
    for save_package in save_packages:
        p = threading.Thread(target=saveOSMToMongo,
                             args=(list(save_package), logger, db_server,
                                   db_port, db_database, db_collection))
        p.start()
        processes.append(p)
        del save_package
    for process in processes:
        process.join()
    del processes
    del save_packages[:]
    del rowslist[:]
    del collection
    del db
    client.close()
    del client
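
# Geometry rule sketch (illustrative, ids invented): a way whose first and last
# 'nd' refs differ, or that has a barrier/highway tag and no area tag (or
# area=no), becomes a LineString; otherwise the closed ring becomes a Polygon.
#
#   closed_ring = {'nd': [{'ref': '1'}, {'ref': '2'}, {'ref': '3'}, {'ref': '1'}]}
#   open_path   = {'nd': [{'ref': '1'}, {'ref': '2'}, {'ref': '3'}]}
#   # closed_ring -> {'type': 'Polygon',    'coordinates': [[...]]}
#   # open_path   -> {'type': 'LineString', 'coordinates': [...]}
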
def updateRelations(db_server, db_port, db_database, db_collection, logger,
                    element_at_time):
    unknown_types = []
    save_packages = []
    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage("Calculate relations", logger,
                                  CommonFunctions.INFO_LOG)
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]
    rowslist = []
    '''mongocursor = collection.aggregate([
        {'$match': {'osm_type': 'relation', 'geometry': {'$exists': False},
                    'type': {'$in': ['multipolygon', 'boundary', 'multilinestring', 'osm']}}},
        {'$graphLookup': {'from': db_collection, 'startWith': "$member.ref",
                          'connectFromField': "member.ref", 'connectToField': "id",
                          'as': "childs_geometries",
                          'restrictSearchWithMatch': {'geometry': {'$exists': True}}}}
    ])'''
    # TODO: resume from A_Buiding, emptying all the subsequent tables and
    # checking whether it works better with the batch numbers below
    mongocursor = collection.find(
        {
            'osm_type': 'relation',
            'geometry': {'$exists': False}
        },
        no_cursor_timeout=True)
    mongocursor.batch_size(int(element_at_time / 10))
    for document in mongocursor:
        outer_coords = []
        outer_polygons = []
        inner_coords = []
        inner_polygons = []
        singlerow = {'id': document['id'], 'osm_type': 'relation'}
        geom = {}
        for member in document['member']:
            mongocursor_child = collection.find({
                'osm_type': member['type'],
                'id': member['ref'],
                'geometry': {'$exists': True}
            })
            for child in mongocursor_child:
                if 'geometry' in child and 'type' in document:
                    if document['type'] == 'multipolygon':
                        if member['role'] == 'outer':
                            outer_coords, outer_polygons = checkGeometry(
                                child, outer_coords, outer_polygons, logger)
                        else:
                            inner_coords, inner_polygons = checkGeometry(
                                child, inner_coords, inner_polygons, logger)
                    elif document['type'] in ('route', 'route_master',
                                              'superroute', 'restriction',
                                              'site', 'associatedStreet',
                                              'public_transport', 'street',
                                              'destination_sign', 'waterway',
                                              'enforcement', 'bridge',
                                              'tunnel'):
                        if child['geometry']['type'] == 'GeometryCollection':
                            outer_coords = outer_coords + \
                                child['geometry']['geometries']
                        else:
                            outer_coords.append(child['geometry'])
                    elif document['type'] in ('boundary', 'multilinestring',
                                              'osm'):
                        if member['role'] == 'outer' or member['role'] == 'inner' \
                                or document['type'] == 'multilinestring':
                            if child['geometry']['type'] == 'LineString':
                                outer_coords.append(
                                    child['geometry']['coordinates'])
                            elif child['geometry']['type'] == 'Polygon' or \
                                    child['geometry']['type'] == 'MultiLineString':
                                for coordinate in child['geometry'][
                                        'coordinates']:
                                    outer_coords.append(coordinate)
                            elif child['geometry']['type'] == 'GeometryCollection':
                                for sgeometry in child['geometry'][
                                        'geometries']:
                                    if sgeometry['type'] == 'LineString':
                                        outer_coords.append(
                                            sgeometry['coordinates'])
                            elif child['geometry']['type'] == 'Point':
                                continue
                            else:
                                CommonFunctions.addLogMessage(
                                    "Unknown geometry - " +
                                    str(child['geometry']) + ": " +
                                    str(document), logger,
                                    CommonFunctions.INFO_LOG)
                    elif document['type'] == 'building':
                        if child['osm_type'] == 'way':
                            geom['type'] = 'Polygon'
                            geom['coordinates'] = child['geometry'][
                                'coordinates']
                    else:
                        if str(document['type']) not in unknown_types:
                            CommonFunctions.addLogMessage(
                                "Unknown type - " + str(document['type']),
                                logger, CommonFunctions.INFO_LOG)
                            unknown_types.append(str(document['type']))
                        # CommonFunctions.addLogMessage(str(document['member']), logger, CommonFunctions.INFO_LOG)
                        break
        # Assemble the collected member geometries into the relation geometry
        if 'type' in document:
            if document['type'] == 'multipolygon':
                if len(outer_polygons) > 0:
                    pols = []
                    for outer_polygon in outer_polygons:
                        vctrow = []
                        vctrow.append(outer_polygon)
                        if len(inner_polygons) > 0:
                            vctrow.append(inner_polygons[0])
                        pols.append(vctrow)
                    if len(pols) > 0:
                        geom['type'] = 'MultiPolygon'
                        geom['coordinates'] = pols
            elif document['type'] in ('route', 'route_master', 'superroute',
                                      'restriction', 'site',
                                      'associatedStreet', 'public_transport',
                                      'street', 'destination_sign', 'waterway',
                                      'enforcement', 'bridge', 'tunnel',
                                      'circuit', 'land_area', 'network',
                                      'water', 'collection', 'landarea',
                                      'defaults'):
                if len(outer_coords) > 0:
                    geom['type'] = 'GeometryCollection'
                    geom['geometries'] = list(outer_coords)
                    del outer_coords[:]
            elif document['type'] == 'boundary' or \
                    document['type'] == 'multilinestring':
                if len(outer_coords) > 0:
                    geom['type'] = 'MultiLineString'
                    geom['coordinates'] = list(outer_coords)
                    del outer_coords[:]
        if geom != {}:
            singlerow['geometry'] = geom
        rowslist.append(singlerow)
        if len(rowslist) == int(element_at_time / 10):
            save_packages.append(list(rowslist))
            CommonFunctions.addLogMessage(
                "Package " + str(len(save_packages)) + " of " +
                str(number_of_processors) + " prepared", logger,
                CommonFunctions.INFO_LOG)
            if len(save_packages) == number_of_processors:
                processes = []
                for save_package in save_packages:
                    p = threading.Thread(target=saveOSMToMongo,
                                         args=(list(save_package), logger,
                                               db_server, db_port,
                                               db_database, db_collection))
                    p.start()
                    processes.append(p)
                    del save_package
                for process in processes:
                    process.join()
                del processes
                del save_packages[:]
            del rowslist[:]
    # Flush any remaining rows
    if len(rowslist) > 0:
        save_packages.append(list(rowslist))
    processes = []
    for save_package in save_packages:
        p = threading.Thread(target=saveOSMToMongo,
                             args=(list(save_package), logger, db_server,
                                   db_port, db_database, db_collection))
        p.start()
        processes.append(p)
        del save_package
    for process in processes:
        process.join()
    del processes
    del save_packages[:]
    del rowslist[:]
    del collection
    del db
    client.close()
    del client
def processOSMFile(file, logger, db_server, db_port, db_database,
                   db_collection):
    save_packages = []
    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage('Process file: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    client = MongoClient(db_server, db_port)
    db = client[db_database]
    collection = db[db_collection]
    CommonFunctions.addLogMessage('Read elements from: ' + file, logger,
                                  CommonFunctions.INFO_LOG)
    # Stream the OSM XML file element by element to keep memory usage low
    osm_nodes = etree.iterparse(file, events=('start', 'end'))
    rowslist = []
    singlerow = {}
    childs = []
    for event, elem in osm_nodes:
        if event == 'start':
            if elem.tag == 'node' or elem.tag == 'way' or elem.tag == 'relation':
                singlerow = {}
                singlerow["osm_type"] = elem.tag
                childs = []
                for key in elem.attrib.keys():
                    singlerow[key] = elem.attrib[key]
            elif elem.tag == 'tag':
                if "k" in elem.attrib:
                    # ':' is not allowed in Mongo field names, replace it with '_'
                    keyname = str(elem.attrib["k"]).replace(":", "_")
                    keyvalue = str(elem.attrib["v"])
                    singlerow[keyname] = keyvalue
            elif elem.tag == 'nd' or elem.tag == 'member':
                if elem.tag == 'member' and elem.attrib['type'] == 'relation':
                    insertDelay = True
                singletag = {}
                for key in elem.attrib.keys():
                    singletag[key] = elem.attrib[key]
                childs.append(singletag)
                if len(childs) > 0:
                    singlerow[elem.tag] = childs
        elif event == 'end':
            if elem.tag == 'node' or elem.tag == 'way' or elem.tag == 'relation':
                if elem.tag == 'node':
                    geom = {}
                    geom['type'] = 'Point'
                    geom['coordinates'] = [
                        float(elem.attrib['lon']),
                        float(elem.attrib['lat'])
                    ]
                    singlerow['geometry'] = geom
                rowslist.append(singlerow)
                if len(rowslist) == element_at_time:
                    save_packages.append(list(rowslist))
                    CommonFunctions.addLogMessage(
                        "Package " + str(len(save_packages)) + " of " +
                        str(number_of_processors) + " prepared", logger,
                        CommonFunctions.INFO_LOG)
                    if len(save_packages) == number_of_processors:
                        processes = []
                        for save_package in save_packages:
                            p = threading.Thread(
                                target=OSM_Commons.saveOSMToMongo,
                                args=(list(save_package), logger, db_server,
                                      db_port, db_database, db_collection,
                                      True))
                            p.start()
                            processes.append(p)
                            del save_package
                        for process in processes:
                            process.join()
                        del processes
                        del save_packages[:]
                    del rowslist[:]
                del childs
                # Free the parsed element and its already-processed siblings
                elem.clear()
                while elem.getprevious() is not None:
                    del elem.getparent()[0]
    del osm_nodes
    # Flush any remaining rows
    if len(rowslist) > 0:
        save_packages.append(list(rowslist))
    processes = []
    for save_package in save_packages:
        p = threading.Thread(target=OSM_Commons.saveOSMToMongo,
                             args=(list(save_package), logger, db_server,
                                   db_port, db_database, db_collection, True))
        p.start()
        processes.append(p)
        del save_package
    for process in processes:
        process.join()
    del processes
    del save_packages[:]
    del rowslist
    del collection
    del db
    client.close()
    del client
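
# Parsing sketch: a node element like the following (invented id and coordinates)
#
#   <node id="101" lat="45.46" lon="9.19">
#     <tag k="addr:city" v="Milano"/>
#   </node>
#
# becomes the document
#
#   {'osm_type': 'node', 'id': '101', 'lat': '45.46', 'lon': '9.19',
#    'addr_city': 'Milano',
#    'geometry': {'type': 'Point', 'coordinates': [9.19, 45.46]}}
#
# Attributes are kept as strings; ':' in tag keys is replaced by '_' so the
# key is a valid Mongo field name.
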
def main():
    pythonPath = os.path.dirname(os.path.realpath(sys.argv[0]))
    settings = ConfigParser()
    settings.read(pythonPath + "/settings.ini")
    if not os.path.exists("log"):
        os.mkdir("log")
    logger = CommonFunctions.getLogger("OSMIngestion", pythonPath, "log")
    global element_at_time
    element_at_time = int(
        CommonFunctions.readParameter(settings, "general", 'element_at_time'))
    input_path = CommonFunctions.readParameter(settings, "directories",
                                               'input_path')
    done_path = CommonFunctions.readParameter(settings, "directories",
                                              'done_path')
    db_server = CommonFunctions.readParameter(settings, "database", 'db_server')
    db_port = CommonFunctions.readParameter(settings, "database", 'db_port')
    db_database = CommonFunctions.readParameter(settings, "database",
                                                'db_database')
    db_collection = CommonFunctions.readParameter(settings, "database",
                                                  'db_collection')
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    collection = db[db_collection]
    collection.create_index([("id", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING)], background=True)
    collection.create_index([("geometry", GEOSPHERE)], background=True)
    collection.create_index([("geometry.type", ASCENDING)], background=True)
    collection.create_index([("nd.ref", ASCENDING)], background=True)
    collection.create_index([("member.ref", ASCENDING)], background=True)
    collection.create_index([("osm_type", ASCENDING), ("geometry", ASCENDING)],
                            background=True)
    collection.create_index([("osm_type", ASCENDING), ("id", ASCENDING)],
                            background=True)
    number_of_processors = multiprocessing.cpu_count()
    CommonFunctions.addLogMessage(
        "Start OSM Ingestion with " + str(number_of_processors) +
        " processors", logger, CommonFunctions.INFO_LOG)
    #global tp
    #tp = ThreadPool()
    #tp.init(logger, number_of_processors)
    CommonFunctions.addLogMessage(
        'Check Openstreetmap data in folder: ' + input_path, logger,
        CommonFunctions.INFO_LOG)
    for root, dirs, files in os.walk(input_path):
        for file in files:
            if file.lower().endswith('.bz2'):
                uncompressFile(os.path.join(root, file), logger, done_path,
                               db_server, int(db_port), db_database,
                               db_collection)
    OSM_Commons.updateWays(db_server, int(db_port), db_database, db_collection,
                           logger, element_at_time)
    # Several passes, so relations that reference other relations can be resolved
    for i in range(6):
        OSM_Commons.updateRelations(db_server, int(db_port), db_database,
                                    db_collection, logger, element_at_time)
    client.close()
    del collection
    del db
    del client
def main():
    pythonPath = os.path.dirname(os.path.realpath(sys.argv[0]))
    settings = ConfigParser()
    settings.read(pythonPath + "/settings.ini")
    if not os.path.exists("log"):
        os.mkdir("log")
    logger = CommonFunctions.getLogger("OSMIngestion", pythonPath, "log")
    global element_at_time
    element_at_time = int(
        CommonFunctions.readParameter(settings, "general", 'element_at_time'))
    number_of_processors = int(
        CommonFunctions.readParameter(settings, "general",
                                      'number_of_processors'))
    input_path = CommonFunctions.readParameter(settings, "directories",
                                               'input_path')
    done_path = CommonFunctions.readParameter(settings, "directories",
                                              'done_path')
    download_path = CommonFunctions.readParameter(settings, "directories",
                                                  'download_path')
    db_server = CommonFunctions.readParameter(settings, "database", 'db_server')
    db_port = CommonFunctions.readParameter(settings, "database", 'db_port')
    db_database = CommonFunctions.readParameter(settings, "database",
                                                'db_database')
    db_uploads = CommonFunctions.readParameter(settings, "database",
                                               'db_uploads')
    db_collection = CommonFunctions.readParameter(settings, "database",
                                                  'db_collection')
    client = MongoClient(db_server, int(db_port))
    db = client[db_database]
    global uploads
    uploads = db[db_uploads]
    global collection
    collection = db[db_collection]
    collection.create_index([("ID", ASCENDING)])
    collection.create_index([("geometry", GEOSPHERE)])
    updates_file = settings.get('database', 'updates_file')
    content = ''
    with open(updates_file) as f:
        content = f.readlines()
    for single in content:
        single = single.replace("\n", "")
        singleparams = single.split(",")
        package_name = singleparams[0]
        # Read the last processed page/package for this replication source
        mongocursor = uploads.find({'name': package_name})
        upload_url = ''
        last_page = 0
        last_package = 0
        for document in mongocursor:
            upload_url = document['upload_url']
            last_page = document['last_page']
            last_package = document['last_package']
        if upload_url == '':
            # First run for this source: register it with the configured URL
            upload_url = singleparams[1]
            last_page = 0
            last_package = 0
            uploads.insert({
                'name': package_name,
                'upload_url': upload_url,
                'last_page': last_page,
                'last_package': last_package
            })
        last_page, last_package = checkUrl(upload_url, last_page, last_package,
                                           logger, download_path, package_name,
                                           db_server, db_port, db_database,
                                           db_collection)
    # Wait for worker threads started elsewhere (module-level 'processes' list)
    for p in processes:
        p.join()
    OSM_Commons.updateWays(db_server, int(db_port), db_database, db_collection,
                           logger, element_at_time)
    for i in range(6):
        OSM_Commons.updateRelations(db_server, int(db_port), db_database,
                                    db_collection, logger, element_at_time)
    del db
    client.close()
    del client
def checkUrl(upload_url, last_page, last_package, logger, download_path,
             package_name, db_server, db_port, db_database, db_collection):
    CommonFunctions.addLogMessage('Check Url: ' + upload_url, logger,
                                  CommonFunctions.INFO_LOG)
    maxpage = last_page
    maxpackage = last_package
    with urllib.request.urlopen(upload_url) as conn:
        html_file = conn.read()
    # Recurse into the numbered sub-directories of the replication server
    checknumbers = str(html_file).split('/">')
    for checknumber in checknumbers:
        checkvalue = checknumber.split('href="')[
            len(checknumber.split('href="')) - 1]
        if CommonFunctions.RepresentsInt(checkvalue):
            pagechk, packagechk = checkUrl(upload_url + checkvalue + "/",
                                           last_page, last_package, logger,
                                           download_path, package_name,
                                           db_server, db_port, db_database,
                                           db_collection)
            if (pagechk * 10000) + packagechk > (maxpage * 10000) + maxpackage:
                maxpage = pagechk
                maxpackage = packagechk
    if '.osc.gz">' in str(html_file):
        # This directory lists change packages: download and apply the new ones
        pagenumber = int(upload_url.split("/")[len(upload_url.split("/")) - 2])
        checklinks = str(html_file).split('.osc.gz">')
        for checklink in checklinks:
            checkvalue = checklink.split('href="')[
                len(checklink.split('href="')) - 1]
            if CommonFunctions.RepresentsInt(checkvalue):
                checkvalue = int(checkvalue)
                if (pagenumber * 10000) + checkvalue > \
                        (maxpage * 10000) + maxpackage:
                    package_url = upload_url + str(checkvalue) + ".osc.gz"
                    file_destination = os.path.join(
                        download_path,
                        str(checkvalue) + ".osc.gz").replace("\\", "/")
                    CommonFunctions.addLogMessage(
                        'Download package: ' + package_url, logger,
                        CommonFunctions.INFO_LOG)
                    if os.path.exists(file_destination):
                        os.remove(file_destination)
                    urllib.request.urlretrieve(package_url, file_destination)
                    CommonFunctions.addLogMessage(
                        'Uncompress package: ' + file_destination, logger,
                        CommonFunctions.INFO_LOG)
                    file_transformation = file_destination.split(".")
                    del file_transformation[len(file_transformation) - 1]
                    oscfilepath = '.'.join(file_transformation)
                    if os.path.exists(oscfilepath):
                        os.remove(oscfilepath)
                    with gzip.open(file_destination, 'rb') as f_in:
                        with open(oscfilepath, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                    processOSCFile(oscfilepath, logger, db_server, db_port,
                                   db_database, db_collection)
                    os.remove(oscfilepath)
                    os.remove(file_destination)
                    maxpage = pagenumber
                    maxpackage = checkvalue
    # Persist the last processed page/package ('uploads' is the module-level
    # collection initialised in main())
    result = uploads.update({'name': package_name},
                            {'$set': {
                                'last_page': maxpage,
                                'last_package': maxpackage
                            }},
                            upsert=True)
    return int(maxpage), int(maxpackage)
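
# Ordering sketch: replication packages are compared with the composite key
# page * 10000 + package, so for example page 3 / package 42 (30042) sorts
# after page 2 / package 9999 (29999); only packages newer than the stored
# last_page/last_package are downloaded and applied.
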