def read_osm_pbf(subregion_name, data_dir=None, parsed=True, file_size_limit=50,
                 fmt_other_tags=True, fmt_single_geom=True, fmt_multi_geom=True,
                 update=False, download_confirmation_required=True, pickle_it=False,
                 rm_osm_pbf=False, verbose=False):
    """
    Read the .osm.pbf data of a subregion, preferring a previously pickled copy.

    :param subregion_name: [str] e.g. 'rutland'
    :param data_dir: [str; None (default)] customised directory of the .osm.pbf
        file; if None, the default file path is used
    :param parsed: [bool] (default: True) whether to parse the raw data
    :param file_size_limit: [int; None] (default: 50) limit of file size (in MB),
        e.g. 50 or 100; files larger than this are parsed chunk-wise
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param update: [bool] (default: False) whether to re-download the .osm.pbf file
    :param download_confirmation_required: [bool] (default: True)
    :param pickle_it: [bool] (default: False) whether to pickle the parsed data
    :param rm_osm_pbf: [bool] (default: False) whether to delete the .osm.pbf file
        after reading
    :param verbose: [bool] (default: False)
    :return: [dict; None] parsed data, or None if the file could not be
        obtained or parsed

    If 'subregion_name' is the name of the subregion, the default file path
    will be used.
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)

    if not (osm_pbf_filename and path_to_osm_pbf):
        print("Errors occur. Maybe check with the input \"subregion_name\" first.")
        return None

    if data_dir:  # Use a customised directory instead of the default file path
        osm_pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)

    subregion_filename = os.path.basename(path_to_osm_pbf)
    path_to_pickle = path_to_osm_pbf.replace(
        ".osm.pbf", ".pickle" if parsed else "-raw.pickle")

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle, verbose=verbose)

    osm_pbf_data = None

    # If the target file is not available (or an update is requested), try
    # downloading it first.
    if not os.path.isfile(path_to_osm_pbf) or update:
        try:
            download_subregion_osm_file(
                subregion_name, osm_file_format=".osm.pbf", download_dir=data_dir,
                download_confirmation_required=download_confirmation_required,
                update=update, verbose=False)
        except Exception as e:
            print("Cancelled reading data. CAUSE: {}".format(e))

    # Fix: parse the file whenever it is available, including right after a
    # successful download (the previous version returned None in that case).
    if os.path.isfile(path_to_osm_pbf):
        file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)

        if file_size_limit and file_size_in_mb > file_size_limit:
            # Parse the '.osm.pbf' file in a chunk-wise way
            chunks_no = math.ceil(file_size_in_mb / file_size_limit)
        else:
            chunks_no = None

        if verbose:
            print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
        try:
            osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                         fmt_other_tags, fmt_single_geom,
                                         fmt_multi_geom)
            if verbose:
                print("Successfully.\n")
            if pickle_it:
                save_pickle(osm_pbf_data, path_to_pickle, verbose=verbose)
        except Exception as e:
            print("Failed. CAUSE: \"{}\"\n".format(e))
            osm_pbf_data = None

        if rm_osm_pbf:
            remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

    return osm_pbf_data
def psql_osm_pbf_data_extracts(*subregion_name,
                               database_name='OSM_Geofabrik', data_dir=None,
                               update_osm_pbf=False, if_table_exists='replace',
                               file_size_limit=50, parsed=True,
                               fmt_other_tags=True, fmt_single_geom=True,
                               fmt_multi_geom=True, rm_raw_file=False,
                               verbose=False):
    """
    Import data of selected or all (sub)regions, which do not have
    (sub-)subregions, into PostgreSQL server

    Small files (<= file_size_limit MB) are parsed in one go via read_osm_pbf();
    larger files are opened with GDAL/OGR and imported layer-by-layer in chunks.

    :param subregion_name: [str] name(s) of subregion(s); if omitted, all
        subregions without sub-subregions are processed
    :param database_name: [str] (default: 'OSM_Geofabrik')
    :param data_dir: [str; None (default)]
    :param update_osm_pbf: [bool] (default: False)
    :param if_table_exists: [str] 'replace' (default); 'append'; or 'fail'
    :param file_size_limit: [int] (default: 50) size (MB) above which the file
        is imported feature-wise rather than read in one go
    :param parsed: [bool] (default: True)
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param rm_raw_file: [bool] (default: False) whether to delete the raw
        .osm.pbf file after import
    :param verbose: [bool] (default: False)
    """
    # Resolve which subregions to process and build the confirmation prompt
    if not subregion_name:
        subregion_names = fetch_region_subregion_tier("GeoFabrik-non-subregion-list")
        confirm_msg = "To dump GeoFabrik OSM data extracts of all subregions to PostgreSQL? "
    else:
        subregion_names = retrieve_names_of_subregions_of(*subregion_name)
        confirm_msg = "To dump GeoFabrik OSM data extracts of the following subregions to PostgreSQL? \n{}?\n".format(
            ", ".join(subregion_names))

    if confirmed(confirm_msg):
        # Connect to PostgreSQL server
        osmdb = OSM()
        osmdb.connect_db(database_name=database_name)

        err_subregion_names = []
        for subregion_name_ in subregion_names:
            # Work out where the .osm.pbf file lives (default or customised dir)
            default_pbf_filename, default_path_to_pbf = get_default_path_to_osm_file(
                subregion_name_, ".osm.pbf")
            if not data_dir:  # Go to default file path
                path_to_osm_pbf = default_path_to_pbf
            else:
                osm_pbf_dir = regulate_input_data_dir(data_dir)
                path_to_osm_pbf = os.path.join(osm_pbf_dir, default_pbf_filename)

            # Ensure the raw file is present (no per-file confirmation here)
            download_subregion_osm_file(subregion_name_, osm_file_format=".osm.pbf",
                                        download_dir=data_dir, update=update_osm_pbf,
                                        download_confirmation_required=False,
                                        verbose=verbose)

            file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)

            try:
                if file_size_in_mb <= file_size_limit:
                    # Small enough: parse whole file, then dump in one call
                    subregion_osm_pbf = read_osm_pbf(subregion_name_, data_dir, parsed,
                                                     file_size_limit, fmt_other_tags,
                                                     fmt_single_geom, fmt_multi_geom,
                                                     update=False,
                                                     download_confirmation_required=False,
                                                     pickle_it=False,
                                                     rm_osm_pbf=rm_raw_file)
                    if subregion_osm_pbf is not None:
                        osmdb.dump_osm_pbf_data(subregion_osm_pbf,
                                                table_name=subregion_name_,
                                                if_exists=if_table_exists)
                        # Free the parsed data before moving on (large objects)
                        del subregion_osm_pbf
                        gc.collect()
                else:
                    # Too large to hold at once: iterate GDAL layers and import
                    # each layer's features in chunks
                    print("\nParsing and importing \"{}\" feature-wisely to PostgreSQL ... ".format(
                        subregion_name_))
                    # Reference: https://gdal.org/python/osgeo.ogr.Feature-class.html
                    raw_osm_pbf = ogr.Open(path_to_osm_pbf)
                    layer_count = raw_osm_pbf.GetLayerCount()
                    for i in range(layer_count):
                        lyr = raw_osm_pbf.GetLayerByIndex(i)  # Hold the i-th layer
                        lyr_name = lyr.GetName()
                        print(" {} ... ".format(lyr_name), end="")
                        try:
                            # Materialise the layer's features, then split into
                            # chunks sized by the file-size limit
                            lyr_feats = [feat for _, feat in enumerate(lyr)]
                            feats_no, chunks_no = len(lyr_feats), math.ceil(
                                file_size_in_mb / file_size_limit)
                            chunked_lyr_feats = split_list(lyr_feats, chunks_no)
                            del lyr_feats
                            gc.collect()

                            # 'replace' must drop the existing table up front,
                            # because chunks are appended below
                            if osmdb.subregion_table_exists(lyr_name, subregion_name_) \
                                    and if_table_exists == 'replace':
                                osmdb.drop_subregion_data_by_layer(subregion_name_, lyr_name)

                            # Loop through all available features
                            for lyr_chunk in chunked_lyr_feats:
                                lyr_chunk_dat = pd.DataFrame(
                                    rapidjson.loads(f.ExportToJson()) for f in lyr_chunk)
                                lyr_chunk_dat = parse_layer_data(lyr_chunk_dat, lyr_name,
                                                                fmt_other_tags,
                                                                fmt_single_geom,
                                                                fmt_multi_geom)
                                # After the drop above, every chunk is appended
                                # (unless 'fail' was requested)
                                if_exists_ = if_table_exists if if_table_exists == 'fail' else 'append'
                                osmdb.dump_osm_pbf_data_by_layer(lyr_chunk_dat,
                                                                 if_exists=if_exists_,
                                                                 schema_name=lyr_name,
                                                                 table_name=subregion_name_)
                                del lyr_chunk_dat
                                gc.collect()

                            print("Done. Total amount of features: {}".format(feats_no))

                        except Exception as e:
                            print("Failed. {}".format(e))

                    raw_osm_pbf.Release()
                    del raw_osm_pbf
                    gc.collect()

                    if rm_raw_file:
                        remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

            except Exception as e:
                # Record the failing subregion and carry on with the rest
                print(e)
                err_subregion_names.append(subregion_name_)

            # Pause between subregions — presumably to throttle requests to the
            # download server; TODO confirm the intent of the 60s value
            if subregion_name_ != subregion_names[-1]:
                time.sleep(60)

        if len(err_subregion_names) == 0:
            print("\nMission accomplished.\n")
        else:
            print("\nErrors occurred when parsing data of the following subregion(s):")
            print(*err_subregion_names, sep=", ")

        osmdb.disconnect()
        del osmdb
def read_shp_zip(subregion_name, layer, feature=None, data_dir=None, update=False,
                 download_confirmation_required=True, pickle_it=False,
                 rm_extracts=False, rm_shp_zip=False, verbose=False):
    """
    Read a layer (optionally filtered to one feature class) from a subregion's
    GeoFabrik .shp.zip archive, preferring a previously pickled copy.

    :param subregion_name: [str] e.g. 'england', 'oxfordshire', or 'europe';
        case-insensitive
    :param layer: [str] e.g. 'railways'
    :param feature: [str; None (default)] e.g. 'rail'; if None, all available
        features included
    :param data_dir: [str; None (default)]
    :param update: [bool] (default: False) whether to update the relevant
        file/information
    :param download_confirmation_required: [bool] (default: False)
    :param pickle_it: [bool] (default: False)
    :param rm_extracts: [bool] (default: False) whether to delete extracted
        files from the .shp.zip file
    :param rm_shp_zip: [bool] (default: False) whether to delete the downloaded
        .shp.zip file
    :param verbose: [bool] (default: False)
    :return: [gpd.GeoDataFrame]
    """
    shp_zip_filename, path_to_shp_zip = get_default_path_to_osm_file(
        subregion_name, ".shp.zip", mkdir=False)

    if shp_zip_filename and path_to_shp_zip:
        # Directory into which the archive is (or will be) extracted
        extract_dir = os.path.splitext(path_to_shp_zip)[0]
        if data_dir:
            shp_zip_dir = regulate_input_data_dir(data_dir)
            path_to_shp_zip = os.path.join(shp_zip_dir, shp_zip_filename)
            extract_dir = os.path.join(shp_zip_dir, os.path.basename(extract_dir))

        # Make a local path for saving a pickle file for .shp data
        sub_name = "-".join(x for x in [
            shp_zip_filename.replace("-latest-free.shp.zip", ""), layer, feature
        ] if x)
        path_to_shp_pickle = os.path.join(extract_dir, sub_name + ".shp.pickle")

        if os.path.isfile(path_to_shp_pickle) and not update:
            # A cached pickle exists — use it
            shp_data = load_pickle(path_to_shp_pickle, verbose=verbose)
        else:
            # Download the requested .shp.zip file if it has not been extracted yet
            if not os.path.exists(extract_dir):
                download_subregion_osm_file(shp_zip_filename,
                                            osm_file_format=".shp.zip",
                                            download_dir=data_dir, update=update,
                                            verbose=verbose,
                                            download_confirmation_required=
                                            download_confirmation_required)
                if os.path.isfile(path_to_shp_zip):
                    extract_shp_zip(path_to_shp_zip, extract_dir, layer=layer,
                                    verbose=verbose)

            # Collect every extracted .shp whose filename mentions the layer
            path_to_shp = glob.glob(
                os.path.join(extract_dir, "*{}*.shp".format(layer)))
            if len(path_to_shp) == 0:
                shp_data = None
            elif len(path_to_shp) == 1:
                shp_data = gpd.read_file(
                    path_to_shp[0]
                )  # gpd.GeoDataFrame(read_shp_file(path_to_shp))
                if feature:
                    # Filter to the requested feature class and persist it as a
                    # separate shapefile for future direct reads
                    path_to_shp_feat = path_to_shp[0].replace(
                        layer, layer + "_" + feature)
                    shp_data = gpd.GeoDataFrame(
                        shp_data[shp_data.fclass == feature])
                    shp_data.crs = {
                        'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84',
                        'proj': 'longlat'
                    }
                    shp_data.to_file(path_to_shp_feat, driver='ESRI Shapefile')
            else:  # len(path_to_shp) > 1: multiple candidate shapefiles
                if not feature:
                    # Prefer the original "_a*"/"_free*" shapefile(s) for the layer
                    path_to_orig_shp = [
                        p for p in path_to_shp
                        if layer + '_a' in p or layer + '_free' in p
                    ]
                    if len(path_to_orig_shp) == 1:  # "_a*.shp" is not available
                        shp_data = gpd.read_file(path_to_orig_shp[0])
                    else:
                        # Merge all candidates into one GeoDataFrame
                        shp_data = [gpd.read_file(p) for p in path_to_shp]
                        shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
                else:  # feature is given: filter the non-feature-specific shapefiles
                    path_to_shp_feat = [
                        p for p in path_to_shp if layer + "_" + feature not in p
                    ]
                    if len(path_to_shp_feat) == 1:  # "_a*.shp" does not exist
                        shp_data = gpd.read_file(path_to_shp_feat[0])
                        shp_data = shp_data[shp_data.fclass == feature]
                    else:  # both "_a*" and "_free*" .shp for feature is available
                        shp_data = [
                            dat[dat.fclass == feature]
                            for dat in (gpd.read_file(p) for p in path_to_shp_feat)
                        ]
                        shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
                    # Persist the filtered data as a feature-specific shapefile
                    shp_data.crs = {
                        'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84',
                        'proj': 'longlat'
                    }
                    shp_data.to_file(path_to_shp_feat[0].replace(
                        layer, layer + "_" + feature), driver='ESRI Shapefile')

            if pickle_it:
                save_pickle(shp_data, path_to_shp_pickle, verbose=verbose)

            # Optional clean-up of extracted files and/or the downloaded archive
            if os.path.exists(extract_dir) and rm_extracts:
                # import shutil; shutil.rmtree(extract_dir)
                for f in glob.glob(os.path.join(extract_dir, "gis_osm*")):
                    # if layer not in f:
                    os.remove(f)

            if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
                remove_subregion_osm_file(path_to_shp_zip, verbose=verbose)

        return shp_data
def read_osm_pbf(subregion_name, data_dir=None, parsed=True, file_size_limit=50,
                 fmt_other_tags=True, fmt_single_geom=True, fmt_multi_geom=True,
                 update=False, download_confirmation_required=True, pickle_it=False,
                 rm_osm_pbf=True):
    """
    Read the .osm.pbf data of a subregion, reusing a pickled copy when one exists.

    :param subregion_name: [str] e.g. 'london'
    :param data_dir: [str or None] customised path of a .osm.pbf file; if None,
        the default file path will be used
    :param parsed: [bool]
    :param file_size_limit: [int; None] (default: 50) limit of file size (in MB)
        above which the file is parsed chunk-wise
    :param fmt_other_tags: [bool]
    :param fmt_single_geom: [bool]
    :param fmt_multi_geom: [bool]
    :param update: [bool]
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool]
    :param rm_osm_pbf: [bool]
    :return: [dict] or None
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)

    # Redirect to a customised directory when one is supplied
    if data_dir:
        pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf = os.path.join(pbf_dir, osm_pbf_filename)

    subregion_filename = os.path.basename(path_to_osm_pbf)
    pickle_suffix = ".pickle" if parsed else "-raw.pickle"
    path_to_pickle = path_to_osm_pbf.replace(".osm.pbf", pickle_suffix)

    # Fast path: a usable pickle already exists
    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle)

    # If the target file is not available, try downloading it first.
    download_subregion_osm_file(
        subregion_name, osm_file_format=".osm.pbf", download_dir=data_dir,
        update=update,
        download_confirmation_required=download_confirmation_required,
        verbose=False)

    file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)

    # Parse the '.osm.pbf' file chunk-wise when it exceeds the size limit
    chunks_no = None
    if file_size_limit and file_size_in_mb > file_size_limit:
        chunks_no = math.ceil(file_size_in_mb / file_size_limit)

    print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
    try:
        osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                     fmt_other_tags, fmt_single_geom,
                                     fmt_multi_geom)
        print("Successfully.\n")
    except Exception as e:
        print("Failed. {}\n".format(e))
        osm_pbf_data = None

    if pickle_it:
        save_pickle(osm_pbf_data, path_to_pickle)
    if rm_osm_pbf:
        remove_subregion_osm_file(path_to_osm_pbf)

    return osm_pbf_data