def validate_bbbike_download_info(subregion_name, osm_file_format, download_dir=None):
    """
    Assemble the information needed to download one BBBike OSM file.

    :param subregion_name: [str] name of a BBBike subregion, e.g. 'leeds'
    :param osm_file_format: [str] requested file format, e.g. 'pbf'
    :param download_dir: [str; None (default)] target directory; None means the default data directory
    :return: [tuple] of length 4 ([str], [str], [str], [str])
        subregion name, filename, download url and file path

    Example:
        subregion_name  = 'leeds'
        osm_file_format = 'pbf'
        download_dir    = None
        validate_bbbike_download_info(subregion_name, osm_file_format, download_dir)
    """
    sub_name, url = get_bbbike_subregion_download_url(subregion_name, osm_file_format)
    filename = os.path.basename(url)

    # Default data directory unless an explicit download directory was supplied
    dest = cd_dat_bbbike(sub_name, filename) if not download_dir \
        else os.path.join(regulate_input_data_dir(download_dir), filename)

    return sub_name, filename, url, dest
def download_bbbike_subregion_osm_all_files(subregion_name, download_dir=None, download_confirmation_required=True):
    """
    Download every available BBBike OSM file for a given subregion.

    :param subregion_name: [str] name of a BBBike subregion
    :param download_dir: [str or None] target directory; None means the default BBBike data directory
    :param download_confirmation_required: [bool] whether to ask for confirmation before downloading
    """
    sub_name = regulate_bbbike_input_subregion_name(subregion_name)
    catalogue = fetch_bbbike_download_catalogue("BBBike-download-catalogue")[sub_name]
    target_dir = regulate_input_data_dir(download_dir) if download_dir else cd_dat_bbbike(sub_name)

    prompt = "Confirm to download all available BBBike data for \"{}\"?".format(sub_name)
    if not confirmed(prompt, confirmation_required=download_confirmation_required):
        print("The downloading process was not activated.")
        return

    print("\nStart to download all available OSM data for \"{}\" ... \n".format(sub_name))
    for url, filename in zip(catalogue.URL, catalogue.Filename):
        print("\n\n\"{}\" (below): ".format(filename))
        try:
            # Files are grouped under a per-subregion sub-folder of the target directory
            download(url, os.path.join(target_dir, sub_name, filename))
            # if os.path.getsize(path_to_file) / (1024 ** 2) <= 5: time.sleep(5)
        except Exception as e:
            print("\nFailed to download \"{}\". {}.".format(filename, e))

    print("\nCheck out the downloaded OSM data for \"{}\" at \"{}\".".format(
        sub_name, os.path.join(target_dir, sub_name)))
def download_subregion_osm_file(*subregion_name, osm_file_format, download_dir=None, update=False,
                                download_confirmation_required=True, verbose=True):
    """
    Download the OSM data file(s) of one or more GeoFabrik subregions.

    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str] directory to save the downloaded file(s), or None (using default directory)
    :param update: [bool] whether to update (i.e. re-download) data
    :param download_confirmation_required: [bool] whether to confirm before downloading
    :param verbose: [bool]
    """
    for name in subregion_name:
        # Resolve the canonical subregion name and its download URL
        sub_name, url = get_subregion_download_url(name, osm_file_format, update=False)

        if not download_dir:
            # Download the requested OSM file to default directory
            filename, dest = get_default_path_to_osm_file(sub_name, osm_file_format, mkdir=True)
        else:
            dest_dir = regulate_input_data_dir(download_dir)
            filename = get_default_osm_filename(sub_name, osm_file_format=osm_file_format)
            dest = os.path.join(dest_dir, filename)

        # Skip files that are already on disk unless an update was requested
        if os.path.isfile(dest) and not update:
            if verbose:
                print("\n\"{}\" is already available for \"{}\" at: \n\"{}\".\n".format(
                    filename, sub_name, dest))
            continue

        if not confirmed("\nTo download {} data for {}".format(osm_file_format, sub_name),
                         confirmation_required=download_confirmation_required):
            print("The downloading process was not activated.")
            continue

        op = "Updating" if os.path.isfile(dest) else "Downloading"
        try:
            download(url, dest)
            print("\n{} \"{}\" for \"{}\" ... Done.".format(op, filename, sub_name))
            print("Check out: \"{}\".".format(dest))
        except Exception as e:
            print("\nFailed to download \"{}\". {}.".format(filename, e))
def find_osm_pbf_file(subregion_name, data_dir=None):
    """
    Locate a previously downloaded .osm.pbf file for a subregion.

    :param subregion_name: [str]
    :param data_dir: [str or None] custom directory to search; None means the default location
    :return: [str] path to .osm.pbf file, or None when the file does not exist
    """
    filename, default_path = get_default_path_to_osm_file(subregion_name, ".osm.pbf", mkdir=False)

    candidate = default_path if not data_dir \
        else os.path.join(regulate_input_data_dir(data_dir), filename)

    # Only report the path when the file actually exists on disk
    return candidate if os.path.isfile(candidate) else None
def find_osm_shp_file(subregion_name, layer=None, feature=None, data_dir=None, file_ext=".shp"):
    """
    Find extracted .shp files for a subregion, optionally filtered by layer and feature.

    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param layer: [str; None (default)] name of a .shp layer, e.g. 'railways'
    :param feature: [str or None(default)] feature name, e.g. 'rail'; if None, all available features included
    :param data_dir: [str; None (default)] directory in which the function go to; if None, use default directory
    :param file_ext: [str] (default: ".shp") file extension, e.g. ".shp" (default)
    :return: [list] a list of paths

    fetch_osm_file('england', 'railways', feature=None, file_format=".shp", update=False) should return
    ['...\\Europe\\Great Britain\\england-latest-free.shp\\gis.osm_railways_free_1.shp'],
    if such a file exists, and [] otherwise.

    Example:
        subregion_name = 'london'
        layer          = None
        feature        = None
        data_dir       = None
        file_ext       = ".shp"
        find_osm_shp_file(subregion_name, layer, feature, data_dir, file_ext)
    """
    if not data_dir:  # Go to default file path
        _, path_to_shp_zip = get_default_path_to_osm_file(
            subregion_name, osm_file_format=".shp.zip", mkdir=False)
        shp_dir = os.path.splitext(path_to_shp_zip)[0]
    else:
        shp_dir = regulate_input_data_dir(data_dir)

    # Build the glob pattern with os.path.join; the previous 'shp_dir + "\\*"' form
    # hard-coded the Windows separator and found nothing on POSIX systems.
    pattern = os.path.join(shp_dir, "*" + file_ext)

    if not layer:
        osm_file_paths = glob.glob(pattern)
    else:
        # Without a feature, match the layer's "_free" (and optional "_a") shapefiles;
        # with a feature, match the layer_feature naming scheme.
        pat = re.compile("{}(_a)?_free".format(layer)) if not feature \
            else re.compile("{}_{}".format(layer, feature))
        osm_file_paths = [f for f in glob.glob(pattern) if re.search(pat, f)]

    # if not osm_file_paths: print("The required file may not exist.")
    return osm_file_paths
def read_osm_pbf(subregion_name, data_dir=None, parsed=True, file_size_limit=50, fmt_other_tags=True,
                 fmt_single_geom=True, fmt_multi_geom=True, update=False,
                 download_confirmation_required=True, pickle_it=False, rm_osm_pbf=False, verbose=False):
    """
    Read (and optionally cache as a pickle) the OSM data of a subregion from its .osm.pbf file.

    :param subregion_name: [str] e.g. 'rutland'
    :param data_dir: [str; None (default)] customised path of a .osm.pbf file
    :param parsed: [bool] (default: True)
    :param file_size_limit: [numbers.Number] (default: 50) limit of file size (in MB), e.g. 50, or 100
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param update: [bool] (default: False)
    :param download_confirmation_required: [bool] (default: True)
    :param pickle_it: [bool] (default: False)
    :param rm_osm_pbf: [bool] (default: False)
    :param verbose: [bool] (default: False)
    :return: [dict; None]

    If 'subregion' is the name of the subregion, the default file path will be used.

    Example:
        read_osm_pbf('Rutland')
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if not (osm_pbf_filename and path_to_osm_pbf):
        print("Errors occur. Maybe check with the input \"subregion_name\" first.")
        return None

    if data_dir:  # use the customised directory instead of the default file path
        path_to_osm_pbf = os.path.join(regulate_input_data_dir(data_dir), osm_pbf_filename)
    subregion_filename = os.path.basename(path_to_osm_pbf)

    path_to_pickle = path_to_osm_pbf.replace(
        ".osm.pbf", ".pickle" if parsed else "-raw.pickle")
    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle, verbose=verbose)

    # If the target file is not available, try downloading it first.
    # BUG FIX: previously the download and the parsing sat in mutually exclusive
    # branches, so a freshly downloaded file was never parsed; now parsing always
    # follows a (possibly skipped) download.
    if not os.path.isfile(path_to_osm_pbf) or update:
        try:
            download_subregion_osm_file(subregion_name, osm_file_format=".osm.pbf",
                                        download_dir=data_dir,
                                        download_confirmation_required=download_confirmation_required,
                                        update=update, verbose=False)
        except Exception as e:
            print("Cancelled reading data. CAUSE: {}".format(e))
            return None

    file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)
    if file_size_limit and file_size_in_mb > file_size_limit:
        # Parsing the '.osm.pbf' file in a chunk-wise way
        chunks_no = math.ceil(file_size_in_mb / file_size_limit)
    else:
        chunks_no = None

    if verbose:
        print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
    try:
        osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                     fmt_other_tags, fmt_single_geom, fmt_multi_geom)
        if verbose:
            print("Successfully.\n")
        if pickle_it:
            save_pickle(osm_pbf_data, path_to_pickle, verbose=verbose)
    except Exception as e:
        print("Failed. CAUSE: \"{}\"\n".format(e))
        osm_pbf_data = None

    if rm_osm_pbf:
        remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

    return osm_pbf_data
def read_shp_zip(subregion_name, layer, feature=None, data_dir=None, update=False,
                 download_confirmation_required=True, pickle_it=False, rm_extracts=False,
                 rm_shp_zip=False, verbose=False):
    """
    Read one layer of a subregion's .shp.zip archive into a GeoDataFrame, downloading
    and extracting the archive first if necessary.

    :param subregion_name: [str] e.g. 'england', 'oxfordshire', or 'europe'; case-insensitive
    :param layer: [str] e.g. 'railways'
    :param feature: [str; None (default)] e.g. 'rail'; if None, all available features included
    :param data_dir: [str; None (default)]
    :param update: [bool] (default: False) whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: False)
    :param pickle_it: [bool] (default: False)
    :param rm_extracts: [bool] (default: False) whether to delete extracted files from the .shp.zip file
    :param rm_shp_zip: [bool] (default: False) whether to delete the downloaded .shp.zip file
    :param verbose: [bool] (default: False)
    :return: [gpd.GeoDataFrame]

    Example:
        read_shp_zip('Rutland', 'railways', None, cd("test_read_GeoFabrik"),
                     update=False, rm_extracts=True, verbose=True)
    """
    shp_zip_filename, path_to_shp_zip = get_default_path_to_osm_file(
        subregion_name, ".shp.zip", mkdir=False)

    if shp_zip_filename and path_to_shp_zip:
        extract_dir = os.path.splitext(path_to_shp_zip)[0]
        if data_dir:
            # Redirect both the archive and its extraction folder to the custom directory
            shp_zip_dir = regulate_input_data_dir(data_dir)
            path_to_shp_zip = os.path.join(shp_zip_dir, shp_zip_filename)
            extract_dir = os.path.join(shp_zip_dir, os.path.basename(extract_dir))

        # Make a local path for saving a pickle file for .shp data
        sub_name = "-".join(x for x in [
            shp_zip_filename.replace("-latest-free.shp.zip", ""), layer, feature] if x)
        path_to_shp_pickle = os.path.join(extract_dir, sub_name + ".shp.pickle")

        if os.path.isfile(path_to_shp_pickle) and not update:
            # Cached result available - skip download/extraction/parsing entirely
            shp_data = load_pickle(path_to_shp_pickle, verbose=verbose)
        else:
            if not os.path.exists(extract_dir):
                # Download the requested .shp.zip archive, then extract the wanted layer.
                # NOTE(review): the first argument looks like it should be the subregion
                # name rather than the archive filename - confirm against
                # download_subregion_osm_file's expectations.
                download_subregion_osm_file(shp_zip_filename, osm_file_format=".shp.zip",
                                            download_dir=data_dir, update=update, verbose=verbose,
                                            download_confirmation_required=
                                            download_confirmation_required)
                if os.path.isfile(path_to_shp_zip):
                    extract_shp_zip(path_to_shp_zip, extract_dir, layer=layer, verbose=verbose)

            # All shapefiles of the requested layer (there may be e.g. "_a_free" variants)
            path_to_shp = glob.glob(os.path.join(extract_dir, "*{}*.shp".format(layer)))
            if len(path_to_shp) == 0:
                shp_data = None
            elif len(path_to_shp) == 1:
                shp_data = gpd.read_file(
                    path_to_shp[0])  # gpd.GeoDataFrame(read_shp_file(path_to_shp))
                if feature:
                    # Filter to the requested feature class and persist it as its own .shp
                    path_to_shp_feat = path_to_shp[0].replace(layer, layer + "_" + feature)
                    shp_data = gpd.GeoDataFrame(shp_data[shp_data.fclass == feature])
                    shp_data.crs = {'no_defs': True, 'ellps': 'WGS84',
                                    'datum': 'WGS84', 'proj': 'longlat'}
                    shp_data.to_file(path_to_shp_feat, driver='ESRI Shapefile')
            else:  # len(path_to_shp) > 1:
                if not feature:
                    path_to_orig_shp = [p for p in path_to_shp
                                        if layer + '_a' in p or layer + '_free' in p]
                    if len(path_to_orig_shp) == 1:  # "_a*.shp" is not available
                        shp_data = gpd.read_file(path_to_orig_shp[0])
                    else:
                        # Concatenate all candidate shapefiles into a single frame
                        shp_data = [gpd.read_file(p) for p in path_to_shp]
                        shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
                else:
                    # NOTE(review): "not in" keeps the paths WITHOUT the feature-specific
                    # suffix (i.e. the original layer files) - verify this is intended.
                    path_to_shp_feat = [p for p in path_to_shp
                                        if layer + "_" + feature not in p]
                    if len(path_to_shp_feat) == 1:  # "_a*.shp" does not exist
                        shp_data = gpd.read_file(path_to_shp_feat[0])
                        shp_data = shp_data[shp_data.fclass == feature]
                    else:  # both "_a*" and "_free*" .shp for feature is available
                        shp_data = [dat[dat.fclass == feature]
                                    for dat in (gpd.read_file(p) for p in path_to_shp_feat)]
                        shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
                    # Save the feature subset as a new feature-specific shapefile
                    shp_data.crs = {'no_defs': True, 'ellps': 'WGS84',
                                    'datum': 'WGS84', 'proj': 'longlat'}
                    shp_data.to_file(path_to_shp_feat[0].replace(layer, layer + "_" + feature),
                                     driver='ESRI Shapefile')

            if pickle_it:
                save_pickle(shp_data, path_to_shp_pickle, verbose=verbose)

            if os.path.exists(extract_dir) and rm_extracts:
                # import shutil; shutil.rmtree(extract_dir)
                for f in glob.glob(os.path.join(extract_dir, "gis_osm*")):
                    # if layer not in f:
                    os.remove(f)

        if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
            remove_subregion_osm_file(path_to_shp_zip, verbose=verbose)

        return shp_data
def merge_multi_shp(subregion_names, layer, update_shp_zip=False, download_confirmation_required=True,
                    data_dir=None, prefix="gis_osm", rm_zip_extracts=False, rm_shp_parts=False,
                    merged_shp_dir=None, verbose=False):
    """
    Merge the shapefiles of one layer across several subregions into a single .shp file.

    :param subregion_names: [list] a list of subregion names, e.g. ['rutland', 'essex']
    :param layer: [str] name of a OSM layer, e.g. 'railways'
    :param update_shp_zip: [bool] (default: False) indicates whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: True)
    :param data_dir: [str; None]
    :param prefix: [str] (default: "gis_osm")
    :param rm_zip_extracts: [bool] (default: False)
    :param rm_shp_parts: [bool] (default: False)
    :param merged_shp_dir: [str; None (default)] if None, use the layer name as the name of the folder
        where the merged shp files will be saved
    :param verbose: [bool] (default: False)

    Layers include 'buildings', 'landuse', 'natural', 'places', 'points', 'railways',
    'roads' and 'waterways'.

    Note that this function does not create projection (.prj) for the merged map.
    Reference: http://geospatialpython.com/2011/02/create-prj-projection-file-for.html
    for creating a .prj file.

    Example:
        merge_multi_shp(['Rutland', 'Herefordshire'], 'railways',
                        data_dir=cd("test_read_GeoFabrik"), verbose=True)
    """
    # Make sure all the required shape files are ready
    subregion_names_, file_format = [
        regulate_input_subregion_name(x) for x in subregion_names], ".shp.zip"
    download_subregion_osm_file(
        *subregion_names_, osm_file_format=file_format, download_dir=data_dir,
        update=update_shp_zip,
        download_confirmation_required=download_confirmation_required, verbose=verbose)

    # Extract all files from .zip
    if not data_dir:  # output_dir is None or output_dir == ""
        file_paths = (get_default_path_to_osm_file(x, file_format, mkdir=False)[1]
                      for x in subregion_names_)
    else:
        default_filenames = (get_default_path_to_osm_file(x, file_format, mkdir=False)[0]
                             for x in subregion_names_)
        file_paths = [cd(regulate_input_data_dir(data_dir), f) for f in default_filenames]

    # Pair each archive with its extraction directory (archive path without extension)
    extract_info = [(p, os.path.splitext(p)[0]) for p in file_paths]
    extract_dirs = []
    for file_path, extract_dir in extract_info:
        extract_shp_zip(file_path, extract_dir, layer=layer, verbose=verbose)
        extract_dirs.append(extract_dir)

    # Specify a directory that stores files for the specific layer
    # NOTE(review): commonpath is applied to the (archive, extract_dir) pair of the first
    # subregion, effectively the directory containing both - confirm this is the intent.
    if not data_dir:
        path_to_merged = cd(os.path.commonpath(extract_info[0]), "merged_" + layer)
    else:
        path_to_merged = cd(regulate_input_data_dir(data_dir), "merged_" + layer)
    if not os.path.exists(path_to_merged):
        os.mkdir(path_to_merged)

    # Copy .shp files (e.g. gis_osm_***_free_1.shp) into the output directory
    for subregion, p in zip(subregion_names, extract_dirs):
        for original_filename in glob.glob1(p, "*{}*".format(layer)):
            dest = os.path.join(
                path_to_merged,
                "{}_{}".format(subregion.lower().replace(' ', '-'), original_filename))
            if rm_zip_extracts:
                # NOTE(review): rmtree(p) runs inside the filename loop, deleting the
                # whole extract dir after moving the FIRST matching file - verify that
                # only one file per layer is expected here.
                shutil.move(os.path.join(p, original_filename), dest)
                shutil.rmtree(p)
            else:
                shutil.copyfile(os.path.join(p, original_filename), dest)

    # Resource: https://github.com/GeospatialPython/pyshp
    shp_file_paths = [x for x in glob.glob(os.path.join(path_to_merged, "*.shp"))
                      if not os.path.basename(x).startswith("merged_")]
    path_to_merged_shp_file = cd(path_to_merged, "merged_" + prefix + "_" + layer)
    w = shapefile.Writer(path_to_merged_shp_file)
    if verbose:
        print("\nMerging the following shape files:\n {}".format(
            "\n ".join(os.path.basename(f) for f in shp_file_paths)))
        print("In progress ... ", end="")
    try:
        # Append every record/shape of each part file into the single writer
        for f in shp_file_paths:
            r = shapefile.Reader(f)
            w.fields = r.fields[1:]  # skip first deletion field
            w.shapeType = r.shapeType
            for shaperec in r.iterShapeRecords():
                w.record(*shaperec.record)
                w.shape(shaperec.shape)
            r.close()
        w.close()
        # Re-read the merged output to attach a CRS, then rewrite it
        merged_shp_data = gpd.read_file(path_to_merged_shp_file + ".shp")
        merged_shp_data.crs = {'no_defs': True, 'ellps': 'WGS84',
                               'datum': 'WGS84', 'proj': 'longlat'}
        merged_shp_data.to_file(filename=path_to_merged_shp_file, driver="ESRI Shapefile")
        print("Successfully.") if verbose else ""
    except Exception as e:
        print("Failed. {}".format(e)) if verbose else ""
    print("The output .shp file is saved in \"{}\".".format(path_to_merged)) if verbose else ""

    if rm_shp_parts:
        # Move the merged outputs into their final folder, then drop the staging dir.
        # NOTE(review): cd(data_dir, layer, ...) with data_dir=None may misbehave - confirm.
        if merged_shp_dir:
            new_shp_dir = cd(regulate_input_data_dir(merged_shp_dir), mkdir=True)
        else:
            new_shp_dir = cd(data_dir, layer, mkdir=True)
        for x in glob.glob(cd(path_to_merged, "merged_*")):
            shutil.move(x, cd(new_shp_dir, os.path.basename(x).replace("merged_", "", 1)))
        shutil.rmtree(path_to_merged)
def psql_osm_pbf_data_extracts(*subregion_name, database_name='OSM_Geofabrik', data_dir=None,
                               update_osm_pbf=False, if_table_exists='replace', file_size_limit=50,
                               parsed=True, fmt_other_tags=True, fmt_single_geom=True,
                               fmt_multi_geom=True, rm_raw_file=False, verbose=False):
    """
    Import data of selected or all (sub)regions, which do not have (sub-)subregions,
    into PostgreSQL server

    :param subregion_name: [str]
    :param database_name: [str] (default: 'OSM_Geofabrik')
    :param data_dir: [str; None (default)]
    :param update_osm_pbf: [bool] (default: False)
    :param if_table_exists: [str] 'replace' (default); 'append'; or 'fail'
    :param file_size_limit: [int] (default: 50) size (MB) above which a file is
        imported layer-by-layer in chunks rather than read in one go
    :param parsed: [bool] (default: True)
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param rm_raw_file: [bool] (default: False)
    :param verbose: [bool] (default: False)
    """
    if not subregion_name:
        # No names given - import every terminal (non-subdivided) region
        subregion_names = fetch_region_subregion_tier("GeoFabrik-non-subregion-list")
        confirm_msg = "To dump GeoFabrik OSM data extracts of all subregions to PostgreSQL? "
    else:
        subregion_names = retrieve_names_of_subregions_of(*subregion_name)
        confirm_msg = "To dump GeoFabrik OSM data extracts of the following subregions to PostgreSQL? \n{}?\n".format(
            ", ".join(subregion_names))

    if confirmed(confirm_msg):
        # Connect to PostgreSQL server
        osmdb = OSM()
        osmdb.connect_db(database_name=database_name)

        err_subregion_names = []  # subregions whose import failed
        for subregion_name_ in subregion_names:
            default_pbf_filename, default_path_to_pbf = get_default_path_to_osm_file(
                subregion_name_, ".osm.pbf")
            if not data_dir:  # Go to default file path
                path_to_osm_pbf = default_path_to_pbf
            else:
                osm_pbf_dir = regulate_input_data_dir(data_dir)
                path_to_osm_pbf = os.path.join(osm_pbf_dir, default_pbf_filename)

            download_subregion_osm_file(subregion_name_, osm_file_format=".osm.pbf",
                                        download_dir=data_dir, update=update_osm_pbf,
                                        download_confirmation_required=False, verbose=verbose)

            file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)

            try:
                if file_size_in_mb <= file_size_limit:
                    # Small enough: read the whole file, then dump it as one table
                    subregion_osm_pbf = read_osm_pbf(subregion_name_, data_dir, parsed,
                                                     file_size_limit, fmt_other_tags,
                                                     fmt_single_geom, fmt_multi_geom,
                                                     update=False,
                                                     download_confirmation_required=False,
                                                     pickle_it=False, rm_osm_pbf=rm_raw_file)
                    if subregion_osm_pbf is not None:
                        osmdb.dump_osm_pbf_data(subregion_osm_pbf, table_name=subregion_name_,
                                                if_exists=if_table_exists)
                        del subregion_osm_pbf
                        gc.collect()
                else:
                    # Too large: parse and import layer by layer, chunk by chunk
                    print("\nParsing and importing \"{}\" feature-wisely to PostgreSQL ... ".format(
                        subregion_name_))
                    # Reference: https://gdal.org/python/osgeo.ogr.Feature-class.html
                    raw_osm_pbf = ogr.Open(path_to_osm_pbf)
                    layer_count = raw_osm_pbf.GetLayerCount()
                    for i in range(layer_count):
                        lyr = raw_osm_pbf.GetLayerByIndex(i)  # Hold the i-th layer
                        lyr_name = lyr.GetName()
                        print(" {} ... ".format(lyr_name), end="")
                        try:
                            lyr_feats = [feat for _, feat in enumerate(lyr)]
                            feats_no, chunks_no = len(lyr_feats), math.ceil(
                                file_size_in_mb / file_size_limit)
                            chunked_lyr_feats = split_list(lyr_feats, chunks_no)
                            # Free the un-chunked feature list before parsing
                            del lyr_feats
                            gc.collect()
                            if osmdb.subregion_table_exists(lyr_name, subregion_name_) \
                                    and if_table_exists == 'replace':
                                osmdb.drop_subregion_data_by_layer(subregion_name_, lyr_name)
                            # Loop through all available features
                            for lyr_chunk in chunked_lyr_feats:
                                lyr_chunk_dat = pd.DataFrame(
                                    rapidjson.loads(f.ExportToJson()) for f in lyr_chunk)
                                lyr_chunk_dat = parse_layer_data(lyr_chunk_dat, lyr_name,
                                                                fmt_other_tags, fmt_single_geom,
                                                                fmt_multi_geom)
                                # After the first chunk replaces/drops, subsequent chunks append
                                if_exists_ = if_table_exists if if_table_exists == 'fail' else 'append'
                                osmdb.dump_osm_pbf_data_by_layer(lyr_chunk_dat,
                                                                 if_exists=if_exists_,
                                                                 schema_name=lyr_name,
                                                                 table_name=subregion_name_)
                                del lyr_chunk_dat
                                gc.collect()
                            print("Done. Total amount of features: {}".format(feats_no))
                        except Exception as e:
                            print("Failed. {}".format(e))
                    raw_osm_pbf.Release()
                    del raw_osm_pbf
                    gc.collect()
                if rm_raw_file:
                    remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)
            except Exception as e:
                print(e)
                err_subregion_names.append(subregion_name_)

            # Pause between subregions - presumably to be polite to the download server
            if subregion_name_ != subregion_names[-1]:
                time.sleep(60)

        if len(err_subregion_names) == 0:
            print("\nMission accomplished.\n")
        else:
            print("\nErrors occurred when parsing data of the following subregion(s):")
            print(*err_subregion_names, sep=", ")

        osmdb.disconnect()
        del osmdb
def read_osm_pbf(subregion_name, data_dir=None, parsed=True, file_size_limit=50, fmt_other_tags=True,
                 fmt_single_geom=True, fmt_multi_geom=True, update=False,
                 download_confirmation_required=True, pickle_it=False, rm_osm_pbf=True):
    """
    Read the OSM data of a subregion from its .osm.pbf file, using a pickle cache if present.

    NOTE(review): this redefines an earlier read_osm_pbf in this module; the later
    definition is the one in effect at import time.

    :param subregion_name: [str] e.g. 'london'
    :param data_dir: [str or None] customised path of a .osm.pbf file
    :param parsed: [bool]
    :param file_size_limit: [numbers.Number] limit of file size (in MB), e.g. 50, or 100(default)
    :param fmt_other_tags: [bool]
    :param fmt_single_geom: [bool]
    :param fmt_multi_geom: [bool]
    :param update: [bool]
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool]
    :param rm_osm_pbf: [bool]
    :return: [dict] or None

    If 'subregion' is the name of the subregion, the default file path will be used.
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    pbf_filename, pbf_path = get_default_path_to_osm_file(subregion_name, ".osm.pbf", mkdir=False)
    if data_dir:
        # A custom directory overrides the default file location
        pbf_path = os.path.join(regulate_input_data_dir(data_dir), pbf_filename)

    pickle_path = pbf_path.replace(".osm.pbf", ".pickle" if parsed else "-raw.pickle")
    if os.path.isfile(pickle_path) and not update:
        return load_pickle(pickle_path)

    # If the target file is not available, try downloading it first
    # (the download helper itself skips files that are already up to date).
    download_subregion_osm_file(subregion_name, osm_file_format=".osm.pbf",
                                download_dir=data_dir, update=update,
                                download_confirmation_required=download_confirmation_required,
                                verbose=False)

    size_mb = round(os.path.getsize(pbf_path) / (1024 ** 2), 1)
    # Parse the file chunk-wise when it exceeds the size limit
    chunks_no = math.ceil(size_mb / file_size_limit) \
        if file_size_limit and size_mb > file_size_limit else None

    print("\nParsing \"{}\" ... ".format(os.path.basename(pbf_path)), end="")
    try:
        data = parse_osm_pbf(pbf_path, chunks_no, parsed,
                             fmt_other_tags, fmt_single_geom, fmt_multi_geom)
        print("Successfully.\n")
    except Exception as e:
        print("Failed. {}\n".format(e))
        data = None

    if pickle_it:
        save_pickle(data, pickle_path)
    if rm_osm_pbf:
        remove_subregion_osm_file(pbf_path)

    return data
def download_subregion_osm_file(*subregion_name, osm_file_format, download_dir=None, update=False,
                                download_confirmation_required=True, deep_retry=False, verbose=False):
    """
    Download the OSM data file(s) of one or more subregions, recursing into a region's
    subregions when the requested format is not available for the region itself.

    NOTE(review): this redefines an earlier download_subregion_osm_file in this module;
    the later definition is the one in effect at import time.

    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str; None (default)] directory to save the downloaded file(s);
        None (using default directory)
    :param update: [bool] (default: False) whether to update (i.e. re-download) data
    :param download_confirmation_required: [bool] (default: True) whether to confirm before downloading
    :param deep_retry: [bool] (default: False)
    :param verbose: [bool] (default: True)

    Example:
        download_subregion_osm_file('london', osm_file_format=".osm.pbf")
    """
    for sub_reg_name in subregion_name:
        # Get download URL
        subregion_name_, download_url = get_subregion_download_url(
            sub_reg_name, osm_file_format, update=False)

        if pd.isna(download_url):
            # Requested format not offered for this region - fall back to its subregions
            if verbose:
                print("\"{}\" data is not available for \"{}\" from the server. "
                      "Try to download the data of its subregions instead. ".format(
                          osm_file_format, subregion_name_))
            sub_subregions = retrieve_names_of_subregions_of(subregion_name_, deep=deep_retry)
            # NOTE(review): cd(download_dir, ...) with download_dir=None may misbehave - confirm.
            download_dir_ = cd(
                download_dir,
                subregion_name_.replace(" ", "-").lower() + os.path.splitext(osm_file_format)[0])
            download_subregion_osm_file(*sub_subregions, osm_file_format=osm_file_format,
                                        download_dir=download_dir_, update=update,
                                        download_confirmation_required=download_confirmation_required,
                                        verbose=verbose)
        else:
            if not download_dir:
                # Download the requested OSM file to default directory
                osm_filename, path_to_file = get_default_path_to_osm_file(
                    subregion_name_, osm_file_format, mkdir=True)
            else:
                regulated_dir = regulate_input_data_dir(download_dir)
                osm_filename = get_default_osm_filename(
                    subregion_name_, osm_file_format=osm_file_format)
                path_to_file = os.path.join(regulated_dir, osm_filename)

            if os.path.isfile(path_to_file) and not update:
                # Already downloaded and no update requested - nothing to do
                print("\n\"{}\" for \"{}\" is already available: \"{}\".".format(
                    osm_filename, subregion_name_, path_to_file)) if verbose else ""
            else:
                op = "Updating" if os.path.isfile(path_to_file) else "Downloading"
                if confirmed("To download the {} data of \"{}\", saved as \"{}\"\n".format(
                        osm_file_format, subregion_name_, path_to_file),
                        confirmation_required=download_confirmation_required):
                    try:
                        from pyhelpers.download import download
                        download(download_url, path_to_file)
                        if verbose:
                            print("{} \"{}\" for \"{}\" ... Done.".format(
                                op, osm_filename, subregion_name_))
                    except Exception as e:
                        print("Failed to download \"{}\". {}.\n".format(
                            osm_filename, e)) if verbose else ""
                else:
                    print("The {} of \"{}\" was cancelled.\n".format(
                        op.lower(), osm_filename)) if verbose else ""