def drop_layer_data_by_subregion(self, schema_name, *table_names,
                                 subregion_name_as_table_name=True,
                                 confirmation_required=True, verbose=False):
    """
    Drop one or more subregion tables from a given schema (layer).

    :param schema_name: [str] name of a schema (i.e. a layer name)
    :param table_names: [str] one or multiple names of subregions
    :param subregion_name_as_table_name: [bool] (default: True) whether to use
        the subregion name as the table name
    :param confirmation_required: [bool] (default: True) whether to prompt for
        confirmation before dropping
    :param verbose: [bool] (default: False) whether to print progress messages
    """
    table_names_ = (regulate_table_name(table_name, subregion_name_as_table_name)
                    for table_name in table_names)
    _, tbls_msg = self.multi_names_msg(*table_names, desc='table')

    if confirmed("Confirmed to drop the {} from the database \"{}\"".format(
            tbls_msg, self.database_name),
            confirmation_required=confirmation_required):
        tables = tuple('{}."{}"'.format(schema_name, table_name)
                       for table_name in table_names_)

        if verbose:
            # Build "a", "a and b" or "a, b and c" robustly. The previous
            # "%s, " * (len(tables) - 2) + "%s and %s" pattern raised a
            # TypeError whenever a single table was passed.
            if len(tables) == 1:
                tbl_list = tables[0]
            else:
                tbl_list = "{} and {}".format(", ".join(tables[:-1]), tables[-1])
            print("Dropping {} ... ".format(tbl_list), end="")

        try:
            # Equivalent to the old '%s, ' * (len(tables) - 1) + '%s' template,
            # but works uniformly for any number of tables.
            self.engine.execute(
                'DROP TABLE IF EXISTS {} CASCADE;'.format(', '.join(tables)))
            print("Done.") if verbose else ""
        except Exception as e:
            print("Failed. CAUSE: \"{}\"".format(e))
def drop_schema(self, *schema_names, confirmation_required=True, verbose=False):
    """
    Drop one or more schemas from the connected database.

    :param schema_names: [str] name of one schema, or names of multiple schemas
    :param confirmation_required: [bool] (default: True) whether to ask before dropping
    :param verbose: [bool] (default: False) whether to print progress messages
    """
    schemas, schemas_msg = self.multi_names_msg(*schema_names, desc='schema')

    prompt = "Confirmed to drop the {} from the database \"{}\"".format(
        schemas_msg, self.database_name)
    if confirmed(prompt, confirmation_required=confirmation_required):
        try:
            if verbose:
                print("Dropping the {} ... ".format(schemas_msg), end="")
            # One "%s" placeholder per schema, comma-separated.
            placeholders = '%s, ' * (len(schemas) - 1) + '%s'
            sql = 'DROP SCHEMA IF EXISTS ' + placeholders % schemas + ' CASCADE;'
            self.engine.execute(sql)
            if verbose:
                print("Done.")
        except Exception as e:
            print("Failed. CAUSE: \"{}\"".format(e))
def download_bbbike_subregion_osm_all_files(
        subregion_name, download_dir=None, download_confirmation_required=True):
    """
    Download every available BBBike OSM data file for one subregion.

    :param subregion_name: [str] name of a BBBike subregion (case-insensitive)
    :param download_dir: [str or None] target directory; None uses the default
    :param download_confirmation_required: [bool] whether to ask before downloading

    Example:
        download_bbbike_subregion_osm_all_files('leeds')
    """
    region = regulate_bbbike_input_subregion_name(subregion_name)
    catalogue = fetch_bbbike_download_catalogue(
        "BBBike-download-catalogue")[region]

    if download_dir:
        data_dir = regulate_input_data_dir(download_dir)
    else:
        data_dir = cd_dat_bbbike(region)

    if not confirmed(
            "Confirm to download all available BBBike data for \"{}\"?".format(region),
            confirmation_required=download_confirmation_required):
        print("The downloading process was not activated.")
        return

    print("\nStart to download all available OSM data for \"{}\" ... \n".format(region))

    for url, filename in zip(catalogue.URL, catalogue.Filename):
        print("\n\n\"{}\" (below): ".format(filename))
        try:
            # A custom download_dir gets a per-region subfolder; the default
            # data directory already is region-specific.
            if download_dir:
                destination = os.path.join(data_dir, region, filename)
            else:
                destination = os.path.join(data_dir, filename)
            download(url, destination)
        except Exception as e:
            print("\nFailed to download \"{}\". {}.".format(filename, e))

    print("\nCheck out the downloaded OSM data for \"{}\" at \"{}\".".format(
        region, os.path.join(data_dir, region)))
def download_bbbike_subregion_osm(*subregion_name, osm_file_format,
                                  download_dir=None, update=False,
                                  download_confirmation_required=True):
    """
    Download BBBike OSM data of a given format for one or more subregions.

    :param subregion_name: [str] one or more subregion names
    :param osm_file_format: [str] file format/extension of the data to fetch
    :param download_dir: [str; None (default)] target directory; None uses the default
    :param update: [bool] (default: False) whether to re-download existing files
    :param download_confirmation_required: [bool] (default: True) whether to ask first

    Example:
        download_bbbike_subregion_osm('leeds', osm_file_format='pbf')
    """
    for name in subregion_name:
        region, filename, url, destination = validate_bbbike_download_info(
            name, osm_file_format, download_dir)

        # Skip files that are already present unless an update was requested.
        if os.path.isfile(destination) and not update:
            print("\"{}\" is already available for \"{}\" at: \n\"{}\".\n".format(
                filename, region, destination))
            continue

        prompt = "\nTo download {} data for {}".format(osm_file_format, region)
        if not confirmed(prompt,
                         confirmation_required=download_confirmation_required):
            print("The downloading process was not activated.")
            continue

        try:
            download(url, destination)
            print("\n\"{}\" has been downloaded for \"{}\", which is now available at \n\"{}\".\n"
                  .format(filename, region, destination))
            # Short pause after small downloads before hitting the server again.
            if os.path.getsize(destination) / (1024 ** 2) <= 5:
                time.sleep(5)
        except Exception as e:
            print("\nFailed to download \"{}\". {}.".format(filename, e))
def download_sub_subregion_osm_file(*subregion_name, osm_file_format,
                                    download_dir=None, update=False,
                                    download_confirmation_required=True,
                                    interval_sec=5, verbose=False):
    """
    Resolve the given region names to their subregions and download the
    requested OSM data for all of them in one confirmed batch.

    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str; None (default)] directory for the downloaded file(s)
    :param update: [bool] (default: False) whether to re-download existing data
    :param download_confirmation_required: [bool] (default: True) whether to ask first
    :param interval_sec: [int; None] (default: 5) pause (in sec) after downloading
    :param verbose: [bool] (default: False)

    Example:
        download_sub_subregion_osm_file('bedfordshire', 'rutland',
                                        osm_file_format=".osm.pbf")
    """
    sub_regions = retrieve_names_of_subregions_of(*subregion_name)

    prompt = "\nTo download {} data for all the following subregions: \n{}?\n".format(
        osm_file_format, ", ".join(sub_regions))
    if confirmed(prompt, confirmation_required=download_confirmation_required):
        # Confirmation has already been obtained above, so the per-file
        # prompt is suppressed here.
        download_subregion_osm_file(*sub_regions,
                                    osm_file_format=osm_file_format,
                                    download_dir=download_dir,
                                    update=update,
                                    download_confirmation_required=False,
                                    verbose=verbose)
        if interval_sec:
            time.sleep(interval_sec)
def collect_continents_subregion_tables(confirmation_required=True, verbose=False):
    """
    Scrape the GeoFabrik home page and pickle one subregion table per continent.

    :param confirmation_required: [bool] (default: True) whether to confirm
        before starting to collect the information
    :param verbose: [bool] (default: False) whether to print progress messages

    Example:
        collect_continents_subregion_tables(confirmation_required=True, verbose=True)
    """
    if not confirmed("To collect information about subregions of each continent? ",
                     confirmation_required=confirmation_required):
        print("The information collection process was not activated. "
              "The existing local copy will be loaded instead.")
        return

    try:
        home_link = 'https://download.geofabrik.de/'
        response = requests.get(home_link)
        subregion_cells = bs4.BeautifulSoup(response.text, 'lxml').find_all(
            'td', {'class': 'subregion'})
        response.close()

        # Map each continent name to the parsed table of its subregions.
        subregion_tbls = {}
        for cell in subregion_cells:
            continent_url = urllib.parse.urljoin(home_link, cell.a['href'])
            subregion_tbls[cell.a.text] = get_subregion_table(continent_url, verbose)

        save_pickle(subregion_tbls,
                    cd_dat("GeoFabrik-continents-subregion-tables.pickle"),
                    verbose=verbose)
    except Exception as e:
        print("Failed to collect the required information ... {}.".format(e))
def drop(self, database_name=None, confirmation_required=True, verbose=False):
    """
    Drop a database (by default, the currently-connected one).

    :param database_name: [str; None (default)] database to drop;
        if None, drop the current one
    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)
    """
    db_name = database_name if database_name is not None else self.database_name

    prompt = "Confirmed to drop the database \"{}\" for {}@{}?".format(
        db_name, self.user, self.host)
    if confirmed(prompt, confirmation_required=confirmation_required):
        # Must disconnect first: PostgreSQL refuses to drop a database
        # that still has open sessions.
        self.disconnect(db_name)
        try:
            if verbose:
                print("Dropping the database \"{}\" ... ".format(db_name), end="")
            self.engine.execute('DROP DATABASE IF EXISTS "{}"'.format(db_name))
            if verbose:
                print("Done.")
        except Exception as e:
            print("Failed. CAUSE: \"{}\"".format(e))
def update_pkg_metadata(confirmation_required=True, verbose=True):
    """
    Re-collect and save all package metadata (GeoFabrik and BBBike catalogues).

    :param confirmation_required: [bool] (default: True) whether to prompt
        before starting and before each collection step
    :param verbose: [bool] (default: True) whether to print progress messages
    """
    # Fix: forward confirmation_required to the gating prompt as well.
    # Previously the initial prompt always appeared, even when callers
    # passed confirmation_required=False, making the parameter ineffective
    # for unattended runs.
    if confirmed("Updating package metadata may take a few minutes. Continue?",
                 confirmation_required=confirmation_required):
        collect_subregion_info_catalogue(
            confirmation_required=confirmation_required, verbose=verbose)
        time.sleep(10)  # pause between scrapes to avoid hammering the servers
        collect_continents_subregion_tables(
            confirmation_required=confirmation_required, verbose=verbose)
        time.sleep(10)
        collect_region_subregion_tier(
            confirmation_required=confirmation_required, update=False,
            verbose=verbose)
        time.sleep(10)
        collect_bbbike_subregion_catalogue(
            confirmation_required=confirmation_required, verbose=verbose)
        time.sleep(10)
        collect_bbbike_download_catalogue(
            confirmation_required=confirmation_required, verbose=verbose)
        if verbose:
            print("\nUpdate finished.")
def collect_bbbike_subregion_catalogue(confirmation_required=True, verbose=False):
    """
    Scrape the BBBike home page and save both the subregion catalogue and
    the plain list of subregion names.

    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)

    Example:
        collect_bbbike_subregion_catalogue(confirmation_required=True, verbose=True)
    """
    if not confirmed("To collect BBBike subregion catalogue? ",
                     confirmation_required=confirmation_required):
        print("The information collection process was not activated. "
              "The existing local copy will be loaded instead.")
        return

    try:
        home_url = 'http://download.bbbike.org/osm/bbbike/'
        # The first table row is the parent-directory entry; drop it.
        catalogue = pd.read_html(home_url, header=0,
                                 parse_dates=['Last Modified'])[0].drop(0)
        catalogue.Name = catalogue.Name.map(lambda name: name.strip('/'))

        save_pickle(catalogue,
                    cd_dat("BBBike-subregion-catalogue.pickle"),
                    verbose=verbose)
        save_pickle(catalogue.Name.tolist(),
                    cd_dat("BBBike-subregion-name-list.pickle"),
                    verbose=verbose)
    except Exception as e:
        print("Failed to get the required information ... {}.".format(e))
def collect_bbbike_download_catalogue(confirmation_required=True, verbose=False):
    """
    Collect and save the BBBike download dictionary: one download catalogue
    (DataFrame) per subregion, plus the lists of available file formats and
    data types.

    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)

    Example:
        collect_bbbike_download_catalogue(confirmation_required=True, verbose=True)
    """

    def collect_bbbike_subregion_download_catalogue(subregion_name):
        """
        Scrape one subregion's BBBike page into a download catalogue.

        :param subregion_name: [str] e.g. 'leeds'
        :return: [pd.DataFrame; None] one row per downloadable file,
            or None if scraping failed
        """

        def parse_dlc(dlc):
            # Parse one <a> element into
            # [filename, download URL, file type, file size, last update].
            dlc_href = dlc.get('href')  # URL
            filename = dlc_href.strip('./')
            download_url = urllib.parse.urljoin(url, dlc_href)
            if not dlc.has_attr('title'):
                # Links without a title attribute are the .poly files.
                file_format, file_size, last_update = 'Poly', None, None
            else:
                if len(dlc.contents) < 3:
                    file_format, file_size = 'Txt', None
                else:
                    file_format, file_size, _ = dlc.contents  # File type and size
                    file_format, file_size = file_format.strip(), file_size.text
                last_update = pd.to_datetime(dlc.get('title'))  # Date and time
            return [filename, download_url, file_format, file_size, last_update]

        subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)

        try:
            print(" \"{}\" ... ".format(subregion_name_), end="") if verbose else ""
            url = 'https://download.bbbike.org/osm/bbbike/{}/'.format(
                subregion_name_)
            source = urllib.request.urlopen(url)

            import bs4
            source_soup = bs4.BeautifulSoup(source, 'lxml')
            download_links_class = source_soup.find_all(
                name='a', attrs={'class': ['download_link', 'small']})

            subregion_downloads_catalogue = pd.DataFrame(
                parse_dlc(x) for x in download_links_class)
            subregion_downloads_catalogue.columns = [
                'Filename', 'URL', 'DataType', 'Size', 'LastUpdate']

            # path_to_file = cd_dat_bbbike(subregion_name_, subregion_name_ + "-download-catalogue.pickle")
            # save_pickle(subregion_downloads_catalogue, path_to_file, verbose=verbose)
            print("Done. ") if verbose else ""

        except Exception as e_:
            subregion_downloads_catalogue = None
            # Fix: the original format string had a single placeholder for two
            # arguments, so the exception text was silently dropped and only
            # the subregion name was printed.
            print("Failed. {}: {}".format(
                subregion_name_, e_)) if verbose else ""

        return subregion_downloads_catalogue

    if confirmed("To collect BBBike download dictionary? ",
                 confirmation_required=confirmation_required):
        try:
            bbbike_subregion_names = fetch_bbbike_subregion_catalogue(
                "BBBike-subregion-name-list", verbose=verbose)
            print("Collecting BBBike download catalogue for: ") if verbose else ""

            download_catalogue = [
                collect_bbbike_subregion_download_catalogue(subregion_name)
                for subregion_name in bbbike_subregion_names]

            # Use the first subregion's catalogue to derive the lists of
            # available file formats and data types.
            sr_name = bbbike_subregion_names[0]
            sr_download_catalogue = download_catalogue[0]

            # Available file formats (strip the subregion name prefix;
            # the trailing two entries are checksum/poly extras)
            file_fmt = [re.sub('{}|CHECKSUM'.format(sr_name), '', f)
                        for f in sr_download_catalogue.Filename]
            save_pickle(file_fmt[:-2],
                        cd_dat("BBBike-osm-file-formats.pickle"),
                        verbose=verbose)

            # Available data types
            data_typ = sr_download_catalogue.DataType.tolist()
            save_pickle(data_typ[:-2],
                        cd_dat("BBBike-osm-data-types.pickle"),
                        verbose=verbose)

            # available_file_formats = dict(zip(file_fmt, file_ext))

            downloads_dictionary = dict(
                zip(bbbike_subregion_names, download_catalogue))
            save_pickle(downloads_dictionary,
                        cd_dat("BBBike-download-catalogue.pickle"),
                        verbose=verbose)
        except Exception as e:
            print("Failed to collect BBBike download dictionary. {}".format(
                e)) if verbose else ""
def psql_osm_pbf_data_extracts(*subregion_name,
                               username='******', password=None,
                               host='localhost', port=5432,
                               database_name='OSM_Geofabrik_PBF',
                               data_dir=None, update_osm_pbf=False,
                               if_table_exists='replace',
                               file_size_limit=50, parsed=True,
                               fmt_other_tags=True, fmt_single_geom=True,
                               fmt_multi_geom=True,
                               pickle_raw_file=False, rm_raw_file=False,
                               confirmation_required=True, verbose=False):
    """
    Download GeoFabrik .osm.pbf extracts for the given subregions and dump
    them into a PostgreSQL database.

    Files no larger than ``file_size_limit`` MB are parsed in one go via
    read_osm_pbf(); larger files are read layer by layer with GDAL/OGR and
    imported in chunks to bound memory use.

    :param subregion_name: [str] one or more subregion names; if none are
        given, all "non-subregion" regions are processed
    :param username: [str] PostgreSQL username
        (NOTE(review): the default here looks scrubbed ('******'); the
        original docstring says 'postgres' — confirm against repo history)
    :param password: [None (default); anything as input]
    :param host: [str] (default: 'localhost')
    :param port: [int] (default: 5432)
    :param database_name: [str] (default: 'OSM_Geofabrik_PBF')
    :param data_dir: [str; None (default)] where to find/put the .osm.pbf files
    :param update_osm_pbf: [bool] (default: False) re-download the raw files
    :param if_table_exists: [str] 'replace' (default); 'append'; or 'fail'
    :param file_size_limit: [int] (default: 50) size threshold in MB that
        switches between whole-file and chunked, layer-wise import
    :param parsed: [bool] (default: True)
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param pickle_raw_file: [bool] (default: False)
    :param rm_raw_file: [bool] (default: False) delete the raw .osm.pbf after import
    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)

    Example:
        subregions = retrieve_names_of_subregions_of('England')
        psql_osm_pbf_data_extracts(*subregions,
                                   database_name='geofabrik_osm_pbf',
                                   data_dir=cd("test_osm_dump"),
                                   file_size_limit=50,
                                   pickle_raw_file=True, rm_raw_file=True,
                                   verbose=True)
    """
    if not subregion_name:
        # No names given: process every region that has no further subregions.
        subregion_names = fetch_region_subregion_tier(
            "GeoFabrik-non-subregion-list")
        confirm_msg = "To dump GeoFabrik OSM data extracts of all subregions to PostgreSQL? "
    else:
        subregion_names = retrieve_names_of_subregions_of(*subregion_name)
        confirm_msg = "To dump GeoFabrik OSM data extracts of the following subregions to PostgreSQL? \n{}?\n".format(
            ", ".join(subregion_names))

    if confirmed(confirm_msg, confirmation_required=confirmation_required):
        # Connect to PostgreSQL server
        osmdb = OSM(username, password, host, port, database_name=database_name)

        err_subregion_names = []  # subregions whose import failed
        for subregion_name_ in subregion_names:
            default_pbf_filename, default_path_to_pbf = get_default_path_to_osm_file(
                subregion_name_, ".osm.pbf")
            if not data_dir:  # Go to default file path
                path_to_osm_pbf = default_path_to_pbf
            else:
                osm_pbf_dir = regulate_input_data_dir(data_dir)
                path_to_osm_pbf = os.path.join(osm_pbf_dir,
                                               default_pbf_filename)

            # Ensure the raw .osm.pbf is present (no per-file prompt; the
            # batch was already confirmed above).
            download_subregion_osm_file(subregion_name_,
                                        osm_file_format=".osm.pbf",
                                        download_dir=data_dir,
                                        update=update_osm_pbf,
                                        download_confirmation_required=False,
                                        verbose=verbose)

            file_size_in_mb = round(
                os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)

            try:
                if file_size_in_mb <= file_size_limit:
                    # Small file: parse it whole, then dump in one shot.
                    subregion_osm_pbf = read_osm_pbf(
                        subregion_name_, data_dir, parsed, file_size_limit,
                        fmt_other_tags, fmt_single_geom, fmt_multi_geom,
                        update=False, download_confirmation_required=False,
                        pickle_it=pickle_raw_file, rm_osm_pbf=False,
                        verbose=verbose)
                    if subregion_osm_pbf is not None:
                        osmdb.dump_osm_pbf_data(subregion_osm_pbf,
                                                table_name=subregion_name_,
                                                if_exists=if_table_exists,
                                                verbose=verbose)
                        del subregion_osm_pbf
                        gc.collect()
                else:
                    # Large file: stream it layer by layer via GDAL/OGR and
                    # import each layer in chunks to keep memory bounded.
                    print("\nParsing and importing \"{}\" feature-wisely to PostgreSQL ... ".format(subregion_name_)) \
                        if verbose else ""
                    # Reference: https://gdal.org/python/osgeo.ogr.Feature-class.html
                    raw_osm_pbf = ogr.Open(path_to_osm_pbf)
                    layer_count = raw_osm_pbf.GetLayerCount()
                    for i in range(layer_count):
                        layer = raw_osm_pbf.GetLayerByIndex(i)  # Hold the i-th layer
                        layer_name = layer.GetName()
                        print("  {} ... ".format(layer_name),
                              end="") if verbose else ""
                        try:
                            # Materialise all features, then split into
                            # roughly file_size/limit chunks.
                            features = [
                                feature for _, feature in enumerate(layer)]
                            feats_no, chunks_no = len(features), math.ceil(
                                file_size_in_mb / file_size_limit)
                            feats = split_list(features, chunks_no)
                            del features
                            gc.collect()

                            # 'replace' semantics: clear the existing layer
                            # table once, then append each chunk below.
                            if osmdb.subregion_table_exists(layer_name, subregion_name_) and \
                                    if_table_exists == 'replace':
                                osmdb.drop_subregion_data_by_layer(
                                    subregion_name_, layer_name)

                            # Loop through all available features
                            for feat in feats:
                                lyr_dat = pd.DataFrame(
                                    rapidjson.loads(f.ExportToJson())
                                    for f in feat)
                                lyr_dat = parse_osm_pbf_layer_data(
                                    lyr_dat, layer_name, fmt_other_tags,
                                    fmt_single_geom, fmt_multi_geom)
                                if_exists_ = if_table_exists \
                                    if if_table_exists == 'fail' else 'append'
                                osmdb.dump_osm_pbf_data_by_layer(
                                    lyr_dat, layer_name, subregion_name_,
                                    if_exists=if_exists_)
                                del lyr_dat
                                gc.collect()

                            print("Done. Total amount of features: {}".format(
                                feats_no)) if verbose else ""

                        except Exception as e:
                            print("Failed. {}".format(e))

                    raw_osm_pbf.Release()
                    del raw_osm_pbf
                    gc.collect()

                if rm_raw_file:
                    remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

            except Exception as e:
                print(e)
                err_subregion_names.append(subregion_name_)

            # Pause between subregions to be gentle on the download server.
            if subregion_name_ != subregion_names[-1]:
                time.sleep(60)

        if len(err_subregion_names) == 0:
            print("Mission accomplished.\n") if verbose else ""
        else:
            print(
                "Errors occurred when parsing data of the following subregion(s):"
            )
            print(*err_subregion_names, sep=", ")

        osmdb.disconnect()
        del osmdb
def download_subregion_osm_file(*subregion_name, osm_file_format,
                                download_dir=None, update=False,
                                download_confirmation_required=True,
                                deep_retry=False, verbose=False):
    """
    Download GeoFabrik OSM data of a given format for one or more subregions;
    if a region has no file of that format, recurse into its subregions.

    :param subregion_name: [str] case-insensitive, e.g. 'greater London', 'london'
    :param osm_file_format: [str] ".osm.pbf", ".shp.zip", or ".osm.bz2"
    :param download_dir: [str; None (default)] directory to save the downloaded
        file(s); None (using default directory)
    :param update: [bool] (default: False) whether to update (i.e. re-download) data
    :param download_confirmation_required: [bool] (default: True) whether to
        confirm before downloading
    :param deep_retry: [bool] (default: False) whether to resolve subregions
        recursively when falling back
    :param verbose: [bool] (default: False)

    Example:
        download_subregion_osm_file('london', osm_file_format=".osm.pbf",
                                    verbose=True)
    """
    for sub_reg_name in subregion_name:
        # Get download URL
        subregion_name_, download_url = get_subregion_download_url(
            sub_reg_name, osm_file_format, update=False)

        if pd.isna(download_url):
            # No file of this format at this level; fall back to downloading
            # each of the region's subregions instead (recursive call below).
            if verbose:
                print("\"{}\" data is not available for \"{}\" from the server. "
                      "Try to download the data of its subregions instead. ".format(
                          osm_file_format, subregion_name_))

            sub_subregions = retrieve_names_of_subregions_of(
                subregion_name_, deep=deep_retry)
            # NOTE(review): if download_dir is None this passes None to cd() —
            # confirm cd() tolerates a None component.
            download_dir_ = cd(
                download_dir,
                subregion_name_.replace(" ", "-").lower()
                + os.path.splitext(osm_file_format)[0])
            download_subregion_osm_file(
                *sub_subregions,
                osm_file_format=osm_file_format,
                download_dir=download_dir_,
                update=update,
                download_confirmation_required=download_confirmation_required,
                verbose=verbose)
        else:
            if not download_dir:
                # Download the requested OSM file to default directory
                osm_filename, path_to_file = get_default_path_to_osm_file(
                    subregion_name_, osm_file_format, mkdir=True)
            else:
                regulated_dir = regulate_input_data_dir(download_dir)
                osm_filename = get_default_osm_filename(
                    subregion_name_, osm_file_format=osm_file_format)
                path_to_file = os.path.join(regulated_dir, osm_filename)

            if os.path.isfile(path_to_file) and not update:
                # Already on disk and no update requested: nothing to do.
                print(
                    "\n\"{}\" for \"{}\" is already available: \"{}\".".format(
                        osm_filename, subregion_name_,
                        path_to_file)) if verbose else ""
            else:
                # Distinguish first-time download from refresh for messages.
                op = "Updating" if os.path.isfile(
                    path_to_file) else "Downloading"
                if confirmed(
                        "To download the {} data of \"{}\", saved as \"{}\"\n".
                        format(osm_file_format, subregion_name_, path_to_file),
                        confirmation_required=download_confirmation_required):
                    try:
                        from pyhelpers.download import download
                        download(download_url, path_to_file)
                        if verbose:
                            print("{} \"{}\" for \"{}\" ... Done.".format(
                                op, osm_filename, subregion_name_))
                    except Exception as e:
                        print("Failed to download \"{}\". {}.\n".format(
                            osm_filename, e)) if verbose else ""
                else:
                    print("The {} of \"{}\" was cancelled.\n".format(
                        op.lower(), osm_filename)) if verbose else ""
def collect_region_subregion_tier(confirmation_required=True, update=False,
                                  verbose=False):
    """
    Compile and save the full GeoFabrik region-subregion tier: a nested dict
    mapping every region to its subregions, plus the flat list of regions
    that have no subregions.

    :param confirmation_required: [bool] (default: True) whether to confirm
        before collecting region-subregion tier
    :param update: [bool] (default: False) whether to refresh the continent
        tables first
    :param verbose: [bool] (default: False)

    Example:
        collect_region_subregion_tier(confirmation_required=True, verbose=True)
    """

    # Find out the all regions and their subregions
    def compile_region_subregion_tier(sub_reg_tbls):
        """
        Recursively expand a {region: subregion-table} mapping.

        :param sub_reg_tbls: [pd.DataFrame] obtained from
            fetch_continents_subregion_tables()
        :return: ([dict], [list]) a dictionary of region-subregion, and a list
            of (sub)regions without subregions
        """
        having_subregions = copy.deepcopy(sub_reg_tbls)
        region_subregion_tiers = copy.deepcopy(sub_reg_tbls)

        # Split entries into those with a subregion table (DataFrame) and
        # leaf regions (no subregions).
        non_subregions_list = []
        for k, v in sub_reg_tbls.items():
            if v is not None and isinstance(v, pd.DataFrame):
                # NOTE(review): this rebinds from sub_reg_tbls on every
                # iteration — presumably update_nested_dict mutates
                # sub_reg_tbls in place so the updates accumulate; confirm.
                region_subregion_tiers = update_nested_dict(
                    sub_reg_tbls, {k: set(v.Subregion)})
            else:
                non_subregions_list.append(k)

        for x in non_subregions_list:
            having_subregions.pop(x)

        having_subregions_temp = copy.deepcopy(having_subregions)

        # Drill down one level at a time until every region with subregions
        # has been expanded.
        while having_subregions_temp:
            for region_name, subregion_table in having_subregions.items():
                #
                subregion_names, subregion_links = \
                    subregion_table.Subregion, subregion_table.SubregionURL
                sub_subregion_tables = dict(
                    zip(subregion_names, [
                        get_subregion_table(link) for link in subregion_links
                    ]))

                # Recurse into this region's subregions.
                subregion_index, without_subregion_ = \
                    compile_region_subregion_tier(sub_subregion_tables)
                non_subregions_list += without_subregion_

                region_subregion_tiers.update({region_name: subregion_index})

                having_subregions_temp.pop(region_name)

        # Russian Federation in both pages of Asia and Europe, so that there are duplicates in non_subregions_list
        import more_itertools
        non_subregions_list = list(
            more_itertools.unique_everseen(non_subregions_list))
        return region_subregion_tiers, non_subregions_list

    if confirmed(
            "To compile a region-subregion tier? (Note that it may take a few minutes.) ",
            confirmation_required=confirmation_required):
        print("Compiling a region-subregion tier ... ",
              end="") if verbose else ""
        try:
            subregion_tables = fetch_continents_subregion_tables(update=update)
            region_subregion_tier, non_subregions = \
                compile_region_subregion_tier(subregion_tables)
            print("Done. ") if verbose else ""

            # Persist both the nested tier (pickle + JSON) and the leaf list.
            save_pickle(region_subregion_tier,
                        cd_dat("GeoFabrik-region-subregion-tier.pickle"),
                        verbose=verbose)
            save_json(region_subregion_tier,
                      cd_dat("GeoFabrik-region-subregion-tier.json"),
                      verbose=verbose)
            save_pickle(non_subregions,
                        cd_dat("GeoFabrik-non-subregion-list.pickle"),
                        verbose=verbose)
        except Exception as e:
            print("Failed to get the required information ... {}.".format(
                e)) if verbose else ""
def collect_subregion_info_catalogue(confirmation_required=True, verbose=False):
    """
    Crawl the whole GeoFabrik site and save the complete catalogue of
    subregion download links (pickle + JSON), plus the subregion name list
    and a {name: URL} index.

    :param confirmation_required: [bool] (default: True) whether to confirm
        before starting to collect information
    :param verbose: [bool] (default: False)

    Example:
        collect_subregion_info_catalogue(confirmation_required=True, verbose=True)
    """
    if confirmed(
            "To collect all available subregion links? (Note that it may take a few minutes.) ",
            confirmation_required=confirmation_required):
        home_url = 'http://download.geofabrik.de/'
        try:
            source = requests.get(home_url)
            soup = bs4.BeautifulSoup(source.text, 'lxml')
            source.close()
            # avail_subregions = [td.a.text for td in soup.find_all('td', {'class': 'subregion'})]
            subregion_href = soup.find_all('td', {'class': 'subregion'})
            avail_subregion_urls = (urllib.parse.urljoin(home_url, td.a['href'])
                                    for td in subregion_href)
            avail_subregion_url_tables_0 = (get_subregion_table(sub_url, verbose)
                                            for sub_url in avail_subregion_urls)
            avail_subregion_url_tables = [
                tbl for tbl in avail_subregion_url_tables_0 if tbl is not None]

            # Breadth-first crawl: keep expanding every table that yields
            # further subregion tables until none are left.
            subregion_url_tables = list(avail_subregion_url_tables)
            while subregion_url_tables:
                subregion_url_tables_ = []
                for subregion_url_table in subregion_url_tables:
                    # subregions = list(subregion_url_table.Subregion)
                    subregion_urls = list(subregion_url_table.SubregionURL)
                    subregion_url_tables_0 = [
                        get_subregion_table(sr_url, verbose)
                        for sr_url in subregion_urls]
                    subregion_url_tables_ += [
                        tbl for tbl in subregion_url_tables_0 if tbl is not None]

                    # (Note that 'Russian Federation' data is available in both 'Asia' and 'Europe')
                    # avail_subregions += subregions
                    # avail_subregion_urls += subregion_urls
                    avail_subregion_url_tables += subregion_url_tables_

                subregion_url_tables = list(subregion_url_tables_)

            # All available URLs for downloading
            home_subregion_url_table = get_subregion_table(home_url)
            avail_subregion_url_tables.append(home_subregion_url_table)
            subregion_downloads_index = pd.DataFrame(
                pd.concat(avail_subregion_url_tables, ignore_index=True))
            subregion_downloads_index.drop_duplicates(inplace=True)

            # Resolve remaining same-name duplicates (e.g. Russian Federation
            # listed under both Asia and Europe) by keeping the larger file.
            duplicated = subregion_downloads_index[
                subregion_downloads_index.Subregion.duplicated(keep=False)]
            if not duplicated.empty:
                import humanfriendly
                # BUG FIX: the arguments were in the wrong order —
                # range(0, 2, len(duplicated)) — so only the first duplicate
                # pair was ever resolved. Step through the duplicates two at
                # a time instead.
                # NOTE(review): this assumes duplicates come in adjacent
                # pairs after drop_duplicates — confirm for the live site.
                for i in range(0, len(duplicated), 2):
                    temp = duplicated.iloc[i:i + 2]
                    size = temp['.osm.pbf_Size'].map(
                        lambda x: humanfriendly.parse_size(
                            x.strip('(').strip(')').replace('\xa0', ' ')))
                    idx = size[size == size.min()].index
                    subregion_downloads_index.drop(idx, inplace=True)
                subregion_downloads_index.index = range(
                    len(subregion_downloads_index))

            subregion_downloads_index_json = \
                subregion_downloads_index.set_index('Subregion').to_json()

            # Save subregion_index_downloads to local disk
            save_pickle(
                subregion_downloads_index,
                cd_dat("GeoFabrik-subregion-downloads-catalogue.pickle"),
                verbose=verbose)
            save_json(subregion_downloads_index_json,
                      cd_dat("GeoFabrik-subregion-downloads-catalogue.json"),
                      verbose=verbose)

            avail_subregions = list(subregion_downloads_index.Subregion)
            avail_subregion_urls = list(subregion_downloads_index.SubregionURL)
            # Subregion index - {Subregion: URL}
            subregion_url_index = dict(
                zip(avail_subregions, avail_subregion_urls))

            # Save a list of available subregions locally
            save_pickle(avail_subregions,
                        cd_dat("GeoFabrik-subregion-name-list.pickle"),
                        verbose=verbose)
            # Save subregion_index to local disk
            save_pickle(
                subregion_url_index,
                cd_dat("GeoFabrik-subregion-name-url-dictionary.pickle"),
                verbose=verbose)
            save_json(subregion_url_index,
                      cd_dat("GeoFabrik-subregion-name-url-dictionary.json"),
                      verbose=verbose)

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))
    else:
        print("The information collection process was not activated.")
def update_package_data(confirmation_required=True, interval_sec=2,
                        verbose=True):
    """
    Update package data.

    :param confirmation_required: whether asking for confirmation to proceed,
        defaults to ``True``
    :type confirmation_required: bool
    :param interval_sec: time gap (in seconds) between the updating of
        different classes, defaults to ``2``
    :type interval_sec: int
    :param verbose: whether to print relevant information in console,
        defaults to ``True``
    :type verbose: bool, int

    **Example**::

        >>> from pydriosm.updater import update_package_data

        >>> update_package_data(confirmation_required=True, verbose=True)

    |

    (**THE END OF** :ref:`Modules<modules>`.)
    """
    # Fix 1 (docstring): interval_sec defaults to 2, not 5 as previously stated.
    # Fix 2: forward confirmation_required to the gating prompt; it was
    # previously only forwarded to the per-resource getters, so passing
    # confirmation_required=False still blocked on this prompt.
    if confirmed("To update resources (which may take a few minutes)\n?",
                 confirmation_required=confirmation_required):
        update = True

        geofabrik_downloader = GeofabrikDownloader()

        _ = geofabrik_downloader.get_download_index(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = geofabrik_downloader.get_continents_subregion_tables(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = geofabrik_downloader.get_region_subregion_tier(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = geofabrik_downloader.get_download_catalogue(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = geofabrik_downloader.get_list_of_subregion_names(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        bbbike_downloader = BBBikeDownloader()

        _ = bbbike_downloader.get_list_of_cities(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = bbbike_downloader.get_coordinates_of_cities(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = bbbike_downloader.get_subregion_catalogue(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = bbbike_downloader.get_list_of_subregion_names(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)
        time.sleep(interval_sec)

        _ = bbbike_downloader.get_download_index(
            update=update, confirmation_required=confirmation_required,
            verbose=verbose)

        if verbose:
            print("\nUpdate finished.")