def collect_bbbike_subregion_catalogue(confirmation_required=True, verbose=False):
    """
    :param confirmation_required: [bool] (default: True) whether to confirm before starting to collect the catalogue
    :param verbose: [bool] (default: False) whether to print relevant information to the console

    Testing e.g.
        confirmation_required = True
        verbose = True

        collect_bbbike_subregion_catalogue(confirmation_required, verbose)
    """
    if confirmed("To collect BBBike subregion catalogue? ", confirmation_required=confirmation_required):
        try:
            home_url = 'http://download.bbbike.org/osm/bbbike/'
            bbbike_subregion_catalogue = pd.read_html(home_url, header=0, parse_dates=['Last Modified'])[0].drop(0)
            bbbike_subregion_catalogue.Name = bbbike_subregion_catalogue.Name.map(lambda x: x.strip('/'))

            save_pickle(bbbike_subregion_catalogue, cd_dat("BBBike-subregion-catalogue.pickle"), verbose=verbose)

            bbbike_subregion_names = bbbike_subregion_catalogue.Name.tolist()
            save_pickle(bbbike_subregion_names, cd_dat("BBBike-subregion-name-list.pickle"), verbose=verbose)

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))

    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")
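# Illustrative sketch (a hypothetical helper, not part of the module's API): the
# scrape above relies on pandas.read_html, which returns one DataFrame per
# <table> element on the page. The BBBike index is an Apache-style directory
# listing whose first data row is the "Parent Directory" entry, hence `.drop(0)`.
def _demo_bbbike_catalogue_scrape():
    import pandas as pd

    tables = pd.read_html('http://download.bbbike.org/osm/bbbike/', header=0)
    catalogue = tables[0].drop(0)  # drop the "Parent Directory" row
    # Directory names carry a trailing slash, e.g. 'Leeds/'
    return catalogue.Name.map(lambda x: x.strip('/')).tolist()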
def collect_continents_subregion_tables(confirmation_required=True, verbose=False):
    """
    :param confirmation_required: [bool] (default: True) whether to confirm before starting to collect the information
    :param verbose: [bool] (default: False)

    Testing e.g.
        confirmation_required = True
        verbose = True

        collect_continents_subregion_tables(confirmation_required, verbose)
    """
    if confirmed("To collect information about subregions of each continent? ",
                 confirmation_required=confirmation_required):
        try:
            home_link = 'https://download.geofabrik.de/'
            source = requests.get(home_link)
            soup = bs4.BeautifulSoup(source.text, 'lxml').find_all('td', {'class': 'subregion'})
            source.close()

            continent_names = [td.a.text for td in soup]
            continent_links = [urllib.parse.urljoin(home_link, td.a['href']) for td in soup]
            subregion_tbls = dict(zip(continent_names,
                                      [get_subregion_table(url, verbose) for url in continent_links]))

            save_pickle(subregion_tbls, cd_dat("GeoFabrik-continents-subregion-tables.pickle"), verbose=verbose)

        except Exception as e:
            print("Failed to collect the required information ... {}.".format(e))

    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")
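# Illustrative sketch (hypothetical helper): the continent names and links come
# from the <td class="subregion"> cells of the GeoFabrik front page, exactly as
# above but without the downstream get_subregion_table() calls.
def _demo_geofabrik_continents():
    import urllib.parse
    import bs4
    import requests

    home_link = 'https://download.geofabrik.de/'
    source = requests.get(home_link)
    cells = bs4.BeautifulSoup(source.text, 'lxml').find_all('td', {'class': 'subregion'})
    source.close()
    # {continent name: full URL}, e.g. {'Africa': 'https://download.geofabrik.de/africa.html', ...}
    return {td.a.text: urllib.parse.urljoin(home_link, td.a['href']) for td in cells}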
def collect_bbbike_download_catalogue(confirmation_required=True, verbose=False):
    """
    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)

    Testing e.g.
        confirmation_required = True
        verbose = True

        collect_bbbike_download_catalogue(confirmation_required, verbose)
    """
    if confirmed("To collect BBBike download dictionary? ", confirmation_required=confirmation_required):
        try:
            bbbike_subregion_names = fetch_bbbike_subregion_catalogue("BBBike-subregion-name-list", update=True)
            download_catalogue = [
                fetch_bbbike_subregion_download_catalogue(subregion_name, update=True, confirmation_required=False)
                for subregion_name in bbbike_subregion_names]

            subregion_name, subregion_download_catalogue = bbbike_subregion_names[0], download_catalogue[0]

            # Available file formats
            file_fmt = [re.sub('{}|CHECKSUM'.format(subregion_name), '', f)
                        for f in subregion_download_catalogue.Filename]
            save_pickle(file_fmt[:-2], cd_dat("BBBike-osm-file-formats.pickle"), verbose=verbose)

            # Available data types
            data_typ = subregion_download_catalogue.DataType.tolist()
            save_pickle(data_typ[:-2], cd_dat("BBBike-osm-data-types.pickle"), verbose=verbose)

            # available_file_formats = dict(zip(file_fmt, file_ext))

            downloads_dictionary = dict(zip(bbbike_subregion_names, download_catalogue))
            save_pickle(downloads_dictionary, cd_dat("BBBike-download-catalogue.pickle"), verbose=verbose)

        except Exception as e:
            print("Failed to collect BBBike download dictionary. {}".format(e))

    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")
def read_osm_pbf(subregion_name, data_dir=None, parsed=True, file_size_limit=50,
                 fmt_other_tags=True, fmt_single_geom=True, fmt_multi_geom=True,
                 update=False, download_confirmation_required=True, pickle_it=False,
                 rm_osm_pbf=False, verbose=False):
    """
    :param subregion_name: [str] e.g. 'rutland'
    :param data_dir: [str; None (default)] customised path of a .osm.pbf file
    :param parsed: [bool] (default: True)
    :param file_size_limit: [numbers.Number; None] (default: 50) limit of file size (in MB), e.g. 50, or 100
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param update: [bool] (default: False)
    :param download_confirmation_required: [bool] (default: True)
    :param pickle_it: [bool] (default: False)
    :param rm_osm_pbf: [bool] (default: False)
    :param verbose: [bool] (default: False)
    :return: [dict; None]

    If 'subregion_name' is the name of a subregion, the default file path will be used.

    Example:
        subregion_name = 'Rutland'
        data_dir = None
        parsed = True
        file_size_limit = 50
        fmt_other_tags = True
        fmt_single_geom = True
        fmt_multi_geom = True
        update = False
        download_confirmation_required = True
        pickle_it = False
        rm_osm_pbf = True
        verbose = False

        read_osm_pbf(subregion_name, data_dir, parsed, file_size_limit,
                     fmt_other_tags, fmt_single_geom, fmt_multi_geom,
                     update, download_confirmation_required, pickle_it, rm_osm_pbf, verbose)
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(subregion_name, ".osm.pbf", mkdir=False)
    if osm_pbf_filename and path_to_osm_pbf:
        if not data_dir:  # Go to the default file path
            path_to_osm_pbf = path_to_osm_pbf
        else:
            osm_pbf_dir = regulate_input_data_dir(data_dir)
            path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)

        subregion_filename = os.path.basename(path_to_osm_pbf)

        path_to_pickle = path_to_osm_pbf.replace(".osm.pbf", ".pickle" if parsed else "-raw.pickle")
        if os.path.isfile(path_to_pickle) and not update:
            osm_pbf_data = load_pickle(path_to_pickle, verbose=verbose)

        else:
            # If the target file is not available, try downloading it first
            if not os.path.isfile(path_to_osm_pbf) or update:
                try:
                    download_subregion_osm_file(
                        subregion_name, osm_file_format=".osm.pbf", download_dir=data_dir,
                        download_confirmation_required=download_confirmation_required,
                        update=update, verbose=False)
                except Exception as e:
                    print("Cancelled reading data. CAUSE: {}".format(e))

            if not os.path.isfile(path_to_osm_pbf):  # The download failed or was cancelled
                osm_pbf_data = None

            else:
                file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)
                if file_size_limit and file_size_in_mb > file_size_limit:
                    # Parse the '.osm.pbf' file chunk-wise
                    chunks_no = math.ceil(file_size_in_mb / file_size_limit)
                else:
                    chunks_no = None

                print("\nParsing \"{}\" ... ".format(subregion_filename), end="") if verbose else ""
                try:
                    osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                                 fmt_other_tags, fmt_single_geom, fmt_multi_geom)
                    print("Successfully.\n") if verbose else ""
                    if pickle_it:
                        save_pickle(osm_pbf_data, path_to_pickle, verbose=verbose)
                except Exception as e:
                    print("Failed. CAUSE: \"{}\"\n".format(e))
                    osm_pbf_data = None

                if rm_osm_pbf:
                    remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

        return osm_pbf_data

    else:
        print("Errors occurred. Maybe check the input \"subregion_name\" first.")
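# Worked example of the chunking rule in read_osm_pbf(): with the default
# file_size_limit of 50 MB, a 240.5 MB .osm.pbf file is parsed in
# math.ceil(240.5 / 50) = 5 chunks; files at or under the limit are parsed
# whole (chunks_no = None). A minimal sketch of the same calculation:
def _demo_chunks_no(file_size_in_mb, file_size_limit=50):
    import math

    if file_size_limit and file_size_in_mb > file_size_limit:
        return math.ceil(file_size_in_mb / file_size_limit)
    return None  # small enough to parse in one go

# _demo_chunks_no(240.5)  # -> 5
# _demo_chunks_no(42.0)   # -> None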
def read_shp_zip(subregion_name, layer, feature=None, data_dir=None, update=False,
                 download_confirmation_required=True, pickle_it=False, rm_extracts=False,
                 rm_shp_zip=False, verbose=False):
    """
    :param subregion_name: [str] e.g. 'england', 'oxfordshire', or 'europe'; case-insensitive
    :param layer: [str] e.g. 'railways'
    :param feature: [str; None (default)] e.g. 'rail'; if None, all available features are included
    :param data_dir: [str; None (default)]
    :param update: [bool] (default: False) whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: True)
    :param pickle_it: [bool] (default: False)
    :param rm_extracts: [bool] (default: False) whether to delete extracted files from the .shp.zip file
    :param rm_shp_zip: [bool] (default: False) whether to delete the downloaded .shp.zip file
    :param verbose: [bool] (default: False)
    :return: [gpd.GeoDataFrame]

    Example:
        subregion_name = 'Rutland'
        layer = 'railways'
        feature = None
        data_dir = cd("test_read_GeoFabrik")
        update = False
        download_confirmation_required = True
        pickle_it = False
        rm_extracts = True
        rm_shp_zip = False
        verbose = True

        read_shp_zip(subregion_name, layer, feature, data_dir, update, download_confirmation_required,
                     pickle_it, rm_extracts, rm_shp_zip, verbose)
    """
    shp_zip_filename, path_to_shp_zip = get_default_path_to_osm_file(subregion_name, ".shp.zip", mkdir=False)

    if shp_zip_filename and path_to_shp_zip:
        extract_dir = os.path.splitext(path_to_shp_zip)[0]
        if data_dir:
            shp_zip_dir = regulate_input_data_dir(data_dir)
            path_to_shp_zip = os.path.join(shp_zip_dir, shp_zip_filename)
            extract_dir = os.path.join(shp_zip_dir, os.path.basename(extract_dir))

        # Make a local path for saving a pickle file for .shp data
        sub_name = "-".join(
            x for x in [shp_zip_filename.replace("-latest-free.shp.zip", ""), layer, feature] if x)
        path_to_shp_pickle = os.path.join(extract_dir, sub_name + ".shp.pickle")

        if os.path.isfile(path_to_shp_pickle) and not update:
            shp_data = load_pickle(path_to_shp_pickle, verbose=verbose)

        else:
            # Download the requested .shp.zip file, if it is not available yet
            if not os.path.exists(extract_dir):
                download_subregion_osm_file(
                    shp_zip_filename, osm_file_format=".shp.zip", download_dir=data_dir,
                    update=update, verbose=verbose,
                    download_confirmation_required=download_confirmation_required)

                if os.path.isfile(path_to_shp_zip):
                    extract_shp_zip(path_to_shp_zip, extract_dir, layer=layer, verbose=verbose)

            path_to_shp = glob.glob(os.path.join(extract_dir, "*{}*.shp".format(layer)))
            if len(path_to_shp) == 0:
                shp_data = None

            elif len(path_to_shp) == 1:
                shp_data = gpd.read_file(path_to_shp[0])  # gpd.GeoDataFrame(read_shp_file(path_to_shp))
                if feature:
                    path_to_shp_feat = path_to_shp[0].replace(layer, layer + "_" + feature)
                    shp_data = gpd.GeoDataFrame(shp_data[shp_data.fclass == feature])
                    shp_data.crs = {'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84', 'proj': 'longlat'}
                    shp_data.to_file(path_to_shp_feat, driver='ESRI Shapefile')

            else:  # len(path_to_shp) > 1
                if not feature:
                    path_to_orig_shp = [p for p in path_to_shp
                                        if layer + '_a' in p or layer + '_free' in p]
                    if len(path_to_orig_shp) == 1:  # "_a*.shp" is not available
                        shp_data = gpd.read_file(path_to_orig_shp[0])
                    else:
                        shp_data = [gpd.read_file(p) for p in path_to_shp]
                        shp_data = pd.concat(shp_data, axis=0, ignore_index=True)

                else:  # feature is not None
                    path_to_shp_feat = [p for p in path_to_shp if layer + "_" + feature not in p]
                    if len(path_to_shp_feat) == 1:  # "_a*.shp" is not available
                        shp_data = gpd.read_file(path_to_shp_feat[0])
                        shp_data = shp_data[shp_data.fclass == feature]
                    else:  # both "_a*" and "_free*" .shp files are available for the feature
                        shp_data = [dat[dat.fclass == feature]
                                    for dat in (gpd.read_file(p) for p in path_to_shp_feat)]
                        shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
                    shp_data.crs = {'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84', 'proj': 'longlat'}
                    shp_data.to_file(path_to_shp_feat[0].replace(layer, layer + "_" + feature),
                                     driver='ESRI Shapefile')

            if pickle_it:
                save_pickle(shp_data, path_to_shp_pickle, verbose=verbose)

            if os.path.exists(extract_dir) and rm_extracts:
                # import shutil; shutil.rmtree(extract_dir)
                for f in glob.glob(os.path.join(extract_dir, "gis_osm*")):
                    # if layer not in f:
                    os.remove(f)

            if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
                remove_subregion_osm_file(path_to_shp_zip, verbose=verbose)

        return shp_data
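# Note on the CRS dict used in read_shp_zip(): the PROJ4-style mapping
# {'proj': 'longlat', 'ellps': 'WGS84', 'datum': 'WGS84', 'no_defs': True}
# describes WGS 84, i.e. EPSG:4326, which is what GeoFabrik shapefiles use.
# A sketch of the equivalent assignment in more recent geopandas (assuming
# geopandas >= 0.7, where set_crs() is available):
def _demo_set_crs(shp_data):
    # shp_data: a gpd.GeoDataFrame without a CRS set
    return shp_data.set_crs('EPSG:4326')  # equivalent to the PROJ4 dict above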
def collect_bbbike_download_catalogue(confirmation_required=True, verbose=False):
    """
    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)

    Example:
        confirmation_required = True
        verbose = True

        collect_bbbike_download_catalogue(confirmation_required, verbose)
    """

    def collect_bbbike_subregion_download_catalogue(subregion_name):
        """
        :param subregion_name: [str]

        Example:
            subregion_name = 'leeds'

            collect_bbbike_subregion_download_catalogue(subregion_name)
        """

        def parse_dlc(dlc):
            dlc_href = dlc.get('href')  # URL
            filename, download_url = dlc_href.strip('./'), urllib.parse.urljoin(url, dlc_href)
            if not dlc.has_attr('title'):
                file_format, file_size, last_update = 'Poly', None, None
            else:
                if len(dlc.contents) < 3:
                    file_format, file_size = 'Txt', None
                else:
                    file_format, file_size, _ = dlc.contents  # File type and size
                    file_format, file_size = file_format.strip(), file_size.text
                last_update = pd.to_datetime(dlc.get('title'))  # Date and time
            parsed_dat = [filename, download_url, file_format, file_size, last_update]
            return parsed_dat

        subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)

        try:
            print(" \"{}\" ... ".format(subregion_name_), end="") if verbose else ""

            url = 'https://download.bbbike.org/osm/bbbike/{}/'.format(subregion_name_)
            source = urllib.request.urlopen(url)
            source_soup = bs4.BeautifulSoup(source, 'lxml')
            download_links_class = source_soup.find_all(name='a', attrs={'class': ['download_link', 'small']})

            subregion_downloads_catalogue = pd.DataFrame(parse_dlc(x) for x in download_links_class)
            subregion_downloads_catalogue.columns = ['Filename', 'URL', 'DataType', 'Size', 'LastUpdate']

            # path_to_file = cd_dat_bbbike(subregion_name_, subregion_name_ + "-download-catalogue.pickle")
            # save_pickle(subregion_downloads_catalogue, path_to_file, verbose=verbose)

            print("Done. ") if verbose else ""

        except Exception as e_:
            subregion_downloads_catalogue = None
            print("Failed. {}".format(e_)) if verbose else ""

        return subregion_downloads_catalogue

    if confirmed("To collect BBBike download dictionary? ", confirmation_required=confirmation_required):
        try:
            bbbike_subregion_names = fetch_bbbike_subregion_catalogue("BBBike-subregion-name-list", verbose=verbose)
            print("Collecting BBBike download catalogue for: ") if verbose else ""

            download_catalogue = [collect_bbbike_subregion_download_catalogue(subregion_name)
                                  for subregion_name in bbbike_subregion_names]

            sr_name, sr_download_catalogue = bbbike_subregion_names[0], download_catalogue[0]

            # Available file formats
            file_fmt = [re.sub('{}|CHECKSUM'.format(sr_name), '', f) for f in sr_download_catalogue.Filename]
            save_pickle(file_fmt[:-2], cd_dat("BBBike-osm-file-formats.pickle"), verbose=verbose)

            # Available data types
            data_typ = sr_download_catalogue.DataType.tolist()
            save_pickle(data_typ[:-2], cd_dat("BBBike-osm-data-types.pickle"), verbose=verbose)

            # available_file_formats = dict(zip(file_fmt, file_ext))

            downloads_dictionary = dict(zip(bbbike_subregion_names, download_catalogue))
            save_pickle(downloads_dictionary, cd_dat("BBBike-download-catalogue.pickle"), verbose=verbose)

        except Exception as e:
            print("Failed to collect BBBike download dictionary. {}".format(e)) if verbose else ""
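# Worked example of the filename-stripping step above: for a subregion such as
# 'Leeds', removing the subregion name (and any 'CHECKSUM' marker) from each
# catalogue filename leaves just the file-format suffix; the final two entries
# (apparently the checksum and poly files) are then dropped via the
# `file_fmt[:-2]` slice. The filenames below are illustrative only.
def _demo_strip_file_formats():
    import re

    sr_name = 'Leeds'
    filenames = ['Leeds.osm.pbf', 'Leeds.osm.gz', 'Leeds.osm.shp.zip',
                 'CHECKSUM.txt', 'Leeds.poly']
    return [re.sub('{}|CHECKSUM'.format(sr_name), '', f) for f in filenames]

# _demo_strip_file_formats()  # -> ['.osm.pbf', '.osm.gz', '.osm.shp.zip', '.txt', '.poly']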
def collect_region_subregion_tier(confirmation_required=True, verbose=False):
    """
    :param confirmation_required: [bool] (default: True) whether to confirm before collecting region-subregion tier
    :param verbose: [bool] (default: False)

    Testing e.g.
        confirmation_required = True
        verbose = True

        collect_region_subregion_tier(confirmation_required, verbose)
    """

    # Find out all the regions and their subregions
    def compile_region_subregion_tier(sub_reg_tbls):
        """
        :param sub_reg_tbls: [dict] obtained from fetch_continents_subregion_tables()
        :return: ([dict], [list]) a dictionary of region-subregion tiers, and a list of (sub)regions
            without subregions
        """
        having_subregions = copy.deepcopy(sub_reg_tbls)
        region_subregion_tiers = copy.deepcopy(sub_reg_tbls)

        non_subregions_list = []
        for k, v in sub_reg_tbls.items():
            if v is not None and isinstance(v, pd.DataFrame):
                region_subregion_tiers = update_nested_dict(sub_reg_tbls, {k: set(v.Subregion)})
            else:
                non_subregions_list.append(k)

        for x in non_subregions_list:
            having_subregions.pop(x)

        having_subregions_temp = copy.deepcopy(having_subregions)

        while having_subregions_temp:
            for region_name, subregion_table in having_subregions.items():
                subregion_names, subregion_links = subregion_table.Subregion, subregion_table.SubregionURL
                sub_subregion_tables = dict(
                    zip(subregion_names, [get_subregion_table(link) for link in subregion_links]))

                subregion_index, without_subregion_ = compile_region_subregion_tier(sub_subregion_tables)
                non_subregions_list += without_subregion_

                region_subregion_tiers.update({region_name: subregion_index})

                having_subregions_temp.pop(region_name)

        # 'Russian Federation' appears on both the 'Asia' and 'Europe' pages,
        # so there are duplicates in non_subregions_list
        non_subregions_list = list(more_itertools.unique_everseen(non_subregions_list))
        return region_subregion_tiers, non_subregions_list

    if confirmed("To compile a region-subregion tier? (Note that it may take a few minutes.) ",
                 confirmation_required=confirmation_required):
        try:
            subregion_tables = fetch_continents_subregion_tables(update=True)
            region_subregion_tier, non_subregions = compile_region_subregion_tier(subregion_tables)

            save_pickle(region_subregion_tier, cd_dat("GeoFabrik-region-subregion-tier.pickle"), verbose=verbose)
            save_json(region_subregion_tier, cd_dat("GeoFabrik-region-subregion-tier.json"), verbose=verbose)
            save_pickle(non_subregions, cd_dat("GeoFabrik-non-subregion-list.pickle"), verbose=verbose)

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))

    else:
        print("The information collection process was not activated.")
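# Illustrative sketch of the structure compile_region_subregion_tier() builds
# (values abridged and hypothetical; the real output is much larger): regions
# with their own subregion pages map to nested dicts/sets, while regions
# without subregions end up in the flat non-subregions list.
#
# region_subregion_tier = {
#     'Europe': {'United Kingdom': {'England': {'Rutland', ...}, ...}, ...},
#     ...,
# }
# non_subregions = ['Rutland', ...]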
def collect_subregion_info_catalogue(confirmation_required=True, verbose=False):
    """
    :param confirmation_required: [bool] (default: True) whether to confirm before starting to collect information
    :param verbose: [bool] (default: False)

    Testing e.g.
        confirmation_required = True
        verbose = True

        collect_subregion_info_catalogue(confirmation_required, verbose)
    """
    if confirmed("To collect all available subregion links? (Note that it may take a few minutes.) ",
                 confirmation_required=confirmation_required):
        home_url = 'http://download.geofabrik.de/'
        try:
            source = requests.get(home_url)
            soup = bs4.BeautifulSoup(source.text, 'lxml')
            source.close()

            avail_subregions = [td.a.text for td in soup.find_all('td', {'class': 'subregion'})]
            avail_subregion_urls = [urllib.parse.urljoin(home_url, td.a['href'])
                                    for td in soup.find_all('td', {'class': 'subregion'})]
            avail_subregion_url_tables = [get_subregion_table(sub_url, verbose)
                                          for sub_url in avail_subregion_urls]
            avail_subregion_url_tables = [tbl for tbl in avail_subregion_url_tables if tbl is not None]

            subregion_url_tables = list(avail_subregion_url_tables)

            while subregion_url_tables:
                subregion_url_tables_ = []

                for subregion_url_table in subregion_url_tables:
                    subregions = list(subregion_url_table.Subregion)
                    subregion_urls = list(subregion_url_table.SubregionURL)
                    subregion_url_tables_0 = [get_subregion_table(subregion_url, verbose)
                                              for subregion_url in subregion_urls]
                    subregion_url_tables_ += [tbl for tbl in subregion_url_tables_0 if tbl is not None]

                    # (Note that 'Russian Federation' data is available in both 'Asia' and 'Europe')
                    avail_subregions += subregions
                    avail_subregion_urls += subregion_urls

                avail_subregion_url_tables += subregion_url_tables_
                subregion_url_tables = list(subregion_url_tables_)

            # Save a list of available subregions locally
            save_pickle(avail_subregions, cd_dat("GeoFabrik-subregion-name-list.pickle"), verbose=verbose)

            # Subregion index - {Subregion: URL}
            subregion_url_index = dict(zip(avail_subregions, avail_subregion_urls))
            # Save subregion_url_index to local disk
            save_pickle(subregion_url_index, cd_dat("GeoFabrik-subregion-name-url-dictionary.pickle"),
                        verbose=verbose)
            save_json(subregion_url_index, cd_dat("GeoFabrik-subregion-name-url-dictionary.json"),
                      verbose=verbose)

            # All available URLs for downloading
            home_subregion_url_table = get_subregion_table(home_url)
            avail_subregion_url_tables.append(home_subregion_url_table)
            subregion_downloads_index = pd.DataFrame(pd.concat(avail_subregion_url_tables, ignore_index=True))
            subregion_downloads_index.drop_duplicates(inplace=True)

            subregion_downloads_index_json = subregion_downloads_index.set_index('Subregion').to_json()

            # Save subregion_downloads_index to local disk
            save_pickle(subregion_downloads_index,
                        cd_dat("GeoFabrik-subregion-downloads-catalogue.pickle"), verbose=verbose)
            save_json(subregion_downloads_index_json,
                      cd_dat("GeoFabrik-subregion-downloads-catalogue.json"), verbose=verbose)

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))

    else:
        print("The information collection process was not activated.")
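# The while loop above is a breadth-first traversal of GeoFabrik's region
# hierarchy: each pass fetches the subregion tables linked from the current
# level and stops once a pass yields no new tables (i.e. all leaf regions are
# reached). A minimal sketch of the same pattern, with a hypothetical
# fetch_children() standing in for the get_subregion_table() calls:
def _demo_bfs_tables(root_tables, fetch_children):
    collected = list(root_tables)
    frontier = list(root_tables)
    while frontier:
        next_frontier = []
        for table in frontier:
            # fetch_children(table) -> list of child tables (possibly empty)
            next_frontier += [t for t in fetch_children(table) if t is not None]
        collected += next_frontier
        frontier = next_frontier
    return collected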
def collect_bbbike_subregion_download_catalogue(subregion_name, confirmation_required=True, verbose=False):
    """
    :param subregion_name: [str]
    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)

    Testing e.g.
        subregion_name = 'leeds'
        confirmation_required = True
        verbose = True

        collect_bbbike_subregion_download_catalogue(subregion_name, confirmation_required, verbose)
    """

    def parse_dlc(dlc):
        dlc_href = dlc.get('href')  # URL
        filename, download_url = dlc_href.strip('./'), urllib.parse.urljoin(url, dlc_href)
        if not dlc.has_attr('title'):
            file_format, file_size, last_update = 'Poly', None, None
        else:
            if len(dlc.contents) < 3:
                file_format, file_size = 'Txt', None
            else:
                file_format, file_size, _ = dlc.contents  # File type and size
                file_format, file_size = file_format.strip(), file_size.text
            last_update = pd.to_datetime(dlc.get('title'))  # Date and time
        parsed_dat = [filename, download_url, file_format, file_size, last_update]
        return parsed_dat

    subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)

    if confirmed("To collect BBBike download catalogue for \"{}\"? ".format(subregion_name_),
                 confirmation_required=confirmation_required):
        try:
            url = 'https://download.bbbike.org/osm/bbbike/{}/'.format(subregion_name_)
            source = urllib.request.urlopen(url)
            source_soup = bs4.BeautifulSoup(source, 'lxml')
            download_links_class = source_soup.find_all(name='a', attrs={'class': ['download_link', 'small']})

            subregion_downloads_catalogue = pd.DataFrame(parse_dlc(x) for x in download_links_class)
            subregion_downloads_catalogue.columns = ['Filename', 'URL', 'DataType', 'Size', 'LastUpdate']

            path_to_file = cd_dat_bbbike(subregion_name_, subregion_name_ + "-download-catalogue.pickle")
            save_pickle(subregion_downloads_catalogue, path_to_file, verbose=verbose)

        except Exception as e:
            print("Failed to collect download catalogue for \"{}\". {}".format(subregion_name_, e))

    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")
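# Illustrative sketch (hypothetical helper): what parse_dlc() extracts from a
# BBBike download anchor. The HTML snippet mimics the links on a page such as
# https://download.bbbike.org/osm/bbbike/Leeds/ (attribute values are made up
# for the demonstration).
def _demo_parse_dlc():
    import bs4

    html = ('<a class="download_link" href="./Leeds.osm.pbf" '
            'title="2020-01-01 00:00:00">Leeds.osm.pbf<span class="small">(20M)</span></a>')
    dlc = bs4.BeautifulSoup(html, 'lxml').a
    # As in parse_dlc(): href -> filename; the 'title' attribute -> last update time
    filename = dlc.get('href').strip('./')
    has_title = dlc.has_attr('title')
    return filename, has_title  # -> ('Leeds.osm.pbf', True)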
def collect_subregion_info_catalogue(confirmation_required=True, verbose=False):
    """
    :param confirmation_required: [bool] (default: True) whether to confirm before starting to collect information
    :param verbose: [bool] (default: False)

    Example:
        confirmation_required = True
        verbose = True

        collect_subregion_info_catalogue(confirmation_required, verbose)
    """
    if confirmed("To collect all available subregion links? (Note that it may take a few minutes.) ",
                 confirmation_required=confirmation_required):
        home_url = 'http://download.geofabrik.de/'
        try:
            source = requests.get(home_url)
            soup = bs4.BeautifulSoup(source.text, 'lxml')
            source.close()

            subregion_href = soup.find_all('td', {'class': 'subregion'})
            avail_subregion_urls = (urllib.parse.urljoin(home_url, td.a['href']) for td in subregion_href)
            avail_subregion_url_tables_0 = (get_subregion_table(sub_url, verbose)
                                            for sub_url in avail_subregion_urls)
            avail_subregion_url_tables = [tbl for tbl in avail_subregion_url_tables_0 if tbl is not None]

            subregion_url_tables = list(avail_subregion_url_tables)

            while subregion_url_tables:
                subregion_url_tables_ = []

                for subregion_url_table in subregion_url_tables:
                    subregion_urls = list(subregion_url_table.SubregionURL)
                    subregion_url_tables_0 = [get_subregion_table(sr_url, verbose) for sr_url in subregion_urls]
                    subregion_url_tables_ += [tbl for tbl in subregion_url_tables_0 if tbl is not None]

                avail_subregion_url_tables += subregion_url_tables_
                subregion_url_tables = list(subregion_url_tables_)

            # All available URLs for downloading
            home_subregion_url_table = get_subregion_table(home_url)
            avail_subregion_url_tables.append(home_subregion_url_table)
            subregion_downloads_index = pd.DataFrame(pd.concat(avail_subregion_url_tables, ignore_index=True))
            subregion_downloads_index.drop_duplicates(inplace=True)

            # 'Russian Federation' data is available in both 'Asia' and 'Europe';
            # where a subregion appears twice, keep the row with the larger .osm.pbf file
            duplicated = subregion_downloads_index[subregion_downloads_index.Subregion.duplicated(keep=False)]
            if not duplicated.empty:
                import humanfriendly

                for i in range(0, len(duplicated), 2):  # process the duplicated rows in pairs
                    temp = duplicated.iloc[i:i + 2]
                    size = temp['.osm.pbf_Size'].map(
                        lambda x: humanfriendly.parse_size(x.strip('(').strip(')').replace('\xa0', ' ')))
                    idx = size[size == size.min()].index
                    subregion_downloads_index.drop(idx, inplace=True)

                subregion_downloads_index.index = range(len(subregion_downloads_index))

            subregion_downloads_index_json = subregion_downloads_index.set_index('Subregion').to_json()

            # Save subregion_downloads_index to local disk
            save_pickle(subregion_downloads_index,
                        cd_dat("GeoFabrik-subregion-downloads-catalogue.pickle"), verbose=verbose)
            save_json(subregion_downloads_index_json,
                      cd_dat("GeoFabrik-subregion-downloads-catalogue.json"), verbose=verbose)

            avail_subregions = list(subregion_downloads_index.Subregion)
            avail_subregion_urls = list(subregion_downloads_index.SubregionURL)

            # Subregion index - {Subregion: URL}
            subregion_url_index = dict(zip(avail_subregions, avail_subregion_urls))

            # Save a list of available subregions locally
            save_pickle(avail_subregions, cd_dat("GeoFabrik-subregion-name-list.pickle"), verbose=verbose)

            # Save subregion_url_index to local disk
            save_pickle(subregion_url_index,
                        cd_dat("GeoFabrik-subregion-name-url-dictionary.pickle"), verbose=verbose)
            save_json(subregion_url_index,
                      cd_dat("GeoFabrik-subregion-name-url-dictionary.json"), verbose=verbose)

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))

    else:
        print("The information collection process was not activated.")