def validate_bbbike_download_info(subregion_name, osm_file_format, download_dir=None):
    """
    Resolve the download details (name, filename, URL, local path) of a BBBike OSM file.

    :param subregion_name: [str] subregion name, passed to get_bbbike_subregion_download_url()
    :param osm_file_format: [str] file format, passed to get_bbbike_subregion_download_url()
    :param download_dir: [str; None (default)] directory in which the file is to be saved;
        if None (or otherwise falsy), the default location given by cd_dat_bbbike() is used
    :return: [tuple] of length 4 ([str], [str], [str], [str])
        subregion name, filename, download url and file path

    Example:
        subregion_name = 'leeds'
        osm_file_format = 'pbf'
        download_dir = None
        validate_bbbike_download_info(subregion_name, osm_file_format, download_dir)
    """
    valid_name, url = get_bbbike_subregion_download_url(subregion_name, osm_file_format)

    # The filename is the last component of the download URL
    filename = os.path.basename(url)

    if download_dir:
        # Save the file directly under the user-specified directory
        file_path = os.path.join(regulate_input_data_dir(download_dir), filename)
    else:
        # Fall back to the default data directory for this subregion
        file_path = cd_dat_bbbike(valid_name, filename)

    return valid_name, filename, url, file_path
def fetch_bbbike_subregion_download_catalogue(subregion_name, update=False, confirmation_required=True,
                                              verbose=False):
    """
    Load the pickled download catalogue of a BBBike subregion, collecting it first if needed.

    The catalogue is (re)collected when no local pickle file exists or when `update` is truthy.

    :param subregion_name: [str] subregion name, normalised by regulate_bbbike_input_subregion_name()
    :param update: [bool] (default: False) whether to re-collect the catalogue even if a local copy exists
    :param confirmation_required: [bool] (default: True) forwarded to the collecting function
    :param verbose: [bool] (default: False) forwarded to the collecting function and to load_pickle()
    :return: [pd.DataFrame] the object loaded from the pickle file; None if loading raises
        (the exception is printed)

    Testing e.g.
        subregion_name = 'leeds'
        update = False
        confirmation_required = True
        verbose = True
        fetch_bbbike_subregion_download_catalogue(subregion_name, update, confirmation_required, verbose)
    """
    valid_name = regulate_bbbike_input_subregion_name(subregion_name)
    pickle_filename = valid_name + "-download-catalogue.pickle"
    path_to_pickle = cd_dat_bbbike(valid_name, pickle_filename)

    # Collect only when the local copy is missing or a refresh is requested
    local_copy_is_usable = os.path.isfile(path_to_pickle) and not update
    if not local_copy_is_usable:
        collect_bbbike_subregion_download_catalogue(
            subregion_name, confirmation_required=confirmation_required, verbose=verbose)

    try:
        return load_pickle(path_to_pickle, verbose=verbose)
    except Exception as err:
        # Best effort: report the problem and hand back None
        print(err)
        return None
def download_bbbike_subregion_osm_all_files(subregion_name, download_dir=None, download_confirmation_required=True):
    """
    Download every file listed in the BBBike download catalogue for a subregion.

    Failures of individual downloads are printed and do not stop the remaining downloads.

    :param subregion_name: [str] subregion name, normalised by regulate_bbbike_input_subregion_name()
    :param download_dir: [str or None] directory to download into; the files are placed in a
        sub-directory named after the subregion. If None (or otherwise falsy), the default
        directory cd_dat_bbbike(<subregion>) is used as is
    :param download_confirmation_required: [bool] passed to confirmed() as `confirmation_required`
    """
    subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)
    bbbike_download_dictionary = fetch_bbbike_download_catalogue("BBBike-download-catalogue")
    sub_download_catalogue = bbbike_download_dictionary[subregion_name_]

    if not download_dir:
        # NOTE(review): cd_dat_bbbike(subregion_name_) is presumably already the subregion's own
        # directory (default paths elsewhere in this module are built as
        # cd_dat_bbbike(subregion_name_, filename)), so the subregion name must not be appended
        # a second time -- doing so would nest the files under "<subregion>/<subregion>/".
        target_dir = cd_dat_bbbike(subregion_name_)
    else:
        target_dir = os.path.join(regulate_input_data_dir(download_dir), subregion_name_)

    if confirmed("Confirm to download all available BBBike data for \"{}\"?".format(subregion_name_),
                 confirmation_required=download_confirmation_required):
        print("\nStart to download all available OSM data for \"{}\" ... \n".format(subregion_name_))

        for download_url, osm_filename in zip(sub_download_catalogue.URL, sub_download_catalogue.Filename):
            print("\n\n\"{}\" (below): ".format(osm_filename))
            try:
                download(download_url, os.path.join(target_dir, osm_filename))
            except Exception as e:
                # Keep going: one failed file should not abort the whole batch
                print("\nFailed to download \"{}\". {}.".format(osm_filename, e))

        print("\nCheck out the downloaded OSM data for \"{}\" at \"{}\".".format(subregion_name_, target_dir))

    else:
        print("The downloading process was not activated.")
def fetch_bbbike_subregion_download_catalogue(subregion_name, update=False, confirmation_required=True):
    """
    Fetch the download catalogue of a BBBike subregion from its local pickle file.

    If the pickle file does not exist, or `update` is truthy, the catalogue is collected
    first via collect_bbbike_subregion_download_catalogue().

    :param subregion_name: [str] subregion name, normalised by regulate_bbbike_input_subregion_name()
    :param update: [bool] whether to re-collect the catalogue even if a local copy exists
    :param confirmation_required: [bool] forwarded to the collecting function
    :return: [pandas.DataFrame] the object loaded from the pickle file, or None if loading raises
        (the exception is printed)
    """
    valid_name = regulate_bbbike_input_subregion_name(subregion_name)
    path_to_pickle = cd_dat_bbbike(valid_name, valid_name + "-download-catalogue.pickle")

    # A usable local copy exists only if the file is there and no refresh was requested
    have_local_copy = os.path.isfile(path_to_pickle) and not update
    if not have_local_copy:
        collect_bbbike_subregion_download_catalogue(subregion_name, confirmation_required=confirmation_required)

    try:
        catalogue = load_pickle(path_to_pickle)
    except Exception as err:
        # Best effort: report the problem and hand back None
        print(err)
        return None
    return catalogue
def collect_bbbike_subregion_download_catalogue(subregion_name, confirmation_required=True):
    """
    Scrape the BBBike download page of a subregion and save its file catalogue as a pickle.

    The catalogue is a pandas.DataFrame with the columns
    'Filename', 'URL', 'DataType', 'Size' and 'LastUpdate'; it is saved to
    cd_dat_bbbike(<subregion>, "<subregion>-download-catalogue.pickle").
    Any exception raised while collecting or saving is caught and printed.

    :param subregion_name: [str] subregion name, normalised by regulate_bbbike_input_subregion_name()
    :param confirmation_required: [bool] (default: True) passed to confirmed() as `confirmation_required`
    """

    def parse_dlc(dlc):
        """Turn one download-link tag into [filename, url, data type, size, last update]."""
        dlc_href = dlc.get('href')  # URL (relative to the subregion page)
        # NOTE: str.strip('./') removes any leading/trailing '.' and '/' characters
        filename, download_url = dlc_href.strip('./'), urllib.parse.urljoin(url, dlc_href)

        if not dlc.has_attr('title'):
            # Links without a 'title' attribute are recorded as 'Poly' with no size/date
            file_format, file_size, last_update = 'Poly', None, None
        else:
            if len(dlc.contents) < 3:
                file_format, file_size = 'Txt', None
            else:
                file_format, file_size, _ = dlc.contents  # File type and size
                file_format, file_size = file_format.strip(), file_size.text
            last_update = pd.to_datetime(dlc.get('title'))  # Date and time

        return [filename, download_url, file_format, file_size, last_update]

    subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)

    if confirmed("To collect BBBike download catalogue for \"{}\"? ".format(subregion_name_),
                 confirmation_required=confirmation_required):
        try:
            url = 'https://download.bbbike.org/osm/bbbike/{}/'.format(subregion_name_)

            # Parse inside the `with` block so the HTTP response is always closed,
            # even if reading or parsing fails
            with urllib.request.urlopen(url) as source:
                source_soup = bs4.BeautifulSoup(source, 'lxml')

            download_links_class = source_soup.find_all(name='a', attrs={'class': ['download_link', 'small']})

            subregion_downloads_catalogue = pd.DataFrame(parse_dlc(x) for x in download_links_class)
            subregion_downloads_catalogue.columns = ['Filename', 'URL', 'DataType', 'Size', 'LastUpdate']

            path_to_file = cd_dat_bbbike(subregion_name_, subregion_name_ + "-download-catalogue.pickle")
            save_pickle(subregion_downloads_catalogue, path_to_file)

        except Exception as e:
            print("Failed to collect download catalogue for \"{}\". {}".format(subregion_name_, e))

    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")