Exemple #1
0
def validate_bbbike_download_info(subregion_name,
                                  osm_file_format,
                                  download_dir=None):
    """
    Resolve the subregion name, filename, download URL and local file path for a BBBike OSM data file.

    :param subregion_name: [str] name of a BBBike subregion
    :param osm_file_format: [str] format of the requested OSM file
    :param download_dir: [str; None (default)] directory to download to; any falsy value selects the
        default location given by ``cd_dat_bbbike``
    :return: [tuple] of length 4 ([str], [str], [str], [str]) subregion name, filename, download url and file path

    Example:
        subregion_name  = 'leeds'
        osm_file_format = 'pbf'
        download_dir    = None
        validate_bbbike_download_info(subregion_name, osm_file_format, download_dir)
    """
    canonical_name, url = get_bbbike_subregion_download_url(subregion_name, osm_file_format)

    # The filename is simply the last component of the download URL
    filename = os.path.basename(url)

    if download_dir:
        # A specific directory was requested: place the file directly inside it
        target_dir = regulate_input_data_dir(download_dir)
        local_path = os.path.join(target_dir, filename)
    else:
        # No directory given: fall back to the default BBBike data directory
        local_path = cd_dat_bbbike(canonical_name, filename)

    return canonical_name, filename, url, local_path
Exemple #2
0
def fetch_bbbike_subregion_download_catalogue(subregion_name,
                                              update=False,
                                              confirmation_required=True,
                                              verbose=False):
    """
    Load the pickled download catalogue of a BBBike subregion, collecting it first when needed.

    The catalogue is (re)collected when no local pickle file exists or when ``update`` is truthy.
    If loading the pickle fails, the exception is printed and None is returned.

    :param subregion_name: [str]
    :param update: [bool] (default: False)
    :param confirmation_required: [bool] (default: True)
    :param verbose: [bool] (default: False)
    :return: [pd.DataFrame] whatever ``load_pickle`` returns, or None if loading failed

    Testing e.g.
        subregion_name        = 'leeds'
        update                = False
        confirmation_required = True
        verbose               = True
        fetch_bbbike_subregion_download_catalogue(subregion_name, update, confirmation_required, verbose)
    """
    name = regulate_bbbike_input_subregion_name(subregion_name)
    pickle_filename = name + "-download-catalogue.pickle"
    pickle_path = cd_dat_bbbike(name, pickle_filename)

    already_cached = os.path.isfile(pickle_path)
    if update or not already_cached:
        # The collector receives the raw input name, exactly as passed in by the caller
        collect_bbbike_subregion_download_catalogue(
            subregion_name, confirmation_required=confirmation_required, verbose=verbose)

    try:
        return load_pickle(pickle_path, verbose=verbose)
    except Exception as e:
        # Best effort: report the problem and let the caller deal with a None result
        print(e)
        return None
def download_bbbike_subregion_osm_all_files(subregion_name, download_dir=None, download_confirmation_required=True):
    """
    Download every file listed in the BBBike download catalogue for a subregion.

    A failure on one file is printed and does not stop the remaining downloads.

    :param subregion_name: [str]
    :param download_dir: [str or None] any falsy value selects the location given by ``cd_dat_bbbike``
    :param download_confirmation_required: [bool] forwarded to ``confirmed`` as ``confirmation_required``
    """
    region = regulate_bbbike_input_subregion_name(subregion_name)
    full_catalogue = fetch_bbbike_download_catalogue("BBBike-download-catalogue")
    region_catalogue = full_catalogue[region]

    if download_dir:
        base_dir = regulate_input_data_dir(download_dir)
    else:
        base_dir = cd_dat_bbbike(region)

    prompt = "Confirm to download all available BBBike data for \"{}\"?".format(region)
    if not confirmed(prompt, confirmation_required=download_confirmation_required):
        print("The downloading process was not activated.")
        return

    print("\nStart to download all available OSM data for \"{}\" ... \n".format(region))

    # NOTE(review): files are written to <base_dir>/<region>/; if cd_dat_bbbike(region) already ends
    # with the region folder, the default location nests that folder twice - confirm this is intended.
    for url, filename in zip(region_catalogue.URL, region_catalogue.Filename):
        print("\n\n\"{}\" (below): ".format(filename))
        try:
            destination = os.path.join(base_dir, region, filename)
            download(url, destination)
        except Exception as e:
            # Keep going with the remaining files
            print("\nFailed to download \"{}\". {}.".format(filename, e))

    download_location = os.path.join(base_dir, region)
    print("\nCheck out the downloaded OSM data for \"{}\" at \"{}\".".format(region, download_location))
def fetch_bbbike_subregion_download_catalogue(subregion_name, update=False, confirmation_required=True):
    """
    Return the locally pickled download catalogue of a BBBike subregion.

    When the pickle file is missing, or ``update`` is truthy, the catalogue is collected first.
    A failure to load the pickle is printed and results in None.

    :param subregion_name: [str]
    :param update: [bool]
    :param confirmation_required: [bool]
    :return: [pandas.DataFrame] or null
    """
    region = regulate_bbbike_input_subregion_name(subregion_name)
    catalogue_path = cd_dat_bbbike(region, "{}{}".format(region, "-download-catalogue.pickle"))

    has_local_copy = os.path.isfile(catalogue_path)
    if update or not has_local_copy:
        collect_bbbike_subregion_download_catalogue(subregion_name, confirmation_required=confirmation_required)

    try:
        catalogue = load_pickle(catalogue_path)
    except Exception as e:
        print(e)
        return None
    else:
        return catalogue
def collect_bbbike_subregion_download_catalogue(subregion_name, confirmation_required=True):
    """
    Scrape the BBBike download page of a subregion and save its download catalogue as a pickle file.

    The catalogue is a pandas.DataFrame with the columns 'Filename', 'URL', 'DataType', 'Size' and
    'LastUpdate', one row per download link found on the page. Any error raised while fetching,
    parsing or saving is printed rather than propagated.

    :param subregion_name: [str] name of a BBBike subregion
    :param confirmation_required: [bool] (default: True) forwarded to ``confirmed``
    :return: None
    """

    def parse_dlc(dlc, base_url):
        """Convert one download-link tag into [filename, url, data type, size, last update]."""
        dlc_href = dlc.get('href')  # URL
        # NOTE: strip('./') removes any leading/trailing '.' and '/' characters, not only a "./" prefix
        filename, download_url = dlc_href.strip('./'), urllib.parse.urljoin(base_url, dlc_href)
        if not dlc.has_attr('title'):
            # Links without a 'title' attribute carry no size or timestamp information
            file_format, file_size, last_update = 'Poly', None, None
        else:
            if len(dlc.contents) < 3:
                file_format, file_size = 'Txt', None
            else:
                file_format, file_size, _ = dlc.contents  # File type and size
                file_format, file_size = file_format.strip(), file_size.text
            last_update = pd.to_datetime(dlc.get('title'))  # Date and time
        return [filename, download_url, file_format, file_size, last_update]

    subregion_name_ = regulate_bbbike_input_subregion_name(subregion_name)

    if confirmed("To collect BBBike download catalogue for \"{}\"? ".format(subregion_name_),
                 confirmation_required=confirmation_required):

        try:
            url = 'https://download.bbbike.org/osm/bbbike/{}/'.format(subregion_name_)

            # Read the page inside a context manager so the HTTP response is always closed,
            # even if parsing raises; BeautifulSoup consumes the stream while it is still open.
            with urllib.request.urlopen(url) as source:
                source_soup = bs4.BeautifulSoup(source, 'lxml')
            download_links_class = source_soup.find_all(name='a', attrs={'class': ['download_link', 'small']})

            subregion_downloads_catalogue = pd.DataFrame(parse_dlc(x, url) for x in download_links_class)
            # Assigning the column names fails (and is reported below) if no link was found
            subregion_downloads_catalogue.columns = ['Filename', 'URL', 'DataType', 'Size', 'LastUpdate']

            path_to_file = cd_dat_bbbike(subregion_name_, subregion_name_ + "-download-catalogue.pickle")
            save_pickle(subregion_downloads_catalogue, path_to_file)

        except Exception as e:
            print("Failed to collect download catalogue for \"{}\". {}".format(subregion_name_, e))
    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")