Ejemplo n.º 1
0
def read_osm_pbf(subregion_name,
                 data_dir=None,
                 parsed=True,
                 file_size_limit=50,
                 fmt_other_tags=True,
                 fmt_single_geom=True,
                 fmt_multi_geom=True,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_osm_pbf=False,
                 verbose=False):
    """
    Read the .osm.pbf data extract of a subregion, downloading the file first when it is not available locally.

    A previously saved pickle is loaded instead of parsing the file, unless 'update' is True.

    :param subregion_name: [str] e.g. 'rutland'
    :param data_dir: [str; None (default)] customised directory of the .osm.pbf file; if None, the default file path
        is used
    :param parsed: [bool] (default: True) passed on to parse_osm_pbf(); it also selects the pickle suffix
        (".pickle" if True, "-raw.pickle" otherwise)
    :param file_size_limit: [int; None] (default: 50) limit of file size (in MB), e.g. 50, or 100; for a larger file,
        a chunk count of ceil(size / limit) is passed to parse_osm_pbf(); None (or 0) means no chunk count is passed
    :param fmt_other_tags: [bool] (default: True) passed on to parse_osm_pbf()
    :param fmt_single_geom: [bool] (default: True) passed on to parse_osm_pbf()
    :param fmt_multi_geom: [bool] (default: True) passed on to parse_osm_pbf()
    :param update: [bool] (default: False) if True, ignore any existing pickle and request the file again
    :param download_confirmation_required: [bool] (default: True) passed on to download_subregion_osm_file()
    :param pickle_it: [bool] (default: False) whether to save the parsed data as a pickle file
    :param rm_osm_pbf: [bool] (default: False) whether to remove the .osm.pbf file after parsing it
    :param verbose: [bool] (default: False)
    :return: [dict; None] None is returned if the subregion cannot be resolved, the file cannot be obtained, or
        parsing fails

    Example:
        subregion_name                 = 'Rutland'
        data_dir                       = None
        parsed                         = True
        file_size_limit                = 50
        fmt_other_tags                 = True
        fmt_single_geom                = True
        fmt_multi_geom                 = True
        update                         = False
        download_confirmation_required = True
        pickle_it                      = False
        rm_osm_pbf                     = True
        verbose                        = False
        read_osm_pbf(subregion_name, data_dir, parsed, file_size_limit, fmt_other_tags, fmt_single_geom, fmt_multi_geom,
                     update, download_confirmation_required, pickle_it, rm_osm_pbf, verbose)
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if not (osm_pbf_filename and path_to_osm_pbf):
        print(
            "Errors occur. Maybe check with the input \"subregion_name\" first."
        )
        return None

    if data_dir:  # Otherwise keep the default file path
        osm_pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)

    subregion_filename = os.path.basename(path_to_osm_pbf)
    path_to_pickle = path_to_osm_pbf.replace(
        ".osm.pbf", ".pickle" if parsed else "-raw.pickle")

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle, verbose=verbose)

    # If the target file is not available (or an update is requested), try downloading it first.
    if not os.path.isfile(path_to_osm_pbf) or update:
        try:
            download_subregion_osm_file(
                subregion_name,
                osm_file_format=".osm.pbf",
                download_dir=data_dir,
                download_confirmation_required=download_confirmation_required,
                update=update,
                verbose=False)
        except Exception as e:
            print("Cancelled reading data. CAUSE: {}".format(e))
            return None

        # The download may have been declined or may have failed without raising.
        if not os.path.isfile(path_to_osm_pbf):
            if verbose:
                print("Cancelled reading data. CAUSE: \"{}\" is not available.".format(subregion_filename))
            return None

    file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024**2), 1)

    if file_size_limit and file_size_in_mb > file_size_limit:
        # Parsing the '.osm.pbf' file in a chunk-wise way
        chunks_no = math.ceil(file_size_in_mb / file_size_limit)
    else:
        chunks_no = None

    if verbose:
        print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
    try:
        osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                     fmt_other_tags, fmt_single_geom,
                                     fmt_multi_geom)
        if verbose:
            print("Successfully.\n")
        if pickle_it:
            save_pickle(osm_pbf_data, path_to_pickle, verbose=verbose)
    except Exception as e:
        print("Failed. CAUSE: \"{}\"\n".format(e))
        osm_pbf_data = None

    if rm_osm_pbf:
        remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

    return osm_pbf_data
Ejemplo n.º 2
0
def psql_osm_pbf_data_extracts(*subregion_name, database_name='OSM_Geofabrik', data_dir=None,
                               update_osm_pbf=False, if_table_exists='replace', file_size_limit=50, parsed=True,
                               fmt_other_tags=True, fmt_single_geom=True, fmt_multi_geom=True, rm_raw_file=False,
                               verbose=False):
    """
    Import data of selected or all (sub)regions, which do not have (sub-)subregions, into PostgreSQL server

    A file not larger than 'file_size_limit' is read in one go via read_osm_pbf(); a larger file is opened with OGR
    and imported layer by layer in chunks. A failure for one subregion is reported and the loop carries on with the
    next one; the database connection is closed in every case.

    :param subregion_name: [str] name(s) of subregion(s); if none is given, all non-subregion names are used
    :param database_name: [str] (default: 'OSM_Geofabrik')
    :param data_dir: [str; None (default)] customised directory of the .osm.pbf files; if None, default paths are used
    :param update_osm_pbf: [bool] (default: False) passed on to download_subregion_osm_file() as 'update'
    :param if_table_exists: [str] 'replace' (default); 'append'; or 'fail'
    :param file_size_limit: [int; None] (default: 50) limit of file size (in MB); None (or 0) means that every file
        is read in one go
    :param parsed: [bool] (default: True)
    :param fmt_other_tags: [bool] (default: True)
    :param fmt_single_geom: [bool] (default: True)
    :param fmt_multi_geom: [bool] (default: True)
    :param rm_raw_file: [bool] (default: False) whether to remove the .osm.pbf file once it has been processed
    :param verbose: [bool] (default: False)
    """
    if not subregion_name:
        subregion_names = fetch_region_subregion_tier("GeoFabrik-non-subregion-list")
        confirm_msg = "To dump GeoFabrik OSM data extracts of all subregions to PostgreSQL? "
    else:
        subregion_names = retrieve_names_of_subregions_of(*subregion_name)
        confirm_msg = "To dump GeoFabrik OSM data extracts of the following subregions to PostgreSQL? \n{}?\n".format(
            ", ".join(subregion_names))

    if not confirmed(confirm_msg):
        return

    # Connect to PostgreSQL server
    osmdb = OSM()
    osmdb.connect_db(database_name=database_name)

    # Chunks of a big file are always appended, unless the caller asked to fail on an existing table
    if_exists_ = if_table_exists if if_table_exists == 'fail' else 'append'

    err_subregion_names = []

    def note_failure(name):
        # Record each failing subregion once, whichever stage reported it
        if name not in err_subregion_names:
            err_subregion_names.append(name)

    try:
        for subregion_name_ in subregion_names:
            try:
                default_pbf_filename, default_path_to_pbf = get_default_path_to_osm_file(subregion_name_, ".osm.pbf")
                if not data_dir:  # Go to default file path
                    path_to_osm_pbf = default_path_to_pbf
                else:
                    osm_pbf_dir = regulate_input_data_dir(data_dir)
                    path_to_osm_pbf = os.path.join(osm_pbf_dir, default_pbf_filename)

                # Downloading and sizing are inside the 'try' so that one bad subregion does not abort the batch
                download_subregion_osm_file(subregion_name_, osm_file_format=".osm.pbf", download_dir=data_dir,
                                            update=update_osm_pbf, download_confirmation_required=False,
                                            verbose=verbose)

                file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024 ** 2), 1)

                if not file_size_limit or file_size_in_mb <= file_size_limit:

                    subregion_osm_pbf = read_osm_pbf(subregion_name_, data_dir, parsed, file_size_limit,
                                                     fmt_other_tags, fmt_single_geom, fmt_multi_geom,
                                                     update=False, download_confirmation_required=False,
                                                     pickle_it=False, rm_osm_pbf=rm_raw_file)

                    if subregion_osm_pbf is not None:
                        osmdb.dump_osm_pbf_data(subregion_osm_pbf, table_name=subregion_name_,
                                                if_exists=if_table_exists)
                        del subregion_osm_pbf
                        gc.collect()

                else:
                    print("\nParsing and importing \"{}\" feature-wisely to PostgreSQL ... ".format(subregion_name_))
                    chunks_no = math.ceil(file_size_in_mb / file_size_limit)
                    # Reference: https://gdal.org/python/osgeo.ogr.Feature-class.html
                    raw_osm_pbf = ogr.Open(path_to_osm_pbf)
                    if raw_osm_pbf is None:
                        raise IOError("Failed to open \"{}\".".format(path_to_osm_pbf))
                    try:
                        for i in range(raw_osm_pbf.GetLayerCount()):
                            lyr = raw_osm_pbf.GetLayerByIndex(i)  # Hold the i-th layer
                            lyr_name = lyr.GetName()
                            print("                       {} ... ".format(lyr_name), end="")
                            try:
                                lyr_feats = [feat for feat in lyr]
                                feats_no = len(lyr_feats)
                                chunked_lyr_feats = split_list(lyr_feats, chunks_no)

                                del lyr_feats
                                gc.collect()

                                if osmdb.subregion_table_exists(lyr_name, subregion_name_) and \
                                        if_table_exists == 'replace':
                                    osmdb.drop_subregion_data_by_layer(subregion_name_, lyr_name)

                                # Loop through all available features
                                for lyr_chunk in chunked_lyr_feats:
                                    lyr_chunk_dat = pd.DataFrame(rapidjson.loads(f.ExportToJson()) for f in lyr_chunk)
                                    lyr_chunk_dat = parse_layer_data(lyr_chunk_dat, lyr_name,
                                                                     fmt_other_tags, fmt_single_geom, fmt_multi_geom)
                                    osmdb.dump_osm_pbf_data_by_layer(lyr_chunk_dat, if_exists=if_exists_,
                                                                     schema_name=lyr_name, table_name=subregion_name_)
                                    del lyr_chunk_dat
                                    gc.collect()

                                print("Done. Total amount of features: {}".format(feats_no))

                            except Exception as e:
                                print("Failed. {}".format(e))
                                # A failed layer must show up in the final summary
                                note_failure(subregion_name_)
                    finally:
                        # Release the data source even if a layer could not be accessed
                        raw_osm_pbf.Release()
                        del raw_osm_pbf
                        gc.collect()

                # read_osm_pbf() may already have removed the file, so only remove what still exists
                if rm_raw_file and os.path.isfile(path_to_osm_pbf):
                    remove_subregion_osm_file(path_to_osm_pbf, verbose=verbose)

            except Exception as e:
                print(e)
                note_failure(subregion_name_)

            if subregion_name_ != subregion_names[-1]:
                time.sleep(60)

        if not err_subregion_names:
            print("\nMission accomplished.\n")
        else:
            print("\nErrors occurred when parsing data of the following subregion(s):")
            print(*err_subregion_names, sep=", ")

    finally:
        osmdb.disconnect()
Ejemplo n.º 3
0
def read_shp_zip(subregion_name,
                 layer,
                 feature=None,
                 data_dir=None,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_extracts=False,
                 rm_shp_zip=False,
                 verbose=False):
    """
    Read a layer (optionally restricted to one feature class) from the .shp.zip data extract of a subregion.

    A previously saved pickle is loaded instead, unless 'update' is True. When 'feature' is given, the selected rows
    are also written to a new shapefile, named after the layer and the feature, next to the source shapefile.

    :param subregion_name: [str] e.g. 'england', 'oxfordshire', or 'europe'; case-insensitive
    :param layer: [str] e.g. 'railways'
    :param feature: [str; None (default)] e.g. 'rail'; if None, all available features included
    :param data_dir: [str; None (default)]
    :param update: [bool] (default: False) whether to update the relevant file/information
    :param download_confirmation_required: [bool] (default: True)
    :param pickle_it: [bool] (default: False)
    :param rm_extracts: [bool] (default: False) whether to delete extracted files from the .shp.zip file
    :param rm_shp_zip: [bool] (default: False) whether to delete the downloaded .shp.zip file
    :param verbose: [bool] (default: False)
    :return: [gpd.GeoDataFrame; None] None if the subregion cannot be resolved or no shapefile of the layer is found

    Example:
        subregion_name                 = 'Rutland'
        layer                          = 'railways'
        feature                        = None
        data_dir                       = cd("test_read_GeoFabrik")
        update                         = False
        download_confirmation_required = True
        pickle_it                      = False
        rm_extracts                    = True
        rm_shp_zip                     = False
        verbose                        = True
        read_shp_zip(subregion_name, layer, feature, data_dir, update, download_confirmation_required, pickle_it,
                     rm_extracts, rm_shp_zip, verbose)
    """
    shp_zip_filename, path_to_shp_zip = get_default_path_to_osm_file(
        subregion_name, ".shp.zip", mkdir=False)
    if not (shp_zip_filename and path_to_shp_zip):
        return None

    extract_dir = os.path.splitext(path_to_shp_zip)[0]
    if data_dir:
        shp_zip_dir = regulate_input_data_dir(data_dir)
        path_to_shp_zip = os.path.join(shp_zip_dir, shp_zip_filename)
        extract_dir = os.path.join(shp_zip_dir, os.path.basename(extract_dir))

    # Make a local path for saving a pickle file for .shp data
    name_parts = [shp_zip_filename.replace("-latest-free.shp.zip", ""), layer, feature]
    sub_name = "-".join(x for x in name_parts if x)
    path_to_shp_pickle = os.path.join(extract_dir, sub_name + ".shp.pickle")

    if os.path.isfile(path_to_shp_pickle) and not update:
        return load_pickle(path_to_shp_pickle, verbose=verbose)

    # Download the requested OSM file
    if not os.path.exists(extract_dir):
        # NOTE(review): the file name, rather than 'subregion_name', is passed here -
        # confirm that the downloader resolves it to the intended subregion
        download_subregion_osm_file(
            shp_zip_filename,
            osm_file_format=".shp.zip",
            download_dir=data_dir,
            update=update,
            verbose=verbose,
            download_confirmation_required=download_confirmation_required)

    if os.path.isfile(path_to_shp_zip):
        extract_shp_zip(path_to_shp_zip, extract_dir, layer=layer, verbose=verbose)

    # CRS assigned to a feature subset before it is written to disk
    wgs84_crs = {
        'no_defs': True,
        'ellps': 'WGS84',
        'datum': 'WGS84',
        'proj': 'longlat'
    }

    def feature_shp_path(path_to_layer_shp):
        # Insert the feature into the file name only; directory names containing 'layer' stay untouched
        shp_dir, shp_name = os.path.split(path_to_layer_shp)
        return os.path.join(shp_dir, shp_name.replace(layer, layer + "_" + feature))

    path_to_shp = glob.glob(os.path.join(extract_dir, "*{}*.shp".format(layer)))

    if not path_to_shp:
        shp_data = None

    elif not feature:
        if len(path_to_shp) == 1:
            source_paths = path_to_shp
        else:
            # Prefer the original "<layer>_a*" / "<layer>_free*" files, so that shapefiles written earlier for
            # single features are not read again; fall back to all matches if none can be identified
            path_to_orig_shp = [
                p for p in path_to_shp
                if layer + '_a' in p or layer + '_free' in p
            ]
            source_paths = path_to_orig_shp or path_to_shp

        if len(source_paths) == 1:
            shp_data = gpd.read_file(source_paths[0])
        else:
            shp_data = pd.concat([gpd.read_file(p) for p in source_paths],
                                 axis=0,
                                 ignore_index=True)

    else:  # a feature is specified
        if len(path_to_shp) == 1:
            source_paths = path_to_shp
        else:
            # Skip any shapefile already written for this very feature
            source_paths = [
                p for p in path_to_shp if layer + "_" + feature not in p
            ]

        if not source_paths:
            shp_data = None
        else:
            feat_subsets = [
                dat[dat.fclass == feature]
                for dat in (gpd.read_file(p) for p in source_paths)
            ]
            if len(feat_subsets) == 1:
                shp_data = gpd.GeoDataFrame(feat_subsets[0])
            else:
                shp_data = pd.concat(feat_subsets, axis=0, ignore_index=True)
            shp_data.crs = wgs84_crs
            shp_data.to_file(feature_shp_path(source_paths[0]),
                             driver='ESRI Shapefile')

    # A missing result is not pickled; otherwise it would be loaded in place of real data on later calls
    if pickle_it and shp_data is not None:
        save_pickle(shp_data, path_to_shp_pickle, verbose=verbose)

    if os.path.exists(extract_dir) and rm_extracts:
        for f in glob.glob(os.path.join(extract_dir, "gis_osm*")):
            os.remove(f)

    if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
        remove_subregion_osm_file(path_to_shp_zip, verbose=verbose)

    return shp_data
Ejemplo n.º 4
0
def read_osm_pbf(subregion_name,
                 data_dir=None,
                 parsed=True,
                 file_size_limit=50,
                 fmt_other_tags=True,
                 fmt_single_geom=True,
                 fmt_multi_geom=True,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_osm_pbf=True):
    """
    Read the .osm.pbf data extract of a subregion, requesting a download of the file before parsing it.

    A previously saved pickle is loaded instead of parsing the file, unless 'update' is True.

    :param subregion_name: [str] e.g. 'london'
    :param data_dir: [str or None] customised directory of the .osm.pbf file; if None, the default file path is used
    :param parsed: [bool] passed on to parse_osm_pbf(); it also selects the pickle suffix
        (".pickle" if True, "-raw.pickle" otherwise)
    :param file_size_limit: [int or None] limit of file size (in MB), e.g. 50 (default), or 100; for a larger file,
        a chunk count of ceil(size / limit) is passed to parse_osm_pbf(); None (or 0) means no chunk count is passed
    :param fmt_other_tags: [bool]
    :param fmt_single_geom: [bool]
    :param fmt_multi_geom: [bool]
    :param update: [bool] if True, ignore any existing pickle
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool] whether to save successfully parsed data as a pickle file
    :param rm_osm_pbf: [bool] whether to remove the .osm.pbf file after parsing it (default: True)
    :return: [dict] or None; None is returned if the subregion cannot be resolved, the file is not available after
        the download request, or parsing fails
    """
    assert isinstance(file_size_limit, int) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if not (osm_pbf_filename and path_to_osm_pbf):
        # Without a file name and a path, nothing below can work
        print("Errors occur. Maybe check with the input \"subregion_name\" first.")
        return None

    if data_dir:  # Otherwise keep the default file path
        osm_pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)

    subregion_filename = os.path.basename(path_to_osm_pbf)
    path_to_pickle = path_to_osm_pbf.replace(
        ".osm.pbf", ".pickle" if parsed else "-raw.pickle")

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle)

    # If the target file is not available, try downloading it first.
    download_subregion_osm_file(
        subregion_name,
        osm_file_format=".osm.pbf",
        download_dir=data_dir,
        update=update,
        download_confirmation_required=download_confirmation_required,
        verbose=False)

    # The download may have been declined or may have failed without raising.
    if not os.path.isfile(path_to_osm_pbf):
        print("Cancelled reading data. \"{}\" is not available.".format(subregion_filename))
        return None

    file_size_in_mb = round(os.path.getsize(path_to_osm_pbf) / (1024**2), 1)

    if file_size_limit and file_size_in_mb > file_size_limit:
        # Parsing the '.osm.pbf' file in a chunk-wise way
        chunks_no = math.ceil(file_size_in_mb / file_size_limit)
    else:
        chunks_no = None

    print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
    try:
        osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                     fmt_other_tags, fmt_single_geom,
                                     fmt_multi_geom)
        print("Successfully.\n")
    except Exception as e:
        print("Failed. {}\n".format(e))
        osm_pbf_data = None

    # A failed parse is not pickled; otherwise None would be loaded in place of real data on later calls
    if pickle_it and osm_pbf_data is not None:
        save_pickle(osm_pbf_data, path_to_pickle)
    if rm_osm_pbf:
        remove_subregion_osm_file(path_to_osm_pbf)

    return osm_pbf_data