Beispiel #1
0
def test_exclude_filtering_nodes_and_relations(helsinki_pbf):
    """Filter amenity=library with both 'exclude' and 'keep' and check the
    resulting shapes, amenity values and OSM element types."""
    from pyrosm import OSM

    reader = OSM(helsinki_pbf)
    library_filter = {"amenity": ["library"]}

    # Excluding libraries: everything else is returned
    without_libraries = reader.get_data_by_custom_criteria(
        library_filter, filter_type="exclude")
    assert without_libraries.shape == (1081, 37)
    remaining_amenities = without_libraries["amenity"].unique().tolist()
    assert "library" not in remaining_amenities

    # The excluded result is expected to hold nodes, ways and relations
    excluded_types = without_libraries["osm_type"].unique().tolist()
    assert excluded_types == ["node", "way", "relation"]

    # Keeping libraries: only libraries are returned
    only_libraries = reader.get_data_by_custom_criteria(
        library_filter, filter_type="keep")
    assert only_libraries.shape == (7, 23)
    kept_amenities = only_libraries["amenity"].unique().tolist()
    assert kept_amenities == ["library"]

    # The kept result is expected to hold nodes and ways (no relations)
    kept_types = only_libraries["osm_type"].unique().tolist()
    assert kept_types == ["node", "way"]
Beispiel #2
0
def test_reading_with_custom_filters_selecting_specific_osm_element(
        helsinki_pbf):
    """Request buildings one OSM element type at a time and verify that only
    that element type is returned, with the expected row count."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    osm = OSM(filepath=helsinki_pbf)

    # (expected osm_type, expected number of rows, keep_* switches),
    # evaluated in this order: relations, ways, nodes
    scenarios = [
        ('relation', 66,
         dict(keep_nodes=False, keep_ways=False, keep_relations=True)),
        ('way', 422,
         dict(keep_nodes=False, keep_ways=True, keep_relations=False)),
        ('node', 36,
         dict(keep_nodes=True, keep_ways=False, keep_relations=False)),
    ]

    for expected_type, expected_rows, switches in scenarios:
        filtered = osm.get_data_by_custom_criteria(
            custom_filter={'building': True},
            filter_type="keep",
            **switches)
        assert isinstance(filtered, GeoDataFrame)

        # Only the requested element type may be present
        present_types = filtered['osm_type'].unique()
        assert len(present_types) == 1
        assert present_types[0] == expected_type
        assert len(filtered) == expected_rows
Beispiel #3
0
def test_reading_with_custom_filters_with_excluding(test_pbf):
    """Exclude each 'building' value in turn and check that exactly the rows
    carrying that value disappear from the result."""
    from pyrosm import OSM
    from shapely.geometry import Polygon
    from geopandas import GeoDataFrame

    reader = OSM(filepath=test_pbf)
    all_buildings = reader.get_buildings()

    # Columns every filtered result has to provide
    expected_columns = ('building', 'id', 'timestamp', 'version', 'geometry')

    total_rows = len(all_buildings)
    tag_counts = all_buildings['building'].value_counts()
    for tag_value, tag_count in tag_counts.items():
        remaining = reader.get_data_by_custom_criteria(
            custom_filter={'building': [tag_value]}, filter_type="exclude")

        assert isinstance(remaining, GeoDataFrame)
        first_geometry = remaining.loc[0, "geometry"]
        assert isinstance(first_geometry, Polygon)
        assert len(remaining) == total_rows - tag_count
        # The excluded value must no longer occur among the buildings
        assert tag_value not in remaining["building"].unique()

        for column in expected_columns:
            assert column in remaining.columns
Beispiel #4
0
def get_osm_gata(protobuf: str) -> typing.Tuple:
    """Read parks, major roads and industrial areas from an OSM protobuf.

    Returns a 3-tuple ``(parks, roads, industrial)``; each element is
    reprojected to the module-level ``CRS``.
    """
    reader = OSM(protobuf)

    # Green areas: parks, gardens and woods, taken from ways and relations
    green_filter = {
        'leisure': ['park', 'garden'],
        'natural': ['wood']
    }
    parks = reader.get_data_by_custom_criteria(custom_filter=green_filter,
                                               filter_type='keep',
                                               keep_nodes=False,
                                               keep_ways=True,
                                               keep_relations=True)
    # Keep only green areas whose area, measured in EPSG:3395, exceeds 100000
    projected_area = parks.to_crs("EPSG:3395").area
    parks = parks[projected_area > 100000]

    road_filter = {"highway": ["trunk", "primary", "secondary"]}
    roads = reader.get_data_by_custom_criteria(custom_filter=road_filter)

    industrial_filter = {"landuse": ["industrial"]}
    industrial = reader.get_data_by_custom_criteria(
        custom_filter=industrial_filter)

    return parks.to_crs(CRS), roads.to_crs(CRS), industrial.to_crs(CRS)
Beispiel #5
0
def test_adding_extra_attribute(helsinki_pbf):
    """Requesting an extra attribute must add exactly one column and leave
    the number of rows unchanged."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    reader = OSM(filepath=helsinki_pbf)
    extra_col = "wikidata"

    baseline = reader.get_data_by_custom_criteria({"highway": True})
    with_extra = reader.get_data_by_custom_criteria(
        {"highway": True}, extra_attributes=[extra_col])

    base_rows, base_cols = baseline.shape
    extra_rows, extra_cols = with_extra.shape

    # One additional column compared to the baseline
    assert extra_cols == base_cols + 1
    # Same number of rows
    assert extra_rows == base_rows
    assert extra_col in with_extra.columns
    # The extra column must carry at least one real value
    populated = with_extra[extra_col].dropna().unique()
    assert len(populated) > 0
    assert isinstance(baseline, GeoDataFrame)
Beispiel #6
0
def test_using_incorrect_booleans(test_pbf):
    """Invalid keep_nodes / keep_ways / keep_relations values must raise.

    Each call below is expected to raise a ValueError whose message contains
    the given text. The test fails when no exception is raised at all, and
    re-raises the ValueError when its message is a different one.
    """
    from pyrosm import OSM
    osm = OSM(filepath=test_pbf)

    custom_filter = {"building": ["retail"]}
    incorrect_bool = "foo"

    # (keyword arguments, text expected in the error message)
    invalid_calls = [
        ({"keep_nodes": incorrect_bool},
         "'keep_nodes' should be boolean type: True or False"),
        ({"keep_ways": incorrect_bool},
         "'keep_ways' should be boolean type: True or False"),
        ({"keep_relations": incorrect_bool},
         "'keep_relations' should be boolean type: True or False"),
        # Switching every element type off leaves nothing to read
        ({"keep_relations": False, "keep_ways": False, "keep_nodes": False},
         "At least on of the following parameters should be True"),
    ]

    for kwargs, expected_message in invalid_calls:
        try:
            osm.get_data_by_custom_criteria(custom_filter=custom_filter,
                                            **kwargs)
        except ValueError as e:
            if expected_message not in str(e):
                raise
        else:
            # Without this branch the test would pass silently when the
            # invalid input is accepted.
            raise AssertionError(
                f"Expected ValueError containing {expected_message!r} "
                f"for arguments {kwargs!r}, but no exception was raised.")
Beispiel #7
0
def test_using_incorrect_filter(test_pbf):
    """Malformed custom filters must raise a ValueError.

    The test fails when a malformed filter is accepted without an exception,
    and re-raises the ValueError when its message is not the expected one.
    """
    from pyrosm import OSM
    osm = OSM(filepath=test_pbf)

    # (custom filter, text expected in the error message)
    invalid_filters = [
        # 1. Not a dictionary at all
        (None, "should be a Python dictionary"),
        # 2. Non-string value
        ({"building": [1]}, "string"),
        # 3. Mix of valid and non-string values
        ({"building": ["correct_string", 1]}, "string"),
        # 4. Non-string key
        ({0: ["residential"]}, "string"),
    ]

    for custom_filter, expected_message in invalid_filters:
        try:
            osm.get_data_by_custom_criteria(custom_filter=custom_filter)
        except ValueError as e:
            if expected_message not in str(e):
                raise
        else:
            # Without this branch the test would pass silently when the
            # malformed filter is accepted.
            raise AssertionError(
                f"Expected ValueError containing {expected_message!r} "
                f"for custom_filter={custom_filter!r}, "
                f"but no exception was raised.")
Beispiel #8
0
def test_using_two_level_custom_filter(helsinki_region_pbf):
    """Combine 'osm_keys_to_keep' with a custom filter and check that both
    the kept key and the filtered key are populated on every row."""
    from pyrosm import OSM

    reader = OSM(filepath=helsinki_region_pbf)
    schools = reader.get_data_by_custom_criteria(
        custom_filter={"amenity": ["school"]},
        osm_keys_to_keep=["building"])

    assert schools.shape == (72, 25)

    # Neither 'building' nor 'amenity' may contain NaNs
    for column in ("building", "amenity"):
        assert not schools[column].hasnans
Beispiel #9
0
def test_using_incorrect_filter_type(test_pbf):
    """An unknown filter_type must raise a ValueError.

    The test fails when the invalid value is accepted without an exception,
    and re-raises the ValueError when its message is not the expected one.
    """
    from pyrosm import OSM
    osm = OSM(filepath=test_pbf)

    custom_filter = {"building": ["retail"]}
    filter_type = "incorrect_test"
    expected_message = "should be either 'keep' or 'exclude'"

    try:
        osm.get_data_by_custom_criteria(custom_filter=custom_filter,
                                        filter_type=filter_type)
    except ValueError as e:
        if expected_message not in str(e):
            raise
    else:
        # Without this branch the test would pass silently when the invalid
        # filter type is accepted.
        raise AssertionError(
            f"Expected ValueError containing {expected_message!r} "
            f"for filter_type={filter_type!r}, but no exception was raised.")
Beispiel #10
0
def test_using_incorrect_osm_keys(test_pbf):
    """A non-str, non-list 'osm_keys_to_keep' must raise a ValueError.

    The test fails when the invalid value is accepted without an exception,
    and re-raises the ValueError when its message is not the expected one.
    """
    from pyrosm import OSM
    osm = OSM(filepath=test_pbf)

    osm_keys = 1
    custom_filter = {"building": ["retail"]}
    expected_message = (
        "'osm_keys_to_keep' -parameter should be of type str or list.")

    try:
        osm.get_data_by_custom_criteria(custom_filter=custom_filter,
                                        osm_keys_to_keep=osm_keys)
    except ValueError as e:
        if expected_message not in str(e):
            raise
    else:
        # Without this branch the test would pass silently when the invalid
        # keys are accepted.
        raise AssertionError(
            f"Expected ValueError containing {expected_message!r} "
            f"for osm_keys_to_keep={osm_keys!r}, "
            f"but no exception was raised.")
Beispiel #11
0
def test_using_incorrect_tags(test_pbf):
    """Non-string entries in 'tags_as_columns' must raise a ValueError.

    The test fails when the invalid tags are accepted without an exception,
    and re-raises the ValueError when its message is not the expected one.
    """
    from pyrosm import OSM
    osm = OSM(filepath=test_pbf)

    # Incorrect tags
    # --------------
    tags_as_columns = [1]
    custom_filter = {"building": ["retail"]}
    expected_message = "All tags listed in 'tags_as_columns' should be strings"

    try:
        osm.get_data_by_custom_criteria(custom_filter=custom_filter,
                                        tags_as_columns=tags_as_columns)
    except ValueError as e:
        if expected_message not in str(e):
            raise
    else:
        # Without this branch the test would pass silently when the invalid
        # tags are accepted.
        raise AssertionError(
            f"Expected ValueError containing {expected_message!r} "
            f"for tags_as_columns={tags_as_columns!r}, "
            f"but no exception was raised.")
Beispiel #12
0
def test_reading_custom_from_area_having_none(helsinki_pbf):
    """Querying a bounding box without matching data must emit a UserWarning
    and return None."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    # Bounding box of an area for which the query is expected to find nothing
    empty_area = [24.940514, 60.173849, 24.942, 60.175892]
    reader = OSM(filepath=helsinki_pbf, bounding_box=empty_area)

    # pytest.warns fails the test unless a UserWarning is emitted
    with pytest.warns(UserWarning) as caught:
        result = reader.get_data_by_custom_criteria({"highway": ["primary"]})
        # NOTE(review): the warning text is evaluated here but the outcome is
        # never asserted, so this line cannot fail the test.
        _ = "could not find any OSM data" in str(caught)

    # Nothing was found, so no GeoDataFrame is returned
    assert result is None
Beispiel #13
0
def test_using_multiple_filters(helsinki_pbf):
    """Filter on two keys at once and check that each key column holds only
    the requested value (besides missing values)."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    reader = OSM(filepath=helsinki_pbf)
    two_key_filter = {
        "shop": ["alcohol"],
        "amenity": ["pub"]
    }
    result = reader.get_data_by_custom_criteria(two_key_filter)

    # Missing values are not strings, so keeping only strings drops them
    shop_values = [
        value for value in result["shop"].unique().tolist()
        if isinstance(value, str)
    ]
    amenity_values = [
        value for value in result["amenity"].unique().tolist()
        if isinstance(value, str)
    ]

    assert isinstance(result, GeoDataFrame)
    assert shop_values == ["alcohol"]
    assert amenity_values == ["pub"]
    assert result.shape == (59, 32)
Beispiel #14
0
def test_parsing_osm_with_custom_filter_by_including_tags(test_pbf):
    """Keep only building=retail with 'tags_as_columns' restricted to
    'building', then check columns, values, row count and CRS."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame
    import pyproj

    reader = OSM(filepath=test_pbf)

    retail = reader.get_data_by_custom_criteria(
        custom_filter={"building": ["retail"]},
        filter_type="keep",
        osm_keys_to_keep="building",
        tags_as_columns=["building"])

    assert isinstance(retail, GeoDataFrame)

    # With tags_as_columns given, no column outside this list may appear
    permitted = [
        "geometry", "tags", "building", "id", "osm_type", "version",
        "timestamp", "changeset"
    ]
    for column in retail.columns:
        assert column in permitted

    # 'retail' must be the one and only building value
    building_values = retail["building"].unique()
    assert len(building_values) == 1
    assert building_values[0] == "retail"

    # Columns that always have to be present
    for column in ('id', 'geometry'):
        assert column in retail.columns

    # Row count and coordinate reference system
    assert len(retail) == 2
    assert retail.crs == pyproj.CRS.from_epsg(4326)
Beispiel #15
0
def test_custom(test_pbf):
    """A simple highway=secondary query must return a GeoDataFrame."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    reader = OSM(test_pbf)
    secondary_roads = reader.get_data_by_custom_criteria(
        {"highway": ["secondary"]})
    assert isinstance(secondary_roads, GeoDataFrame)
Beispiel #16
0
def test_custom_filters_with_custom_keys(helsinki_region_pbf):
    """Query public-transport data with a single-key filter and with a
    multi-key filter, then validate counts and the values returned."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    osm = OSM(filepath=helsinki_region_pbf)

    # Simple query: everything tagged with 'public_transport'
    stops = osm.get_data_by_custom_criteria(
        custom_filter={'public_transport': True},
        filter_type="keep",
    )
    assert isinstance(stops, GeoDataFrame)
    assert len(stops) == 5542

    # More complicated query
    # ----------------------
    # All transit related data (bus, trains, trams, metro etc.),
    # without nodes (stops etc. are not kept)
    routes = [
        "bus", "ferry", "railway", "subway", "train", "tram", "trolleybus"
    ]
    rails = ["tramway", "light_rail", "rail", "subway", "tram"]
    # 'express' comes with routes
    bus = ['yes', "express"]

    transit_filter = {
        'route': routes,
        'railway': rails,
        'bus': bus
    }
    transit = osm.get_data_by_custom_criteria(custom_filter=transit_filter,
                                              filter_type="keep",
                                              keep_nodes=False)

    required_columns = ["railway", "bus", "route"]
    for col in required_columns:
        assert col in transit.columns

    # Number of non-missing values expected per column
    correct_counts = {'railway': 1456, 'route': 824, 'bus': 79}
    for col in required_columns:
        cnt = len(transit[col].dropna())
        correct = correct_counts[col]
        assert cnt == correct, f"Incorrect count for {col}. " \
                               f"Should have {correct}, found {cnt}."

    # Each filtered column may only hold values listed in its filter;
    # missing values (None / NaN) are skipped
    for column, allowed in (("route", routes), ("railway", rails),
                            ("bus", bus)):
        for v in transit[column].unique():
            if v is None or str(v) == "nan":
                continue
            assert v in allowed

    assert isinstance(transit, GeoDataFrame)
    assert len(transit) == 2357

    # With custom filters every record must have a value in at least one of
    # the filtered attributes: dropping rows that are NaN in all of them
    # must not remove anything
    selected = transit[required_columns]
    without_empty_rows = selected.dropna(subset=required_columns, how="all")
    assert selected.shape == without_empty_rows.shape
Beispiel #17
0
class ProcessOSM:
    """
    Export themed layers of an OSM PBF extract to vector files.

    For every theme (an OSM key) the PBF is queried through the ``OSM``
    reader, the result is split by geometry family (point, line, polygon),
    optionally clipped to ``clip_gdf`` and written to disk by ``write_data``.

    Optional settings are plain attributes set after construction:
    ``workers``, ``clip_data``, ``layer``, ``bbox`` and ``keep``.
    """

    # Geometry type names that belong to each feature keyword.
    _GEOMETRY_TYPES = {
        'point': ['Point', 'MultiPoint'],
        'line': ['LineString', 'MultiLineString'],
        'polygon': ['Polygon', 'MultiPolygon']
    }

    def __init__(self, inputs, output, prefix, ext, themes, features):
        """
        Store the run configuration.

        :param inputs: PBF input handed to the ``OSM`` reader.
        :param output: Directory the result files are written to.
        :param prefix: File name prefix of every result file.
        :param ext: Output format: 'shp', 'geojson', anything else -> GPKG.
        :param themes: Iterable of OSM keys to extract.
        :param features: Iterable of 'point' / 'line' / 'polygon' keywords.
        """
        self.inputs = inputs
        self.output = output
        self.themes = themes
        self.features = features
        self.workers = 1
        self.clip_data = None
        self.clip_gdf = None
        self.osm = None
        self.keep = False  # False removes invalid geometries
        self.show_warning = False
        self.prefix = prefix
        self.ext = ext
        self.bbox = None
        self.layer = None

    def process(self):
        """
        Handle general multiprocessing workflow.

        Opens the reader (clipped, bounded or unrestricted), submits one
        ``process_key`` job per theme to a process pool and reports the
        total run time.

        :raises SystemExit: with status 1 when the clip layer cannot be read.
        """
        begin_time = time.time()
        self._open_reader()

        with ProcessPoolExecutor(max_workers=self.workers) as executor:
            futures = [
                executor.submit(self.process_key, theme)
                for theme in self.themes
            ]
            for finished in as_completed(futures):
                error = finished.exception()
                if error is not None:
                    print(f'Future Exception {error}')
                    # Plagued by general memory errors from pyrosm, which
                    # consume all RAM/swap and then hang the machine.
                    # An active job cannot be cancelled and shutdown only
                    # cancels queued tasks, so the only thing left to do
                    # on error is to kill the remaining child processes.
                    kill_child_processes(os.getpid())

        total_time = time.time() - begin_time
        print('Done after {} seconds.'.format(round(total_time, 0)))

    def _open_reader(self):
        """Create ``self.osm`` (and ``self.clip_gdf``) from the settings."""
        if self.clip_data is not None:
            self.clip_gdf = self._read_clip_layer()
            geo = self.clip_gdf.geometry.unary_union
            self.osm = OSM(self.inputs, geo)
        elif self.bbox is not None:
            self.osm = OSM(self.inputs, self.bbox)

            # Create Clip GDF from bbox coordinate
            min_x, min_y, max_x, max_y = (self.bbox[0], self.bbox[1],
                                          self.bbox[2], self.bbox[3])
            p = Polygon([(min_x, min_y), (min_x, max_y), (max_x, max_y),
                         (max_x, min_y)])
            self.clip_gdf = gpd.GeoDataFrame({'geometry': [p]},
                                             geometry='geometry')
            self.clip_gdf.set_crs(epsg=4326, inplace=True)
        else:
            self.osm = OSM(self.inputs)

    def _read_clip_layer(self):
        """Read the clip geometry file, optionally a named FileGDB layer."""
        if self.layer is None:
            return gpd.read_file(self.clip_data)
        try:
            return gpd.read_file(self.clip_data,
                                 driver="FileGDB",
                                 layer=self.layer)
        except ValueError as e:
            print(e)
            # Explicit SystemExit instead of the site-provided exit()
            # helper, with a non-zero status because the run failed.
            raise SystemExit(1) from e

    def process_key(self, theme):
        """
        Workflow for processing OSM data

        Reads everything tagged with ``theme``, then handles each configured
        feature keyword separately (select, optionally clip, write).

        :param theme: OSM key to extract.
        :return: The ``theme`` that was processed.
        :raises Exception: whatever the reader raises is logged and re-raised.
        :raises KeyError: for a feature keyword other than point/line/polygon.
        """
        begin_time = time.time()
        print(f'Processing PBF for {theme}')
        try:
            gdf = self.osm.get_data_by_custom_criteria(
                osm_keys_to_keep=theme, custom_filter={theme: True})
        except Exception as e:
            print('Bad Mojo')
            print(f'Exception Exit {e} theme :{theme}')
            raise

        print('Done PBF for {} after {} seconds.'.format(
            theme, round(time.time() - begin_time, 0)))

        if gdf is None:
            print(f'\tEmpty theme {theme}')
        else:
            gdf['geom_type'] = gdf.geometry.geom_type
            for geo in self.features:
                print(f'Processing {theme}:{geo}')
                theme_time = time.time()
                wanted_types = self._GEOMETRY_TYPES[geo]
                gdf_select = gdf[gdf["geom_type"].isin(wanted_types)]
                if gdf_select.empty:
                    print(f'\tEmpty dataframe {theme}:{geo}')
                elif self.clip_gdf is None:
                    self._report_and_write(gdf_select, theme, geo, theme_time)
                else:
                    self._clip_and_write(gdf_select, theme, geo, theme_time)

        total_time = time.time() - begin_time
        print('Done {} after {} seconds.'.format(theme, round(total_time, 0)))
        return theme

    def _clip_and_write(self, gdf_select, theme, geo, theme_time):
        """Clip one selection and write it; write it unclipped on failure."""
        try:
            # Remove bad geometries in OSM file before clipping
            if not self.keep:
                start = gdf_select.shape[0]
                gdf_select = gdf_select[gdf_select.geometry.is_valid]
                removed = start - gdf_select.shape[0]
                if removed:
                    print(
                        f'\tRemoving {removed} geometries from {theme}:{geo}')
            gdp_clip = gpd.clip(gdf_select, self.clip_gdf)
            self._report_and_write(gdp_clip, theme, geo, theme_time)
        except GEOSException:
            print(f'Unable to clip {theme}:{geo} exporting unclipped')
            self.write_data(gdf_select, theme, geo)

    def _report_and_write(self, gdf_out, theme, geo, theme_time):
        """Print shape and elapsed time of one selection, then write it."""
        print('{}:{} shape {}'.format(theme, geo, gdf_out.shape))
        print('Done Geodataframe processing: {}:{} after {} seconds .'.format(
            theme, geo, round(time.time() - theme_time, 0)))
        self.write_data(gdf_out, theme, geo)

    # noinspection SpellCheckingInspection
    def write_data(self, gdf_write, theme, geo):
        """
        Write one theme/feature selection to ``self.output``.

        The file is named ``<prefix>_<theme>_<geo>.<extension>``. ``self.ext``
        selects the format: 'shp', 'geojson', and anything else is written as
        GPKG with the layer name ``<theme>_<geo>``.

        :param gdf_write: Object providing ``to_file`` (a GeoDataFrame).
        :param theme: OSM key the data belongs to.
        :param geo: Feature keyword the data belongs to.
        """
        begin_time = time.time()
        stem = f'{self.prefix}_{theme}_{geo}'
        if self.ext == 'shp':
            gdf_write.to_file(os.path.join(self.output, f'{stem}.shp'))
        elif self.ext == 'geojson':
            gdf_write.to_file(os.path.join(self.output, f'{stem}.geojson'),
                              driver='GeoJSON')
        else:
            gdf_write.to_file(os.path.join(self.output, f'{stem}.gpkg'),
                              layer='{}_{}'.format(theme, geo),
                              driver="GPKG")

        print('Done {}:{} in {} seconds to file.'.format(
            theme, geo, round(time.time() - begin_time, 0)))