Beispiel #1
0
def test_parsing_landuse_with_defaults(test_pbf):
    """Check that landuse parsing with default tags yields the expected frame.

    The PBF file supplied by the ``test_pbf`` fixture is read, its node
    chunks are merged, and the raw arrays are handed to
    ``get_landuse_data`` together with the default landuse tags. The two
    trailing ``None`` arguments are presumably the custom filter and the
    bounding box (mirroring the other ``get_*_data`` calls) -- TODO confirm.
    """
    import pyproj
    from geopandas import GeoDataFrame
    from pyrosm import OSM
    from pyrosm._arrays import concatenate_dicts_of_arrays
    from pyrosm.landuse import get_landuse_data

    reader = OSM(filepath=test_pbf)
    reader._read_pbf()
    landuse_columns = reader.conf.tags.landuse

    # Nodes are stored in chunks; merge them before parsing.
    merged_nodes = concatenate_dicts_of_arrays(reader._nodes)
    result = get_landuse_data(
        merged_nodes,
        reader._node_coordinates,
        reader._way_records,
        reader._relations,
        landuse_columns,
        None,
        None,
    )

    assert isinstance(result, GeoDataFrame)

    # Every mandatory column has to be present
    assert all(name in result.columns for name in ['id', 'geometry'])

    # Expected number of rows and coordinate reference system
    assert len(result) == 50
    assert result.crs == pyproj.CRS.from_epsg(4326)
Beispiel #2
0
def test_parsing_pois_with_defaults(helsinki_pbf, default_filter):
    """Check that POI parsing with the default filter yields the expected frame.

    The PBF file supplied by the ``helsinki_pbf`` fixture is read, the
    default tags of every key in ``default_filter`` are collected as
    columns, and the raw arrays are handed to ``get_poi_data`` with the
    filter and no bounding box.
    """
    import pyproj
    from geopandas import GeoDataFrame
    from pyrosm import OSM
    from pyrosm._arrays import concatenate_dicts_of_arrays
    from pyrosm.pois import get_poi_data

    reader = OSM(filepath=helsinki_pbf)
    reader._read_pbf()

    # Collect the configured tags for each key of the filter.
    poi_columns = []
    for key in default_filter.keys():
        poi_columns.extend(getattr(reader.conf.tags, key))

    # Nodes are stored in chunks; merge them before parsing.
    merged_nodes = concatenate_dicts_of_arrays(reader._nodes)
    result = get_poi_data(
        merged_nodes,
        reader._node_coordinates,
        reader._way_records,
        reader._relations,
        poi_columns,
        default_filter,
        None,
    )

    assert isinstance(result, GeoDataFrame)

    # Every mandatory column has to be present
    assert all(name in result.columns for name in ['id', 'geometry'])

    # Expected number of rows and coordinate reference system
    assert len(result) == 1782
    assert result.crs == pyproj.CRS.from_epsg(4326)
Beispiel #3
0
    def get_natural(self, custom_filter=None, extra_attributes=None):
        """
        Parse natural elements from OSM.

        Parameters
        ----------

        custom_filter : dict (optional)
            What kind of natural elements to parse,
            see details below.

            You can opt-in specific elements by using 'custom_filter'.
            To keep only specific natural elements such as 'wood' and 'tree',
            you can apply a custom filter which is a Python dictionary with
            the following format:
              `custom_filter={'natural': ['wood', 'tree']}`

        extra_attributes : list (optional)
            Additional OSM tag keys that will be converted into columns in the resulting GeoDataFrame.

        Returns
        -------

        The result of `get_natural_data` (may be None). Unless
        `self.keep_node_info` is set, the "nodes" column is dropped from it.

        See Also
        --------

        Take a look at OSM documentation for further details about the data:

        `https://wiki.openstreetmap.org/wiki/Key:natural <https://wiki.openstreetmap.org/wiki/Key:natural>`__

        """

        if self._nodes is None or self._way_records is None:
            self._read_pbf()

        # Default tags to keep as columns.
        # Work on a copy: the list lives on the configuration object, so
        # extending it in place would make 'extra_attributes' persist in
        # the defaults and pile up on every subsequent call.
        tags_as_columns = list(self.conf.tags.natural)

        if extra_attributes is not None:
            validate_tags_as_columns(extra_attributes)
            tags_as_columns.extend(extra_attributes)

        # If nodes are still in chunks, merge before passing forward
        if isinstance(self._nodes, list):
            self._nodes = concatenate_dicts_of_arrays(self._nodes)

        gdf = get_natural_data(self._nodes, self._node_coordinates,
                               self._way_records, self._relations,
                               tags_as_columns, custom_filter,
                               self.bounding_box)

        # Do not keep node information unless specifically asked for
        # (they are in a list, and can cause issues when saving the files)
        if not self.keep_node_info and gdf is not None:
            if "nodes" in gdf.columns:
                gdf = gdf.drop("nodes", axis=1)
        return gdf
Beispiel #4
0
    def get_data_by_custom_criteria(self,
                                    custom_filter,
                                    osm_keys_to_keep=None,
                                    filter_type="keep",
                                    tags_as_columns=None,
                                    keep_nodes=True,
                                    keep_ways=True,
                                    keep_relations=True,
                                    extra_attributes=None):
        """
        Parse OSM data based on custom criteria.

        Parameters
        ----------

        custom_filter : dict (required)
            A custom filter to filter only specific elements from OpenStreetMap.

        osm_keys_to_keep : str | list
            A filter to specify which OSM keys should be kept.

        filter_type : str
            "keep" | "exclude"
            Whether the filters should be used to keep or exclude the data from OSM.
            The value is compared case-insensitively.

        tags_as_columns : list
            Which tags should be kept as columns in the resulting GeoDataFrame.
            If not given, the configured default tags of the keys in
            'custom_filter' are used; if none are configured, the filter
            keys themselves become the columns.

        keep_nodes : bool
            Whether or not the nodes should be kept in the resulting GeoDataFrame if they are found.

        keep_ways : bool
            Whether or not the ways should be kept in the resulting GeoDataFrame if they are found.

        keep_relations : bool
            Whether or not the relations should be kept in the resulting GeoDataFrame if they are found.

        extra_attributes : list (optional)
            Additional OSM tag keys that will be converted into columns in the resulting GeoDataFrame.

        Returns
        -------

        The result of `get_user_defined_data` (may be None). Unless
        `self.keep_node_info` is set, the "nodes" column is dropped from it.

        Raises
        ------

        ValueError
            If 'filter_type' is not a string, or is neither "keep" nor
            "exclude". The validation helpers called here may raise as well.

        """

        # Check that the custom filter is in correct format
        custom_filter = validate_custom_filter(custom_filter)

        if not isinstance(filter_type, str):
            raise ValueError(
                "'filter_type' -parameter should be either 'keep' or 'exclude'. "
            )

        # Validate osm keys
        validate_osm_keys(osm_keys_to_keep)
        if isinstance(osm_keys_to_keep, str):
            osm_keys_to_keep = [osm_keys_to_keep]

        # Validate filter
        filter_type = filter_type.lower()
        if filter_type not in ["keep", "exclude"]:
            raise ValueError(
                "'filter_type' -parameter should be either 'keep' or 'exclude'. "
            )

        # Tags to keep as columns
        if tags_as_columns is None:
            tags_as_columns = []
            for k in custom_filter.keys():
                try:
                    tags_as_columns += getattr(self.conf.tags, k)
                except (AttributeError, TypeError):
                    # No usable default tags are configured for this key;
                    # skipping it is intentional (see the fallback below).
                    continue
            # If tags weren't available in conf, store keys as columns by default
            # (all other tags in such cases will be stored in 'tags' column as JSON)
            if len(tags_as_columns) == 0:
                tags_as_columns = list(custom_filter.keys())

        else:
            # Validate tags
            validate_tags_as_columns(tags_as_columns)

        if extra_attributes is not None:
            validate_tags_as_columns(extra_attributes)
            # Build a new list instead of extending in place, so that a
            # 'tags_as_columns' list supplied by the caller is not modified.
            tags_as_columns = [*tags_as_columns, *extra_attributes]

        # Validate booleans
        validate_booleans(keep_nodes, keep_ways, keep_relations)

        if self._nodes is None or self._way_records is None:
            self._read_pbf()

        # If nodes are still in chunks, merge before passing forward
        if isinstance(self._nodes, list):
            self._nodes = concatenate_dicts_of_arrays(self._nodes)

        gdf = get_user_defined_data(self._nodes, self._node_coordinates,
                                    self._way_records, self._relations,
                                    tags_as_columns, custom_filter,
                                    osm_keys_to_keep, filter_type, keep_nodes,
                                    keep_ways, keep_relations,
                                    self.bounding_box)

        # Do not keep node information unless specifically asked for
        # (they are in a list, and can cause issues when saving the files)
        if not self.keep_node_info and gdf is not None:
            if "nodes" in gdf.columns:
                gdf = gdf.drop("nodes", axis=1)
        return gdf
Beispiel #5
0
    def get_pois(self, custom_filter=None, extra_attributes=None):
        """
        Parse Points of Interest (POIs) from OSM.

        Parameters
        ----------

        custom_filter : dict
            Optional filter restricting which POIs are parsed from
            OpenStreetMap, see the notes below. Anything other than a
            dictionary (or None) raises a ValueError.

        extra_attributes : list (optional)
            Additional OSM tag keys that will be converted into columns in the resulting GeoDataFrame.


        Notes
        -----

        When no filter is given, all OSM elements (points, lines and polygons)
        associated with the following keys are parsed:
          - amenity
          - shop
          - tourism

        Use 'custom_filter' to opt-out / opt-in specific elements.
        To parse only the elements associated with a given key, such as
        amenities, specify:
          `custom_filter={"amenity": True}`

        Several filters can be combined. For instance, to parse all 'amenity'
        elements AND only specific 'shop' elements, such as supermarkets and
        book stores, specify:
          `custom_filter={"amenity": True, "shop": ["supermarket", "books"]}`


        See Also
        --------

        The most typical OSM tags for different map features are listed in the OSM Wiki
        `https://wiki.openstreetmap.org/wiki/Map_Features <https://wiki.openstreetmap.org/wiki/Map_Features>`__.
        The Pyrosm configuration also gives a quick overview of them:

        >>> from pyrosm.config import Conf
        >>> print("All available OSM keys", Conf.tags.available)
        All available OSM keys ['aerialway', 'aeroway', 'amenity', 'building', 'craft',
        'emergency', 'geological', 'highway', 'historic', 'landuse', 'leisure',
        'natural', 'office', 'power', 'public_transport', 'railway', 'route',
        'place', 'shop', 'tourism', 'waterway']

        >>> print("Typical tags associated with tourism:", Conf.tags.tourism)
        ['alpine_hut', 'apartment', 'aquarium', 'artwork', 'attraction', 'camp_pitch',
        'camp_site', 'caravan_site', 'chalet', 'gallery', 'guest_house', 'hostel',
        'hotel', 'information', 'motel', 'museum', 'picnic_site', 'theme_park',
        'tourism', 'viewpoint', 'wilderness_hut', 'zoo']

        """
        # Fall back to the default filter, otherwise insist on a dictionary
        if custom_filter is None:
            custom_filter = {"amenity": True, "shop": True, "tourism": True}
        elif not isinstance(custom_filter, dict):
            raise ValueError(
                f"'custom_filter' should be a Python dictionary. "
                f"Got {custom_filter} with type {type(custom_filter)}.")

        # Read the file lazily, only if that has not happened yet
        if self._nodes is None or self._way_records is None:
            self._read_pbf()

        # Collect the default columns of every key in the filter; keys
        # without a configured tag list fall back to the basic tags
        tags_as_columns = []
        for filter_key in custom_filter.keys():
            try:
                tags_as_columns.extend(getattr(self.conf.tags, filter_key))
            except AttributeError:
                tags_as_columns.extend(self.conf.tags._basic_tags)

        if extra_attributes is not None:
            validate_tags_as_columns(extra_attributes)
            tags_as_columns.extend(extra_attributes)

        # Chunked nodes have to be merged before they are passed forward
        if isinstance(self._nodes, list):
            self._nodes = concatenate_dicts_of_arrays(self._nodes)

        gdf = get_poi_data(
            self._nodes,
            self._node_coordinates,
            self._way_records,
            self._relations,
            tags_as_columns,
            custom_filter,
            self.bounding_box,
        )

        # Node information is only kept on request
        # (it is stored as lists, which can cause issues when saving files)
        if self.keep_node_info or gdf is None:
            return gdf
        if "nodes" in gdf.columns:
            gdf = gdf.drop("nodes", axis=1)
        return gdf
Beispiel #6
0
    def get_boundaries(self,
                       boundary_type="administrative",
                       name=None,
                       custom_filter=None,
                       extra_attributes=None):
        """
        Parse boundaries from OSM.

        Parameters
        ----------

        boundary_type : str
            The type of boundaries to parse. Possible values:
              - `"administrative"` (default)
              - `"national_park"`
              - `"political"`
              - `"postal_code"`
              - `"protected_area"`
              - `"aboriginal_lands"`
              - `"maritime"`
              - `"lot"`
              - `"parcel"`
              - `"tract"`
              - `"marker"`
              - `"all"`

        name : str (optional)
            Name of the administrative area that will be searched for.

        custom_filter : dict (optional)
            Additional filter for what kind of boundary to parse.

        extra_attributes : list (optional)
            Additional OSM tag keys that will be converted into columns in the resulting GeoDataFrame.

        Returns
        -------

        The result of `get_boundary_data` (may be None). Unless
        `self.keep_node_info` is set, the "nodes" column is dropped from it.

        Raises
        ------

        ValueError
            If 'name' is given but is not a string. The validation helpers
            called here may raise as well.


        See Also
        --------

        Take a look at OSM documentation for further details about the data:

        `https://wiki.openstreetmap.org/wiki/Key:boundary <https://wiki.openstreetmap.org/wiki/Key:boundary>`__

        """

        if self._nodes is None or self._way_records is None:
            self._read_pbf()

        # Default tags to keep as columns.
        # Work on a copy: the list lives on the configuration object, so
        # extending it in place would make 'extra_attributes' persist in
        # the defaults and pile up on every subsequent call.
        tags_as_columns = list(self.conf.tags.boundary)

        if extra_attributes is not None:
            validate_tags_as_columns(extra_attributes)
            tags_as_columns.extend(extra_attributes)

        # If nodes are still in chunks, merge before passing forward
        if isinstance(self._nodes, list):
            self._nodes = concatenate_dicts_of_arrays(self._nodes)

        # Check boundary type
        boundary_type = validate_boundary_type(boundary_type)

        if name is not None and not isinstance(name, str):
            raise ValueError(f"'name' should be text. "
                             f"Got '{name}' of type {type(name)}.")

        gdf = get_boundary_data(self._node_coordinates, self._way_records,
                                self._relations, tags_as_columns,
                                custom_filter, boundary_type, name,
                                self.bounding_box)

        # Do not keep node information unless specifically asked for
        # (they are in a list, and can cause issues when saving the files)
        if not self.keep_node_info and gdf is not None:
            if "nodes" in gdf.columns:
                gdf = gdf.drop("nodes", axis=1)
        return gdf