def test_parsing_landuse_with_defaults(test_pbf):
    """Parse landuse from the test PBF with the default landuse tag columns.

    Verifies that the parser returns a GeoDataFrame that contains the
    ``id`` and ``geometry`` columns, has 50 rows and uses EPSG:4326.
    """
    import pyproj
    from geopandas import GeoDataFrame
    from pyrosm import OSM
    from pyrosm._arrays import concatenate_dicts_of_arrays
    from pyrosm.landuse import get_landuse_data

    reader = OSM(filepath=test_pbf)
    reader._read_pbf()

    # The reader keeps nodes as separate dicts of arrays; merge them first
    merged_nodes = concatenate_dicts_of_arrays(reader._nodes)
    landuse = get_landuse_data(
        merged_nodes,
        reader._node_coordinates,
        reader._way_records,
        reader._relations,
        reader.conf.tags.landuse,
        None,
        None,
    )

    assert isinstance(landuse, GeoDataFrame)

    # Required keys
    missing = [name for name in ("id", "geometry") if name not in landuse.columns]
    assert not missing

    # Test shape
    assert len(landuse) == 50
    assert landuse.crs == pyproj.CRS.from_epsg(4326)
def test_parsing_pois_with_defaults(helsinki_pbf, default_filter):
    """Parse POIs from the Helsinki PBF using the default filter.

    The tag columns are collected from the configuration for every key of
    ``default_filter``. Verifies that the parser returns a GeoDataFrame that
    contains the ``id`` and ``geometry`` columns, has 1782 rows and uses
    EPSG:4326.
    """
    import pyproj
    from geopandas import GeoDataFrame
    from pyrosm import OSM
    from pyrosm._arrays import concatenate_dicts_of_arrays
    from pyrosm.pois import get_poi_data

    reader = OSM(filepath=helsinki_pbf)
    reader._read_pbf()

    # Flatten the configured tags of every filtered key into one list
    poi_columns = [
        tag
        for key in default_filter.keys()
        for tag in getattr(reader.conf.tags, key)
    ]

    # The reader keeps nodes as separate dicts of arrays; merge them first
    merged_nodes = concatenate_dicts_of_arrays(reader._nodes)
    pois = get_poi_data(
        merged_nodes,
        reader._node_coordinates,
        reader._way_records,
        reader._relations,
        poi_columns,
        default_filter,
        None,
    )

    assert isinstance(pois, GeoDataFrame)

    # Required keys
    missing = [name for name in ("id", "geometry") if name not in pois.columns]
    assert not missing

    # Test shape
    assert len(pois) == 1782
    assert pois.crs == pyproj.CRS.from_epsg(4326)
def get_natural(self, custom_filter=None, extra_attributes=None):
    """
    Parses natural from OSM.

    Parameters
    ----------

    custom_filter : dict
        What kind of natural to parse, see details below.

        You can opt-in specific elements by using 'custom_filter'.
        To keep only specific natural such as 'wood' and 'tree', you can
        apply a custom filter which is a Python dictionary with following format:
          `custom_filter={'natural': ['wood', 'tree']}`

    extra_attributes : list (optional)
        Additional OSM tag keys that will be converted into columns in the
        resulting GeoDataFrame. The configured default tags are not modified.

    Returns
    -------
    The result of `get_natural_data` (may be None). Unless `keep_node_info`
    is set, a "nodes" column is dropped from the result when present.

    See Also
    --------
    Take a look at OSM documentation for further details about the data:
    `https://wiki.openstreetmap.org/wiki/Key:natural <https://wiki.openstreetmap.org/wiki/Key:natural>`__
    """
    if self._nodes is None or self._way_records is None:
        self._read_pbf()

    # Default tags to keep as columns.
    # Work on a copy: extending the configured list in place would leak
    # 'extra_attributes' into the shared configuration and into later calls.
    tags_as_columns = list(self.conf.tags.natural)

    if extra_attributes is not None:
        validate_tags_as_columns(extra_attributes)
        tags_as_columns += extra_attributes

    # If nodes are still in chunks, merge before passing forward
    if isinstance(self._nodes, list):
        self._nodes = concatenate_dicts_of_arrays(self._nodes)

    gdf = get_natural_data(
        self._nodes,
        self._node_coordinates,
        self._way_records,
        self._relations,
        tags_as_columns,
        custom_filter,
        self.bounding_box,
    )

    # Do not keep node information unless specifically asked for
    # (they are in a list, and can cause issues when saving the files)
    if not self.keep_node_info and gdf is not None:
        if "nodes" in gdf.columns:
            gdf = gdf.drop("nodes", axis=1)
    return gdf
def get_data_by_custom_criteria(
    self,
    custom_filter,
    osm_keys_to_keep=None,
    filter_type="keep",
    tags_as_columns=None,
    keep_nodes=True,
    keep_ways=True,
    keep_relations=True,
    extra_attributes=None,
):
    """
    Parse OSM data based on custom criteria.

    Parameters
    ----------

    custom_filter : dict (required)
        A custom filter to filter only specific POIs from OpenStreetMap.

    osm_keys_to_keep : str | list
        A filter to specify which OSM keys should be kept.

    filter_type : str
        "keep" | "exclude"
        Whether the filters should be used to keep or exclude the data from OSM.
        The value is compared case-insensitively.

    tags_as_columns : list
        Which tags should be kept as columns in the resulting GeoDataFrame.
        When omitted, the tags configured for the keys of 'custom_filter' are
        used; if none are configured, the filter keys themselves are used.
        The passed list is not modified.

    keep_nodes : bool
        Whether or not the nodes should be kept in the resulting GeoDataFrame if they are found.

    keep_ways : bool
        Whether or not the ways should be kept in the resulting GeoDataFrame if they are found.

    keep_relations : bool
        Whether or not the relations should be kept in the resulting GeoDataFrame if they are found.

    extra_attributes : list (optional)
        Additional OSM tag keys that will be converted into columns in the resulting GeoDataFrame.

    Returns
    -------
    The result of `get_user_defined_data` (may be None). Unless
    `keep_node_info` is set, a "nodes" column is dropped from the result
    when present.

    Raises
    ------
    ValueError
        If 'filter_type' is not a string or is not one of "keep" / "exclude".
    """
    # Check that the custom filter is in correct format
    custom_filter = validate_custom_filter(custom_filter)

    if not isinstance(filter_type, str):
        raise ValueError(
            "'filter_type' -parameter should be either 'keep' or 'exclude'. "
        )

    # Validate osm keys
    validate_osm_keys(osm_keys_to_keep)
    if isinstance(osm_keys_to_keep, str):
        osm_keys_to_keep = [osm_keys_to_keep]

    # Validate filter
    filter_type = filter_type.lower()
    if filter_type not in ["keep", "exclude"]:
        raise ValueError(
            "'filter_type' -parameter should be either 'keep' or 'exclude'. "
        )

    # Tags to keep as columns
    if tags_as_columns is None:
        tags_as_columns = []
        for k in custom_filter:
            try:
                tags_as_columns += getattr(self.conf.tags, k)
            except (AttributeError, TypeError):
                # Best effort: the key has no usable tag list in the
                # configuration, so it contributes no default columns.
                continue
        # If tags weren't available in conf, store keys as columns by default
        # (all other tags in such cases will be stored in 'tags' column as JSON)
        if not tags_as_columns:
            tags_as_columns = list(custom_filter)
    else:
        # Validate tags
        validate_tags_as_columns(tags_as_columns)
        # Copy so that appending 'extra_attributes' below never mutates
        # the list object owned by the caller.
        tags_as_columns = list(tags_as_columns)

    if extra_attributes is not None:
        validate_tags_as_columns(extra_attributes)
        tags_as_columns += extra_attributes

    # Validate booleans
    validate_booleans(keep_nodes, keep_ways, keep_relations)

    if self._nodes is None or self._way_records is None:
        self._read_pbf()

    # If nodes are still in chunks, merge before passing forward
    if isinstance(self._nodes, list):
        self._nodes = concatenate_dicts_of_arrays(self._nodes)

    gdf = get_user_defined_data(
        self._nodes,
        self._node_coordinates,
        self._way_records,
        self._relations,
        tags_as_columns,
        custom_filter,
        osm_keys_to_keep,
        filter_type,
        keep_nodes,
        keep_ways,
        keep_relations,
        self.bounding_box,
    )

    # Do not keep node information unless specifically asked for
    # (they are in a list, and can cause issues when saving the files)
    if not self.keep_node_info and gdf is not None:
        if "nodes" in gdf.columns:
            gdf = gdf.drop("nodes", axis=1)
    return gdf
def get_pois(self, custom_filter=None, extra_attributes=None):
    """
    Parse Point of Interest (POI) from OSM.

    Parameters
    ----------

    custom_filter : dict
        An optional custom filter to filter only specific POIs from OpenStreetMap,
        see details below.

    extra_attributes : list (optional)
        Additional OSM tag keys that will be converted into columns in the resulting GeoDataFrame.

    Raises
    ------
    ValueError
        If 'custom_filter' is given but is not a Python dictionary.

    Notes
    -----

    By default, Pyrosm will parse all OSM elements (points, lines and polygons)
    that are associated with following keys:
      - amenity
      - shop
      - tourism

    You can opt-out / opt-in specific elements by using 'custom_filter'.
    To parse elements associated with only specific tags, such as amenities,
    you can specify:
      `custom_filter={"amenity": True}`

    You can also combine multiple filters at the same time.
    For instance, you can parse all 'amenity' elements AND specific 'shop' elements,
    such as supermarkets and book stores by specifying:
      `custom_filter={"amenity": True, "shop": ["supermarket", "books"]}`

    Keys of 'custom_filter' that have no tag list in the Pyrosm configuration
    fall back to the configuration's basic tags for their columns.

    See Also
    --------
    You can check the most typical OSM tags for different map features from OSM Wiki
    `https://wiki.openstreetmap.org/wiki/Map_Features <https://wiki.openstreetmap.org/wiki/Map_Features>`__.
    It is also possible to get a quick look at the most typical OSM tags from Pyrosm configuration:

    >>> from pyrosm.config import Conf
    >>> print("All available OSM keys", Conf.tags.available)
    All available OSM keys ['aerialway', 'aeroway', 'amenity', 'building', 'craft',
    'emergency', 'geological', 'highway', 'historic', 'landuse', 'leisure',
    'natural', 'office', 'power', 'public_transport', 'railway', 'route',
    'place', 'shop', 'tourism', 'waterway']

    >>> print("Typical tags associated with tourism:", Conf.tags.tourism)
    ['alpine_hut', 'apartment', 'aquarium', 'artwork', 'attraction', 'camp_pitch',
    'camp_site', 'caravan_site', 'chalet', 'gallery', 'guest_house', 'hostel',
    'hotel', 'information', 'motel', 'museum', 'picnic_site', 'theme_park',
    'tourism', 'viewpoint', 'wilderness_hut', 'zoo']
    """
    if custom_filter is None:
        # No filter given: fall back to the default POI keys
        custom_filter = {"amenity": True, "shop": True, "tourism": True}
    elif not isinstance(custom_filter, dict):
        # A filter was given, but in the wrong format
        raise ValueError(
            f"'custom_filter' should be a Python dictionary. "
            f"Got {custom_filter} with type {type(custom_filter)}."
        )

    data_missing = self._nodes is None or self._way_records is None
    if data_missing:
        self._read_pbf()

    # Default tags to keep as columns: one batch of tags per filter key
    tags_as_columns = []
    for key in custom_filter:
        try:
            key_tags = getattr(self.conf.tags, key)
        except AttributeError:
            # Key unknown to the configuration
            key_tags = self.conf.tags._basic_tags
        tags_as_columns.extend(key_tags)

    if extra_attributes is not None:
        validate_tags_as_columns(extra_attributes)
        tags_as_columns += extra_attributes

    # If nodes are still in chunks, merge before passing forward
    if isinstance(self._nodes, list):
        self._nodes = concatenate_dicts_of_arrays(self._nodes)

    gdf = get_poi_data(
        self._nodes,
        self._node_coordinates,
        self._way_records,
        self._relations,
        tags_as_columns,
        custom_filter,
        self.bounding_box,
    )

    # Do not keep node information unless specifically asked for
    # (they are in a list, and can cause issues when saving the files)
    drop_nodes = (
        not self.keep_node_info
        and gdf is not None
        and "nodes" in gdf.columns
    )
    if drop_nodes:
        gdf = gdf.drop("nodes", axis=1)
    return gdf
def get_boundaries(
    self,
    boundary_type="administrative",
    name=None,
    custom_filter=None,
    extra_attributes=None,
):
    """
    Parses boundaries from OSM.

    Parameters
    ----------

    boundary_type : str
        The type of boundaries to parse. Possible values:
          - `"administrative"` (default)
          - `"national_park"`
          - `"political"`
          - `"postal_code"`
          - `"protected_area"`
          - `"aboriginal_lands"`
          - `"maritime"`
          - `"lot"`
          - `"parcel"`
          - `"tract"`
          - `"marker"`
          - `"all"`

    name : str (optional)
        Name of the administrative area that will be searched for.

    custom_filter : dict (optional)
        Additional filter for what kind of boundary to parse.

    extra_attributes : list (optional)
        Additional OSM tag keys that will be converted into columns in the
        resulting GeoDataFrame. The configured default tags are not modified.

    Returns
    -------
    The result of `get_boundary_data` (may be None). Unless `keep_node_info`
    is set, a "nodes" column is dropped from the result when present.

    Raises
    ------
    ValueError
        If 'name' is given but is not a string.

    See Also
    --------
    Take a look at OSM documentation for further details about the data:
    `https://wiki.openstreetmap.org/wiki/Key:boundary <https://wiki.openstreetmap.org/wiki/Key:boundary>`__
    """
    # Reject an invalid 'name' up front, before any data is read
    if name is not None and not isinstance(name, str):
        raise ValueError(
            f"'name' should be text. " f"Got '{name}' of type {type(name)}."
        )

    if self._nodes is None or self._way_records is None:
        self._read_pbf()

    # Default tags to keep as columns.
    # Work on a copy: extending the configured list in place would leak
    # 'extra_attributes' into the shared configuration and into later calls.
    tags_as_columns = list(self.conf.tags.boundary)

    if extra_attributes is not None:
        validate_tags_as_columns(extra_attributes)
        tags_as_columns += extra_attributes

    # If nodes are still in chunks, merge before passing forward
    if isinstance(self._nodes, list):
        self._nodes = concatenate_dicts_of_arrays(self._nodes)

    # Check boundary type
    boundary_type = validate_boundary_type(boundary_type)

    gdf = get_boundary_data(
        self._node_coordinates,
        self._way_records,
        self._relations,
        tags_as_columns,
        custom_filter,
        boundary_type,
        name,
        self.bounding_box,
    )

    # Do not keep node information unless specifically asked for
    # (they are in a list, and can cause issues when saving the files)
    if not self.keep_node_info and gdf is not None:
        if "nodes" in gdf.columns:
            gdf = gdf.drop("nodes", axis=1)
    return gdf