def clip_by_extent(gdf: gpd.geodataframe.GeoDataFrame,
                   bbox: List[Union[int, float]],
                   inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Clipping vector data by extent

    Rows are kept when their X/Y column values lie inside the extent
    (boundaries included). X/Y columns are created with extract_xy when they
    are missing. The geometry column of the result is rebuilt as points from
    the X/Y columns and exact duplicate rows are removed.

    Args:
        gdf: GeoDataFrame to be clipped
        bbox: list of bounds given as [minx, maxx, miny, maxy]; a list of six
            values is accepted as well, only its first four values are used
        inplace: - bool - default False -> copy of the current gdf is created
            before processing; the clipped result is a new object either way
    Return:
        gdf: GeoDataFrame with the clipped values
    Raises:
        TypeError: if gdf, bbox, the bbox values or inplace have a wrong type
        ValueError: if bbox does not contain four or six values
    """

    # Checking if the gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking that the bbox is of type list
    if not isinstance(bbox, list):
        raise TypeError('Extent must be of type list')

    # Checking that all values are either ints or floats
    if not all(isinstance(n, (int, float)) for n in bbox):
        raise TypeError('Bounds values must be of type int or float')

    # Checking if inplace is of type bool
    if not isinstance(inplace, bool):
        raise TypeError('Inplace must be of type bool')

    # Fail early with a readable message instead of an unpacking error
    if len(bbox) not in (4, 6):
        raise ValueError('bbox must contain four or six values')

    # Creating the bounds from the bbox (only the first four values are used)
    minx, maxx, miny, maxy = bbox[:4]

    # Remember the CRS now: once the geometry column is dropped the frame may
    # no longer expose a ``crs`` attribute, and the CRS may be None or have no
    # EPSG code, so it must not be rebuilt from ``to_epsg()``
    crs = gdf.crs

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Adding XY values to gdf if they are not present yet
    if not {'X', 'Y'}.issubset(gdf.columns):
        gdf = extract_xy(gdf)

    # Clipping the GeoDataFrame
    inside = ((gdf['X'] >= minx) & (gdf['X'] <= maxx)
              & (gdf['Y'] >= miny) & (gdf['Y'] <= maxy))
    clipped = gdf[inside]

    # Drop geometry column
    attributes = clipped.drop('geometry', axis=1)

    # Create new point geometry column from the X/Y columns
    result = gpd.GeoDataFrame(
        attributes,
        geometry=gpd.points_from_xy(attributes['X'], attributes['Y']),
        crs=crs)

    # Drop Duplicates
    return result.drop_duplicates()
def extract_xy(gdf: gpd.geodataframe.GeoDataFrame,
               inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Extracting x,y coordinates from a GeoDataFrame (Points or LineStrings) and
    returning a GeoDataFrame with x,y coordinates as additional columns

    Point frames get X and Y columns added. Line frames (MultiLineStrings are
    exploded to LineStrings first) are expanded to one row per vertex; each of
    these rows keeps the geometry of its line and additionally carries a
    'points' column holding the vertex. Frames with other or mixed geometry
    types are returned without X/Y columns.

    Args:
        gdf - gpd.geodataframe.GeoDataFrame created from shape file
        inplace - bool - default False -> copy of the current gdf is created
    Return:
        gdf - gpd.geodataframe.GeoDataFrame with appended x,y columns
    """

    # Input object must be a GeoDataFrame
    # NOTE: kept as an assert so that the raised exception type does not change
    assert isinstance(
        gdf,
        gpd.geodataframe.GeoDataFrame), 'Loaded object is not a GeoDataFrame'

    # Store CRS of gdf
    crs = gdf.crs

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    if all(gdf.geom_type == "Point"):
        # Extract x,y coordinates from point shape file. An empty frame also
        # ends up here (all() of nothing is True); keeping the branches
        # exclusive stops it from falling through into the line handling
        gdf['X'] = gdf.geometry.x
        gdf['Y'] = gdf.geometry.y
    else:
        # Convert MultiLineString to LineString for further processing
        if all(gdf.geom_type == "MultiLineString"):
            gdf = gdf.explode()

        # Extract x,y coordinates from line shape file
        if all(gdf.geom_type == "LineString"):
            gdf['points'] = [
                list(geometry.coords) for geometry in gdf.geometry
            ]
            df = pd.DataFrame(gdf).explode('points')
            # Only the first two ordinates are used so that lines carrying a
            # Z coordinate do not break the two-column assignment
            df['X'] = [vertex[0] for vertex in df['points']]
            df['Y'] = [vertex[1] for vertex in df['points']]
            gdf = gpd.GeoDataFrame(df, geometry=df.geometry, crs=crs)

    # Convert dip and azimuth columns to floats
    if 'dip' in gdf.columns:
        gdf['dip'] = gdf['dip'].astype(float)

    if 'azimuth' in gdf.columns:
        gdf['azimuth'] = gdf['azimuth'].astype(float)

    # Convert formation column to string
    if 'formation' in gdf.columns:
        gdf['formation'] = gdf['formation'].astype(str)

    return gdf
def create_linestring_gdf(
        gdf: gpd.geodataframe.GeoDataFrame) -> gpd.geodataframe.GeoDataFrame:
    """
    Create LineStrings from Points

    One LineString is created (via create_linestring) for every combination of
    formation and Z value that is present in gdf. Lines are ordered by
    formation (in order of first appearance after sorting by Z) and then by
    ascending Z value.

    Args:
        gdf: GeoDataFrame containing the points of intersections between
            topographic contours and layer boundaries
    Return:
        gdf_linestring: GeoDataFrame containing LineStrings with the columns
            geometry, Z and formation, using the CRS of gdf
    Raises:
        TypeError: if gdf is not a GeoDataFrame
        ValueError: if gdf contains non-point geometries or lacks the
            formation or Z column
    """

    # Checking if gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking geometry type of GeoDataFrame
    if not all(gdf.geom_type == 'Point'):
        raise ValueError(
            'All objects of the GeoDataFrame must be of geom_type point')

    # Checking if formation and Z columns are present
    if not {'formation', 'Z'}.issubset(gdf.columns):
        raise ValueError('formation or Z column missing in GeoDataFrame')

    # Sort by Z values (sort_values already returns a new object, so the
    # input is left untouched without an extra deep copy)
    gdf_sorted = gdf.sort_values('Z')

    # The formation and altitude are recorded together with every line so the
    # attribute columns always match the geometries, also when several
    # formations are present
    linestrings = []
    altitudes = []
    formations = []

    # Create LineStrings for the combinations that actually exist
    for formation in gdf_sorted['formation'].unique().tolist():
        formation_points = gdf_sorted[gdf_sorted['formation'] == formation]
        # tolist() yields plain Python numbers for create_linestring
        for altitude in formation_points['Z'].unique().tolist():
            linestrings.append(
                create_linestring(formation_points, formation, altitude))
            altitudes.append(altitude)
            formations.append(formation)

    # Create gdf, keeping the CRS of the input points
    gdf_linestrings = gpd.GeoDataFrame(geometry=linestrings, crs=gdf.crs)

    # Add Z values
    gdf_linestrings['Z'] = altitudes

    # Add formation name
    gdf_linestrings['formation'] = formations

    return gdf_linestrings
def create_linestring(
        gdf: gpd.geodataframe.GeoDataFrame, formation: str,
        altitude: Union[int,
                        float]) -> shapely.geometry.linestring.LineString:
    """
    Create a linestring object from a GeoDataFrame containing surface points
    at a given altitude and for a given formation

    The points are connected in the row order of gdf.

    Args:
        gdf: GeoDataFrame containing the points of intersections between
            topographic contours and layer boundaries
        formation: str/name of the formation
        altitude: int/float value of the altitude of the points (NumPy
            integer and floating scalars are accepted as well)
    Return:
        linestring: shapely.geometry.linestring.LineString containing a
            LineString object
    Raises:
        TypeError: if gdf, formation or altitude have a wrong type
        ValueError: if gdf contains non-point geometries or lacks the
            formation or Z column
    """

    # Checking if gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking geometry type of GeoDataFrame
    if not all(gdf.geom_type == 'Point'):
        raise ValueError(
            'All objects of the GeoDataFrame must be of geom_type point')

    # Checking if formation and Z columns are present
    if not {'formation', 'Z'}.issubset(gdf.columns):
        raise ValueError('formation or Z column missing in GeoDataFrame')

    # Checking if the formation is of type string
    if not isinstance(formation, str):
        raise TypeError('formation must be of type string')

    # Checking if the altitude is of type int or float; NumPy scalars are
    # what iterating over a pandas column yields, so they are allowed too
    if not isinstance(altitude, (int, float, np.integer, np.floating)):
        raise TypeError('altitude must be of type int or float')

    # Filtering GeoDataFrame by formation and altitude; boolean indexing
    # returns a new object, so no copy of the full frame is required
    selected = gdf[(gdf['formation'] == formation) & (gdf['Z'] == altitude)]

    # Creating LineString from all available points
    return LineString(selected.geometry.to_list())
def load_surface_colors(path: str,
                        gdf: gpd.geodataframe.GeoDataFrame) -> List[str]:
    """
    Read the surface colors stored in a qml file and return them as a list
    that can be used for plotting the GeoDataFrame

    The qml file is parsed with parse_categorized_qml, which supplies the name
    of the classification column and the classes. The values of that column
    are replaced by the 'color' entries of the style built with
    build_style_dict, and one color per distinct column value is returned,
    ordered by the column value.

    Args:
        path: str/path to the qml file
        gdf: GeoDataFrame of which objects are supposed to be plotted, usually
            loaded from a polygon/line shape file
    Return:
        cols: list of color values for each surface
    Raises:
        TypeError: if path is not a string or gdf is not a GeoDataFrame
    """

    # Validate the input types
    if not isinstance(path, str):
        raise TypeError('path must be provided as string')

    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('object must be of type GeoDataFrame')

    # Classification column and classes as stored in the qml file
    class_column, qml_classes = parse_categorized_qml(path)

    # Lookup from class value to color, taken from the style table
    style_table = pd.DataFrame(build_style_dict(qml_classes)).transpose()
    color_lookup = style_table.color.to_dict()

    # Work on a copy so that the caller's frame does not receive a new column
    styled = gdf.copy(deep=True)
    styled["Color"] = styled[class_column].replace(color_lookup)

    # Keep a single entry per class, ordered by the class column
    per_class = styled.sort_values(class_column).groupby(
        [class_column], as_index=False).last()

    return per_class['Color'].to_list()
def clip_by_shape(gdf: gpd.geodataframe.GeoDataFrame,
                  shape: gpd.geodataframe.GeoDataFrame,
                  inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Clipping vector data by the extent of another GeoDataFrame

    The extent is obtained from shape with set_extent and the clipping itself
    is delegated to clip_by_extent, i.e. the data is clipped by that extent
    and not by the outline of the geometries in shape.

    Args:
        gdf: GeoDataFrame to be clipped
        shape: GeoDataFrame acting as bbox
        inplace: - bool - default False -> copy of the current gdf is created
    Return:
        gdf: GeoDataFrame with the clipped values
    Raises:
        TypeError: if gdf or shape is not a GeoDataFrame or inplace is not a
            bool
    """

    # Checking if the gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking if the shape is of type GeoDataFrame
    if not isinstance(shape, gpd.geodataframe.GeoDataFrame):
        raise TypeError('shape must be of type GeoDataFrame')

    # Checking if inplace is of type bool
    if not isinstance(inplace, bool):
        raise TypeError('Inplace must be of type bool')

    # Setting the extent
    extent = set_extent(gdf=shape)

    # Clipping the gdf; clip_by_extent creates the deep copy itself when
    # inplace is False, so no additional copy is made here
    return clip_by_extent(gdf, extent, inplace=inplace)
def extract_z(gdf: gpd.geodataframe.GeoDataFrame,
              dem: Union[np.ndarray, rasterio.io.DatasetReader],
              inplace: bool = False,
              **kwargs) -> gpd.geodataframe.GeoDataFrame:
    """
    Extracting altitude values from digital elevation model

    X/Y columns are created with extract_xy when they are missing. For a
    rasterio dataset whose CRS differs from the CRS of gdf, only the sampling
    positions are reprojected to the CRS of the dataset; the geometry and the
    X/Y columns of gdf stay in the CRS of gdf.

    Args:
        gdf - gpd.geodataframe.GeoDataFrame containing x,y values
        dem - rasterio.io.DatasetReader containing the z values
        inplace - bool - default False -> copy of the current gdf is created
    Kwargs:
        extent - list containing the extent of the np.ndarray,
            must be provided in the same CRS as the gdf
    Return:
        gdf - gpd.geodataframe.GeoDataFrame containing x,y,z values obtained
            from a DEM
    Raises:
        TypeError: if gdf or dem have a wrong type
        ValueError: if gdf already has a Z column, or if sampling the rasterio
            dataset raises an IndexError
        AssertionError: if dem is an array and no extent is provided
    """

    # Input object must be a GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Loaded object is not a GeoDataFrame')

    # Input object must be a np.ndarray or a rasterio.io.DatasetReader
    if not isinstance(dem, (np.ndarray, rasterio.io.DatasetReader)):
        raise TypeError(
            'Loaded object is not a np.ndarray or rasterio.io.DatasetReader')

    # The GeoDataFrame must not contain a Z-column
    if 'Z' in gdf.columns:
        raise ValueError('Data already contains Z-values')

    # Create deep copy of gdf (only after the inputs have been validated)
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Adding XY values (in the CRS of the gdf) if they are not present yet
    if not {'X', 'Y'}.issubset(gdf.columns):
        gdf = extract_xy(gdf)

    # Extracting z values from a DEM loaded with Rasterio
    if isinstance(dem, rasterio.io.DatasetReader):
        if gdf.crs == dem.crs:
            sample_coords = gdf[['X', 'Y']].to_numpy()
        else:
            # Reproject only the sampling positions. Reprojecting the whole
            # frame and calling extract_xy again afterwards would expand line
            # geometries a second time and alter the stored geometries
            reprojected = gpd.GeoSeries(
                gpd.points_from_xy(gdf['X'], gdf['Y']),
                crs=gdf.crs).to_crs(crs=dem.crs)
            sample_coords = np.column_stack((reprojected.x, reprojected.y))

        # dem.sample is consumed lazily, therefore the whole comprehension
        # has to be guarded
        try:
            gdf['Z'] = [z[0] for z in dem.sample(sample_coords)]
        except IndexError as err:
            raise ValueError(
                'One or more points are located outside the boundaries of the raster'
            ) from err

    # Extracting z values from a DEM as np.ndarray
    else:
        extent = kwargs.get('extent', None)

        # NOTE: kept as an assert so that the raised exception type does not
        # change
        assert extent is not None, 'Extent of array is needed to extract Z values'

        # The coordinate list is built once instead of once per point
        gdf['Z'] = [
            sample(dem, extent, point)
            for point in gdf[['X', 'Y']].values.tolist()
        ]

    # Convert dip and azimuth columns to floats
    if 'dip' in gdf.columns:
        gdf['dip'] = gdf['dip'].astype(float)

    if 'azimuth' in gdf.columns:
        gdf['azimuth'] = gdf['azimuth'].astype(float)

    # Convert formation column to string
    if 'formation' in gdf.columns:
        gdf['formation'] = gdf['formation'].astype(str)

    return gdf
def extract_coordinates(gdf: gpd.geodataframe.GeoDataFrame,
                        dem: Union[np.ndarray, rasterio.io.DatasetReader,
                                   type(None)] = None,
                        inplace: bool = False,
                        **kwargs) -> gpd.geodataframe.GeoDataFrame:
    """
    Extract x,y and z coordinates from a GeoDataFrame

    If gdf has no Z column, the Z values are taken from dem with extract_z.
    If gdf already has a Z column, dem is not used and only missing X/Y
    columns are added with extract_xy. X/Y values and geometries are always
    returned in the CRS of gdf; for a rasterio dataset with a different CRS
    only the sampling positions are reprojected.

    Args:
        gdf - gpd.geodataframe.GeoDataFrame containing Points or LineStrings
        dem - rasterio.io.DatasetReader containing the z values
        inplace - bool - default False -> copy of the current gdf is created
    Kwargs:
        extent - list containing the extent of the np.ndarray,
            must be provided in the same CRS as the gdf
    Return:
        gdf - gpd.geodataframe.GeoDataFrame containing x, y and z values
    Raises:
        TypeError: if gdf or dem have a wrong type
        ValueError: if Z values are needed but no dem is provided
    """

    # Input object must be a GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Loaded object is not a GeoDataFrame')

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    has_xy = {'X', 'Y'}.issubset(gdf.columns)

    # Checking if Z is in GeoDataFrame
    if 'Z' not in gdf.columns:

        # Checking if dem is not None
        if dem is None:
            raise ValueError('DEM is missing')

        # Checking if DEM is of type np.ndarray or rasterio object
        if not isinstance(dem, (np.ndarray, rasterio.io.DatasetReader)):
            raise TypeError(
                'Loaded object is not a np.ndarray or Rasterio object')

        # Extract XY values exactly once and in the CRS of the gdf; calling
        # extract_xy on a frame that already holds the vertices of line
        # geometries would expand the rows a second time
        if not has_xy:
            gdf = extract_xy(gdf)

        # Extract Z values if dem is of type np.ndarray
        if isinstance(dem, np.ndarray):
            gdf = extract_z(gdf, dem, extent=kwargs.get('extent', None))

        # Extract Z values if dem is rasterio object and CRSs are matching
        elif gdf.crs == dem.crs:
            gdf = extract_z(gdf, dem)

        # Sample reprojected positions if the CRSs differ
        else:
            # Only the sampling positions are converted to the CRS of the
            # dem; the Z values are written back by position so X, Y and the
            # geometries stay untouched in the CRS of the gdf
            sample_points = gpd.GeoDataFrame(
                geometry=gpd.points_from_xy(gdf['X'], gdf['Y']),
                crs=gdf.crs).to_crs(crs=dem.crs)
            gdf['Z'] = extract_z(sample_points, dem)['Z'].to_numpy()

    # Z values are present, only XY values may have to be added
    elif not has_xy:
        gdf = extract_xy(gdf, inplace=inplace)

    # Convert dip and azimuth columns to floats
    if 'dip' in gdf.columns:
        gdf['dip'] = gdf['dip'].astype(float)

    if 'azimuth' in gdf.columns:
        gdf['azimuth'] = gdf['azimuth'].astype(float)

    # Convert formation column to string
    if 'formation' in gdf.columns:
        gdf['formation'] = gdf['formation'].astype(str)

    return gdf