def write_district_voronoi_to_shapefile(
        district_voronoi_gpd: gpd.geodataframe.GeoDataFrame,
        filename: str):
    """Write district Voronoi polygons to an ESRI Shapefile tagged as EPSG:4326.

    Args:
        district_voronoi_gpd: GeoDataFrame holding the Voronoi geometries.
        filename: Path of the shapefile to create.
    """
    # "EPSG:4326" is the authority-string spelling that replaces the
    # deprecated {'init': 'epsg:4326'} dictionary form.
    crs = "EPSG:4326"
    new_district_gpd = gpd.GeoDataFrame(
        district_voronoi_gpd,
        geometry=district_voronoi_gpd.geometry,
        crs=crs)
    # Write the CRS-tagged frame. The previous code built it and then wrote
    # the untouched input, so the CRS assignment never reached the file.
    new_district_gpd.to_file(driver='ESRI Shapefile', filename=filename)
def subset_data(grouping_polys: gpd.geodataframe.GeoDataFrame,
                thresheld_gdf: gpd.geodataframe.GeoDataFrame,
                count_gdf: gpd.geodataframe.GeoDataFrame,
                face_gdf: gpd.geodataframe.GeoDataFrame,
                buff_distance: int = 100) -> [list, list, list]:
    """
    Build three parallel lists of subsets, one entry per grouping polygon.

    For every polygon in ``grouping_polys`` the function collects
    (1) the rows of ``thresheld_gdf`` lying within the polygon,
    (2) the rows of ``count_gdf`` sharing the index of those rows, and
    (3) the rows of ``face_gdf`` lying within the bounding box of the
    polygon buffered by ``buff_distance``.

    :param grouping_polys: polygons that each group a set of centroids
    :param thresheld_gdf: frame subset by ``within(polygon)``
    :param count_gdf: frame looked up by the index of the first subset
    :param face_gdf: frame subset by the buffered bounding box
    :param buff_distance: buffer distance applied before taking the bounds
    :return: (max subsets, count subsets, face subsets)
    """
    max_frames = []
    count_frames = []
    face_frames = []

    for group_poly in grouping_polys.geometry:
        inside_group = thresheld_gdf[thresheld_gdf.within(group_poly)]
        max_frames.append(inside_group)
        count_frames.append(count_gdf.loc[inside_group.index])

        # Axis-aligned box around the buffered polygon
        min_x, min_y, max_x, max_y = group_poly.buffer(buff_distance).bounds
        search_box = Polygon([[min_x, min_y], [max_x, min_y],
                              [max_x, max_y], [min_x, max_y]])
        face_frames.append(face_gdf[face_gdf.within(search_box)])

    return max_frames, count_frames, face_frames
def mergeCountry(old: gp.geodataframe.GeoDataFrame,
                 new: gp.geodataframe.GeoDataFrame,
                 mini: str,
                 big: str) -> gp.GeoDataFrame:
    """Merge the geometry of country ``mini`` into country ``big``.

    The geometries of both countries are looked up in ``old`` (column
    ``NAME``) and combined into one MultiPolygon. The existing ``big`` row is
    dropped from ``new`` (column ``name``) and a replacement row carrying the
    merged geometry and the attributes of ``big`` from ``old`` is appended.

    Args:
        old: Source frame with ``NAME``, ``ISO2``, ``ISO3``, ``AREA``,
            ``POP2005``, ``NEIGHBORS`` and ``geometry`` columns.
        new: Target frame with a ``name`` column; it is not modified in place.
        mini: Name of the country to absorb.
        big: Name of the country that absorbs ``mini``.

    Returns:
        A new frame with a fresh 0..n-1 index containing the merged row.

    Raises:
        IndexError: If ``mini`` or ``big`` is missing from ``old``, or ``big``
            is missing from ``new``.
    """
    # Local import so the block does not depend on a module-level alias.
    import pandas as pd

    def _polygon_parts(geom):
        """Return the members of a multi-part geometry, or ``[geom]``."""
        # ``.geoms`` works on shapely 1.x and 2.x; iterating a multi-part
        # geometry directly is not supported on shapely 2.
        return list(geom.geoms) if hasattr(geom, "geoms") else [geom]

    mini_geom = old.loc[old["NAME"] == mini]["geometry"].tolist()[0]
    big_rows = old.loc[old["NAME"] == big]
    big_geom = big_rows["geometry"].tolist()[0]

    # Any failure to build the MultiPolygon now propagates instead of being
    # swallowed and resurfacing later as an unbound-variable error.
    final = MultiPolygon([*_polygon_parts(mini_geom),
                          *_polygon_parts(big_geom)])

    new = new.drop(new.loc[new["name"] == big].index.tolist()[0])

    merged_row = {
        "name": big_rows["NAME"].tolist()[0],
        "ISO2": big_rows["ISO2"].tolist()[0],
        "ISO3": big_rows["ISO3"].tolist()[0],
        "area": big_rows["AREA"].tolist()[0],
        "pop": big_rows["POP2005"].tolist()[0],
        "neighbors": big_rows["NEIGHBORS"].tolist()[0],
        "geometry": final,
    }
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    addition = gp.GeoDataFrame([merged_row], geometry="geometry",
                               crs=getattr(new, "crs", None))
    return pd.concat([new, addition], ignore_index=True)
def clip_by_extent(gdf: gpd.geodataframe.GeoDataFrame,
                   bbox: List[Union[int, float]],
                   inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Clipping vector data by extent

    Rows whose X/Y values fall inside the extent are kept, the geometry
    column is rebuilt as points from X/Y, and duplicate rows are dropped.

    Args:
        gdf: GeoDataFrame to be clipped
        bbox: list of bounds [minx, maxx, miny, maxy]; for a list of six
            values only the first four are used
        inplace: - bool - default False -> copy of the current gdf is created
    Return:
        gdf: GeoDataFrame with the clipped values
    Raises:
        TypeError: if an argument has the wrong type
        ValueError: if bbox has neither four nor six entries
    """

    # Checking if the gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking that the bbox is of type list
    if not isinstance(bbox, list):
        raise TypeError('Extent must be of type list')

    # Checking that all values are either ints or floats
    if not all(isinstance(n, (int, float)) for n in bbox):
        raise TypeError('Bounds values must be of type int or float')

    # Checking if inplace is of type bool
    if not isinstance(inplace, bool):
        raise TypeError('Inplace must be of type bool')

    # Creating the bounds from the bbox (unpacking raises ValueError for
    # any length other than four or six)
    minx, maxx, miny, maxy = bbox[0:4] if len(bbox) == 6 else bbox

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Adding XY values to gdf if they are not present yet
    if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
        gdf = extract_xy(gdf)

    # Remember the CRS now: once the geometry column is dropped the frame
    # may no longer expose a usable ``crs`` attribute.
    crs = gdf.crs

    # Clipping the GeoDataFrame
    inside = ((gdf.X >= minx) & (gdf.X <= maxx)
              & (gdf.Y >= miny) & (gdf.Y <= maxy))
    gdf = gdf[inside]

    # Drop geometry column
    gdf = gdf.drop('geometry', axis=1)

    # Create new geometry column, passing the CRS through unchanged so that
    # CRSs without an EPSG code survive as well
    gdf = gpd.GeoDataFrame(gdf,
                           geometry=gpd.points_from_xy(gdf.X, gdf.Y),
                           crs=crs)

    # Drop Duplicates
    gdf = gdf.drop_duplicates()

    return gdf
def plot_district_voronoi(voronoi_gpd: gpd.geodataframe.GeoDataFrame,
                          area_name: str):
    """Show the Voronoi tessellation of an area on a 10x10 inch figure.

    Args:
        voronoi_gpd: GeoDataFrame with the Voronoi geometries to draw.
        area_name: Name appended to the plot title.
    """
    _, axis = plt.subplots(1, 1, figsize=(10, 10))
    voronoi_gpd.plot(ax=axis)

    title = "Voronoi tessellation of " + area_name
    axis.set_title(title)
    axis.set_axis_off()

    # Equal scaling on both axes, then display
    plt.axis('equal')
    plt.show()
def plot_extreme_edges(gdf: gpd.geodataframe.GeoDataFrame,
                       aoi: gpd.geodataframe.GeoDataFrame,
                       **kwargs) -> None:
    r"""
    Plots extreme depths along edges along with an overview map
    showing current plotted domain versus all other domains.

    :param gdf: frame plotted by its ``abs_max`` column
    :param aoi: area of interest drawn underneath ``gdf``
    :param \**kwargs: See below

    :Keyword Arguments:
        * *mini_map* (gpd.geodataframe.GeoDataFrame) -- Multiple domain
          perimeters. When given, a second panel compares ``aoi`` with them.
    """
    depth_title = 'Cell Locations with Depths > 1 ft\n(Check for Ponding)'

    if 'mini_map' in kwargs:
        # Look the frame up by key; taking the first kwarg value picked the
        # wrong object whenever another keyword preceded ``mini_map``.
        mini_map = kwargs['mini_map']

        # Use the axes created by ``subplots`` directly instead of layering
        # ``subplot2grid`` axes on top of them.
        _, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

        aoi.plot(color='k', alpha=0.25, ax=ax1)
        gdf.plot(column='abs_max', cmap='viridis', legend=True, ax=ax1,
                 markersize=16)
        ax1.set_title(depth_title, fontsize=12, fontweight='bold')
        ax1.axis('off')

        mini_map.plot(color='#BFBFBF', edgecolor='k', ax=ax2, markersize=16)
        aoi.plot(color='#FFC0CB', edgecolor='k', ax=ax2)
        ax2.set_title('Current domain (pink) compared to all domains (grey)',
                      fontsize=12, fontweight='bold')
        ax2.axis('off')
    else:
        _, ax = plt.subplots(figsize=(7, 7))
        aoi.plot(color='k', alpha=0.25, ax=ax)
        gdf.plot(column='abs_max', cmap='viridis', legend=True, ax=ax,
                 markersize=16)
        ax.set_title(depth_title, fontsize=12, fontweight='bold')
        ax.axis('off')
def extract_xy(gdf: gpd.geodataframe.GeoDataFrame,
               inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Append X and Y coordinate columns to a GeoDataFrame of Points or
    (Multi)LineStrings. LineStrings are expanded to one row per vertex.
    Args:
        gdf - gpd.geodataframe.GeoDataFrame created from shape file
        inplace - bool - default False -> copy of the current gdf is created
    Return:
        gdf - gpd.geodataframe.GeoDataFrame with appended x,y columns
    """

    # Only GeoDataFrames are accepted
    assert isinstance(
        gdf, gpd.geodataframe.GeoDataFrame), 'Loaded object is not a GeoDataFrame'

    # Keep the CRS so it can be re-attached after the vertex expansion
    source_crs = gdf.crs

    # Work on a deep copy unless told otherwise
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Points: read the coordinates straight from the geometry
    if (gdf.geom_type == "Point").all():
        gdf['X'] = gdf.geometry.x
        gdf['Y'] = gdf.geometry.y

    # MultiLineStrings: split into their LineString members first
    if (gdf.geom_type == "MultiLineString").all():
        gdf = gdf.explode()

    # LineStrings: one row per vertex, X/Y taken from the vertex tuple
    if (gdf.geom_type == "LineString").all():
        gdf['points'] = [list(line.coords) for line in gdf.geometry]
        vertex_df = pd.DataFrame(gdf).explode('points')
        vertex_df[['X', 'Y']] = pd.DataFrame(vertex_df['points'].tolist(),
                                             index=vertex_df.index)
        gdf = gpd.GeoDataFrame(vertex_df,
                               geometry=vertex_df.geometry,
                               crs=source_crs)

    # dip/azimuth become floats, formation becomes a string (when present)
    for column, target_type in (('dip', float),
                                ('azimuth', float),
                                ('formation', str)):
        if pd.Series([column]).isin(gdf.columns).all():
            gdf[column] = gdf[column].astype(target_type)

    return gdf
def get_owid_data(  # pylint: disable=too-many-arguments
    cls,
    owid_data_sets: pd.DataFrame,
    shape_data: gpd.geodataframe.GeoDataFrame,
    name: str,
    year: Optional[int] = None,
    key: Optional[str] = None,
) -> tuple:
    """An Owid Data Set combined with the shape_data

    Args:
        owid_data_sets (pd.DataFrame): The list of Owid Data Sets, indexed by
            name and carrying a ``url`` column.
        shape_data (gpd.geodataframe.GeoDataFrame): The shape data for the
            map. Joined on its ``country`` column.
        name (str): The name of the Owid Data Set to look up.
        year (Optional[int], optional): A year to filter to. Defaults to None.
        key (Optional[str], optional): The name of column containing the
            values. Defaults to None, in which case the third column of the
            Owid data is used.

    Returns:
        tuple: ``(merged, key)`` where ``merged`` is the shape data
        left-joined with the Owid data (missing values in ``key`` filled
        with 0) and ``key`` is the value column that was used.
    """
    url = owid_data_sets.loc[name].url
    owid_data = cls.get_owid_df(url)

    if year is not None:
        owid_data = owid_data[owid_data["Year"] == year]

    # Left join keeps every shape, even those without Owid data
    merged = shape_data.merge(
        owid_data, left_on="country", right_on="Entity", how="left"
    )

    if key is None:
        key = owid_data.columns[2]

    merged[key] = merged[key].fillna(0)
    return merged, key
def to_geo_json_data_source(data: gpd.geodataframe.GeoDataFrame) -> GeoJSONDataSource:
    """Wrap a GeoDataFrame in a GeoJSONDataSource

    The frame is serialised with ``to_json``, parsed, and re-serialised
    before being handed to the data source.

    Args:
        data (gpd.geodataframe.GeoDataFrame): The data

    Returns:
        GeoJSONDataSource: The resulting GeoJson Data
    """
    parsed = json.loads(data.to_json())
    geojson_text = json.dumps(parsed)
    return GeoJSONDataSource(geojson=geojson_text)
def plot_descriptive_stats(stat_lists: tuple,
                           aoi: gpd.geodataframe.GeoDataFrame,
                           domain: str) -> None:
    """
    Plots the descriptive statistics (Max, Min) for cell centers with
    the area of interest underneath.

    :param stat_lists: pair ``(maximums, minimums)``; the first is plotted
        by its ``max`` column, the second by its ``min`` column
    :param aoi: area of interest drawn underneath both panels
    :param domain: domain label inserted into the figure title
    """
    maximums, minimums = stat_lists

    # Use the axes created by ``subplots`` directly instead of layering
    # ``subplot2grid`` axes on top of them.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

    aoi.plot(color='k', alpha=0.25, ax=ax1)
    maximums.plot(column='max', cmap='viridis', markersize=0.1, legend=True,
                  ax=ax1)
    ax1.set_title('Maximum Depth (ft)')

    aoi.plot(color='k', alpha=0.25, ax=ax2)
    # NOTE(review): both ``markersize`` and ``s`` are passed, as before;
    # confirm which one is meant to control the marker size.
    ax2 = minimums.plot(column='min', cmap='viridis', markersize=0.1,
                        legend=True, ax=ax2, s=1)
    ax2.set_title('Minimum Depth (ft)')

    ax1.axis('off')
    ax2.axis('off')

    fig.suptitle('Depths at Cell Centers of Domain {}'.format(domain),
                 fontsize=16, fontweight='bold')
def create_linestring_gdf(
        gdf: gpd.geodataframe.GeoDataFrame) -> gpd.geodataframe.GeoDataFrame:
    """
    Create LineStrings from Points

    One LineString is built for every (formation, Z) combination that occurs
    in the data; each row of the result carries the Z value and formation of
    the points it was built from.

    Args:
        gdf: GeoDataFrame containing the points of intersections between
            topographic contours and layer boundaries
    Return:
        gdf_linestring: GeoDataFrame containing LineStrings with ``Z`` and
            ``formation`` columns, in the CRS of ``gdf``
    Raises:
        TypeError: if gdf is not a GeoDataFrame
        ValueError: if a geometry is not a Point or a column is missing
    """

    # Checking if gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking geometry type of GeoDataFrame
    if not all(gdf.geom_type == 'Point'):
        raise ValueError(
            'All objects of the GeoDataFrame must be of geom_type point')

    # Checking if formation and Z columns are present
    if np.logical_not(pd.Series(['formation', 'Z']).isin(gdf.columns).all()):
        raise ValueError('formation or Z column missing in GeoDataFrame')

    # Create copy of gdf, sorted by Z values
    gdf_new = gdf.copy(deep=True).sort_values('Z')

    # Collect geometry and attributes side by side so they cannot drift
    # apart when several formations are present
    linestrings = []
    z_values = []
    formations = []

    for formation in gdf_new['formation'].unique().tolist():
        # Only the altitudes that actually occur for this formation
        formation_points = gdf_new[gdf_new['formation'] == formation]
        for altitude in formation_points['Z'].unique().tolist():
            linestrings.append(create_linestring(gdf_new, formation, altitude))
            z_values.append(altitude)
            formations.append(formation)

    # Create gdf, keeping the CRS of the input points
    gdf_linestrings = gpd.GeoDataFrame(geometry=linestrings, crs=gdf.crs)

    # Add Z values
    gdf_linestrings['Z'] = z_values

    # Add formation name
    gdf_linestrings['formation'] = formations

    return gdf_linestrings
def create_linestring(
        gdf: gpd.geodataframe.GeoDataFrame,
        formation: str,
        altitude: Union[int, float]) -> shapely.geometry.linestring.LineString:
    """
    Build one LineString from all points of a formation at a given altitude
    Args:
        gdf: GeoDataFrame containing the points of intersections between
            topographic contours and layer boundaries
        formation: str/name of the formation
        altitude: int/float value of the altitude of the points
    Return:
        linestring: shapely.geometry.linestring.LineString through the
            matching points, in row order
    """

    # gdf has to be a GeoDataFrame ...
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # ... that holds nothing but Points ...
    if not all(gdf.geom_type == 'Point'):
        raise ValueError(
            'All objects of the GeoDataFrame must be of geom_type point')

    # ... and provides the formation and Z columns
    required = pd.Series(['formation', 'Z'])
    if np.logical_not(required.isin(gdf.columns).all()):
        raise ValueError('formation or Z column missing in GeoDataFrame')

    # formation is a name, altitude is a number
    if not isinstance(formation, str):
        raise TypeError('formation must be of type string')
    if not isinstance(altitude, (int, float)):
        raise TypeError('altitude must be of type int or float')

    # Keep only the rows matching both formation and altitude
    matches = (gdf['formation'] == formation) & (gdf['Z'] == altitude)
    selected_points = gdf[matches].geometry.to_list()

    return LineString(selected_points)
def plot_district_boundary_on_osm_tile(
        district_gdf: gpd.geodataframe.GeoDataFrame,
        figsize: int,
        linewidth: float,
        zoom: int):
    """Plot a district boundary and add a basemap tile underneath it.

    Args:
        district_gdf (gpd.geodataframe.GeoDataFrame): Geo dataframe of a
            district.
        figsize (int): Side length of the square figure.
        linewidth (float): Width of the line used for the district boundary.
        zoom (int): Zoom level passed to contextily when adding the basemap.

    Returns:
        Whatever ``ctx.add_basemap`` returns for the boundary axes.
    """
    # Outline only: black edge, no fill
    outline_style = {
        'alpha': 0.5,
        'edgecolor': 'k',
        'facecolor': "none",
        'linewidth': linewidth,
    }
    boundary_axes = district_gdf.plot(figsize=(figsize, figsize),
                                      **outline_style)
    return ctx.add_basemap(boundary_axes, crs=district_gdf.crs, zoom=zoom)
def calculate_orientations_new(gdf: gpd.geodataframe.GeoDataFrame) -> gpd.geodataframe.GeoDataFrame:
    """
    Add an azimuth column to a GeoDataFrame of orientation LineStrings
    Args:
        gdf: GeoDataFrame containing the linestring of orientations
    Return:
        gdf: the same GeoDataFrame, with an ``azimuth`` column computed
            row by row
    """

    # Only GeoDataFrames are accepted
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Data must be a GeoDataFrame')

    # Every geometry has to be a LineString
    is_linestring = gdf.geom_type == 'LineString'
    if not all(is_linestring):
        raise TypeError('All elements must be of geometry type Linestring')

    # Row-wise azimuth, written into the frame that was passed in
    azimuths = gdf.apply(calculate_orientation_new, axis=1)
    gdf['azimuth'] = azimuths

    return gdf
def load_surface_colors(path: str, gdf: gpd.geodataframe.GeoDataFrame) -> List[str]:
    """
    Read the colors of a categorized qml style and return one color per
    unique category of the styled column, ordered by that column
    Args:
        path: str/path to the qml file
        gdf: GeoDataFrame of which objects are supposed to be plotted, usually loaded from a polygon/line shape file
    Return:
        cols: list of color values for each surface
    """

    # Argument validation
    if not isinstance(path, str):
        raise TypeError('path must be provided as string')
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('object must be of type GeoDataFrame')

    # Styled column and its classes as stored in the qml file
    column, classes = parse_categorized_qml(path)

    # Category -> color lookup taken from the style table
    style_df = pd.DataFrame(build_style_dict(classes)).transpose()
    color_lookup = style_df.color.to_dict()

    # Attach the colors to a copy so the caller's frame stays untouched
    styled = gdf.copy(deep=True)
    styled["Color"] = styled[column].replace(color_lookup)

    # One row per category, sorted by the styled column
    per_category = (styled.sort_values(column)
                    .groupby([column], as_index=False)
                    .last())

    return per_category['Color'].to_list()
def clip_by_shape(gdf: gpd.geodataframe.GeoDataFrame,
                  shape: gpd.geodataframe.GeoDataFrame,
                  inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Clipping vector data by the extent of another GeoDataFrame
    Args:
        gdf: GeoDataFrame to be clipped
        shape: GeoDataFrame whose extent is used as bbox
        inplace: - bool - default False -> copy of the current gdf is created
    Return:
        gdf: GeoDataFrame with the clipped values
    """

    # Both inputs have to be GeoDataFrames
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')
    if not isinstance(shape, gpd.geodataframe.GeoDataFrame):
        raise TypeError('shape must be of type GeoDataFrame')

    # inplace has to be a bool
    if not isinstance(inplace, bool):
        raise TypeError('Inplace must be of type bool')

    # Work on a deep copy unless told otherwise
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Derive the extent from the shape and delegate the actual clipping
    clip_extent = set_extent(gdf=shape)
    return clip_by_extent(gdf, clip_extent, inplace=inplace)
def print_cases(cases_gdf: gpd.geodataframe.GeoDataFrame):
    '''
    Prints a table of the available NLP cases (index, name, resolution,
    latitude, longitude) to the console.
    '''
    rule = '---------------------------------------------------------------'

    ### Banner
    print("\n")
    print("*********************** Available cases ***********************")

    ### Table header between two rules
    print(rule)
    _print_table_row("Index", "Name", "Resolution", "Lat.", "Lon.")
    print(rule)

    ### One table row per case; coordinates come from the first vertex
    for idx, case in cases_gdf.iterrows():
        x_values, y_values = case.geometry.coords.xy
        _print_table_row(idx, case["name"], case["res"],
                         y_values[0], x_values[0])

    ### Closing rule
    print(rule)
def interpolate_raster(gdf: gpd.geodataframe.GeoDataFrame,
                       method: str = 'nearest',
                       **kwargs) -> np.ndarray:
    """
    Interpolate raster/digital elevation model from point or line shape file
    Args:
        gdf - gpd.geodataframe.GeoDataFrame containing the z values of an area
        method - string which method of griddata is supposed to be used
            (nearest,linear,cubic,rbf)
    Kwargs:
        res - int, resolution of the raster in x and y direction, default 1
        n - int, number of rows randomly sampled from gdf before
            interpolating, default None (use all rows)
        seed - int, seed used for the sampling, default 1
        function - rbf only, radial basis function, default 'multiquadric'
        epsilon - rbf only, default 2
    Return:
         np.array as interpolated raster/digital elevation model
    Raises:
        TypeError - if an argument has the wrong type
        ValueError - if Z values are missing, n exceeds the number of rows,
            the method is unknown, or the interpolation is singular
    """

    # Checking if the gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf mus be of type GeoDataFrame')

    # Checking if Z values are in the gdf
    if np.logical_not(pd.Series(['Z']).isin(gdf.columns).all()):
        raise ValueError('Z-values not defined')

    # Checking if XY values are in the gdf
    if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
        gdf = extract_xy(gdf)

    # Getting sample number n
    n = kwargs.get('n', None)
    seed = kwargs.get('seed', 1)

    # Checking if number of samples is of type int
    if not isinstance(n, (int, type(None))):
        raise TypeError('Number of samples must be of type int')

    # Checking if seed is of type int
    if not isinstance(seed, int):
        raise TypeError('Seed must be of type int')

    # Sampling gdf; the seed is handed to ``sample`` so the global NumPy
    # random state is no longer reset as a side effect
    if n:
        if n > len(gdf):
            raise ValueError(
                'n must be smaller than the total number of points')
        gdf = gdf.sample(n, random_state=seed)

    # Checking that the method provided is of type string
    if not isinstance(method, str):
        raise TypeError('Method must be of type string')

    # Getting resolution
    res = kwargs.get('res', 1)

    # Checking if resolution is of type int
    if not isinstance(res, int):
        raise TypeError('resolution must be of type int')

    # Creating a meshgrid based on the gdf bounds
    x = np.arange(gdf.bounds.minx.min(), gdf.bounds.maxx.max(), res)
    y = np.arange(gdf.bounds.miny.min(), gdf.bounds.maxy.max(), res)
    xx, yy = np.meshgrid(x, y)

    try:
        # Interpolating the raster
        if method in ('nearest', 'linear', 'cubic'):
            array = griddata((gdf['X'], gdf['Y']), gdf['Z'], (xx, yy),
                             method=method)
        elif method == 'rbf':
            function = kwargs.get('function', 'multiquadric')
            epsilon = kwargs.get('epsilon', 2)
            rbf = Rbf(gdf['X'], gdf['Y'], gdf['Z'],
                      function=function, epsilon=epsilon)
            array = rbf(xx, yy)
        else:
            raise ValueError('No valid method defined')
    except np.linalg.LinAlgError as err:
        # Keep the numerical error attached as the cause
        raise ValueError(
            'LinAlgError: reduce the number of points by setting a value for n'
        ) from err

    return array
def updateNeighbors(df: gp.geodataframe.GeoDataFrame) -> gp.geodataframe.GeoDataFrame:
    """Fill the ``neighbors`` column with the names of non-disjoint rows.

    For every row, all rows whose geometry is not disjoint from it are
    collected, rows sharing the row's own ``name`` are left out, and the
    remaining names are stored as a comma-separated string. The frame is
    updated in place and returned.
    """
    for row_label, row in df.iterrows():
        own_name = row["name"]
        not_disjoint = ~df.geometry.disjoint(row.geometry)
        candidate_names = df[not_disjoint]["name"].tolist()

        other_names = []
        for candidate in candidate_names:
            if own_name != candidate:
                other_names.append(candidate)

        df.at[row_label, "neighbors"] = ", ".join(other_names)

    return df
def extract_z(gdf: gpd.geodataframe.GeoDataFrame,
              dem: Union[np.ndarray, rasterio.io.DatasetReader],
              inplace: bool = False,
              **kwargs) -> gpd.geodataframe.GeoDataFrame:
    """
    Extracting altitude values from digital elevation model
    Args:
        gdf - gpd.geodataframe.GeoDataFrame containing x,y values
        dem - np.ndarray or rasterio.io.DatasetReader containing the z values
        inplace - bool - default False -> copy of the current gdf is created
    Kwargs:
        extent - list containing the extent of the np.ndarray,
            must be provided in the same CRS as the gdf
    Return:
        gdf - gpd.geodataframe.GeoDataFrame containing x,y,z values
            obtained from a DEM
    Raises:
        TypeError - if gdf or dem has the wrong type
        ValueError - if gdf already has a Z column, or a point lies outside
            the raster
        AssertionError - if dem is an array and no extent is given
    """

    # Input object must be a GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Loaded object is not a GeoDataFrame')

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Input object must be a np.ndarray or a rasterio.io.DatasetReader
    if not isinstance(dem, (np.ndarray, rasterio.io.DatasetReader)):
        raise TypeError(
            'Loaded object is not a np.ndarray or rasterio.io.DatasetReader')

    # The GeoDataFrame must not contain a Z-column
    if pd.Series(['Z']).isin(gdf.columns).all():
        raise ValueError('Data already contains Z-values')

    # Extracting z values from a DEM loaded with Rasterio
    if isinstance(dem, rasterio.io.DatasetReader):
        try:
            if gdf.crs == dem.crs:
                if np.logical_not(
                        pd.Series(['X', 'Y']).isin(gdf.columns).all()):
                    gdf = extract_xy(gdf)
                gdf['Z'] = [
                    z[0] for z in dem.sample(gdf[['X', 'Y']].to_numpy())
                ]
            else:
                # Sample in the CRS of the raster, then return to the
                # original CRS and recompute X/Y there
                crs_old = gdf.crs
                gdf = gdf.to_crs(crs=dem.crs)
                gdf = extract_xy(gdf)
                gdf['Z'] = [
                    z[0] for z in dem.sample(gdf[['X', 'Y']].to_numpy())
                ]
                gdf = gdf.to_crs(crs=crs_old)
                del gdf['X']
                del gdf['Y']
                gdf = extract_xy(gdf)
        except IndexError as err:
            raise ValueError(
                'One or more points are located outside the boundaries of the raster'
            ) from err

    # Extracting z values from a DEM as np.ndarray
    else:
        if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
            gdf = extract_xy(gdf)

        extent = kwargs.get('extent', None)

        assert extent is not None, 'Extent of array is needed to extract Z values'

        # Build the coordinate list once; it was previously rebuilt for
        # every single point
        gdf['Z'] = [
            sample(dem, extent, point)
            for point in gdf[['X', 'Y']].values.tolist()
        ]

    # Convert dip and azimuth columns to floats
    if pd.Series(['dip']).isin(gdf.columns).all():
        gdf['dip'] = gdf['dip'].astype(float)

    if pd.Series(['azimuth']).isin(gdf.columns).all():
        gdf['azimuth'] = gdf['azimuth'].astype(float)

    # Convert formation column to string
    if pd.Series(['formation']).isin(gdf.columns).all():
        gdf['formation'] = gdf['formation'].astype(str)

    return gdf
def extract_coordinates(gdf: gpd.geodataframe.GeoDataFrame,
                        dem: Union[np.ndarray, rasterio.io.DatasetReader, type(None)] = None,
                        inplace: bool = False,
                        **kwargs) -> gpd.geodataframe.GeoDataFrame:
    """
    Extract x,y and z coordinates from a GeoDataFrame
    Args:
        gdf - gpd.geodataframe.GeoDataFrame containing Points or LineStrings
        dem - np.ndarray or rasterio.io.DatasetReader containing the z
            values; only needed when gdf has no Z column
        inplace - bool - default False -> copy of the current gdf is created
    Kwargs:
        extent - list containing the extent of the np.ndarray,
            must be provided in the same CRS as the gdf
    Return:
        gdf - gpd.geodataframe.GeoDataFrame containing x, y and z values
    Raises:
        TypeError - if gdf or dem has the wrong type
        ValueError - if gdf has no Z column and no dem is given
    """

    # Input object must be a GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Loaded object is not a GeoDataFrame')

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Checking if Z is in GeoDataFrame
    if np.logical_not(pd.Series(['Z']).isin(gdf.columns).all()):
        # Checking if dem is not None
        if dem is None:
            raise ValueError('DEM is missing')

        # Checking if DEM is of type np.ndarray or rasterio object
        if not isinstance(dem, (np.ndarray, rasterio.io.DatasetReader)):
            raise TypeError(
                'Loaded object is not a np.ndarray or Rasterio object')

        extent = kwargs.get('extent', None)

        # The former "X/Y present" and "X/Y missing" branches did the same
        # work (extract_z fills in missing X/Y itself), so they are merged.
        # NOTE(review): extract_xy is still run when X/Y already exist, as
        # before - confirm that recomputing them is intended.
        if isinstance(dem, np.ndarray):
            # Extract XYZ values if dem is of type np.ndarray
            gdf = extract_z(extract_xy(gdf), dem, extent=extent)
        elif gdf.crs == dem.crs:
            # Extract XYZ values if CRSs are matching
            gdf = extract_z(extract_xy(gdf), dem)
        else:
            # Sample Z in the CRS of the raster, then go back to the
            # original CRS and recompute X/Y there. One of the two former
            # copies of this branch skipped the last step and returned a
            # frame without X/Y columns.
            crs_old = gdf.crs
            gdf = gdf.to_crs(crs=dem.crs)
            gdf = extract_z(extract_xy(gdf), dem)
            gdf = gdf.to_crs(crs=crs_old)
            del gdf['X']
            del gdf['Y']
            gdf = extract_xy(gdf)
    else:
        # Z is present: only X and Y may still be missing
        if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
            gdf = extract_xy(gdf, inplace=inplace)

    # Convert dip and azimuth columns to floats
    if pd.Series(['dip']).isin(gdf.columns).all():
        gdf['dip'] = gdf['dip'].astype(float)

    if pd.Series(['azimuth']).isin(gdf.columns).all():
        gdf['azimuth'] = gdf['azimuth'].astype(float)

    # Convert formation column to string
    if pd.Series(['formation']).isin(gdf.columns).all():
        gdf['formation'] = gdf['formation'].astype(str)

    return gdf
def plot_removed_values(faults: gpd.geodataframe.GeoDataFrame,
                        vertices_out: gpd.geodataframe.GeoDataFrame,
                        vertices_in: gpd.geodataframe.GeoDataFrame,
                        radius: Union[float, int],
                        **kwargs):
    """
    Plotting the points that were kept and removed and traces of layer
    boundaries and faults
    Args:
        faults: GeoDataFrame containing the fault LineStrings
        vertices_out: GeoDataFrame containing the kept vertices
        vertices_in: GeoDataFrame containing the removed vertices
        radius: float/int indicating the radius of the buffer around faults
    Kwargs:
        color_vertices_out: str/color value for vertices_out
        color_vertices_in: str/color value for vertices_in
        color_fault_traces: str/color value for fault traces
        color_fault_buffer: str/color value for fault buffer
    Return:
        fig, ax: the created matplotlib figure and axes
    Raises:
        TypeError: if a color is not a string, an input is not a
            GeoDataFrame, or a geometry has the wrong type
    """

    # Getting the colors, falling back to the defaults
    color_vertices_out = kwargs.get('color_vertices_out', 'green')
    color_vertices_in = kwargs.get('color_vertices_in', 'red')
    color_fault_traces = kwargs.get('color_fault_traces', '#1f77b4')
    color_fault_buffer = kwargs.get('color_fault_buffer', '#adebad')

    # Checking that every color value is a string (previously the first
    # color was checked four times and the others never)
    for color in (color_vertices_out, color_vertices_in,
                  color_fault_traces, color_fault_buffer):
        if not isinstance(color, str):
            raise TypeError('Color values must be provided as strings')

    # Checking that the faults are stored as GeoDataFrame
    if not isinstance(faults, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Faults must be of type GeoDataFrame')

    # Checking that the faults are all of geom_type LineString
    if not all(faults.geom_type == 'LineString'):
        raise TypeError('All faults must be of type LineString')

    # Checking that the kept vertices are stored as GeoDataFrame
    # (previously ``faults`` was tested here by mistake)
    if not isinstance(vertices_out, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Kept vertices must be of type GeoDataFrame')

    # Checking that the removed vertices are stored as GeoDataFrame
    # (previously ``faults`` was tested here by mistake)
    if not isinstance(vertices_in, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Removed vertices must be of type GeoDataFrame')

    # Checking that the vertices are all of geom_type Point
    if not all(vertices_out.geom_type == 'Point'):
        raise TypeError('All vertices must be of type Point')

    # Checking that the vertices are all of geom_type Point
    if not all(vertices_in.geom_type == 'Point'):
        raise TypeError('All vertices must be of type Point')

    # Create buffer around faults; iterating the geometries works for any
    # index, unlike label lookups with 0..n-1
    faults_buffer = [geometry.buffer(radius) for geometry in faults.geometry]

    # Create GeoDataFrame from buffered entries
    faults_buffer_gdf = gpd.GeoDataFrame({'geometry': faults_buffer},
                                         crs=faults.crs)

    # Create figure
    fig, ax = plt.subplots(figsize=(15, 15))

    # Plot Faults
    faults.plot(ax=ax, aspect='equal', color=color_fault_traces)

    # Plot removed and kept vertices
    vertices_out.plot(ax=ax, color=color_vertices_out, zorder=5)
    vertices_in.plot(ax=ax, color=color_vertices_in, zorder=5)

    # Plotting the buffer around faults
    faults_buffer_gdf.plot(ax=ax,
                           aspect='equal',
                           color=color_fault_buffer,
                           zorder=1)

    # Plot grid
    plt.grid()

    return fig, ax