Example #1
0
def clip_by_extent(gdf: gpd.geodataframe.GeoDataFrame,
                   bbox: List[Union[int, float]],
                   inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Clipping vector data by extent
    Args:
        gdf: GeoDataFrame to be clipped
        bbox: list of bounds in the order minx, maxx, miny, maxy; a list of six values is also accepted, in which
            case only the first four values are used
        inplace: - bool - default False -> copy of the current gdf is created
    Return:
        gdf: GeoDataFrame with Point geometries built from the X and Y columns of the rows inside the bounds
            (bounds are inclusive), duplicates removed
    Raises:
        TypeError: if gdf is not a GeoDataFrame, bbox is not a list of ints/floats or inplace is not a bool
        ValueError: if bbox does not contain four or six values
    """

    # Checking if the gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking that the bbox is of type list
    if not isinstance(bbox, list):
        raise TypeError('Extent must be of type list')

    # Checking that all values are either ints or floats
    if not all(isinstance(n, (int, float)) for n in bbox):
        raise TypeError('Bounds values must be of type int or float')

    # Checking if inplace is of type bool
    if not isinstance(inplace, bool):
        raise TypeError('Inplace must be of type bool')

    # Checking the number of bounds up front so that a malformed bbox fails with a clear message
    if len(bbox) not in (4, 6):
        raise ValueError('bbox must contain four or six values')

    # Creating the bounds from the bbox, the last two values of a six-element bbox are ignored
    minx, maxx, miny, maxy = bbox[:4]

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Adding XY values to gdf if they are not present yet
    if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
        gdf = extract_xy(gdf)

    # Storing the CRS before the geometry column is dropped; the CRS object is passed on unchanged so that a
    # missing CRS or a CRS without EPSG code does not break the reconstruction of the GeoDataFrame
    crs = gdf.crs

    # Clipping the GeoDataFrame
    gdf = gdf[(gdf.X >= minx) & (gdf.X <= maxx) & (gdf.Y >= miny) &
              (gdf.Y <= maxy)]

    # Drop geometry column
    gdf = gdf.drop('geometry', axis=1)

    # Create new geometry column
    gdf = gpd.GeoDataFrame(gdf,
                           geometry=gpd.points_from_xy(gdf.X, gdf.Y),
                           crs=crs)

    # Drop Duplicates
    gdf = gdf.drop_duplicates()

    return gdf
Example #2
0
def extract_xy(gdf: gpd.geodataframe.GeoDataFrame,
               inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Adding the x,y coordinates of the geometries of a GeoDataFrame (Points, LineStrings or MultiLineStrings) as X and Y columns
    Args:
        gdf - gpd.geodataframe.GeoDataFrame holding the geometries
        inplace - bool - default False -> the function works on a deep copy of the gdf
    Return:
        gdf - gpd.geodataframe.GeoDataFrame with appended X and Y columns; line data is returned with one row per
        vertex and an additional 'points' column holding the vertex coordinates
    """

    # Only GeoDataFrames are accepted
    assert isinstance(
        gdf,
        gpd.geodataframe.GeoDataFrame), 'Loaded object is not a GeoDataFrame'

    # Remember the CRS, it is needed when the GeoDataFrame is rebuilt for line data
    source_crs = gdf.crs

    # Work on a deep copy unless the caller asked for inplace
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Point data: coordinates can be read directly from the geometries
    if all(gdf.geom_type == "Point"):
        gdf['X'] = gdf.geometry.x
        gdf['Y'] = gdf.geometry.y

    # MultiLineStrings are split into their LineStrings first
    if all(gdf.geom_type == "MultiLineString"):
        gdf = gdf.explode()

    # Line data: one row per vertex
    if all(gdf.geom_type == "LineString"):
        gdf['points'] = [list(line.coords) for line in gdf.geometry]
        vertices = pd.DataFrame(gdf).explode('points')
        xy = pd.DataFrame(vertices['points'].tolist(), index=vertices.index)
        vertices[['X', 'Y']] = xy
        gdf = gpd.GeoDataFrame(vertices,
                               geometry=vertices.geometry,
                               crs=source_crs)

    # dip and azimuth are converted to floats, formation to string, if the columns are present
    for column, dtype in (('dip', float), ('azimuth', float),
                          ('formation', str)):
        if column in gdf.columns:
            gdf[column] = gdf[column].astype(dtype)

    return gdf
Example #3
0
def create_linestring_gdf(
        gdf: gpd.geodataframe.GeoDataFrame) -> gpd.geodataframe.GeoDataFrame:
    """
    Create LineStrings from Points, one LineString per formation and altitude
    Args:
        gdf: GeoDataFrame containing the points of intersections between topographic contours and layer boundaries,
            must contain a formation and a Z column
    Return:
        gdf_linestring: GeoDataFrame containing the LineStrings together with the Z value and the formation each
            LineString was created from
    Raises:
        TypeError: if gdf is not a GeoDataFrame
        ValueError: if the gdf contains other geometries than Points or if the formation or Z column is missing
    """

    # Checking if gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking geometry type of GeoDataFrame
    if not all(gdf.geom_type == 'Point'):
        raise ValueError(
            'All objects of the GeoDataFrame must be of geom_type point')

    # Checking if formation and Z values are in column
    if np.logical_not(pd.Series(['formation', 'Z']).isin(gdf.columns).all()):
        raise ValueError('formation or Z column missing in GeoDataFrame')

    # Sort by Z values, sort_values returns a new object so that the input gdf stays untouched
    gdf_new = gdf.sort_values('Z')

    # Create empty lists for the LineStrings and the attributes belonging to each LineString
    linestrings = []
    altitudes = []
    formations = []

    # Create LineStrings and append to list; only altitudes that occur within a formation are used so that
    # every LineString is matched with its own Z value and formation name
    for formation in gdf_new['formation'].unique().tolist():
        gdf_formation = gdf_new[gdf_new['formation'] == formation]
        for altitude in gdf_formation['Z'].unique().tolist():
            linestrings.append(
                create_linestring(gdf_formation, formation, altitude))
            altitudes.append(altitude)
            formations.append(formation)

    # Create gdf
    gdf_linestrings = gpd.GeoDataFrame(geometry=linestrings)

    # Add Z values
    gdf_linestrings['Z'] = altitudes

    # Add formation name
    gdf_linestrings['formation'] = formations

    return gdf_linestrings
Example #4
0
def create_linestring(
        gdf: gpd.geodataframe.GeoDataFrame, formation: str,
        altitude: Union[int, float]) -> shapely.geometry.linestring.LineString:
    """
    Create a linestring object from a GeoDataFrame containing surface points at a given altitude and for a given
    formation
    Args:
        gdf: GeoDataFrame containing the points of intersections between topographic contours and layer boundaries
        formation: str/name of the formation
        altitude: int/float value of the altitude of the points, NumPy integer and floating scalars are accepted
            as well
    Return:
        linestring: shapely.geometry.linestring.LineString built from all points of the formation at the altitude,
            in the row order of the gdf
    Raises:
        TypeError: if gdf is not a GeoDataFrame, formation is not a string or altitude is not a number
        ValueError: if the gdf contains other geometries than Points or if the formation or Z column is missing
    """

    # Checking if gdf is of type GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    # Checking geometry type of GeoDataFrame
    if not all(gdf.geom_type == 'Point'):
        raise ValueError(
            'All objects of the GeoDataFrame must be of geom_type point')

    # Checking if formation and Z values are in column
    if np.logical_not(pd.Series(['formation', 'Z']).isin(gdf.columns).all()):
        raise ValueError('formation or Z column missing in GeoDataFrame')

    # Checking if the formation is of type string
    if not isinstance(formation, str):
        raise TypeError('formation must be of type string')

    # Checking if the altitude is of type int or float; NumPy scalars are allowed so that values taken
    # directly from a Z column (e.g. np.int64) are not rejected
    if not isinstance(altitude, (int, float, np.integer, np.floating)):
        raise TypeError('altitude must be of type int or float')

    # Filtering GeoDataFrame by formation and altitude, boolean indexing returns a new object so that the
    # input gdf is not modified
    selection = (gdf['formation'] == formation) & (gdf['Z'] == altitude)
    gdf_new = gdf[selection]

    # Creating LineString from all available points
    linestring = LineString(gdf_new.geometry.to_list())

    return linestring
Example #5
0
def load_surface_colors(path: str,
                        gdf: gpd.geodataframe.GeoDataFrame) -> List[str]:
    """
    Read the surface colors stored in a qml file and return them as list that can be used for gpd plots
    Args:
        path: str/path to the qml file
        gdf: GeoDataFrame whose objects are supposed to be plotted
    Return:
        cols: list of color values, one per unique value of the column named in the qml file, ordered by that value
    """

    # The path has to be a string
    if not isinstance(path, str):
        raise TypeError('path must be provided as string')

    # The data has to be a GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('object must be of type GeoDataFrame')

    # Column name and classes are read from the qml file
    column, classes = parse_categorized_qml(path)

    # Lookup from column value to color, taken from the style table
    style_table = pd.DataFrame(build_style_dict(classes)).transpose()
    color_lookup = style_table.color.to_dict()

    # The colors are attached to a deep copy so that the input gdf stays untouched
    colored = gdf.copy(deep=True)
    colored["Color"] = colored[column].replace(color_lookup)

    # Sort by the styled column and keep one row per unique value
    unique_surfaces = colored.sort_values(column).groupby(
        [column], as_index=False).last()

    return unique_surfaces['Color'].to_list()
Example #6
0
def clip_by_shape(gdf: gpd.geodataframe.GeoDataFrame,
                  shape: gpd.geodataframe.GeoDataFrame,
                  inplace: bool = False) -> gpd.geodataframe.GeoDataFrame:
    """
    Clipping vector data by the extent of another GeoDataFrame
    Args:
        gdf: GeoDataFrame to be clipped
        shape: GeoDataFrame whose extent (as returned by set_extent) is used as bbox
        inplace: - bool - default False -> copy of the current gdf is created
    Return:
        gdf: GeoDataFrame with the clipped values as returned by clip_by_extent
    """

    # Both gdf and shape have to be GeoDataFrames
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('gdf must be of type GeoDataFrame')

    if not isinstance(shape, gpd.geodataframe.GeoDataFrame):
        raise TypeError('shape must be of type GeoDataFrame')

    # inplace has to be a bool
    if not isinstance(inplace, bool):
        raise TypeError('Inplace must be of type bool')

    # Work on a deep copy unless the caller asked for inplace
    target = gdf if inplace else gdf.copy(deep=True)

    # The extent of the shape acts as bounding box for the clipping
    bounds = set_extent(gdf=shape)

    return clip_by_extent(target, bounds, inplace=inplace)
Example #7
0
def extract_z(gdf: gpd.geodataframe.GeoDataFrame,
              dem: Union[np.ndarray, rasterio.io.DatasetReader],
              inplace: bool = False,
              **kwargs) -> gpd.geodataframe.GeoDataFrame:
    """
    Extracting altitude values from digital elevation model
    Args:
        gdf - gpd.geodataframe.GeoDataFrame containing x,y values
        dem - np.ndarray or rasterio.io.DatasetReader containing the z values
        inplace - bool - default False -> copy of the current gdf is created
    Kwargs:
        extent - list containing the extent of the np.ndarray, must be provided in the same CRS as the gdf,
        required if the dem is a np.ndarray
    Return:
        gdf - gpd.geodataframe.GeoDataFrame containing x,y,z values obtained from a DEM
    Raises:
        TypeError - if gdf is not a GeoDataFrame or dem is neither a np.ndarray nor a rasterio.io.DatasetReader
        ValueError - if the gdf already contains a Z column or if sampling the raster raises an IndexError
        AssertionError - if the dem is a np.ndarray and no extent was provided
    """

    # Input object must be a GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Loaded object is not a GeoDataFrame')

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Input object must be a np.ndarray or a rasterio.io.DatasetReader
    if not isinstance(dem, (np.ndarray, rasterio.io.DatasetReader)):
        raise TypeError(
            'Loaded object is not a np.ndarray or rasterio.io.DatasetReader')

    # The GeoDataFrame must not contain a Z-column
    if pd.Series(['Z']).isin(gdf.columns).all():
        raise ValueError('Data already contains Z-values')

    # Extracting z values from a DEM loaded with Rasterio
    if isinstance(dem, rasterio.io.DatasetReader):
        try:
            if gdf.crs == dem.crs:
                if np.logical_not(
                        pd.Series(['X', 'Y']).isin(gdf.columns).all()):
                    gdf = extract_xy(gdf)
                gdf['Z'] = [
                    z[0] for z in dem.sample(gdf[['X', 'Y']].to_numpy())
                ]
            else:
                # The gdf is reprojected to the CRS of the DEM for sampling and converted back afterwards,
                # X and Y are then extracted again so that they are given in the original CRS
                crs_old = gdf.crs
                gdf = gdf.to_crs(crs=dem.crs)
                gdf = extract_xy(gdf)
                gdf['Z'] = [
                    z[0] for z in dem.sample(gdf[['X', 'Y']].to_numpy())
                ]
                gdf = gdf.to_crs(crs=crs_old)
                del gdf['X']
                del gdf['Y']
                gdf = extract_xy(gdf)
        except IndexError as err:
            # Keeping the original error as cause makes the failing lookup visible in the traceback
            raise ValueError(
                'One or more points are located outside the boundaries of the raster'
            ) from err

    # Extracting z values from a DEM as np.ndarray
    else:
        if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
            gdf = extract_xy(gdf)

        extent = kwargs.get('extent', None)

        # NOTE: assert statements are removed when Python runs with -O; the check is kept as an assert so
        # that the raised exception type stays the same for existing callers
        assert extent is not None, 'Extent of array is needed to extract Z values'

        # The coordinate list is built once instead of once per point
        gdf['Z'] = [
            sample(dem, extent, point)
            for point in gdf[['X', 'Y']].values.tolist()
        ]

    # Convert dip and azimuth columns to floats
    if pd.Series(['dip']).isin(gdf.columns).all():
        gdf['dip'] = gdf['dip'].astype(float)

    if pd.Series(['azimuth']).isin(gdf.columns).all():
        gdf['azimuth'] = gdf['azimuth'].astype(float)

    # Convert formation column to string
    if pd.Series(['formation']).isin(gdf.columns).all():
        gdf['formation'] = gdf['formation'].astype(str)

    return gdf
Example #8
0
def extract_coordinates(gdf: gpd.geodataframe.GeoDataFrame,
                        dem: Union[np.ndarray, rasterio.io.DatasetReader,
                                   type(None)] = None,
                        inplace: bool = False,
                        **kwargs) -> gpd.geodataframe.GeoDataFrame:
    """
    Extract x,y and z coordinates from a GeoDataFrame
    Args:
        gdf - gpd.geodataframe.GeoDataFrame containing Points or LineStrings
        dem - np.ndarray or rasterio.io.DatasetReader containing the z values, only needed if the gdf has no Z column
        inplace - bool - default False -> copy of the current gdf is created
    Kwargs:
        extent - list containing the extent of the np.ndarray, must be provided in the same CRS as the gdf
    Return:
        gdf - gpd.geodataframe.GeoDataFrame containing x, y and z values
    Raises:
        TypeError - if gdf is not a GeoDataFrame or dem is neither a np.ndarray nor a rasterio.io.DatasetReader
        ValueError - if the gdf has no Z column and no dem is provided, or if sampling the raster raises an
        IndexError
    """

    # Input object must be a GeoDataFrame
    if not isinstance(gdf, gpd.geodataframe.GeoDataFrame):
        raise TypeError('Loaded object is not a GeoDataFrame')

    # Create deep copy of gdf
    if not inplace:
        gdf = gdf.copy(deep=True)

    # Checking if Z is in GeoDataFrame
    if np.logical_not(pd.Series(['Z']).isin(gdf.columns).all()):
        # Checking if dem is not None
        if dem is None:
            raise ValueError('DEM is missing')

        # Checking if DEM is of type np.ndarray or rasterio object
        if not isinstance(dem, (np.ndarray, rasterio.io.DatasetReader)):
            raise TypeError(
                'Loaded object is not a np.ndarray or Rasterio object')

        extent = kwargs.get('extent', None)

        # Checking if X and Y column already exist in gdf
        if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
            # Extract XYZ values if dem is of type np.ndarray
            if isinstance(dem, np.ndarray):
                gdf = extract_z(gdf, dem, extent=extent)
            # Extract XYZ values if dem is rasterio object and CRSs are matching
            elif gdf.crs == dem.crs:
                gdf = extract_z(gdf, dem)
            # CRSs are not matching
            else:
                # X and Y are extracted in the CRS of the gdf so that the returned coordinates stay in that
                # CRS; previously the X and Y columns were deleted again and never restored
                gdf = extract_xy(gdf)

                # Only the sampling locations are reprojected to the CRS of the DEM, the geometries of the
                # gdf itself are left untouched
                locations = gpd.GeoSeries(gpd.points_from_xy(
                    gdf['X'], gdf['Y']),
                                          crs=gdf.crs).to_crs(crs=dem.crs)

                try:
                    gdf['Z'] = [
                        z[0] for z in dem.sample(
                            list(zip(locations.x, locations.y)))
                    ]
                except IndexError as err:
                    raise ValueError(
                        'One or more points are located outside the boundaries of the raster'
                    ) from err
        else:
            # Extract XYZ values if dem is of type np.ndarray
            if isinstance(dem, np.ndarray):
                gdf = extract_z(extract_xy(gdf), dem, extent=extent)
            # Extract XYZ values if dem is rasterio object and CRSs are matching
            elif gdf.crs == dem.crs:
                gdf = extract_z(extract_xy(gdf), dem)
            # Convert gdf before XYZ values extraction
            else:
                crs_old = gdf.crs
                gdf = gdf.to_crs(crs=dem.crs)
                gdf = extract_z(extract_xy(gdf), dem)
                gdf = gdf.to_crs(crs=crs_old)
                # X and Y were extracted in the CRS of the DEM, they are replaced by the coordinates in the
                # original CRS
                del gdf['X']
                del gdf['Y']
                gdf = extract_xy(gdf)
    else:
        # Checking if X and Y column already exist in gdf
        if np.logical_not(pd.Series(['X', 'Y']).isin(gdf.columns).all()):
            gdf = extract_xy(gdf, inplace=inplace)

    # Convert dip and azimuth columns to floats
    if pd.Series(['dip']).isin(gdf.columns).all():
        gdf['dip'] = gdf['dip'].astype(float)

    if pd.Series(['azimuth']).isin(gdf.columns).all():
        gdf['azimuth'] = gdf['azimuth'].astype(float)

    # Convert formation column to string
    if pd.Series(['formation']).isin(gdf.columns).all():
        gdf['formation'] = gdf['formation'].astype(str)

    return gdf