Exemple #1
0
    def extract_xy(self, xys, return_array=False, progress=False):
        """Sample pixel values at an array of x,y locations.

        Parameters
        ----------
        xys : 2d array-like
            x and y coordinates from which to sample the raster, arranged as
            (n_samples, 2). The input is converted with `numpy.asarray`, so
            nested sequences are accepted as well as arrays.

        return_array : bool (opt), default=False
            By default the extracted pixel values are returned as a
            geopandas.GeoDataFrame. If `return_array=True` then the extracted
            pixel values are returned as a single masked numpy array.

        progress : bool (opt), default=False
            Show a progress bar for extraction.

        Returns
        -------
        geopandas.GeoDataFrame
            Containing extracted data as point geometries if
            `return_array=False`.

        numpy.ma.MaskedArray
            2d masked array containing sampled raster values (sample, bands)
            at the x,y locations if `return_array=True`.
        """
        # the code below relies on `.shape` and 2d slicing
        xys = np.asarray(xys)

        # np.find_common_type was removed in NumPy 2.0. This reproduces what it
        # returned for this call: float32, unless a layer is complex, in which
        # case the promoted dtype of the layers is used. np.bool_ is a neutral
        # seed so that an empty list of dtypes does not raise.
        layer_dtype = np.result_type(np.bool_, *self.dtypes)
        dtype = layer_dtype if layer_dtype.kind == "c" else np.dtype(np.float32)
        X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

        # iterate the layers once, letting tqdm drive (and close) the bar
        layers = tqdm(self.iloc, total=self.count, disable=not progress)
        for band, layer in enumerate(layers):
            sampler = sample_gen(dataset=layer.ds,
                                 xy=xys,
                                 indexes=layer.bidx,
                                 masked=True)
            values = np.ma.asarray(list(sampler))
            X[:, band] = values.flatten()

        # return as geopandas array as default (or numpy arrays)
        if return_array is False:
            gdf = pd.DataFrame(X, columns=self.names)
            gdf["geometry"] = list(zip(xys[:, 0], xys[:, 1]))
            gdf["geometry"] = gdf["geometry"].apply(Point)
            gdf = gpd.GeoDataFrame(gdf, geometry="geometry", crs=self.crs)
            return gdf

        return X
Exemple #2
0
    def extract_raster(self, src, return_array=False, progress=False):
        """Sample a Raster object by an aligned raster of labelled pixels.

        Parameters
        ----------
        src: rasterio DatasetReader
            Single band raster containing labelled pixels as an open rasterio
            DatasetReader object. Only band 1 is read, and every pixel that
            is not masked is treated as a labelled pixel.

        return_array : bool (opt), default=False
            By default the extracted pixel values are returned as a
            geopandas.GeoDataFrame. If `return_array=True` then the extracted
            pixel values are returned as a tuple of numpy.ndarrays.

        progress : bool (opt), default=False
            Show a progress bar for extraction.

        Returns
        -------
        geopandas.GeoDataFrame
            Geodataframe containing extracted data as point features if
            `return_array=False`. The labels are stored in a `value` column,
            followed by one column per raster layer.

        tuple with three items if `return_array=True`
            - numpy.ndarray
                2d numpy masked array of extracted raster values
                (sample, bands).
            - numpy.ndarray
                1d numpy array of the labels of the sampled pixels.
            - numpy.ndarray
                2d numpy array of the x,y coordinates of the sampled pixels.
        """

        # open response raster and get labelled pixel indices and values;
        # getmaskarray always yields a full boolean array, even if the masked
        # array carries no explicit mask
        arr = src.read(1, masked=True)
        rows, cols = np.nonzero(~np.ma.getmaskarray(arr))
        xys = np.transpose(rasterio.transform.xy(src.transform, rows, cols))
        ys = arr.data[rows, cols]

        # np.find_common_type was removed in NumPy 2.0. This reproduces what it
        # returned for this call: float32, unless a layer is complex, in which
        # case the promoted dtype of the layers is used. np.bool_ is a neutral
        # seed so that an empty list of dtypes does not raise.
        layer_dtype = np.result_type(np.bool_, *self.dtypes)
        dtype = layer_dtype if layer_dtype.kind == "c" else np.dtype(np.float32)
        X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

        # extract Raster object values at the labelled pixel coordinates,
        # iterating the layers once and letting tqdm drive (and close) the bar
        layers = tqdm(self.iloc, total=self.count, disable=not progress)
        for band, layer in enumerate(layers):
            sampler = sample_gen(dataset=layer.ds,
                                 xy=xys,
                                 indexes=layer.bidx,
                                 masked=True)
            values = np.ma.asarray(list(sampler))
            X[:, band] = values.flatten()

        # summarize data
        if return_array is False:
            column_names = ["value"] + self.names
            gdf = pd.DataFrame(data=np.ma.column_stack((ys, X)),
                               columns=column_names)
            gdf["geometry"] = list(zip(xys[:, 0], xys[:, 1]))
            gdf["geometry"] = gdf["geometry"].apply(Point)
            gdf = gpd.GeoDataFrame(gdf, geometry="geometry", crs=self.crs)
            return gdf

        return X, ys, xys
Exemple #3
0
    def extract_vector(self, gdf, return_array=False, progress=False):
        """Sample a Raster/RasterLayer using a geopandas GeoDataframe containing
        points, lines or polygon features.

        Parameters
        ----------
        gdf: geopandas.GeoDataFrame
            Containing either point, line or polygon geometries. All
            geometries have to be of the same type (all `Polygon`, all
            `LineString` or all `Point`). Overlapping geometries will cause
            the same pixels to be sampled.

        return_array : bool (opt), default=False
            By default the extracted pixel values are returned as a
            geopandas.GeoDataFrame. If `return_array=True` then the extracted
            pixel values are returned as a tuple of numpy.ndarrays.

        progress : bool (opt), default=False
            Show a progress bar for extraction.

        Returns
        -------
        geopandas.GeoDataframe
            Containing extracted data as point geometries if
            `return_array=False`. The `id` column holds the index value of
            the geometry in `gdf` that each sampled pixel belongs to.

        tuple
            A tuple (geodataframe index, extracted values, coordinates) of the
            extracted raster values as a masked array and the coordinates of
            the extracted pixels if `return_array=True`.

        Raises
        ------
        ValueError
            If `gdf` does not consist solely of Polygon, solely of LineString
            or solely of Point geometries.
        """
        geom_types = gdf.geom_type

        # rasterize polygon and line geometries
        if all(geom_types == "Polygon") or all(geom_types == "LineString"):
            # burn the index value of each geometry into the pixels it touches
            shapes = [(geom, val)
                      for geom, val in zip(gdf.geometry, gdf.index)]
            arr = np.ma.zeros((self.height, self.width))
            arr[:] = -99999

            arr = features.rasterize(
                shapes=shapes,
                fill=-99999,
                out=arr,
                transform=self.transform,
                all_touched=True,
            )

            # pixels that still hold the fill value were not touched
            rows, cols = np.nonzero(arr != -99999)
            ids = arr[rows, cols].astype("int")
            xys = rasterio.transform.xy(transform=self.transform,
                                        rows=rows,
                                        cols=cols)
            xys = np.transpose(xys)

        elif all(geom_types == "Point"):
            ids = gdf.index.values
            # for a point, the (maxx, maxy) bounds are its x,y coordinates
            xys = gdf.bounds.iloc[:, 2:].values

        else:
            # previously this fell through and failed later with an
            # UnboundLocalError on `xys`
            raise ValueError(
                "gdf must contain only Polygon, only LineString or only "
                "Point geometries")

        # np.find_common_type was removed in NumPy 2.0. This reproduces what it
        # returned for this call: float32, unless a layer is complex, in which
        # case the promoted dtype of the layers is used. np.bool_ is a neutral
        # seed so that an empty list of dtypes does not raise.
        layer_dtype = np.result_type(np.bool_, *self.dtypes)
        dtype = layer_dtype if layer_dtype.kind == "c" else np.dtype(np.float32)
        X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

        # extract raster pixels, iterating the layers once and letting tqdm
        # drive (and close) the bar
        layers = tqdm(self.iloc, total=self.count, disable=not progress)
        for band, layer in enumerate(layers):
            sampler = sample_gen(dataset=layer.ds,
                                 xy=xys,
                                 indexes=layer.bidx,
                                 masked=True)
            values = np.ma.asarray(list(sampler))
            X[:, band] = values.flatten()

        # return as geopandas array as default (or numpy arrays)
        if return_array is False:
            df = pd.DataFrame(np.ma.column_stack((ids, X)),
                              columns=["id"] + self.names)
            # column_stack promotes the ids to the common dtype
            df["id"] = df["id"].astype("int")
            df["geometry"] = list(zip(xys[:, 0], xys[:, 1]))
            df["geometry"] = df["geometry"].apply(Point)
            return gpd.GeoDataFrame(df, geometry="geometry", crs=self.crs)

        return ids, X, xys
Exemple #4
0
def multipliers(point_path, dst, country="conus",
                regions_path='/projects/rev/data/conus/reeds_regions.tif'):
    """Use the connection data frame to create the regional multipliers.

    Each supply curve point is assigned the region value sampled from the
    regions raster, the multiplier lookup table is joined on that region, and
    points left without a multiplier take the multiplier of the nearest point
    (in projected coordinates) that has one.

    Parameters
    ----------
    point_path : str
        Path to input supply curve point file. The file is read as a CSV and
        has to provide `longitude`, `latitude` and `sc_point_gid` columns.
    dst : str
        Path to output multiplier file.
    country : str
        String representation for the country the point path represents. So far
        only 'conus' is available.
    regions_path : str
        Path to the regions raster that is sampled at each point. Defaults to
        the previously hard-coded conus raster.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If joining the multiplier table changes the number of points.
    RuntimeError
        If points are still missing a multiplier after the nearest neighbor
        search.
    """
    # Get supply curve points and multipliers
    pnts = pd.read_csv(point_path)
    mult_lkup = pd.read_csv(MULTIPLIER_PATHS[country])

    with rio.open(regions_path) as fin:
        # Get the projected coordinates of the points to match the geotiff
        proj = Proj(fin.crs.to_proj4())
        eastings, northings = proj(pnts.longitude.values,
                                   pnts.latitude.values)
        pnts['eastings'] = eastings
        pnts['northings'] = northings

        # Get the reeds regions associated with each point
        generator = sample_gen(fin, pnts[['eastings', 'northings']].values)
        results = [x[0] for x in generator]

    pnts['reeds_demand_region'] = results
    pnts_mults = pd.merge(pnts,
                          mult_lkup,
                          on='reeds_demand_region',
                          how='left')

    # Make sure the multiplier dimensions match the points. An explicit raise
    # is used because assert statements are stripped when running with -O.
    if len(pnts_mults) != len(pnts):
        raise ValueError(
            "Supply curve and multiplier point dimensions do not match.")

    # Find points with no multipliers and assign nearest neighbors
    missing = pd.isnull(pnts_mults.trans_multiplier)
    misses = pnts_mults[missing]
    hits = pnts_mults[~missing]
    # Without any hit there is nothing to search; the check below reports it
    if len(misses) > 0 and len(hits) > 0:
        hits_tree = cKDTree(hits[['eastings', 'northings']].values)
        _, idx = hits_tree.query(misses[['eastings', 'northings']].values)
        nearests = hits.iloc[idx].trans_multiplier.values
        pnts_mults.loc[misses.index.values, 'trans_multiplier'] = nearests

    n_missing = int(pd.isnull(pnts_mults.trans_multiplier).sum())
    if n_missing != 0:
        raise RuntimeError(
            "Nearest neighbor search for missing multipliers failed.")

    # Save
    cols = ['sc_point_gid', 'trans_multiplier']
    df = pnts_mults[cols]
    df.to_csv(dst)
Exemple #5
0
    def extract_vector(self, gdf, return_array=False, progress=False):
        """Sample a Raster/RasterLayer using a geopandas GeoDataframe containing
        points, lines or polygon features.

        Parameters
        ----------
        gdf: geopandas.GeoDataFrame
            Containing either point, line or polygon geometries. All
            geometries have to be of the same type (all `Polygon`, all
            `LineString` or all `Point`). Overlapping geometries will cause
            the same pixels to be sampled.

        return_array : bool (opt), default=False
            By default the extracted pixel values are returned as a
            geopandas.GeoDataFrame. If `return_array=True` then the extracted
            pixel values are returned as a tuple of numpy.ndarrays.

        progress : bool (opt), default=False
            Show a progress bar for extraction.

        Returns
        -------
        geopandas.GeoDataframe
            Containing extracted data as point geometries (one point per
            pixel) if `return_array=False`. The resulting GeoDataFrame is
            indexed using a named pandas.MultiIndex, with the `pixel_idx`
            level referring to the index of each pixel that was sampled, and
            the `geometry_idx` level referring to the index of each geometry
            in the supplied `gdf`. This makes it possible to keep track of how
            sampled pixels relate to the original geometries, i.e. multiple
            pixels being extracted within the area of a single polygon can be
            referred to using the `geometry_idx`.

            The extracted data can subsequently be joined with the attribute
            table of the supplied `gdf` by matching the `geometry_idx` level
            against the index of `gdf`, e.g.:

            training_py = geopandas.read_file(nc.polygons)
            df = self.stack.extract_vector(gdf=training_py)
            df = df.dropna()

            df = df.merge(
                right=training_py.loc[:, ("id", "label")],
                left_on="geometry_idx",
                right_index=True
            )

        tuple
            A tuple (geodataframe index, extracted values, coordinates) of the
            extracted raster values as a masked array and the coordinates of
            the extracted pixels if `return_array=True`.

        Raises
        ------
        ValueError
            If `gdf` does not consist solely of Polygon, solely of LineString
            or solely of Point geometries.
        """
        geom_types = gdf.geom_type

        # rasterize polygon and line geometries
        if all(geom_types == "Polygon") or all(geom_types == "LineString"):
            # burn the index value of each geometry into the pixels it touches
            shapes = [(geom, val)
                      for geom, val in zip(gdf.geometry, gdf.index)]
            arr = np.ma.zeros((self.height, self.width))
            arr[:] = -99999

            arr = features.rasterize(
                shapes=shapes,
                fill=-99999,
                out=arr,
                transform=self.transform,
                all_touched=True,
            )

            # pixels that still hold the fill value were not touched
            rows, cols = np.nonzero(arr != -99999)
            ids = arr[rows, cols].astype("int")
            xys = rasterio.transform.xy(transform=self.transform,
                                        rows=rows,
                                        cols=cols)
            xys = np.transpose(xys)

        elif all(geom_types == "Point"):
            ids = gdf.index.values
            # for a point, the (maxx, maxy) bounds are its x,y coordinates
            xys = gdf.bounds.iloc[:, 2:].values

        else:
            # previously this fell through and failed later with an
            # UnboundLocalError on `xys`
            raise ValueError(
                "gdf must contain only Polygon, only LineString or only "
                "Point geometries")

        # np.find_common_type was removed in NumPy 2.0. This reproduces what it
        # returned for this call: float32, unless a layer is complex, in which
        # case the promoted dtype of the layers is used. np.bool_ is a neutral
        # seed so that an empty list of dtypes does not raise.
        layer_dtype = np.result_type(np.bool_, *self.dtypes)
        dtype = layer_dtype if layer_dtype.kind == "c" else np.dtype(np.float32)
        X = np.ma.zeros((xys.shape[0], self.count), dtype=dtype)

        # extract raster pixels, iterating the layers once and letting tqdm
        # drive (and close) the bar
        layers = tqdm(self.iloc, total=self.count, disable=not progress)
        for band, layer in enumerate(layers):
            sampler = sample_gen(dataset=layer.ds,
                                 xy=xys,
                                 indexes=layer.bidx,
                                 masked=True)
            values = np.ma.asarray(list(sampler))
            X[:, band] = values.flatten()

        # return as geopandas array as default (or numpy arrays)
        if return_array is False:
            df = pd.DataFrame(data=X,
                              columns=self.names,
                              index=[pd.RangeIndex(0, X.shape[0]), ids])
            df.index.set_names(["pixel_idx", "geometry_idx"], inplace=True)
            df["geometry"] = list(zip(xys[:, 0], xys[:, 1]))
            df["geometry"] = df["geometry"].apply(Point)
            return gpd.GeoDataFrame(df, geometry="geometry", crs=self.crs)

        return ids, X, xys