Code example #1
0
File: misc.py  Project: mdkrol/dask-geomodeling
    def process(data, process_kwargs):
        """Rasterize a feature collection into a raster block.

        :param data: dict with "features" (a frame with an optional
            "geometry" column) and "projection"; for "time"/"meta" modes it
            is passed through untouched
        :param process_kwargs: dict with "mode", "column_name", "height",
            "width", "no_data_value", "dtype", and "bbox"
        :returns: dict with "values" (a (1, height, width) array) and
            "no_data_value", or a "time"/"meta" dict for those modes
        """
        # first handle the time and meta requests
        mode = process_kwargs["mode"]
        if mode == "time":
            return {"time": [data]}
        elif mode == "meta":
            return {"meta": [None]}

        column_name = process_kwargs["column_name"]
        height = process_kwargs["height"]
        width = process_kwargs["width"]
        no_data_value = process_kwargs["no_data_value"]
        dtype = process_kwargs["dtype"]
        f = data["features"]

        # get the value column to rasterize
        if column_name is None:
            values = None
        else:
            try:
                values = f[column_name]
            except KeyError:
                if f.index.name == column_name:
                    # the requested column is actually the frame's index
                    values = f.index.to_series()
                else:
                    # sentinel: the column does not exist at all
                    values = False

        if len(f) == 0 or values is False:  # there is no data to rasterize
            values = np.full((1, height, width), no_data_value, dtype=dtype)
            return {"values": values, "no_data_value": no_data_value}

        result = rasterize_geoseries(
            geoseries=f["geometry"] if "geometry" in f else None,
            values=values,
            bbox=process_kwargs["bbox"],
            projection=data["projection"],
            height=height,
            width=width,
        )

        values = result["values"]

        # cast to the expected dtype if necessary (reuse the local bound
        # above instead of a second process_kwargs lookup)
        cast_values = values.astype(dtype)

        # replace the nodata value if necessary
        if result["no_data_value"] != no_data_value:
            cast_values[values == result["no_data_value"]] = no_data_value

        return {"values": cast_values, "no_data_value": no_data_value}
Code example #2
0
    def process(data, request):
        """Rasterize a single stored WKT geometry into a boolean mask.

        :param data: dict with "wkt" and "projection" describing the geometry
        :param request: dict with "mode", "bbox", "projection", "height",
            and "width"
        :returns: dict with "values" and "no_data_value", or a "time"/"meta"
            dict for those modes
        """
        mode = request["mode"]
        if mode == "time":
            return {"time": [data]}
        elif mode == "meta":
            return {"meta": [None]}
        # load the geometry and transform it into the requested projection
        geometry = load_wkt(data["wkt"])
        if data["projection"] != request["projection"]:
            geometry = utils.shapely_transform(geometry, data["projection"],
                                               request["projection"])

        # take a shortcut when the geometry does not intersect the bbox
        x1, y1, x2, y2 = request["bbox"]
        if (x1 == x2) and (y1 == y2):
            # Don't do box(x1, y1, x2, y2), this gives an invalid geometry.
            bbox_geom = Point(x1, y1)
        else:
            bbox_geom = box(x1, y1, x2, y2)
        if not geometry.intersects(bbox_geom):
            # all-False mask; use the builtin bool — the np.bool alias was
            # deprecated in NumPy 1.20 and removed in 1.24
            return {
                "values":
                np.full((1, request["height"], request["width"]),
                        False,
                        dtype=bool),
                "no_data_value":
                None,
            }

        return utils.rasterize_geoseries(
            geoseries=GeoSeries([geometry]) if not geometry.is_empty else None,
            bbox=request["bbox"],
            projection=request["projection"],
            height=request["height"],
            width=request["width"],
        )
Code example #3
0
File: test_utils.py  Project: nens/dask-geomodeling
 def test_rasterize_categorical_float(self):
     """A categorical series of floats rasterizes to float64 values."""
     categories = pd.Series([1.2, 2.4], dtype="category")
     result = utils.rasterize_geoseries(
         self.geoseries, values=categories, **self.box
     )
     self.assertEqual(np.float64, result["values"].dtype)
Code example #4
0
File: test_utils.py  Project: nens/dask-geomodeling
 def test_rasterize_categorical_int(self):
     """A categorical series of ints rasterizes to int32 values."""
     categories = pd.Series([1, 2], dtype="category")
     result = utils.rasterize_geoseries(
         self.geoseries, values=categories, **self.box
     )
     self.assertEqual(np.int32, result["values"].dtype)
Code example #5
0
File: test_utils.py  Project: nens/dask-geomodeling
 def test_rasterize_float_point_nodata(self):
     """A float rasterization on a point outside the data yields nodata."""
     float_values = pd.Series([1.2, 2.4])
     result = utils.rasterize_geoseries(
         self.geoseries, values=float_values, **self.point_out
     )
     self.assertTupleEqual(result["values"].shape, (1, 1, 1))
     assert_array_equal(result["values"], result["no_data_value"])
Code example #6
0
File: test_utils.py  Project: nens/dask-geomodeling
 def test_rasterize_int_point(self):
     """An int rasterization on a point inside the data yields its value."""
     int_values = pd.Series([1, 2])
     result = utils.rasterize_geoseries(
         self.geoseries, values=int_values, **self.point_in
     )
     self.assertTupleEqual(result["values"].shape, (1, 1, 1))
     assert_array_equal(result["values"], 1)
Code example #7
0
File: test_utils.py  Project: nens/dask-geomodeling
 def test_rasterize_none_geometry(self):
     """A None geometry is skipped; only the remaining one is burned in."""
     self.geoseries.iloc[1] = None
     result = utils.rasterize_geoseries(self.geoseries, **self.box)
     self.assertEqual(2 * 2, result["values"].sum())
Code example #8
0
File: test_utils.py  Project: nens/dask-geomodeling
 def test_rasterize_point_false(self):
     """A point request outside the geometry yields an all-False raster."""
     result = utils.rasterize_geoseries(self.geoseries, **self.point_out)
     self.assertTupleEqual(result["values"].shape, (1, 1, 1))
     assert_array_equal(result["values"], False)
Code example #9
0
    def process(geom_data, raster_data, process_kwargs):
        """Aggregate raster values into one statistic per feature geometry.

        For every feature in ``geom_data["features"]`` the configured
        statistic of the overlapping ``raster_data["values"]`` cells is
        computed and stored in a new column (``result_column``) of a copy of
        the feature frame.

        :param geom_data: dict with "features" (a geo frame) — passed through
            unchanged for "extent" mode or when there are no features
        :param raster_data: dict with "values" (a (depth, height, width)
            array) and "no_data_value", or None when no raster data exists
        :param process_kwargs: parameters for this request; keys accessed:
            "empty", "mode", "projection", "req_srs", "agg_srs", "statistic",
            "result_column", "threshold_name", "pixel_size",
            "actual_pixel_size", "agg_bbox"
        :returns: dict with "features" (the frame with the result column
            added) and "projection" (the request srs)
        """
        # an "empty" request short-circuits to an empty frame
        if process_kwargs.get("empty"):
            return {
                "features": gpd.GeoDataFrame([]),
                "projection": process_kwargs["projection"],
            }
        elif process_kwargs["mode"] == "extent":
            return geom_data

        features = geom_data["features"]
        if len(features) == 0:
            return geom_data

        # never mutate the incoming frame; the result column goes on a copy
        result = features.copy()

        # transform the features into the aggregation projection
        req_srs = process_kwargs["req_srs"]
        agg_srs = process_kwargs["agg_srs"]

        agg_geometries = utils.geoseries_transform(features["geometry"],
                                                   req_srs, agg_srs)

        statistic = process_kwargs["statistic"]
        # "pXX" statistics map to the "percentile" function with qval=XX
        percentile = utils.parse_percentile_statistic(statistic)
        if percentile:
            statistic = "percentile"
            agg_func = partial(AggregateRaster.STATISTICS[statistic]["func"],
                               qval=percentile)
        else:
            agg_func = AggregateRaster.STATISTICS[statistic]["func"]

        # extensive statistics (sum/count) scale with pixel area; see below
        extensive = AggregateRaster.STATISTICS[statistic]["extensive"]
        result_column = process_kwargs["result_column"]

        # this is only there for the AggregateRasterAboveThreshold
        threshold_name = process_kwargs.get("threshold_name")
        if threshold_name:
            # get the threshold, appending NaN for unlabeled pixels
            threshold_values = np.empty((len(features) + 1, ), dtype="f4")
            threshold_values[:-1] = features[threshold_name].values
            threshold_values[-1] = np.nan
        else:
            threshold_values = None

        # investigate the raster data
        if raster_data is None:
            values = no_data_value = None
        else:
            values = raster_data["values"]
            no_data_value = raster_data["no_data_value"]
        if values is None or np.all(values == no_data_value):  # skip the rest
            # no data at all: extensive statistics default to 0, others to NaN
            result[result_column] = 0 if extensive else np.nan
            return {"features": result, "projection": req_srs}
        depth, height, width = values.shape

        pixel_size = process_kwargs["pixel_size"]
        actual_pixel_size = process_kwargs["actual_pixel_size"]

        # process in groups of disjoint subsets of the features
        agg = np.full((depth, len(features)), np.nan, dtype="f4")
        for select in bucketize(features.bounds.values):
            # burn the feature positions into a label raster so each pixel
            # maps back to the feature covering it
            rasterize_result = utils.rasterize_geoseries(
                agg_geometries.iloc[select],
                process_kwargs["agg_bbox"],
                agg_srs,
                height,
                width,
                values=np.asarray(select, dtype=np.int32),  # GDAL needs int32
            )
            labels = rasterize_result["values"][0]

            # if there is a threshold, generate a raster with thresholds
            if threshold_name:
                # mode="clip" ensures that unlabeled cells use the appended NaN
                thresholds = np.take(threshold_values, labels, mode="clip")
            else:
                thresholds = None

            for frame_no, frame in enumerate(values):
                # limit statistics to active pixels
                active = frame != no_data_value
                # if there is a threshold, mask the frame
                if threshold_name:
                    valid = ~np.isnan(thresholds)  # to suppress warnings
                    active[~valid] = False  # no threshold -> no aggregation
                    active[valid] &= frame[valid] >= thresholds[valid]

                # if there is no single active value: do not aggregate
                if not active.any():
                    continue

                # select features that actually have data
                # (min, max, median, and percentile cannot handle it otherwise)
                active_labels = labels[active]
                select_and_active = list(
                    set(np.unique(active_labels)) & set(select))

                if not select_and_active:
                    continue

                agg[frame_no][select_and_active] = agg_func(
                    1 if statistic == "count" else frame[active],
                    labels=active_labels,
                    index=select_and_active,
                )

        if extensive:  # sum and count
            agg[~np.isfinite(agg)] = 0
            # extensive aggregations have to be scaled
            if actual_pixel_size != pixel_size:
                agg *= (actual_pixel_size / pixel_size)**2
        else:
            agg[~np.isfinite(agg)] = np.nan  # replaces inf by nan
 
        if depth == 1:
            result[result_column] = agg[0]
        else:
            # store an array in a dataframe cell: set each cell with [np.array]
            result[result_column] = [[x] for x in agg.T]

        return {"features": result, "projection": req_srs}
Code example #10
0
    def process(geom_data, raster_data, process_kwargs):
        """Aggregate raster values into one statistic per feature geometry.

        Variant that burns the frame's own index values as labels and
        aggregates every bucketed subset in one call, suppressing the
        divide-by-zero warnings that empty geometries produce.

        :param geom_data: dict with "features" (a geo frame) — passed through
            unchanged for "extent" mode or when there are no features
        :param raster_data: dict with "values" (a (depth, height, width)
            array) and "no_data_value", or None when no raster data exists
        :param process_kwargs: parameters for this request; keys accessed:
            "empty", "mode", "projection", "req_srs", "agg_srs", "statistic",
            "result_column", "threshold_name", "pixel_size",
            "actual_pixel_size", "agg_bbox"
        :returns: dict with "features" (the frame with the result column
            added) and "projection" (the request srs)
        """
        # an "empty" request short-circuits to an empty frame
        if process_kwargs.get("empty"):
            return {
                "features": gpd.GeoDataFrame([]),
                "projection": process_kwargs["projection"],
            }
        elif process_kwargs["mode"] == "extent":
            return geom_data

        features = geom_data["features"]
        if len(features) == 0:
            return geom_data

        # never mutate the incoming frame; the result column goes on a copy
        result = features.copy()

        # transform the features into the aggregation projection
        req_srs = process_kwargs["req_srs"]
        agg_srs = process_kwargs["agg_srs"]

        agg_geometries = utils.geoseries_transform(
            features["geometry"],
            req_srs,
            agg_srs,
        )

        statistic = process_kwargs["statistic"]
        # "pXX" statistics map to the "percentile" function with qval=XX
        percentile = utils.parse_percentile_statistic(statistic)
        if percentile:
            statistic = "percentile"
            agg_func = partial(AggregateRaster.STATISTICS[statistic]["func"],
                               qval=percentile)
        else:
            agg_func = AggregateRaster.STATISTICS[statistic]["func"]

        # extensive statistics (sum/count) scale with pixel area; see below
        extensive = AggregateRaster.STATISTICS[statistic]["extensive"]
        result_column = process_kwargs["result_column"]

        # this is only there for the AggregateRasterAboveThreshold
        threshold_name = process_kwargs.get("threshold_name")

        # investigate the raster data
        if raster_data is None:
            values = no_data_value = None
        else:
            values = raster_data["values"]
            no_data_value = raster_data["no_data_value"]
        if values is None or np.all(values == no_data_value):  # skip the rest
            # no data at all: extensive statistics default to 0, others to NaN
            result[result_column] = 0 if extensive else np.nan
            return {"features": result, "projection": req_srs}
        depth, height, width = values.shape

        pixel_size = process_kwargs["pixel_size"]
        actual_pixel_size = process_kwargs["actual_pixel_size"]

        # process in groups of disjoint subsets of the features
        agg = np.full((depth, len(features)), np.nan, dtype="f4")
        for select in bucketize(features.bounds.values):
            agg_geometries_bucket = agg_geometries.iloc[select]
            index = features.index[select]

            # burn the frame's index values into a label raster so each
            # pixel maps back to the feature covering it
            rasterize_result = utils.rasterize_geoseries(
                agg_geometries_bucket,
                process_kwargs["agg_bbox"],
                agg_srs,
                height,
                width,
                values=index,
            )
            labels = rasterize_result["values"][0]

            # if there is a threshold, generate a raster with thresholds
            if threshold_name:
                # NOTE(review): this .loc lookup assumes every value in
                # `labels` (including any fill for unlabeled pixels) is a
                # valid frame index — verify; the newer variant appends a
                # NaN slot and uses np.take(..., mode="clip") instead
                thresholds = features.loc[labels.ravel(),
                                          threshold_name].values.reshape(
                                              labels.shape)
            else:
                thresholds = None

            for frame_no, frame in enumerate(values):
                # limit statistics to active pixels
                active = frame != no_data_value
                # if there is a threshold, mask the frame
                if threshold_name:
                    valid = ~np.isnan(thresholds)  # to suppress warnings
                    active[~valid] = False  # no threshold -> no aggregation
                    active[valid] &= frame[valid] >= thresholds[valid]

                # if there is no single active value: do not aggregate
                if not active.any():
                    continue

                with warnings.catch_warnings():
                    # we may get divide by 0 if any geometry does not contain
                    # any 'active' values
                    warnings.simplefilter("ignore")
                    agg[frame_no][select] = agg_func(
                        1 if statistic == "count" else frame[active],
                        labels=labels[active],
                        index=index,
                    )

        if extensive:  # sum and count
            agg[~np.isfinite(agg)] = 0
            # extensive aggregations have to be scaled
            if actual_pixel_size != pixel_size:
                agg *= (actual_pixel_size / pixel_size)**2
        else:
            agg[~np.isfinite(agg)] = np.nan  # replaces inf by nan

        if depth == 1:
            result[result_column] = agg[0]
        else:
            # store an array in a dataframe cell: set each cell with [np.array]
            result[result_column] = [[x] for x in agg.T]

        return {"features": result, "projection": req_srs}