def fct_buffer(udf_data: UdfData):
    """Compute a buffer of size 10 around features

    This function creates a buffer around all features in the provided feature collection tiles.
    The resulting geopandas.GeoDataFrame contains the new geometries and a copy of the original
    attribute data.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store
        the resulting data.

    """
    fct_list = []

    # Iterate over each tile
    for tile in udf_data.feature_collection_tiles:
        # Buffer all features
        gseries = tile.data.buffer(distance=10)
        # Create a new GeoDataFrame that includes the buffered geometry and the attribute data
        new_data = tile.data.set_geometry(gseries)
        # Create the new feature collection tile
        fct = FeatureCollectionTile(id=tile.id + "_buffer", data=new_data,
                                    start_times=tile.start_times, end_times=tile.end_times)
        fct_list.append(fct)
    # Insert the new tiles as list of feature collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_feature_collection_tiles(fct_list)
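
# Example (not part of the original source): a minimal sketch of what the buffering
# above does to a plain geopandas.GeoDataFrame. The column name "a" and the point
# coordinates are made up for illustration.
import geopandas
from shapely.geometry import Point

gdf = geopandas.GeoDataFrame({"a": [1, 2]}, geometry=[Point(0, 0), Point(5, 5)])
buffered = gdf.set_geometry(gdf.buffer(distance=10))
print(buffered.geometry.geom_type.tolist())  # ['Polygon', 'Polygon']
print(buffered["a"].tolist())                # [1, 2] -- attribute data is preserved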
import numpy
import torch


def rct_pytorch_ml(udf_data: UdfData):
    """Apply a pre-trained pytorch machine learning model to the first tile

    The input data is wrapped in a torch.autograd.Variable, hence the model's
    prediction method must accept a torch.autograd.Variable as input.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store
        the resulting data.

    """
    tile = udf_data.raster_collection_tiles[0]

    # This is the input data of the model
    input_data = torch.autograd.Variable(torch.Tensor(tile.data))
    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m(input_data)
    # Create the new raster collection tile
    rct = RasterCollectionTile(id=mlm.name, extent=tile.extent, data=numpy.array(pred.tolist()),
                               start_times=tile.start_times, end_times=tile.end_times)
    # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_raster_collection_tiles([rct, ])
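
# Example (not part of the original source): a hypothetical model that satisfies the
# interface assumed above -- calling the model on a Variable/tensor returns the
# prediction. "SimpleMultiplier" is made up for illustration.
import torch

class SimpleMultiplier(torch.nn.Module):
    def forward(self, x):
        return x * 2.0

m = SimpleMultiplier()
pred = m(torch.autograd.Variable(torch.Tensor([[1.0, 2.0], [3.0, 4.0]])))
print(pred.tolist())  # [[2.0, 4.0], [6.0, 8.0]]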
def test_hypercube_ndvi(self):
    """Test the hypercube NDVI computation"""
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "hypercube_ndvi.py")
    udf_code = UdfCode(language="python", source=open(file_name, "r").read())

    hc_red = create_hypercube(name="red", value=1, shape=(3, 3, 3))
    hc_nir = create_hypercube(name="nir", value=3, shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, hypercube_list=[hc_red, hc_nir])

    udf_request = UdfRequest(data=udf_data.to_dict(), code=udf_code)
    pprint.pprint(udf_request)

    response = self.app.post('/udf', data=json.dumps(udf_request),
                             content_type="application/json")
    dict_data = json.loads(response.data)
    pprint.pprint(dict_data)

    self.checkHyperCube(dict_data=dict_data)
def checkHyperCube(self, dict_data):
    """Check the hypercube data that was processed in the UDF server"""
    udata = UdfData.from_dict(dict_data)

    hc_ndvi: HyperCube = udata.hypercube_list[0]
    self.assertEqual(hc_ndvi.id, "NDVI")
    self.assertEqual(hc_ndvi.array.name, "NDVI")
    self.assertEqual(hc_ndvi.array.data.shape, (3, 3, 3))
    self.assertEqual(hc_ndvi.array.data[0][0][0], 0.5)
    self.assertEqual(hc_ndvi.array.data[2][2][2], 0.5)
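
# Note (not part of the original source): with red = 1 and nir = 3 the expected NDVI is
# (3 - 1) / (3 + 1) = 0.5, which is exactly what the assertions above verify at both
# corners of the 3x3x3 cube. Below is a rough sketch of what the create_hypercube
# helper used in the tests presumably does; the real helper may add dimension names
# and coordinates, and HyperCube comes from the openeo_udf API.
import numpy
import xarray

def create_hypercube_sketch(name, value, shape):
    array = xarray.DataArray(numpy.full(shape, value, dtype=float), name=name)
    return HyperCube(array=array)  # HyperCube is assumed to be in scope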
def test_hypercube_ndvi_message_pack(self):
    """Test the hypercube NDVI computation with the message pack protocol"""
    dir = os.path.dirname(openeo_udf.functions.__file__)
    file_name = os.path.join(dir, "hypercube_ndvi.py")
    udf_code = UdfCode(language="python", source=open(file_name, "r").read())

    hc_red = create_hypercube(name="red", value=1, shape=(3, 3, 3))
    hc_nir = create_hypercube(name="nir", value=3, shape=(3, 3, 3))
    udf_data = UdfData(proj={"EPSG": 4326}, hypercube_list=[hc_red, hc_nir])

    udf_request = UdfRequest(data=udf_data.to_dict(), code=udf_code)
    # pprint.pprint(udf_request)

    udf_request = base64.b64encode(msgpack.packb(udf_request, use_bin_type=True))
    response = self.app.post('/udf_message_pack', data=udf_request,
                             content_type="application/base64")
    blob = base64.b64decode(response.data)
    dict_data = msgpack.unpackb(blob, raw=False)

    self.checkHyperCube(dict_data=dict_data)
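
# Example (not part of the original source): the base64/message pack round trip used
# by the test above, demonstrated on a plain dictionary.
import base64
import msgpack

payload = {"a": 1}
blob = base64.b64encode(msgpack.packb(payload, use_bin_type=True))
restored = msgpack.unpackb(base64.b64decode(blob), raw=False)
assert restored == payload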
def rct_stats(udf_data: UdfData):
    """Compute univariate statistics for each raster collection tile

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store
        the resulting data.

    """
    # The dictionary that stores the statistical data
    stats = {}
    # Iterate over each raster collection tile and compute statistical values
    for tile in udf_data.raster_collection_tiles:
        # Make sure to cast the values to floats, otherwise they are not serializable
        stats[tile.id] = dict(sum=float(tile.data.sum()), mean=float(tile.data.mean()),
                              min=float(tile.data.min()), max=float(tile.data.max()))
    # Create the structured data object
    sd = StructuredData(description="Statistical data sum, min, max and mean "
                                    "for each raster collection tile as dict",
                        data=stats,
                        type="dict")
    # Remove all collections and set the StructuredData list
    udf_data.del_raster_collection_tiles()
    udf_data.del_feature_collection_tiles()
    udf_data.set_structured_data_list([sd, ])
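
# Example (not part of the original source): why the float() casts above are needed --
# numpy scalars are not JSON serializable, plain Python floats are.
import json
import numpy

data = numpy.zeros(shape=(2, 2), dtype=numpy.float32)
stats = dict(sum=float(data.sum()), mean=float(data.mean()),
             min=float(data.min()), max=float(data.max()))
print(json.dumps(stats))  # works; passing the raw numpy scalars would raise a TypeError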
import numpy
import pandas


def rct_time_sum(udf_data: UdfData):
    """Reduce the time dimension for each tile and compute the sum of each pixel over time

    Each raster tile in the udf data object will be reduced by time: the sum is
    computed for each pixel over time.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store
        the resulting data.

    """
    # The list of tiles that were created
    tile_results = []

    # Iterate over each tile
    for tile in udf_data.raster_collection_tiles:
        tile_sum = numpy.sum(tile.data, axis=0)
        # We need to create a new 3D array with the correct shape for the computed aggregate
        rows, cols = tile_sum.shape
        array3d = numpy.ndarray([1, rows, cols])
        array3d[0] = tile_sum
        # Extract the start and end time to set the temporal extent for each tile
        if tile.start_times is not None and tile.end_times is not None:
            starts = pandas.DatetimeIndex([tile.start_times[0]])
            ends = pandas.DatetimeIndex([tile.end_times[-1]])
        else:
            starts = None
            ends = None
        # Create the new raster collection tile
        rct = RasterCollectionTile(id=tile.id + "_sum", extent=tile.extent, data=array3d,
                                   start_times=starts, end_times=ends)
        tile_results.append(rct)
    # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_raster_collection_tiles(tile_results)
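
# Example (not part of the original source): the shape handling above on a tiny array --
# three 2x2 time slices are reduced to a single 2x2 slice and wrapped back into a 3D
# array with a time dimension of length one.
import numpy

data = numpy.arange(12).reshape(3, 2, 2)
tile_sum = numpy.sum(data, axis=0)      # shape (2, 2)
array3d = tile_sum[numpy.newaxis, ...]  # shape (1, 2, 2), equivalent to the ndarray allocation above
print(array3d.shape)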
def rct_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR tiles

    Tiles with ids "red" and "nir" are required. The NDVI computation will be applied
    to all time stamped 2D raster tiles that have equal time stamps.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store
        the resulting data.

    """
    red = None
    nir = None

    # Iterate over each tile
    for tile in udf_data.raster_collection_tiles:
        if "red" in tile.id.lower():
            red = tile
        if "nir" in tile.id.lower():
            nir = tile
    if red is None:
        raise Exception("Red raster collection tile is missing in input")
    if nir is None:
        raise Exception("Nir raster collection tile is missing in input")
    if red.start_times is None or red.start_times.tolist() == nir.start_times.tolist():
        # Compute the NDVI
        ndvi = (nir.data - red.data) / (nir.data + red.data)
        # Create the new raster collection tile
        rct = RasterCollectionTile(id="ndvi", extent=red.extent, data=ndvi,
                                   start_times=red.start_times, end_times=red.end_times)
        # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
        # replace the original input tiles.
        udf_data.set_raster_collection_tiles([rct, ])
    else:
        raise Exception("Time stamps are not equal")
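
# Example (not part of the original source): the time stamp equality test used above --
# comparing two pandas.DatetimeIndex objects with == is element-wise, so tolist() is
# used to obtain a single boolean for the whole index.
import pandas

a = pandas.DatetimeIndex(["2001-01-01", "2001-01-02"])
b = pandas.DatetimeIndex(["2001-01-01", "2001-01-02"])
print(a.tolist() == b.tolist())  # True -- one boolean for the whole index
print(a == b)                    # element-wise boolean array, not a single bool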
def run_json_user_code(dict_data: Dict) -> Dict:
    """Run the user defined python code

    Args:
        dict_data: the udf request object with code and data organized in a dictionary

    Returns:
        The processed UDF data as a dictionary

    """
    code = dict_data["code"]["source"]
    data = UdfData.from_dict(dict_data["data"])

    exec(code)

    return data.to_dict()
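
# Usage sketch (not part of the original source): exec(code) only has an effect if the
# submitted source mutates the local variable "data". The trailing call that hands
# "data" to the UDF function follows the pattern of the shipped function files;
# "rct_noop" is made up for illustration.
udf_source = '''
def rct_noop(udf_data):
    pass  # a real UDF would modify udf_data here

rct_noop(data)
'''
# request = {"code": {"source": udf_source}, "data": some_udf_data.to_dict()}
# result = run_json_user_code(request)  # returns the processed data as a dictionary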
def rct_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR hypercubes

    Hypercubes with ids "red" and "nir" are required. The NDVI computation will be
    applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles
                            as well as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store
        the resulting data.

    """
    red = None
    nir = None

    # Iterate over each hypercube
    for cube in udf_data.get_hypercube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red hypercube is missing in input")
    if nir is None:
        raise Exception("Nir hypercube is missing in input")

    ndvi = (nir.array - red.array) / (nir.array + red.array)
    ndvi.name = "NDVI"

    hc = HyperCube(array=ndvi)
    udf_data.set_hypercube_list([hc, ])
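
# Example (not part of the original source): the same computation on bare
# xarray.DataArray objects -- the arithmetic is applied element-wise across every
# dimension, and the result can be renamed just as above.
import numpy
import xarray

red = xarray.DataArray(numpy.full((3, 3, 3), 1.0))
nir = xarray.DataArray(numpy.full((3, 3, 3), 3.0))
ndvi = (nir - red) / (nir + red)
ndvi.name = "NDVI"
print(ndvi.name, float(ndvi[0, 0, 0]))  # NDVI 0.5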
import math


def fct_sampling(udf_data: UdfData):
    """Sample any number of raster collection tiles with a single feature collection
    and store the sampled values in the input feature collection. Each time-slice of a
    raster collection is stored as a separate column in the feature collection. Hence,
    the feature collection attribute table has
    (number_of_raster_tiles * number_of_xy_slices) sample columns and number_of_features
    rows.

    A single feature collection is stored in the input data object that contains the
    sample attributes and the original data.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store
        the resulting data.

    """
    if not udf_data.feature_collection_tiles:
        raise Exception("A single feature collection is required as input")

    if len(udf_data.feature_collection_tiles) > 1:
        raise Exception("Only a single feature collection is supported as input")

    # Get the first feature collection
    fct = udf_data.feature_collection_tiles[0]
    features = fct.data

    # Iterate over each raster tile
    for tile in udf_data.raster_collection_tiles:
        # Compute the number and names of the attribute columns
        num_slices = len(tile.data)
        columns = {}
        column_names = []
        for slice_index in range(num_slices):
            column_name = tile.id + "_%i" % slice_index
            column_names.append(column_name)
            columns[column_name] = []
        # Sample the raster data with each point
        for feature in features.geometry:
            # Check if the feature is a point
            if feature.type == 'Point':
                x = feature.x
                y = feature.y
                values = tile.sample(top=y, left=x)
                # Store the values in column specific arrays
                if values:
                    for column_name, value in zip(column_names, values):
                        columns[column_name].append(value)
                else:
                    for column_name in column_names:
                        columns[column_name].append(math.nan)
            else:
                raise Exception("Only points are allowed for sampling")
        # Attach the sampled attribute data to the GeoDataFrame
        for column_name in column_names:
            features[column_name] = columns[column_name]
    # Create the output feature collection
    fct = FeatureCollectionTile(id=fct.id + "_sample", data=features,
                                start_times=fct.start_times, end_times=fct.end_times)
    # Insert the new tiles as list of feature collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_feature_collection_tiles([fct, ])
    # Remove the raster collection tiles
    udf_data.del_raster_collection_tiles()
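
# Example (not part of the original source): the column-filling pattern used above, on
# plain lists -- the first sampled point returns values, a second point misses the
# raster and gets NaN in every column. The column names are made up.
import math

column_names = ["tile_0", "tile_1"]
columns = {name: [] for name in column_names}

for values in ([1.5, 2.5], None):
    if values:
        for column_name, value in zip(column_names, values):
            columns[column_name].append(value)
    else:
        for column_name in column_names:
            columns[column_name].append(math.nan)

print(columns)  # {'tile_0': [1.5, nan], 'tile_1': [2.5, nan]}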