def hyper_pytorch_ml(udf_data: UdfData):
    """Apply a pre-trained pytorch machine learning model to a hypercube

    The values of the first data cube are wrapped in a torch.autograd.Variable
    and handed to the first machine learning model by calling the model object
    directly. The prediction is stored with the dimensions and coordinates of
    the input cube, so the model output has to fit that layout.

    Args:
        udf_data (UdfData): The UDF data object that provides the data cubes
            and the machine learning models

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.

    """
    source_cube = udf_data.get_datacube_list()[0]
    source_array = source_cube.array

    # Wrap the raw cube values so that they can be fed to the model
    model_input = torch.autograd.Variable(torch.Tensor(source_array.values))

    # Only the first registered model is used
    model = udf_data.get_ml_model_list()[0].get_model()

    # Run the prediction and detach it from the autograd graph before the
    # conversion to numpy
    prediction = model(model_input)
    predicted_values = prediction.detach().numpy()

    # Reuse the layout of the input cube for the result
    result = xarray.DataArray(
        data=predicted_values,
        dims=source_array.dims,
        coords=source_array.coords,
        name=source_cube.id + "_pytorch",
    )

    # The prediction replaces all data cubes of the input object
    udf_data.set_datacube_list([DataCube(array=result)])
def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI from a RED and a NIR hypercube

    The cubes are selected by id: an id that contains "red" (case insensitive)
    provides the red band, an id that contains "nir" provides the near
    infrared band. If several cubes match, the last match is used. The NDVI
    is computed element wise as (nir - red) / (nir + red).

    Args:
        udf_data (UdfData): The UDF data object that provides the hypercubes

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.

    Raises:
        Exception: If no red or no nir hypercube was found in the input

    """
    red_cube = None
    nir_cube = None

    # Pick the bands by a case insensitive substring match on the cube id
    for candidate in udf_data.get_datacube_list():
        cube_id = candidate.id.lower()
        if "red" in cube_id:
            red_cube = candidate
        if "nir" in cube_id:
            nir_cube = candidate

    if red_cube is None:
        raise Exception("Red hypercube is missing in input")
    if nir_cube is None:
        raise Exception("Nir hypercube is missing in input")

    difference = nir_cube.array - red_cube.array
    total = nir_cube.array + red_cube.array
    ndvi = difference / total
    ndvi.name = "NDVI"

    # The NDVI cube replaces all data cubes of the input object
    udf_data.set_datacube_list([DataCube(array=ndvi)])
def apply_timeseries_generic(udf_data: UdfData, callback: Callable = apply_timeseries):
    """
    Implements the UDF contract by applying a user provided series
    transformation function (apply_timeseries) to every data cube.

    Each cube is cut into one dimensional slices, every slice is wrapped in a
    pandas.Series and passed to the callback together with the user context.
    The transformed slices are reassembled into an array that must have the
    shape of the input cube.

    :param udf_data: The UDF data object that provides the data cubes and the
        user context
    :param callback: Function that is called as callback(series, context) and
        returns the transformed series
    :return: The udf_data object, its data cubes are replaced by the
        transformed cubes
    """
    transformed_cubes = []

    for cube in udf_data.get_datacube_list():
        # numpy.rollaxis(values, 1) moves axis 1 to the front. Iterating the
        # rolled array yields 2D slices whose rows are handed to the callback.
        # NOTE(review): presumably the rows are meant to be time series, this
        # depends on the dimension order of the cube - TODO confirm
        transformed = [
            [
                callback(pandas.Series(row), udf_data.user_context)
                for row in plane
            ]
            for plane in numpy.rollaxis(cube.array.values, 1)
        ]

        # Rolling axis 1 to the front a second time restores the axis order of
        # a 3D input
        result_tile = numpy.rollaxis(numpy.asarray(transformed), 1)
        assert result_tile.shape == cube.array.shape

        # The new cube is created from the plain values, dimension names and
        # coordinates of the input cube are not passed on
        transformed_cubes.append(DataCube(xarray.DataArray(result_tile)))

    # The transformed cubes replace the original input cubes
    udf_data.set_datacube_list(transformed_cubes)
    return udf_data
def hyper_min_median_max(udf_data: UdfData):
    """Compute the min, median and max along the "t" dimension of each hypercube

    Every input cube must have a dimension named "t". For each input cube
    three cubes are created, their names are the id of the input cube followed
    by "_min", "_median" and "_max".

    Args:
        udf_data (UdfData): The UDF data object that provides the hypercubes

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.

    """
    reduced_cubes = []

    for cube in udf_data.get_datacube_list():
        # Compute all three reductions first, keyed by the name suffix
        reductions = {
            "_min": cube.array.min(dim="t"),
            "_median": cube.array.median(dim="t"),
            "_max": cube.array.max(dim="t"),
        }

        # The dictionary keeps the min, median, max order for the output
        for suffix, reduced in reductions.items():
            reduced.name = cube.id + suffix
        for reduced in reductions.values():
            reduced_cubes.append(DataCube(array=reduced))

    # The reduced cubes replace the original input cubes
    udf_data.set_datacube_list(reduced_cubes)
def rct_sklearn_ml(udf_data: UdfData):
    """Apply a pre-trained sklearn machine learning model to RED and NIR data cubes

    The cubes are selected by id: an id that contains "red" (case insensitive)
    provides the red band, an id that contains "nir" provides the near
    infrared band. Both cubes are flattened and combined in a
    pandas.DataFrame with the columns "red" and "nir" that is passed to
    m.predict(X) of the first machine learning model. The prediction is
    reshaped to the shape of the red cube.

    Args:
        udf_data (UdfData): The UDF data object that provides the data cubes
            and the machine learning models

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.

    Raises:
        Exception: If no red or no nir data cube was found in the input

    """
    red_cube = None
    nir_cube = None

    # Pick the bands by a case insensitive substring match on the cube id
    for candidate in udf_data.get_datacube_list():
        cube_id = candidate.id.lower()
        if "red" in cube_id:
            red_cube = candidate
        if "nir" in cube_id:
            nir_cube = candidate

    if red_cube is None:
        raise Exception("Red data cube is missing in input")
    if nir_cube is None:
        raise Exception("Nir data cube is missing in input")

    # The shape of the red cube defines the flat length of both bands and the
    # shape of the result
    original_shape = red_cube.array.shape
    flat_length = numpy.prod(original_shape)

    # The model must have been trained with a DataFrame that uses the same
    # column names
    X = pandas.DataFrame({
        "red": red_cube.array.values.reshape((flat_length)),
        "nir": nir_cube.array.values.reshape((flat_length)),
    })

    # Only the first registered model is used
    model = udf_data.get_ml_model_list()[0].get_model()
    prediction = model.predict(X)

    # Bring the flat prediction back into the shape of the input
    # NOTE(review): the name suffix is "_pytorch" although this is the sklearn
    # variant - confirm whether callers rely on it before changing it
    result = xarray.DataArray(
        data=prediction.reshape(original_shape),
        dims=red_cube.array.dims,
        coords=red_cube.array.coords,
        name=red_cube.id + "_pytorch",
    )

    # The prediction replaces all data cubes of the input object
    udf_data.set_datacube_list([DataCube(array=result)])
def run_user_code(code: str, data: UdfData) -> UdfData:
    """Load user provided UDF code and run the first function with a known UDF signature

    The callables of the loaded code are inspected one after another. The
    first one that matches one of the following patterns is executed, all
    remaining callables are ignored:

    * ``apply_timeseries(series, context)`` with pandas.Series annotations on
      ``series`` and on the return value: the function is applied through
      apply_timeseries_generic.
    * ``apply_hypercube(cube, context)`` or ``apply_datacube(cube, context)``
      with DataCube annotations on ``cube`` and on the return value: the
      function is called with the single data cube of ``data`` and its result
      replaces that cube.
    * Any function with exactly one parameter that is annotated with UdfData:
      the function is called with ``data`` and is expected to modify it in
      place.

    Callables for which no signature can be determined are skipped.

    Args:
        code (str): The source code of the UDF
        data (UdfData): The UDF data object that is passed to the user code

    Returns:
        The result of apply_timeseries_generic for a time series UDF,
        otherwise the object that was passed as "data".

    Raises:
        ValueError: If a data cube mapping function was found but "data" does
            not contain exactly one data cube, or if that function did not
            return a DataCube

    """
    module = load_module_from_string(code)
    functions = {name: obj for name, obj in module.items() if callable(obj)}

    for name, func in functions.items():
        try:
            sig = signature(func)
        except ValueError:
            # No signature available, this can not be a UDF entry point
            continue

        params = sig.parameters
        params_list = list(params.values())

        if (name == 'apply_timeseries'
                and 'series' in params
                and 'context' in params
                and 'pandas.core.series.Series' in str(params['series'].annotation)
                and 'pandas.core.series.Series' in str(sig.return_annotation)):
            # This is a UDF that transforms pandas series
            from .udf_wrapper import apply_timeseries_generic
            return apply_timeseries_generic(data, func)

        elif ((name == 'apply_hypercube' or name == 'apply_datacube')
              and 'cube' in params
              and 'context' in params
              and 'openeo_udf.api.datacube.DataCube' in str(params['cube'].annotation)
              and 'openeo_udf.api.datacube.DataCube' in str(sig.return_annotation)):
            # Found a datacube mapping function
            datacubes = data.get_datacube_list()
            if len(datacubes) != 1:
                raise ValueError(
                    "The provided UDF expects exactly one datacube, but only: %s were provided."
                    % len(datacubes))

            result_cube = func(datacubes[0], data.user_context)
            if not isinstance(result_cube, DataCube):
                # The value is wrapped in a tuple, otherwise a tuple result
                # would be unpacked as format arguments and raise a TypeError
                # instead of this ValueError
                raise ValueError(
                    "The provided UDF did not return a DataCube, but got: %s"
                    % (result_cube,))

            data.set_datacube_list([result_cube])
            break

        elif len(params_list) == 1 and (
                params_list[0].annotation == 'openeo_udf.api.udf_data.UdfData'
                or params_list[0].annotation == UdfData):
            # Found a generic UDF function that modifies the data in place
            func(data)
            break

    return data
def hyper_map_fabs(udf_data: UdfData):
    """Compute the absolute values of each hypercube in the provided data

    numpy.fabs is applied to the array of every input cube. The name of each
    result is the id of the input cube followed by "_fabs".

    Args:
        udf_data (UdfData): The UDF data object that provides the hypercubes

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.

    """

    def _absolute_cube(cube):
        # Build the cube that holds the absolute values of a single input cube
        magnitudes = numpy.fabs(cube.array)
        magnitudes.name = cube.id + "_fabs"
        return DataCube(array=magnitudes)

    # The new cubes replace the original input cubes
    udf_data.set_datacube_list(
        [_absolute_cube(cube) for cube in udf_data.get_datacube_list()]
    )
def fct_sampling(udf_data: UdfData):
    """Sample any number of data cubes with a single feature collection and
    store the sampled values in that feature collection.

    For every data cube one attribute column per slice is added to the
    features, the columns are named "<cube id>_<slice index>". Exactly one
    feature collection must be provided and all of its geometries must be
    points. The extended feature collection gets the id of the input
    collection followed by "_sample" and replaces the feature collections of
    the input object.

    The actual sampling is not implemented yet, each point currently receives
    the placeholder values [0, 0, 0].

    Args:
        udf_data (UdfData): The UDF data object that provides the data cubes
            and the feature collection

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.

    Raises:
        Exception: If no feature collection or more than one feature
            collection was provided, or if a geometry is not a point

    """
    collections = udf_data.feature_collection_list
    if not collections:
        raise Exception("A single feature collection is required as input")
    if len(collections) > 1:
        raise Exception(
            "The first feature collection will be used for sampling")

    source_collection = collections[0]
    features = source_collection.data

    for cube in udf_data.get_datacube_list():
        # One attribute column per slice of the cube
        # NOTE(review): the slice count is read from cube.data - confirm that
        # the data cube type provides this attribute
        column_names = [
            cube.id + "_%i" % slice_index
            for slice_index in range(len(cube.data))
        ]
        columns = {column_name: [] for column_name in column_names}

        # Sample the cube with each point
        for geometry in features.geometry:
            if geometry.type != 'Point':
                raise Exception("Only points are allowed for sampling")

            x = geometry.x
            y = geometry.y
            # TODO: Thats needs to be implemented
            # values = cube.sample(top=y, left=x)
            values = [0, 0, 0]

            if values:
                # zip stops at the shorter sequence, only columns with a
                # matching value are extended
                for column_name, value in zip(column_names, values):
                    columns[column_name].append(value)
            else:
                for column_name in column_names:
                    columns[column_name].append(math.nan)

        # Attach the sampled attribute data to the features
        for column_name in column_names:
            features[column_name] = columns[column_name]

    sampled_collection = FeatureCollection(
        id=source_collection.id + "_sample",
        data=features,
        start_times=source_collection.start_times,
        end_times=source_collection.end_times,
    )

    # The sampled collection replaces the original feature collections
    udf_data.set_feature_collection_list([sampled_collection])

    # NOTE(review): the setter is called without arguments, presumably to
    # remove the data cubes - confirm that the setter supports this
    udf_data.set_datacube_list()