Example #1
def rct_stats(udf_data: UdfData):
    """Compute univariate statistics for each hypercube

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything; the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # The dictionary that stores the statistical data
    stats = {}
    # Iterate over each raster collection cube and compute statistical values
    for cube in udf_data.get_datacube_list():
        # make sure to cast the values to floats, otherwise they are not serializable
        stats[cube.id] = dict(sum=float(cube.array.sum()),
                              mean=float(cube.array.mean()),
                              min=float(cube.array.min()),
                              max=float(cube.array.max()))
    # Create the structured data object
    sd = StructuredData(description="Statistical data sum, min, max and mean "
                        "for each raster collection cube as dict",
                        data=stats,
                        type="dict")
    # Remove all collections and set the StructuredData list
    udf_data.del_datacube_list()
    udf_data.del_feature_collection_list()
    udf_data.set_structured_data_list([
        sd,
    ])
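A minimal usage sketch (the UdfData constructor arguments and the get_structured_data_list() accessor are assumptions about the openeo_udf API and may differ between versions):

import numpy
import xarray
from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData

# Build a small named cube; the cube id is assumed to be derived from the array name
array = xarray.DataArray(numpy.arange(8.0).reshape((2, 2, 2)),
                         dims=("t", "y", "x"), name="temperature")
udf_data = UdfData(datacube_list=[DataCube(array=array)])  # hypothetical constructor call

rct_stats(udf_data)
print(udf_data.get_structured_data_list()[0].data)
# {'temperature': {'sum': 28.0, 'mean': 3.5, 'min': 0.0, 'max': 7.0}}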
Example #2
def hyper_pytorch_ml(udf_data: UdfData):
    """Apply a pre-trained pytorch machine learn model on a hypercube

    The model must be a pytorch model that has expects the input data in the constructor
    The prediction method must accept a torch.autograd.Variable as input.

    Args:
        udf_data (UdfData): The UDF data object that hypercubes and vector tiles

    Returns:
        This function will not return anything; the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    cube = udf_data.get_datacube_list()[0]

    # This is the input data of the model; avoid shadowing the builtin "input"
    model_input = torch.autograd.Variable(torch.Tensor(cube.array.values))
    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m(model_input)
    result = xarray.DataArray(data=pred.detach().numpy(),
                              dims=cube.array.dims,
                              coords=cube.array.coords,
                              name=cube.id + "_pytorch")
    # Create the new raster collection tile
    result_cube = DataCube(array=result)
    # Insert the new hypercube in the input object.
    udf_data.set_datacube_list([result_cube])
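For illustration, a toy model that satisfies this contract might look as follows (a hypothetical stand-in; any trained torch.nn.Module whose forward pass accepts a tensor works the same way):

import torch

class ScaleModel(torch.nn.Module):
    """Toy stand-in for a pre-trained model: doubles every input value."""
    def forward(self, x):
        return x * 2.0

model = ScaleModel()
# Same call pattern as in hyper_pytorch_ml above
pred = model(torch.autograd.Variable(torch.Tensor([[1.0, 2.0]])))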
Example #3
def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR hypercubes

    Hypercubes with ids "red" and "nir" are required. The NDVI computation will be applied
    to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything; the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    red = None
    nir = None

    # Iterate over each tile
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red hypercube is missing in input")
    if nir is None:
        raise Exception("Nir hypercube is missing in input")

    ndvi = (nir.array - red.array) / (nir.array + red.array)
    ndvi.name = "NDVI"

    hc = DataCube(array=ndvi)
    udf_data.set_datacube_list([hc, ])
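A usage sketch (assuming, as above, that the cube id is derived from the DataArray name and that UdfData accepts a datacube_list keyword):

import numpy
import xarray
from openeo_udf.api.datacube import DataCube
from openeo_udf.api.udf_data import UdfData

red = xarray.DataArray(numpy.full((1, 2, 2), 0.1), dims=("t", "y", "x"), name="red")
nir = xarray.DataArray(numpy.full((1, 2, 2), 0.5), dims=("t", "y", "x"), name="nir")
udf_data = UdfData(datacube_list=[DataCube(array=red), DataCube(array=nir)])  # hypothetical constructor call

hyper_ndvi(udf_data)
# (0.5 - 0.1) / (0.5 + 0.1) = 0.666... for every pixel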
Example #4
def apply_timeseries_generic(udf_data: UdfData, callback: Callable = apply_timeseries):
    """
    Implements the UDF contract by calling a user provided time series transformation function (apply_timeseries).
    Multiple bands are currently handled separately, another approach could provide a dataframe with a timeseries for each band.

    :param udf_data:
    :return:
    """
    # The list of tiles that were created
    tile_results = []

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        array3d = []
        # rollaxis moves axis 1 to the front so that we can iterate over it
        for time_x_slice in numpy.rollaxis(cube.array.values, 1):
            time_x_result = []
            for time_slice in time_x_slice:
                series = pandas.Series(time_slice)
                transformed_series = callback(series, udf_data.user_context)
                time_x_result.append(transformed_series)
            array3d.append(time_x_result)

        # We need to create a new 3D array with the correct shape for the computed aggregate
        result_tile = numpy.rollaxis(numpy.asarray(array3d), 1)
        assert result_tile.shape == cube.array.shape
        # Create the new raster collection cube
        rct = DataCube(xarray.DataArray(result_tile))
        tile_results.append(rct)
    # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_datacube_list(tile_results)
    return udf_data
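A callback that fits this contract could look as follows (a hypothetical detrending transform; the signature is the one that run_user_code in Example #6 detects):

import pandas

def apply_timeseries(series: pandas.Series, context: dict) -> pandas.Series:
    """Subtract the temporal mean from each pixel time series."""
    return series - series.mean()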
Example #5
def hyper_min_median_max(udf_data: UdfData):
    """Compute the min, median and max of the time dimension of a hyper cube

    Hypercubes with time dimensions are required. The min, median and max reduction of th time axis will be applied
    to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything; the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # Iterate over each tile
    cube_list = []
    for cube in udf_data.get_datacube_list():
        # Avoid shadowing the builtins min and max
        min_array = cube.array.min(dim="t")
        median_array = cube.array.median(dim="t")
        max_array = cube.array.max(dim="t")

        min_array.name = cube.id + "_min"
        median_array.name = cube.id + "_median"
        max_array.name = cube.id + "_max"

        cube_list.append(DataCube(array=min_array))
        cube_list.append(DataCube(array=median_array))
        cube_list.append(DataCube(array=max_array))

    udf_data.set_datacube_list(cube_list)
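Each reduction drops the time dimension, so every input cube yields three new cubes (_min, _median, _max) with one dimension less. A quick check of that behaviour with plain xarray (dimension names assumed):

import numpy
import xarray

array = xarray.DataArray(numpy.random.rand(5, 3, 3), dims=("t", "y", "x"), name="ndvi")
print(array.median(dim="t").shape)  # (3, 3): the "t" dimension is reduced away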
Example #6
def run_user_code(code: str, data: UdfData) -> UdfData:
    module = load_module_from_string(code)

    functions = {name: obj for name, obj in module.items() if callable(obj)}

    for func in functions.items():
        try:
            sig = signature(func[1])
        except ValueError:
            continue
        params = sig.parameters
        params_list = list(params.values())
        if (func[0] == 'apply_timeseries' and 'series' in params
                and 'context' in params and 'pandas.core.series.Series' in str(
                    params['series'].annotation)
                and 'pandas.core.series.Series' in str(sig.return_annotation)):
            # this is a UDF that transforms pandas series
            from .udf_wrapper import apply_timeseries_generic
            return apply_timeseries_generic(data, func[1])
        elif ((func[0] == 'apply_hypercube' or func[0] == 'apply_datacube')
              and 'cube' in params and 'context' in params
              and 'openeo_udf.api.datacube.DataCube' in str(
                  params['cube'].annotation)
              and 'openeo_udf.api.datacube.DataCube' in str(
                  sig.return_annotation)):
            # found a datacube mapping function
            if len(data.get_datacube_list()) != 1:
                raise ValueError(
                    "The provided UDF expects exactly one datacube, but %s were provided."
                    % len(data.get_datacube_list()))
            result_cube = func[1](data.get_datacube_list()[0],
                                  data.user_context)
            if not isinstance(result_cube, DataCube):
                raise ValueError(
                    "The provided UDF did not return a DataCube, but got: %s" %
                    result_cube)
            data.set_datacube_list([result_cube])
            break
        elif len(params_list) == 1 and (
                params_list[0].annotation == 'openeo_udf.api.udf_data.UdfData'
                or params_list[0].annotation == UdfData):
            # found a generic UDF function
            func[1](data)
            break

    return data
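From the caller's side, dispatching could look like the sketch below (hypothetical; the UDF source is user supplied, and the type annotations must spell out the expected classes so that the string matching on the signature above succeeds):

udf_code = '''
from openeo_udf.api.datacube import DataCube

def apply_datacube(cube: DataCube, context: dict) -> DataCube:
    return DataCube(array=cube.array * 2.0)
'''
# result = run_user_code(udf_code, data)  # data is a UdfData holding exactly one cube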
Example #7
def rct_sklearn_ml(udf_data: UdfData):
    """Apply a pre-trained sklearn machine learn model on RED and NIR tiles

    The model must be a sklearn model that has a prediction method: m.predict(X)
    The prediction method must accept a pandas.DataFrame as input.

    Tiles with ids "red" and "nir" are required. The machine learn model will be applied to all spatio-temporal pixel
    of the two input raster collections.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything; the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    red = None
    nir = None

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red data cube is missing in input")
    if nir is None:
        raise Exception("Nir data cube is missing in input")

    # We need to reshape the data for prediction into one dimensional arrays
    three_dim_shape = red.array.shape
    one_dim_shape = numpy.prod(three_dim_shape)

    red_reshape = red.array.values.reshape(one_dim_shape)
    nir_reshape = nir.array.values.reshape(one_dim_shape)

    # This is the input data of the model. It must be trained with a DataFrame using the same names.
    X = pandas.DataFrame()
    X["red"] = red_reshape
    X["nir"] = nir_reshape

    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m.predict(X)
    # Reshape the one dimensional predicted values to three dimensions based on the input shape
    pred_reshape = pred.reshape(three_dim_shape)

    result = xarray.DataArray(data=pred_reshape, dims=red.array.dims,
                              coords=red.array.coords, name=red.id + "_sklearn")
    # Create the new raster collection cube
    h = DataCube(array=result)
    # Insert the new hypercube in the input object. The new cube will
    # replace the original input tiles.
    udf_data.set_datacube_list([h, ])
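A model that satisfies this contract could be trained like this (a hypothetical toy regression; the DataFrame column names must match the ones built above):

import pandas
from sklearn.linear_model import LinearRegression

train = pandas.DataFrame({"red": [0.1, 0.2, 0.3], "nir": [0.5, 0.6, 0.7]})
target = (train["nir"] - train["red"]) / (train["nir"] + train["red"])  # e.g. fit towards NDVI
m = LinearRegression().fit(train, target)
pred = m.predict(train)  # same call pattern as in rct_sklearn_ml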
Example #8
def hyper_map_fabs(udf_data: UdfData):
    """Compute the absolute values of each hyper cube in the provided data

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything; the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # Iterate over each tile
    cube_list = []
    for cube in udf_data.get_datacube_list():
        result = numpy.fabs(cube.array)
        result.name = cube.id + "_fabs"
        cube_list.append(DataCube(array=result))
    udf_data.set_datacube_list(cube_list)
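numpy.fabs applied to an xarray.DataArray returns a DataArray with dims and coords preserved, which is why the result can be renamed and wrapped in a DataCube directly:

import numpy
import xarray

array = xarray.DataArray(numpy.array([[-1.5, 2.0]]), dims=("y", "x"), name="band")
result = numpy.fabs(array)
print(type(result).__name__, result.values)  # DataArray [[1.5 2. ]]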
Example #9
def fct_sampling(udf_data: UdfData):
    """Sample any number of raster collection tiles with a single feature collection (the first if several are provided)
    and store the samples values in the input feature collection. Each time-slice of a raster collection is
    stored as a separate column in the feature collection. Hence, the size of the feature collection attributes
    is (number_of_raster_tile * number_of_xy_slices) x number_of_features.
    The number of columns is equal to (number_of_raster_tile * number_of_xy_slices).

    A single feature collection id stored in the input data object that contains the sample attributes and
    the original data.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything; the UdfData object "udf_data" must be used to store the resulting
        data.

    """

    if not udf_data.feature_collection_list:
        raise Exception("A single feature collection is required as input")

    if len(udf_data.feature_collection_list) > 1:
        raise Exception(
            "Only a single feature collection is supported as input")

    # Get the first feature collection
    fct = udf_data.feature_collection_list[0]
    features = fct.data

    # Iterate over each raster cube
    for cube in udf_data.get_datacube_list():

        # Compute the number and names of the attribute columns (one per time slice)
        num_slices = len(cube.array)
        columns = {}
        column_names = []
        for slice_index in range(num_slices):
            column_name = cube.id + "_%i" % slice_index
            column_names.append(column_name)
            columns[column_name] = []

        # Sample the raster data with each point
        for feature in features.geometry:
            # Check if the feature is a point
            if feature.type == 'Point':
                x = feature.x
                y = feature.y
                # TODO: This needs to be implemented
                # values = cube.sample(top=y, left=x)

                values = [0, 0, 0]

                # Store the values in column specific arrays
                if values:
                    for column_name, value in zip(column_names, values):
                        columns[column_name].append(value)
                else:
                    for column_name in column_names:
                        columns[column_name].append(math.nan)
            else:
                raise Exception("Only points are allowed for sampling")
        # Attach the sampled attribute data to the GeoDataFrame
        for column_name in column_names:
            features[column_name] = columns[column_name]
    # Create the output feature collection
    fct = FeatureCollection(id=fct.id + "_sample",
                            data=features,
                            start_times=fct.start_times,
                            end_times=fct.end_times)
    # Insert the new tiles as list of feature collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_feature_collection_list([
        fct,
    ])
    # Remove the raster collection tiles
    udf_data.del_datacube_list()
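The not-yet-implemented sampling step could be sketched with xarray's nearest-neighbour selection (an assumption about the cube layout; cube.sample(top=y, left=x) does not exist in this snippet):

import numpy
import xarray

array = xarray.DataArray(numpy.random.rand(3, 4, 4),
                         dims=("t", "y", "x"),
                         coords={"y": range(4), "x": range(4)})
# One sampled value per time slice for the point (x=1, y=2)
values = array.sel(x=1, y=2, method="nearest").values.tolist()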