Exemple #1
0
class DataCubeStatisticsTestCase(unittest.TestCase):
    """Run the ``datacube_statistics.py`` UDF and check the statistics it produces."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    def test_rct_stats(self):
        """Test the raster collection tile statistics UDF"""

        functions_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(functions_dir, "datacube_statistics.py")
        # Read the UDF source inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(file_name, "r") as udf_file:
            udf_code = UdfCodeModel(language="python", source=udf_file.read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("t", "x", "y"),
                               shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

        # The UDF modifies udf_data in place, the result is read back below
        run_user_code(code=udf_code.source, data=udf_data)
        result = udf_data.to_dict()

        # The data cube is expected to be replaced by a single structured
        # data entry that contains the statistics of the "temp" cube
        self.assertEqual(len(result["datacubes"]), 0)
        self.assertEqual(len(result["structured_data_list"]), 1)

        statistics = result["structured_data_list"][0]
        self.assertEqual(statistics["type"], "dict")
        self.assertEqual(statistics["data"]["temp"], {
            'max': 1.0,
            'mean': 1.0,
            'min': 1.0,
            'sum': 27.0
        })
Exemple #2
0
class DataCubeMapFabsTestCase(unittest.TestCase):
    """Run the ``datacube_map_fabs.py`` UDF and check the resulting data cube."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    def test_DataCube_map_fabs(self):
        """Test the DataCube mapping of the numpy fabs function"""

        functions_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(functions_dir, "datacube_map_fabs.py")
        # Read the UDF source inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(file_name, "r") as udf_file:
            udf_code = UdfCodeModel(language="python", source=udf_file.read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("t", "x", "y"),
                               shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])
        # The UDF modifies udf_data in place
        run_user_code(code=udf_code.source, data=udf_data)
        self.checkDataCubeMapFabs(udf_data=udf_data)

    def checkDataCubeMapFabs(self, udf_data: UdfData):
        """Check the mapped fabs hyper cube data that was processed by the UDF

        Args:
            udf_data: The UDF data object after the UDF was applied to it

        """

        hc_fabs: DataCube = udf_data.datacube_list[0]
        self.assertEqual(hc_fabs.id, "temp_fabs")
        self.assertEqual(hc_fabs.array.name, "temp_fabs")
        self.assertEqual(hc_fabs.array.data.shape, (3, 3, 3))
        # Spot check the first and the last cell of the cube
        self.assertEqual(hc_fabs.array.data[0][0][0], 1)
        self.assertEqual(hc_fabs.array.data[2][2][2], 1)
Exemple #3
0
class DataCubeApiTestCase(unittest.TestCase):
    """Check the creation of DataCube objects from a data collection model."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    def test_hypercube_api(self):
        """Test that data cubes created from a data collection keep the variable names and values"""

        dcm = create_data_collection_model_example()
        dc = DataCube.from_data_collection(data_collection=dcm)
        print(dc[0].get_array())
        print(dc[1].get_array())

        variables = dcm.variables_collections[0].variables

        # The first two data cubes are compared with the first two
        # variables of the first variables collection
        for index in (0, 1):
            cube: DataCube = dc[index]
            variable = variables[index]

            self.assertEqual(cube.id, variable.name)

            actual = numpy.asarray(cube.get_array()).reshape([27])
            # Flatten the expected values as well, so that both sides are
            # compared cell by cell independent of their nesting
            expected = numpy.asarray(variable.values).ravel()
            # Compare element-wise. The former check "a.all() == v.all()"
            # only compared two truth values and therefore passed for any
            # pair of arrays without zeros.
            self.assertTrue(numpy.array_equal(actual, expected))
Exemple #4
0
class HypercubeSamplingTestCase(unittest.TestCase):
    """Test case for the ``datacube_sampling.py`` UDF.

    The only check of this class is not collected by the test runner,
    because its method name does not start with ``test``.
    """

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    def not_implemented_yet_test_sampling(self):
        """Test the feature collection sampling UDF"""

        functions_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(functions_dir, "datacube_sampling.py")
        # Read the UDF source inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(file_name, "r") as udf_file:
            udf_code = UdfCodeModel(language="python", source=udf_file.read())

        temp = create_datacube(name="temp", value=1, shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])

        # The UDF modifies udf_data in place, the result is read back below
        run_user_code(code=udf_code.source, data=udf_data)
        result = udf_data.to_dict()

        tiles = result["feature_collection_tiles"]
        self.assertEqual(len(tiles), 1)

        features = tiles[0]["data"]["features"]
        self.assertEqual(len(features), 1)
        self.assertEqual(features[0]["properties"], {'temp': 4})
Exemple #5
0
class DataCubeMinMedianMaxTestCase(unittest.TestCase):
    """Run the ``datacube_reduce_time_min_median_max.py`` UDF and check its three results."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    def test_DataCube_reduce_min_median_max(self):
        """Test the DataCube min, median, max reduction"""

        functions_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(functions_dir,
                                 "datacube_reduce_time_min_median_max.py")
        # Read the UDF source inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(file_name, "r") as udf_file:
            udf_code = UdfCodeModel(language="python", source=udf_file.read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("t", "y", "x"),
                               shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[temp])
        # The UDF modifies udf_data in place
        run_user_code(code=udf_code.source, data=udf_data)
        self.check_DataCube_min_median_max(udf_data=udf_data)

    def check_DataCube_min_median_max(self, udf_data):
        """Check the min, median, max hyper cube data that was processed by the UDF

        The data cubes are expected in the order min, median, max, each of
        them two dimensional with the shape (3, 3).

        Args:
            udf_data: The UDF data object after the UDF was applied to it

        """

        for index, reduction in enumerate(("min", "median", "max")):
            expected_name = f"temp_{reduction}"
            # Report every failing data cube separately instead of
            # stopping at the first one
            with self.subTest(datacube=expected_name):
                hc: DataCube = udf_data.datacube_list[index]
                self.assertEqual(hc.id, expected_name)
                self.assertEqual(hc.array.name, expected_name)
                self.assertEqual(hc.array.data.shape, (3, 3))
                # Spot check the first and the last cell
                self.assertEqual(hc.array.data[0][0], 1)
                self.assertEqual(hc.array.data[2][2], 1)
Exemple #6
0
class MachineLearningTestCase(unittest.TestCase):
    """Train sklearn regressors and apply them with the ``datacube_sklearn_ml.py`` UDF."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app)

    @staticmethod
    def compute_efficiency(model_result, measurement):
        """Compute ``1 - sum(diff ** 2) / (var(measurement) * len(measurement))``

        Args:
            model_result: The values computed by the model
            measurement: The reference values, must provide ``var()`` and ``len()``

        Returns:
            The efficiency of the model result with respect to the measurement

        """
        diff = model_result - measurement
        return 1 - sum(diff * diff) / (measurement.var() * len(measurement))

    @staticmethod
    def train_sklearn_model(model):
        """This method trains a sklearn regressor to add two numbers that must be
        in range [1,2,3]. The input arrays into the model must have the names *red* and *nir*.

        Args:
            model: The machine learn model to be used for training

        Returns:
            str:
            The filename of the xz compressed joblib file that contains the trained model

        """

        # Train a value adder that represents the formula (a + b)
        a = np.random.randint(1, 4, 1000)
        b = np.random.randint(1, 4, 1000)
        # Create the predicting data that is used for training
        y = (a + b)

        print("Train model ", model.__class__)
        # This is the training data with two arrays
        X = pd.DataFrame()

        X["red"] = a
        X["nir"] = b

        # Fit the model and compute the model efficiency
        model = model.fit(X, y)
        # Predict values
        predicted_values = model.predict(X)
        # Compute the score of the model
        score = model.score(X, y)
        # Compute the mean square error, the true values are passed first
        mse = mean_squared_error(y, predicted_values)

        print("Model score", score, "MSE", mse)
        print("Save the model as compressed joblib object")

        # Save the model with compression
        file_name = '/tmp/rf_add_model.pkl.xz'
        joblib.dump(value=model, filename=file_name, compress=("xz", 3))
        return file_name

    def send_json_request(self, data: UdfData, code: UdfCodeModel) -> Dict:
        """Wrap data and code into a UDF request and run it as legacy user code

        Args:
            data: The UDF data that should be processed
            code: The UDF code that should be applied to the data

        Returns:
            The result of ``run_legacy_user_code``

        """
        udf_request = UdfRequestModel(data=data.to_dict(), code=code)
        return run_legacy_user_code(dict_data=udf_request.dict())

    def send_msgpack_request(self, data: UdfData, code: UdfCodeModel) -> Dict:
        """Run the request, currently by delegating to :meth:`send_json_request`

        Args:
            data: The UDF data that should be processed
            code: The UDF code that should be applied to the data

        Returns:
            The result of :meth:`send_json_request`

        """
        # TODO: Implement a real message pack round trip: pack the request
        #  with msgpack, base64 encode it, POST it to '/udf_message_pack'
        #  and decode the response the same way. The former draft of that
        #  code was placed behind the return statement and never executed.
        return self.send_json_request(data=data, code=code)

    def _run_sklearn_udf(self, ml):
        """Apply the sklearn UDF to two data cubes and check the prediction

        Args:
            ml: The machine learn model configuration that is handed to the UDF

        """
        functions_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(functions_dir, "datacube_sklearn_ml.py")

        # Read the UDF source inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(file_name, "r") as udf_file:
            udf_code = UdfCodeModel(language="python", source=udf_file.read())

        red = create_datacube(name="red",
                              value=1,
                              dims=("t", "x", "y"),
                              shape=(2, 2, 2))
        nir = create_datacube(name="nir",
                              value=1,
                              dims=("t", "x", "y"),
                              shape=(2, 2, 2))

        udf_data = UdfData(proj={"EPSG": 4326},
                           datacube_list=[red, nir],
                           ml_model_list=[ml])
        pprint.pprint(udf_data.to_dict())

        # The UDF modifies udf_data in place, the result is read back below
        run_user_code(code=udf_code.source, data=udf_data)
        result = udf_data.to_dict()
        # The model was trained to add red and nir: 1 + 1 = 2
        self.assertAlmostEqual(2.0, result['datacubes'][0]['data'][0][0][0], 2)

        # TODO: Run the same check with send_msgpack_request() as well

    def run_model_test(self, model):
        """Train the model, store it as file and apply it in the UDF

        Args:
            model: The untrained sklearn model

        """
        model_path = MachineLearningTestCase.train_sklearn_model(model=model)

        ml = MachineLearnModelConfig(
            framework="sklearn",
            name="random_forest",
            description=
            "A sklearn model that adds two numbers in range of [1,1]",
            path=model_path)

        self._run_sklearn_udf(ml=ml)

    def test_sklearn_random_forest_with_msgpack(self):
        """Test random forest model training and UDF application"""
        model = RandomForestRegressor(n_estimators=100,
                                      max_depth=7,
                                      max_features="log2",
                                      n_jobs=16,
                                      min_samples_split=2,
                                      min_samples_leaf=1,
                                      verbose=0)
        self.run_model_test(model=model)

    def test_sklearn_gradient_boost(self):
        """Test gradient boost model training and UDF application"""
        model = GradientBoostingRegressor(n_estimators=100,
                                          max_depth=7,
                                          max_features="log2",
                                          min_samples_split=2,
                                          min_samples_leaf=1,
                                          verbose=0)
        self.run_model_test(model=model)

    def test_sklearn_extra_tree(self):
        """Test extra tree training and UDF application"""
        model = ExtraTreesRegressor(n_estimators=100,
                                    max_depth=7,
                                    max_features="log2",
                                    min_samples_split=2,
                                    min_samples_leaf=1,
                                    verbose=0)

        self.run_model_test(model=model)

    def test_sklearn_extra_tree_message_pack_md5_hash(self):
        """Test extra tree training and UDF application with message pack protocol and the machine learn model
        uploaded to the UDF md5 hash based storage system"""
        model = ExtraTreesRegressor(n_estimators=100,
                                    max_depth=7,
                                    max_features="log2",
                                    min_samples_split=2,
                                    min_samples_leaf=1,
                                    verbose=0)
        model_path = MachineLearningTestCase.train_sklearn_model(model=model)

        request_model = RequestStorageModel(
            uri=model_path,
            title="This is a test model",
            description="This is the test description.")

        response = self.app.post('/storage', json=request_model.dict())
        print(response.content)
        self.assertEqual(response.status_code, 200)

        # The hash may be wrapped in quotes and whitespace, remove both
        md5_hash = response.content.decode("ascii").strip().replace("\"", "")

        try:
            ml = MachineLearnModelConfig(
                framework="sklearn",
                name="random_forest",
                description=
                "A sklearn model that adds two numbers in range of [1,1]",
                md5_hash=md5_hash)

            self._run_sklearn_udf(ml=ml)
        finally:
            # Remove the uploaded model even if one of the checks failed,
            # so that it does not stay in the storage
            response = self.app.delete(f'/storage/{md5_hash}')

        self.assertEqual(response.status_code, 200)
Exemple #7
0
class MachineLearningModelStorageTestCase(unittest.TestCase):
    """Test the ``/storage`` endpoints with local files and URLs."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    def test_ml_storage_post_get_delete(self):
        """Store a local file, list the storage and delete the file by its md5 hash"""

        path = "/tmp/test_file"
        content = b"content"

        # Create a dummy file, the context manager closes it even if
        # writing fails
        with open(path, "wb") as file:
            file.write(content)

        request_model = RequestStorageModel(
            uri=path,
            title="This is a test model",
            description="This is the test description.")

        response = self.app.post('/storage', json=request_model.dict())
        print(response.content)
        self.assertEqual(response.status_code, 200)

        # The response body must be the md5 hash of the file content
        expected_hash = md5(content).hexdigest()
        md5_hash = response.content.decode("ascii")
        self.assertEqual(md5_hash, expected_hash)

        response = self.app.get('/storage')
        pprint(response.json())
        self.assertEqual(response.status_code, 200)

        response = self.app.delete(f'/storage/{md5_hash}')
        print(response.content)
        self.assertEqual(response.status_code, 200)

        # The delete response must contain the same hash again
        md5_hash = response.content.decode("ascii")
        self.assertEqual(md5_hash, expected_hash)

    def test_ml_storage_post_get_delete_url(self):
        """Store a file from a URL, find its entry in the storage list and delete it"""

        url = "https://storage.googleapis.com/datentransfer/europe_countries.geojson"

        request_model = RequestStorageModel(
            uri=url,
            title="This is a test model",
            description="This is the test description.")

        response = self.app.post('/storage', json=request_model.dict())
        print(response.content)
        self.assertEqual(response.status_code, 200)

        response = self.app.get('/storage')
        pprint(response.json())
        self.assertEqual(response.status_code, 200)

        md5_hash = None

        # Search the storage list for the entry that was created from the
        # URL, the last matching entry is used
        model_list: List[dict] = response.json()
        for entry in model_list:
            model = ResponseStorageModel(**entry)
            if model.source == url:
                md5_hash = model.md5_hash

        self.assertIsNotNone(md5_hash)

        response = self.app.delete(f'/storage/{md5_hash}')
        print(response.content)
        self.assertEqual(response.status_code, 200)

    def test_ml_storage_post_url_error(self):
        """Check that storing from this URL is answered with status 400"""

        url = "https://nopopopop.de/file.txt"

        request_model = RequestStorageModel(
            uri=url,
            title="This is a test model",
            description="This is the test description.")

        response = self.app.post('/storage', json=request_model.dict())
        self.assertEqual(response.status_code, 400)
Exemple #8
0
class MachineLearningPytorchTestCase(unittest.TestCase):
    """Train a pytorch model and apply it with the ``datacube_pytorch_ml.py`` UDF."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    @staticmethod
    def train_pytorch_model(model):
        """Train an arbitrary pytorch model with two features

        The model is trained for 200 steps to reproduce its input, which
        consists of 5 samples with 2 random integer features in range
        [1,2,3]. The trained model is stored with ``torch.save``.

        Args:
            model: The machine learn model to be used for training

        Returns:
            str:
            The filename of the stored model

        """
        model_path = '/tmp/simple_linear_nn_pytorch.pt'
        criterion = nn.MSELoss()
        # Create the optimizer once, it was formerly rebuilt in every
        # iteration of the training loop
        optimizer = optim.SGD(model.parameters(), lr=0.1)

        samples = numpy.random.randint(1, 4, 10)
        samples = samples.reshape([5, 2])
        features = Variable(torch.Tensor(samples))
        target = Variable(torch.Tensor(samples))

        for _ in range(200):
            output = model(features)
            loss = criterion(output, target)
            # Reset the gradients of the former step before the new ones
            # are computed
            model.zero_grad()
            loss.backward()
            optimizer.step()

        torch.save(model, model_path)
        return model_path

    def test_pytorch_linear_nn(self):
        """Test linear pytorch model training and UDF application"""

        model = SimpleNetwork()

        model_path = MachineLearningPytorchTestCase.train_pytorch_model(
            model=model)

        functions_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(functions_dir, "datacube_pytorch_ml.py")
        # Read the UDF source inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(file_name, "r") as udf_file:
            udf_code = UdfCodeModel(language="python", source=udf_file.read())

        temp = create_datacube(name="temp",
                               value=1,
                               dims=("x", "y"),
                               shape=(2, 2))

        ml = MachineLearnModelConfig(
            framework="pytorch",
            name="linear_model",
            description=
            "A pytorch model that adds two numbers in range of [1,1]",
            path=model_path)
        udf_data = UdfData(proj={"EPSG": 4326},
                           datacube_list=[temp],
                           ml_model_list=[ml])
        # The test only verifies that the UDF runs without an error, the
        # result is printed but not checked
        run_user_code(code=udf_code.source, data=udf_data)
        pprint.pprint(udf_data.to_dict())
Exemple #9
0
class DataCubeNdviTestCase(unittest.TestCase):
    """Run the ``datacube_ndvi.py`` UDF and check the resulting NDVI data cube."""

    # Called while the class body is evaluated, i.e. once at import time.
    # Presumably prepares the storage directory of the UDF server - confirm.
    create_storage_directory()

    def setUp(self):
        """Create a test client for the application before each test."""
        self.app = TestClient(app=app)

    @staticmethod
    def _create_ndvi_input():
        """Create the UDF code model and the UDF data for the NDVI tests

        Returns:
            tuple:
            The UDF code model of ``datacube_ndvi.py`` and the UDF data
            with a *red* (value 1) and a *nir* (value 3) data cube

        """
        functions_dir = os.path.dirname(openeo_udf.functions.__file__)
        file_name = os.path.join(functions_dir, "datacube_ndvi.py")
        # Read the UDF source inside a context manager so the file handle
        # is closed deterministically instead of being leaked.
        with open(file_name, "r") as udf_file:
            udf_code = UdfCodeModel(language="python", source=udf_file.read())

        hc_red = create_datacube(name="red",
                                 value=1,
                                 dims=("t", "y", "x"),
                                 shape=(3, 3, 3))
        hc_nir = create_datacube(name="nir",
                                 value=3,
                                 dims=("t", "y", "x"),
                                 shape=(3, 3, 3))
        udf_data = UdfData(proj={"EPSG": 4326}, datacube_list=[hc_red, hc_nir])
        return udf_code, udf_data

    def test_DataCube_ndvi(self):
        """Test the DataCube NDVI computation"""

        udf_code, udf_data = self._create_ndvi_input()

        # The UDF modifies udf_data in place
        run_user_code(code=udf_code.source, data=udf_data)
        self.checkDataCubeNdvi(udf_data=udf_data)

    def unused_test_DataCube_ndvi_message_pack(self):
        """Test the DataCube NDVI computation with the message pack protocol"""
        # TODO: Reactivate this test

        udf_code, udf_data = self._create_ndvi_input()

        udf_request = UdfRequestModel(data=udf_data.to_dict(), code=udf_code)
        udf_request = base64.b64encode(
            msgpack.packb(udf_request.dict(), use_bin_type=True))
        response = self.app.post(
            '/udf_message_pack',
            data=udf_request,
            headers={"Content-Type": "application/base64"})
        self.assertEqual(response.status_code, 200)
        blob = base64.b64decode(response.content)
        udf_data = msgpack.unpackb(blob, raw=False)

        # NOTE(review): the unpacked response is passed on as it is, while
        # checkDataCubeNdvi() reads the attribute "datacube_list". It
        # presumably must be converted into a UdfData object before this
        # test can be reactivated - confirm.
        self.checkDataCubeNdvi(udf_data=udf_data)

    def checkDataCubeNdvi(self, udf_data: UdfData):
        """Check the ndvi hyper cube data that was processed by the UDF

        Args:
            udf_data: The UDF data object after the UDF was applied to it

        """

        hc_ndvi: DataCube = udf_data.datacube_list[0]
        self.assertEqual(hc_ndvi.id, "NDVI")
        self.assertEqual(hc_ndvi.array.name, "NDVI")
        self.assertEqual(hc_ndvi.array.data.shape, (3, 3, 3))
        # Spot check the first and the last cell of the cube
        self.assertEqual(hc_ndvi.array.data[0][0][0], 0.5)
        self.assertEqual(hc_ndvi.array.data[2][2][2], 0.5)