Example #1
def test_eval_stack_contains():
    s1 = EvalEnv({"foo": "bar"})
    assert "foo" in s1
    s2 = s1.push({"meh": "moh"})
    assert "foo" in s2
    assert "meh" in s2
    assert "meh" not in s1
Example #2
def test_eval_stack_as_dict():
    s1 = EvalEnv({"foo": "bar"})
    s2 = s1.push({"foo": "meh", "xev": "lol"})
    s3 = s2.push({"xev": "zup", 1: 2, 3: 4})
    assert s1.as_dict() == {"foo": "bar"}
    assert s2.as_dict() == {"foo": "meh", "xev": "lol"}
    assert s3.as_dict() == {"foo": "meh", "xev": "zup", 1: 2, 3: 4}
Example #3
def test_eval_env_get_deep():
    s1 = EvalEnv({"foo": "bar"})
    s2 = s1.push({})
    s3 = s2.push({})
    assert s3.get("foo") == "bar"
    assert s3["foo"] == "bar"
    assert s3.get("meh", default="jop") == "jop"
    with pytest.raises(KeyError):
        _ = s3["meh"]
Example #4
def test_eval_stack_overwrite():
    s1 = EvalEnv({"foo": "bar"})
    assert s1["foo"] == "bar"
    s2 = s1.push({"foo": "yoo"})
    assert s1["foo"] == "bar"
    assert s2["foo"] == "yoo"
    s3 = s2.push(foo="meh")
    assert s1["foo"] == "bar"
    assert s2["foo"] == "yoo"
    assert s3["foo"] == "meh"
Example #5
def test_eval_stack_push():
    s1 = EvalEnv()
    s2 = s1.push({"foo": "bar", "xev": "lol"})
    assert s2["foo"] == "bar"
    assert s2["xev"] == "lol"
    assert s2.get("foo") == "bar"
    assert s2.get("xev") == "lol"
    assert s1.get("foo") is None
    assert s1.get("xev") is None
    with pytest.raises(KeyError):
        _ = s1["foo"]
    with pytest.raises(KeyError):
        _ = s1["xev"]
Example #6
def test_eval_stack_parameters():
    s0 = EvalEnv()
    s1 = s0.push(parameters={"color": "red", "size": 1})
    s2 = s1.push({"parameters": {"size": 3}})
    s3 = s2.push(user="******")
    s4 = s3.push(parameters={"color": "green", "height": 88})
    assert s0.collect_parameters() == {}
    assert s1.collect_parameters() == {"color": "red", "size": 1}
    assert s2.collect_parameters() == {"color": "red", "size": 3}
    assert s3.collect_parameters() == {"color": "red", "size": 3}
    assert s4.collect_parameters() == {
        "color": "green",
        "size": 3,
        "height": 88
    }
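Examples #1 through #6 pin down the EvalEnv contract: an immutable, dict-like environment where push() returns a new child environment, lookups fall through from child to parent, and "parameters" entries are merged across all levels. A minimal sketch that is consistent with these tests (an illustration only, not the actual openeo_driver.utils implementation):

class EvalEnv:
    """Sketch: immutable, chainable evaluation environment."""

    def __init__(self, values: dict = None, parent: "EvalEnv" = None):
        # Copy the initial dict so later mutations by the caller do not
        # leak in (the behavior checked by Example #13 below).
        self._values = dict(values or {})
        self._parent = parent

    def __contains__(self, key) -> bool:
        return key in self._values or (self._parent is not None and key in self._parent)

    def __getitem__(self, key):
        if key in self._values:
            return self._values[key]
        if self._parent is not None:
            return self._parent[key]
        raise KeyError(key)

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    def push(self, values: dict = None, **kwargs) -> "EvalEnv":
        # Accepts a dict, keyword arguments, or both (Example #4 uses both styles).
        return EvalEnv(values={**(values or {}), **kwargs}, parent=self)

    def as_dict(self) -> dict:
        # Child entries override parent entries (Example #2).
        base = self._parent.as_dict() if self._parent else {}
        return {**base, **self._values}

    def collect_parameters(self) -> dict:
        # Merge all "parameters" dicts from root to leaf, so deeper
        # pushes override shallower ones (Example #6).
        params = self._parent.collect_parameters() if self._parent else {}
        params.update(self._values.get("parameters", {}))
        return params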
Example #7
def test_load_collection_sar_backscatter_compatible(get_jvm):
    catalog = get_layer_catalog()

    jvm_mock = get_jvm.return_value
    raster_layer = MagicMock()
    jvm_mock.geopyspark.geotrellis.TemporalTiledRasterLayer.return_value = raster_layer
    raster_layer.layerMetadata.return_value = """{
        "crs": "EPSG:4326",
        "cellType": "uint8",
        "bounds": {"minKey": {"col": 0, "row": 0}, "maxKey": {"col": 1, "row": 1}},
        "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
        "layoutDefinition": {
            "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
            "tileLayout": {"layoutCols": 1, "layoutRows": 1, "tileCols": 256, "tileRows": 256}
        }
    }"""

    load_params = LoadParameters(temporal_extent=("2021-02-08T10:36:00Z", "2021-02-08T10:36:00Z"),
                                 spatial_extent={'west': 4, 'east': 4.001, 'north': 52, 'south': 51.9999, 'crs': 4326},
                                 sar_backscatter=SarBackscatterArgs())
    catalog.load_collection('SENTINEL1_GAMMA0_SENTINELHUB', load_params=load_params,
                            env=EvalEnv({'pyramid_levels': 'highest'}))

    factory_mock = jvm_mock.org.openeo.geotrellissentinelhub.PyramidFactory.rateLimited
    sample_type_mock = jvm_mock.org.openeo.geotrellissentinelhub.SampleType.withName.return_value
    cellsize_mock = jvm_mock.geotrellis.raster.CellSize(10, 10)

    factory_mock.assert_called_once_with("https://services.sentinel-hub.com", "sentinel-1-grd", "S1GRD", "???", "!!!",
                                         {"backCoeff": "GAMMA0_TERRAIN", "orthorectify": True}, sample_type_mock, cellsize_mock)

    jvm_mock.org.openeo.geotrellissentinelhub.SampleType.withName.assert_called_once_with("FLOAT32")
    factory_mock.return_value.datacube_seq.assert_called_once()
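Examples #7, #15, #17, #23 and #27 all rely on the same mocking pattern: the py4j JVM gateway is replaced by a MagicMock, attribute access auto-creates nested mocks for the fully qualified Scala/Java class paths, and the test then asserts on the calls the code under test made. A standalone sketch of that mechanism (with a hypothetical class path):

from unittest.mock import MagicMock

jvm = MagicMock()

# Attribute chains auto-create nested mocks, so a JVM class path can be
# "called" like a constructor or factory method.
factory = jvm.org.example.PyramidFactory  # hypothetical class path
factory("https://example.org", ["B01"], 10)

# Assertions inspect how the gateway was used.
factory.assert_called_once_with("https://example.org", ["B01"], 10)
factory.return_value.datacube_seq.assert_not_called()

Because a MagicMock returns the same return_value object on every call, an expression like jvm_mock.geotrellis.raster.CellSize(10, 10) in the test evaluates to the very object the code under test received, which is what makes the identity-based assert_called_once_with checks possible.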
Example #8
def test_load_custom_processes_present(tmp_path, api_version,
                                       backend_implementation):
    logger, stream = _get_logger()
    process_name = random_name(prefix="my_process")
    module_name = random_name(prefix="custom_processes")

    path = tmp_path / (module_name + '.py')
    with path.open("w") as f:
        f.write(
            textwrap.dedent("""
            from openeo_driver.ProcessGraphDeserializer import custom_process
            @custom_process
            def {p}(args, env):
                return 42
        """.format(p=process_name)))
    with mock.patch("sys.path", new=[str(tmp_path)] + sys.path):
        load_custom_processes(logger, _name=module_name)

    logs = stream.getvalue()
    assert "Trying to load {n!r} with PYTHONPATH ['{p!s}".format(
        n=module_name, p=str(tmp_path)) in logs
    assert "Loaded {n!r}: {p!r}".format(n=module_name, p=str(path)) in logs

    process_registry = backend_implementation.processing.get_process_registry(
        api_version=api_version)
    f = process_registry.get_function(process_name)
    assert f({}, EvalEnv()) == 42
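Example #8 generates a module on the fly, registers a process with the custom_process decorator, and verifies that load_custom_processes picks it up. A minimal sketch of what such a registering decorator plausibly looks like (hypothetical registry, not the actual openeo_driver.ProcessGraphDeserializer internals):

# Hypothetical registry mapping process names to implementations.
_CUSTOM_PROCESSES = {}

def custom_process(f):
    """Register a process implementation under its function name."""
    _CUSTOM_PROCESSES[f.__name__] = f
    return f

@custom_process
def my_process(args, env):
    return 42

assert _CUSTOM_PROCESSES["my_process"]({}, None) == 42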
Example #9
    def test_min_time(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()
        min_time = cube.reduce_dimension(reducer=reducer('min'),
                                         dimension='t',
                                         env=env)
        max_time = cube.reduce_dimension(reducer=reducer('max'),
                                         dimension='t',
                                         env=env)

        stitched = min_time.pyramid.levels[0].stitch()
        print(stitched)

        self.assertEqual(2.0, stitched.cells[0][0][0])

        for p in self.points[1:3]:
            result = min_time.timeseries(p.x, p.y, srs="EPSG:3857")
            print(result)
            print(cube.timeseries(p.x, p.y, srs="EPSG:3857"))
            max_result = max_time.timeseries(p.x, p.y, srs="EPSG:3857")
            self.assertEqual(1.0, result['NoDate'])
            self.assertEqual(2.0, max_result['NoDate'])
Example #10
def test_point_series_apply_tile(imagecollection_with_two_bands_and_one_date,
                                 udf_code):
    udf_code = textwrap.dedent(udf_code)

    reducer = GeoPySparkBackendImplementation().visit_process_graph({
        "udf_process": {
            "arguments": {
                "data": {
                    "from_argument": "dimension_data"
                },
                "udf": udf_code
            },
            "process_id": "run_udf",
            "result": True
        },
    })

    env = EvalEnv()
    transformed_collection = imagecollection_with_two_bands_and_one_date.reduce_dimension(
        dimension="bands", reducer=reducer, env=env)

    for p in TestCustomFunctions.points[0:3]:
        result = transformed_collection.timeseries(p.x, p.y)
        print(result)
        value = result.popitem()
        print(value)
Example #11
def test_evaluate_predefined_property(backend_implementation):
    pg = {
        "lc": {"process_id": "load_collection", "arguments": {"id": "TERRASCOPE_S2_FAPAR_V2"}, "result": True},
    }

    env = EvalEnv(dict(backend_implementation=backend_implementation))
    evaluate(pg, do_dry_run=True, env=env)
Example #12
def test_apply_neighborhood_overlap_udf(
        imagecollection_with_two_bands_and_three_dates, udf_noop):
    the_date = datetime.datetime(2017, 9, 25, 11, 37)
    input = imagecollection_with_two_bands_and_three_dates.pyramid.levels[
        0].to_spatial_layer(the_date).stitch().cells
    result = imagecollection_with_two_bands_and_three_dates.apply_neighborhood(
        process=udf_noop,
        size=[{
            'dimension': 'x',
            'unit': 'px',
            'value': 32
        }, {
            'dimension': 'y',
            'unit': 'px',
            'value': 32
        }],
        overlap=[{
            'dimension': 'x',
            'unit': 'px',
            'value': 8
        }, {
            'dimension': 'y',
            'unit': 'px',
            'value': 8
        }],
        env=EvalEnv())
    result_xarray = result._to_xarray()
    first_band = result_xarray.sel(bands='red', t=the_date)
    # assert_array_almost_equal(input[0],first_band[ :input.shape[1], :input.shape[2]])
    print(first_band)
    result_array = result.pyramid.levels[0].to_spatial_layer(
        the_date).stitch().cells

    subresult = result_array[:input.shape[0], :input.shape[1], :input.shape[2]]
    assert_array_almost_equal(input, subresult)
Example #13
def test_eval_stack_init_value_copy():
    d = {"foo": "bar"}
    s = EvalEnv(d)
    assert d["foo"] == "bar"
    assert s["foo"] == "bar"
    d["foo"] = "meh"
    assert d["foo"] == "meh"
    assert s["foo"] == "bar"
Example #14
def test_load_collection_sar_backscatter_incompatible():
    catalog = get_layer_catalog()
    load_params = LoadParameters(sar_backscatter=SarBackscatterArgs())
    with pytest.raises(OpenEOApiException) as exc_info:
        catalog.load_collection('TERRASCOPE_S2_TOC_V2', load_params=load_params, env=EvalEnv())

    assert exc_info.value.status_code == 400
    assert (exc_info.value.args[0] ==
            """Process "sar_backscatter" is not applicable for collection TERRASCOPE_S2_TOC_V2.""")
Example #15
def test_load_file_oscars_resample(get_jvm):
    catalog = get_layer_catalog()
    jvm_mock = get_jvm.return_value
    raster_layer = MagicMock()
    raster_layer.layerMetadata.return_value = """{
        "crs": "EPSG:4326",
        "cellType": "uint8",
        "bounds": {"minKey": {"col": 0, "row": 0}, "maxKey": {"col": 1, "row": 1}},
        "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
        "layoutDefinition": {
            "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
            "tileLayout": {"layoutCols": 1, "layoutRows": 1, "tileCols": 256, "tileRows": 256}
        }
    }"""

    jvm_mock.geopyspark.geotrellis.TemporalTiledRasterLayer.return_value = raster_layer
    load_params = LoadParameters(temporal_extent=("2010-01-01T10:36:00Z", "2012-01-01T10:36:00Z"),
                                 spatial_extent={'west': 4, 'east': 4.001, 'north': 52, 'south': 51.9999, 'crs': 4326},
                                 target_resolution=[15, 15],
                                 target_crs=3857,
                                 featureflags={"experimental": True}
                                 )
    env = EvalEnv()
    env = env.push({"pyramid_levels": "single"})

    factory_mock = jvm_mock.org.openeo.geotrellis.file.Sentinel2PyramidFactory
    extent_mock = jvm_mock.geotrellis.vector.Extent.return_value
    cellsize_call_mock = jvm_mock.geotrellis.raster.CellSize
    cellsize_mock = jvm_mock.geotrellis.raster.CellSize(15, 15)

    datacubeParams = jvm_mock.org.openeo.geotrelliscommon.DataCubeParameters.return_value


    collection = catalog.load_collection('COPERNICUS_30', load_params=load_params, env=env)
    assert collection.metadata.spatial_dimensions[0].step == 0.002777777777777778
    assert collection.metadata.spatial_dimensions[1].step == 0.002777777777777778

    jvm_mock.geotrellis.vector.Extent.assert_called_once_with(4.0, 51.9999, 4.001, 52.0)
    cellsize_call_mock.assert_called_with(15, 15)

    factory_mock.assert_called_once_with('https://services.terrascope.be/catalogue', 'urn:eop:VITO:COP_DEM_GLO_30M_COG', ['DEM'], '/data/MTDA/DEM/COP_DEM_30M_COG', cellsize_mock, True)
    factory_mock.return_value.datacube_seq.assert_called_once_with(ANY, '2010-01-01T10:36:00+00:00', '2012-01-01T10:36:00+00:00', {}, '', datacubeParams)
Example #16
def test_reduce_bands():
    input = create_spacetime_layer()
    input = gps.Pyramid({0: input})
    collection_metadata = GeopysparkCubeMetadata({
        "cube:dimensions": {
            "my_bands": {
                "type": "bands",
                "values": ["B04", "B08"]
            },
        }
    })
    imagecollection = GeopysparkDataCube(pyramid=input,
                                         metadata=collection_metadata)

    visitor = GeotrellisTileProcessGraphVisitor()
    graph = {
        "sum": {
            "arguments": {
                "data": {
                    "from_argument": "dimension_data"
                },
                "ignore_nodata": True
            },
            "process_id": "sum"
        },
        "subtract": {
            "arguments": {
                "data": {
                    "from_argument": "dimension_data"
                }
            },
            "process_id": "subtract"
        },
        "divide": {
            "arguments": {
                "data": [{
                    "from_node": "sum"
                }, {
                    "from_node": "subtract"
                }]
            },
            "process_id": "divide",
            "result": True
        }
    }
    visitor.accept_process_graph(graph)
    stitched = imagecollection.reduce_dimension(
        dimension='my_bands', reducer=visitor,
        env=EvalEnv()).pyramid.levels[0].to_spatial_layer().stitch()
    print(stitched)
    assert 3.0 == stitched.cells[0][0][0]
Example #17
def test_load_file_oscars(get_jvm):
    catalog = get_layer_catalog()
    jvm_mock = get_jvm.return_value
    raster_layer = MagicMock()
    raster_layer.layerMetadata.return_value = """{
        "crs": "EPSG:4326",
        "cellType": "uint8",
        "bounds": {"minKey": {"col": 0, "row": 0}, "maxKey": {"col": 1, "row": 1}},
        "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
        "layoutDefinition": {
            "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
            "tileLayout": {"layoutCols": 1, "layoutRows": 1, "tileCols": 256, "tileRows": 256}
        }
    }"""

    jvm_mock.geopyspark.geotrellis.TemporalTiledRasterLayer.return_value = raster_layer
    load_params = LoadParameters(temporal_extent=("2010-01-01T10:36:00Z", "2012-01-01T10:36:00Z"),
                                 spatial_extent={'west': 4, 'east': 4.001, 'north': 52, 'south': 51.9999, 'crs': 4326})
    env = EvalEnv()
    env = env.push({"pyramid_levels": "single"})
    collection = catalog.load_collection('COPERNICUS_30', load_params=load_params, env=env)
    assert collection.metadata.spatial_dimensions[0].step == 0.002777777777777778
    assert collection.metadata.spatial_dimensions[1].step == 0.002777777777777778
Example #18
def test_reduce_bands(imagecollection_with_two_bands_and_three_dates):
    cube = imagecollection_with_two_bands_and_three_dates
    ts = _timeseries_stitch(cube)
    assert len(ts) == 3
    assert set(t.cells.shape for t in ts.values()) == {(2, 8, 8)}

    reducer = _simple_reducer("sum")
    env = EvalEnv()
    cube = cube.reduce_dimension(dimension="bands", reducer=reducer, env=env)
    ts = _timeseries_stitch(cube)
    assert len(ts) == 2
    expected = np.full((1, 8, 8), 3.0)
    for t, tile in ts.items():
        assert_array_almost_equal(tile.cells, expected)
Example #19
    def test_reduce(self):
        input = Pyramid({0: self.tiled_raster_rdd})

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("max"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(2.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("min"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(1.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("sum"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertEqual(4.0, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(dimension="t",
                                         reducer=reducer("mean"),
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(2.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(1.3333333, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(reducer=reducer("variance"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.2222222, stitched.cells[0][0][1])

        stitched = cube.reduce_dimension(reducer=reducer("sd"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        print(stitched)
        self.assertEqual(0.0, stitched.cells[0][0][0])
        self.assertAlmostEqual(0.4714045, stitched.cells[0][0][1])
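The expected values in Example #19 are mutually consistent if the time series at pixel [0][0][1] is assumed to be [1, 1, 2] (sum 4, mean 4/3) and population statistics (ddof=0) are used. A quick check of that assumption:

import numpy as np

ts = np.array([1.0, 1.0, 2.0])  # assumed time series at cells[0][0][1]
assert ts.sum() == 4.0
assert np.isclose(ts.mean(), 1.3333333)
assert np.isclose(ts.var(), 0.2222222)  # population variance
assert np.isclose(ts.std(), 0.4714045)  # population standard deviation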
Example #20
def test_apply_dimension_bands_udf(
        imagecollection_with_two_bands_and_three_dates, udf_noop):
    the_date = datetime.datetime(2017, 9, 25, 11, 37)

    input = imagecollection_with_two_bands_and_three_dates.pyramid.levels[
        0].to_spatial_layer(the_date).stitch().cells
    result = imagecollection_with_two_bands_and_three_dates.apply_dimension(
        process=udf_noop, dimension='bands', env=EvalEnv())
    result_xarray = result._to_xarray()
    first_band = result_xarray.sel(bands='red', t=the_date)
    # assert_array_almost_equal(input[0],first_band[ :input.shape[1], :input.shape[2]])
    print(first_band)
    result_array = result.pyramid.levels[0].to_spatial_layer(
        the_date).stitch().cells

    subresult = result_array[:input.shape[0], :input.shape[1], :input.shape[2]]
    assert_array_almost_equal(input, subresult)
Example #21
def main(argv=None):
    logging.basicConfig(level=logging.INFO)
    process_graph, args = handle_cli(argv)
    _log.info(
        "Evaluating process graph: {pg}".format(pg=safe_repr(process_graph)))

    _setup_local_spark(print=_log.info)

    # Local imports to workaround the pyspark import issues.
    from openeo_driver.ProcessGraphDeserializer import evaluate
    from openeogeotrellis.backend import GeoPySparkBackendImplementation

    env = EvalEnv({
        "version": args.api_version,
        "pyramid_levels": "highest",
        "user": None,  # TODO
        "require_bounds": True,
        "correlation_id": f"cli-pid{os.getpid()}",
        "backend_implementation": GeoPySparkBackendImplementation(use_zookeeper=False),
    })

    with TimingLogger(title="Evaluate process graph", logger=_log):
        result = evaluate(process_graph, env=env)

    if isinstance(result, ImageCollectionResult):
        filename = args.output or f"result.{result.format}"
        with TimingLogger(title=f"Saving result to {filename!r}", logger=_log):
            result.save_result(filename)
    elif isinstance(result, JSONResult):
        if args.output:
            with open(args.output, "w") as f:
                json.dump(result.prepare_for_json(), f)
        else:
            print(result.prepare_for_json())
    elif isinstance(result, dict):
        # TODO: support storing JSON result to file
        print(result)
    else:
        # TODO: support more result types
        raise ValueError(result)
Example #22
def test_reduce_bands_reduce_time(imagecollection_with_two_bands_and_three_dates, udf_noop):
    cube = imagecollection_with_two_bands_and_three_dates
    ts = _timeseries_stitch(cube)
    assert len(ts) == 3
    assert set(t.cells.shape for t in ts.values()) == {(2, 8, 8)}

    reducer = _simple_reducer("sum")
    env = EvalEnv()
    cube = cube.reduce_dimension(dimension="bands", reducer=reducer, env=env)
    ts = _timeseries_stitch(cube)
    assert len(ts) == 2
    assert set(t.cells.shape for t in ts.values()) == {(1, 8, 8)}

    cube = cube.reduce_dimension(dimension='t', reducer=udf_noop, env=env)
    stitched = _stitch(cube)
    assert stitched.cells.shape == (1, 8, 8)
    expected = np.full((1, 8, 8), 3.0)
    assert_array_almost_equal(stitched.cells, expected)
Example #23
def test_load_collection_data_cube_params(get_jvm):
    catalog = get_layer_catalog()

    jvm_mock = get_jvm.return_value
    raster_layer = MagicMock()
    jvm_mock.geopyspark.geotrellis.TemporalTiledRasterLayer.return_value = raster_layer
    raster_layer.layerMetadata.return_value = """{
        "crs": "EPSG:4326",
        "cellType": "uint8",
        "bounds": {"minKey": {"col": 0, "row": 0}, "maxKey": {"col": 1, "row": 1}},
        "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
        "layoutDefinition": {
            "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
            "tileLayout": {"layoutCols": 1, "layoutRows": 1, "tileCols": 256, "tileRows": 256}
        }
    }"""

    load_params = LoadParameters(
        temporal_extent=('2019-01-01', '2019-01-01'),
        bands=['temperature-mean'],
        spatial_extent={'west': 4, 'east': 4.001, 'north': 52, 'south': 51.9999, 'crs': 4326}

    )
    load_params['featureflags'] = {
        "tilesize": 1,
        "experimental": True
    }
    env = EvalEnv({'require_bounds': True, 'pyramid_levels': 'highest'})
    collection = catalog.load_collection('AGERA5', load_params=load_params, env=env)

    print(collection.metadata)
    assert len(collection.metadata.bands) == 1
    assert collection.metadata.bands[0].name == 'temperature-mean'

    factory_mock = jvm_mock.org.openeo.geotrellis.file.AgEra5PyramidFactory
    projected_polys = jvm_mock.org.openeo.geotrellis.ProjectedPolygons.fromExtent.return_value
    datacubeParams = jvm_mock.org.openeo.geotrelliscommon.DataCubeParameters.return_value

    jvm_mock.geotrellis.vector.Extent.assert_called_once_with(4.0, 51.9999, 4.001, 52.0)

    factory_mock.assert_called_once_with('/data/MEP/ECMWF/AgERA5/*/*/AgERA5_dewpoint-temperature_*.tif', ['temperature-mean'], '.+_(\\d{4})(\\d{2})(\\d{2})\\.tif')
    factory_mock.return_value.datacube_seq.assert_called_once_with(projected_polys, '2019-01-01T00:00:00+00:00', '2019-01-01T00:00:00+00:00', {}, '', datacubeParams)
    getattr(datacubeParams, 'tileSize_$eq').assert_called_once_with(1)
    getattr(datacubeParams, 'layoutScheme_$eq').assert_called_once_with('FloatingLayoutScheme')
Example #24
    def test_reduce_all_data(self):
        input = Pyramid({
            0: self._single_pixel_layer({
                datetime.datetime.strptime("2016-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 1.0,
                datetime.datetime.strptime("2017-04-24T04:00:00Z", '%Y-%m-%dT%H:%M:%SZ'): 5.0,
            })
        })

        cube = GeopysparkDataCube(pyramid=input,
                                  metadata=self.collection_metadata)
        env = EvalEnv()
        stitched = cube.reduce_dimension(reducer=reducer("min"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(1.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("max"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(5.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("sum"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertEqual(6.0, stitched.cells[0][0][0])

        stitched = cube.reduce_dimension(reducer=reducer("mean"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(3.0, stitched.cells[0][0][0], delta=0.001)

        stitched = cube.reduce_dimension(reducer=reducer("variance"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(4.0, stitched.cells[0][0][0], delta=0.001)

        stitched = cube.reduce_dimension(reducer=reducer("sd"),
                                         dimension="t",
                                         env=env).pyramid.levels[0].stitch()
        self.assertAlmostEqual(2.0, stitched.cells[0][0][0], delta=0.001)
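Here the two pixel values are given explicitly (1.0 and 5.0), so the expected aggregates follow directly: min 1, max 5, sum 6, mean 3, population variance ((1 - 3)^2 + (5 - 3)^2) / 2 = 4, and standard deviation sqrt(4) = 2, matching the asserts above.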
Example #25
def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
    file_name = get_test_data_file("udf_add_to_bands.py")
    with open(file_name, "r") as f:
        udf_code = f.read()
    udf_add_to_bands = {
        "udf_process": {
            "arguments": {
                "data": {
                    "from_argument": "dimension_data"
                },
                "udf": udf_code
            },
            "process_id": "run_udf",
            "result": True
        },
    }
    env = EvalEnv()

    polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
    chunks = MultiPolygon([polygon1])
    cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
    result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
    result_layer: TiledRasterLayer = result_cube.pyramid.levels[0]

    assert result_layer.layer_type == LayerType.SPACETIME

    results_numpy = result_layer.to_numpy_rdd().collect()
    band0_month10 = np.zeros((4, 4))
    band1_month10 = np.zeros((4, 4))
    band0_month10.fill(1012)
    band1_month10.fill(1101)
    for key_and_tile in results_numpy:
        instant: datetime.datetime = key_and_tile[0].instant
        tile: Tile = key_and_tile[1]
        cells: np.ndarray = tile.cells
        assert cells.shape == (2, 4, 4)
        assert tile.cell_type == 'FLOAT'
        if instant.month == 10:
            np.testing.assert_array_equal(cells, np.array([band0_month10, band1_month10]))
        elif instant.month == 9 and instant.day == 25:
            np.testing.assert_array_equal(cells, np.array([band0_month10 - 1, band1_month10 + 1]))
Example #26
def test_apply_neighborhood_no_overlap(
        imagecollection_with_two_bands_and_three_dates):
    the_date = datetime.datetime(2017, 9, 25, 11, 37)
    graph = {
        "abs": {
            "arguments": {
                "p": {
                    "from_argument": "data"
                },
                "base": 2
            },
            "process_id": "power",
            "result": True
        }
    }
    input = imagecollection_with_two_bands_and_three_dates.pyramid.levels[
        0].to_spatial_layer(the_date).stitch().cells
    result = imagecollection_with_two_bands_and_three_dates.apply_neighborhood(
        process=graph,
        size=[{
            'dimension': 'x',
            'unit': 'px',
            'value': 32
        }, {
            'dimension': 'y',
            'unit': 'px',
            'value': 32
        }, {
            'dimension': 't',
            'value': "P1D"
        }],
        overlap=[],
        env=EvalEnv(),
    )
    result_array = result.pyramid.levels[0].to_spatial_layer(
        the_date).stitch().cells
    print(result_array)
    expected_result = np.power(2, input)
    print(expected_result)
    assert_array_almost_equal(expected_result, result_array)
Example #27
def test_load_collection_bands_with_required_extent(get_jvm):
    catalog = get_layer_catalog()

    jvm_mock = get_jvm.return_value
    raster_layer = MagicMock()
    jvm_mock.geopyspark.geotrellis.TemporalTiledRasterLayer.return_value = raster_layer
    raster_layer.layerMetadata.return_value = """{
        "crs": "EPSG:4326",
        "cellType": "uint8",
        "bounds": {"minKey": {"col": 0, "row": 0}, "maxKey": {"col": 1, "row": 1}},
        "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
        "layoutDefinition": {
            "extent": {"xmin": 0, "ymin": 0, "xmax": 1, "ymax": 1},
            "tileLayout": {"layoutCols": 1, "layoutRows": 1, "tileCols": 256, "tileRows": 256}
        }
    }"""

    load_params = LoadParameters(
        temporal_extent=('2019-01-01', '2019-01-01'),
        bands=['TOC-B03_10M'],
        spatial_extent={'west': 4, 'east': 4.001, 'north': 52, 'south': 51.9999, 'crs': 4326}
    )
    env = EvalEnv({'require_bounds': True})
    collection = catalog.load_collection('TERRASCOPE_S2_TOC_V2', load_params=load_params, env=env)

    print(collection.metadata)
    assert len(collection.metadata.bands) == 1
    assert collection.metadata.bands[0].name == 'TOC-B03_10M'

    factory_mock = jvm_mock.org.openeo.geotrellis.file.Sentinel2PyramidFactory
    extent_mock = jvm_mock.geotrellis.vector.Extent.return_value
    cellsize_mock = jvm_mock.geotrellis.raster.CellSize.return_value

    jvm_mock.geotrellis.vector.Extent.assert_called_once_with(4.0, 51.9999, 4.001, 52.0)

    factory_mock.assert_called_once_with("https://services.terrascope.be/catalogue", 'urn:eop:VITO:TERRASCOPE_S2_TOC_V2', ['TOC-B03_10M'], '/data/MTDA/TERRASCOPE_Sentinel2/TOC_V2', cellsize_mock,False)
    factory_mock.return_value.pyramid_seq.assert_called_once_with(extent_mock, "EPSG:4326", '2019-01-01T00:00:00+00:00', '2019-01-01T00:00:00+00:00', {}, '')
Example #28
def test_apply_dimension_array_interpolate_linear(
        imagecollection_with_two_bands_and_three_dates):
    the_date = datetime.datetime(2017, 9, 30, 00, 37)
    graph = {
        "array_interpolate_linear": {
            "arguments": {
                "data": {
                    "from_argument": "data"
                }
            },
            "process_id": "array_interpolate_linear",
            "result": True
        }
    }
    result = imagecollection_with_two_bands_and_three_dates.apply_dimension(
        process=graph,
        dimension='t',
        target_dimension='some_other_dim',
        env=EvalEnv(),
        context={'bla': 'bla'})
    result_array = result.pyramid.levels[0].to_spatial_layer(
        the_date).stitch().cells

    assert np.all(np.isclose(result_array, 1))
Example #29
def main(argv: List[str]) -> None:
    iterations = int(argv[1]) if len(argv) > 1 else 1

    start_date = argv[2] if len(argv) > 2 else '2018-01-01'
    end_date = argv[3] if len(argv) > 3 else start_date

    print("%d iteration(s) from %s to %s" % (iterations, start_date, end_date))

    vector_files = [
        #  ("1", "/data/users/Public/vdboschj/EP-3025/GeometryCollection.shp"),
        #  ("63 overlapping", "/data/users/Public/vdboschj/EP-3025/BELCAM_fields_2017_winter_wheat_4326.shp"),
        #  ("61 non-overlapping", "/data/users/Public/vdboschj/EP-3025/BELCAM_fields_2017_winter_wheat_4326_non_overlapping.shp"),
        #  ("25 K overlapping", "/data/users/Public/driesj/fields_flanders_non_overlap.shp"),
        #  ("18 K non-overlapping", "/data/users/Public/driesj/fields_flanders_zero_overlap.shp")
        ("59 K overlapping",
         "/data/users/Public/driesj/fields_flanders_zero_overlap_59350.shp"),
        ("59 K non-overlapping",
         "/data/users/Public/driesj/fields_flanders_zero_overlap_59338.shp")
    ]

    sc = SparkContext.getOrCreate(conf=None)

    env = EvalEnv({
        "version": "1.0.0",
        "pyramid_levels": "highest",
        "correlation_id": f"benchmark-pid{os.getpid()}",
        "backend_implementation": GeoPySparkBackendImplementation(),
    })

    try:
        for context, vector_file in vector_files:
            process_graph = _huge_vector_file_time_series(
                vector_file, start_date, end_date).graph

            def evaluate() -> Dict:
                principal = sc.getConf().get("spark.yarn.principal")
                key_tab = sc.getConf().get("spark.yarn.keytab")

                kerberos(principal, key_tab)
                return ProcessGraphDeserializer.evaluate(process_graph,
                                                         env=env)

            def combine_iterations(acc: (Dict, float), i: int) -> (Dict, float):
                count = i + 1
                _, duration_sum = acc

                res, duration = _time(evaluate)
                print(
                    "iteration %d of %d: evaluation of %s (%s) took %f seconds"
                    % (count, iterations, vector_file, context, duration))

                return res, duration_sum + duration

            (result, total_duration) = reduce(combine_iterations,
                                              range(iterations), (None, 0))

            # json_result = json.dumps(replace_nan_values(result), sort_keys=True, indent=2, separators=(',', ': '))

            print("evaluation of %s (%s) took %f seconds on average" %
                  (vector_file, context, total_duration / iterations))
    finally:
        sc.stop()
Example #30
def test_apply_dimension_invalid_dimension(
        imagecollection_with_two_bands_and_three_dates, udf_noop):
    the_date = datetime.datetime(2017, 9, 25, 11, 37)
    with pytest.raises(FeatureUnsupportedException):
        result = imagecollection_with_two_bands_and_three_dates.apply_dimension(
            process=udf_noop, dimension='bla', env=EvalEnv())