Example #1
def get_implementation(version: str = "1", jvm=None) -> S1BackscatterOrfeo:
    jvm = jvm or get_jvm()
    if version == "1":
        return S1BackscatterOrfeo(jvm=jvm)
    elif version == "2":
        return S1BackscatterOrfeoV2(jvm=jvm)
    else:
        raise ValueError(f"Unsupported implementation version: {version!r}")
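
A hypothetical usage sketch (the version string would typically come from the "implementation_version" option of sar_backscatter, as in Example #6):

s1_backscatter = get_implementation(version="2")  # S1BackscatterOrfeoV2 bound to the shared JVM from get_jvm()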
Example #2
def test_load_test_collection():
    bands = ["Flat:1", "TileCol", "TileRow", "Longitude", "Latitude", "Day"]
    collection_metadata = GeopysparkCubeMetadata({
        "cube:dimensions": {
            "x": {
                "type": "spatial",
                "axis": "x"
            },
            "y": {
                "type": "spatial",
                "axis": "y"
            },
            "t": {
                "type": "temporal"
            },
            "bands": {
                "type": "bands",
                "values": bands
            }
        }
    })

    extent = get_jvm().geotrellis.vector.Extent(1.0, 2.0, 2.0, 4.0)
    pyramid = load_test_collection(collection_id="TestCollection-LonLat4x4",
                                   collection_metadata=collection_metadata,
                                   extent=extent,
                                   srs="EPSG:4326",
                                   from_date="2021-01-20",
                                   to_date="2021-01-30",
                                   bands=bands)
    cube = GeopysparkDataCube(pyramid=geopyspark.Pyramid(pyramid),
                              metadata=collection_metadata)

    data = cube.pyramid.levels[0].to_spatial_layer().stitch()
    print(data)

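    # Each band of the test collection encodes a known pattern (constant 1, tile column/row
    # indices, longitude/latitude ramps, a date-derived Day band), verified cell by cell below.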
    assert data.cells.shape == (6, 8, 4)
    assert_equal(data.cells[0], np.ones((8, 4)))
    assert_equal(data.cells[1], [[1, 1, 1, 1]] * 8)
    assert_equal(data.cells[2], [[2, 2, 2, 2]] * 4 + [[3, 3, 3, 3]] * 4)
    assert_equal(data.cells[3], [[1.0, 1.25, 1.5, 1.75]] * 8)
    assert_equal(
        data.cells[4],
        np.array(([[2, 2.25, 2.5, 2.75, 3.0, 3.25, 3.5, 3.75]] * 4)).T)
    assert_equal(data.cells[5], 25.0 * np.ones((8, 4)))
Example #3
def __init__(self, jvm: JVMView = None):
    self.jvm = jvm or get_jvm()
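
The "jvm or get_jvm()" default makes the gateway injectable; a hypothetical sketch for unit testing this constructor (presumably S1BackscatterOrfeo's, given Example #1) without a live JVM — the stub class is an assumption, not part of the codebase:

class _StubJvm:
    pass  # stands in for the Py4J JVMView in unit tests

implementation = S1BackscatterOrfeo(jvm=_StubJvm())  # skips the get_jvm() fallback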
Example #4
def run_job(job_specification,
            output_file: Path,
            metadata_file: Path,
            api_version,
            job_dir,
            dependencies: dict,
            user_id: str = None):
    logger.info(f"Job spec: {json.dumps(job_specification,indent=1)}")
    process_graph = job_specification['process_graph']

    backend_implementation = GeoPySparkBackendImplementation()
    logger.info(f"Using backend implementation {backend_implementation}")
    correlation_id = str(uuid.uuid4())
    logger.info(f"Correlation id: {correlation_id}")
    env = EvalEnv({
        'version': api_version or "1.0.0",
        'pyramid_levels': 'highest',
        'user': User(user_id=user_id),
        'require_bounds': True,
        'correlation_id': correlation_id,
        'dependencies': dependencies,
        "backend_implementation": backend_implementation,
    })
    tracer = DryRunDataTracer()
    logger.info("Starting process graph evaluation")
    result = ProcessGraphDeserializer.evaluate(process_graph,
                                               env=env,
                                               do_dry_run=tracer)
    logger.info("Evaluated process graph, result (type {t}): {r!r}".format(
        t=type(result), r=result))

    if isinstance(result, DelayedVector):
        geojsons = (mapping(geometry) for geometry in result.geometries)
        result = JSONResult(geojsons)

    if isinstance(result, DriverDataCube):
        format_options = job_specification.get('output', {})
        format_options["batch_mode"] = True
        result = ImageCollectionResult(cube=result,
                                       format='GTiff',
                                       options=format_options)

    if not isinstance(result, SaveResult):  # Assume generic JSON result
        result = JSONResult(result)

    global_metadata_attributes = {
        "title": job_specification.get("title", ""),
        "description": job_specification.get("description", ""),
        "institution": "openEO platform - Geotrellis backend: " + __version__
    }

    assets_metadata = None
    if hasattr(result, 'write_assets'):
        result.options["batch_mode"] = True
        result.options["file_metadata"] = global_metadata_attributes
        if result.options.get("sample_by_feature"):
            geoms = tracer.get_geometries("filter_spatial")
            if len(geoms) > 1:
                logger.warning(
                    "Multiple filter_spatial geometries: {c}".format(
                        c=len(geoms)))
            elif len(geoms) == 0:
                logger.warning(
                    "sample_by_feature enabled, but no geometries found. They can be specified using filter_spatial."
                )
            else:
                result.options["geometries"] = geoms[0]
            if result.options.get("geometries") is None:
                logger.error(
                    "sample_by_feature was set, but no geometries provided through filter_spatial. Make sure to provide geometries."
                )
        assets_metadata = result.write_assets(str(output_file))
        for name, asset in assets_metadata.items():
            _add_permissions(Path(asset["href"]), stat.S_IWGRP)
        logger.info("wrote image collection to %s" % output_file)

    elif isinstance(result, ImageCollectionResult):
        result.options["batch_mode"] = True
        result.save_result(filename=str(output_file))
        _add_permissions(output_file, stat.S_IWGRP)
        logger.info("wrote image collection to %s" % output_file)
    elif isinstance(result, MultipleFilesResult):
        result.reduce(output_file, delete_originals=True)
        _add_permissions(output_file, stat.S_IWGRP)
        logger.info("reduced %d files to %s" %
                    (len(result.files), output_file))
    elif isinstance(result, NullResult):
        logger.info("skipping output file %s" % output_file)
    else:
        raise NotImplementedError(
            "unsupported result type {r}".format(r=type(result)))

    if any(card4l
           for _, card4l in dependencies.values()):  # TODO: clean this up
        logger.debug("awaiting Sentinel Hub CARD4L data...")

        s3_service = get_jvm().org.openeo.geotrellissentinelhub.S3Service()

        poll_interval_secs = 10
        max_delay_secs = 600

        card4l_dependencies = [
            (collection_id, source_location)
            for (collection_id,
                 metadata_properties), (source_location,
                                        card4l) in dependencies.items()
            if card4l
        ]

        for collection_id, source_location in card4l_dependencies:
            uri_parts = urlparse(source_location)
            bucket_name = uri_parts.hostname
            request_group_id = uri_parts.path[1:]

            try:
                # FIXME: incorporate collection_id and metadata_properties to make sure the files don't clash
                s3_service.download_stac_data(bucket_name, request_group_id,
                                              str(job_dir), poll_interval_secs,
                                              max_delay_secs)
                logger.info("downloaded CARD4L data in {b}/{g} to {d}".format(
                    b=bucket_name, g=request_group_id, d=job_dir))
            except Py4JJavaError as e:
                java_exception = e.java_exception

                exception_class = java_exception.getClass().getName()
                if exception_class == 'org.openeo.geotrellissentinelhub.S3Service$StacMetadataUnavailableException':
                    logger.warning(
                        "could not find CARD4L metadata to download from s3://{b}/{r} after {d}s"
                        .format(b=bucket_name,
                                r=request_group_id,
                                d=max_delay_secs))
                else:
                    raise

        _transform_stac_metadata(job_dir)

    unique_process_ids = CollectUniqueProcessIdsVisitor().accept_process_graph(
        process_graph).process_ids

    _export_result_metadata(tracer=tracer,
                            result=result,
                            output_file=output_file,
                            metadata_file=metadata_file,
                            unique_process_ids=unique_process_ids,
                            asset_metadata=assets_metadata)

    if ConfigParams().is_kube_deploy:
        from openeogeotrellis.utils import s3_client

        bucket = os.environ.get('SWIFT_BUCKET')
        s3_instance = s3_client()

        logger.info("Writing results to object storage")
        for file in os.listdir(job_dir):
            full_path = str(job_dir) + "/" + file
            s3_instance.upload_file(full_path, bucket, full_path.strip("/"))
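
The dependencies argument destructured earlier maps a (collection_id, metadata_properties) key to a (source_location, card4l) pair; a hypothetical sketch of its shape (identifiers below are illustrative only):

dependencies = {
    ("SENTINEL1_GRD", ()): ("s3://shub-bucket/request-group-123", True),  # CARD4L batch result
    ("SENTINEL2_L2A", ()): ("file:/data/cache/assembled-456", False),     # assembled tile cache
}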
Example #5
def epsg_code(gps_crs) -> Optional[int]:
    crs = get_jvm().geopyspark.geotrellis.TileLayer.getCRS(gps_crs)
    return crs.get().epsgCode().getOrElse(None) if crs.isDefined() else None
Example #6
    def load_collection(self, collection_id: str, load_params: LoadParameters,
                        env: EvalEnv) -> GeopysparkDataCube:
        logger.info("Creating layer for {c} with load params {p}".format(
            c=collection_id, p=load_params))

        metadata = GeopysparkCubeMetadata(
            self.get_collection_metadata(collection_id))

        if metadata.get("common_name") == collection_id:
            common_name_metadatas = [
                GeopysparkCubeMetadata(m)
                for m in self.get_collection_with_common_name(
                    metadata.get("common_name"))
            ]

            backend_provider = load_params.backend_provider
            if backend_provider:
                metadata = next(
                    filter(
                        lambda m: backend_provider.lower() == m.get(
                            "_vito", "data_source", "provider:backend"),
                        common_name_metadatas), None)
            else:
                metadata = next(
                    filter(
                        lambda m: m.get("_vito", "data_source",
                                        "default_provider:backend"),
                        common_name_metadatas), None)

            if not metadata:
                metadata = common_name_metadatas[0]

        layer_source_info = metadata.get("_vito", "data_source", default={})

        sar_backscatter_compatible = layer_source_info.get(
            "sar_backscatter_compatible", False)

        if load_params.sar_backscatter is not None and not sar_backscatter_compatible:
            raise OpenEOApiException(
                message='Process "sar_backscatter" is not applicable for collection {c}.'.format(c=collection_id),
                status_code=400)

        layer_source_type = layer_source_info.get("type", "Accumulo").lower()

        native_crs = self._native_crs(metadata)

        postprocessing_band_graph = metadata.get("_vito",
                                                 "postprocessing_bands",
                                                 default=None)
        logger.info("Layer source type: {s!r}".format(s=layer_source_type))
        cell_width = float(
            metadata.get("cube:dimensions", "x", "step", default=10.0))
        cell_height = float(
            metadata.get("cube:dimensions", "y", "step", default=10.0))

        temporal_extent = load_params.temporal_extent
        from_date, to_date = normalize_temporal_extent(temporal_extent)
        metadata = metadata.filter_temporal(from_date, to_date)

        spatial_extent = load_params.spatial_extent
        west = spatial_extent.get("west", None)
        east = spatial_extent.get("east", None)
        north = spatial_extent.get("north", None)
        south = spatial_extent.get("south", None)
        srs = spatial_extent.get("crs", None)
        if isinstance(srs, int):
            srs = 'EPSG:%s' % str(srs)
        if srs is None:
            srs = 'EPSG:4326'

        bands = load_params.bands
        if bands:
            band_indices = [metadata.get_band_index(b) for b in bands]
            metadata = metadata.filter_bands(bands)
            metadata = metadata.rename_labels(metadata.band_dimension.name,
                                              bands, metadata.band_names)
        else:
            band_indices = None
        logger.info("band_indices: {b!r}".format(b=band_indices))
        # TODO: avoid this `still_needs_band_filter` ugliness.
        #       Also see https://github.com/Open-EO/openeo-geopyspark-driver/issues/29
        still_needs_band_filter = False

        correlation_id = env.get("correlation_id", '')
        logger.info("Correlation ID is '{cid}'".format(cid=correlation_id))

        logger.info("Detected process types:" + str(load_params.process_types))
        default_temporal_resolution = "ByDay"
        default_indexReduction = 8
        if len(load_params.process_types
               ) == 1 and ProcessType.GLOBAL_TIME in load_params.process_types:
            #for pure timeseries processing, adjust partitioning strategy
            default_temporal_resolution = "None"
            default_indexReduction = 0

        feature_flags = load_params.get("featureflags", {})
        experimental = feature_flags.get("experimental", False)
        tilesize = feature_flags.get("tilesize", 256)
        indexReduction = feature_flags.get("indexreduction",
                                           default_indexReduction)
        temporalResolution = feature_flags.get("temporalresolution",
                                               default_temporal_resolution)
        globalbounds = feature_flags.get("global_bounds", True)

        jvm = get_jvm()

        spatial_bounds_present = all(b is not None
                                     for b in [west, south, east, north])

        if not spatial_bounds_present:
            if env.get('require_bounds', False):
                raise OpenEOApiException(
                    code="MissingSpatialFilter",
                    status_code=400,
                    message="No spatial filter could be derived to load this collection: {c}. Please specify a bounding box, or polygons to define your area of interest.".format(c=collection_id))
            else:
                # whole-world processing, for instance in viewing services
                srs = "EPSG:4326"
                west = -180.0
                south = -90.0
                east = 180.0
                north = 90.0
                spatial_bounds_present = True

        extent = jvm.geotrellis.vector.Extent(float(west), float(south),
                                              float(east), float(north))
        metadata = metadata.filter_bbox(west=west,
                                        south=south,
                                        east=east,
                                        north=north,
                                        crs=srs)

        geometries = load_params.aggregate_spatial_geometries

        if not geometries:
            projected_polygons = jvm.org.openeo.geotrellis.ProjectedPolygons.fromExtent(
                extent, srs)
        elif isinstance(geometries, Point):
            buffered_extent = jvm.geotrellis.vector.Extent(
                *buffer_point_approx(geometries, srs).bounds)
            projected_polygons = jvm.org.openeo.geotrellis.ProjectedPolygons.fromExtent(
                buffered_extent, srs)
        elif isinstance(geometries, GeometryCollection) and any(
                isinstance(geom, Point) for geom in geometries.geoms):
            polygon_wkts = [
                str(buffer_point_approx(geom, srs))
                if isinstance(geom, Point) else str(geom)
                for geom in geometries.geoms
            ]
            projected_polygons = jvm.org.openeo.geotrellis.ProjectedPolygons.fromWkt(
                polygon_wkts, srs)
        else:
            projected_polygons = to_projected_polygons(jvm, geometries)

        single_level = env.get('pyramid_levels', 'all') != 'all'
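        # Batch jobs run with pyramid_levels set to 'highest' (see run_job in Example #4), so
        # single_level is True there and a FloatingLayoutScheme cube is built instead of a
        # full power-of-two pyramid.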

        if native_crs == 'UTM':
            target_epsg_code = auto_utm_epsg_for_geometry(
                box(west, south, east, north), srs)
        else:
            target_epsg_code = int(native_crs.split(":")[-1])

        if experimental:
            if load_params.target_resolution is not None:
                cell_width = float(load_params.target_resolution[0])
                cell_height = float(load_params.target_resolution[1])
            if load_params.target_crs is not None and isinstance(load_params.target_crs, int):
                target_epsg_code = load_params.target_crs

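        # Py4J idiom for a Scala companion object: the "ProjectedPolygons$" class holds its
        # singleton in the static MODULE$ field, on which reproject() is called.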
        projected_polygons_native_crs = (getattr(
            getattr(jvm.org.openeo.geotrellis, "ProjectedPolygons$"),
            "MODULE$").reproject(projected_polygons, target_epsg_code))

        datacubeParams = jvm.org.openeo.geotrelliscommon.DataCubeParameters()
        # Py4J exposes Scala vars through generated `name_$eq` setter methods; plain attribute assignment does not work.
        getattr(datacubeParams, "tileSize_$eq")(tilesize)
        getattr(datacubeParams,
                "maskingStrategyParameters_$eq")(load_params.custom_mask)
        if isinstance(load_params.data_mask, GeopysparkDataCube):
            datacubeParams.setMaskingCube(
                load_params.data_mask.get_max_level().srdd.rdd())
        datacubeParams.setPartitionerIndexReduction(indexReduction)
        datacubeParams.setPartitionerTemporalResolution(temporalResolution)

        if globalbounds and len(load_params.global_extent) > 0:
            ge = load_params.global_extent
            datacubeParams.setGlobalExtent(float(ge["west"]),
                                           float(ge["south"]),
                                           float(ge["east"]),
                                           float(ge["north"]), ge["crs"])

        if single_level:
            getattr(datacubeParams, "layoutScheme_$eq")("FloatingLayoutScheme")

        def metadata_properties(flatten_eqs=True) -> Dict[str, object]:
            layer_properties = metadata.get("_vito", "properties", default={})
            custom_properties = load_params.properties

            all_properties = {
                property_name:
                filter_properties.extract_literal_match(condition)
                for property_name, condition in {
                    **layer_properties,
                    **custom_properties
                }.items()
            }

            def eq_value(criterion: Dict[str, object]) -> object:
                if len(criterion) != 1:
                    raise ValueError(
                        f'expected a single "eq" criterion, was {criterion}')

                return criterion['eq']

            return ({
                property_name: eq_value(criterion)
                for property_name, criterion in all_properties.items()
            } if flatten_eqs else all_properties)

        def accumulo_pyramid():
            pyramidFactory = jvm.org.openeo.geotrellisaccumulo.PyramidFactory(
                "hdp-accumulo-instance",
                ','.join(ConfigParams().zookeepernodes))
            if layer_source_info.get("split", False):
                pyramidFactory.setSplitRanges(True)

            accumulo_layer_name = layer_source_info['data_id']
            nonlocal still_needs_band_filter
            still_needs_band_filter = bool(band_indices)

            polygons = load_params.aggregate_spatial_geometries

            if polygons:
                projected_polygons = to_projected_polygons(jvm, polygons)
                return pyramidFactory.pyramid_seq(
                    accumulo_layer_name, projected_polygons.polygons(),
                    projected_polygons.crs(), from_date, to_date)
            else:
                return pyramidFactory.pyramid_seq(accumulo_layer_name, extent,
                                                  srs, from_date, to_date)

        def s3_pyramid():
            endpoint = layer_source_info['endpoint']
            region = layer_source_info['region']
            bucket_name = layer_source_info['bucket_name']
            nonlocal still_needs_band_filter
            still_needs_band_filter = bool(band_indices)
            return jvm.org.openeo.geotrelliss3.PyramidFactory(endpoint, region, bucket_name) \
                .pyramid_seq(extent, srs, from_date, to_date)

        def s3_jp2_pyramid():
            endpoint = layer_source_info['endpoint']
            region = layer_source_info['region']

            return jvm.org.openeo.geotrelliss3.Jp2PyramidFactory(endpoint, region) \
                .pyramid_seq(extent, srs, from_date, to_date, band_indices)

        def file_s2_pyramid():
            def pyramid_factory(opensearch_endpoint, opensearch_collection_id,
                                opensearch_link_titles, root_path):
                return jvm.org.openeo.geotrellis.file.Sentinel2PyramidFactory(
                    opensearch_endpoint, opensearch_collection_id,
                    opensearch_link_titles, root_path,
                    jvm.geotrellis.raster.CellSize(cell_width, cell_height),
                    experimental)

            return file_pyramid(pyramid_factory)

        def file_oscars_pyramid():
            return file_s2_pyramid()

        def file_s5p_pyramid():
            return file_pyramid(
                jvm.org.openeo.geotrellis.file.Sentinel5PPyramidFactory)

        def file_probav_pyramid():
            opensearch_endpoint = layer_source_info.get(
                'opensearch_endpoint',
                ConfigParams().default_opensearch_endpoint)

            return jvm.org.openeo.geotrellis.file.ProbaVPyramidFactory(
                opensearch_endpoint,
                layer_source_info.get('opensearch_collection_id'),
                layer_source_info.get('root_path'),
                jvm.geotrellis.raster.CellSize(cell_width, cell_height)) \
                .pyramid_seq(extent, srs, from_date, to_date, band_indices, correlation_id)

        def file_pyramid(pyramid_factory):
            opensearch_endpoint = layer_source_info.get(
                'opensearch_endpoint',
                ConfigParams().default_opensearch_endpoint)
            opensearch_collection_id = layer_source_info[
                'opensearch_collection_id']
            opensearch_link_titles = metadata.opensearch_link_titles
            root_path = layer_source_info['root_path']

            factory = pyramid_factory(opensearch_endpoint,
                                      opensearch_collection_id,
                                      opensearch_link_titles, root_path)

            if single_level:
                # TODO EP-3561: UTM is not always the native projection of a layer (PROBA-V); need to determine the optimal projection
                return factory.datacube_seq(projected_polygons_native_crs,
                                            from_date, to_date,
                                            metadata_properties(),
                                            correlation_id, datacubeParams)
            else:
                if geometries:
                    return factory.pyramid_seq(projected_polygons.polygons(),
                                               projected_polygons.crs(),
                                               from_date, to_date,
                                               metadata_properties(),
                                               correlation_id)
                else:
                    return factory.pyramid_seq(extent, srs, from_date, to_date,
                                               metadata_properties(),
                                               correlation_id)

        def geotiff_pyramid():
            glob_pattern = layer_source_info['glob_pattern']
            date_regex = layer_source_info['date_regex']

            new_pyramid_factory = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_disk(
                glob_pattern, date_regex)

            return self._geotiff_pyramid_factories.setdefault(collection_id, new_pyramid_factory) \
                .pyramid_seq(extent, srs, from_date, to_date)

        def sentinel_hub_pyramid():
            # TODO: move the metadata manipulation out of this function and get rid of the nonlocal?
            nonlocal metadata

            dependencies = env.get('dependencies', {})
            sar_backscatter_arguments: Optional[SarBackscatterArgs] = (
                (load_params.sar_backscatter or SarBackscatterArgs())
                if sar_backscatter_compatible else None)

            if dependencies:
                source_location, card4l = dependencies[(
                    collection_id, to_hashable(metadata_properties()))]

                # date_regex supports:
                #  - original: _20210223.tif
                #  - CARD4L: s1_rtc_0446B9_S07E035_2021_02_03_MULTIBAND.tif
                #  - tiles assembled from cache: 31UDS_7_2-20190921.tif
                date_regex = r".+(\d{4})_?(\d{2})_?(\d{2}).*\.tif"
                interpret_as_cell_type = "float32ud0"
                lat_lon = card4l

                if source_location.startswith("file:"):
                    assembled_uri = source_location
                    glob_pattern = f"{assembled_uri}/*.tif"

                    logger.info(f"Sentinel Hub pyramid from {glob_pattern}")

                    pyramid_factory = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_disk(
                        glob_pattern, date_regex, interpret_as_cell_type,
                        lat_lon)
                else:
                    s3_uri = source_location
                    key_regex = r".+\.tif"
                    recursive = True

                    logger.info(f"Sentinel Hub pyramid from {s3_uri}")

                    pyramid_factory = jvm.org.openeo.geotrellis.geotiff.PyramidFactory.from_s3(
                        s3_uri, key_regex, date_regex, recursive,
                        interpret_as_cell_type, lat_lon)

                if sar_backscatter_arguments and sar_backscatter_arguments.mask:
                    metadata = metadata.append_band(
                        Band(name='mask', common_name=None,
                             wavelength_um=None))

                if sar_backscatter_arguments and sar_backscatter_arguments.local_incidence_angle:
                    metadata = metadata.append_band(
                        Band(name='local_incidence_angle',
                             common_name=None,
                             wavelength_um=None))

                return (pyramid_factory.datacube_seq(
                    projected_polygons_native_crs, from_date, to_date,
                    metadata_properties(), collection_id, datacubeParams)
                        if single_level else pyramid_factory.pyramid_seq(
                            extent, srs, from_date, to_date))
            else:
                if collection_id == 'PLANETSCOPE':
                    # note: "byoc-" prefix is optional for the collection ID but dataset ID requires it
                    shub_collection_id = feature_flags['byoc_collection_id']
                    dataset_id = shub_collection_id
                else:
                    shub_collection_id = layer_source_info['collection_id']
                    dataset_id = layer_source_info['dataset_id']

                endpoint = layer_source_info['endpoint']
                client_id = layer_source_info['client_id']
                client_secret = layer_source_info['client_secret']
                sample_type = jvm.org.openeo.geotrellissentinelhub.SampleType.withName(
                    layer_source_info.get('sample_type', 'UINT16'))

                shub_band_names = metadata.band_names

                if sar_backscatter_arguments and sar_backscatter_arguments.mask:
                    metadata = metadata.append_band(
                        Band(name='mask', common_name=None,
                             wavelength_um=None))
                    shub_band_names.append('dataMask')

                if sar_backscatter_arguments and sar_backscatter_arguments.local_incidence_angle:
                    metadata = metadata.append_band(
                        Band(name='local_incidence_angle',
                             common_name=None,
                             wavelength_um=None))
                    shub_band_names.append('localIncidenceAngle')

                band_gsds = [
                    band.gsd['value'] for band in metadata.bands
                    if band.gsd is not None
                ]
                if band_gsds:
                    cell_size = jvm.geotrellis.raster.CellSize(
                        min(float(gsd[0]) for gsd in band_gsds),
                        min(float(gsd[1]) for gsd in band_gsds))
                else:
                    cell_size = jvm.geotrellis.raster.CellSize(
                        cell_width, cell_height)

                pyramid_factory = jvm.org.openeo.geotrellissentinelhub.PyramidFactory.rateLimited(
                    endpoint, shub_collection_id, dataset_id, client_id,
                    client_secret,
                    sentinel_hub.processing_options(sar_backscatter_arguments)
                    if sar_backscatter_arguments else {}, sample_type,
                    cell_size)

                unflattened_metadata_properties = metadata_properties(
                    flatten_eqs=False)

                return (pyramid_factory.datacube_seq(
                    projected_polygons_native_crs.polygons(),
                    projected_polygons_native_crs.crs(), from_date, to_date,
                    shub_band_names, unflattened_metadata_properties,
                    datacubeParams)
                        if single_level else pyramid_factory.pyramid_seq(
                            extent, srs, from_date, to_date, shub_band_names,
                            unflattened_metadata_properties))

        def creo_pyramid():
            mission = layer_source_info['mission']
            level = layer_source_info['level']
            catalog = CreoCatalogClient(mission=mission, level=level)
            product_paths = catalog.query_product_paths(
                datetime.strptime(from_date[:10], "%Y-%m-%d"),
                datetime.strptime(to_date[:10], "%Y-%m-%d"),
                ulx=west,
                uly=north,
                brx=east,
                bry=south)
            return jvm.org.openeo.geotrelliss3.CreoPyramidFactory(product_paths, metadata.band_names) \
                .datacube_seq(projected_polygons_native_crs, from_date, to_date,{},collection_id)

        def file_cgls_pyramid():
            if len(metadata.band_names) != 1:
                raise ValueError(
                    "expected a single band name for collection {cid}, got {bs} instead"
                    .format(cid=collection_id, bs=metadata.band_names))

            data_glob = layer_source_info['data_glob']
            band_name = metadata.band_names[0].upper()
            date_regex = layer_source_info['date_regex']

            factory = jvm.org.openeo.geotrellis.file.CglsPyramidFactory(
                data_glob, band_name, date_regex)

            return (factory.datacube_seq(projected_polygons, from_date,
                                         to_date)
                    if single_level else factory.pyramid_seq(
                        projected_polygons.polygons(),
                        projected_polygons.crs(), from_date, to_date))

        def file_agera5_pyramid():
            data_glob = layer_source_info['data_glob']
            band_file_markers = metadata.band_names
            date_regex = layer_source_info['date_regex']

            factory = jvm.org.openeo.geotrellis.file.AgEra5PyramidFactory(
                data_glob, band_file_markers, date_regex)

            return (factory.datacube_seq(projected_polygons, from_date,
                                         to_date, {}, "", datacubeParams)
                    if single_level else factory.pyramid_seq(
                        projected_polygons.polygons(),
                        projected_polygons.crs(), from_date, to_date))

        logger.info("loading pyramid {s}".format(s=layer_source_type))
        if layer_source_type == 's3':
            pyramid = s3_pyramid()
        elif layer_source_type == 's3-jp2':
            pyramid = s3_jp2_pyramid()
        elif layer_source_type == 'file-s2':
            pyramid = file_s2_pyramid()
        elif layer_source_type == 'file-probav':
            pyramid = file_probav_pyramid()
        elif layer_source_type == 'geotiff':
            pyramid = geotiff_pyramid()
        elif layer_source_type == 'file-s1-coherence':
            pyramid = file_s2_pyramid()
        elif layer_source_type == 'sentinel-hub':
            pyramid = sentinel_hub_pyramid()
        elif layer_source_type == 'creo':
            pyramid = creo_pyramid()
        elif layer_source_type == 'file-cgls':
            pyramid = file_cgls_pyramid()
        elif layer_source_type == 'file-agera5':
            pyramid = file_agera5_pyramid()
        elif layer_source_type == 'file-oscars':
            pyramid = file_oscars_pyramid()
        elif layer_source_type == 'creodias-s1-backscatter':
            sar_backscatter_arguments = load_params.sar_backscatter or SarBackscatterArgs()
            s1_backscatter_orfeo = get_s1_backscatter_orfeo(
                version=sar_backscatter_arguments.options.get(
                    "implementation_version", "2"),
                jvm=jvm)
            pyramid = s1_backscatter_orfeo.creodias(
                projected_polygons=projected_polygons_native_crs,
                from_date=from_date,
                to_date=to_date,
                correlation_id=correlation_id,
                sar_backscatter_arguments=sar_backscatter_arguments,
                bands=bands,
                extra_properties=metadata_properties())
        elif layer_source_type == 'accumulo':
            pyramid = accumulo_pyramid()
        elif layer_source_type == 'testing':
            pyramid = load_test_collection(collection_id=collection_id,
                                           collection_metadata=metadata,
                                           extent=extent,
                                           srs=srs,
                                           from_date=from_date,
                                           to_date=to_date,
                                           bands=bands,
                                           correlation_id=correlation_id)
        else:
            raise OpenEOApiException(
                message="Invalid layer source type {t!r}".format(
                    t=layer_source_type))

        if isinstance(pyramid, dict):
            levels = pyramid
        else:
            temporal_tiled_raster_layer = jvm.geopyspark.geotrellis.TemporalTiledRasterLayer
            option = jvm.scala.Option

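            # pyramid is a Scala Seq of (zoom, layer) tuples; Py4J exposes the tuple elements
            # as _1() and _2(), which are rewrapped here as geopyspark TiledRasterLayers.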
            levels = {
                pyramid.apply(index)._1(): geopyspark.TiledRasterLayer(
                    geopyspark.LayerType.SPACETIME,
                    temporal_tiled_raster_layer(
                        option.apply(pyramid.apply(index)._1()),
                        pyramid.apply(index)._2()))
                for index in range(0, pyramid.size())
            }

        if single_level:
            max_zoom = max(levels.keys())
            levels = {max_zoom: levels[max_zoom]}

        image_collection = GeopysparkDataCube(
            pyramid=geopyspark.Pyramid(levels), metadata=metadata)

        if postprocessing_band_graph is not None:
            from openeogeotrellis.geotrellis_tile_processgraph_visitor import GeotrellisTileProcessGraphVisitor
            visitor = GeotrellisTileProcessGraphVisitor()
            image_collection = image_collection.apply_dimension(
                visitor.accept_process_graph(postprocessing_band_graph),
                image_collection.metadata.band_dimension.name)

        if still_needs_band_filter:
            # TODO: avoid this `still_needs_band_filter` ugliness.
            #       Also see https://github.com/Open-EO/openeo-geopyspark-driver/issues/29
            image_collection = image_collection.filter_bands(band_indices)

        return image_collection
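
A hypothetical invocation sketch (the LoadParameters fields shown are assumptions based on their use above):

load_params = LoadParameters()
load_params.temporal_extent = ("2021-01-01", "2021-02-01")
load_params.spatial_extent = {"west": 3.0, "south": 50.0, "east": 4.0, "north": 51.0, "crs": "EPSG:4326"}
load_params.bands = ["B02", "B03"]
cube = backend.load_collection("SENTINEL2_L2A", load_params, EvalEnv({"pyramid_levels": "highest"}))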
Example #7
def __init__(self, _builder=None):
    super().__init__()
    self.builder = _builder or get_jvm().org.openeo.geotrellis.OpenEOProcessScriptBuilder()
    # ordered registry of visited processes: this class doubles as the script builder and a process tracker
    self.processes = OrderedDict()
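
As Example #6 shows, the visitor is driven by accept_process_graph and the result is handed to apply_dimension; a minimal sketch:

visitor = GeotrellisTileProcessGraphVisitor()
result = visitor.accept_process_graph(postprocessing_band_graph)  # populates visitor.builder and visitor.processes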