Example #1
    class Foo:
        @staticmethod
        def add(x, y):
            """Add x and y."""
            return x + y

        do_plus = legacy_alias(add, "do_plus")
Example #2
    class Foo:
        @classmethod
        def add(cls, x, y):
            """Add x and y."""
            assert cls is Foo
            return x + y

        do_plus = legacy_alias(add, "do_plus")
Example #3
def test_legacy_alias_function(recwarn):
    def add(x, y):
        """Add x and y."""
        return x + y

    do_plus = legacy_alias(add, "do_plus")

    assert add.__doc__ == "Add x and y."
    assert do_plus.__doc__ == "Use of this legacy function is deprecated, use :py:func:`.add` instead."

    assert add(2, 3) == 5
    assert len(recwarn) == 0

    with pytest.warns(DeprecationWarning, match="Call to deprecated function `do_plus`, use `add` instead."):
        res = do_plus(2, 3)
    assert res == 5
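
For reference, a minimal sketch of what `legacy_alias` could look like, consistent with the behavior asserted in the test above (the actual helper in the openeo client may differ, e.g. it also has to handle the staticmethod/classmethod descriptors shown in Examples #1 and #2):

import functools
import warnings

def legacy_alias(orig, name):
    """Minimal sketch of a deprecated-alias factory (assumed behavior)."""
    msg = "Call to deprecated function `{n}`, use `{o}` instead.".format(n=name, o=orig.__name__)

    @functools.wraps(orig)
    def wrapper(*args, **kwargs):
        # Warn on every call to the alias, then delegate to the original.
        warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
        return orig(*args, **kwargs)

    wrapper.__name__ = name
    wrapper.__doc__ = "Use of this legacy function is deprecated, use :py:func:`.{o}` instead.".format(o=orig.__name__)
    return wrapper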
Example #4
    class Foo:
        def add(self, x, y):
            """Add x and y."""
            return x + y

        do_plus = legacy_alias(add, "do_plus")
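Example #5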
class _ProcessGraphAbstraction(_FromNodeMixin):
    """
    Base class for client-side abstractions/wrappers
    for structures that are represented by an openEO process graph:
    raster data cubes, vector cubes, ML models, ...
    """
    def __init__(self, pgnode: PGNode, connection: "Connection"):
        self._pg = pgnode
        self._connection = connection

    def __str__(self):
        return "{t}({pg})".format(t=self.__class__.__name__, pg=self._pg)

    def flat_graph(self) -> dict:
        """
        Get the process graph in flat dict representation

        .. note:: This method is mainly for internal use, subject to change and not recommended for general usage.
            Instead, use :py:meth:`to_json()` to get a JSON representation of the process graph.
        """
        # TODO: wrap in {"process_graph":...} by default/optionally?
        return self._pg.flat_graph()

    flatten = legacy_alias(flat_graph, name="flatten")

    def to_json(self, indent=2, separators=None) -> str:
        """
        Get JSON representation of (flat dict) process graph.
        """
        pg = {"process_graph": self.flat_graph()}
        return json.dumps(pg, indent=indent, separators=separators)

    @property
    def _api_version(self):
        return self._connection.capabilities().api_version_check

    @property
    def connection(self) -> "Connection":
        return self._connection

    def result_node(self):
        """Get the result node (:py:class:`PGNode`) of the process graph."""
        return self._pg

    def from_node(self):
        # _FromNodeMixin API
        return self._pg

    def _build_pgnode(self, process_id: str, arguments: dict,
                      namespace: Optional[str], **kwargs) -> PGNode:
        """
        Helper to build a PGNode from given argument dict and/or kwargs,
        and possibly resolving the `THIS` reference.
        """
        arguments = {**(arguments or {}), **kwargs}
        for k, v in arguments.items():
            if v is THIS:
                arguments[k] = self
            # TODO: also necessary to traverse lists/dictionaries?
        return PGNode(process_id=process_id,
                      arguments=arguments,
                      namespace=namespace)
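
A usage sketch of `_build_pgnode` (hypothetical subclass and process id; `THIS` is the placeholder sentinel from `openeo.rest.datacube` referenced above): any argument whose value is `THIS` is replaced by the wrapper object itself, so the new process takes the current cube as input.

class HypotheticalCube(_ProcessGraphAbstraction):
    def scale(self, factor: float) -> "HypotheticalCube":
        # "hypothetical_scale" is a made-up process id for illustration.
        pg = self._build_pgnode(
            process_id="hypothetical_scale",
            arguments={"data": THIS, "factor": factor},
            namespace=None,
        )
        return HypotheticalCube(pg, connection=self._connection)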
Example #6
class VectorCube(_ProcessGraphAbstraction):
    """
    A Vector Cube, or 'Vector Collection', is a data structure containing 'Features':
    https://www.w3.org/TR/sdw-bp/#dfn-feature

    The features in this cube are restricted to have a geometry. Geometries can be points, lines, polygons, etc.
    A geometry is specified in a 'coordinate reference system':
    https://www.w3.org/TR/sdw-bp/#dfn-coordinate-reference-system-(crs)
    """

    def __init__(self, graph: PGNode, connection: 'Connection', metadata: CollectionMetadata = None):
        super().__init__(pgnode=graph, connection=connection)
        # TODO: does VectorCube need CollectionMetadata?
        self.metadata = metadata

    def process(
            self,
            process_id: str,
            arguments: dict = None,
            metadata: Optional[CollectionMetadata] = None,
            namespace: Optional[str] = None,
            **kwargs) -> 'VectorCube':
        """
        Generic helper to create a new VectorCube by applying a process.

        :param process_id: process id of the process.
        :param arguments: argument dictionary for the process.
        :return: new VectorCube instance
        """
        pg = self._build_pgnode(process_id=process_id, arguments=arguments, namespace=namespace, **kwargs)
        return VectorCube(graph=pg, connection=self._connection, metadata=metadata or self.metadata)

    @openeo_process
    def run_udf(
            self, udf: str, runtime: str, version: Optional[str] = None, context: Optional[dict] = None
    ) -> "VectorCube":
        """
        .. versionadded:: 0.10.0
        """
        return self.process(
            process_id="run_udf",
            data=self, udf=udf, runtime=runtime,
            arguments=dict_no_none({"version": version, "context": context}),
        )

    @openeo_process
    def save_result(self, format: str = "GeoJSON", options: dict = None):
        return self.process(
            process_id="save_result",
            arguments={
                "data": self,
                "format": format,
                "options": options or {}
            }
        )

    def download(self, outputfile: str, format: str = "GeoJSON", options: dict = None):
        cube = self.save_result(format=format, options=options)
        return self._connection.download(cube.flat_graph(), outputfile)

    def execute_batch(
            self,
            outputfile: Union[str, pathlib.Path], out_format: str = None,
            print=print, max_poll_interval=60, connection_retry_interval=30,
            job_options=None, **format_options) -> BatchJob:
        """
        Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
        This method is recommended when the batch job is expected to finish in a reasonable amount of time.

        For very long-running jobs, you probably do not want to keep the client running.

        :param outputfile: the path of a file to which a result can be written
        :param out_format: (optional) file format of the job result
        :param job_options: custom job options to pass to the backend
        :param format_options: output format parameters

        """
        job = self.create_job(out_format, job_options=job_options, **format_options)
        return job.run_synchronous(
            # TODO #135 support multi file result sets too
            outputfile=outputfile,
            print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
        )

    def create_job(self, out_format=None, job_options=None, **format_options) -> BatchJob:
        """
        Sends the underlying process graph to the backend as a batch job and returns a BatchJob instance.

        :param out_format: file format of the job result
        :param job_options: custom job options to pass to the backend
        :param format_options: output format parameters
        :return: BatchJob instance of the created job
        """
        shp = self
        if out_format:
            # add `save_result` node
            shp = shp.save_result(format=out_format, options=format_options)
        return self._connection.create_job(process_graph=shp.flat_graph(), additional=job_options)

    send_job = legacy_alias(create_job, name="send_job")
Example #7
class Connection(RestApiConnection):
    """
    Connection to an openEO backend.
    """

    _MINIMUM_API_VERSION = ComparableVersion("0.4.0")

    # Temporary workaround flag to enable for backends (e.g. EURAC) that expect id_token to be sent as bearer token
    # TODO DEPRECATED To remove when all backends properly expect access_token
    # see https://github.com/Open-EO/openeo-wcps-driver/issues/45
    oidc_auth_user_id_token_as_bearer = False

    def __init__(
            self, url, auth: AuthBase = None, session: requests.Session = None, default_timeout: int = None,
            auth_config: AuthConfig = None, refresh_token_store: RefreshTokenStore = None
    ):
        """
        Constructor of Connection (authentication can be set up afterwards with the ``authenticate_*`` methods).

        :param url: backend root url
        """
        self._orig_url = url
        super().__init__(
            root_url=self.version_discovery(url, session=session),
            auth=auth, session=session, default_timeout=default_timeout
        )
        self._capabilities_cache = {}

        # Initial API version check.
        if self._api_version.below(self._MINIMUM_API_VERSION):
            raise ApiVersionException("OpenEO API version should be at least {m!s}, but got {v!s}".format(
                m=self._MINIMUM_API_VERSION, v=self._api_version)
            )

        self._auth_config = auth_config
        self._refresh_token_store = refresh_token_store or RefreshTokenStore()

    @classmethod
    def version_discovery(cls, url: str, session: requests.Session = None) -> str:
        """
        Do automatic openEO API version discovery from given url, using a "well-known URI" strategy.

        :param url: initial backend url (not including "/.well-known/openeo")
        :return: root url of highest supported backend version
        """
        try:
            well_known_url_response = RestApiConnection(url, session=session).get("/.well-known/openeo")
            assert well_known_url_response.status_code == 200
            versions = well_known_url_response.json()["versions"]
            supported_versions = [v for v in versions if cls._MINIMUM_API_VERSION <= v["api_version"]]
            assert supported_versions
            production_versions = [v for v in supported_versions if v.get("production", True)]
            highest_version = max(production_versions or supported_versions, key=lambda v: v["api_version"])
            _log.debug("Highest supported version available in backend: %s", highest_version)
            return highest_version['url']
        except Exception:
            # Be very lenient about failing on the well-known URI strategy.
            return url
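
    # Illustrative sketch (hypothetical URLs): version_discovery consumes a well-known
    # document like
    #     {"versions": [
    #         {"api_version": "0.4.2", "url": "https://backend.example/openeo/0.4"},
    #         {"api_version": "1.0.0", "url": "https://backend.example/openeo/1.0"},
    #         {"api_version": "1.1.0", "url": "https://backend.example/openeo/1.1", "production": False},
    #     ]}
    # and would return "https://backend.example/openeo/1.0" here: the highest
    # production version (versions default to production) that meets _MINIMUM_API_VERSION.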

    def _get_auth_config(self) -> AuthConfig:
        if self._auth_config is None:
            self._auth_config = AuthConfig()
        return self._auth_config

    def authenticate_basic(self, username: str = None, password: str = None) -> 'Connection':
        """
        Authenticate a user to the backend using basic username and password.

        :param username: User name
        :param password: User passphrase
        """
        if username is None:
            username, password = self._get_auth_config().get_basic_auth(backend=self._orig_url)
            if username is None:
                raise OpenEoClientException("No username/password given or found.")

        resp = self.get(
            '/credentials/basic',
            # /credentials/basic is the only endpoint that expects a Basic HTTP auth
            auth=HTTPBasicAuth(username, password)
        ).json()
        # Switch to bearer based authentication in further requests.
        if self._api_version.at_least("1.0.0"):
            self.auth = BearerAuth(bearer='basic//{t}'.format(t=resp["access_token"]))
        else:
            self.auth = BearerAuth(bearer=resp["access_token"])
        return self
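
    # Illustrative usage (hypothetical backend URL and credentials):
    #     con = Connection("https://backend.example/openeo").authenticate_basic("alice", "s3cr3t")
    # After this call, `con.auth` is a BearerAuth, e.g. "basic//<access_token>" on API >= 1.0.0.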

    def authenticate_OIDC(
            self, client_id: str,
            provider_id: str = None,
            webbrowser_open=None,
            timeout=120,
            server_address: Tuple[str, int] = None
    ) -> 'Connection':
        """
        Authenticates a user to the backend using OpenID Connect.

        :param client_id: Client id to use for OpenID Connect authentication
        :param webbrowser_open: optional handler for the initial OAuth authentication request
            (opens a webbrowser by default)
        :param timeout: number of seconds after which to abort the authentication procedure
        :param server_address: optional tuple (hostname, port_number) to serve the OAuth redirect callback on

        TODO: deprecated?
        """
        # TODO: option to increase log level temporarily?
        provider_id, provider = self._get_oidc_provider(provider_id)

        client_info = OidcClientInfo(client_id=client_id, provider=provider)
        authenticator = OidcAuthCodePkceAuthenticator(
            client_info=client_info,
            webbrowser_open=webbrowser_open,
            timeout=timeout,
            server_address=server_address,
        )
        return self._authenticate_oidc(authenticator, provider_id=provider_id)

    def _get_oidc_provider(self, provider_id: Union[str, None] = None) -> Tuple[str, OidcProviderInfo]:
        """
        Resolve the OpenID Connect provider info for the given provider_id

        :param provider_id: id of OIDC provider as specified by backend (/credentials/oidc).
            Can be None if there is just one provider.
        :return: updated provider_id and provider info object
        """
        if self._api_version.at_least("1.0.0"):
            oidc_info = self.get("/credentials/oidc", expected_status=200).json()
            providers = {p["id"]: p for p in oidc_info["providers"]}
            _log.info("Found OIDC providers: {p}".format(p=list(providers.keys())))
            if provider_id:
                if provider_id not in providers:
                    raise OpenEoClientException("Requested provider {r!r} not available. Should be one of {p}.".format(
                        r=provider_id, p=list(providers.keys()))
                    )
                provider = providers[provider_id]
            elif len(providers) == 1:
                # No provider id given, but there is only one anyway: we can handle that.
                provider_id, provider = providers.popitem()
            else:
                raise OpenEoClientException("No provider_id given. Available: {p!r}.".format(
                    p=list(providers.keys()))
                )
            provider = OidcProviderInfo(issuer=provider["issuer"], scopes=provider.get("scopes"))
        else:
            # Per spec: '/credentials/oidc' will redirect to OpenID Connect discovery document
            provider = OidcProviderInfo(discovery_url=self.build_url('/credentials/oidc'))
        return provider_id, provider

    def _get_oidc_provider_and_client_info(
            self, provider_id: str,
            client_id: Union[str, None], client_secret: Union[str, None]
    ) -> Tuple[str, OidcClientInfo]:
        """
        Resolve provider_id and client info (as given or from config)

        :param provider_id: id of OIDC provider as specified by backend (/credentials/oidc).
            Can be None if there is just one provider.

        :return: (provider_id, client_info) tuple
        """
        provider_id, provider = self._get_oidc_provider(provider_id)

        if client_id is None:
            client_id, client_secret = self._get_auth_config().get_oidc_client_configs(
                backend=self._orig_url, provider_id=provider_id
            )
            _log.info("Using client_id {c!r} from config (provider {p!r})".format(c=client_id, p=provider_id))
            if client_id is None:
                raise OpenEoClientException("No client ID found.")

        client_info = OidcClientInfo(client_id=client_id, client_secret=client_secret, provider=provider)

        return provider_id, client_info

    def _authenticate_oidc(
            self,
            authenticator: OidcAuthenticator,
            provider_id: str,
            store_refresh_token: bool = False
    ) -> 'Connection':
        """
        Authenticate through OIDC and set up bearer token (based on OIDC access_token) for further requests.
        """
        tokens = authenticator.get_tokens()
        _log.info("Obtained tokens: {t}".format(t=[k for k, v in tokens._asdict().items() if v]))
        if tokens.refresh_token and store_refresh_token:
            self._refresh_token_store.set_refresh_token(
                issuer=authenticator.provider_info.issuer,
                client_id=authenticator.client_id,
                refresh_token=tokens.refresh_token
            )
        token = tokens.access_token if not self.oidc_auth_user_id_token_as_bearer else tokens.id_token
        if self._api_version.at_least("1.0.0"):
            self.auth = BearerAuth(bearer='oidc/{p}/{t}'.format(p=provider_id, t=token))
        else:
            self.auth = BearerAuth(bearer=token)
        return self
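
    # Illustrative resulting bearer values (hypothetical provider id and token):
    #     API >= 1.0.0:  "oidc/<provider_id>/<access_token>"
    #     older APIs:    "<access_token>"
    # (or the OIDC id_token instead of the access_token when the
    # `oidc_auth_user_id_token_as_bearer` workaround flag is enabled)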

    def authenticate_oidc_authorization_code(
            self,
            client_id: str = None,
            client_secret: str = None,
            provider_id: str = None,
            timeout: int = None,
            server_address: Tuple[str, int] = None,
            webbrowser_open: Callable = None,
            store_refresh_token=False,
    ) -> 'Connection':
        """
        OpenID Connect Authorization Code Flow (with PKCE).

        WARNING: this API is in experimental phase
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id, client_id=client_id, client_secret=client_secret
        )
        authenticator = OidcAuthCodePkceAuthenticator(
            client_info=client_info,
            webbrowser_open=webbrowser_open, timeout=timeout, server_address=server_address
        )
        return self._authenticate_oidc(authenticator, provider_id=provider_id, store_refresh_token=store_refresh_token)

    def authenticate_oidc_client_credentials(
            self,
            client_id: str = None,
            client_secret: str = None,
            provider_id: str = None,
            store_refresh_token=False,
    ) -> 'Connection':
        """
        OpenID Connect Client Credentials flow.

        WARNING: this API is in experimental phase
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id, client_id=client_id, client_secret=client_secret
        )
        authenticator = OidcClientCredentialsAuthenticator(client_info=client_info)
        return self._authenticate_oidc(authenticator, provider_id=provider_id, store_refresh_token=store_refresh_token)

    def authenticate_oidc_resource_owner_password_credentials(
            self,
            username: str, password: str,
            client_id: str = None,
            client_secret: str = None,
            provider_id: str = None,
            store_refresh_token=False
    ) -> 'Connection':
        """
        OpenID Connect Resource Owner Password Credentials flow.

        WARNING: this API is in experimental phase
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id, client_id=client_id, client_secret=client_secret
        )
        # TODO: also get username and password from config?
        authenticator = OidcResourceOwnerPasswordAuthenticator(
            client_info=client_info, username=username, password=password
        )
        return self._authenticate_oidc(authenticator, provider_id=provider_id, store_refresh_token=store_refresh_token)

    def authenticate_oidc_refresh_token(
            self, client_id: str = None, refresh_token: str = None, client_secret: str = None, provider_id: str = None
    ) -> 'Connection':
        """
        OpenID Connect Refresh Token flow.

        WARNING: this API is in experimental phase
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id, client_id=client_id, client_secret=client_secret
        )

        if refresh_token is None:
            refresh_token = self._refresh_token_store.get_refresh_token(
                issuer=client_info.provider.issuer,
                client_id=client_info.client_id
            )
            if refresh_token is None:
                raise OpenEoClientException("No refresh token given or found")

        authenticator = OidcRefreshTokenAuthenticator(client_info=client_info, refresh_token=refresh_token)
        return self._authenticate_oidc(authenticator, provider_id=provider_id)

    def authenticate_oidc_device(
            self, client_id: str = None, client_secret: str = None, provider_id: str = None,
            store_refresh_token=False,
            **kwargs
    ) -> 'Connection':
        """
        Authenticate with OAuth Device Authorization grant/flow

        WARNING: this API is in experimental phase
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id, client_id=client_id, client_secret=client_secret
        )
        authenticator = OidcDeviceAuthenticator(client_info=client_info, **kwargs)
        return self._authenticate_oidc(authenticator, provider_id=provider_id, store_refresh_token=store_refresh_token)

    def describe_account(self) -> dict:
        """
        Describes the currently authenticated user account.
        """
        return self.get('/me').json()

    def user_jobs(self) -> List[dict]:
        """
        Loads all jobs of the current user.

        :return: list of job metadata dictionaries
        """
        # TODO duplication with `list_jobs()` method
        return self.get('/jobs').json()["jobs"]

    def list_collections(self) -> List[dict]:
        """
        Loads all available collections.

        :return: list of collection metadata dictionaries
        """
        return self.get('/collections').json()["collections"]

    def list_collection_ids(self) -> List[str]:
        """
        Get list of all collection ids

        :return: list of collection ids
        """
        return [collection['id'] for collection in self.list_collections() if 'id' in collection]

    def capabilities(self) -> RESTCapabilities:
        """
        Loads all available capabilities of the backend.

        :return: RESTCapabilities object describing the backend capabilities
        """
        if "capabilities" not in self._capabilities_cache:
            self._capabilities_cache["capabilities"] = RESTCapabilities(self.get('/').json())
        return self._capabilities_cache["capabilities"]

    def list_output_formats(self) -> dict:
        if self._api_version.at_least("1.0.0"):
            return self.list_file_formats()["output"]
        else:
            return self.get('/output_formats').json()

    list_file_types = legacy_alias(list_output_formats, "list_file_types")

    def list_file_formats(self) -> dict:
        """
        Get available input and output formats
        """
        if "file_formats" not in self._capabilities_cache:
            self._capabilities_cache["file_formats"] = self.get('/file_formats').json()
        return self._capabilities_cache["file_formats"]

    def list_service_types(self) -> dict:
        """
        Loads all available service types.

        :return: dict of all available service types
        """
        return self.get('/service_types').json()

    def list_services(self) -> dict:
        """
        Loads all available services of the authenticated user.

        :return: dict of all services of the authenticated user
        """
        # TODO return parsed service objects
        return self.get('/services').json()

    def describe_collection(self, name) -> dict:
        # TODO: Maybe create some kind of Data class.
        """
        Loads detailed information of a specific image collection.

        :param name: id of the collection
        :return: dict with detailed information about the collection
        """
        return self.get('/collections/{}'.format(name)).json()

    def collection_metadata(self, name) -> CollectionMetadata:
        return CollectionMetadata(metadata=self.describe_collection(name))

    def list_processes(self) -> List[dict]:
        # TODO: Maybe format the result dictionary so that the process_id is the key of the dictionary.
        """
        Loads all available processes of the backend.

        :return: list of process metadata dictionaries
        """
        return self.get('/processes').json()["processes"]

    def list_jobs(self) -> List[dict]:
        """
        Lists all jobs of the authenticated user.

        :return: list of job metadata dictionaries
        """
        # TODO: Maybe format the result so that there get Job classes returned.
        # TODO: duplication with `user_jobs()` method
        return self.get('/jobs').json()["jobs"]

    def save_user_defined_process(
            self, user_defined_process_id: str, process_graph: dict,
            parameters: List[Union[dict, Parameter]] = None, public: bool = False) -> RESTUserDefinedProcess:
        """
        Saves a process graph and its metadata in the backend as a user-defined process for the authenticated user.

        :param user_defined_process_id: unique identifier for the user-defined process
        :param process_graph: a process graph
        :param parameters: a list of parameters
        :param public: visible to other users?
        :return: a RESTUserDefinedProcess instance
        """
        if user_defined_process_id in set(p["id"] for p in self.list_processes()):
            warnings.warn("Defining user-defined process {u!r} with same id as a pre-defined process".format(
                u=user_defined_process_id))
        udp = RESTUserDefinedProcess(user_defined_process_id=user_defined_process_id, connection=self)
        udp.store(process_graph=process_graph, parameters=parameters, public=public)
        return udp

    def list_user_defined_processes(self) -> List[dict]:
        """
        Lists all user-defined processes of the authenticated user.
        """
        return self.get("/process_graphs").json()["processes"]

    def user_defined_process(self, user_defined_process_id: str) -> RESTUserDefinedProcess:
        """
        Get the user-defined process based on its id. The process with the given id should already exist.

        :param user_defined_process_id: the id of the user-defined process
        :return: a RESTUserDefinedProcess instance
        """
        return RESTUserDefinedProcess(user_defined_process_id=user_defined_process_id, connection=self)

    def validate_processgraph(self, process_graph):
        # Endpoint: POST /validate
        raise NotImplementedError()

    @property
    def _api_version(self) -> ComparableVersion:
        # TODO make this a public property (it's also useful outside the Connection class)
        return self.capabilities().api_version_check

    def datacube_from_process(self, process_id: str, **kwargs) -> DataCube:
        """
        Load a raster datacube from a custom process.

        :param process_id: the process id of the custom process
        :param kwargs: the arguments of the custom process
        :return: a DataCube, without valid metadata, as the client is not aware of this custom process
        """

        if self._api_version.at_least("1.0.0"):
            graph = PGNode(process_id, kwargs)
            return DataCube(graph, self)
        else:
            raise OpenEoClientException(
                "This method requires support for at least version 1.0.0 in the openEO backend.")

    def load_collection(self, collection_id: str, **kwargs) -> Union[ImageCollectionClient, DataCube]:
        """
        Load an image collection by collection id

        see :py:meth:`openeo.rest.imagecollectionclient.ImageCollectionClient.load_collection`
        for available arguments.

        :param collection_id: image collection identifier (string)
        :return: DataCube (or ImageCollectionClient for API versions below 1.0.0)
        """
        if self._api_version.at_least("1.0.0"):
            return DataCube.load_collection(collection_id=collection_id, connection=self, **kwargs)
        else:
            return ImageCollectionClient.load_collection(collection_id=collection_id, session=self, **kwargs)

    # Legacy alias.
    imagecollection = load_collection

    def create_service(self, graph: dict, type: str, **kwargs) -> dict:
        # TODO: type hint for graph: is it a nested or a flat one?
        req = self._build_request_with_process_graph(process_graph=graph, type=type, **kwargs)
        response = self.post(path="/services", json=req, expected_status=201)
        # TODO: "location" is url of the service metadata, not (base) url of service (https://github.com/Open-EO/openeo-api/issues/269)
        # TODO: fetch this metadata and return a full metadata object instead?
        return {
            'url': response.headers.get('Location'),
            'service_id': response.headers.get("OpenEO-Identifier"),
        }

    def remove_service(self, service_id: str):
        """
        Stop and remove a secondary web service.

        :param service_id: service identifier
        """
        self.delete('/services/' + service_id)

    def job_results(self, job_id):
        return self.get("/jobs/{}/results".format(job_id)).json()

    def job_logs(self, job_id, offset):
        return self.get("/jobs/{}/logs".format(job_id), params={'offset': offset}).json()

    def list_files(self):
        """
        Lists all files that the logged-in user has uploaded.

        :return: file_list: List of the user uploaded files.
        """

        return self.get('/files').json()['files']

    def create_file(self, path):
        """
        Creates a virtual file.

        :return: file object.
        """
        # No endpoint just returns a file object.
        raise NotImplementedError()

    def _build_request_with_process_graph(self, process_graph: dict, **kwargs) -> dict:
        """
        Prepare a json payload with a process graph to submit to /result, /services, /jobs, ...
        :param process_graph: flat dict representing a process graph
        """
        result = kwargs
        if self._api_version.at_least("1.0.0"):
            result["process"] = {"process_graph": process_graph}
        else:
            result["process_graph"] = process_graph
        return result
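
    # Illustrative payload shapes produced above (with a process graph {...} and
    # extra kwargs such as title="demo"):
    #     API >= 1.0.0:  {"title": "demo", "process": {"process_graph": {...}}}
    #     older APIs:    {"title": "demo", "process_graph": {...}}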

    # TODO: Maybe rename to execute and merge with execute().
    def download(self, graph: dict, outputfile: Union[Path, str, None] = None):
        """
        Downloads the result of a process graph synchronously,
        and saves the result to the given file, or returns a bytes object if no outputfile is specified.
        This method is useful to export binary content such as images. For JSON content, the execute method is recommended.

        :param graph: (flat) dict representing a process graph
        :param outputfile: output file
        """
        request = self._build_request_with_process_graph(process_graph=graph)
        r = self.post(path="/result", json=request, stream=True, timeout=1000)
        if outputfile is not None:
            with Path(outputfile).open(mode="wb") as f:
                shutil.copyfileobj(r.raw, f)
        else:
            return r.content

    def execute(self, process_graph: dict):
        """
        Execute a process graph synchronously.

        :param process_graph: (flat) dict representing a process graph
        """
        req = self._build_request_with_process_graph(process_graph=process_graph)
        return self.post(path="/result", json=req).json()

    def create_job(self, process_graph: dict, title: str = None, description: str = None,
                   plan: str = None, budget=None,
                   additional: Dict = None) -> RESTJob:
        """
        Posts a job to the back end.

        :param process_graph: (flat) dict representing process graph
        :param title: job title
        :param description: job description
        :param plan: billing plan
        :param budget: maximum cost the job is allowed to produce
        :param additional: additional job options to pass to the backend
        :return: RESTJob instance of the newly created job
        """
        # TODO move all this (RESTJob factory) logic to RESTJob?
        req = self._build_request_with_process_graph(
            process_graph=process_graph,
            title=title, description=description, plan=plan, budget=budget
        )
        if additional:
            # TODO: get rid of this non-standard field? https://github.com/Open-EO/openeo-api/issues/276
            req["job_options"] = additional

        response = self.post("/jobs", json=req, expected_status=201)

        if "openeo-identifier" in response.headers:
            job_id = response.headers['openeo-identifier']
        elif "location" in response.headers:
            _log.warning("Backend did not explicitly respond with job id, will guess it from redirect URL.")
            job_id = response.headers['location'].split("/")[-1]
        else:
            raise OpenEoClientException("Failed fo extract job id")
        return RESTJob(job_id, self)

    def job(self, job_id: str):
        """
        Get the job based on the id. The job with the given id should already exist.
        
        Use :py:meth:`openeo.rest.connection.Connection.create_job` to create new jobs

        :param job_id: the job id of an existing job
        :return: A job object.
        """
        return RESTJob(job_id, self)

    def load_disk_collection(self, format: str, glob_pattern: str, options: dict = None) -> Union[ImageCollectionClient, DataCube]:
        """
        Loads image data from disk as an ImageCollection.

        :param format: the file format, e.g. 'GTiff'
        :param glob_pattern: a glob pattern that matches the files to load from disk
        :param options: options specific to the file format
        :return: the data as an ImageCollection
        """

        if self._api_version.at_least("1.0.0"):
            return DataCube.load_disk_collection(self, format, glob_pattern, **(options or {}))
        else:
            return ImageCollectionClient.load_disk_collection(self, format, glob_pattern, **(options or {}))
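
A hedged end-to-end sketch of the Connection workflow shown above (hypothetical backend URL, credentials and collection id):

con = Connection("https://backend.example/openeo")
con.authenticate_basic(username="alice", password="s3cr3t")
print(con.list_collection_ids())  # e.g. ["HYPOTHETICAL_S2", ...]
cube = con.load_collection("HYPOTHETICAL_S2")
result_bytes = con.download(cube.flat_graph())  # synchronous POST to /result
job = con.create_job(cube.flat_graph(), title="demo job")  # batch job, not yet started
Example #8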
class PGNode:
    """
    Wrapper for process node in a process graph (has process_id and arguments).

    While this is a simple, thin container, it allows a bit more abstraction, basic encapsulation,
    type hinting and code intelligence in your IDE than something generic like a dict.

    Also note that a full openEO "process graph" is essentially a directed acyclic graph of nodes
    pointing to each other. A process graph is practically equivalent to its "result" node,
    as it points (directly or recursively) to all the other nodes it depends on.

    """

    def __init__(self, process_id: str, arguments: dict = None, namespace: Union[str, None] = None, **kwargs):
        self._process_id = process_id
        # Merge arguments dict and kwargs
        arguments = dict(**(arguments or {}), **kwargs)
        # Make sure direct PGNode arguments are properly wrapped in a "from_node" dict
        for arg, value in arguments.items():
            if isinstance(value, PGNode):
                arguments[arg] = {"from_node": value}
        # TODO: use a frozendict of some sort to ensure immutability?
        self._arguments = arguments
        self._namespace = namespace

    def __repr__(self):
        return "<{c} {p!r} at 0x{m:x}>".format(c=self.__class__.__name__, p=self.process_id, m=id(self))

    @property
    def process_id(self) -> str:
        return self._process_id

    @property
    def arguments(self) -> dict:
        return self._arguments

    @property
    def namespace(self) -> Union[str, None]:
        return self._namespace

    def to_dict(self) -> dict:
        """
        Convert process graph to a nested dictionary structure.
        Uses deep copy style: nodes that are reused in the graph will be expanded multiple times (not shared).
        """

        def _deep_copy(x):
            """PGNode aware deep copy helper"""
            if isinstance(x, PGNode):
                return dict_no_none(process_id=x.process_id, arguments=_deep_copy(x.arguments), namespace=x.namespace)
            if isinstance(x, Parameter):
                return {"from_parameter": x.name}
            elif isinstance(x, dict):
                return {str(k): _deep_copy(v) for k, v in x.items()}
            elif isinstance(x, (list, tuple)):
                return type(x)(_deep_copy(v) for v in x)
            elif isinstance(x, (str, int, float)) or x is None:
                return x
            else:
                raise ValueError(repr(x))

        return _deep_copy(self)

    def flat_graph(self) -> dict:
        """Get the process graph in flat dict representation"""
        return GraphFlattener().flatten(node=self)

    flatten = legacy_alias(flat_graph, name="flatten")

    @staticmethod
    def to_process_graph_argument(value: Union['PGNode', str, dict]) -> dict:
        """
        Normalize given argument properly to a "process_graph" argument
        to be used as reducer/subprocess for processes like
        'reduce_dimension', 'aggregate_spatial', 'apply', 'merge_cubes', 'resample_cube_temporal'
        """
        if isinstance(value, str):
            # assume string with predefined reduce/apply process ("mean", "sum", ...)
            # TODO: is this case still used? It's invalid anyway for 1.0 openEO spec I think?
            return value
        elif isinstance(value, PGNode):
            return {"process_graph": value}
        elif isinstance(value, dict) and isinstance(value.get("process_graph"), PGNode):
            return value
        else:
            raise ValueError(value)
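
A small composition sketch (hypothetical process ids and arguments): direct PGNode arguments are wrapped in a {"from_node": ...} reference, and to_dict() expands the graph into a nested structure:

a = PGNode("load_collection", id="HYPOTHETICAL_S2")
b = PGNode("filter_bands", data=a, bands=["B02"])
assert b.arguments["data"] == {"from_node": a}
b.to_dict()
# {'process_id': 'filter_bands',
#  'arguments': {'data': {'from_node': {'process_id': 'load_collection',
#                                       'arguments': {'id': 'HYPOTHETICAL_S2'}}},
#                'bands': ['B02']}}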
Example #9
class VectorCube:
    """
    A Vector Cube, or 'Vector Collection', is a data structure containing 'Features':
    https://www.w3.org/TR/sdw-bp/#dfn-feature

    The features in this cube are restricted to have a geometry. Geometries can be points, lines, polygons, etc.
    A geometry is specified in a 'coordinate reference system':
    https://www.w3.org/TR/sdw-bp/#dfn-coordinate-reference-system-(crs)
    """
    def __init__(self,
                 graph: PGNode,
                 connection: 'Connection',
                 metadata: CollectionMetadata = None):
        super().__init__()
        # Process graph
        self._pg = graph
        self._connection = connection
        self.metadata = metadata

    def __str__(self):
        return "DataCube({pg})".format(pg=self._pg)

    @property
    def graph(self) -> dict:
        """Get the process graph in flat dict representation"""
        return self.flat_graph()

    def flat_graph(self) -> dict:
        """Get the process graph in flat dict representation"""
        return self._pg.flat_graph()

    flatten = legacy_alias(flat_graph, name="flatten")

    def to_json(self, indent=2, separators=None) -> str:
        """
        Get JSON representation of (flat dict) process graph.
        """
        pg = {"process_graph": self.flat_graph()}
        return json.dumps(pg, indent=indent, separators=separators)

    @property
    def _api_version(self):
        return self._connection.capabilities().api_version_check

    @property
    def connection(self):
        return self._connection

    def process(self,
                process_id: str,
                args: dict = None,
                metadata: CollectionMetadata = None,
                **kwargs) -> 'VectorCube':
        """
        Generic helper to create a new VectorCube by applying a process.

        :param process_id: process id of the process.
        :param args: argument dictionary for the process.
        :return: new VectorCube instance
        """
        return self.process_with_node(PGNode(process_id=process_id,
                                             arguments=args,
                                             **kwargs),
                                      metadata=metadata)

    def process_with_node(self,
                          pg: PGNode,
                          metadata: CollectionMetadata = None) -> 'VectorCube':
        """
        Generic helper to create a new VectorCube by applying a process (given as process graph node)

        :param pg: process graph node (containing process id and arguments)
        :param metadata: (optional) metadata to override original cube metadata (e.g. when reducing dimensions)
        :return: new VectorCube instance
        """
        from openeo.rest.datacube import DataCube, THIS
        arguments = pg.arguments
        for k, v in arguments.items():
            if isinstance(v, DataCube) or isinstance(v, VectorCube):
                arguments[k] = {"from_node": v._pg}
            elif v is THIS:
                arguments[k] = {"from_node": self._pg}
        # TODO: deep copy `self.metadata` instead of using same instance?
        # TODO: cover more cases where metadata has to be altered
        return VectorCube(graph=pg,
                          connection=self._connection,
                          metadata=metadata or self.metadata)

    def save_result(self, format: str = "GeoJSON", options: dict = None):
        return self.process(process_id="save_result",
                            args={
                                "data": {
                                    "from_node": self._pg
                                },
                                "format": format,
                                "options": options or {}
                            })

    def download(self,
                 outputfile: str,
                 format: str = "GeoJson",
                 options: dict = None):
        cube = self.save_result(format=format, options=options)
        return self._connection.download(cube.flat_graph(), outputfile)

    def execute_batch(self,
                      outputfile: Union[str, pathlib.Path],
                      out_format: str = None,
                      print=print,
                      max_poll_interval=60,
                      connection_retry_interval=30,
                      job_options=None,
                      **format_options) -> RESTJob:
        """
        Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
        This method is recommended when the batch job is expected to finish in a reasonable amount of time.

        For very long-running jobs, you probably do not want to keep the client running.

        :param outputfile: the path of a file to which a result can be written
        :param out_format: (optional) file format of the job result
        :param job_options: custom job options to pass to the backend
        :param format_options: output format parameters

        """
        job = self.send_job(out_format,
                            job_options=job_options,
                            **format_options)
        return job.run_synchronous(
            # TODO #135 support multi file result sets too
            outputfile=outputfile,
            print=print,
            max_poll_interval=max_poll_interval,
            connection_retry_interval=connection_retry_interval)

    def send_job(self,
                 out_format=None,
                 job_options=None,
                 **format_options) -> RESTJob:
        """
        Sends the underlying process graph to the backend as a batch job and returns a RESTJob instance.

        :param out_format: file format of the job result
        :param job_options: custom job options to pass to the backend
        :param format_options: output format parameters
        :return: RESTJob instance of the created job
        """
        shp = self
        if out_format:
            # add `save_result` node
            shp = shp.save_result(format=out_format, options=format_options)
        return self._connection.create_job(process_graph=shp.flat_graph(),
                                           additional=job_options)
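
A hedged usage sketch of the two evaluation paths above (assuming a pre-built PGNode `some_pgnode` and a Connection `con`):

cube = VectorCube(graph=some_pgnode, connection=con)
# Synchronous: appends a save_result node and POSTs the flat graph to /result.
cube.download("result.geojson", format="GeoJSON")
# Batch: creates a job on the backend and polls until it finishes.
job = cube.execute_batch("result.geojson", out_format="GeoJSON")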
Example #10
class ImageCollectionClient(ImageCollection):
    """Class representing an Image Collection. (In the API as 'imagery')
        Supports 0.4.
    """
    def __init__(self,
                 node_id: str,
                 builder: GraphBuilder,
                 session: 'Connection',
                 metadata: CollectionMetadata = None):
        self.node_id = node_id
        self.builder = builder
        self.session = session
        self.graph = builder.processes
        self.metadata = CollectionMetadata.get_or_create(metadata)

    def __str__(self):
        return "ImageCollection: %s" % self.node_id

    @property
    def _api_version(self):
        return self.session.capabilities().api_version_check

    @property
    def connection(self):
        return self.session

    def flat_graph(self):
        return self.graph

    @classmethod
    def load_collection(cls,
                        collection_id: str,
                        session: 'Connection' = None,
                        spatial_extent: Union[Dict[str, float], None] = None,
                        temporal_extent: Union[List[Union[str,
                                                          datetime.datetime,
                                                          datetime.date]],
                                               None] = None,
                        bands: Union[List[str], None] = None,
                        fetch_metadata=True):
        """
        Create a new Image Collection/Raster Data cube.

        :param collection_id: A collection id, should exist in the backend.
        :param session: The session to use to connect with the backend.
        :param spatial_extent: limit data to specified bounding box or polygons
        :param temporal_extent: limit data to specified temporal interval
        :param bands: only add the specified bands
        :return: new ImageCollectionClient instance
        """
        # TODO: rename function to load_collection for better similarity with corresponding process id?
        builder = GraphBuilder()
        process_id = 'load_collection'
        normalized_temporal_extent = list(
            get_temporal_extent(extent=temporal_extent)
        ) if temporal_extent is not None else None
        arguments = {
            'id': collection_id,
            'spatial_extent': spatial_extent,
            'temporal_extent': normalized_temporal_extent,
        }
        metadata = session.collection_metadata(
            collection_id) if fetch_metadata else None
        if bands:
            if isinstance(bands, str):
                bands = [bands]
            if metadata:
                bands = [
                    metadata.band_dimension.band_name(b, allow_common=False)
                    for b in bands
                ]
            arguments['bands'] = bands
        node_id = builder.process(process_id, arguments)
        if bands:
            metadata = metadata.filter_bands(bands)
        return cls(node_id, builder, session, metadata=metadata)

    create_collection = legacy_alias(load_collection, "create_collection")

    @classmethod
    def load_disk_collection(cls, session: 'Connection', file_format: str,
                             glob_pattern: str,
                             **options) -> 'ImageCollection':
        """
        Loads image data from disk as an ImageCollection.

        :param session: The session to use to connect with the backend.
        :param file_format: the file format, e.g. 'GTiff'
        :param glob_pattern: a glob pattern that matches the files to load from disk
        :param options: options specific to the file format
        :return: the data as an ImageCollection
        """
        builder = GraphBuilder()

        process_id = 'load_disk_data'
        arguments = {
            'format': file_format,
            'glob_pattern': glob_pattern,
            'options': options
        }

        node_id = builder.process(process_id, arguments)

        return cls(node_id, builder, session, metadata={})

    def _filter_temporal(self, start: str, end: str) -> 'ImageCollection':
        return self.graph_add_process(process_id='filter_temporal',
                                      args={
                                          'data': {
                                              'from_node': self.node_id
                                          },
                                          'extent': [start, end]
                                      })

    def filter_bbox(self,
                    west,
                    east,
                    north,
                    south,
                    crs=None,
                    base=None,
                    height=None) -> 'ImageCollection':
        extent = {'west': west, 'east': east, 'north': north, 'south': south}
        extent.update(dict_no_none(crs=crs, base=base, height=height))
        return self.graph_add_process(process_id='filter_bbox',
                                      args={
                                          'data': {
                                              'from_node': self.node_id
                                          },
                                          'extent': extent
                                      })

    def filter_bands(
            self, bands: Union[List[Union[str, int]],
                               str]) -> 'ImageCollection':
        """
        Filter the imagery by the given bands.

        :param bands: list of band names, common names or band indices. A single band name can also be given as a string.
        :return: an ImageCollection instance
        """
        if isinstance(bands, str):
            bands = [bands]
        bands = [self.metadata.band_dimension.band_name(b) for b in bands]
        im = self.graph_add_process(
            process_id='filter_bands',
            args={
                'data': {
                    'from_node': self.node_id
                },
                'bands': [b for b in bands if b in self.metadata.band_names],
                'common_names':
                [b for b in bands if b in self.metadata.band_common_names]
            })
        if im.metadata:
            im.metadata = im.metadata.filter_bands(bands)
        return im

    band_filter = legacy_alias(filter_bands, "band_filter")

    def band(self, band: Union[str, int]) -> 'ImageCollection':
        """Filter the imagery by the given bands
            :param band: band name, band common name or band index.
            :return An ImageCollection instance
        """

        process_id = 'reduce'
        band_index = self.metadata.get_band_index(band)

        args = {
            'data': {
                'from_node': self.node_id
            },
            'dimension': self.metadata.band_dimension.name,
            'reducer': {
                'callback': {
                    'r1': {
                        'arguments': {
                            'data': {
                                'from_argument': 'data'
                            },
                            'index': band_index
                        },
                        'process_id': 'array_element',
                        'result': True
                    }
                }
            }
        }

        return self.graph_add_process(process_id, args)

    def resample_spatial(self,
                         resolution: Union[float, Tuple[float, float]],
                         projection: Union[int, str] = None,
                         method: str = 'near',
                         align: str = 'upper-left'):
        return self.graph_add_process(
            'resample_spatial', {
                'data': {
                    'from_node': self.node_id
                },
                'resolution': resolution,
                'projection': projection,
                'method': method,
                'align': align
            })

    def subtract(self, other: Union[ImageCollection, Union[int, float]]):
        """
        Subtract other from this datacube, so the result is: this - other
        The number of bands in both data cubes has to be the same.

        :param other:
        :return ImageCollection: this - other
        """
        operator = "subtract"
        if isinstance(other, int) or isinstance(other, float):
            return self._reduce_bands_binary_const(operator, other)
        elif isinstance(other, ImageCollection):
            return self._reduce_bands_binary(operator, other)
        else:
            raise ValueError("Unsupported right-hand operand: " + str(other))

    def divide(self, other: Union[ImageCollection, Union[int, float]]):
        """
        Divide this datacube by other, so the result is: this / other
        The number of bands in both data cubes has to be the same.

        :param other:
        :return ImageCollection: this / other
        """
        operator = "divide"
        if isinstance(other, int) or isinstance(other, float):
            return self._reduce_bands_binary_const(operator, other)
        elif isinstance(other, ImageCollection):
            return self._reduce_bands_binary(operator, other)
        else:
            raise ValueError("Unsupported right-hand operand: " + str(other))

    def product(self, other: Union[ImageCollection, Union[int, float]]):
        """
        Multiply other with this datacube, so the result is: this * other
        The number of bands in both data cubes has to be the same.

        :param other:
        :return ImageCollection: this * other
        """
        operator = "product"
        if isinstance(other, int) or isinstance(other, float):
            return self._reduce_bands_binary_const(operator, other)
        elif isinstance(other, ImageCollection):
            return self._reduce_bands_binary(operator, other)
        else:
            raise ValueError("Unsupported right-hand operand: " + str(other))

    def logical_or(self, other: ImageCollection):
        """
        Apply element-wise logical `or` operation
        :param other:
        :return ImageCollection: logical_or(this, other)
        """
        return self._reduce_bands_binary(operator='or',
                                         other=other,
                                         arg_name='expressions')

    def logical_and(self, other: ImageCollection):
        """
        Apply element-wise logical `and` operation
        :param other:
        :return ImageCollection: logical_and(this, other)
        """
        return self._reduce_bands_binary(operator='and',
                                         other=other,
                                         arg_name='expressions')

    def __invert__(self):
        """

        :return:
        """
        operator = 'not'
        my_builder = self._get_band_graph_builder()
        new_builder = None
        extend_previous_callback_graph = my_builder is not None
        # TODO: why do these `add_process` calls use "expression" instead of "data" like the other cases?
        if not extend_previous_callback_graph:
            new_builder = GraphBuilder()
            # TODO merge both process graphs?
            new_builder.add_process(operator,
                                    expression={'from_argument': 'data'},
                                    result=True)
        else:
            new_builder = my_builder.copy()
            current_result = new_builder.find_result_node_id()
            new_builder.processes[current_result]['result'] = False
            new_builder.add_process(operator,
                                    expression={'from_node': current_result},
                                    result=True)

        return self._create_reduced_collection(new_builder,
                                               extend_previous_callback_graph)

    def __ne__(self, other: Union[ImageCollection, Union[int, float]]):
        return self._reduce_bands_binary_xy('neq', other)

    def __eq__(self, other: Union[ImageCollection, Union[int, float]]):
        """
        Pixelwise comparison of this data cube with another cube or constant.

        :param other: Another data cube, or a constant
        :return:
        """
        return self._reduce_bands_binary_xy('eq', other)

    def __gt__(self, other: Union[ImageCollection, Union[int, float]]):
        """
        Pairwise comparison of the bands in this data cube with the bands in the 'other' data cube.
        The number of bands in both data cubes has to be the same.

        :param other:
        :return ImageCollection: this > other
        """
        return self._reduce_bands_binary_xy('gt', other)

    def __ge__(self, other: Union[ImageCollection, Union[int, float]]):
        return self._reduce_bands_binary_xy('gte', other)

    def __lt__(self, other: Union[ImageCollection, Union[int, float]]):
        """
        Pairwise comparison of the bands in this data cube with the bands in the 'other' data cube.
        The number of bands in both data cubes has to be the same.

        :param other:
        :return ImageCollection: this < other
        """
        return self._reduce_bands_binary_xy('lt', other)

    def __le__(self, other: Union[ImageCollection, Union[int, float]]):
        return self._reduce_bands_binary_xy('lte', other)

    def _create_reduced_collection(self, callback_graph_builder,
                                   extend_previous_callback_graph):
        if not extend_previous_callback_graph:
            # there was no previous reduce step
            args = {
                'data': {
                    'from_node': self.node_id
                },
                'dimension': self.metadata.band_dimension.name,
                'reducer': {
                    'callback': callback_graph_builder.processes
                }
            }
            return self.graph_add_process("reduce", args)
        else:
            process_graph_copy = self.builder.shallow_copy()
            process_graph_copy.processes[self.node_id]['arguments']['reducer'][
                'callback'] = callback_graph_builder.processes

            # now current_node should be a reduce node, let's modify it
            # TODO: properly update metadata of reduced cube? #metadatareducedimension
            return ImageCollectionClient(self.node_id,
                                         process_graph_copy,
                                         self.session,
                                         metadata=self.metadata)

    def __truediv__(self, other):
        return self.divide(other)

    def __sub__(self, other):
        return self.subtract(other)

    def __radd__(self, other):
        return self.add(other)

    def __add__(self, other):
        return self.add(other)

    def __neg__(self):
        return self.product(-1)

    def __mul__(self, other):
        return self.product(other)

    def __rmul__(self, other):
        return self.product(other)

    def __or__(self, other):
        return self.logical_or(other)

    def __and__(self, other):
        return self.logical_and(other)

    def add(self, other: Union[ImageCollection, Union[int, float]]):
        """
        Pairwise addition of the bands in this data cube with the bands in the 'other' data cube.
        The number of bands in both data cubes has to be the same.

        :param other:
        :return ImageCollection: this + other
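
        A usage sketch (assuming ``cube`` and ``other_cube`` are ImageCollectionClient instances)::

            >>> plus_two = cube.add(2)          # equivalently: cube + 2
            >>> summed = cube.add(other_cube)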
        """
        operator = "sum"
        if isinstance(other, int) or isinstance(other, float):
            return self._reduce_bands_binary_const(operator, other)
        elif isinstance(other, ImageCollection):
            return self._reduce_bands_binary(operator, other)
        else:
            raise ValueError("Unsupported right-hand operand: " + str(other))

    def _reduce_bands_binary(self,
                             operator,
                             other: 'ImageCollectionClient',
                             arg_name='data'):
        # first we create the callback
        my_builder = self._get_band_graph_builder()
        other_builder = other._get_band_graph_builder()
        merged = GraphBuilder.combine(operator=operator,
                                      first=my_builder
                                      or {'from_argument': 'data'},
                                      second=other_builder
                                      or {'from_argument': 'data'},
                                      arg_name=arg_name)
        # callback is ready, now we need to properly set up the reduce process that will invoke it
        if my_builder is None and other_builder is None:
            # there was no previous reduce step, perhaps this is a cube merge?
            # cube merge is happening when node ids differ, otherwise we can use regular reduce
            if (self.node_id != other.node_id):
                # we're combining data from two different datacubes: http://api.openeo.org/v/0.4.0/processreference/#merge_cubes

                # set result node ids first, to keep track
                my_builder = self.builder
                my_builder.processes[self.node_id]['result'] = True
                other_builder = other.builder
                other_builder.processes[other.node_id]['result'] = True

                cubes_merged = GraphBuilder.combine(operator="merge_cubes",
                                                    first=my_builder,
                                                    second=other_builder,
                                                    arg_name="cubes")
                node_id = cubes_merged.find_result_node_id()
                the_node = cubes_merged.processes[node_id]
                the_node["result"] = False
                cubes = the_node["arguments"]["cubes"]
                the_node["arguments"]["cube1"] = cubes[0]
                the_node["arguments"]["cube2"] = cubes[1]
                del the_node["arguments"]["cubes"]

                # there can be only one process for now
                cube_list = list(
                    merged.processes.values())[0]["arguments"][arg_name]
                assert len(cube_list) == 2
                # it is really not clear if this is the agreed way to go
                cube_list[0]["from_argument"] = "x"
                cube_list[1]["from_argument"] = "y"
                the_node["arguments"]["overlap_resolver"] = {
                    'callback': merged.processes
                }
                the_node["arguments"]["binary"] = True
                return ImageCollectionClient(node_id,
                                             cubes_merged,
                                             self.session,
                                             metadata=self.metadata)
            else:
                args = {
                    'data': {
                        'from_node': self.node_id
                    },
                    'reducer': {
                        'callback': merged.processes
                    }
                }
                return self.graph_add_process("reduce", args)
        else:
            left_data_arg = self.builder.processes[
                self.node_id]["arguments"]["data"]
            right_data_arg = other.builder.processes[
                other.node_id]["arguments"]["data"]
            if left_data_arg != right_data_arg:
                raise BandMathException(
                    "'Band math' between bands of different image collections is not supported yet."
                )
            node_id = self.node_id
            reducing_graph = self
            if reducing_graph.graph[node_id]["process_id"] != "reduce":
                node_id = other.node_id
                reducing_graph = other
            new_builder = reducing_graph.builder.shallow_copy()
            new_builder.processes[node_id]['arguments']['reducer'][
                'callback'] = merged.processes
            # now current_node should be a reduce node, let's modify it
            # TODO: properly update metadata of reduced cube? #metadatareducedimension
            return ImageCollectionClient(node_id,
                                         new_builder,
                                         reducing_graph.session,
                                         metadata=self.metadata)

    def _reduce_bands_binary_xy(self, operator, other: Union[ImageCollection,
                                                             Union[int,
                                                                   float]]):
        """
        Pixelwise comparison of this data cube with another cube or constant.

        :param other: Another data cube, or a constant
        :return:
        """
        if isinstance(other, int) or isinstance(other, float):
            my_builder = self._get_band_graph_builder()
            new_builder = None
            extend_previous_callback_graph = my_builder is not None
            if not extend_previous_callback_graph:
                new_builder = GraphBuilder()
                # TODO merge both process graphs?
                new_builder.add_process(operator,
                                        x={'from_argument': 'data'},
                                        y=other,
                                        result=True)
            else:
                new_builder = my_builder.shallow_copy()
                current_result = new_builder.find_result_node_id()
                new_builder.processes[current_result]['result'] = False
                new_builder.add_process(operator,
                                        x={'from_node': current_result},
                                        y=other,
                                        result=True)

            return self._create_reduced_collection(
                new_builder, extend_previous_callback_graph)
        elif isinstance(other, ImageCollection):
            return self._reduce_bands_binary(operator, other)
        else:
            raise ValueError("Unsupported right-hand operand: " + str(other))

    def _reduce_bands_binary_const(self, operator, other: Union[int, float]):
        my_builder = self._get_band_graph_builder()
        new_builder = None
        extend_previous_callback_graph = my_builder is not None
        if not extend_previous_callback_graph:
            new_builder = GraphBuilder()
            # TODO merge both process graphs?
            new_builder.add_process(operator,
                                    data=[{
                                        'from_argument': 'data'
                                    }, other],
                                    result=True)
        else:
            current_result = my_builder.find_result_node_id()
            new_builder = my_builder.shallow_copy()
            new_builder.processes[current_result]['result'] = False
            new_builder.add_process(operator,
                                    data=[{
                                        'from_node': current_result
                                    }, other],
                                    result=True)

        return self._create_reduced_collection(new_builder,
                                               extend_previous_callback_graph)

    def _get_band_graph_builder(self):
        current_node = self.graph[self.node_id]
        if current_node["process_id"] == "reduce":
            # TODO: check "dimension" of "reduce" in some way?
            callback_graph = current_node["arguments"]["reducer"]["callback"]
            return GraphBuilder.from_process_graph(callback_graph)
        return None

    def add_dimension(self,
                      name: str,
                      label: Union[str, int, float],
                      type: str = "other"):
        if type == "bands" and self.metadata.has_band_dimension():
            # TODO: remove old "bands" dimension in appropriate places (see #metadatareducedimension)
            _log.warning(
                'Adding new "bands" dimension on top of existing one.')
        return self.graph_add_process(process_id='add_dimension',
                                      args={
                                          'data': {
                                              'from_node': self.node_id
                                          },
                                          'name': name,
                                          'value': label,
                                          'type': type,
                                      },
                                      metadata=self.metadata.add_dimension(
                                          name, label, type))

    def apply_dimension(self,
                        code: str,
                        runtime=None,
                        version="latest",
                        dimension='t',
                        target_dimension=None) -> 'ImageCollection':
        """
        Applies an n-ary process (i.e. takes an array of pixel values instead of a single pixel value) to a raster data cube.
        In contrast, the process apply applies a unary process to all pixel values.

        By default, apply_dimension applies the process on all pixel values in the data cube as apply does, but the parameter dimension can be specified to work on a particular dimension only. For example, if the temporal dimension is specified the process will work on a time series of pixel values.

        The n-ary process must return as many elements in the returned array as there are in the input array. Otherwise a CardinalityChanged error must be returned.


        :param code: UDF code or process identifier
        :param runtime:
        :param version:
        :param dimension:
        :return:
        :raises: CardinalityChanged
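
        A usage sketch (the process id and dimension name are assumptions)::

            >>> sorted_cube = cube.apply_dimension(code="sort", dimension="t")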
        """
        process_id = 'apply_dimension'
        if runtime:
            callback = {'udf': self._create_run_udf(code, runtime, version)}
        else:
            callback = {
                'process': {
                    "arguments": {
                        "data": {
                            "from_argument": "data"
                        }
                    },
                    "process_id": code,
                    "result": True
                }
            }
        args = {
            'data': {
                'from_node': self.node_id
            },
            'dimension': self.metadata.assert_valid_dimension(dimension),
            'process': {
                'callback': callback
            }
        }
        return self.graph_add_process(process_id, args)

    def reduce_bands_udf(self,
                         code: str,
                         runtime="Python",
                         version="latest") -> 'ImageCollection':
        """
        Reduce "band" dimension with a UDF
        """
        process_id = 'reduce'
        args = {
            'data': {
                'from_node': self.node_id
            },
            'dimension': self.metadata.band_dimension.name,
            'binary': False,
            'reducer': {
                'callback': {
                    'udf': self._create_run_udf(code, runtime, version)
                }
            }
        }
        return self.graph_add_process(process_id, args)

    def _create_run_udf(self, code, runtime, version):
        return {
            "arguments": {
                "data": {
                    "from_argument": "data"
                },
                "runtime": runtime,
                "version": version,
                "udf": code
            },
            "process_id": "run_udf",
            "result": True
        }

    def reduce_temporal_udf(self,
                            code: str,
                            runtime="Python",
                            version="latest"):
        """
        Apply reduce (`reduce_dimension`) process with given UDF along temporal dimension.

        :param code: The UDF code, compatible with the given runtime and version
        :param runtime: The UDF runtime
        :param version: The UDF runtime version
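
        A usage sketch (the UDF body is a hypothetical placeholder)::

            >>> udf_code = "def apply_datacube(cube, context): return cube"
            >>> composite = cube.reduce_temporal_udf(code=udf_code, runtime="Python")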
        """
        process_id = 'reduce'
        args = {
            'data': {
                'from_node': self.node_id
            },
            'dimension': self.metadata.temporal_dimension.name,
            'binary': False,
            'reducer': {
                'callback': {
                    'udf': self._create_run_udf(code, runtime, version)
                }
            }
        }
        return self.graph_add_process(process_id, args)

    reduce_tiles_over_time = legacy_alias(reduce_temporal_udf,
                                          "reduce_tiles_over_time")

    def apply(self,
              process: str,
              data_argument='data',
              arguments: dict = None) -> 'ImageCollection':
        process_id = 'apply'
        # Work on a copy, to avoid mutating the caller's dict (and a shared mutable default).
        arguments = dict(arguments or {})
        arguments[data_argument] = {"from_argument": data_argument}
        args = {
            'data': {
                'from_node': self.node_id
            },
            'process': {
                'callback': {
                    "unary": {
                        "arguments": arguments,
                        "process_id": process,
                        "result": True
                    }
                }
            }
        }

        return self.graph_add_process(process_id, args)

    def _reduce_time(self, reduce_function="max"):
        process_id = 'reduce'

        args = {
            'data': {
                'from_node': self.node_id
            },
            'dimension': self.metadata.temporal_dimension.name,
            'reducer': {
                'callback': {
                    'r1': {
                        'arguments': {
                            'data': {
                                'from_argument': 'data'
                            }
                        },
                        'process_id': reduce_function,
                        'result': True
                    }
                }
            }
        }

        return self.graph_add_process(process_id, args)

    def min_time(self) -> 'ImageCollection':
        """Finds the minimum value of a time series for all bands of the input dataset.

            :return: An ImageCollection instance
        """

        return self._reduce_time(reduce_function="min")

    def max_time(self) -> 'ImageCollection':
        """
        Finds the maximum value of a time series for all bands of the input dataset.

        :return: An ImageCollection instance
        """
        return self._reduce_time(reduce_function="max")

    def mean_time(self) -> 'ImageCollection':
        """Finds the mean value of a time series for all bands of the input dataset.

            :return: An ImageCollection instance
        """
        return self._reduce_time(reduce_function="mean")

    def median_time(self) -> 'ImageCollection':
        """Finds the median value of a time series for all bands of the input dataset.

            :return: An ImageCollection instance
        """

        return self._reduce_time(reduce_function="median")

    def count_time(self) -> 'ImageCollection':
        """Counts the number of images with a valid mask in a time series for all bands of the input dataset.

            :return: An ImageCollection instance
        """
        return self._reduce_time(reduce_function="count")

    def ndvi(self, name="ndvi") -> 'ImageCollection':
        """ Normalized Difference Vegetation Index (NDVI)

            :param name: Name of the newly created band

            :return: An ImageCollection instance
        """
        process_id = 'ndvi'
        args = {'data': {'from_node': self.node_id}, 'name': name}
        return self.graph_add_process(process_id, args)

    def normalized_difference(self,
                              other: ImageCollection) -> 'ImageCollection':
        return self._reduce_bands_binary("normalized_difference", other)

    def linear_scale_range(self, input_min, input_max, output_min,
                           output_max) -> 'ImageCollection':
        """ Color stretching
            :param input_min: Minimum input value
            :param input_max: Maximum input value
            :param output_min: Minimum output value
            :param output_max: Maximum output value
            :return: An ImageCollection instance
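
            A usage sketch (scaling reflectance-like values to an 8-bit range)::

                >>> scaled = cube.linear_scale_range(0, 8000, 0, 255)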
        """
        process_id = 'linear_scale_range'
        args = {
            'x': {
                'from_node': self.node_id
            },
            'inputMin': input_min,
            'inputMax': input_max,
            'outputMin': output_min,
            'outputMax': output_max
        }
        return self.graph_add_process(process_id, args)

    def mask(self,
             polygon: Union[Polygon, MultiPolygon, str] = None,
             srs=None,
             rastermask: 'ImageCollection' = None,
             replacement=None) -> 'ImageCollection':
        """
        Mask the image collection using either a polygon or a raster mask.

        All pixels outside the polygon should be set to the nodata value.
        All pixels inside, or intersecting, the polygon should retain their original value.

        All pixels are replaced for which the corresponding pixels in the mask are non-zero (for numbers) or True
        (for boolean values).

        The pixel values are replaced with the value specified for replacement, which defaults to None (no data).
        No-data values will be left untouched by the masking operation.

        # TODO: just provide a single `mask` argument and detect the type: polygon or process graph
        # TODO: also see `mask` vs `mask_polygon` processes in https://github.com/Open-EO/openeo-processes/pull/110

        :param polygon: A polygon, provided as a :class:`shapely.geometry.Polygon` or :class:`shapely.geometry.MultiPolygon`, or a filename pointing to a valid vector file
        :param srs: The reference system of the provided polygon, by default this is Lat Lon (EPSG:4326).
        :param rastermask: the raster mask
        :param replacement: the value to replace the masked pixels with
        :raise: :class:`ValueError` if a polygon is supplied and its area is 0.
        :return: A new ImageCollection, with the mask applied.
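
        A usage sketch (assuming ``cube`` is an ImageCollectionClient; coordinates are arbitrary examples)::

            >>> from shapely.geometry import box
            >>> masked = cube.mask(polygon=box(3.0, 51.0, 3.5, 51.5), srs="EPSG:4326")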
        """
        mask = None
        new_collection = None
        if polygon is not None:
            if isinstance(polygon, (str, pathlib.Path)):
                # TODO: default to loading file client side?
                # TODO: change read_vector to load_uploaded_files https://github.com/Open-EO/openeo-processes/pull/106
                new_collection = self.graph_add_process(
                    'read_vector', args={'filename': str(polygon)})

                mask = {'from_node': new_collection.node_id}
            else:
                if polygon.area == 0:
                    raise ValueError("Mask {m!s} has an area of {a!r}".format(
                        m=polygon, a=polygon.area))

                geojson = mapping(polygon)
                if srs:
                    geojson['crs'] = {
                        'type': 'name',
                        'properties': {
                            'name': srs
                        }
                    }
                mask = geojson
                new_collection = self
        elif rastermask is not None:
            mask_node = rastermask.graph[rastermask.node_id]
            mask_node['result'] = True
            new_collection = self._graph_merge(rastermask.graph)
            # mask node id may have changed!
            mask_id = new_collection.builder.find_result_node_id()
            mask_node = new_collection.graph[mask_id]
            mask_node['result'] = False
            mask = {'from_node': mask_id}

        else:
            raise AttributeError(
                "mask process: either a polygon or a rastermask should be provided."
            )

        process_id = 'mask'

        args = {'data': {'from_node': self.node_id}, 'mask': mask}
        if replacement is not None:
            args['replacement'] = replacement

        return new_collection.graph_add_process(process_id, args)

    def merge(self,
              other: 'ImageCollection',
              overlap_resolver: str = None) -> 'ImageCollection':
        other_node = other.graph[other.node_id]
        other_node['result'] = True
        new_collection = self._graph_merge(other.graph)
        # mask node id may have changed!
        mask_id = new_collection.builder.find_result_node_id()
        other_node = new_collection.graph[mask_id]
        other_node['result'] = False
        cube2 = {'from_node': mask_id}
        args = {'cube1': {'from_node': self.node_id}, 'cube2': cube2}
        if overlap_resolver:
            # Assume simple math operation
            # TODO support general overlap resolvers.
            assert isinstance(overlap_resolver, str)
            args["overlap_resolver"] = {
                "callback": {
                    "r1": {
                        "process_id": overlap_resolver,
                        "arguments": {
                            "data": [{
                                "from_argument": "x"
                            }, {
                                "from_argument": "y"
                            }]
                        },
                        "result": True,
                    }
                }
            }
            args["binary"] = True
        return new_collection.graph_add_process('merge_cubes', args)

    def apply_kernel(self,
                     kernel,
                     factor=1.0,
                     border=0,
                     replace_invalid=0) -> 'ImageCollection':
        """
        Applies a focal operation based on a weighted kernel to each value of the specified dimensions in the data cube.

        :param kernel: The kernel to be applied on the data cube. It should be a 2D numpy array.
        :param factor: A factor that is multiplied to each value computed by the focal operation. This is basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often required for some kernel-based algorithms such as the Gaussian blur.
        :return: A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube.
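
        A usage sketch (simple 3x3 smoothing kernel; numpy assumed available)::

            >>> import numpy as np
            >>> smoothed = cube.apply_kernel(kernel=np.ones((3, 3)), factor=1.0 / 9)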
        """
        return self.graph_add_process(
            'apply_kernel', {
                'data': {
                    'from_node': self.node_id
                },
                'kernel': kernel.tolist(),
                'factor': factor,
                'border': border,
                'replace_invalid': replace_invalid
            })

    ####### VIEW methods #######

    def polygonal_mean_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a mean time series for the given (multi)polygon. Its points are
        expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
        :return: ImageCollection
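
        A usage sketch (coordinates are arbitrary example values)::

            >>> from shapely.geometry import Polygon
            >>> ts = cube.polygonal_mean_timeseries(Polygon([(5.0, 51.0), (5.1, 51.0), (5.1, 51.1)]))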
        """

        return self._polygonal_timeseries(polygon, "mean")

    def polygonal_histogram_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a histogram time series for the given (multi)polygon. Its points are
        expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
        :return: ImageCollection
        """

        return self._polygonal_timeseries(polygon, "histogram")

    def polygonal_median_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a median time series for the given (multi)polygon. Its points are
        expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
        :return: ImageCollection
        """

        return self._polygonal_timeseries(polygon, "median")

    def polygonal_standarddeviation_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a time series of standard deviations for the given (multi)polygon. Its points are
        expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
        :return: ImageCollection
        """

        return self._polygonal_timeseries(polygon, "sd")

    def _polygonal_timeseries(self, polygon: Union[Polygon, MultiPolygon, str],
                              func: str) -> 'ImageCollection':
        def graph_add_aggregate_process(graph) -> 'ImageCollection':
            process_id = 'aggregate_polygon'
            args = {
                'data': {
                    'from_node': self.node_id
                },
                'polygons': polygons,
                'reducer': {
                    'callback': {
                        "unary": {
                            "arguments": {
                                "data": {
                                    "from_argument": "data"
                                }
                            },
                            "process_id": func,
                            "result": True
                        }
                    }
                }
            }
            return graph.graph_add_process(process_id, args)

        if isinstance(polygon, str):
            with_read_vector = self.graph_add_process(
                'read_vector', args={'filename': polygon})
            polygons = {'from_node': with_read_vector.node_id}
            return graph_add_aggregate_process(with_read_vector)
        else:
            polygons = mapping(polygon)
            return graph_add_aggregate_process(self)

    def save_result(self, format: str = "GTIFF", options: dict = None):
        return self.graph_add_process(process_id="save_result",
                                      args={
                                          "data": {
                                              "from_node": self.node_id
                                          },
                                          "format": format,
                                          "options": options or {}
                                      })

    def download(self,
                 outputfile: str = None,
                 format: str = None,
                 options: dict = None):
        """Download image collection, e.g. as GeoTIFF."""
        if not format:
            format = guess_format(outputfile) if outputfile else "GTiff"

        newcollection = self.save_result(format=format, options=options)
        newcollection.graph[newcollection.node_id]["result"] = True
        return self.session.download(newcollection.graph, outputfile)

    def tiled_viewing_service(self, type: str, **kwargs) -> Service:
        self.graph[self.node_id]['result'] = True
        return self.session.create_service(self.graph, type=type, **kwargs)

    def execute_batch(self,
                      outputfile: Union[str, pathlib.Path],
                      out_format: str = None,
                      print=print,
                      max_poll_interval=60,
                      connection_retry_interval=30,
                      job_options=None,
                      **format_options):
        """
        Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
        This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.

        For very long running jobs, you probably do not want to keep the client running.

        :param job_options:
        :param outputfile: The path of a file to which a result can be written
        :param out_format: String Format of the job result.
        :param format_options: String Parameters for the job result format
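
        A usage sketch (the output filename is an arbitrary example)::

            >>> cube.execute_batch("result.tiff", out_format="GTiff")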

        """
        job = self.create_job(out_format,
                              job_options=job_options,
                              **format_options)
        return job.run_synchronous(
            # TODO #135 support multi file result sets too
            outputfile=outputfile,
            print=print,
            max_poll_interval=max_poll_interval,
            connection_retry_interval=connection_retry_interval)

    def create_job(self,
                   out_format=None,
                   title: str = None,
                   description: str = None,
                   plan: str = None,
                   budget=None,
                   job_options=None,
                   **format_options) -> BatchJob:
        """
        Sends a job to the backend and returns a Job instance. The job will still need to be started and managed explicitly.
        The :func:`~openeo.imagecollection.ImageCollection.execute_batch` method allows you to run batch jobs without managing them yourself.

        :param out_format: String Format of the job result.
        :param job_options: A dictionary containing (custom) job options
        :param format_options: String Parameters for the job result format
        :return: The created (but not yet started) batch job.
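
        A usage sketch (argument values are arbitrary examples; assuming the returned job exposes ``start_job()``)::

            >>> job = cube.create_job(out_format="GTiff", title="NDVI composite")
            >>> job.start_job()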
        """
        img = self
        if out_format:
            # add `save_result` node
            img = img.save_result(format=out_format, options=format_options)
        img.graph[img.node_id]["result"] = True
        return self.session.create_job(process_graph=img.graph,
                                       title=title,
                                       description=description,
                                       plan=plan,
                                       budget=budget,
                                       additional=job_options)

    send_job = legacy_alias(create_job, name="send_job")

    def execute(self) -> Dict:
        """Executes the process graph of the imagery. """
        newbuilder = self.builder.shallow_copy()
        newbuilder.processes[self.node_id]['result'] = True
        return self.session.execute(newbuilder.processes)

    ####### HELPER methods #######

    def _graph_merge(self, other_graph: Dict):
        newbuilder = self.builder.shallow_copy()
        merged = newbuilder.merge(GraphBuilder.from_process_graph(other_graph))
        # TODO: properly update metadata as well?
        newCollection = ImageCollectionClient(self.node_id,
                                              merged,
                                              self.session,
                                              metadata=self.metadata)
        return newCollection

    def graph_add_process(
            self,
            process_id: str,
            args: dict,
            metadata: CollectionMetadata = None) -> 'ImageCollectionClient':
        """
        Returns a new imagecollection with an added process with the given process
        id and a dictionary of arguments

        :param process_id: String, Process Id of the added process.
        :param args: Dict, Arguments of the process.

        :return: new ImageCollectionClient instance
        """
        #don't modify in place, return new builder
        newbuilder = self.builder.shallow_copy()
        id = newbuilder.process(process_id, args)

        # TODO: properly update metadata as well?
        newCollection = ImageCollectionClient(node_id=id,
                                              builder=newbuilder,
                                              session=self.session,
                                              metadata=metadata
                                              or copy.copy(self.metadata))
        return newCollection
Example #11
0
class ImageCollection(ABC):
    """Class representing Processes. """
    @deprecated("Use `filter_temporal()` instead")
    def date_range_filter(
            self, start_date: Union[str, datetime, date],
            end_date: Union[str, datetime, date]) -> 'ImageCollection':
        """
        Specifies a date range filter to be applied on the ImageCollection
        DEPRECATED: use :func:`openeo.ImageCollection.filter_temporal`

        :param start_date: Start date of the filter, inclusive, format: "YYYY-MM-DD".
        :param end_date: End date of the filter, exclusive, format e.g.: "2018-01-13".
        :return: An ImageCollection filtered by date.
        """
        return self.filter_temporal(start_date=start_date, end_date=end_date)

    @deprecated("Use `filter_temporal()` instead")
    def filter_daterange(self, extent) -> 'ImageCollection':
        """Drops observations from a collection that have been captured before
            a start or after a given end date.

            :param extent: List of starting date and ending date of the filter
            :return: An ImageCollection filtered by date.
        """
        return self.filter_temporal(extent=extent)

    def filter_temporal(
            self,
            *args,
            start_date: Union[str, datetime, date] = None,
            end_date: Union[str, datetime, date] = None,
            extent: Union[list, tuple] = None) -> 'ImageCollection':
        """
        Limit the ImageCollection to a certain date range, which can be specified in several ways:

        >>> im.filter_temporal("2019-07-01", "2019-08-01")
        >>> im.filter_temporal(["2019-07-01", "2019-08-01"])
        >>> im.filter_temporal(extent=["2019-07-01", "2019-08-01"])
        >>> im.filter_temporal(start_date="2019-07-01", end_date="2019-08-01")

        :param start_date: start date of the filter (inclusive), as a string or date object
        :param end_date: end date of the filter (exclusive), as a string or date object
        :param extent: two element list/tuple start and end date of the filter
        :return: An ImageCollection filtered by date.

        Subclasses are recommended to implement `_filter_temporal`, which has a simpler API.

        https://open-eo.github.io/openeo-api/processreference/#filter_temporal
        """
        start, end = get_temporal_extent(*args,
                                         start_date=start_date,
                                         end_date=end_date,
                                         extent=extent)
        return self._filter_temporal(start, end)

    def _filter_temporal(self, start_date: str,
                         end_date: str) -> 'ImageCollection':
        # Subclasses are expected to implement this method, but for a bit of backward compatibility
        # with old style subclasses we forward to `date_range_filter`
        # TODO: replace this with raise NotImplementedError() or decorate with @abstractmethod
        return self.date_range_filter(start_date, end_date)

    def filter_bbox(self,
                    west,
                    east,
                    north,
                    south,
                    crs=None,
                    base=None,
                    height=None) -> 'ImageCollection':
        """
        Limits the ImageCollection to a given spatial bounding box.

        :param west: west boundary (longitude / easting)
        :param east: east boundary (longitude / easting)
        :param north: north boundary (latitude / northing)
        :param south: south boundary (latitude / northing)
        :param crs: spatial reference system of boundaries as
                    proj4 or EPSG:12345 like string
        :param base: lower left corner coordinate axis 3
        :param height: upper right corner coordinate axis 3
        :return: An image collection cropped to the specified bounding box.

        https://open-eo.github.io/openeo-api/v/0.4.1/processreference/#filter_bbox

        # TODO: allow passing some kind of bounding box object? e.g. a (xmin, ymin, xmax, ymax) tuple?
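
        A usage sketch (coordinates are arbitrary example values)::

            >>> cropped = im.filter_bbox(west=5.0, east=6.0, south=51.0, north=52.0, crs="EPSG:4326")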
        """
        # Subclasses are expected to implement this method, but for a bit of backward compatibility
        # with old style subclasses we forward to `bbox_filter`
        # TODO: replace this with raise NotImplementedError() or decorate with @abstractmethod
        kwargs = dict(west=west, east=east, north=north, south=south)
        kwargs.update(dict_no_none(crs=crs, base=base, height=height))
        return self.bbox_filter(**kwargs)

    @deprecated(reason="Use `filter_bbox()` instead.")
    def bbox_filter(self,
                    west=None,
                    east=None,
                    north=None,
                    south=None,
                    crs=None,
                    left=None,
                    right=None,
                    top=None,
                    bottom=None,
                    srs=None,
                    base=None,
                    height=None) -> 'ImageCollection':
        """
        Specifies a bounding box to filter input image collections.
        DEPRECATED: use :func:`openeo.ImageCollection.filter_bbox`

        :param left:
        :param right:
        :param top:
        :param bottom:
        :param srs:
        :return: An image collection cropped to the specified bounding box.
        """
        return self.filter_bbox(west=first_not_none(west, left),
                                east=first_not_none(east, right),
                                north=first_not_none(north, top),
                                south=first_not_none(south, bottom),
                                base=base,
                                height=height,
                                crs=first_not_none(crs, srs))

    def resample_spatial(self,
                         resolution: Union[float, Tuple[float, float]],
                         projection: Union[int, str] = None,
                         method: str = 'near',
                         align: str = 'upper-left'):
        """
        Resamples the spatial dimensions (x,y) of the data cube to a specified resolution and/or warps the data cube
        to the target projection. At least resolution or projection must be specified.

        Use filter_bbox to set the target spatial extent.

        https://processes.openeo.org/#resample_spatial

        :param resolution: Either a single number or an array with separate resolutions for each spatial dimension.
            Resamples the data cube to the target resolution, which can be specified either as separate values
            for x and y or as a single value for both axes.  Specified in the units of the target projection.
            Doesn't change the resolution by default (0).
        :param projection: Either an epsg code, as an integer, or a proj-definition
            string. Warps the data cube to the target projection. Target projection specified as EPSG code or PROJ
            definition. Doesn't change the projection by default (null).
        :param method: Resampling method. Methods are
            inspired by GDAL, see gdalwarp for more information. Possible values: near, bilinear, cubic, cubicspline,
            lanczos, average, mode, max, min, med, q1, q3
        :param align: Specifies to which corner of the spatial extent
            the new resampled data is aligned to. Possible values: lower-left, upper-left, lower-right, upper-right

        :return: A raster data cube with values warped onto the new projection.
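
        A usage sketch (resolution and EPSG code are arbitrary examples)::

            >>> warped = im.resample_spatial(resolution=10, projection=32631, method="bilinear")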
        """
        pass

    def resample_cube_spatial(self,
                              target: 'ImageCollection',
                              method: str = 'near') -> 'ImageCollection':
        """
        Resamples the spatial dimensions (x,y) of this data cube to a target data cube and return the results as a new data cube.

        https://processes.openeo.org/#resample_cube_spatial

        :param target: An ImageCollection that specifies the target
        :param method: The resampling method.
        :return: A raster data cube with values warped onto the new projection.
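
        A usage sketch (assuming ``target_cube`` is another ImageCollection)::

            >>> aligned = im.resample_cube_spatial(target=target_cube, method="bilinear")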
        """
        pass

    def apply(self, process: str, arguments={}) -> 'ImageCollection':
        """
        Applies a unary process (a local operation) to each value of the specified or all dimensions in the data cube.
        https://open-eo.github.io/openeo-api/v/0.4.0/processreference/#apply

        :param process: A process (callback) to be applied on each value. The specified process must be unary meaning that it must work on a single value.
        :param arguments: Optional dictionary of arguments for the process.
        :return: A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube.
        """
        raise NotImplementedError(
            "Apply function not supported by this data cube.")

    def apply_dimension(self,
                        code: str,
                        runtime=None,
                        version="latest",
                        dimension='t',
                        target_dimension=None) -> 'ImageCollection':
        """
        Applies a user defined process to all pixel values along a dimension of a raster data cube. For example,
        if the temporal dimension is specified the process will work on a time series of pixel values.

        The process reduce_dimension also applies a process to pixel values along a dimension, but drops
        the dimension afterwards. The process apply applies a process to each pixel value in the data cube.

        The target dimension is the source dimension if not specified otherwise in the target_dimension parameter.
        The pixel values in the target dimension get replaced by the computed pixel values. The name, type and
        reference system are preserved.

        The dimension labels are preserved when the target dimension is the source dimension and the number of
        pixel values in the source dimension is equal to the number of values computed by the process. Otherwise,
        the dimension labels will be incrementing integers starting from zero, which can be changed using
        rename_labels afterwards. The number of labels will be equal to the number of values computed by the process.

        :param code: UDF code or process identifier
        :param runtime: UDF runtime to use
        :param version: Version of the UDF runtime to use
        :param dimension: The name of the source dimension to apply the process on. Fails with a DimensionNotAvailable error if the specified dimension does not exist.
        :param target_dimension: The name of the target dimension or null (the default) to use the source dimension
        specified in the parameter dimension. By specifying a target dimension, the source dimension is removed.
        The target dimension with the specified name and the type other (see add_dimension) is created, if it doesn't exist yet.

        :return: A datacube with the UDF applied to the given dimension.
        :raises: DimensionNotAvailable
        """
        pass

    def apply_neighborhood(self, process, size: List[Dict],
                           overlap: List[Dict]):
        """
        Applies a focal process to a data cube.
        A focal process is a process that works on a 'neighbourhood' of pixels. The neighbourhood can extend into multiple dimensions, this extent is specified by the `size` argument. It is not only (part of) the size of the input window, but also the size of the output for a given position of the sliding window. The sliding window moves with multiples of `size`.

        An overlap can be specified so that neighbourhoods can have overlapping boundaries. This allows for continuity of the output. The values included in the data cube as overlap can't be modified by the given `process`.

        The neighbourhood size should be kept small enough to avoid exceeding available computational resources, but a size that is too small will result in a larger number of process invocations, which may slow down processing. Window sizes for spatial dimensions typically are in the range of 64 to 512 pixels, while overlaps of 8 to 32 pixels are common.

        The process must not add new dimensions, or remove entire dimensions, but the result can have different dimension labels.

        For the special case of 2D convolution, it is recommended to use ``apply_kernel()``.

        :param process: Process to be applied on all neighbourhoods.
        :param size: Neighbourhood sizes along each dimension. This object maps dimension names to either a physical measure (e.g. 100 m, 10 days) or pixels (e.g. 32 pixels). For dimensions not specified, the default is to provide all values. Be aware that including all values from overly large dimensions may not be processed at once.
        :param overlap: Overlap of neighbourhoods along each dimension to avoid border effects. For instance a temporal dimension can add 1 month before and after a neighbourhood. In the spatial dimensions, this is often a number of pixels. The overlap specified is added before and after, so an overlap of 8 pixels will add 8 pixels on both sides of the window, so 16 in total. Be aware that large overlaps increase the need for computational resources, and modifying overlapping data in subsequent operations has no effect.
        :return: A data cube with the newly computed values. The cardinality, resolution and the number of dimensions are the same as for the original data cube.
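
        A usage sketch (dimension names, sizes and the ``my_udf`` process are assumptions)::

            >>> im.apply_neighborhood(
            ...     process=my_udf,
            ...     size=[{"dimension": "x", "value": 128, "unit": "px"},
            ...           {"dimension": "y", "value": 128, "unit": "px"}],
            ...     overlap=[{"dimension": "x", "value": 16, "unit": "px"},
            ...              {"dimension": "y", "value": 16, "unit": "px"}],
            ... )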
        """
        pass

    def aggregate_time(self, temporal_window,
                       aggregationfunction) -> 'ImageCollection':
        """ Applies a windowed reduction to a timeseries by applying a user defined function.
            DEPRECATED: use ``aggregate_temporal()`` instead

            :param temporal_window: The time windows to group by, can be a list of half-open intervals
            :param aggregationfunction: The function to apply to each time window. Takes a pandas Timeseries as input.

            :return: An ImageCollection containing a result for each time window
        """
        pass

    def aggregate_temporal(self,
                           intervals: List[List],
                           reducer,
                           labels: List = None,
                           dimension: str = None,
                           context: Dict = None) -> 'ImageCollection':
        """ Computes a temporal aggregation based on an array of date and/or time intervals.

            Calendar hierarchies such as year, month, week etc. must be transformed into specific intervals by the clients. For each interval, all data along the dimension will be passed through the reducer. The computed values will be projected to the labels, so the number of labels and the number of intervals need to be equal.

            If the dimension is not set, the data cube is expected to only have one temporal dimension.

            :param intervals: Temporal left-closed intervals so that the start time is contained, but not the end time.
            :param labels: Labels for the intervals. The number of labels and the number of groups need to be equal.
            :param reducer: A reducer to be applied on all values along the specified dimension. The reducer must be a callable process (or a set of processes) that accepts an array and computes a single return value of the same type as the input values, for example median.
            :param dimension: The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. If the dimension is not set, the data cube is expected to only have one temporal dimension.
            :param context: Additional data to be passed to the reducer.

            :return: An ImageCollection containing a result for each time window
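
            A usage sketch (interval and label values are arbitrary examples; the concrete ``reducer`` form depends on the client version)::

                >>> im.aggregate_temporal(
                ...     intervals=[["2019-01-01", "2019-04-01"], ["2019-04-01", "2019-07-01"]],
                ...     reducer="mean",
                ...     labels=["Q1", "Q2"],
                ... )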
        """
        pass

    def aggregate_temporal_period(self,
                                  period: str,
                                  reducer,
                                  dimension: str = None,
                                  context: Dict = None) -> 'ImageCollection':
        """ Computes a temporal aggregation based on calendar hierarchies such as years, months or seasons. For other calendar hierarchies aggregate_temporal can be used.

            For each interval, all data along the dimension will be passed through the reducer.

            If the dimension is not set or is set to null, the data cube is expected to only have one temporal dimension.

            The period argument specifies the time intervals to aggregate. The following pre-defined values are available:

            - hour: Hour of the day
            - day: Day of the year
            - week: Week of the year
            - dekad: Ten day periods, counted per year with three periods per month (day 1 - 10, 11 - 20 and 21 - end of month). The third dekad of the month can range from 8 to 11 days. For example, the fourth dekad of a year is Feb 1 - Feb 10.
            - month: Month of the year
            - season: Three month periods of the calendar seasons (December - February, March - May, June - August, September - November).
            - tropical-season: Six month periods of the tropical seasons (November - April, May - October).
            - year: Proleptic years
            - decade: Ten year periods (0-to-9 decade), from a year ending in a 0 to the next year ending in a 9.
            - decade-ad: Ten year periods (1-to-0 decade) better aligned with the Anno Domini (AD) calendar era, from a year ending in a 1 to the next year ending in a 0.


            :param period: The period of the time intervals to aggregate.
            :param reducer: A reducer to be applied on all values along the specified dimension. The reducer must be a callable process (or a set of processes) that accepts an array and computes a single return value of the same type as the input values, for example median.
            :param dimension: The temporal dimension for aggregation. All data along the dimension will be passed through the specified reducer. If the dimension is not set, the data cube is expected to only have one temporal dimension.
            :param context: Additional data to be passed to the reducer.

            :return: A data cube with the same dimensions. The dimension properties (name, type, labels, reference system and resolution) remain unchanged.
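
            A usage sketch (the concrete ``reducer`` form depends on the client version)::

                >>> monthly = im.aggregate_temporal_period(period="month", reducer="mean")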
        """
        pass

    @deprecated("Use a more specific reduce method instead.")
    def reduce(self, reducer, dimension):
        """
        Applies a reducer to a data cube dimension by collapsing all the input values along the specified dimension into a single output value computed by the reducer.

        The reducer must accept an array and return a single value (see parameter reducer). Nominal values are possible, but need to be mapped, e.g. band names to wavelengths, date strings to numeric timestamps since 1970 etc.

        https://open-eo.github.io/openeo-api/v/0.4.0/processreference/#reduce

        :param reducer: A reducer to be applied on the specified dimension. The reducer must be a callable process (or a set of processes) that accepts an array and computes a single return value of the same type as the input values, for example median.
        :param dimension: The dimension over which to reduce.
        :return: A data cube with the newly computed values. The number of dimensions is reduced, but the resolution and cardinality are the same as for the original data cube.
        """
        raise NotImplementedError(
            "This image collection does not support the reduce operation.")

    def reduce_time(self, aggregationfunction) -> 'ImageCollection':
        """ Applies a windowed reduction to a timeseries by applying a user defined function.

            :param aggregationfunction: The function to apply to each time window. Takes a pandas Timeseries as input.

            :return: An ImageCollection without a time dimension
        """
        pass

    def min_time(self) -> 'ImageCollection':
        """
            Finds the minimum value of a time series for all bands of the input dataset.

            :return: An ImageCollection without a time dimension.
        """
        pass

    def max_time(self) -> 'ImageCollection':
        """
            Finds the maximum value of a time series for all bands of the input dataset.

            :return: An ImageCollection without a time dimension.
        """
        pass

    def mean_time(self) -> 'ImageCollection':
        """
            Finds the mean value of a time series for all bands of the input dataset.

            :return: An ImageCollection without a time dimension.
        """
        pass

    def median_time(self) -> 'ImageCollection':
        """
            Finds the median value of a time series for all bands of the input dataset.

            :return: An ImageCollection without a time dimension.
        """
        pass

    def count_time(self) -> 'ImageCollection':
        """
            Counts the number of images with a valid mask in a time series for all bands of the input dataset.

            :return: An ImageCollection without a time dimension.
        """
        pass

    def ndvi(self, **kwargs) -> 'ImageCollection':
        """ Normalized Difference Vegetation Index (NDVI)

            :param kwargs:

            :return: An ImageCollection instance
        """
        pass

    def rename_labels(self,
                      dimension: str,
                      target: list,
                      source: list = None) -> 'ImageCollection':
        """ Renames the labels of the specified dimension in the data cube from source to target.

            :param dimension: Dimension name
            :param target: The new names for the labels.
            :param source: The names of the labels as they are currently in the data cube.

            :return: An ImageCollection instance
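
            Illustrative usage sketch (the band names below are hypothetical, not part of this API):

            >>> cube = cube.rename_labels("bands", target=["red", "nir"], source=["B04", "B08"])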
        """
        pass

    def rename_dimension(self, source: str, target: str):
        """
        Renames a dimension in the data cube while preserving all other properties.

        :param source: The current name of the dimension. Fails with a DimensionNotAvailable error if the specified dimension does not exist.
        :param target: A new name for the dimension. Fails with a DimensionExists error if a dimension with the specified name already exists.

        :return: A new datacube with the dimension renamed.
        """
        pass

    def filter_bands(self, bands) -> 'ImageCollection':
        """Filters the bands in the data cube so that bands that don't match any of the criteria are dropped from the data cube.
        The data cube is expected to have only one spectral dimension.
        The following criteria can be used to select bands:

            :param bands: List of band names or single band name as a string. The order of the specified array defines the order of the bands in the data cube, which can be important for subsequent processes.
            :return An ImageCollection instance
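
            Illustrative usage sketch (band names are hypothetical):

            >>> rgb = cube.filter_bands(["B04", "B03", "B02"])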
        """
        # TODO: also handle a common_names (and wavelengths) argument like https://open-eo.github.io/openeo-api/processreference/#filter_bands?
        #       see https://github.com/Open-EO/openeo-processes/issues/77
        pass

    @deprecated("use `filter_bands()` instead")
    def band_filter(self, bands) -> 'ImageCollection':
        return self.filter_bands(bands=bands)

    def band(self, band_name) -> 'ImageCollection':
        """Select the given band, as input for subsequent operations.

            :param band_name: The name of the band to select, as a string.
            :return: An ImageCollection instance
        """
        # TODO: does this method have to be defined at the level of the ImageCollection base class? it is only implemented by the rest client
        pass

    def merge(self, other: 'ImageCollection') -> 'ImageCollection':
        """
        Merge the bands of this data cube with the bands of another data cube. The bands of 'other' will be appended to the bands
        of this data cube, maintaining the order.

        :param other: The other datacube to merge with this datacube
        :return: A new datacube with bands merged.
        """
        pass

    def apply_kernel(self,
                     kernel,
                     factor=1.0,
                     border=0,
                     replace_invalid=0) -> 'ImageCollection':
        """
        Applies a focal operation based on a weighted kernel to each value of the specified dimensions in the data cube.

        The border parameter determines how the data is extended when the kernel overlaps with the borders.
        The following options are available:

        * numeric value - fill with a user-defined constant number n: nnnnnn|abcdefgh|nnnnnn (default, with n = 0)
        * replicate - repeat the value from the pixel at the border: aaaaaa|abcdefgh|hhhhhh
        * reflect - mirror/reflect from the border: fedcba|abcdefgh|hgfedc
        * reflect_pixel - mirror/reflect from the center of the pixel at the border: gfedcb|abcdefgh|gfedcb
        * wrap - repeat/wrap the image: cdefgh|abcdefgh|abcdef


        :param kernel: The kernel to be applied on the data cube. The kernel has to be as many dimensions as the data cube has dimensions.
        :param factor: A factor that is multiplied to each value computed by the focal operation. This is basically a shortcut for explicitly multiplying each value by a factor afterwards, which is often required for some kernel-based algorithms such as the Gaussian blur.
        :param border: Determines how the data is extended when the kernel overlaps with the borders. Defaults to fill the border with zeroes.
        :param replace_invalid: This parameter specifies the value to replace non-numerical or infinite numerical values with. By default, those values are replaced with zeroes.
        :return: A data cube with the newly computed values. The resolution, cardinality and the number of dimensions are the same as for the original data cube.
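
        Illustrative usage sketch (``cube`` stands for a hypothetical raster cube with two spatial dimensions):

        >>> # simple 3x3 box blur: average each pixel with its 8 neighbors
        >>> kernel = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
        >>> blurred = cube.apply_kernel(kernel, factor=1.0 / 9, border="replicate")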
        """
        pass

    def raster_to_vector(self) -> 'VectorCube':
        """
        EXPERIMENTAL: not generally supported, API subject to change.

        Converts this raster data cube into a vector data cube. The bounding polygon of homogeneous areas of pixels is constructed.

        :return: A vector cube
        """
        pass

    #### VIEW methods ####

    def polygonal_mean_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a mean time series for the given (multi)polygon. Its points are expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file

        :return: A time series, as a dict
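
        Illustrative usage sketch (coordinates are made up):

        >>> from shapely.geometry import Polygon
        >>> field = Polygon([(5.05, 51.21), (5.10, 51.21), (5.10, 51.25), (5.05, 51.25)])
        >>> timeseries = cube.polygonal_mean_timeseries(field)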
        """
        pass

    def polygonal_histogram_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a histogram time series for the given (multi)polygon. Its points are expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file

        :return: A time series, as a dict
        """
        pass

    def polygonal_median_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a median time series for the given (multi)polygon. Its points are
        expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
        :return: ImageCollection
        """
        pass

    def polygonal_standarddeviation_timeseries(
            self, polygon: Union[Polygon, MultiPolygon,
                                 str]) -> 'ImageCollection':
        """
        Extract a time series of standard deviations for the given (multi)polygon. Its points are
        expected to be in the EPSG:4326 coordinate
        reference system.

        :param polygon: The (multi)polygon; or a file path or HTTP URL to a GeoJSON file or shape file
        :return: ImageCollection
        """
        pass

    def resolution_merge(self,
                         high_resolution_bands: List[str],
                         low_resolution_bands: List[str],
                         method: str = None) -> 'ImageCollection':
        """
        EXPERIMENTAL
        Resolution merging algorithms try to improve the spatial resolution of lower resolution bands (e.g. Sentinel-2 20M) based on higher resolution bands. (e.g. Sentinel-2 10M).

        `Pansharpening explained: <https://bok.eo4geo.eu/IP2-1-3>`_

        `Example publication: 'Improving the Spatial Resolution of Land Surface Phenology by Fusing Medium- and Coarse-Resolution Inputs' <https://doi.org/10.1109/TGRS.2016.2537929>`_

        :param high_resolution_bands: A list of band names to use as 'high-resolution' band. Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands). If unique band name and common name conflict, the unique band name has higher priority. The order of the specified array defines the order of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the original order. These bands will remain unmodified.
        :param low_resolution_bands: A list of band names for which the spatial resolution should be increased. Either the unique band name (metadata field `name` in bands) or one of the common band names (metadata field `common_name` in bands). If unique band name and common name conflict, the unique band name has higher priority. The order of the specified array defines the order of the bands in the data cube. If multiple bands match a common name, all matched bands are included in the original order. These bands will be modified by the process.
        :param method: The method to use. The supported algorithms can vary between back-ends. Set to `null` (the default) to allow the back-end to choose, which will improve portability, but reduce reproducibility.
        :return: A datacube with the same bands and metadata as the input, but algorithmically increased spatial resolution for the selected bands.
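
        Illustrative usage sketch (the Sentinel-2 band names are purely an example):

        >>> merged = cube.resolution_merge(
        ...     high_resolution_bands=["B02", "B03", "B04"],
        ...     low_resolution_bands=["B05", "B06", "B07"],
        ... )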
        """
        pass

    def tiled_viewing_service(self, **kwargs) -> Service:
        """
        Returns metadata for a tiled viewing service that visualizes this layer.

        :param kwargs: Service creation options, passed as keyword arguments,
            for instance ``service_type``: the type of viewing service to create (e.g. 'WMTS').

        :return: A service object.
        """
        pass

    def execute_batch(self,
                      outputfile: Union[str, pathlib.Path],
                      out_format: str = None,
                      print=print,
                      max_poll_interval=60,
                      connection_retry_interval=30,
                      job_options=None,
                      **format_options) -> BatchJob:
        """
        Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
        This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.

        For very long running jobs, you probably do not want to keep the client running. In that case, using
        :func:`~openeo.imagecollection.ImageCollection.create_job` might be more appropriate.

        :param job_options: A dictionary containing (custom) job options
        :param outputfile: The path of a file to which a result can be written
        :param out_format: (optional) Format of the job result.
        :param format_options: String parameters for the job result format
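
        Illustrative usage sketch (output path and format are hypothetical):

        >>> job = cube.execute_batch("ndvi.tiff", out_format="GTiff")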

        """
        pass

    def create_job(self,
                   out_format: str = None,
                   job_options: Dict = None,
                   **format_options) -> BatchJob:
        """
        Sends a job to the backend and returns a BatchJob instance. The job will still need to be started and managed explicitly.
        The :func:`~openeo.imagecollection.ImageCollection.execute_batch` method allows you to run batch jobs without having to manage them explicitly.

        :param out_format: Format of the job result, as a string.
        :param job_options: A dictionary containing (custom) job options
        :param format_options: String parameters for the job result format
        :return: The created :py:class:`BatchJob` instance.
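
        Illustrative usage sketch:

        >>> job = cube.create_job(out_format="GTiff")
        >>> job.start_job()  # the job still has to be started explicitly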
        """
        pass

    send_job = legacy_alias(create_job, name="send_job")

    def pipe(self, func: Callable, *args, **kwargs):
        """
        Pipe the image collection through a function and return the result.

        Allows you to wrap a sequence of operations in a function and reuse it in a chained fashion.
        For example:

        >>> # Define a reusable set of ImageCollection operations
        >>> def ndvi_percent(cube):
        ...     return cube.ndvi().linear_scale_range(0, 1, 0, 100)
        >>> # Reuse the procedure
        >>> ndvi1 = cube1.pipe(ndvi_percent)
        >>> ndvi2 = cube2.pipe(ndvi_percent)
        >>> ndvi3 = cube3.pipe(ndvi_percent)

        Inspired by pandas.DataFrame.pipe

        :param func: function that expects an ImageCollection as first argument (and optionally additional arguments)
        :return: result of applying the function to the ImageCollection
        """
        return func(self, *args, **kwargs)
Example #12
0
class Connection(RestApiConnection):
    """
    Connection to an openEO backend.
    """

    _MINIMUM_API_VERSION = ComparableVersion("0.4.0")

    # Temporary workaround flag to enable for backends (e.g. EURAC) that expect id_token to be sent as bearer token
    # TODO DEPRECATED To remove when all backends properly expect access_token
    # see https://github.com/Open-EO/openeo-wcps-driver/issues/45
    oidc_auth_user_id_token_as_bearer = False

    def __init__(self,
                 url,
                 auth: AuthBase = None,
                 session: requests.Session = None,
                 default_timeout: int = None,
                 auth_config: AuthConfig = None,
                 refresh_token_store: RefreshTokenStore = None):
        """
        Constructor of Connection: sets up the connection to the given backend (optionally with authentication).

        :param url: String Backend root url
        """
        if "://" not in url:
            url = "https://" + url
        self._orig_url = url
        super().__init__(root_url=self.version_discovery(url, session=session),
                         auth=auth,
                         session=session,
                         default_timeout=default_timeout)
        self._capabilities_cache = {}

        # Initial API version check.
        if self._api_version.below(self._MINIMUM_API_VERSION):
            raise ApiVersionException(
                "OpenEO API version should be at least {m!s}, but got {v!s}".
                format(m=self._MINIMUM_API_VERSION, v=self._api_version))

        self._auth_config = auth_config
        self._refresh_token_store = refresh_token_store

    @classmethod
    def version_discovery(cls,
                          url: str,
                          session: requests.Session = None) -> str:
        """
        Do automatic openEO API version discovery from given url, using a "well-known URI" strategy.

        :param url: initial backend url (not including "/.well-known/openeo")
        :return: root url of highest supported backend version
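
        Illustrative usage sketch (URLs are made up):

        >>> Connection.version_discovery("https://oeo.example")  # doctest: +SKIP
        'https://oeo.example/openeo/1.0'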
        """
        try:
            well_known_url_response = RestApiConnection(
                url, session=session).get("/.well-known/openeo")
            assert well_known_url_response.status_code == 200
            versions = well_known_url_response.json()["versions"]
            supported_versions = [
                v for v in versions
                if cls._MINIMUM_API_VERSION <= v["api_version"]
            ]
            assert supported_versions
            production_versions = [
                v for v in supported_versions if v.get("production", True)
            ]
            highest_version = max(production_versions or supported_versions,
                                  key=lambda v: v["api_version"])
            _log.debug("Highest supported version available in backend: %s" %
                       highest_version)
            return highest_version['url']
        except Exception:
            # Be very lenient about failing on the well-known URI strategy.
            return url

    def _get_auth_config(self) -> AuthConfig:
        if self._auth_config is None:
            self._auth_config = AuthConfig()
        return self._auth_config

    def _get_refresh_token_store(self) -> RefreshTokenStore:
        if self._refresh_token_store is None:
            self._refresh_token_store = RefreshTokenStore()
        return self._refresh_token_store

    def authenticate_basic(self,
                           username: str = None,
                           password: str = None) -> 'Connection':
        """
        Authenticate a user to the backend using basic username and password.

        :param username: User name
        :param password: User passphrase
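
        Illustrative usage sketch (credentials are made up):

        >>> con = Connection("https://oeo.example").authenticate_basic("john", "j0hn123")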
        """
        if username is None:
            username, password = self._get_auth_config().get_basic_auth(
                backend=self._orig_url)
            if username is None:
                raise OpenEoClientException(
                    "No username/password given or found.")

        resp = self.get(
            '/credentials/basic',
            # /credentials/basic is the only endpoint that expects a Basic HTTP auth
            auth=HTTPBasicAuth(username, password)).json()
        # Switch to bearer based authentication in further requests.
        if self._api_version.at_least("1.0.0"):
            self.auth = BearerAuth(bearer='basic//{t}'.format(
                t=resp["access_token"]))
        else:
            self.auth = BearerAuth(bearer=resp["access_token"])
        return self

    def _get_oidc_provider(
            self,
            provider_id: Union[str,
                               None] = None) -> Tuple[str, OidcProviderInfo]:
        """
        Get OpenID Connect discovery URL for given provider_id

        :param provider_id: id of OIDC provider as specified by backend (/credentials/oidc).
            Can be None if there is just one provider.
        :return: updated provider_id and provider info object
        """
        if self._api_version.at_least("1.0.0"):
            oidc_info = self.get("/credentials/oidc",
                                 expected_status=200).json()
            providers = {p["id"]: p for p in oidc_info["providers"]}
            _log.info(
                "Found OIDC providers: {p}".format(p=list(providers.keys())))
            if provider_id:
                if provider_id not in providers:
                    raise OpenEoClientException(
                        "Requested OIDC provider {r!r} not available. Should be one of {p}."
                        .format(r=provider_id, p=list(providers.keys())))
                provider = providers[provider_id]
            elif len(providers) == 1:
                provider_id, provider = providers.popitem()
                _log.info(
                    "No OIDC provider given, but only one available: {p!r}. Use that one."
                    .format(p=provider_id))
            else:
                # Check if there is a single provider in the config to use.
                backend = self._orig_url
                provider_configs = self._get_auth_config(
                ).get_oidc_provider_configs(backend=backend)
                intersection = set(provider_configs.keys()).intersection(
                    providers.keys())
                if len(intersection) == 1:
                    provider_id = intersection.pop()
                    provider = providers[provider_id]
                    _log.info(
                        "No OIDC provider id given, but only one in config (backend {b!r}): {p!r}."
                        " Use that one.".format(b=backend, p=provider_id))
                else:
                    raise OpenEoClientException(
                        "No OIDC provider id given. Pick one from: {p!r}.".
                        format(p=list(providers.keys())))
            provider = OidcProviderInfo.from_dict(provider)
        else:
            # Per spec: '/credentials/oidc' will redirect to OpenID Connect discovery document
            provider = OidcProviderInfo(
                discovery_url=self.build_url('/credentials/oidc'))
        return provider_id, provider

    def _get_oidc_provider_and_client_info(
        self,
        provider_id: str,
        client_id: Union[str, None],
        client_secret: Union[str, None],
        default_client_grant_types: Union[None,
                                          List[DefaultOidcClientGrant]] = None
    ) -> Tuple[str, OidcClientInfo]:
        """
        Resolve provider_id and client info (as given or from config)

        :param provider_id: id of OIDC provider as specified by backend (/credentials/oidc).
            Can be None if there is just one provider.

        :return: OIDC provider id and client info
        """
        provider_id, provider = self._get_oidc_provider(provider_id)

        if client_id is None:
            _log.debug("No client_id: checking config for prefered client_id")
            client_id, client_secret = self._get_auth_config(
            ).get_oidc_client_configs(backend=self._orig_url,
                                      provider_id=provider_id)
            if client_id:
                _log.info("Using client_id {c!r} from config (provider {p!r})".
                          format(c=client_id, p=provider_id))
        if client_id is None and default_client_grant_types:
            # Try "default_client" from backend's provider info.
            _log.debug(
                "No client_id given: checking default client in backend's provider info"
            )
            client_id = provider.get_default_client_id(
                grant_types=default_client_grant_types)
            if client_id:
                _log.info(
                    "Using default client_id {c!r} from OIDC provider {p!r} info."
                    .format(c=client_id, p=provider_id))
        if client_id is None:
            raise OpenEoClientException("No client_id found.")

        client_info = OidcClientInfo(client_id=client_id,
                                     client_secret=client_secret,
                                     provider=provider)

        return provider_id, client_info

    def _authenticate_oidc(self,
                           authenticator: OidcAuthenticator,
                           provider_id: str,
                           store_refresh_token: bool = False) -> 'Connection':
        """
        Authenticate through OIDC and set up bearer token (based on OIDC access_token) for further requests.
        """
        tokens = authenticator.get_tokens(
            request_refresh_token=store_refresh_token)
        _log.info("Obtained tokens: {t}".format(
            t=[k for k, v in tokens._asdict().items() if v]))
        if store_refresh_token:
            if tokens.refresh_token:
                self._get_refresh_token_store().set_refresh_token(
                    issuer=authenticator.provider_info.issuer,
                    client_id=authenticator.client_id,
                    refresh_token=tokens.refresh_token)
            else:
                _log.warning(
                    "OIDC token response did not contain refresh token.")
        token = tokens.access_token if not self.oidc_auth_user_id_token_as_bearer else tokens.id_token
        if self._api_version.at_least("1.0.0"):
            self.auth = BearerAuth(
                bearer='oidc/{p}/{t}'.format(p=provider_id, t=token))
        else:
            self.auth = BearerAuth(bearer=token)
        return self

    def authenticate_oidc_authorization_code(
        self,
        client_id: str = None,
        client_secret: str = None,
        provider_id: str = None,
        timeout: int = None,
        server_address: Tuple[str, int] = None,
        webbrowser_open: Callable = None,
        store_refresh_token=False,
    ) -> 'Connection':
        """
        OpenID Connect Authorization Code Flow (with PKCE).
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id,
            client_id=client_id,
            client_secret=client_secret,
            default_client_grant_types=[DefaultOidcClientGrant.AUTH_CODE_PKCE],
        )
        authenticator = OidcAuthCodePkceAuthenticator(
            client_info=client_info,
            webbrowser_open=webbrowser_open,
            timeout=timeout,
            server_address=server_address)
        return self._authenticate_oidc(authenticator,
                                       provider_id=provider_id,
                                       store_refresh_token=store_refresh_token)

    def authenticate_oidc_client_credentials(
        self,
        client_id: str = None,
        client_secret: str = None,
        provider_id: str = None,
        store_refresh_token=False,
    ) -> 'Connection':
        """
        OpenID Connect Client Credentials flow.
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id,
            client_id=client_id,
            client_secret=client_secret)
        authenticator = OidcClientCredentialsAuthenticator(
            client_info=client_info)
        return self._authenticate_oidc(authenticator,
                                       provider_id=provider_id,
                                       store_refresh_token=store_refresh_token)

    def authenticate_oidc_resource_owner_password_credentials(
            self,
            username: str,
            password: str,
            client_id: str = None,
            client_secret: str = None,
            provider_id: str = None,
            store_refresh_token=False) -> 'Connection':
        """
        OpenID Connect Resource Owner Password Credentials flow.
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id,
            client_id=client_id,
            client_secret=client_secret)
        # TODO: also get username and password from config?
        authenticator = OidcResourceOwnerPasswordAuthenticator(
            client_info=client_info, username=username, password=password)
        return self._authenticate_oidc(authenticator,
                                       provider_id=provider_id,
                                       store_refresh_token=store_refresh_token)

    def authenticate_oidc_refresh_token(
            self,
            client_id: str = None,
            refresh_token: str = None,
            client_secret: str = None,
            provider_id: str = None) -> 'Connection':
        """
        OpenID Connect Refresh Token flow.
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id,
            client_id=client_id,
            client_secret=client_secret,
            default_client_grant_types=[DefaultOidcClientGrant.REFRESH_TOKEN],
        )

        if refresh_token is None:
            refresh_token = self._get_refresh_token_store().get_refresh_token(
                issuer=client_info.provider.issuer,
                client_id=client_info.client_id)
            if refresh_token is None:
                raise OpenEoClientException("No refresh token given or found")

        authenticator = OidcRefreshTokenAuthenticator(
            client_info=client_info, refresh_token=refresh_token)
        return self._authenticate_oidc(authenticator, provider_id=provider_id)

    def authenticate_oidc_device(self,
                                 client_id: str = None,
                                 client_secret: str = None,
                                 provider_id: str = None,
                                 store_refresh_token=False,
                                 use_pkce: Union[bool, None] = None,
                                 **kwargs) -> 'Connection':
        """
        Authenticate with OAuth Device Authorization grant/flow

        :param use_pkce: Use PKCE instead of client secret.
            If not set explicitly to `True` (use PKCE) or `False` (use client secret),
            it will be attempted to detect the best mode automatically.
            Note that PKCE for device code is not widely supported among OIDC providers.

        .. versionchanged:: 0.5.1 Add :py:obj:`use_pkce` argument
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id,
            client_id=client_id,
            client_secret=client_secret,
            default_client_grant_types=[
                DefaultOidcClientGrant.DEVICE_CODE_PKCE
            ],
        )
        authenticator = OidcDeviceAuthenticator(client_info=client_info,
                                                use_pkce=use_pkce,
                                                **kwargs)
        return self._authenticate_oidc(authenticator,
                                       provider_id=provider_id,
                                       store_refresh_token=store_refresh_token)

    def authenticate_oidc(self,
                          provider_id: str = None,
                          client_id: Union[str, None] = None,
                          client_secret: Union[str, None] = None,
                          store_refresh_token: bool = True):
        """
        Do OpenID Connect authentication, first trying refresh tokens and falling back on device code flow.

        .. versionadded:: 0.6.0
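
        Illustrative usage sketch:

        >>> con = Connection("https://oeo.example")
        >>> con.authenticate_oidc()  # doctest: +SKIP
        Authenticated using device code flow.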
        """
        provider_id, client_info = self._get_oidc_provider_and_client_info(
            provider_id=provider_id,
            client_id=client_id,
            client_secret=client_secret,
            default_client_grant_types=[
                DefaultOidcClientGrant.DEVICE_CODE_PKCE,
                DefaultOidcClientGrant.REFRESH_TOKEN
            ])

        # Try refresh token first.
        refresh_token = self._get_refresh_token_store().get_refresh_token(
            issuer=client_info.provider.issuer,
            client_id=client_info.client_id)
        if refresh_token:
            try:
                _log.info(
                    "Found refresh token: trying refresh token based authentication."
                )
                authenticator = OidcRefreshTokenAuthenticator(
                    client_info=client_info, refresh_token=refresh_token)
                con = self._authenticate_oidc(
                    authenticator,
                    provider_id=provider_id,
                    store_refresh_token=store_refresh_token)
                # TODO: pluggable/jupyter-aware display function?
                print("Authenticated using refresh token.")
                return con
            except OidcException as e:
                _log.info(
                    "Refresh token based authentication failed: {e}.".format(
                        e=e))

        # Fall back on device code flow
        # TODO: make it possible to do other fallback flows too?
        _log.info("Trying device code flow.")
        authenticator = OidcDeviceAuthenticator(client_info=client_info)
        con = self._authenticate_oidc(authenticator,
                                      provider_id=provider_id,
                                      store_refresh_token=store_refresh_token)
        print("Authenticated using device code flow.")
        return con

    def describe_account(self) -> dict:
        """
        Describes the currently authenticated user account.
        """
        return self.get('/me').json()

    @deprecated("use :py:meth:`list_jobs` instead", version="0.4.10")
    def user_jobs(self) -> dict:
        return self.list_jobs()

    def list_collections(self) -> List[dict]:
        """
        Loads all available image collections.

        :return: list of collection meta data dictionaries
        """
        data = self.get('/collections').json()["collections"]
        return VisualList("collections", data=data)

    def list_collection_ids(self) -> List[str]:
        """
        Get list of all collection ids

        :return: list of collection ids
        """
        return [
            collection['id'] for collection in self.list_collections()
            if 'id' in collection
        ]

    def capabilities(self) -> RESTCapabilities:
        """
        Loads all available capabilities of the back-end.

        :return: The capabilities of the back-end.
        """
        if "capabilities" not in self._capabilities_cache:
            self._capabilities_cache["capabilities"] = RESTCapabilities(
                self.get('/').json(), self._orig_url)
        return self._capabilities_cache["capabilities"]

    def list_output_formats(self) -> dict:
        if self._api_version.at_least("1.0.0"):
            return self.list_file_formats()["output"]
        else:
            return self.get('/output_formats').json()

    list_file_types = legacy_alias(list_output_formats, "list_file_types")

    def list_file_formats(self) -> dict:
        """
        Get available input and output formats
        """
        if "file_formats" not in self._capabilities_cache:
            self._capabilities_cache["file_formats"] = self.get(
                '/file_formats').json()
        return VisualDict("file-formats",
                          data=self._capabilities_cache["file_formats"])

    def list_service_types(self) -> dict:
        """
        Loads all available service types.

        :return: Dict of all available service types
        """
        if "service_types" not in self._capabilities_cache:
            self._capabilities_cache["service_types"] = self.get(
                '/service_types').json()
        return VisualDict("service-types",
                          data=self._capabilities_cache["service_types"])

    def list_udf_runtimes(self) -> dict:
        """
        Loads all available UDF runtimes.

        :return: Dict of all available UDF runtimes
        """
        if "udf_runtimes" not in self._capabilities_cache:
            self._capabilities_cache["udf_runtimes"] = self.get(
                '/udf_runtimes').json()
        return VisualDict("udf-runtimes",
                          data=self._capabilities_cache["udf_runtimes"])

    def list_services(self) -> dict:
        """
        Loads all available services of the authenticated user.

        :return: Dict of all available services
        """
        # TODO return parsed service objects
        services = self.get('/services').json()["services"]
        return VisualList("data-table",
                          data=services,
                          parameters={'columns': 'services'})

    def describe_collection(self, name) -> dict:
        # TODO: Maybe create some kind of Data class.
        """
        Loads detailed information of a specific image collection.

        :param name: String Id of the collection
        :return: Dict with detailed information about the collection
        """
        data = self.get('/collections/{}'.format(name)).json()
        return VisualDict("collection", data=data)

    def collection_items(self,
                         name,
                         spatial_extent: Optional[List[float]] = None,
                         temporal_extent: Optional[List[Union[
                             str, datetime.datetime]]] = None,
                         limit: int = None) -> Iterator[dict]:
        """
        Loads items for a specific image collection.
        May not be available for all collections.

        This is an experimental API and is subject to change.

        :param name: String Id of the collection
        :param spatial_extent: Limits the items to the given bounding box in WGS84:
            1. Lower left corner, coordinate axis 1
            2. Lower left corner, coordinate axis 2
            3. Upper right corner, coordinate axis 1
            4. Upper right corner, coordinate axis 2

        :param temporal_extent: Limits the items to the specified temporal interval.
            The interval has to be specified as an array with exactly two elements (start, end).
            Also supports open intervals by setting one of the boundaries to None, but never both.
        :param limit: The amount of items per request/page. If None, the back-end decides.

        :return: An iterator of collection item pages
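
        Illustrative usage sketch (the collection id is made up); each yielded element is one page of items:

        >>> for page in connection.collection_items("SENTINEL2_L2A", limit=10):
        ...     print(page)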
        """
        url = '/collections/{}/items'.format(name)
        params = {}
        if spatial_extent:
            params["bbox"] = ",".join(str(c) for c in spatial_extent)
        if temporal_extent:
            params["datetime"] = "/".join(
                ".." if t is None else rfc3339.normalize(t)
                for t in temporal_extent)
        if limit is not None and limit > 0:
            params['limit'] = limit

        return paginate(
            self, url, params, lambda response, page: VisualDict(
                "items",
                data=response,
                parameters={
                    'show-map': True,
                    'heading': 'Page {} - Items'.format(page)
                }))

    def collection_metadata(self, name) -> CollectionMetadata:
        return CollectionMetadata(metadata=self.describe_collection(name))

    def list_processes(self) -> List[dict]:
        # TODO: Maybe format the result dictionary so that the process_id is the key of the dictionary.
        """
        Loads all available processes of the back end.

        :return: List of all available processes of the back end.
        """
        data = self.get('/processes').json()["processes"]
        return VisualList("processes", data=data)

    def list_jobs(self) -> dict:
        """
        Lists all jobs of the authenticated user.

        :return: List of all jobs of the user.
        """
        # TODO: Parse the result so that there get Job classes returned?
        jobs = self.get('/jobs').json()["jobs"]
        return VisualList("data-table",
                          data=jobs,
                          parameters={'columns': 'jobs'})

    def save_user_defined_process(
            self,
            user_defined_process_id: str,
            process_graph: Union[dict, ProcessBuilderBase],
            parameters: List[Union[dict, Parameter]] = None,
            public: bool = False,
            summary: str = None,
            description: str = None) -> RESTUserDefinedProcess:
        """
        Saves a process graph and its metadata in the backend as a user-defined process for the authenticated user.

        :param user_defined_process_id: unique identifier for the user-defined process
        :param process_graph: a process graph
        :param parameters: a list of parameters
        :param public: visible to other users?
        :param summary: A short summary of what the process does.
        :param description: Detailed description to explain the entity. CommonMark 0.29 syntax MAY be used for rich text representation.
        :return: a RESTUserDefinedProcess instance
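
        Illustrative usage sketch (``my_flat_graph`` stands for a hypothetical flat process graph dict):

        >>> udp = connection.save_user_defined_process(
        ...     user_defined_process_id="my_ndvi",
        ...     process_graph=my_flat_graph,
        ...     parameters=[Parameter.raster_cube(name="data")],
        ... )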
        """
        if user_defined_process_id in set(p["id"]
                                          for p in self.list_processes()):
            warnings.warn(
                "Defining user-defined process {u!r} with same id as a pre-defined process"
                .format(u=user_defined_process_id))
        if not parameters:
            warnings.warn(
                "Defining user-defined process {u!r} without parameters".
                format(u=user_defined_process_id))
        udp = RESTUserDefinedProcess(
            user_defined_process_id=user_defined_process_id, connection=self)
        udp.store(process_graph=process_graph,
                  parameters=parameters,
                  public=public,
                  summary=summary,
                  description=description)
        return udp

    def list_user_defined_processes(self) -> List[dict]:
        """
        Lists all user-defined processes of the authenticated user.
        """
        return self.get("/process_graphs").json()["processes"]

    def user_defined_process(
            self, user_defined_process_id: str) -> RESTUserDefinedProcess:
        """
        Get the user-defined process based on its id. The process with the given id should already exist.

        :param user_defined_process_id: the id of the user-defined process
        :return: a RESTUserDefinedProcess instance
        """
        return RESTUserDefinedProcess(
            user_defined_process_id=user_defined_process_id, connection=self)

    def validate_processgraph(self, process_graph):
        # Endpoint: POST /validate
        raise NotImplementedError()

    @property
    def _api_version(self) -> ComparableVersion:
        # TODO make this a public property (it's also useful outside the Connection class)
        return self.capabilities().api_version_check

    def datacube_from_process(self,
                              process_id: str,
                              namespace: str = None,
                              **kwargs) -> DataCube:
        """
        Load a raster datacube, from a custom process.

        :param process_id: The process id of the custom process.
        :param namespace: optional: process namespace
        :param kwargs: The arguments of the custom process
        :return: A DataCube, without valid metadata, as the client is not aware of this custom process.
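
        Illustrative usage sketch (process id and argument are hypothetical):

        >>> cube = connection.datacube_from_process("mean_composite", collection="SENTINEL2_L2A")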
        """

        if self._api_version.at_least("1.0.0"):
            graph = PGNode(process_id, namespace=namespace, arguments=kwargs)
            return DataCube(graph=graph, connection=self)
        else:
            raise OpenEoClientException(
                "This method requires support for at least version 1.0.0 in the openEO backend."
            )

    def load_collection(
        self,
        collection_id: str,
        spatial_extent: Optional[Dict[str, float]] = None,
        temporal_extent: Optional[List[Union[str, datetime.datetime,
                                             datetime.date]]] = None,
        bands: Optional[List[str]] = None,
        properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None
    ) -> DataCube:
        """
        Load a DataCube by collection id.

        :param collection_id: image collection identifier
        :param spatial_extent: limit data to specified bounding box or polygons
        :param temporal_extent: limit data to specified temporal interval
        :param bands: only add the specified bands
        :param properties: limit data by metadata property predicates
        :return: a datacube containing the requested data
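
        Illustrative usage sketch (collection id, extents and bands are made up):

        >>> cube = connection.load_collection(
        ...     "SENTINEL2_L2A",
        ...     spatial_extent={"west": 5.0, "south": 51.2, "east": 5.1, "north": 51.3},
        ...     temporal_extent=["2021-01-01", "2021-03-31"],
        ...     bands=["B04", "B08"],
        ... )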
        """
        if self._api_version.at_least("1.0.0"):
            return DataCube.load_collection(collection_id=collection_id,
                                            connection=self,
                                            spatial_extent=spatial_extent,
                                            temporal_extent=temporal_extent,
                                            bands=bands,
                                            properties=properties)
        else:
            return ImageCollectionClient.load_collection(
                collection_id=collection_id,
                session=self,
                spatial_extent=spatial_extent,
                temporal_extent=temporal_extent,
                bands=bands)

    imagecollection = legacy_alias(load_collection, name="imagecollection")

    def create_service(self, graph: dict, type: str, **kwargs) -> dict:
        # TODO: type hint for graph: is it a nested or a flat one?
        req = self._build_request_with_process_graph(process_graph=graph,
                                                     type=type,
                                                     **kwargs)
        response = self.post(path="/services", json=req, expected_status=201)
        # TODO: "location" is url of the service metadata, not (base) url of service (https://github.com/Open-EO/openeo-api/issues/269)
        # TODO: fetch this metadata and return a full metadata object instead?
        return {
            'url': response.headers.get('Location'),
            'service_id': response.headers.get("OpenEO-Identifier"),
        }

    def remove_service(self, service_id: str):
        """
        Stop and remove a secondary web service.

        :param service_id: service identifier
        :return:
        """
        self.delete('/services/' + service_id)

    @deprecated("Use :py:meth:`openeo.rest.job.RESTJob.get_results` instead.",
                version="0.4.10")
    def job_results(self, job_id) -> dict:
        """Get batch job results metadata."""
        return RESTJob(job_id, connection=self).list_results()

    @deprecated("Use :py:meth:`openeo.rest.job.RESTJob.logs` instead.",
                version="0.4.10")
    def job_logs(self, job_id, offset) -> list:
        """Get batch job logs."""
        return RESTJob(job_id, connection=self).logs(offset=offset)

    def list_files(self):
        """
        Lists all files that the logged-in user uploaded.

        :return: List of the user's uploaded files.
        """

        files = self.get('/files').json()['files']
        return VisualList("data-table",
                          data=files,
                          parameters={'columns': 'files'})

    def create_file(self, path):
        """
        Creates a virtual file.

        :return: file object.
        """
        # No endpoint yet; this would just return a file object.
        raise NotImplementedError()

    def _build_request_with_process_graph(self, process_graph: Union[dict,
                                                                     Any],
                                          **kwargs) -> dict:
        """
        Prepare a json payload with a process graph to submit to /result, /services, /jobs, ...
        :param process_graph: flat dict representing a process graph
        """
        result = kwargs
        process_graph = as_flat_graph(process_graph)
        if self._api_version.at_least("1.0.0"):
            result["process"] = {"process_graph": process_graph}
        else:
            result["process_graph"] = process_graph
        return result

    # TODO: unify `download` and `execute` better: e.g. `download` always writes to disk, `execute` returns result (raw or as JSON decoded dict)
    def download(self,
                 graph: dict,
                 outputfile: Union[Path, str, None] = None,
                 timeout: int = 30 * 60):
        """
        Downloads the result of a process graph synchronously,
        and saves the result to the given file, or returns a bytes object if no outputfile is specified.
        This method is useful to export binary content such as images. For JSON content, the execute method is recommended.

        :param graph: (flat) dict representing a process graph
        :param outputfile: output file
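
        Illustrative usage sketch (``flat_graph`` stands for a hypothetical flat process graph dict):

        >>> connection.download(flat_graph, outputfile="result.tiff")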
        """
        request = self._build_request_with_process_graph(process_graph=graph)
        response = self.post(path="/result",
                             json=request,
                             stream=True,
                             timeout=timeout)

        if outputfile is not None:
            with Path(outputfile).open(mode="wb") as f:
                for chunk in response.iter_content(chunk_size=None):
                    f.write(chunk)
        else:
            return response.content

    def execute(self, process_graph: dict):
        """
        Execute a process graph synchronously.

        :param process_graph: (flat) dict representing a process graph
        """
        req = self._build_request_with_process_graph(
            process_graph=process_graph)
        return self.post(path="/result", json=req).json()

    def create_job(self,
                   process_graph: dict,
                   title: str = None,
                   description: str = None,
                   plan: str = None,
                   budget=None,
                   additional: Dict = None) -> RESTJob:
        """
        Posts a job to the back end.

        :param process_graph: (flat) dict representing process graph
        :param title: String title of the job
        :param description: String description of the job
        :param plan: billing plan
        :param budget: Budget
        :param additional: additional job options to pass to the backend
        :return: The newly created job, as a :py:class:`RESTJob` instance.
        """
        # TODO move all this (RESTJob factory) logic to RESTJob?
        req = self._build_request_with_process_graph(
            process_graph=process_graph,
            **dict_no_none(title=title,
                           description=description,
                           plan=plan,
                           budget=budget))
        if additional:
            # TODO: get rid of this non-standard field? https://github.com/Open-EO/openeo-api/issues/276
            req["job_options"] = additional

        response = self.post("/jobs", json=req, expected_status=201)

        if "openeo-identifier" in response.headers:
            job_id = response.headers['openeo-identifier']
        elif "location" in response.headers:
            _log.warning(
                "Backend did not explicitly respond with job id, will guess it from redirect URL."
            )
            job_id = response.headers['location'].split("/")[-1]
        else:
            raise OpenEoClientException("Failed fo extract job id")
        return RESTJob(job_id, self)

    def job(self, job_id: str):
        """
        Get the job based on the id. The job with the given id should already exist.
        
        Use :py:meth:`openeo.rest.connection.Connection.create_job` to create new jobs.

        :param job_id: the job id of an existing job
        :return: A job object.
        """
        return RESTJob(job_id, self)

    def load_disk_collection(self,
                             format: str,
                             glob_pattern: str,
                             options: dict = None) -> ImageCollectionClient:
        """
        Loads image data from disk as an ImageCollection.

        :param format: the file format, e.g. 'GTiff'
        :param glob_pattern: a glob pattern that matches the files to load from disk
        :param options: options specific to the file format
        :return: the data as an ImageCollection
        """

        # Avoid a mutable default argument: treat a missing options dict as empty.
        if self._api_version.at_least("1.0.0"):
            return DataCube.load_disk_collection(self, format, glob_pattern,
                                                 **(options or {}))
        else:
            return ImageCollectionClient.load_disk_collection(
                self, format, glob_pattern, **(options or {}))