Ejemplo n.º 1
0
class Metadata(Service):
    """
    Image Metadata Service

    Any methods that take start and end timestamps accept most common date/time
    formats as a string. If no explicit timezone is given, the timestamp is assumed
    to be in UTC. For example ``'2012-06-01'`` means June 1st 2012 00:00 in UTC,
    ``'2012-06-01 00:00+02:00'`` means June 1st 2012 00:00 in GMT+2.
    """

    # NOTE(review): presumably a (connect, read) timeout pair in seconds that is
    # consumed by the ``Service`` base class -- confirm against ``Service``.
    TIMEOUT = (9.5, 120)

    # Class-level ``GenericProperties`` instance. The ``q`` parameter docs of the
    # search methods point at a ``properties`` object for building filter
    # expressions; presumably this is that object -- confirm.
    properties = GenericProperties()

    def __init__(self, url=None, auth=None, retries=None):
        """
        Create a client for the metadata service.

        :param str url: HTTP URL of the metadata service version to talk to.
            When omitted, the ``DESCARTESLABS_METADATA_URL`` environment variable
            is used, falling back to the current public version.
        :param Auth auth: A custom user authentication. When omitted, a default
            ``Auth()`` is created (authenticated locally by token information on
            disk or by environment variables).
        :param urllib3.util.retry.Retry retries: A custom retry configuration
            used for all API requests (defaults to a reasonable amount of retries).
        """
        auth = Auth() if auth is None else auth

        # NOTE(review): ``simplefilter`` is presumably ``warnings.simplefilter``; if
        # so, this changes the process-wide filter so DeprecationWarnings are
        # always shown.
        simplefilter("always", DeprecationWarning)

        if url is None:
            fallback_url = "https://platform.descarteslabs.com/metadata/v1"
            url = os.environ.get("DESCARTESLABS_METADATA_URL", fallback_url)

        super(Metadata, self).__init__(url, auth=auth, retries=retries)

        # The raster client (used to resolve dltile keys) shares this client's auth.
        self._raster = Raster(auth=self.auth)

    def bands(self,
              products=None,
              limit=None,
              offset=None,
              wavelength=None,
              resolution=None,
              tags=None,
              bands=None,
              **kwargs):
        """Search for imagery data bands that you have access to.

        Only arguments that are not ``None`` are sent to the service; any extra
        keyword arguments are passed along in the request body unchanged.

        :param list(str) products: A list of product(s) to return bands for.
        :param int limit: Number of results to return.
        :param int offset: Index to start at when returning results.
        :param float wavelength: A wavelength in nm e.g 700 that the band sensor must measure.
        :param int resolution: The resolution in meters per pixel e.g 30 of the data available in this band.
        :param list(str) tags: A list of tags that the band must have in its own tag list.
        :param bands: Accepted in the signature, but this method does not
            include it in the request.

        :return: List of dicts containing at most `limit` bands. Empty if there are no
            bands matching query (e.g. product id not available).
        :rtype: DotList(DotDict)
        """
        # Explicit filters in the order they are added to the request body.
        # ``bands`` is not part of this table, matching the existing behaviour.
        explicit_filters = (
            ("limit", limit),
            ("offset", offset),
            ("products", products),
            ("wavelength", wavelength),
            ("resolution", resolution),
            ("tags", tags),
        )

        payload = dict(kwargs)
        for name, value in explicit_filters:
            if value is not None:
                payload[name] = value

        response = self.session.post("/bands/search", json=payload)
        return DotList(response.json())

    def derived_bands(self,
                      bands=None,
                      require_bands=None,
                      limit=None,
                      offset=None,
                      **kwargs):
        """Search for predefined derived bands that you have access to.

        Only arguments that are not ``None`` are sent to the service; any extra
        keyword arguments are passed along in the request body unchanged.

        :param list(str) bands: Limit the derived bands to ones that can be
                                computed using this list of spectral bands.
                                e.g ["red", "nir", "swir1"]
        :param bool require_bands: Control whether searched bands *must* contain
                                   all the spectral bands passed in the bands param.
                                   Defaults to False.
        :param int limit: Number of results to return.
        :param int offset: Index to start at when returning results.

        :return: List of dicts containing at most `limit` bands.
        :rtype: DotList(DotDict)
        """
        # Explicit filters in the order they are added to the request body.
        explicit_filters = (
            ("bands", bands),
            ("require_bands", require_bands),
            ("limit", limit),
            ("offset", offset),
        )

        payload = dict(kwargs)
        for name, value in explicit_filters:
            if value is not None:
                payload[name] = value

        response = self.session.post("/bands/derived/search", json=payload)
        return DotList(response.json())

    def get_bands_by_id(self, id_):
        """
        Return the bands available for a given image source id.

        :param str id_: A :class:`Metadata` image identifier.

        :return: A dictionary of band entries and their metadata.
        :rtype: DotDict

        :raises ~descarteslabs.client.exceptions.NotFoundError: Raised if image id cannot
            be found.

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> bands = Metadata().get_bands_by_id('landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1')
            >>> ndvi_info = bands['derived:ndvi'] # View NDVI band information
            >>> ndvi_info['physical_range']
            [-1.0, 1.0]
        """
        endpoint = "/bands/id/{}".format(id_)
        response = self.session.get(endpoint)
        return DotDict(response.json())

    def get_bands_by_product(self, product_id):
        """
        Return all bands (including derived bands) available in a product.

        :param str product_id: A product identifier.

        :return: A dictionary mapping band ids to dictionaries of their metadata.
            Returns empty dict if product id not found.
        :rtype: DotDict
        """
        endpoint = "/bands/all/{}".format(product_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())

    def products(self,
                 bands=None,
                 limit=None,
                 offset=None,
                 owner=None,
                 text=None,
                 **kwargs):
        """Search products that are available on the platform.

        Only arguments that are not ``None`` are sent to the service; any extra
        keyword arguments are passed along in the request body. The deprecated
        ``band`` keyword is handled by ``check_deprecated_kwargs``.

        :param list(str) bands: Band name(s) e.g ["red", "nir"] to filter products by.
                                Note that products must match all bands that are passed.
        :param int limit: Number of results to return.
        :param int offset: Index to start at when returning results.
        :param str owner: Filter products by the owner's uuid.
        :param str text: Filter products by string match.

        :return: List of dicts containing at most `limit` products. Empty if no matching
            products are found.
        :rtype: DotList(DotDict)
        """
        # Explicit filters in the order they are added to the request body.
        explicit_filters = (
            ("limit", limit),
            ("offset", offset),
            ("bands", bands),
            ("owner", owner),
            ("text", text),
        )

        payload = dict(kwargs)
        for name, value in explicit_filters:
            if value is not None:
                payload[name] = value

        # Runs on the merged payload, i.e. after the explicit filters were added.
        check_deprecated_kwargs(payload, {"band": "bands"})

        response = self.session.post("/products/search", json=payload)
        return DotList(response.json())

    def available_products(self):
        """Return the identifiers of the products you have access to.

        :return: List of product ids
        :rtype: DotList

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> products = Metadata().available_products()
            >>> products  # doctest: +SKIP
            ['landsat:LC08:PRE:TOAR']
        """
        response = self.session.get("/products")
        product_ids = response.json()
        return DotList(product_ids)

    def summary(self,
                products=None,
                sat_ids=None,
                date="acquired",
                interval=None,
                place=None,
                geom=None,
                start_datetime=None,
                end_datetime=None,
                cloud_fraction=None,
                cloud_fraction_0=None,
                fill_fraction=None,
                storage_state=None,
                q=None,
                pixels=None,
                dltile=None,
                **kwargs):
        """Summarize the results of a spatio-temporal query.

        The region of interest is resolved in this order: ``place`` (when given)
        replaces ``geom``, and a ``dltile`` (when given and resolvable to a dict)
        replaces either of them.

        :param list(str) products: Product identifier(s). A single string is
            wrapped in a list.
        :param list(str) sat_ids: Satellite identifier(s). A single string is
            wrapped in a list.
        :param str date: The date field to use for search (e.g. `acquired`).
        :param str interval: Part of the date to aggregate over (e.g. `day`).
            The list of possibilites is:

            * ``year`` or ``y``
            * ``quarter``
            * ``month`` or ``M``
            * ``week`` or ``q``
            * ``day`` or ``d``
            * ``hour`` or ``h``
            * ``minute`` or ``m``
            * ``product``
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest or a Shapely shape object.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param str storage_state: Filter results based on `storage_state` value. Allowed values are `"available"`,
            `"remote"`, or `None`, which returns all results regardless of `storage_state` value.
        :param ~descarteslabs.common.property_filtering.filtering.Expression q:
            Expression (or list of expressions, which are AND-ed together) for
            filtering the results. See
            :py:attr:`~descarteslabs.client.services.metadata.properties`.
        :param bool pixels: Whether to include pixel counts in summary calculations.
        :param str dltile: A dltile key used to specify the search geometry, an alternative
            to the ``geom`` argument.

        :return: Dictionary containing summary of products that match query. Empty products list
            if no matching products found.
        :rtype: DotDict

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> iowa_geom = {
            ...     "coordinates": [[
            ...         [-96.498997, 42.560832],
            ...         [-95.765645, 40.585208],
            ...         [-91.729115, 40.61364],
            ...         [-91.391613, 40.384038],
            ...         [-90.952233, 40.954047],
            ...         [-91.04589, 41.414085],
            ...         [-90.343228, 41.587833],
            ...         [-90.140613, 41.995999],
            ...         [-91.065059, 42.751338],
            ...         [-91.217706, 43.50055],
            ...         [-96.599191, 43.500456],
            ...         [-96.498997, 42.560832]
            ...     ]],
            ...     "type": "Polygon"
            ... }
            >>> Metadata().summary(geom=iowa_geom,
            ...                    products=['landsat:LC08:PRE:TOAR'],
            ...                    start_datetime='2016-07-06',
            ...                    end_datetime='2016-07-07',
            ...                    interval='hour',
            ...                    pixels=True)
            {
              'bytes': 93298309,
              'count': 1,
              'items': [
                {
                  'bytes': 93298309,
                  'count': 1,
                  'date': '2016-07-06T16:00:00.000Z',
                  'pixels': 250508160,
                  'timestamp': 1467820800
                }
              ],
              'pixels': 250508160,
              'products': ['landsat:LC08:PRE:TOAR']
            }
        """
        # NOTE(review): the docstring lists ``q`` as the short form of ``week``;
        # this may be a typo (``w``?) -- confirm against the service before
        # changing the documentation.
        renamed_kwargs = {
            "product": "products",
            "const_id": "const_ids",
            "sat_id": "sat_ids",
            "start_time": "start_datetime",
            "end_time": "end_datetime",
            "part": "interval",
        }
        check_deprecated_kwargs(kwargs, renamed_kwargs)

        # --- resolve the region of interest -------------------------------
        if place:
            places_client = Places()
            places_client.auth = self.auth
            place_shape = places_client.shape(place, geom="low")
            geom = json.dumps(place_shape["geometry"])

        if dltile is not None:
            # A key is looked up through the raster client; anything else is
            # used as given.
            tile = self._raster.dltile(dltile) if isinstance(dltile, string_types) else dltile
            if isinstance(tile, dict):
                geom = tile["geometry"]

        if isinstance(geom, dict):
            geom = json.dumps(geom)

        # --- assemble the request body (insertion order is kept) ----------
        for name, value in (("sat_ids", sat_ids), ("products", products)):
            if value:
                kwargs[name] = [value] if isinstance(value, string_types) else value

        for name, value in (("date", date), ("interval", interval)):
            if value:
                kwargs[name] = value

        if geom:
            kwargs["geom"] = shapely_to_geojson(geom)

        for name, value in (("start_datetime", start_datetime),
                            ("end_datetime", end_datetime)):
            if value:
                kwargs[name] = value

        # Fractions may legitimately be 0, so only ``None`` means "not set".
        for name, value in (("cloud_fraction", cloud_fraction),
                            ("cloud_fraction_0", cloud_fraction_0),
                            ("fill_fraction", fill_fraction)):
            if value is not None:
                kwargs[name] = value

        if q is not None:
            expressions = q if isinstance(q, list) else [q]
            kwargs["query_expr"] = AndExpression(expressions).serialize()

        for name, value in (("pixels", pixels), ("storage_state", storage_state)):
            if value:
                kwargs[name] = value

        response = self.session.post("/summary", json=kwargs)
        return DotDict(response.json())

    def paged_search(self,
                     products=None,
                     sat_ids=None,
                     date="acquired",
                     place=None,
                     geom=None,
                     start_datetime=None,
                     end_datetime=None,
                     cloud_fraction=None,
                     cloud_fraction_0=None,
                     fill_fraction=None,
                     storage_state=None,
                     q=None,
                     limit=100,
                     fields=None,
                     dltile=None,
                     sort_field=None,
                     sort_order="asc",
                     randomize=None,
                     continuation_token=None,
                     **kwargs):
        """
        Run a metadata query one page at a time, with up to 10,000 items per page.

        Most clients should use :py:func:`features` instead, which batch searches into smaller requests
        and handles the paging for you.

        The region of interest is resolved in this order: ``place`` (when given)
        replaces ``geom``, and a ``dltile`` (when given and resolvable to a dict)
        replaces either of them.

        :param list(str) products: Product Identifier(s). A single string is
            wrapped in a list.
        :param list(str) sat_ids: Satellite identifier(s). A single string is
            wrapped in a list.
        :param str date: The date field to use for search (default is `acquired`).
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest or a Shapely shape object.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param str storage_state: Filter results based on `storage_state` value. Allowed values are
            `"available"`, `"remote"`, or `None`, which returns all results regardless of
            `storage_state` value.
        :param ~descarteslabs.common.property_filtering.filtering.Expression q:
            Expression (or list of expressions, which are AND-ed together) for
            filtering the results. See
            :py:attr:`~descarteslabs.client.services.metadata.properties`.
        :param int limit: Maximum number of items per page to return.
        :param list(str) fields: Properties to return.
        :param str dltile: A dltile key used to specify the search geometry, an alternative
            to the ``geom`` argument.
        :param str sort_field: Property to sort on.
        :param str sort_order: Order of sort.
        :param bool randomize: Randomize the results. You may also use an `int` or `str` as an explicit seed.
        :param str continuation_token: None for new query, or the `properties.continuation_token` value from
            the returned FeatureCollection from a previous invocation of this method to page through a large
            result set.

        :return: GeoJSON ``FeatureCollection`` containing at most `limit` features.
            The ``properties`` member (holding ``continuation_token``) is only
            present when the response carries an ``x-continuation-token`` header.
        :rtype: DotDict
        """
        renamed_kwargs = {
            "product": "products",
            "const_id": "const_ids",
            "sat_id": "sat_ids",
            "start_time": "start_datetime",
            "end_time": "end_datetime",
            "offset": None,
        }
        check_deprecated_kwargs(kwargs, renamed_kwargs)

        # --- resolve the region of interest -------------------------------
        if place:
            places_client = Places()
            places_client.auth = self.auth
            place_shape = places_client.shape(place, geom="low")
            geom = json.dumps(place_shape["geometry"])

        if dltile is not None:
            # A key is looked up through the raster client; anything else is
            # used as given.
            tile = self._raster.dltile(dltile) if isinstance(dltile, string_types) else dltile
            if isinstance(tile, dict):
                geom = tile["geometry"]

        if isinstance(geom, dict):
            geom = json.dumps(geom)

        # --- assemble the request body (insertion order is kept) ----------
        # ``date`` and ``limit`` are always sent, whatever their value.
        kwargs["date"] = date
        kwargs["limit"] = limit

        for name, value in (("sat_ids", sat_ids), ("products", products)):
            if value:
                kwargs[name] = [value] if isinstance(value, string_types) else value

        if geom:
            kwargs["geom"] = shapely_to_geojson(geom)

        for name, value in (("start_datetime", start_datetime),
                            ("end_datetime", end_datetime)):
            if value:
                kwargs[name] = value

        # Fractions may legitimately be 0, so only ``None`` means "not set".
        for name, value in (("cloud_fraction", cloud_fraction),
                            ("cloud_fraction_0", cloud_fraction_0),
                            ("fill_fraction", fill_fraction)):
            if value is not None:
                kwargs[name] = value

        if storage_state:
            kwargs["storage_state"] = storage_state

        if fields is not None:
            kwargs["fields"] = fields

        if q is not None:
            expressions = q if isinstance(q, list) else [q]
            kwargs["query_expr"] = AndExpression(expressions).serialize()

        # ``randomize`` travels under the ``random_seed`` key.
        for name, value in (("sort_field", sort_field),
                            ("sort_order", sort_order),
                            ("random_seed", randomize),
                            ("continuation_token", continuation_token)):
            if value is not None:
                kwargs[name] = value

        response = self.session.post("/search", json=kwargs)

        collection = {"type": "FeatureCollection", "features": response.json()}

        token_header = "x-continuation-token"
        if token_header in response.headers:
            collection["properties"] = {
                "continuation_token": response.headers[token_header]
            }

        return DotDict(collection)

    def search(self,
               products=None,
               sat_ids=None,
               date="acquired",
               place=None,
               geom=None,
               start_datetime=None,
               end_datetime=None,
               cloud_fraction=None,
               cloud_fraction_0=None,
               fill_fraction=None,
               storage_state=None,
               q=None,
               limit=100,
               fields=None,
               dltile=None,
               sort_field=None,
               sort_order="asc",
               randomize=None,
               **kwargs):
        """Search metadata given a spatio-temporal query. All parameters are
        optional.

        The results are pulled from :py:func:`features` in batches of at most
        1000 and collected into a single ``FeatureCollection``. If performing a
        large query, consider using the iterator :py:func:`features` directly.

        :param list(str) products: Product Identifier(s).
        :param list(str) sat_ids: Satellite identifier(s).
        :param str date: The date field to use for search (e.g. `acquired`).
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param str storage_state: Filter results based on `storage_state` value. Allowed values are
            `"available"`, `"remote"`, or `None`, which returns all results regardless of
            `storage_state` value.
        :param ~descarteslabs.common.property_filtering.filtering.Expression q:
            Expression for filtering the results. See
            :py:attr:`~descarteslabs.client.services.metadata.properties`.
        :param int limit: Maximum number of items to return. With ``None`` the
            result iterator is consumed without an upper bound.
        :param list(str) fields: Properties to return.
        :param str dltile: A dltile key used to specify the search geometry, an alternative
            to the ``geom`` argument.
        :param str sort_field: Property to sort on.
        :param str sort_order: Order of sort.
        :param bool randomize: Randomize the results. You may also use an `int` or `str` as an explicit seed.

        :return: GeoJSON ``FeatureCollection``. Empty features list if no matching images found.
        :rtype: DotDict

        Note that as of release 0.16.0 the ``continuation_token`` token has been removed. Please use the
        :py:func:`paged_search` if you require this feature.

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> iowa_geom = {
            ...     "coordinates": [[
            ...         [-96.498997, 42.560832],
            ...         [-95.765645, 40.585208],
            ...         [-91.729115, 40.61364],
            ...         [-91.391613, 40.384038],
            ...         [-90.952233, 40.954047],
            ...         [-91.04589, 41.414085],
            ...         [-90.343228, 41.587833],
            ...         [-90.140613, 41.995999],
            ...         [-91.065059, 42.751338],
            ...         [-91.217706, 43.50055],
            ...         [-96.599191, 43.500456],
            ...         [-96.498997, 42.560832]
            ...     ]],
            ...     "type": "Polygon"
            ... }
            >>> scenes = Metadata().search(
            ...     geom=iowa_geom,
            ...     products=['landsat:LC08:PRE:TOAR'],
            ...     start_datetime='2016-07-01',
            ...     end_datetime='2016-07-31T23:59:59'
            ... )
            >>> len(scenes['features'])  # doctest: +SKIP
            2
        """
        # Never request more per batch than the caller wants overall.
        if limit is None:
            page_size = 1000
        else:
            page_size = min(limit, 1000)

        all_features = self.features(
            products=products,
            sat_ids=sat_ids,
            date=date,
            place=place,
            geom=geom,
            start_datetime=start_datetime,
            end_datetime=end_datetime,
            cloud_fraction=cloud_fraction,
            cloud_fraction_0=cloud_fraction_0,
            fill_fraction=fill_fraction,
            storage_state=storage_state,
            q=q,
            fields=fields,
            dltile=dltile,
            sort_field=sort_field,
            sort_order=sort_order,
            randomize=randomize,
            batch_size=page_size,
            **kwargs)

        # ``features`` is lazy; islice stops pulling once ``limit`` is reached.
        wanted = itertools.islice(all_features, limit)
        collected = DotList(wanted)
        return DotDict(type="FeatureCollection", features=collected)

    def ids(self,
            products=None,
            sat_ids=None,
            date="acquired",
            place=None,
            geom=None,
            start_datetime=None,
            end_datetime=None,
            cloud_fraction=None,
            cloud_fraction_0=None,
            fill_fraction=None,
            storage_state=None,
            q=None,
            limit=100,
            dltile=None,
            sort_field=None,
            sort_order=None,
            randomize=None,
            **kwargs):
        """Search metadata given a spatio-temporal query and return only the
        image identifiers. All parameters are optional.

        This delegates to :py:func:`search` with an empty ``fields`` list and
        extracts the ``id`` of every returned feature.

        :param list(str) products: Products identifier(s).
        :param list(str) sat_ids: Satellite identifier(s).
        :param str date: The date field to use for search (e.g. `acquired`).
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param str storage_state: Filter results based on `storage_state` value. Allowed values are
            `"available"`, `"remote"`, or `None`, which returns all results regardless of
            `storage_state` value.
        :param ~descarteslabs.common.property_filtering.filtering.Expression q:
            Expression for filtering the results. See
            :py:attr:`~descarteslabs.client.services.metadata.properties`.
        :param int limit: Number of items to return.
        :param str dltile: A dltile key used to specify the search geometry, an alternative
            to the ``geom`` argument.
        :param str sort_field: Property to sort on.
        :param str sort_order: Order of sort.
        :param bool randomize: Randomize the results. You may also use an `int` or `str` as an explicit seed.

        :return: List of image identifiers. Empty list if no matching images found.
        :rtype: DotList(str)

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> iowa_geom = {
            ...     "coordinates": [[
            ...         [-96.498997, 42.560832],
            ...         [-95.765645, 40.585208],
            ...         [-91.729115, 40.61364],
            ...         [-91.391613, 40.384038],
            ...         [-90.952233, 40.954047],
            ...         [-91.04589, 41.414085],
            ...         [-90.343228, 41.587833],
            ...         [-90.140613, 41.995999],
            ...         [-91.065059, 42.751338],
            ...         [-91.217706, 43.50055],
            ...         [-96.599191, 43.500456],
            ...         [-96.498997, 42.560832]
            ...     ]],
            ...     "type": "Polygon"
            ... }
            >>> ids = Metadata().ids(geom=iowa_geom,
            ...                      products=['landsat:LC08:PRE:TOAR'],
            ...                      start_datetime='2016-07-01',
            ...                      end_datetime='2016-07-31T23:59:59')
            >>> len(ids)  # doctest: +SKIP
            2

            >>> ids  # doctest: +SKIP
            ['landsat:LC08:PRE:TOAR:meta_LC80260322016197_v1', 'landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1']
        """
        # Only the ids are needed, so no additional fields are requested.
        collection = self.search(
            products=products,
            sat_ids=sat_ids,
            date=date,
            place=place,
            geom=geom,
            start_datetime=start_datetime,
            end_datetime=end_datetime,
            cloud_fraction=cloud_fraction,
            cloud_fraction_0=cloud_fraction_0,
            fill_fraction=fill_fraction,
            storage_state=storage_state,
            q=q,
            limit=limit,
            fields=[],
            dltile=dltile,
            sort_field=sort_field,
            sort_order=sort_order,
            randomize=randomize,
            **kwargs)

        matches = collection["features"]
        return DotList(match["id"] for match in matches)

    def features(self,
                 products=None,
                 sat_ids=None,
                 date="acquired",
                 place=None,
                 geom=None,
                 start_datetime=None,
                 end_datetime=None,
                 cloud_fraction=None,
                 cloud_fraction_0=None,
                 fill_fraction=None,
                 storage_state=None,
                 q=None,
                 fields=None,
                 batch_size=1000,
                 dltile=None,
                 sort_field=None,
                 sort_order="asc",
                 randomize=None,
                 **kwargs):
        """Generator that efficiently scrolls through the search results.

        Pages are fetched lazily through :py:func:`paged_search`, one request per
        batch. Iteration ends when a page comes back without features or without
        a continuation token.

        :param list(str) products: Product Identifier(s).
        :param list(str) sat_ids: Satellite identifier(s).
        :param str date: The date field to use for search (default is `acquired`).
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest or a Shapely shape object.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param str storage_state: Filter results based on `storage_state` value. Allowed values are
            `"available"`, `"remote"`, or `None`, which returns all results regardless of
            `storage_state` value.
        :param ~descarteslabs.common.property_filtering.filtering.Expression q:
            Expression for filtering the results. See
            :py:attr:`~descarteslabs.client.services.metadata.properties`.
        :param list(str) fields: Properties to return.
        :param int batch_size: Number of features to fetch per request.
        :param str dltile: A dltile key used to specify the search geometry, an alternative
            to the ``geom`` argument.
        :param str sort_field: Property to sort on.
        :param str sort_order: Order of sort.
        :param bool randomize: Randomize the results. You may also use an `int` or `str` as an explicit seed.

        :return: Generator of GeoJSON ``Feature`` objects. Empty if no matching images found.
        :rtype: generator

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> features = Metadata().features(
            ...     "landsat:LC08:PRE:TOAR",
            ...     start_datetime='2016-01-01',
            ...     end_datetime="2016-03-01"
            ... )
            >>> total = 0
            >>> for f in features:
            ...     total += 1

            >>> total # doctest: +SKIP
            31898
        """

        continuation_token = None

        while True:
            result = self.paged_search(sat_ids=sat_ids,
                                       products=products,
                                       date=date,
                                       place=place,
                                       geom=geom,
                                       start_datetime=start_datetime,
                                       end_datetime=end_datetime,
                                       cloud_fraction=cloud_fraction,
                                       cloud_fraction_0=cloud_fraction_0,
                                       fill_fraction=fill_fraction,
                                       storage_state=storage_state,
                                       q=q,
                                       fields=fields,
                                       limit=batch_size,
                                       dltile=dltile,
                                       sort_field=sort_field,
                                       sort_order=sort_order,
                                       randomize=randomize,
                                       continuation_token=continuation_token,
                                       **kwargs)

            if not result["features"]:
                break

            for feature in result["features"]:
                yield feature

            # ``paged_search`` only attaches "properties" when the service sent a
            # continuation header, so a final page without it must end the
            # iteration cleanly instead of raising KeyError.
            properties = result.get("properties") or {}
            continuation_token = properties.get("continuation_token")
            if not continuation_token:
                break

    def get(self, image_id):
        """Fetch the metadata of a single image.

        :param str image_id: Image identifier.

        :return: A dictionary of metadata for a single image.
        :rtype: DotDict

        :raises ~descarteslabs.client.exceptions.NotFoundError: Raised if image id cannot
             be found.

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> meta = Metadata().get('landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1')
            >>> keys = list(meta.keys())
            >>> keys.sort()
            >>> keys
            ['acquired', 'area', 'bits_per_pixel', 'bright_fraction', 'bucket',
             'cloud_fraction', 'cloud_fraction_0', 'confidence_dlsr', 'cs_code',
             'descartes_version', 'file_md5s', 'file_sizes', 'files', 'fill_fraction',
             'geolocation_accuracy', 'geometry', 'geotrans', 'id', 'identifier', 'key',
             'owner_type', 'processed', 'product', 'proj4', 'projcs', 'published',
             'raster_size', 'reflectance_scale', 'roll_angle', 'sat_id',
             'solar_azimuth_angle', 'solar_elevation_angle', 'storage_state',
             'sw_version', 'terrain_correction', 'tile_id']
        """
        endpoint = "/get/{}".format(image_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())

    def get_by_ids(self, ids, fields=None, ignore_not_found=True, **kwargs):
        """Fetch metadata for multiple images by image id. The response contains
        the found images in the order of the given ids.

        Extra keyword arguments are sent along in the request body.

        :param list(str) ids: Image identifiers.
        :param list(str) fields: Properties to return. Omitted from the request
                                 when ``None``.
        :param bool ignore_not_found: For image id lookups that fail: if :py:obj:`True`, ignore;
                                      if :py:obj:`False`, raise :py:exc:`NotFoundError`. Default is :py:obj:`True`.

        :return: List of image metadata dicts.
        :rtype: DotList(DotDict)

        :raises ~descarteslabs.client.exceptions.NotFoundError: Raised if an image id cannot
             be found and ignore_not_found set to `False` (default is `True`)
        """
        payload = kwargs
        payload["ids"] = ids
        payload["ignore_not_found"] = ignore_not_found
        if fields is not None:
            payload["fields"] = fields

        response = self.session.post("/batch/images", json=payload)
        return DotList(response.json())

    def get_product(self, product_id):
        """Fetch information about a single product.

        :param str product_id: Product Identifier.

        :return: A dictionary with metadata for a single product.
        :rtype: DotDict

        :raises ~descarteslabs.client.exceptions.NotFoundError: Raised if a product id
            cannot be found.
        """
        endpoint = "/products/{}".format(product_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())

    def get_band(self, band_id):
        """Fetch information about a single band.

        :param str band_id: Band Identifier.

        :return: A dictionary with metadata for a single band.
        :rtype: DotDict

        :raises ~descarteslabs.client.exceptions.NotFoundError: Raised if a band id
            cannot be found.
        """
        endpoint = "/bands/{}".format(band_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())

    def get_derived_band(self, derived_band_id):
        """Fetch information about a single derived band.

        :param str derived_band_id: Derived band identifier.

        :return: A dictionary with metadata for a single derived band.
        :rtype: DotDict

        :raises ~descarteslabs.client.exceptions.NotFoundError: Raised if a band id
            cannot be found.
        """
        endpoint = "/bands/derived/{}".format(derived_band_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())
Ejemplo n.º 2
0
def search(
    aoi,
    products=None,
    start_datetime=None,
    end_datetime=None,
    cloud_fraction=None,
    storage_state=None,
    limit=100,
    sort_field=None,
    sort_order="asc",
    date_field="acquired",
    query=None,
    randomize=False,
    raster_client=None,
    metadata_client=None,
):
    """
    Find Scenes in the Descartes Labs catalog that intersect an area of interest.

    The metadata service is queried with the given criteria, every matching
    feature is wrapped in a `Scene`, and the scenes are returned together with
    a GeoContext suitable for loading them.

    Parameters
    ----------
    aoi : GeoJSON-like dict, :class:`~descarteslabs.scenes.geocontext.GeoContext`, or object with __geo_interface__
        Area to search; scenes intersecting it by any amount match.
        A :class:`~descarteslabs.scenes.geocontext.GeoContext` is used as the
        search context directly; anything else becomes the geometry of a new `AOI`.
    products : str or List[str], optional
        Descartes Labs product identifiers. A single string is treated as a
        one-element list. If omitted, the products are taken from the search results.
    start_datetime : str, datetime-like, optional
        Restrict to scenes acquired after this datetime.
        ``datetime.datetime`` instances are converted with ``isoformat()``.
    end_datetime : str, datetime-like, optional
        Restrict to scenes acquired before this datetime.
        ``datetime.datetime`` instances are converted with ``isoformat()``.
    cloud_fraction : float, optional
        Restrict to scenes that are covered in clouds by less than this fraction
        (between 0 and 1)
    storage_state : str, optional
        Filter results based on ``storage_state`` value
        (``"available"``, ``"remote"``, or ``None``)
    limit : int or None, optional, default 100
        Maximum number of Scenes to return, or None for all results.
    sort_field : str, optional
        Field name in :py:attr:`Scene.properties` by which to order the results
    sort_order : str, optional, default 'asc'
        ``"asc"`` or ``"desc"``
    date_field : str, optional, default 'acquired'
        The field used when filtering by date
        (``"acquired"``, ``"processed"``, ``"published"``)
    query : ~descarteslabs.common.property_filtering.filtering.Expression, optional
        Expression used to filter Scenes by their properties, built from
        :class:`dl.properties <descarteslabs.common.property_filtering.filtering.GenericProperties>`.
        Filter expressions are constructed with the ``==``, ``!=``, ``<``, ``>``,
        ``<=`` and ``>=`` operators as well as the
        :meth:`~descarteslabs.common.property_filtering.filtering.Property.like`
        and :meth:`~descarteslabs.common.property_filtering.filtering.Property.in_`
        methods. The boolean keywords ``and`` and ``or`` cannot be used because of
        Python language limitations; combine expressions with ``&`` (boolean "and")
        and ``|`` (boolean "or") instead.
        Example:
        ``(dl.properties.azimuth_angle > 150) & (dl.properties.cloud_fraction < 0.5)``
    randomize : bool, default False, optional
        Randomize the order of the results.
        You may also use an int or str as an explicit seed.
    raster_client : Raster, optional
        Unneeded in general use; lets you use a specific client instance
        with non-default auth and parameters.
    metadata_client : Metadata, optional
        Unneeded in general use; lets you use a specific client instance
        with non-default auth and parameters.

    Returns
    -------
    scenes : `SceneCollection`
        Scenes matching your criteria.
    ctx: :class:`~descarteslabs.scenes.geocontext.GeoContext`
        The context to use for loading the matching Scenes.

        If ``aoi`` was not a :class:`~descarteslabs.scenes.geocontext.GeoContext`,
        an `AOI` is created with ``aoi`` as its geometry.

        When at least one scene matched and the context is an `AOI`, properties
        that are unset receive the defaults below (through ``ctx.assign``):

        **Default Spatial Parameters:**

        * resolution: the finest resolution of any band of all matching scenes
          (``None`` if no band defines one); only filled in when both
          ``resolution`` and ``shape`` are unset
        * crs: the most common CRS used of all matching scenes

    Raises
    ------
    ValueError
        If ``aoi`` is a GeoContext with neither geometry nor bounds set.
    """

    # Resolve the search context first; a GeoContext without any extent is unusable.
    if not isinstance(aoi, geocontext.GeoContext):
        ctx = geocontext.AOI(geometry=aoi)
    elif aoi.bounds is None and aoi.geometry is None:
        raise ValueError(
            "Unspecified where to search, "
            "since the GeoContext given for ``aoi`` has neither geometry nor bounds set"
        )
    else:
        ctx = aoi

    if raster_client is None:
        raster_client = Raster()
    if metadata_client is None:
        metadata_client = Metadata()

    # Normalize the loosely-typed arguments into what the metadata service expects.
    if isinstance(products, six.string_types):
        products = [products]
    if isinstance(start_datetime, datetime.datetime):
        start_datetime = start_datetime.isoformat()
    if isinstance(end_datetime, datetime.datetime):
        end_datetime = end_datetime.isoformat()

    metadata = metadata_client.search(
        products=products,
        geom=ctx.__geo_interface__,
        start_datetime=start_datetime,
        end_datetime=end_datetime,
        cloud_fraction=cloud_fraction,
        storage_state=storage_state,
        limit=limit,
        sort_field=sort_field,
        sort_order=sort_order,
        date=date_field,
        q=query,
        randomize=randomize,
    )

    # Without an explicit product list, use the products that actually matched.
    if products is None:
        products = set(
            feature["properties"]["product"] for feature in metadata["features"]
        )

    # Band metadata is fetched once per product and shared by all of its scenes.
    product_bands = {}
    for product in products:
        raw_bands = metadata_client.get_bands_by_product(product)
        product_bands[product] = Scene._scenes_bands_dict(raw_bands)

    scenes = SceneCollection(
        (
            Scene(feature, product_bands[feature["properties"]["product"]])
            for feature in metadata["features"]
        ),
        raster_client=raster_client,
    )

    if len(scenes) > 0 and isinstance(ctx, geocontext.AOI):
        defaults = {}

        if ctx.resolution is None and ctx.shape is None:
            # Gather every truthy band resolution across all products.
            band_resolutions = []
            for bands_of_product in six.itervalues(product_bands):
                for band_meta in six.itervalues(bands_of_product):
                    band_resolution = band_meta.get("resolution")
                    if band_resolution:
                        band_resolutions.append(band_resolution)
            try:
                defaults["resolution"] = min(band_resolutions)
            except ValueError:
                # min of an empty sequence; no band defines resolution
                defaults["resolution"] = None

        if ctx.crs is None:
            crs_counts = collections.Counter(
                scene.properties["crs"] for scene in scenes
            )
            most_common_crs, _ = crs_counts.most_common(1)[0]
            defaults["crs"] = most_common_crs

        if defaults:
            ctx = ctx.assign(**defaults)

    return scenes, ctx
Ejemplo n.º 3
0
    def ndarray(self,
                bands,
                ctx,
                mask_nodata=True,
                mask_alpha=True,
                bands_axis=0,
                raster_info=False,
                resampler="near",
                raster_client=None):
        """
        Load bands from this scene as an ndarray, optionally masking invalid data.

        Parameters
        ----------
        bands : str or Sequence[str]
            Band names to load. Can be a single string of band names
            separated by spaces (``"red green blue derived:ndvi"``),
            or a sequence of band names (``["red", "green", "blue", "derived:ndvi"]``).
            Names must be keys in ``self.properties.bands``.
            If the alpha band is requested, it must be last in the list
            to reduce rasterization errors.
        ctx : `GeoContext`
            A `GeoContext` to use when loading this Scene
        mask_nodata : bool, default True
            Whether to mask out values in each band that equal
            that band's ``nodata`` sentinel value.
        mask_alpha : bool, default True
            Whether to mask pixels in all bands where the alpha band is 0.
            If alpha was not requested it is loaded anyway for masking and
            dropped from the returned array.
        bands_axis : int, default 0
            Axis along which bands should be located in the returned array.
            If 0, the array will have shape ``(band, y, x)``, if -1,
            it will have shape ``(y, x, band)``. Any axis that exists in a
            3D array (-3 through 2) is accepted.

            It's usually easier to work with bands as the outermost axis,
            but when working with large arrays, or with many arrays concatenated
            together, NumPy operations aggregating each xy point across bands
            can be slightly faster with bands as the innermost axis.
        raster_info : bool, default False
            Whether to also return a dict of information about the rasterization
            of the scene, including the coordinate system WKT and geotransform matrix.
            Generally only useful if you plan to upload data derived
            from this scene back to the Descartes catalog, or use it with GDAL.
        resampler : str, default "near"
            Algorithm used to interpolate pixel values when scaling and transforming
            the image to its new resolution or SRS. Possible values are
            ``near`` (nearest-neighbor), ``bilinear``, ``cubic``, ``cubicsplice``,
            ``lanczos``, ``average``, ``mode``, ``max``, ``min``, ``med``, ``q1``, ``q3``.
        raster_client : Raster, optional
            Unneeded in general use; lets you use a specific client instance
            with non-default auth and parameters.

        Returns
        -------
        arr : ndarray
            Returned array's shape will be ``(band, y, x)`` if bands_axis is 0,
            ``(y, x, band)`` if bands_axis is -1
            If ``mask_nodata`` or ``mask_alpha`` is True, arr will be a masked array.
        raster_info : dict
            If ``raster_info=True``, a raster information dict is also returned.

        Example
        -------
        >>> import descarteslabs as dl
        >>> scene, ctx = dl.scenes.Scene.from_id("landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1")
        >>> arr = scene.ndarray("red green blue", ctx)
        >>> type(arr)
        <class 'numpy.ma.core.MaskedArray'>
        >>> arr.shape
        (3, 15259, 15340)
        >>> red_band = arr[0]

        Raises
        ------
        ValueError
            If requested bands are unavailable.
            If band names are not given or are invalid.
            If the requested bands have different dtypes.
            If ``bands_axis`` is not an axis of a 3D array.
            If ``mask_alpha`` is True but the product has no alpha band.
            If the alpha band is requested but is not last.
        NotFoundError
            If a Scene's ID cannot be found in the Descartes Labs catalog
        BadRequestError
            If the Descartes Labs platform is given invalid parameters
        """
        if raster_client is None:
            raster_client = Raster()

        # A 3D array has axes -3..2; -3 is simply another spelling of axis 0,
        # so it must be accepted as well.
        if not (-3 <= bands_axis < 3):
            raise ValueError(
                "Invalid bands_axis; axis {} would not exist in a 3D array".
                format(bands_axis))

        bands = self._bands_to_list(bands)
        common_data_type = self._common_data_type_of_bands(bands)

        self_bands = self.properties["bands"]
        # Only consulted when mask_alpha is True.
        drop_alpha = False
        if mask_alpha:
            if "alpha" not in self_bands:
                raise ValueError(
                    "Cannot mask alpha: no alpha band for the product '{}'. "
                    "Try setting 'mask_alpha=False'.".format(
                        self.properties["product"]))
            try:
                alpha_i = bands.index("alpha")
            except ValueError:
                # Alpha was not requested: load it for masking, drop it afterwards.
                bands.append("alpha")
                drop_alpha = True
            else:
                if alpha_i != len(bands) - 1:
                    raise ValueError(
                        "Alpha must be the last band in order to reduce rasterization errors"
                    )

        raster_params = ctx.raster_params
        full_raster_args = dict(inputs=self.properties["id"],
                                order="gdal",
                                bands=bands,
                                scales=None,
                                data_type=common_data_type,
                                resampler=resampler,
                                **raster_params)

        try:
            arr, info = raster_client.ndarray(**full_raster_args)
        except NotFoundError:
            # Re-raise with a message naming the scene, without the chained context.
            six.raise_from(
                NotFoundError(
                    "'{}' does not exist in the Descartes catalog".format(
                        self.properties["id"])), None)
        except BadRequestError as e:
            msg = (
                "Error with request:\n"
                "{err}\n"
                "For reference, dl.Raster.ndarray was called with these arguments:\n"
                "{args}")
            msg = msg.format(err=e,
                             args=json.dumps(full_raster_args, indent=2))
            six.raise_from(BadRequestError(msg), None)

        if len(arr.shape) == 2:
            # if only 1 band requested, still return a 3d array
            arr = arr[np.newaxis]

        if mask_nodata or mask_alpha:
            if mask_alpha:
                # Alpha is guaranteed to be the last band at this point.
                alpha = arr[-1]
                if drop_alpha:
                    arr = arr[:-1]
                    bands.pop(-1)

            mask = np.zeros_like(arr, dtype=bool)

            if mask_nodata:
                for i, bandname in enumerate(bands):
                    nodata = self_bands[bandname].get('nodata')
                    if nodata is not None:
                        mask[i] = arr[i] == nodata

            if mask_alpha:
                # The 2D alpha mask broadcasts across every band.
                mask |= alpha == 0

            arr = np.ma.MaskedArray(arr, mask, copy=False)

        if bands_axis != 0:
            arr = np.moveaxis(arr, 0, bands_axis)
        if raster_info:
            return arr, info
        else:
            return arr
 def __init__(self, iterable=None, raster_client=None):
     """Initialize the collection from ``iterable`` and remember a raster client.

     ``iterable`` is handed to the parent initializer unchanged. When no
     ``raster_client`` is supplied, a default ``Raster()`` is created.
     """
     super(SceneCollection, self).__init__(iterable)
     if raster_client is None:
         raster_client = Raster()
     self._raster_client = raster_client
Ejemplo n.º 5
0
class Metadata(Service):
    """
    Image Metadata Service

    Any methods that take start and end timestamps accept most common date/time
    formats as a string. If no explicit timezone is given, the timestamp is assumed
    to be in UTC. For example ``'2012-06-01'`` means June 1st 2012 00:00 in UTC,
    ``'2012-06-01 00:00+02:00'`` means June 1st 2012 00:00 in GMT+2.
    """

    TIMEOUT = (9.5, 120)

    properties = GLOBAL_PROPERTIES

    def __init__(self, url=None, auth=None):
        """Create a client for the metadata service.

        The parent Service class implements authentication and exponential
        backoff/retry.

        :param str url: Base URL of the service. When omitted, the
            ``DESCARTESLABS_METADATA_URL`` environment variable is used, falling
            back to ``https://platform.descarteslabs.com/metadata/v1``.
        :param Auth auth: Authentication to use. When omitted, a default
            ``Auth()`` is created.
        """
        auth = Auth() if auth is None else auth

        # Installs an "always" filter for DeprecationWarning at construction time.
        simplefilter('always', DeprecationWarning)

        if url is None:
            default_url = "https://platform.descarteslabs.com/metadata/v1"
            url = os.environ.get("DESCARTESLABS_METADATA_URL", default_url)

        super(Metadata, self).__init__(url, auth=auth)

        # Raster client sharing this client's auth; used to resolve dltile keys.
        self._raster = Raster(auth=self.auth)

    def sources(self):
        """Deprecated: return what the ``/sources`` endpoint serves.

        Issues a :py:exc:`DeprecationWarning` with ``SOURCES_DEPRECATION_MESSAGE``
        before performing the request.

        :return: The JSON response body wrapped in a ``DotList``.
        """
        warn(SOURCES_DEPRECATION_MESSAGE, DeprecationWarning)

        response = self.session.get('/sources')
        payload = response.json()
        return DotList(payload)

    def bands(
        self,
        products=None,
        limit=None,
        offset=None,
        wavelength=None,
        resolution=None,
        tags=None,
        bands=None,
        **kwargs
    ):
        """Search for imagery data bands that you have access to.

        Only arguments that are not ``None`` are sent; extra keyword arguments
        are passed through in the request body unchanged.

        :param list(str) products: A list of product(s) to return bands for.
        :param int limit: Number of results to return.
        :param int offset: Index to start at when returning results.
        :param float wavelength: A wavelength in nm e.g 700 that the band sensor must measure.
        :param int resolution: The resolution in meters per pixel e.g 30 of the data available in this band.
        :param list(str) tags: A list of tags that the band must have in its own tag list.
        :param bands: Accepted, but not included in the request payload.

        :return: The JSON response of ``/bands/search`` wrapped in a ``DotList``.
        """
        # NOTE(review): ``bands`` is deliberately absent here to mirror the
        # existing behavior -- confirm whether the service supports it.
        named_filters = (
            ('limit', limit),
            ('offset', offset),
            ('products', products),
            ('wavelength', wavelength),
            ('resolution', resolution),
            ('tags', tags),
        )

        payload = dict(kwargs)
        for name, value in named_filters:
            if value is not None:
                payload[name] = value

        response = self.session.post('/bands/search', json=payload)
        return DotList(response.json())

    def derived_bands(self, bands=None, require_bands=None, limit=None, offset=None, **kwargs):
        """Search for predefined derived bands that you have access to.

        Only arguments that are not ``None`` are sent; extra keyword arguments
        are passed through in the request body unchanged.

        :param list(str) bands: Limit the derived bands to ones that can be
                                computed using this list of spectral bands.
                                e.g ["red", "nir", "swir1"]
        :param bool require_bands: Control whether searched bands must contain
                                   all the spectral bands passed in the bands param.
                                   Defaults to False.
        :param int limit: Number of results to return.
        :param int offset: Index to start at when returning results.

        :return: The JSON response of ``/bands/derived/search`` wrapped in a
            ``DotList``.
        """
        # ``require_bands`` used to be missing from this list, so the documented
        # parameter was silently dropped and never reached the service.
        named_filters = (
            ('bands', bands),
            ('require_bands', require_bands),
            ('limit', limit),
            ('offset', offset),
        )

        payload = dict(kwargs)
        for name, value in named_filters:
            if value is not None:
                payload[name] = value

        r = self.session.post('/bands/derived/search', json=payload)
        return DotList(r.json())

    def get_bands_by_id(self, id_):
        """
        Look up the bands available for a given source id.

        :param str id_: A :class:`Metadata` identifier.

        :return: A dictionary of band entries and their metadata (the JSON
            response of ``/bands/id/<id_>`` wrapped in a ``DotDict``).
        """
        endpoint = '/bands/id/{}'.format(id_)
        response = self.session.get(endpoint)
        payload = response.json()
        return DotDict(payload)

    def get_bands_by_product(self, product_id):
        """
        Look up all bands (including derived bands) available in a product.

        :param str product_id: A product identifier.

        :return: A dictionary mapping band IDs to dictionaries of their metadata
            (the JSON response of ``/bands/all/<product_id>`` wrapped in a
            ``DotDict``).
        """
        endpoint = '/bands/all/{}'.format(product_id)
        response = self.session.get(endpoint)
        payload = response.json()
        return DotDict(payload)

    def products(self, bands=None, limit=None, offset=None, owner=None, text=None, **kwargs):
        """Search products that are available on the platform.

        Only arguments that are not ``None`` are sent; extra keyword arguments
        are passed through in the request body unchanged.

        :param list(str) bands: Band name(s) e.g ["red", "nir"] to filter products by.
                                Note that products must match all bands that are passed.
        :param int limit: Number of results to return.
        :param int offset: Index to start at when returning results.
        :param str owner: Filter products by the owner's uuid.
        :param str text: Filter products by string match.

        :return: The JSON response of ``/products/search`` wrapped in a ``DotList``.
        """
        named_filters = (
            ('limit', limit),
            ('offset', offset),
            ('bands', bands),
            ('owner', owner),
            ('text', text),
        )

        payload = dict(kwargs)
        for name, value in named_filters:
            if value is not None:
                payload[name] = value

        response = self.session.post('/products/search', json=payload)
        return DotList(response.json())

    def available_products(self):
        """Get the list of product identifiers you have access to.

        :return: The JSON response of ``/products`` wrapped in a ``DotList``.

        Example::
            >>> from descarteslabs.client.services import Metadata
            >>> products = Metadata().available_products()
            >>> products  # doctest: +SKIP
            ['landsat:LC08:PRE:TOAR']

        """
        response = self.session.get('/products')
        payload = response.json()
        return DotList(payload)

    def summary(self, products=None, sat_id=None, date='acquired', part=None,
                place=None, geom=None, start_datetime=None, end_datetime=None, cloud_fraction=None,
                cloud_fraction_0=None, fill_fraction=None, q=None, pixels=None,
                dltile=None, **kwargs):
        """Get a summary of the results for the specified spatio-temporal query.

        The region of interest is resolved in this order: ``place`` (looked up
        through the Places service), then ``dltile`` (which overrides it), and
        otherwise ``geom`` as given. Dict geometries are serialized to JSON.

        :param list(str) products: Product identifier(s).
        :param list(str) sat_id: Satellite identifier(s). *Deprecated*
        :param str date: The date field to use for search (e.g. `acquired`).
        :param str part: Part of the date to aggregate over (e.g. `day`).
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param expr q: Expression for filtering the results. See :py:attr:`descarteslabs.utilities.properties`.
        :param bool pixels: Whether to include pixel counts in summary calculations.
        :param str dltile: A dltile key used to specify the resolution, bounds, and srs.

        :return: The JSON response of ``/summary`` wrapped in a ``DotDict``.

        Example usage::

            >>> from descarteslabs.client.services import Metadata
            >>> Metadata().summary(place='north-america_united-states_iowa', \
                    products=['landsat:LC08:PRE:TOAR'], start_datetime='2016-07-06', \
                    end_datetime='2016-07-07', part='hour', pixels=True)
            {
              'bytes': 93298309,
              'count': 1,
              'items': [
                {
                  'bytes': 93298309,
                  'count': 1,
                  'date': '2016-07-06T16:00:00',
                  'pixels': 250508160,
                  'timestamp': 1467820800
                }
              ],
              'pixels': 250508160,
              'products': ['landsat:LC08:PRE:TOAR']
            }
        """
        # --- resolve the region of interest -------------------------------
        if place:
            places_client = Places()
            places_client.auth = self.auth
            low_res_shape = places_client.shape(place, geom='low')
            geom = json.dumps(low_res_shape['geometry'])

        if dltile is not None:
            # A string key is expanded through the raster service first.
            if isinstance(dltile, string_types):
                tile = self._raster.dltile(dltile)
            else:
                tile = dltile
            if isinstance(tile, dict):
                geom = tile['geometry']

        if isinstance(geom, dict):
            geom = json.dumps(geom)

        # --- assemble the request body ------------------------------------
        if sat_id:
            warn(SAT_ID_DEPRECATION_MESSAGE, DeprecationWarning)
            kwargs['sat_id'] = [sat_id] if isinstance(sat_id, string_types) else sat_id

        if products:
            kwargs['products'] = [products] if isinstance(products, string_types) else products

        # Sent only when truthy.
        for key, value in (
            ('date', date),
            ('part', part),
            ('geom', geom),
            ('start_datetime', start_datetime),
            ('end_datetime', end_datetime),
        ):
            if value:
                kwargs[key] = value

        # Sent whenever explicitly given, so that 0 is a usable threshold.
        for key, value in (
            ('cloud_fraction', cloud_fraction),
            ('cloud_fraction_0', cloud_fraction_0),
            ('fill_fraction', fill_fraction),
        ):
            if value is not None:
                kwargs[key] = value

        if q is not None:
            expressions = q if isinstance(q, list) else [q]
            kwargs['query_expr'] = AndExpression(expressions).serialize()

        if pixels:
            kwargs['pixels'] = pixels

        response = self.session.post('/summary', json=kwargs)
        return DotDict(response.json())

    def search(self, products=None, sat_id=None, date='acquired', place=None,
               geom=None, start_datetime=None, end_datetime=None, cloud_fraction=None,
               cloud_fraction_0=None, fill_fraction=None, q=None, limit=100, offset=0,
               fields=None, dltile=None, sort_field=None, sort_order="asc", randomize=None,
               continuation_token=None, **kwargs):
        """Search metadata given a spatio-temporal query. All parameters are
        optional. For accessing more than 10000 results, see :py:func:`features`.

        The region of interest is resolved in this order: ``place`` (looked up
        through the Places service), then ``dltile`` (which overrides it), and
        otherwise ``geom`` as given. Dict geometries are serialized to JSON.

        :param list(str) products: Product Identifier(s).
        :param list(str) sat_id: Satellite identifier(s).
        :param str date: The date field to use for search (e.g. `acquired`).
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param expr q: Expression for filtering the results. See :py:attr:`descarteslabs.utilities.properties`.
        :param int limit: Number of items to return up to the maximum of 10000.
        :param int offset: Number of items to skip.
        :param list(str) fields: Properties to return.
        :param str dltile: a dltile key used to specify the resolution, bounds, and srs.
        :param str sort_field: Property to sort on.
        :param str sort_order: Order of sort.
        :param bool randomize: Randomize the results. You may also use an `int` or `str` as an explicit seed.
        :param continuation_token: Sent as ``continuation_token`` in the request
            body when not ``None``.

        :return: GeoJSON ``FeatureCollection``. When the response carries an
            ``x-continuation-token`` header, its value is exposed as
            ``properties.continuation_token``.

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> scenes = Metadata().search(place='north-america_united-states_iowa', \
                                         products=['landsat:LC08:PRE:TOAR'], \
                                         start_datetime='2016-07-01', \
                                         end_datetime='2016-07-31T23:59:59')
            >>> len(scenes['features'])  # doctest: +SKIP
            2
        """
        # --- resolve the region of interest -------------------------------
        if place:
            places_client = Places()
            places_client.auth = self.auth
            low_res_shape = places_client.shape(place, geom='low')
            geom = json.dumps(low_res_shape['geometry'])

        if dltile is not None:
            # A string key is expanded through the raster service first.
            if isinstance(dltile, string_types):
                tile = self._raster.dltile(dltile)
            else:
                tile = dltile
            if isinstance(tile, dict):
                geom = tile['geometry']

        if isinstance(geom, dict):
            geom = json.dumps(geom)

        # --- assemble the request body ------------------------------------
        kwargs['date'] = date
        kwargs['limit'] = limit

        if offset:
            warn(OFFSET_DEPRECATION_MESSAGE, DeprecationWarning)
            kwargs['offset'] = offset

        if sat_id:
            warn(SAT_ID_DEPRECATION_MESSAGE, DeprecationWarning)
            kwargs['sat_id'] = [sat_id] if isinstance(sat_id, string_types) else sat_id

        if products:
            kwargs['products'] = [products] if isinstance(products, string_types) else products

        # Sent only when truthy.
        for key, value in (
            ('geom', geom),
            ('start_datetime', start_datetime),
            ('end_datetime', end_datetime),
        ):
            if value:
                kwargs[key] = value

        # Sent whenever explicitly given (falsy values such as 0 or [] count).
        for key, value in (
            ('cloud_fraction', cloud_fraction),
            ('cloud_fraction_0', cloud_fraction_0),
            ('fill_fraction', fill_fraction),
            ('fields', fields),
        ):
            if value is not None:
                kwargs[key] = value

        if q is not None:
            expressions = q if isinstance(q, list) else [q]
            kwargs['query_expr'] = AndExpression(expressions).serialize()

        for key, value in (
            ('sort_field', sort_field),
            ('sort_order', sort_order),
            ('random_seed', randomize),
            ('continuation_token', continuation_token),
        ):
            if value is not None:
                kwargs[key] = value

        # --- perform the search and wrap the response ---------------------
        response = self.session.post('/search', json=kwargs)

        collection = {'type': 'FeatureCollection', "features": response.json()}

        token_header = 'x-continuation-token'
        if token_header in response.headers:
            collection['properties'] = {
                'continuation_token': response.headers[token_header]}

        return DotDict(collection)

    def ids(self, products=None, sat_id=None, date='acquired', place=None,
            geom=None, start_datetime=None, end_datetime=None, cloud_fraction=None,
            cloud_fraction_0=None, fill_fraction=None, q=None, limit=100, offset=None,
            dltile=None, sort_field=None, sort_order=None, randomize=None, **kwargs):
        """Search metadata given a spatio-temporal query and return only the
        identifiers of the matching images. All parameters are optional.

        This delegates to :py:meth:`search` with an empty ``fields`` list and
        collects the ``id`` of every returned feature.

        :param list(str) products: Products identifier(s).
        :param list(str) sat_id: Satellite identifier(s).
        :param str date: The date field to use for search (e.g. `acquired`).
        :param str place: A slug identifier to be used as a region of interest.
        :param str geom: A GeoJSON or WKT region of interest.
        :param str start_datetime: Desired starting timestamp, in any common format.
        :param str end_datetime: Desired ending timestamp, in any common format.
        :param float cloud_fraction: Maximum cloud fraction, calculated by data provider.
        :param float cloud_fraction_0: Maximum cloud fraction, calculated by cloud mask pixels.
        :param float fill_fraction: Minimum scene fill fraction, calculated as valid/total pixels.
        :param expr q: Expression for filtering the results. See :py:attr:`descarteslabs.utilities.properties`.
        :param int limit: Number of items to return.
        :param int offset: Number of items to skip.
        :param str dltile: a dltile key used to specify the resolution, bounds, and srs.
        :param str sort_field: Property to sort on.
        :param str sort_order: Order of sort.
        :param bool randomize: Randomize the results. You may also use an `int` or `str` as an explicit seed.

        :return: List of image identifiers.

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> ids = Metadata().ids(place='north-america_united-states_iowa', \
                                 products=['landsat:LC08:PRE:TOAR'], \
                                 start_datetime='2016-07-01', \
                                 end_datetime='2016-07-31T23:59:59')
            >>> len(ids)  # doctest: +SKIP
            1

            >>> ids  # doctest: +SKIP
            ['landsat:LC08:PRE:TOAR:meta_LC80260322016197_v1', 'landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1']

        """
        # fields=[] asks the search for no extra properties; only ``id`` is read.
        found = self.search(
            sat_id=sat_id,
            products=products,
            date=date,
            place=place,
            geom=geom,
            start_datetime=start_datetime,
            end_datetime=end_datetime,
            cloud_fraction=cloud_fraction,
            cloud_fraction_0=cloud_fraction_0,
            fill_fraction=fill_fraction,
            q=q,
            limit=limit,
            offset=offset,
            fields=[],
            dltile=dltile,
            sort_field=sort_field,
            sort_order=sort_order,
            randomize=randomize,
            **kwargs
        )

        matches = found['features']
        return DotList(match['id'] for match in matches)

    def features(self, products=None, sat_id=None, date='acquired', place=None,
                 geom=None, start_datetime=None, end_datetime=None, cloud_fraction=None,
                 cloud_fraction_0=None, fill_fraction=None, q=None, fields=None,
                 batch_size=1000, dltile=None, sort_field=None, sort_order='asc',
                 randomize=None, **kwargs):
        """Generator that efficiently scrolls through the search results.

        Pages are fetched with :py:meth:`search`, passing ``batch_size`` as the
        page ``limit`` and the continuation token of the previous page. Iteration
        stops when a page has no features or no continuation token. All other
        parameters are forwarded to :py:meth:`search` unchanged.

        :param int batch_size: Number of features to fetch per request.

        :return: Generator of GeoJSON ``Feature`` objects.

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> features = Metadata().features("landsat:LC08:PRE:TOAR", \
                            start_datetime='2016-01-01', \
                            end_datetime="2016-03-01")
            >>> total = 0
            >>> for f in features: \
                    total += 1

            >>> total # doctest: +SKIP
            31898
        """

        continuation_token = None

        while True:
            result = self.search(sat_id=sat_id, products=products,
                                 date=date, place=place, geom=geom,
                                 start_datetime=start_datetime, end_datetime=end_datetime,
                                 cloud_fraction=cloud_fraction,
                                 cloud_fraction_0=cloud_fraction_0,
                                 fill_fraction=fill_fraction, q=q,
                                 fields=fields, limit=batch_size, dltile=dltile,
                                 sort_field=sort_field, sort_order=sort_order,
                                 randomize=randomize, continuation_token=continuation_token, **kwargs)

            if not result['features']:
                break

            for feature in result['features']:
                yield feature

            # ``search`` only adds 'properties' when the service returned a
            # continuation token, so a missing key means this was the last page
            # rather than an error.
            properties = result.get('properties') or {}
            continuation_token = properties.get('continuation_token')
            if not continuation_token:
                break

    def get(self, image_id):
        """Fetch the metadata record of one image.

        Issues a GET request to ``/get/<image_id>`` on the service session.

        :param str image_id: Image identifier.

        :return: The decoded JSON response body wrapped in a ``DotDict``.

        Example::

            >>> from descarteslabs.client.services import Metadata
            >>> meta = Metadata().get('landsat:LC08:PRE:TOAR:meta_LC80270312016188_v1')
            >>> keys = list(meta.keys())
            >>> keys.sort()
            >>> keys
            ['acquired', 'area', 'bits_per_pixel', 'bright_fraction', 'bucket', 'cloud_fraction',
             'cloud_fraction_0', 'cs_code', 'descartes_version', 'file_md5s', 'file_sizes', 'files',
             'fill_fraction', 'geolocation_accuracy', 'geometry', 'geotrans', 'id', 'identifier', 'key',
             'processed', 'product', 'projcs', 'published', 'raster_size', 'reflectance_scale', 'roll_angle',
             'sat_id', 'solar_azimuth_angle', 'solar_elevation_angle', 'sw_version', 'terrain_correction',
             'tile_id']
        """
        endpoint = '/get/{}'.format(image_id)
        response = self.session.get(endpoint)
        payload = response.json()
        return DotDict(payload)

    def get_by_ids(self, ids, fields=None, ignore_not_found=True, **kwargs):
        """Look up the metadata of several images in a single request.

        The request body is POSTed as JSON to ``/batch/images``. Found images
        are returned in the order of the given ids.

        :param list(str) ids: Image identifiers.
        :param list(str) fields: Properties to return. Only included in the
                                 request when it is not :py:obj:`None`.
        :param bool ignore_not_found: For image id lookups that fail: if :py:obj:`True`, ignore;
                                      if :py:obj:`False`, raise :py:exc:`NotFoundError`. Default is :py:obj:`True`.
        :param kwargs: Additional entries placed in the request body as-is.

        :return: List of image metadata.
        :rtype: list(dict)
        """
        # Extra keyword arguments come first, followed by the explicit parameters.
        payload = dict(kwargs, ids=ids, ignore_not_found=ignore_not_found)
        if fields is not None:
            payload['fields'] = fields

        response = self.session.post('/batch/images', json=payload)
        return DotList(response.json())

    def get_product(self, product_id):
        """Fetch the description of one product.

        Issues a GET request to ``/products/<product_id>`` on the service session.

        :param str product_id: Product Identifier.

        :return: The decoded JSON response body wrapped in a ``DotDict``.
        """
        endpoint = '/products/{}'.format(product_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())

    def get_band(self, band_id):
        """Fetch the description of one band.

        Issues a GET request to ``/bands/<band_id>`` on the service session.

        :param str band_id: Band Identifier.

        :return: The decoded JSON response body wrapped in a ``DotDict``.
        """
        endpoint = '/bands/{}'.format(band_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())

    def get_derived_band(self, derived_band_id):
        """Fetch the description of one derived band.

        Issues a GET request to ``/bands/derived/<derived_band_id>`` on the
        service session.

        :param str derived_band_id: Derived band identifier.

        :return: The decoded JSON response body wrapped in a ``DotDict``.
        """
        endpoint = '/bands/derived/{}'.format(derived_band_id)
        response = self.session.get(endpoint)
        return DotDict(response.json())
Ejemplo n.º 6
0
def search(aoi,
           products=None,
           start_datetime=None,
           end_datetime=None,
           cloud_fraction=None,
           limit=100,
           sort_field=None,
           sort_order='asc',
           date_field='acquired',
           query=None,
           randomize=False,
           raster_client=None,
           metadata_client=None):
    """
    Search for Scenes in the Descartes Labs catalog.

    Returns a SceneCollection of Scenes that overlap with an area of interest,
    and meet the given search criteria.

    Parameters
    ----------
    aoi : GeoJSON-like dict, GeoContext, or object with __geo_interface__
        Search for scenes that intersect this area by any amount.
        If a GeoContext, a copy is returned as ``ctx``, with missing values filled in.
        Otherwise, the returned ``ctx`` will be an `AOI`, with this as its geometry.
        If shapely is not installed, this must be a GeoContext instance.
        See below for details.
    products : str or List[str], optional
        Descartes Labs product identifiers.
        If omitted (or an empty list/tuple), band information is looked up
        for every product that appears in the search results.
    start_datetime : str, datetime-like, optional
        Restrict to scenes acquired after this datetime
    end_datetime : str, datetime-like, optional
        Restrict to scenes acquired before this datetime
    cloud_fraction : float, optional
        Restrict to scenes that are covered in clouds by less than this fraction
        (between 0 and 1)
    limit : int, optional
        Maximum number of Scenes to return, up to 10000.
    sort_field : str, optional
        Field name in ``Scene.properties`` by which to order the results
    sort_order : str, optional, default 'asc'
        ``"asc"`` or ``"desc"``
    date_field : str, optional, default 'acquired'
        The field used when filtering by date
        (``"acquired"``, ``"processed"``, ``"published"``)
    query : descarteslabs.common.property_filtering.Expression, optional
        Expression used to filter Scenes by their properties, built from ``dl.properties``.
        Parenthesize each comparison when combining with ``&``, since ``&``
        binds more tightly than comparison operators in Python.

        >>> query = (150 < dl.properties.azimuth_angle < 160) & (dl.properties.cloud_fraction < 0.5)
        >>> query = dl.properties.sat_id == "Terra"
    randomize : bool, default False, optional
        Randomize the order of the results.
        You may also use an int or str as an explicit seed.
    raster_client : Raster, optional
        Unneeded in general use; lets you use a specific client instance
        with non-default auth and parameters.
    metadata_client : Metadata, optional
        Unneeded in general use; lets you use a specific client instance
        with non-default auth and parameters.

    Returns
    -------
    scenes : SceneCollection
        Scenes matching your criteria.
    ctx: GeoContext
        The given ``aoi`` as a GeoContext (if it isn't one already),
        with reasonable default parameters for loading all matching Scenes.

        If ``aoi`` was a `GeoContext`, ``ctx`` will be a copy of ``aoi``,
        with any properties that were ``None`` assigned the defaults below.

        If ``aoi`` was not a `GeoContext`, an `AOI` instance will be created
        with ``aoi`` as its geometry, and defaults assigned as described below:

        **Default Spatial Parameters:**

        * resolution: the finest resolution of any band of all matching scenes
        * crs: the most common CRS used of all matching scenes

    Raises
    ------
    ValueError
        If ``aoi`` is a GeoContext with neither geometry nor bounds set,
        or if ``limit`` is greater than ``MAX_RESULT_WINDOW``.
    NotImplementedError
        If an `AOI` cannot be created from ``aoi``
        (e.g. because shapely is not installed).
    """

    if isinstance(aoi, geocontext.GeoContext):
        ctx = aoi
        if ctx.bounds is None and ctx.geometry is None:
            raise ValueError(
                "Unspecified where to search, "
                "since the GeoContext given for ``aoi`` has neither geometry nor bounds set"
            )
    else:
        try:
            ctx = geocontext.AOI(geometry=aoi)
        except NotImplementedError:
            # ``six.raise_from`` raises on its own, so no outer ``raise`` is needed.
            # Chaining from ``None`` hides the original exception context.
            six.raise_from(
                NotImplementedError(
                    "Cannot create an AOI GeoContext from your geometry, since shapely is not installed. "
                    "Either create a GeoContext yourself and pass it in, or install shapely.\n"
                    "Note that you can install all recommended dependencies with `pip install descarteslabs[complete]`"
                ), None)

    if raster_client is None:
        raster_client = Raster()
    if metadata_client is None:
        metadata_client = Metadata()

    # Accept a single product id as a convenience.
    if isinstance(products, six.string_types):
        products = [products]

    # datetime objects are sent to the metadata client as ISO 8601 strings.
    if isinstance(start_datetime, datetime.datetime):
        start_datetime = start_datetime.isoformat()

    if isinstance(end_datetime, datetime.datetime):
        end_datetime = end_datetime.isoformat()

    if limit > MAX_RESULT_WINDOW:
        raise ValueError("Limit must be <= {}".format(MAX_RESULT_WINDOW))

    metadata_params = dict(products=products,
                           geom=ctx.__geo_interface__,
                           start_datetime=start_datetime,
                           end_datetime=end_datetime,
                           cloud_fraction=cloud_fraction,
                           limit=limit,
                           sort_field=sort_field,
                           sort_order=sort_order,
                           date=date_field,
                           q=query,
                           randomize=randomize)

    metadata = metadata_client.search(**metadata_params)

    # Without an explicit product list, collect the products from the results.
    # An empty list/tuple is treated like ``None``; otherwise ``product_bands``
    # would be empty and any returned scene would raise a ``KeyError`` below.
    no_products_given = products is None or (
        isinstance(products, (list, tuple)) and not products)
    if no_products_given:
        products = {
            meta["properties"]["product"]
            for meta in metadata["features"]
        }

    # One band lookup per product, shared by all scenes of that product.
    product_bands = {
        product:
        Scene._scenes_bands_dict(metadata_client.get_bands_by_product(product))
        for product in products
    }

    scenes = SceneCollection(
        (Scene(meta, product_bands[meta["properties"]["product"]])
         for meta in metadata["features"]),
        raster_client=raster_client)

    if len(scenes) > 0:
        # Fill in only the spatial parameters the caller left unset.
        assign_ctx = {}
        if ctx.resolution is None:
            # Drop bands whose resolution is missing (or otherwise falsy).
            resolutions = filter(None,
                                 (b.get("resolution")
                                  for band in six.itervalues(product_bands)
                                  for b in six.itervalues(band)))
            try:
                assign_ctx["resolution"] = min(resolutions)
            except ValueError:
                # from min of an empty sequence; no band defines resolution
                assign_ctx["resolution"] = None

        if ctx.crs is None:
            assign_ctx["crs"] = collections.Counter(
                scene.properties["crs"]
                for scene in scenes).most_common(1)[0][0]

        if len(assign_ctx) > 0:
            ctx = ctx.assign(**assign_ctx)

    return scenes, ctx