Exemple #1
0
def read_blosc_array(metadata, data):
    """Decode a chunked, blosc-compressed array from ``data`` into a new ndarray.

    One buffer is read from ``data`` (via ``read_blosc_buffer``) for every entry of
    ``metadata['chunks']`` and decompressed directly into the memory of a
    preallocated output array, each chunk landing right after the previous one.

    :param dict metadata: Must provide ``'shape'`` and ``'dtype'`` (used to allocate
        the output array) and ``'chunks'`` (iterated once per buffer to read).
    :param data: Source object handed unchanged to ``read_blosc_buffer``.
    :return: ndarray with the shape and dtype given in ``metadata``.
    :raises ServerError: If the chunks add up to fewer or more bytes than the
        output array holds.
    """
    output = np.empty(metadata['shape'], dtype=np.dtype(metadata['dtype']))
    start = output.__array_interface__['data'][0]
    end = start + output.nbytes
    ptr = start

    for _ in metadata['chunks']:
        raw_size, buffer = read_blosc_buffer(data)

        # decompress_ptr writes to a raw address with no bounds checking, so make
        # sure the chunk fits in what is left of the output *before* decompressing;
        # otherwise an oversized response would write past the end of the array.
        if ptr + raw_size > end:
            raise ServerError(
                "Received more data than expected for array (got at least {}, expected {})".format(
                    ptr + raw_size - start, output.nbytes))

        blosc.decompress_ptr(buffer, ptr)
        ptr += raw_size

    bytes_received = ptr - start

    if bytes_received != output.nbytes:
        raise ServerError("Did not receive complete array (got {}, expected {})".format(
            bytes_received, output.nbytes))

    return output
Exemple #2
0
    def stack(
        self,
        bands,
        ctx,
        flatten=None,
        mask_nodata=True,
        mask_alpha=None,
        bands_axis=1,
        raster_info=False,
        resampler="near",
        processing_level=None,
        scaling=None,
        data_type=None,
        max_workers=None,
    ):
        """
        Load bands from all scenes and stack them into a 4D ndarray,
        optionally masking invalid data.

        If the selected bands and scenes have different data types the resulting
        ndarray has the most general of those data types. See
        `Scene.ndarray() <descarteslabs.scenes.scene.Scene.ndarray>` for details
        on data type conversions.

        Parameters
        ----------
        bands : str or Sequence[str]
            Band names to load. Can be a single string of band names
            separated by spaces (``"red green blue"``),
            or a sequence of band names (``["red", "green", "blue"]``).
            If the alpha band is requested, it must be last in the list
            to reduce rasterization errors.
        ctx : :class:`~descarteslabs.scenes.geocontext.GeoContext`
            A :class:`~descarteslabs.scenes.geocontext.GeoContext` to use when loading each Scene
        flatten : str, Sequence[str], callable, or Sequence[callable], default None
            "Flatten" groups of Scenes in the stack into a single layer by mosaicking
            each group (such as Scenes from the same day), then stacking the mosaics.

            ``flatten`` takes the same predicates as `Collection.groupby`, such as
            ``"properties.date"`` to mosaic Scenes acquired at the exact same timestamp,
            or ``["properties.date.year", "properties.date.month", "properties.date.day"]``
            to combine Scenes captured on the same day (but not necessarily the same time).

            This is especially useful when ``ctx`` straddles a scene boundary
            and contains one image captured right after another. Instead of having
            each as a separate layer in the stack, you might want them combined.

            Note that indices in the returned ndarray will no longer correspond to
            indices in this SceneCollection, since multiple Scenes may be combined into
            one layer in the stack. You can call ``groupby`` on this SceneCollection
            with the same parameters to iterate through groups of Scenes in equivalent
            order to the returned ndarray.

            Additionally, the order of scenes in the ndarray will change:
            they'll be sorted by the parameters to ``flatten``.
        mask_nodata : bool, default True
            Whether to mask out values in each band of each scene that equal
            that band's ``nodata`` sentinel value.
        mask_alpha : bool or str or None, default None
            Whether to mask pixels in all bands where the alpha band of all scenes is 0.
            Provide a string to use an alternate band name for masking.
            If the alpha band is available for all scenes in the collection and
            ``mask_alpha`` is None, ``mask_alpha`` is set to True. If not,
            mask_alpha is set to False.
        bands_axis : int, default 1
            Axis along which bands should be located.
            If 1, the array will have shape ``(scene, band, y, x)``, if -1,
            it will have shape ``(scene, y, x, band)``, etc.
            A bands_axis of 0 (or its negative equivalent, -4) is currently unsupported.
        raster_info : bool, default False
            Whether to also return a list of dicts about the rasterization of
            each scene, including the coordinate system WKT
            and geotransform matrix.
            Generally only useful if you plan to upload data derived from this
            scene back to the Descartes catalog, or use it with GDAL.
        resampler : str, default "near"
            Algorithm used to interpolate pixel values when scaling and transforming
            each image to its new resolution or SRS. Possible values are
            ``near`` (nearest-neighbor), ``bilinear``, ``cubic``, ``cubicsplice``,
            ``lanczos``, ``average``, ``mode``, ``max``, ``min``, ``med``, ``q1``, ``q3``.
        processing_level : str, optional
            How the processing level of the underlying data should be adjusted. Possible
            values are ``toa`` (top of atmosphere) and ``surface``. For products that
            support it, ``surface`` applies Descartes Labs' general surface reflectance
            algorithm to the output.
        scaling : None, str, list, dict
            Band scaling specification. Please see :meth:`scaling_parameters` for a full
            description of this parameter.
        data_type : None, str
            Output data type. Please see :meth:`scaling_parameters` for a full
            description of this parameter.
        max_workers : int, default None
            Maximum number of threads to use to parallelize individual ndarray
            calls to each Scene.
            If None, it defaults to the number of processors on the machine,
            multiplied by 5.
            Note that unnecessary threads *won't* be created if ``max_workers``
            is greater than the number of Scenes in the SceneCollection.

        Returns
        -------
        arr : ndarray
            Returned array's shape is ``(scene, band, y, x)`` if bands_axis is 1,
            or ``(scene, y, x, band)`` if bands_axis is -1.
            If ``mask_nodata`` or ``mask_alpha`` is True, arr will be a masked array.
            The data type ("dtype") of the array is the most general of the data
            types among the scenes being rastered.
        raster_info : List[dict]
            If ``raster_info=True``, a list of raster information dicts for each scene
            is also returned

        Raises
        ------
        ValueError
            If requested bands are unavailable, or band names are not given
            or are invalid.
            If not all required parameters are specified in the :class:`~descarteslabs.scenes.geocontext.GeoContext`.
            If the SceneCollection is empty.
        NotImplementedError
            If ``bands_axis`` is 0 or -4.
        `NotFoundError`
            If a Scene's ID cannot be found in the Descartes Labs catalog
        `BadRequestError`
            If the Descartes Labs platform is given unrecognized parameters
        """
        if len(self) == 0:
            raise ValueError("This SceneCollection is empty")

        # Keyword arguments forwarded to every per-layer ``ndarray`` / ``mosaic`` call.
        kwargs = dict(
            mask_nodata=mask_nodata,
            mask_alpha=mask_alpha,
            bands_axis=bands_axis,
            raster_info=raster_info,
            resampler=resampler,
            processing_level=processing_level,
        )

        if bands_axis == 0 or bands_axis == -4:
            raise NotImplementedError(
                "bands_axis of 0 is currently unsupported for `SceneCollection.stack`. "
                "If you require this shape, try ``np.moveaxis(my_stack, 1, 0)`` on the returned ndarray."
            )
        elif bands_axis > 0:
            kwargs["bands_axis"] = (
                bands_axis - 1
            )  # the bands axis for each component ndarray call in the stack

        if flatten is not None:
            # Accept a single predicate as well as a sequence of predicates.
            if isinstance(flatten,
                          six.string_types) or not hasattr(flatten, "__len__"):
                flatten = [flatten]
            # A group of one is loaded as a plain Scene; larger groups are mosaicked.
            scenes = [
                sc if len(sc) > 1 else sc[0]
                for group, sc in self.groupby(*flatten)
            ]
        else:
            scenes = self

        full_stack = None
        mask = None
        if raster_info:
            raster_infos = [None] * len(scenes)

        alpha_band_name = "alpha"
        if isinstance(mask_alpha, six.string_types):
            alpha_band_name = mask_alpha
        elif mask_alpha is None:
            mask_alpha = self._collection_has_alpha(alpha_band_name)

        bands = Scene._bands_to_list(bands)
        # The alpha band is added only while the scaling parameters are computed,
        # then removed again so the per-layer calls receive the caller's band list.
        pop_alpha = False
        if mask_alpha and alpha_band_name not in bands:
            pop_alpha = True
            bands.append(alpha_band_name)
            scaling = _scaling.append_alpha_scaling(scaling)

        scales, data_type = _scaling.multiproduct_scaling_parameters(
            self._product_band_properties(), bands, scaling, data_type)

        if pop_alpha:
            bands.pop(-1)
            if scales:
                scales.pop(-1)

        kwargs["scaling"] = scales
        kwargs["data_type"] = data_type

        def threaded_ndarrays():
            """Yield ``(index, result)`` for every layer, in completion order when threaded."""
            def data_loader(scene_or_scenecollection, bands, ctx, **kwargs):
                # Returns a zero-argument callable so the load can be submitted to
                # an executor (or invoked directly in the serial fallback).
                ndarray_kwargs = dict(kwargs,
                                      raster_client=self._raster_client)
                if isinstance(scene_or_scenecollection, self.__class__):
                    return lambda: scene_or_scenecollection.mosaic(
                        bands, ctx, **kwargs)
                else:
                    return lambda: scene_or_scenecollection.ndarray(
                        bands, ctx, **ndarray_kwargs)

            # NOTE(review): a plain attribute lookup only raises ImportError if
            # `concurrent` is a lazy-import proxy -- confirm against this module's imports.
            try:
                futures = concurrent.futures
            except ImportError:
                logging.warning(
                    "Failed to import concurrent.futures. ndarray calls will be serial."
                )
                for i, scene_or_scenecollection in enumerate(scenes):
                    yield i, data_loader(scene_or_scenecollection, bands, ctx,
                                         **kwargs)()
            else:
                with futures.ThreadPoolExecutor(
                        max_workers=max_workers) as executor:
                    future_ndarrays = {}
                    for i, scene_or_scenecollection in enumerate(scenes):
                        future_ndarray = executor.submit(
                            data_loader(scene_or_scenecollection, bands, ctx,
                                        **kwargs))
                        future_ndarrays[future_ndarray] = i
                    for future in futures.as_completed(future_ndarrays):
                        i = future_ndarrays[future]
                        result = future.result()
                        yield i, result

        for i, arr in threaded_ndarrays():
            if raster_info:
                arr, raster_meta = arr
                raster_infos[i] = raster_meta

            if full_stack is None:
                # Shape and dtype of the whole stack are taken from the first
                # layer that finishes loading.
                stack_shape = (len(scenes), ) + arr.shape
                full_stack = np.empty(stack_shape, dtype=arr.dtype)

            if isinstance(arr, np.ma.MaskedArray):
                if mask is None:
                    # Results arrive in completion order, so the first layer seen is
                    # arbitrary. Allocate the mask the first time *any* masked layer
                    # shows up, zero-filled so that layers which come back unmasked
                    # read as fully valid instead of exposing uninitialized memory.
                    mask = np.zeros(stack_shape, dtype=bool)
                full_stack[i] = arr.data
                mask[i] = arr.mask
            else:
                full_stack[i] = arr

        if mask is not None:
            full_stack = np.ma.MaskedArray(full_stack, mask, copy=False)
        if raster_info:
            return full_stack, raster_infos
        else:
            return full_stack
    def stack(
        self,
        inputs,
        bands=None,
        scales=None,
        data_type="UInt16",
        srs=None,
        resolution=None,
        dimensions=None,
        cutline=None,
        place=None,
        bounds=None,
        bounds_srs=None,
        align_pixels=False,
        resampler=None,
        order='image',
        dltile=None,
        max_workers=None,
        **pass_through_params
    ):
        """Rasterize several inputs and return them as one 4-D NumPy array.

        Every layer of the stack must have the same shape and cover the same
        spatial extent, so you must either:
        * set ``dltile``, or
        * set [``resolution`` or ``dimensions``], ``srs``, and ``bounds``

        :param inputs: List, or list of lists, of :class:`Metadata` identifiers.
            The stack follows the order of this list.
            Each element is treated as a separate input to ``raster.ndarray``,
            so if a list of lists is given, each sublist's identifiers are mosaiced
            together to become a single level in the stack.
        :param bands: List of requested bands. If the last item in the list is an alpha
            band (with data range `[0, 1]`) it affects rastering of all other bands:
            When rastering multiple images, they are combined image-by-image only where
            each respective image's alpha band is `1` (pixels where the alpha band is not
            `1` are "transparent" in the overlap between images). If a pixel is fully
            masked considering all combined alpha bands it will be `0` in all non-alpha
            bands.
        :param scales: List of tuples specifying the scaling to be applied to each band.
            A tuple has 4 elements in the order ``(src_min, src_max, out_min, out_max)``,
            meaning values in the source range ``src_min`` to ``src_max`` will be scaled
            to the output range ``out_min`` to ``out_max``. A tuple with 2 elements
            ``(src_min, src_max)`` is also allowed, in which case the output range
            defaults to ``(0, 255)`` (a useful default for the common output type
            ``Byte``).  If no scaling is desired for a band, use ``None``. This tuple
            format and behaviour is identical to GDAL's scales during translation.
            Example argument: ``[(0, 10000, 0, 127), None, (0, 10000)]`` - the first
            band will have source values 0-10000 scaled to 0-127, the second band will
            not be scaled, the third band will have 0-10000 scaled to 0-255.
        :param str data_type: Output data type (one of ``Byte``, ``UInt16``, ``Int16``,
            ``UInt32``, ``Int32``, ``Float32``, ``Float64``).
        :param str srs: Output spatial reference system definition understood by GDAL.
        :param float resolution: Desired resolution in output SRS units. Incompatible with
            `dimensions`
        :param tuple dimensions: Desired output (width, height) in pixels. Incompatible with
            `resolution`
        :param str cutline: A GeoJSON feature or geometry to be used as a cutline.
        :param str place: A slug identifier to be used as a cutline.
        :param tuple bounds: ``(min_x, min_y, max_x, max_y)`` in target SRS.
        :param str bounds_srs:
            Override the coordinate system in which bounds are expressed.
            If not given, bounds are assumed to be expressed in the output SRS.
        :param bool align_pixels: Align pixels to the target coordinate system.
        :param str resampler: Resampling algorithm to be used during warping (``near``,
            ``bilinear``, ``cubic``, ``cubicsplice``, ``lanczos``, ``average``, ``mode``,
            ``max``, ``min``, ``med``, ``q1``, ``q3``).
        :param str order: Order of the returned array. `image` returns arrays as
            ``(scene, row, column, band)`` while `gdal` returns arrays as ``(scene, band, row, column)``.
        :param str dltile: a dltile key used to specify the resolution, bounds, and srs.
        :param int max_workers: Maximum number of threads over which to
            parallelize individual ndarray calls. If `None`, will be set to the minimum
            of the number of inputs and `DEFAULT_MAX_WORKERS`.

        :return: A tuple of ``(stack, metadata)``.
            ``stack``: 4D ndarray. The axes are ordered ``(scene, y, x, band)`` for
            ``order="image"`` (the default), or ``(scene, band, y, x)`` for
            ``order="gdal"``. The scenes in the outermost axis are in the same order
            as the list of identifiers given as ``inputs``. ``stack`` is ``None``
            when ``inputs`` is empty.
            ``metadata``: List[dict] of the rasterization metadata for each element in ``inputs``.
            As with the metadata returned by :meth:`ndarray` and :meth:`raster`, these dictionaries
            contain useful information about the raster, such as its geotransform matrix and WKT
            of its coordinate system, but there are no guarantees that certain keys will be present.
        :raises TypeError: If ``inputs`` is not a list or tuple.
        :raises ValueError: If ``dltile`` is not given and any of ``resolution``/``dimensions``,
            ``srs`` or ``bounds`` is missing, or if a single-band (2-D) result comes back
            and ``order`` is neither ``'image'`` nor ``'gdal'``.
        """
        if not isinstance(inputs, (list, tuple)):
            raise TypeError("Inputs must be a list or tuple, instead got '{}'".format(type(inputs)))

        # Without a dltile, the output grid has to be pinned down explicitly.
        if dltile is None:
            requirements = (
                (resolution is None and dimensions is None, "Must set `resolution` or `dimensions`"),
                (srs is None, "Must set `srs`"),
                (bounds is None, "Must set `bounds`"),
            )
            for unmet, message in requirements:
                if unmet:
                    raise ValueError(message)

        # Everything except `inputs` is forwarded as-is to the threaded loader.
        request = {
            "bands": bands,
            "scales": scales,
            "data_type": data_type,
            "srs": srs,
            "resolution": resolution,
            "dimensions": dimensions,
            "cutline": cutline,
            "place": place,
            "bounds": bounds,
            "bounds_srs": bounds_srs,
            "align_pixels": align_pixels,
            "resampler": resampler,
            "order": order,
            "dltile": dltile,
            "max_workers": max_workers,
        }
        request.update(pass_through_params)

        layers = None
        layer_meta = [None for _ in inputs]

        for index, layer, info in self._threaded_ndarray(inputs, **request):
            # A 2-D result carries no band axis; add one where `order` puts it.
            if len(layer.shape) == 2:
                if order == "image":
                    band_axis = -1
                elif order == "gdal":
                    band_axis = 0
                else:
                    raise ValueError("Unknown order '{}'; should be one of 'image' or 'gdal'".format(order))
                layer = np.expand_dims(layer, band_axis)

            # The first layer to arrive fixes the shape and dtype of the whole stack.
            if layers is None:
                layers = np.empty((len(inputs),) + layer.shape, dtype=layer.dtype)

            layers[index] = layer
            layer_meta[index] = info

        return layers, layer_meta