def read_blosc_array(metadata, data):
    """Reassemble a numpy array from a stream of blosc-compressed chunks.

    Allocates an array matching ``metadata['shape']``/``metadata['dtype']``,
    then decompresses each chunk listed in ``metadata['chunks']`` directly
    into the array's buffer, advancing a raw write pointer as it goes.

    Raises ServerError if the decompressed byte count does not exactly fill
    the destination array.
    """
    out = np.empty(metadata['shape'], dtype=np.dtype(metadata['dtype']))
    # Base address of the destination buffer; chunks are written in-place
    # at successive offsets from here.
    base_ptr = out.__array_interface__['data'][0]
    write_ptr = base_ptr
    for _ in metadata['chunks']:
        raw_size, compressed = read_blosc_buffer(data)
        blosc.decompress_ptr(compressed, write_ptr)
        write_ptr += raw_size
    received = write_ptr - base_ptr
    if received != out.nbytes:
        raise ServerError("Did not receive complete array (got {}, expected {})".format(
            received, out.nbytes))
    return out
def stack(
    self,
    bands,
    ctx,
    flatten=None,
    mask_nodata=True,
    mask_alpha=None,
    bands_axis=1,
    raster_info=False,
    resampler="near",
    processing_level=None,
    scaling=None,
    data_type=None,
    max_workers=None,
):
    """
    Load bands from all scenes and stack them into a 4D ndarray,
    optionally masking invalid data.

    If the selected bands and scenes have different data types the resulting
    ndarray has the most general of those data types. See
    `Scene.ndarray() <descarteslabs.scenes.scene.Scene.ndarray>` for details
    on data type conversions.

    Parameters
    ----------
    bands : str or Sequence[str]
        Band names to load. Can be a single string of band names
        separated by spaces (``"red green blue"``), or a sequence of band
        names (``["red", "green", "blue"]``). If the alpha band is requested,
        it must be last in the list to reduce rasterization errors.
    ctx : :class:`~descarteslabs.scenes.geocontext.GeoContext`
        A :class:`~descarteslabs.scenes.geocontext.GeoContext` to use when
        loading each Scene
    flatten : str, Sequence[str], callable, or Sequence[callable], default None
        "Flatten" groups of Scenes in the stack into a single layer by
        mosaicking each group (such as Scenes from the same day), then
        stacking the mosaics.

        ``flatten`` takes the same predicates as `Collection.groupby`, such
        as ``"properties.date"`` to mosaic Scenes acquired at the exact same
        timestamp, or ``["properties.date.year", "properties.date.month",
        "properties.date.day"]`` to combine Scenes captured on the same day
        (but not necessarily the same time).

        This is especially useful when ``ctx`` straddles a scene boundary
        and contains one image captured right after another. Instead of
        having each as a separate layer in the stack, you might want them
        combined.

        Note that indicies in the returned ndarray will no longer correspond
        to indicies in this SceneCollection, since multiple Scenes may be
        combined into one layer in the stack. You can call ``groupby`` on
        this SceneCollection with the same parameters to iterate through
        groups of Scenes in equivalent order to the returned ndarray.

        Additionally, the order of scenes in the ndarray will change:
        they'll be sorted by the parameters to ``flatten``.
    mask_nodata : bool, default True
        Whether to mask out values in each band of each scene that equal
        that band's ``nodata`` sentinel value.
    mask_alpha : bool or str or None, default None
        Whether to mask pixels in all bands where the alpha band of all
        scenes is 0. Provide a string to use an alternate band name for
        masking. If the alpha band is available for all scenes in the
        collection and ``mask_alpha`` is None, ``mask_alpha`` is set to True.
        If not, mask_alpha is set to False.
    bands_axis : int, default 1
        Axis along which bands should be located.
        If 1, the array will have shape ``(scene, band, y, x)``, if -1,
        it will have shape ``(scene, y, x, band)``, etc.
        A bands_axis of 0 is currently unsupported.
    raster_info : bool, default False
        Whether to also return a list of dicts about the rasterization of
        each scene, including the coordinate system WKT and geotransform
        matrix. Generally only useful if you plan to upload data derived
        from this scene back to the Descartes catalog, or use it with GDAL.
    resampler : str, default "near"
        Algorithm used to interpolate pixel values when scaling and
        transforming each image to its new resolution or SRS. Possible
        values are ``near`` (nearest-neighbor), ``bilinear``, ``cubic``,
        ``cubicsplice``, ``lanczos``, ``average``, ``mode``, ``max``,
        ``min``, ``med``, ``q1``, ``q3``.
    processing_level : str, optional
        How the processing level of the underlying data should be adjusted.
        Possible values are ``toa`` (top of atmosphere) and ``surface``.
        For products that support it, ``surface`` applies Descartes Labs'
        general surface reflectance algorithm to the output.
    scaling : None, str, list, dict
        Band scaling specification. Please see :meth:`scaling_parameters`
        for a full description of this parameter.
    data_type : None, str
        Output data type. Please see :meth:`scaling_parameters` for a full
        description of this parameter.
    max_workers : int, default None
        Maximum number of threads to use to parallelize individual ndarray
        calls to each Scene. If None, it defaults to the number of
        processors on the machine, multiplied by 5. Note that unnecessary
        threads *won't* be created if ``max_workers`` is greater than the
        number of Scenes in the SceneCollection.

    Returns
    -------
    arr : ndarray
        Returned array's shape is ``(scene, band, y, x)`` if bands_axis is 1,
        or ``(scene, y, x, band)`` if bands_axis is -1.
        If ``mask_nodata`` or ``mask_alpha`` is True, arr will be a masked
        array. The data type ("dtype") of the array is the most general of
        the data types among the scenes being rastered.
    raster_info : List[dict]
        If ``raster_info=True``, a list of raster information dicts for
        each scene is also returned

    Raises
    ------
    ValueError
        If requested bands are unavailable, or band names are not given
        or are invalid.
        If not all required parameters are specified in the
        :class:`~descarteslabs.scenes.geocontext.GeoContext`.
        If the SceneCollection is empty.
    `NotFoundError`
        If a Scene's ID cannot be found in the Descartes Labs catalog
    `BadRequestError`
        If the Descartes Labs platform is given unrecognized parameters
    """
    if len(self) == 0:
        raise ValueError("This SceneCollection is empty")

    kwargs = dict(
        mask_nodata=mask_nodata,
        mask_alpha=mask_alpha,
        bands_axis=bands_axis,
        raster_info=raster_info,
        resampler=resampler,
        processing_level=processing_level,
    )

    if bands_axis == 0 or bands_axis == -4:
        raise NotImplementedError(
            "bands_axis of 0 is currently unsupported for `SceneCollection.stack`. "
            "If you require this shape, try ``np.moveaxis(my_stack, 1, 0)`` on the returned ndarray."
        )
    elif bands_axis > 0:
        # Each component call rasters a single scene (no scene axis yet),
        # so its bands axis is one less than in the final stacked array.
        kwargs["bands_axis"] = bands_axis - 1

    if flatten is not None:
        if isinstance(flatten, six.string_types) or not hasattr(flatten, "__len__"):
            flatten = [flatten]
        # Single-scene groups stay plain Scenes; multi-scene groups remain
        # SceneCollections so they can be mosaicked into one layer below.
        scenes = [
            sc if len(sc) > 1 else sc[0] for group, sc in self.groupby(*flatten)
        ]
    else:
        scenes = self

    full_stack = None
    mask = None
    if raster_info:
        raster_infos = [None] * len(scenes)

    alpha_band_name = "alpha"
    if isinstance(mask_alpha, six.string_types):
        alpha_band_name = mask_alpha
    elif mask_alpha is None:
        # Auto-enable alpha masking only if every scene has the alpha band.
        mask_alpha = self._collection_has_alpha(alpha_band_name)

    bands = Scene._bands_to_list(bands)
    pop_alpha = False
    if mask_alpha and alpha_band_name not in bands:
        # Temporarily request alpha for masking, then strip it back out of
        # the band list (and scales) so it doesn't leak into the output.
        pop_alpha = True
        bands.append(alpha_band_name)
        scaling = _scaling.append_alpha_scaling(scaling)

    scales, data_type = _scaling.multiproduct_scaling_parameters(
        self._product_band_properties(), bands, scaling, data_type)

    if pop_alpha:
        bands.pop(-1)
        if scales:
            scales.pop(-1)

    kwargs["scaling"] = scales
    kwargs["data_type"] = data_type

    def threaded_ndarrays():
        # Yields (index, ndarray) pairs, possibly out of order when threaded.
        def data_loader(scene_or_scenecollection, bands, ctx, **kwargs):
            ndarray_kwargs = dict(kwargs, raster_client=self._raster_client)
            if isinstance(scene_or_scenecollection, self.__class__):
                # NOTE(review): mosaic gets the plain kwargs (no
                # raster_client); presumably it uses its own client — confirm.
                return lambda: scene_or_scenecollection.mosaic(
                    bands, ctx, **kwargs)
            else:
                return lambda: scene_or_scenecollection.ndarray(
                    bands, ctx, **ndarray_kwargs)

        try:
            # NOTE(review): relies on the module-level import of
            # concurrent.futures having been wrapped in try/except — confirm.
            futures = concurrent.futures
        except ImportError:
            logging.warning(
                "Failed to import concurrent.futures. ndarray calls will be serial."
            )
            for i, scene_or_scenecollection in enumerate(scenes):
                yield i, data_loader(scene_or_scenecollection, bands, ctx, **kwargs)()
        else:
            with futures.ThreadPoolExecutor(
                    max_workers=max_workers) as executor:
                future_ndarrays = {}
                for i, scene_or_scenecollection in enumerate(scenes):
                    future_ndarray = executor.submit(
                        data_loader(scene_or_scenecollection, bands, ctx, **kwargs))
                    future_ndarrays[future_ndarray] = i
                for future in futures.as_completed(future_ndarrays):
                    i = future_ndarrays[future]
                    result = future.result()
                    yield i, result

    for i, arr in threaded_ndarrays():
        if raster_info:
            arr, raster_meta = arr
            raster_infos[i] = raster_meta

        if full_stack is None:
            stack_shape = (len(scenes), ) + arr.shape
            full_stack = np.empty(stack_shape, dtype=arr.dtype)

        if isinstance(arr, np.ma.MaskedArray):
            # BUGFIX: allocate the mask lazily, on the first *masked* result,
            # rather than only when the first-*completed* result is masked.
            # Results arrive in nondeterministic as_completed order, so the
            # old approach could crash (mask is None) or — because it used
            # np.empty — leave garbage mask rows for unmasked scenes.
            # np.zeros leaves rows of unmasked scenes fully unmasked.
            if mask is None:
                mask = np.zeros(full_stack.shape, dtype=bool)
            full_stack[i] = arr.data
            mask[i] = arr.mask
        else:
            full_stack[i] = arr

    if mask is not None:
        full_stack = np.ma.MaskedArray(full_stack, mask, copy=False)
    if raster_info:
        return full_stack, raster_infos
    else:
        return full_stack
def stack(
    self,
    inputs,
    bands=None,
    scales=None,
    data_type="UInt16",
    srs=None,
    resolution=None,
    dimensions=None,
    cutline=None,
    place=None,
    bounds=None,
    bounds_srs=None,
    align_pixels=False,
    resampler=None,
    order='image',
    dltile=None,
    max_workers=None,
    **pass_through_params
):
    """Retrieve a stack of rasters as a 4-D NumPy array.

    To ensure every raster in the stack has the same shape and covers
    the same spatial extent, you must either:

    * set ``dltile``, or
    * set [``resolution`` or ``dimensions``], ``srs``, and ``bounds``

    :param inputs: List, or list of lists, of :class:`Metadata` identifiers.
        The stack will follow the same order as this list. Each element in
        the list is treated as a separate input to ``raster.ndarray``, so if
        a list of lists is given, each sublist's identifiers will be mosaiced
        together to become a single level in the stack.
    :param bands: List of requested bands. If the last item in the list is
        an alpha band (with data range `[0, 1]`) it affects rastering of
        all other bands: When rastering multiple images, they are combined
        image-by-image only where each respective image's alpha band is `1`
        (pixels where the alpha band is not `1` are "transparent" in the
        overlap between images). If a pixel is fully masked considering all
        combined alpha bands it will be `0` in all non-alpha bands.
    :param scales: List of tuples specifying the scaling to be applied to
        each band. A tuple has 4 elements in the order
        ``(src_min, src_max, out_min, out_max)``, meaning values in the
        source range ``src_min`` to ``src_max`` will be scaled to the output
        range ``out_min`` to ``out_max``. A tuple with 2 elements
        ``(src_min, src_max)`` is also allowed, in which case the output
        range defaults to ``(0, 255)`` (a useful default for the common
        output type ``Byte``). If no scaling is desired for a band, use
        ``None``. This tuple format and behaviour is identical to GDAL's
        scales during translation. Example argument:
        ``[(0, 10000, 0, 127), None, (0, 10000)]`` - the first band will
        have source values 0-10000 scaled to 0-127, the second band will
        not be scaled, the third band will have 0-10000 scaled to 0-255.
    :param str data_type: Output data type (one of ``Byte``, ``UInt16``,
        ``Int16``, ``UInt32``, ``Int32``, ``Float32``, ``Float64``).
    :param str srs: Output spatial reference system definition understood
        by GDAL.
    :param float resolution: Desired resolution in output SRS units.
        Incompatible with `dimensions`
    :param tuple dimensions: Desired output (width, height) in pixels.
        Incompatible with `resolution`
    :param str cutline: A GeoJSON feature or geometry to be used as a cutline.
    :param str place: A slug identifier to be used as a cutline.
    :param tuple bounds: ``(min_x, min_y, max_x, max_y)`` in target SRS.
    :param str bounds_srs: Override the coordinate system in which bounds
        are expressed. If not given, bounds are assumed to be expressed in
        the output SRS.
    :param bool align_pixels: Align pixels to the target coordinate system.
    :param str resampler: Resampling algorithm to be used during warping
        (``near``, ``bilinear``, ``cubic``, ``cubicsplice``, ``lanczos``,
        ``average``, ``mode``, ``max``, ``min``, ``med``, ``q1``, ``q3``).
    :param str order: Order of the returned array. `image` returns arrays
        as ``(scene, row, column, band)`` while `gdal` returns arrays as
        ``(scene, band, row, column)``.
    :param str dltile: a dltile key used to specify the resolution, bounds,
        and srs.
    :param int max_workers: Maximum number of threads over which to
        parallelize individual ndarray calls. If `None`, will be set to the
        minimum of the number of inputs and `DEFAULT_MAX_WORKERS`.

    :return: A tuple of ``(stack, metadata)``.

        ``stack``: 4D ndarray. The axes are ordered
        ``(scene, band, y, x)`` (or ``(scene, y, x, band)`` if
        ``order="gdal"``). The scenes in the outermost axis are in the same
        order as the list of identifiers given as ``inputs``.

        ``metadata``: List[dict] of the rasterization metadata for each
        element in ``inputs``. As with the metadata returned by
        :meth:`ndarray` and :meth:`raster`, these dictionaries contain
        useful information about the raster, such as its geotransform
        matrix and WKT of its coordinate system, but there are no
        guarantees that certain keys will be present.

    :raises TypeError: If ``inputs`` is not a list or tuple.
    :raises ValueError: If required geospatial parameters are missing, or
        ``order`` is not ``'image'`` or ``'gdal'``.
    """
    if not isinstance(inputs, (list, tuple)):
        raise TypeError("Inputs must be a list or tuple, instead got '{}'".format(type(inputs)))

    params = dict(
        bands=bands,
        scales=scales,
        data_type=data_type,
        srs=srs,
        resolution=resolution,
        dimensions=dimensions,
        cutline=cutline,
        place=place,
        bounds=bounds,
        bounds_srs=bounds_srs,
        align_pixels=align_pixels,
        resampler=resampler,
        order=order,
        dltile=dltile,
        max_workers=max_workers,
        **pass_through_params
    )

    if dltile is None:
        # Without a dltile, the caller must fully pin down the output grid.
        if resolution is None and dimensions is None:
            raise ValueError("Must set `resolution` or `dimensions`")
        if srs is None:
            raise ValueError("Must set `srs`")
        if bounds is None:
            raise ValueError("Must set `bounds`")

    # BUGFIX: validate `order` up front. Previously the check only ran when
    # a rastered array came back 2-D, so an invalid `order` combined with
    # 3-D results was never caught.
    if order not in ("image", "gdal"):
        raise ValueError("Unknown order '{}'; should be one of 'image' or 'gdal'".format(order))

    full_stack = None
    metadata = [None] * len(inputs)
    for i, arr, meta in self._threaded_ndarray(inputs, **params):
        if len(arr.shape) == 2:
            # Promote single-band 2-D results to 3-D so every level of the
            # stack carries a band axis in the position implied by `order`.
            if order == "image":
                arr = np.expand_dims(arr, -1)
            else:  # order == "gdal", guaranteed by the check above
                arr = np.expand_dims(arr, 0)
        if full_stack is None:
            # Allocate the output lazily: shape and dtype come from the
            # first rastered array.
            stack_shape = (len(inputs),) + arr.shape
            full_stack = np.empty(stack_shape, dtype=arr.dtype)
        full_stack[i] = arr
        metadata[i] = meta
    return full_stack, metadata