def test_lazily_indexed_array(self):
    original = np.random.rand(10, 20, 30)
    x = indexing.NumpyIndexingAdapter(original)
    v = Variable(['i', 'j', 'k'], original)
    lazy = indexing.LazilyOuterIndexedArray(x)
    v_lazy = Variable(['i', 'j', 'k'], lazy)
    I = ReturnItem()  # noqa: E741  # allow ambiguous name

    # test orthogonally applied indexers
    indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], [0], np.arange(10) < 5]
    for i in indexers:
        for j in indexers:
            for k in indexers:
                if isinstance(j, np.ndarray) and j.dtype.kind == 'b':
                    j = np.arange(20) < 5
                if isinstance(k, np.ndarray) and k.dtype.kind == 'b':
                    k = np.arange(30) < 5
                expected = np.asarray(v[i, j, k])
                for actual in [
                        v_lazy[i, j, k],
                        v_lazy[:, j, k][i],
                        v_lazy[:, :, k][:, j][i]]:
                    assert expected.shape == actual.shape
                    assert_array_equal(expected, actual)
                    assert isinstance(actual._data,
                                      indexing.LazilyOuterIndexedArray)

                    # make sure actual.key is appropriate type
                    if all(isinstance(k, (int, slice))
                           for k in v_lazy._data.key.tuple):
                        assert isinstance(v_lazy._data.key,
                                          indexing.BasicIndexer)
                    else:
                        assert isinstance(v_lazy._data.key,
                                          indexing.OuterIndexer)

    # test sequentially applied indexers
    indexers = [(3, 2), (I[:], 0), (I[:2], -1), (I[:4], [0]), ([4, 5], 0),
                ([0, 1, 2], [0, 1]), ([0, 3, 5], I[:2])]
    for i, j in indexers:
        expected = v[i][j]
        actual = v_lazy[i][j]
        assert expected.shape == actual.shape
        assert_array_equal(expected, actual)

        # test transpose
        if actual.ndim > 1:
            order = np.array(actual.dims)  # transpose by named dims
            transposed = actual.transpose(*order)
            assert_array_equal(expected.transpose(*order), transposed)
            assert isinstance(actual._data,
                              (indexing.LazilyVectorizedIndexedArray,
                               indexing.LazilyOuterIndexedArray))
        assert isinstance(actual._data, indexing.LazilyOuterIndexedArray)
        assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter)
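# A minimal sketch of the behaviour the test above exercises, written
# against xarray's private indexing internals (version-dependent; the
# indexer classes come straight from the test). Each selection only
# composes keys -- the wrapped array is read when coerced to an ndarray.
import numpy as np
from xarray.core import indexing

original = np.random.rand(10, 20, 30)
lazy = indexing.LazilyOuterIndexedArray(
    indexing.NumpyIndexingAdapter(original))
sub = lazy[indexing.OuterIndexer((slice(3), np.array([0, 1]), slice(None)))]
print(sub.shape)              # (3, 2, 30), computed from the key alone
print(np.asarray(sub).shape)  # data is materialised only here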
def test_sub_array(self):
    original = indexing.LazilyOuterIndexedArray(np.arange(10))
    wrapped = indexing.MemoryCachedArray(original)
    child = wrapped[B[:5]]
    assert isinstance(child, indexing.MemoryCachedArray)
    assert_array_equal(child, np.arange(5))
    assert isinstance(child.array, indexing.NumpyIndexingAdapter)
    assert isinstance(wrapped.array, indexing.LazilyOuterIndexedArray)
def open_store_variable(self, name, var):
    if isinstance(var.data, np.ndarray):
        data = var.data
    else:
        wrapped_array = CfGribArrayWrapper(self, var.data)
        data = indexing.LazilyOuterIndexedArray(wrapped_array)

    encoding = self.ds.encoding.copy()
    encoding['original_shape'] = var.data.shape

    return Variable(var.dimensions, data, var.attributes, encoding)
def test_vectorized_lazily_indexed_array(self):
    original = np.random.rand(10, 20, 30)
    x = indexing.NumpyIndexingAdapter(original)
    v_eager = Variable(["i", "j", "k"], x)
    lazy = indexing.LazilyOuterIndexedArray(x)
    v_lazy = Variable(["i", "j", "k"], lazy)
    I = ReturnItem()  # noqa: E741  # allow ambiguous name

    def check_indexing(v_eager, v_lazy, indexers):
        for indexer in indexers:
            actual = v_lazy[indexer]
            expected = v_eager[indexer]
            assert expected.shape == actual.shape
            assert isinstance(
                actual._data,
                (
                    indexing.LazilyVectorizedIndexedArray,
                    indexing.LazilyOuterIndexedArray,
                ),
            )
            assert_array_equal(expected, actual)
            v_eager = expected
            v_lazy = actual

    # test orthogonal indexing
    indexers = [(I[:], 0, 1), (Variable("i", [0, 1]),)]
    check_indexing(v_eager, v_lazy, indexers)

    # vectorized indexing
    indexers = [
        (Variable("i", [0, 1]), Variable("i", [0, 1]), slice(None)),
        (slice(1, 3, 2), 0),
    ]
    check_indexing(v_eager, v_lazy, indexers)

    indexers = [
        (slice(None, None, 2), 0, slice(None, 10)),
        (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
        (Variable(["i", "j"], [[0, 1], [1, 2]]),),
    ]
    check_indexing(v_eager, v_lazy, indexers)

    indexers = [
        (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
        (Variable(["i", "j"], [[0, 1], [1, 2]]),),
    ]
    check_indexing(v_eager, v_lazy, indexers)
def open_store_variable(self, name, tiledb_array):
    # TODO: What / why was LazilyOuterIndexedArray being used?
    dimensions, attributes = _get_tiledb_dims_and_attrs(
        tiledb_array, _DIMENSION_KEY)
    attributes = dict(attributes)
    # encoding = {
    #     "chunks": zarr_array.chunks,
    #     "compressor": zarr_array.compressor,
    #     "filters": zarr_array.filters,
    # }
    # _FillValue needs to be in attributes, not encoding, so it will get
    # picked up by decode_cf
    # TODO: fill value???
    # if getattr(attributes, "fill_value") is not None:
    #     attributes["_FillValue"] = zarr_array.fill_value
    data = indexing.LazilyOuterIndexedArray(LazyTileDB(tiledb_array))
    return Variable(dimensions, data, attributes)
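# On the TODO above: the usual reason for the wrapper is deferral -- the
# backend array only reports shape/dtype and translates indexers, while
# LazilyOuterIndexedArray composes keys, so opening the store reads no
# data. A hedged sketch of what LazyTileDB is assumed to look like (this
# class body is an illustration, not the project's actual code):
from xarray.backends.common import BackendArray
from xarray.core import indexing


class LazyTileDB(BackendArray):
    def __init__(self, tiledb_array):
        self.tiledb_array = tiledb_array
        self.shape = tiledb_array.shape
        self.dtype = tiledb_array.dtype

    def __getitem__(self, key):
        # let xarray decompose arbitrary indexers into plain outer indexing
        return indexing.explicit_indexing_adapter(
            key, self.shape, indexing.IndexingSupport.OUTER, self._getitem)

    def _getitem(self, key):
        # only here does TileDB actually read from storage
        return self.tiledb_array[key]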
def open_store_variable(self, name, var):
    if isinstance(var.data, cfgrib.dataset.OnDiskArray):
        data = indexing.LazilyOuterIndexedArray(WrapGrib(var.data))
    else:
        data = var.data

    dimensions = tuple(self.variable_map.get(dim, dim)
                       for dim in var.dimensions)
    attrs = var.attributes

    # the coordinates attributes need a special treatment
    if 'coordinates' in attrs:
        coordinates = [self.variable_map.get(d, d)
                       for d in attrs['coordinates'].split()]
        attrs['coordinates'] = ' '.join(coordinates)

    encoding = {}
    # save source so __repr__ can detect if it's local or not
    encoding['source'] = self.ds.stream.path
    encoding['original_shape'] = var.data.shape

    return Variable(dimensions, data, attrs, encoding)
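# For context, a store variable like this is what backs lazy GRIB loading
# through the public API. A hedged usage sketch ("era5.grib" is a
# placeholder path):
import xarray as xr

ds = xr.open_dataset("era5.grib", engine="cfgrib")  # variables stay lazy
name = next(iter(ds.data_vars))
values = ds[name].values  # the first real GRIB read happens here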
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    mask_and_scale=False,
    variable=None,
    group=None,
    default_name=None,
    **open_kwargs,
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename: str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates: bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock: False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when
        using dask's multithreaded backend.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    mask_and_scale: bool, optional
        Lazily scale (using the `scales` and `offsets` from rasterio) and mask.
        If the _Unsigned attribute is present treat integer arrays as unsigned.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    :obj:`xarray.Dataset` | :obj:`xarray.DataArray` | List[:obj:`xarray.Dataset`]:
        The newly created dataset(s).
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates
    masked = masked or mask_and_scale
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            src_crs=vrt.src_crs.to_string(),
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            tolerance=vrt.tolerance,
            src_nodata=vrt.src_nodata,
            nodata=vrt.nodata,
            width=vrt.width,
            height=vrt.height,
            src_transform=vrt.src_transform,
            transform=vrt.transform,
            dtype=vrt.working_dtype,
            warp_extras=vrt.warp_extras,
        )

    if lock is None:
        lock = RASTERIO_LOCK

    # ensure default for sharing is False
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs["sharing"] = open_kwargs.get("sharing", False)
    with warnings.catch_warnings(record=True) as rio_warnings:
        manager = CachingFileManager(
            rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs
        )
        riods = manager.acquire()
        captured_warnings = rio_warnings.copy()

    # raise the NotGeoreferencedWarning if applicable
    for rio_warning in captured_warnings:
        if not riods.subdatasets or not isinstance(
            rio_warning.message, NotGeoreferencedWarning
        ):
            warnings.warn(str(rio_warning.message), type(rio_warning.message))

    # open the subdatasets if they exist
    if riods.subdatasets:
        return _load_subdatasets(
            riods=riods,
            group=group,
            variable=variable,
            parse_coordinates=parse_coordinates,
            chunks=chunks,
            cache=cache,
            lock=lock,
            masked=masked,
            mask_and_scale=mask_and_scale,
        )

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")

    # parse tags & load alternate coords
    attrs = _get_rasterio_attrs(riods=riods)
    coords = _load_netcdf_1d_coords(riods.tags())
    _parse_driver_tags(riods=riods, attrs=attrs, coords=coords)
    for coord in coords:
        if f"NETCDF_DIM_{coord}" in attrs:
            coord_name = coord
            attrs.pop(f"NETCDF_DIM_{coord}")
            break
    else:
        coord_name = "band"
        coords[coord_name] = np.asarray(riods.indexes)

    # Get geospatial coordinates
    transform = _rio_transform(riods)
    if parse_coordinates and transform.is_rectilinear:
        # 1d coordinates
        coords.update(
            affine_to_coords(riods.transform, riods.width, riods.height))
    elif parse_coordinates:
        # 2d coordinates
        warnings.warn(
            "The file coordinates' transformation isn't "
            "rectilinear: xarray won't parse the coordinates "
            "in this case. Set `parse_coordinates=False` to "
            "suppress this warning.",
            RuntimeWarning,
            stacklevel=3,
        )

    unsigned = False
    encoding = {}
    if mask_and_scale and "_Unsigned" in attrs:
        unsigned = variables.pop_to(attrs, encoding, "_Unsigned") == "true"

    da_name = attrs.pop("NETCDF_VARNAME", default_name)
    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(
            manager,
            lock,
            name=da_name,
            vrt_params=vrt_params,
            masked=masked,
            mask_and_scale=mask_and_scale,
            unsigned=unsigned,
        ))

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    result = DataArray(data=data, dims=(coord_name, "y", "x"),
                       coords=coords, attrs=attrs, name=da_name)
    result.encoding = encoding

    # update attributes from NetCDF attributes
    _load_netcdf_attrs(riods.tags(), result)
    result = _decode_datetime_cf(result)

    # make sure the _FillValue is correct dtype
    if "_FillValue" in attrs:
        attrs["_FillValue"] = result.dtype.type(attrs["_FillValue"])

    # handle encoding
    if mask_and_scale:
        if "scale_factor" in result.attrs:
            variables.pop_to(result.attrs, result.encoding,
                             "scale_factor", name=da_name)
        if "add_offset" in result.attrs:
            variables.pop_to(result.attrs, result.encoding,
                             "add_offset", name=da_name)
    if masked:
        if "_FillValue" in result.attrs:
            variables.pop_to(result.attrs, result.encoding,
                             "_FillValue", name=da_name)
        if "missing_value" in result.attrs:
            variables.pop_to(result.attrs, result.encoding,
                             "missing_value", name=da_name)

    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    result.rio.write_transform(riods.transform, inplace=True)
    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        result = _prepare_dask(result, riods, filename, chunks)

    # Make the file closeable
    result._file_obj = manager
    return result
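# A hedged usage sketch of this loader through rioxarray's public entry
# point (the path and chunk sizes are placeholders):
import rioxarray

da = rioxarray.open_rasterio(
    "example.tif", masked=True, chunks={"x": 512, "y": 512})
print(da.dims)          # typically ("band", "y", "x")
band1 = da.sel(band=1)  # still lazy; .values triggers the actual read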
def open_rasterio(
    filename, parse_coordinates=None, chunks=None, cache=None, lock=None,
    masked=False, **open_kwargs
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when
        using dask's multithreaded backend.
    masked : bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    data : DataArray
        The newly created DataArray.
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates
    import rasterio
    from rasterio.vrt import WarpedVRT

    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            src_nodata=vrt.src_nodata,
            dst_nodata=vrt.dst_nodata,
            tolerance=vrt.tolerance,
            transform=vrt.transform,
            width=vrt.width,
            height=vrt.height,
            warp_extras=vrt.warp_extras,
        )

    if lock is None:
        lock = RASTERIO_LOCK

    # ensure default for sharing is False
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs["sharing"] = open_kwargs.get("sharing", False)
    manager = CachingFileManager(
        rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs
    )
    riods = manager.acquire()

    # open the subdatasets if they exist
    if riods.subdatasets:
        data_arrays = {}
        for iii, subdataset in enumerate(riods.subdatasets):
            rioda = open_rasterio(
                subdataset,
                parse_coordinates=iii == 0 and parse_coordinates,
                chunks=chunks,
                cache=cache,
                lock=lock,
                masked=masked,
            )
            data_arrays[rioda.name] = rioda
        return Dataset(data_arrays)

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    coords = OrderedDict()

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")
    coords["band"] = np.asarray(riods.indexes)

    # Get coordinates
    if LooseVersion(rasterio.__version__) < LooseVersion("1.0"):
        transform = riods.affine
    else:
        transform = riods.transform
    if transform.is_rectilinear and parse_coordinates:
        # 1d coordinates
        coords.update(
            affine_to_coords(riods.transform, riods.width, riods.height))
    elif parse_coordinates:
        # 2d coordinates
        warnings.warn(
            "The file coordinates' transformation isn't "
            "rectilinear: xarray won't parse the coordinates "
            "in this case. Set `parse_coordinates=False` to "
            "suppress this warning.",
            RuntimeWarning,
            stacklevel=3,
        )

    # Attributes
    attrs = _parse_tags(riods.tags(1))
    encoding = dict()

    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    attrs["transform"] = tuple(transform)[:6]

    if hasattr(riods, "nodata") and riods.nodata is not None:
        # The nodata values for the raster bands
        if masked:
            encoding["_FillValue"] = riods.nodata
        else:
            attrs["_FillValue"] = riods.nodata
    if hasattr(riods, "scales"):
        # The scale values for the raster bands
        attrs["scales"] = riods.scales
    if hasattr(riods, "offsets"):
        # The offset values for the raster bands
        attrs["offsets"] = riods.offsets
    if hasattr(riods, "descriptions") and any(riods.descriptions):
        # Descriptions for each dataset band
        attrs["descriptions"] = riods.descriptions
    if hasattr(riods, "units") and any(riods.units):
        # A list of units string for each dataset band
        attrs["units"] = riods.units

    # Parse extra metadata from tags, if supported
    parsers = {"ENVI": _parse_envi}
    driver = riods.driver
    if driver in parsers:
        meta = parsers[driver](riods.tags(ns=driver))
        for k, v in meta.items():
            # Add values as coordinates if they match the band count,
            # as attributes otherwise
            if isinstance(v, (list, np.ndarray)) and len(v) == riods.count:
                coords[k] = ("band", np.asarray(v))
            else:
                attrs[k] = v

    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(manager, lock, vrt_params, masked=masked)
    )

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    da_name = attrs.pop("NETCDF_VARNAME", None)
    result = DataArray(
        data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs,
        name=da_name
    )
    result.encoding = encoding

    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        from dask.base import tokenize

        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None

        if chunks in (True, "auto"):
            from dask.array.core import normalize_chunks
            import dask

            if LooseVersion(dask.__version__) < LooseVersion("0.18.0"):
                msg = (
                    "Automatic chunking requires dask.__version__ >= 0.18.0 . "
                    "You currently have version %s" % dask.__version__
                )
                raise NotImplementedError(msg)
            block_shape = (1,) + riods.block_shapes[0]
            chunks = normalize_chunks(
                chunks=(1, "auto", "auto"),
                shape=(riods.count, riods.height, riods.width),
                dtype=riods.dtypes[0],
                previous_chunks=tuple((c,) for c in block_shape),
            )
        token = tokenize(filename, mtime, chunks)
        name_prefix = "open_rasterio-%s" % token
        result = result.chunk(chunks, name_prefix=name_prefix, token=token)

    # Make the file closeable
    result._file_obj = manager
    return result
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    mask_and_scale=False,
    variable=None,
    group=None,
    default_name=None,
    decode_times=True,
    decode_timedelta=None,
    **open_kwargs,
):
    # pylint: disable=too-many-statements,too-many-locals,too-many-branches
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    Parameters
    ----------
    filename: str, rasterio.io.DatasetReader, or rasterio.vrt.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates: bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock: bool or dask.utils.SerializableLock, optional
        If chunks is provided, this argument is used to ensure that only one
        thread per process is reading from a rasterio file object at a time.

        By default and when a lock instance is provided,
        a :class:`xarray.backends.CachingFileManager` is used to cache File
        objects. Since rasterio also caches some data, this will make repeated
        reads from the same object fast.

        When ``lock=False``, no lock is used, allowing for completely parallel
        reads from multiple threads or processes. However, a new file handle
        is opened on each request.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    mask_and_scale: bool, optional
        Lazily scale (using the `scales` and `offsets` from rasterio) and mask.
        If the _Unsigned attribute is present treat integer arrays as unsigned.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    decode_times: bool, optional
        If True, decode times encoded in the standard NetCDF datetime format
        into datetime objects. Otherwise, leave them encoded as numbers.
    decode_timedelta: bool, optional
        If True, decode variables and coordinates with time units in
        {“days”, “hours”, “minutes”, “seconds”, “milliseconds”, “microseconds”}
        into timedelta objects. If False, leave them encoded as numbers.
        If None (default), assume the same value of decode_times.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    :obj:`xarray.Dataset` | :obj:`xarray.DataArray` | List[:obj:`xarray.Dataset`]:
        The newly created dataset(s).
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates
    masked = masked or mask_and_scale
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            src_crs=vrt.src_crs.to_string() if vrt.src_crs else None,
            crs=vrt.crs.to_string() if vrt.crs else None,
            resampling=vrt.resampling,
            tolerance=vrt.tolerance,
            src_nodata=vrt.src_nodata,
            nodata=vrt.nodata,
            width=vrt.width,
            height=vrt.height,
            src_transform=vrt.src_transform,
            transform=vrt.transform,
            dtype=vrt.working_dtype,
            warp_extras=vrt.warp_extras,
        )

    if lock in (True, None):
        lock = RASTERIO_LOCK
    elif lock is False:
        lock = NO_LOCK

    # ensure default for sharing is False
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs["sharing"] = open_kwargs.get("sharing", False)
    with warnings.catch_warnings(record=True) as rio_warnings:
        if lock is not NO_LOCK:
            manager = CachingFileManager(
                rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs
            )
        else:
            manager = URIManager(
                rasterio.open, filename, mode="r", kwargs=open_kwargs)
        riods = manager.acquire()
        captured_warnings = rio_warnings.copy()

    # raise the NotGeoreferencedWarning if applicable
    for rio_warning in captured_warnings:
        if not riods.subdatasets or not isinstance(
            rio_warning.message, NotGeoreferencedWarning
        ):
            warnings.warn(str(rio_warning.message), type(rio_warning.message))

    # open the subdatasets if they exist
    if riods.subdatasets:
        return _load_subdatasets(
            riods=riods,
            group=group,
            variable=variable,
            parse_coordinates=parse_coordinates,
            chunks=chunks,
            cache=cache,
            lock=lock,
            masked=masked,
            mask_and_scale=mask_and_scale,
            decode_times=decode_times,
            decode_timedelta=decode_timedelta,
            **open_kwargs,
        )

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")

    # parse tags & load alternate coords
    attrs = _get_rasterio_attrs(riods=riods)
    coords = _load_netcdf_1d_coords(riods.tags())
    _parse_driver_tags(riods=riods, attrs=attrs, coords=coords)
    for coord in coords:
        if f"NETCDF_DIM_{coord}" in attrs:
            coord_name = coord
            attrs.pop(f"NETCDF_DIM_{coord}")
            break
    else:
        coord_name = "band"
        coords[coord_name] = np.asarray(riods.indexes)

    has_gcps = riods.gcps[0]
    if has_gcps:
        parse_coordinates = False

    # Get geospatial coordinates
    if parse_coordinates:
        coords.update(
            _generate_spatial_coords(
                _rio_transform(riods), riods.width, riods.height)
        )

    unsigned = False
    encoding = {}
    if mask_and_scale and "_Unsigned" in attrs:
        unsigned = variables.pop_to(attrs, encoding, "_Unsigned") == "true"

    if masked:
        encoding["dtype"] = str(_rasterio_to_numpy_dtype(riods.dtypes))

    da_name = attrs.pop("NETCDF_VARNAME", default_name)
    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(
            manager,
            lock,
            name=da_name,
            vrt_params=vrt_params,
            masked=masked,
            mask_and_scale=mask_and_scale,
            unsigned=unsigned,
        )
    )

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    result = DataArray(
        data=data, dims=(coord_name, "y", "x"), coords=coords, attrs=attrs,
        name=da_name
    )
    result.encoding = encoding

    # update attributes from NetCDF attributes
    _load_netcdf_attrs(riods.tags(), result)
    result = _decode_datetime_cf(
        result, decode_times=decode_times, decode_timedelta=decode_timedelta
    )

    # make sure the _FillValue is correct dtype
    if "_FillValue" in attrs:
        attrs["_FillValue"] = result.dtype.type(attrs["_FillValue"])

    # handle encoding
    _handle_encoding(result, mask_and_scale, masked, da_name)

    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    result.rio.write_transform(_rio_transform(riods), inplace=True)
    if riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)
    if has_gcps:
        result.rio.write_gcps(*riods.gcps, inplace=True)

    if chunks is not None:
        result = _prepare_dask(result, riods, filename, chunks)

    # Make the file closeable
    result.set_close(manager.close)
    result.rio._manager = manager

    # add file path to encoding
    result.encoding["source"] = riods.name
    result.encoding["rasterio_dtype"] = str(riods.dtypes[0])
    return result
def test_wrapper(self):
    original = indexing.LazilyOuterIndexedArray(np.arange(10))
    wrapped = indexing.MemoryCachedArray(original)
    assert_array_equal(wrapped, np.arange(10))
    assert isinstance(wrapped.array, indexing.NumpyIndexingAdapter)
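# A short sketch of the caching behaviour asserted above (again using
# private, version-dependent xarray internals): the first coercion to an
# ndarray swaps the lazy inner array for an in-memory adapter, so later
# reads never touch the original source.
import numpy as np
from xarray.core import indexing

cached = indexing.MemoryCachedArray(
    indexing.LazilyOuterIndexedArray(np.arange(10)))
print(type(cached.array).__name__)  # LazilyOuterIndexedArray: not read yet
np.asarray(cached)                  # first access materialises the data
print(type(cached.array).__name__)  # NumpyIndexingAdapter: cached in memory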
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    variable=None,
    group=None,
    default_name=None,
    **open_kwargs,
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename: str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates: bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock: False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when
        using dask's multithreaded backend.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    data : DataArray
        The newly created DataArray.
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            src_nodata=vrt.src_nodata,
            dst_nodata=vrt.dst_nodata,
            tolerance=vrt.tolerance,
            transform=vrt.transform,
            width=vrt.width,
            height=vrt.height,
            warp_extras=vrt.warp_extras,
        )

    if lock is None:
        lock = RASTERIO_LOCK

    # ensure default for sharing is False
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs["sharing"] = open_kwargs.get("sharing", False)
    manager = CachingFileManager(
        rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs
    )
    riods = manager.acquire()

    # open the subdatasets if they exist
    if riods.subdatasets:
        return _load_subdatasets(
            riods=riods,
            group=group,
            variable=variable,
            parse_coordinates=parse_coordinates,
            chunks=chunks,
            cache=cache,
            lock=lock,
            masked=masked,
        )

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")

    coords = OrderedDict()
    coords["band"] = np.asarray(riods.indexes)

    # parse tags
    attrs, encoding = _get_rasterio_attrs(riods=riods, masked=masked)
    _parse_driver_tags(riods=riods, attrs=attrs, coords=coords)

    # Get geospatial coordinates
    transform = _rio_transform(riods)
    if parse_coordinates and transform.is_rectilinear:
        # 1d coordinates
        coords.update(
            affine_to_coords(riods.transform, riods.width, riods.height))
    elif parse_coordinates:
        # 2d coordinates
        warnings.warn(
            "The file coordinates' transformation isn't "
            "rectilinear: xarray won't parse the coordinates "
            "in this case. Set `parse_coordinates=False` to "
            "suppress this warning.",
            RuntimeWarning,
            stacklevel=3,
        )

    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(manager, lock, vrt_params, masked=masked)
    )

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    # create the output data array
    da_name = attrs.pop("NETCDF_VARNAME", default_name)
    result = DataArray(
        data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs,
        name=da_name
    )
    result.encoding = encoding

    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        result = _prepare_dask(result, riods, filename, chunks)

    # Make the file closeable
    result._file_obj = manager
    return result
def open_store_variable(self, name, var):
    """Turn CDMRemote variable into something like a numpy.ndarray."""
    data = indexing.LazilyOuterIndexedArray(CDMArrayWrapper(name, self))
    return Variable(var.dimensions, data,
                    {a: getattr(var, a) for a in var.ncattrs()})