Example no. 1 (score: 0)
def unsqueeze_dataset(ds: xr.Dataset,
                      dim: str,
                      coord: int = 0,
                      pos: int = 0) -> xr.Dataset:
    """Apply ``unsqueeze_data_array`` to every data variable of *ds*.

    Each variable is passed the same *dim*, *coord* and *pos* arguments
    (presumably inserting dimension *dim* at axis *pos* with coordinate
    value *coord* — see ``unsqueeze_data_array``), with variable
    attributes preserved.

    :param ds: dataset whose variables are transformed
    :param dim: name of the dimension handed to ``unsqueeze_data_array``
    :param coord: coordinate value handed to ``unsqueeze_data_array``
    :param pos: position handed to ``unsqueeze_data_array``
    :return: the transformed dataset
    """
    return ds.apply(unsqueeze_data_array,
                    keep_attrs=True,
                    dim=dim,
                    coord=coord,
                    pos=pos)
Example no. 2 (score: 0)
def _resample_dataset(ds_master: xr.Dataset, ds_replica: xr.Dataset,
                      method_us: int, method_ds: int,
                      monitor: Monitor) -> xr.Dataset:
    """
    Spatially resample every variable of the replica dataset onto the
    master's lat/lon grid.

    Works only when both datasets carry (time, lat, lon) dimensions.
    Dataset-level attributes are not propagated (CDM attribute set is
    still undecided).

    :param ds_master: xr.Dataset providing the target lat/lon grid
    :param ds_replica: xr.Dataset to be resampled onto the master's grid
    :param method_us: upsampling interpolation method, see resampling.py
    :param method_ds: downsampling interpolation method, see resampling.py
    :param monitor: a progress monitor.
    :return: xr.Dataset the resampled replica dataset
    """
    # Intersect the two grids. The resulting bounds must fall on pixel
    # boundaries of both datasets in both spatial dimensions.
    min_lat, max_lat = _find_intersection(ds_master['lat'].values,
                                          ds_replica['lat'].values,
                                          global_bounds=(-90, 90))
    min_lon, max_lon = _find_intersection(ds_master['lon'].values,
                                          ds_replica['lon'].values,
                                          global_bounds=(-180, 180))

    # Plain label slicing instead of the subset operation: subset may by
    # design produce datasets crossing the anti-meridian, and such a
    # disjoint dataset cannot be handled by our resampling methods.
    sel_lat = slice(min_lat, max_lat)
    sel_lon = slice(min_lon, max_lon)

    grid_lon = ds_master['lon'].sel(lon=sel_lon)
    grid_lat = ds_master['lat'].sel(lat=sel_lat)
    ds_replica = ds_replica.sel(lon=sel_lon, lat=sel_lat)

    # Identical spatial definitions: nothing to resample.
    if _grids_equal(ds_master, ds_replica):
        return ds_replica

    with monitor.starting("coregister dataset", len(ds_replica.data_vars)):
        retset = ds_replica.apply(_resample_array,
                                  keep_attrs=True,
                                  lon=grid_lon,
                                  lat=grid_lat,
                                  method_us=method_us,
                                  method_ds=method_ds,
                                  parent_monitor=monitor)

    return adjust_spatial_attrs(retset)
Example no. 3 (score: 0)
def _resample_dataset(ds_master: xr.Dataset, ds_replica: xr.Dataset, method_us: int, method_ds: int, monitor: Monitor) -> xr.Dataset:
    """
    Resample the replica dataset onto the master's spatial grid.

    All variables of the replica are resampled; both datasets must have
    (time, lat, lon) dimensions. Dataset attributes are not propagated
    because the CDM attribute set is currently undecided.

    :param ds_master: xr.Dataset whose lat/lon coordinates define the target grid
    :param ds_replica: xr.Dataset that is resampled onto the master's grid
    :param method_us: interpolation method for upsampling, see resampling.py
    :param method_ds: interpolation method for downsampling, see resampling.py
    :param monitor: a progress monitor.
    :return: xr.Dataset the resampled replica dataset
    """
    # Determine the lat/lon intersection of both grids; the bounds fall
    # on pixel boundaries for both spatial dimensions of both datasets.
    lat_bounds = _find_intersection(ds_master['lat'].values,
                                    ds_replica['lat'].values,
                                    global_bounds=(-90, 90))
    lon_bounds = _find_intersection(ds_master['lon'].values,
                                    ds_replica['lon'].values,
                                    global_bounds=(-180, 180))

    # Subset with simple label slices rather than the subset operation,
    # which may by design yield anti-meridian-crossing (disjoint)
    # datasets that our current resampling methods cannot process.
    lat_slice = slice(*lat_bounds)
    lon_slice = slice(*lon_bounds)

    lon = ds_master['lon'].sel(lon=lon_slice)
    lat = ds_master['lat'].sel(lat=lat_slice)
    ds_replica = ds_replica.sel(lon=lon_slice, lat=lat_slice)

    # Skip the work entirely when the grids already match.
    if _grids_equal(ds_master, ds_replica):
        return ds_replica

    with monitor.starting("coregister dataset", len(ds_replica.data_vars)):
        resample_kwargs = {
            'lon': lon,
            'lat': lat,
            'method_us': method_us,
            'method_ds': method_ds,
            'parent_monitor': monitor,
        }
        retset = ds_replica.apply(_resample_array, keep_attrs=True, **resample_kwargs)

    return adjust_spatial_attrs(retset)
Example no. 4 (score: 0)
def _resample_dataset(ds_master: xr.Dataset, ds_slave: xr.Dataset, method_us: int, method_ds: int) -> xr.Dataset:
    """
    Resample slave onto the grid of the master.

    This does spatial resampling of the whole dataset, i.e., all
    variables in the slave dataset. This method works only if both
    datasets have (time, lat, lon) dimensions.

    Note that dataset attributes are not propagated due to currently
    undecided CDM attributes' set.

    :param ds_master: xr.Dataset whose lat/lon coordinates are used as the resampling grid
    :param ds_slave: xr.Dataset that will be resampled on the master's grid
    :param method_us: Interpolation method for upsampling, see resampling.py
    :param method_ds: Interpolation method for downsampling, see resampling.py
    :return: xr.Dataset The resampled slave dataset
    """
    # The master's coordinates define the target grid.
    lon = ds_master['lon']
    lat = ds_master['lat']

    # Resample every variable of the slave onto (lat, lon).
    return ds_slave.apply(_resample_array,
                          lon=lon,
                          lat=lat,
                          method_us=method_us,
                          method_ds=method_ds)
Example no. 5 (score: 0)
def zeros_like(ds: xr.Dataset, override_chunks=None):
    """Performant implementation of zeros_like.

    ``xr.zeros_like(ds).chunk(chunks)`` is very slow for datasets with
    many chunks (original wording said "changes" — presumably a typo),
    so each variable is zeroed individually via
    ``_zeros_like_dataarray`` with attributes preserved.

    Parameters
    ----------
    ds : xr.Dataset
        Input dataset with dask-backed data variables.
    override_chunks : dict
        Dimension chunk-size pairs indicating any dimensions one would
        like to override the original chunk sizes along. For any
        dimensions that are not present, zeros_like will use the chunk
        size along that dimension for each variable in the input
        Dataset.

    Returns
    -------
    xr.Dataset
    """
    zero_kwargs = {'override_chunks': override_chunks, 'keep_attrs': True}
    return ds.apply(_zeros_like_dataarray, **zero_kwargs)
Example no. 6 (score: 0)
def cast_back(data: xarray.Dataset,
              measurements: Iterable[Dict[str, Any]]) -> xarray.Dataset:
    """
    Cast calculated statistic `Dataset` into intended data types.

    When going through intermediate representation as floats, restore
    `nodata` values in place of `NaN`s.

    :param data: dataset whose variables are cast back; its variable
        names must exactly match the measurement names
    :param measurements: measurement descriptors, each a dict with at
        least a 'name' and 'dtype' key and optionally 'nodata'
    :return: dataset with every variable cast to its intended dtype,
        variable attributes preserved
    """
    assert isinstance(data, xarray.Dataset)
    measurements = {
        measurement['name']: measurement
        for measurement in measurements
    }

    # Variables and measurements must correspond one-to-one.
    data_vars = list(data.data_vars)
    assert set(data_vars) == set(measurements.keys())

    def cast(da):
        """ Cast `DataArray` into intended type. """
        output_measurement = measurements[da.name]
        expected_dtype = np.dtype(output_measurement['dtype'])
        actual_dtype = da.dtype

        if actual_dtype.kind != 'f' or 'nodata' not in output_measurement:
            # Did not go through intermediate float representation,
            # or nodata is unspecified: a plain dtype cast suffices.
            if expected_dtype == actual_dtype:
                return da
            else:
                return da.astype(expected_dtype)

        # Float intermediate with a declared nodata: replace NaNs with
        # the nodata value after casting (astype copies, so the write
        # below does not mutate the input array).
        nans = np.isnan(da.values)
        clone = da.astype(expected_dtype)
        clone.values[nans] = output_measurement['nodata']
        return clone

    return data.apply(cast, keep_attrs=True)