Example #1
def test_file_cache_maxsize():
    with pytest.raises(ValueError):
        xarray.set_options(file_cache_maxsize=0)
    original_size = FILE_CACHE.maxsize
    with xarray.set_options(file_cache_maxsize=123):
        assert FILE_CACHE.maxsize == 123
    assert FILE_CACHE.maxsize == original_size
Example #2
def test_nested_options():
    original = OPTIONS['display_width']
    with xarray.set_options(display_width=1):
        assert OPTIONS['display_width'] == 1
        with xarray.set_options(display_width=2):
            assert OPTIONS['display_width'] == 2
        assert OPTIONS['display_width'] == 1
    assert OPTIONS['display_width'] == original
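
The tests above use set_options as a context manager. It can equally be called as a plain function, in which case the options are set globally; a minimal sketch:

import xarray

# Called without `with`: the option stays in effect until changed again.
xarray.set_options(display_width=100)

# Called as a context manager: the previous value is restored on exit.
with xarray.set_options(display_width=40):
    pass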
Example #3
def test_keep_attrs():
    with pytest.raises(ValueError):
        xarray.set_options(keep_attrs='invalid_str')
    with xarray.set_options(keep_attrs=True):
        assert OPTIONS['keep_attrs']
    with xarray.set_options(keep_attrs=False):
        assert not OPTIONS['keep_attrs']
    with xarray.set_options(keep_attrs='default'):
        assert _get_keep_attrs(default=True)
        assert not _get_keep_attrs(default=False)
Example #4
    def test_dataset_attr_retention(self):
        # Use .mean() for all tests: a typical reduction operation
        ds = create_test_dataset_attrs()
        original_attrs = ds.attrs

        # Test default behaviour
        result = ds.mean()
        assert result.attrs == {}
        with xarray.set_options(keep_attrs='default'):
            result = ds.mean()
            assert result.attrs == {}

        with xarray.set_options(keep_attrs=True):
            result = ds.mean()
            assert result.attrs == original_attrs

        with xarray.set_options(keep_attrs=False):
            result = ds.mean()
            assert result.attrs == {}
Example #5
    def test_groupby_attr_retention(self):
        da = xarray.DataArray([1, 2, 3], [('x', [1, 1, 2])])
        da.attrs = {'attr1': 5, 'attr2': 'history',
                    'attr3': {'nested': 'more_info'}}
        original_attrs = da.attrs

        # Test default behaviour
        result = da.groupby('x').sum(keep_attrs=True)
        assert result.attrs == original_attrs
        with xarray.set_options(keep_attrs='default'):
            result = da.groupby('x').sum(keep_attrs=True)
            assert result.attrs == original_attrs

        with xarray.set_options(keep_attrs=True):
            result1 = da.groupby('x')
            result = result1.sum()
            assert result.attrs == original_attrs

        with xarray.set_options(keep_attrs=False):
            result = da.groupby('x').sum()
            assert result.attrs == {}
Example #6
def test_display_width():
    with pytest.raises(ValueError):
        xarray.set_options(display_width=0)
    with pytest.raises(ValueError):
        xarray.set_options(display_width=-10)
    with pytest.raises(ValueError):
        xarray.set_options(display_width=3.5)
Example #7
def test_alignment():
    ds1 = xr.Dataset({'a': ('x', [1, 2])}, {'x': [0, 1]})
    ds2 = xr.Dataset({'a': ('x', [2, 3]), 'b': 4}, {'x': [1, 2]})

    actual = np.add(ds1, ds2)
    expected = xr.Dataset({'a': ('x', [4])}, {'x': [1]})
    assert_identical_(actual, expected)

    with xr.set_options(arithmetic_join='outer'):
        actual = np.add(ds1, ds2)
        expected = xr.Dataset({'a': ('x', [np.nan, 4, np.nan]), 'b': np.nan},
                              coords={'x': [0, 1, 2]})
        assert_identical_(actual, expected)
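
Besides 'inner' (the default) and 'outer', arithmetic_join also accepts 'left', 'right', and 'exact'. A minimal sketch of those modes, using the same toy data as above:

import numpy as np
import xarray as xr

ds1 = xr.Dataset({'a': ('x', [1, 2])}, {'x': [0, 1]})
ds2 = xr.Dataset({'a': ('x', [2, 3])}, {'x': [1, 2]})

# 'left' aligns to ds1's labels; the unmatched label is filled with NaN.
with xr.set_options(arithmetic_join='left'):
    assert list((ds1 + ds2).x.values) == [0, 1]

# 'exact' refuses to align and raises if the indexes differ.
with xr.set_options(arithmetic_join='exact'):
    try:
        ds1 + ds2
    except ValueError:
        pass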
Example #8
def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex):
    days = [1., 2., 3.]
    da = DataArray(days, coords=[days], dims=['time'], name='test')
    ds = da.to_dataset()

    for v in ['test', 'time']:
        ds[v].attrs['units'] = 'days since 2001-01-01'
        ds[v].attrs['calendar'] = calendar

    if (not has_cftime and enable_cftimeindex and
       calendar not in coding.times._STANDARD_CALENDARS):
        with pytest.raises(ValueError):
            with set_options(enable_cftimeindex=enable_cftimeindex):
                ds = decode_cf(ds)
    else:
        with set_options(enable_cftimeindex=enable_cftimeindex):
            ds = decode_cf(ds)

        if (enable_cftimeindex and
           calendar not in coding.times._STANDARD_CALENDARS):
            assert ds.test.dtype == np.dtype('O')
        else:
            assert ds.test.dtype == np.dtype('M8[ns]')
Example #9
def test_concat_cftimeindex(date_type, enable_cftimeindex):
    with xr.set_options(enable_cftimeindex=enable_cftimeindex):
        da1 = xr.DataArray(
            [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]],
            dims=['time'])
        da2 = xr.DataArray(
            [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]],
            dims=['time'])
        da = xr.concat([da1, da2], dim='time')

    if enable_cftimeindex and has_cftime:
        assert isinstance(da.indexes['time'], CFTimeIndex)
    else:
        assert isinstance(da.indexes['time'], pd.Index)
        assert not isinstance(da.indexes['time'], CFTimeIndex)
Example #10
def test_alignment():
    ds1 = xr.Dataset({'a': ('x', [1, 2])}, {'x': [0, 1]})
    ds2 = xr.Dataset({'a': ('x', [2, 3]), 'b': 4}, {'x': [1, 2]})

    actual = np.add(ds1, ds2)
    expected = xr.Dataset({'a': ('x', [4])}, {'x': [1]})
    assert_identical_(actual, expected)

    with xr.set_options(arithmetic_join='outer'):
        actual = np.add(ds1, ds2)
        expected = xr.Dataset({
            'a': ('x', [np.nan, 4, np.nan]),
            'b': np.nan
        },
                              coords={'x': [0, 1, 2]})
        assert_identical_(actual, expected)
Example #11
def test_repr_file_collapsed(tmp_path):
    arr = xr.DataArray(np.arange(300), dims="test")
    arr.to_netcdf(tmp_path / "test.nc", engine="netcdf4")

    with xr.open_dataarray(tmp_path / "test.nc") as arr, xr.set_options(
        display_expand_data=False
    ):
        actual = formatting.array_repr(arr)
        expected = dedent(
            """\
        <xarray.DataArray (test: 300)>
        array([  0,   1,   2, ..., 297, 298, 299])
        Dimensions without coordinates: test"""
        )

        assert actual == expected
Example #12
def apply_correction(x: xr.DataArray,
                     factor: xr.DataArray,
                     kind: Optional[str] = None) -> xr.DataArray:
    """Apply the additive or multiplicative correction/adjustment factors.

    If kind is not given, default to the one stored in the "kind" attribute of factor.
    """
    kind = kind or factor.get("kind", None)
    with xr.set_options(keep_attrs=True):
        if kind == ADDITIVE:
            out = x + factor
        elif kind == MULTIPLICATIVE:
            out = x * factor
        else:
            raise ValueError
    return out
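
A toy invocation of the helper above. ADDITIVE and MULTIPLICATIVE are module constants in the original source; the stand-in values here are an assumption, matching xclim.sdba's "+" and "*":

import numpy as np
import xarray as xr

ADDITIVE, MULTIPLICATIVE = "+", "*"  # assumed stand-ins for the module constants

x = xr.DataArray(np.arange(3.0), dims="time", attrs={"units": "K"})
factor = xr.DataArray(np.full(3, 1.5), dims="time")

out = apply_correction(x, factor, kind=ADDITIVE)
# keep_attrs=True propagates the attributes of the first operand.
assert out.attrs["units"] == "K"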
Example #13
    def test_repr_html(self, converted_zarr):
        zarr_path_string = str(converted_zarr.absolute())
        ed = EchoData(converted_raw_path=converted_zarr)
        assert hasattr(ed, "_repr_html_")
        html_repr = ed._repr_html_().strip()
        assert (
            f"""<div class="xr-obj-type">EchoData: standardized raw data from {zarr_path_string}</div>"""
            in html_repr
        )

        with xr.set_options(display_style="text"):
            html_fallback = ed._repr_html_().strip()

        assert html_fallback.startswith(
            "<pre>EchoData"
        ) and html_fallback.endswith("</pre>")
Example #14
def test_alignment():
    ds1 = xr.Dataset({"a": ("x", [1, 2])}, {"x": [0, 1]})
    ds2 = xr.Dataset({"a": ("x", [2, 3]), "b": 4}, {"x": [1, 2]})

    actual = np.add(ds1, ds2)
    expected = xr.Dataset({"a": ("x", [4])}, {"x": [1]})
    assert_identical_(actual, expected)

    with xr.set_options(arithmetic_join="outer"):
        actual = np.add(ds1, ds2)
        expected = xr.Dataset({
            "a": ("x", [np.nan, 4, np.nan]),
            "b": np.nan
        },
                              coords={"x": [0, 1, 2]})
        assert_identical_(actual, expected)
Example #15
def _to_quarter(
    freq: str,
    pr: Optional[xarray.DataArray] = None,
    tas: Optional[xarray.DataArray] = None,
) -> xarray.DataArray:
    """Convert daily, weekly or monthly time series to quarterly time series according to ANUCLIM specifications."""
    if freq.upper().startswith("D"):
        if tas is not None:
            tas = tg_mean(tas, freq="7D")

        if pr is not None:
            pr = precip_accumulation(pr, freq="7D")
            pr.attrs["units"] = "mm/week"

        freq = "W"

    if freq.upper().startswith("W"):
        window = 13
        u = units.week

    elif freq.upper().startswith("M"):
        window = 3
        u = units.month

    else:
        raise NotImplementedError(
            f'Unknown input time frequency "{freq}": must be one of "D", "W" or "M".'
        )

    if tas is None and pr is None:
        # Without at least one input, `out` below would never be defined.
        raise ValueError("Supply at least one of `pr` or `tas`.")

    if tas is not None:
        tas = ensure_chunk_size(tas, time=np.ceil(window / 2))
    if pr is not None:
        pr = ensure_chunk_size(pr, time=np.ceil(window / 2))

    with xarray.set_options(keep_attrs=True):
        if pr is not None:
            pr = pint_multiply(pr, 1 * u, "mm")
            out = pr.rolling(time=window, center=False).sum()
            out.attrs = pr.attrs
            out.attrs["units"] = "mm"

        if tas is not None:
            out = tas.rolling(time=window, center=False).mean(skipna=False)
            out.attrs = tas.attrs

    out = ensure_chunk_size(out, time=-1)
    return out
Example #16
def prcptot_warmcold_quarter(
    pr: xarray.DataArray,
    tas: xarray.DataArray,
    op: str = None,
    src_timestep: str = None,
    freq: str = "YS",
) -> xarray.DataArray:
    r"""ANUCLIM Total precipitation of warmest/coldest quarter.

    The warmest (or coldest) quarter of the year is determined, and the total
    precipitation of this period is calculated. If the input data frequency is daily ("D") or weekly ("W"),
    quarters are defined as 13-week periods; otherwise they are 3 months.

    Parameters
    ----------
    pr : xarray.DataArray
      Total precipitation rate at daily, weekly, or monthly frequency.
    tas : xarray.DataArray
      Mean temperature [℃] or [K] at daily, weekly, or monthly frequency.
    op : str
      Operation to perform: 'warmest' calculate for the warmest quarter ; 'coldest' calculate for the coldest quarter.
    src_timestep : {'D', 'W', 'M'}
      Input data time frequency - One of daily, weekly or monthly.
    freq : str
      Resampling frequency; Defaults to "YS".

    Returns
    -------
    xarray.DataArray
       Total precipitation values of the warmest/coldest quarter of each year.

    Notes
    -----
    According to the ANUCLIM user guide (https://fennerschool.anu.edu.au/files/anuclim61.pdf, ch. 6), input
    values should be at a weekly (or monthly) frequency. However, the xclim.indices implementation here will also
    calculate the result from input data at daily frequency. Weekly or monthly input values, if desired,
    should be calculated prior to calling the function.
    """
    # determine input data frequency
    tas_qrt = _to_quarter(src_timestep, tas=tas)
    pr_qrt = _to_quarter(src_timestep, pr=pr)

    xr_op = _xr_argops[op]
    with xarray.set_options(keep_attrs=True):
        out = _from_other_arg(criteria=tas_qrt, output=pr_qrt, op=xr_op, freq=freq)
        out.attrs = pr_qrt.attrs
        return out
Example #17
def _add_nudging_tendencies(merged: xr.Dataset):
    with xr.set_options(keep_attrs=True):
        Q1 = merged.Q1 + merged.air_temperature_tendency_due_to_nudging
        Q2 = merged.Q2 + merged.specific_humidity_tendency_due_to_nudging
    Q1.attrs.update({
        "long_name":
        merged.Q1.attrs.get("long_name", "") + " plus dynamics nudging tendency",
        "description":
        merged.Q1.attrs.get("description", "") + " + dynamics nudging tendency",
    })
    Q2.attrs.update({
        "long_name":
        merged.Q2.attrs.get("long_name", "") + " plus dynamics nudging tendency",
        "description":
        merged.Q2.attrs.get("description", "") + " + dynamics nudging tendency",
    })
    return Q1, Q2
Example #18
def tg_mean_wetdry_quarter(
    tas: xarray.DataArray,
    pr: xarray.DataArray,
    op: str = None,
    freq: str = "YS",
) -> xarray.DataArray:
    r"""ANUCLIM Mean temperature of wettest/driest quarter.

    The wettest (or driest) quarter of the year is determined, and the mean temperature of this period is calculated.
    If the input data frequency is daily ("D") or weekly ("W"), quarters are defined as 13-week periods;
    otherwise they are 3 months.

    Parameters
    ----------
    tas : xarray.DataArray
      Mean temperature at daily, weekly, or monthly frequency.
    pr : xarray.DataArray
      Total precipitation rate at daily, weekly, or monthly frequency.
    op : {'wettest', 'driest'}
      Operation to perform: 'wettest' calculate for the wettest quarter; 'driest' calculate for the driest quarter.
    freq : str
      Resampling frequency.

    Returns
    -------
    xarray.DataArray, [same as tas]
       Mean temperature of {op} quarter

    Notes
    -----
    According to the ANUCLIM user guide (https://fennerschool.anu.edu.au/files/anuclim61.pdf, ch. 6), input
    values should be at a weekly (or monthly) frequency. However, the xclim.indices implementation here will also
    calculate the result from input data at daily frequency. Weekly or monthly input values, if desired,
    should be calculated prior to calling the function.
    """
    tas_qrt = _to_quarter(tas=tas)
    pr_qrt = _to_quarter(pr=pr)

    xr_op = _xr_argops[op]
    with xarray.set_options(keep_attrs=True):
        out = _from_other_arg(criteria=pr_qrt,
                              output=tas_qrt,
                              op=xr_op,
                              freq=freq)
        out.attrs = tas.attrs
        return out
Example #19
def test_concat_cftimeindex(date_type, enable_cftimeindex):
    with xr.set_options(enable_cftimeindex=enable_cftimeindex):
        da1 = xr.DataArray([1., 2.],
                           coords=[[date_type(1, 1, 1),
                                    date_type(1, 2, 1)]],
                           dims=['time'])
        da2 = xr.DataArray([3., 4.],
                           coords=[[date_type(1, 3, 1),
                                    date_type(1, 4, 1)]],
                           dims=['time'])
        da = xr.concat([da1, da2], dim='time')

    if enable_cftimeindex and has_cftime:
        assert isinstance(da.indexes['time'], CFTimeIndex)
    else:
        assert isinstance(da.indexes['time'], pd.Index)
        assert not isinstance(da.indexes['time'], CFTimeIndex)
Example #20
def _downsample_only(ds: xr.Dataset, freq_label: str,
                     method: str) -> xr.Dataset:
    """Resample in time, only if given freq_label is lower frequency than time
    sampling of given dataset ds"""
    ds_freq = ds.time.values[1] - ds.time.values[0]
    if ds_freq < pd.to_timedelta(freq_label):
        resampled = ds.resample(time=freq_label, label="right")
        if method == "nearest":
            return resampled.nearest()
        elif method == "mean":
            with xr.set_options(keep_attrs=True):
                return resampled.mean()
        else:
            raise ValueError(
                f"Don't know how to resample with method={method}.")
    else:
        return ds
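
A minimal usage sketch of this helper, with hourly toy data downsampled to daily means (the variable attributes survive because of keep_attrs=True):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", periods=48, freq="H")
ds = xr.Dataset({"t": ("time", np.arange(48.0))}, {"time": time})
ds.t.attrs["units"] = "K"

daily = _downsample_only(ds, "1D", method="mean")
assert daily.time.size == 2
assert daily.t.attrs["units"] == "K"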
Example #21
def _calc_ds_diurnal_cycle(ds):
    """
    Calculates the diurnal cycle for all variables.  Expects
    time dimension and longitude variable "lon".
    """
    local_time = vcm.local_time(ds, time="time", lon_var="lon")
    local_time.attrs = {"long_name": "local time", "units": "hour"}

    local_time = np.floor(local_time)  # equivalent to hourly binning
    ds["local_time"] = local_time
    diurnal_cycles = xr.Dataset()
    for var in ds.data_vars:
        with xr.set_options(keep_attrs=True):
            diurnal_cycles[var] = (
                ds[[var, "local_time"]].groupby("local_time").mean()[var].load()
            )
    return diurnal_cycles
Example #22
def season_clim(ds, calendar='standard'):
    # Make a DataArray with the number of days in each month, size = len(time)
    month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),
                                coords=[ds.time],
                                name='month_length')
    # Calculate the weights by grouping by 'time.season'
    weights = month_length.groupby('time.season') / month_length.groupby(
        'time.season').sum()

    # Test that the sum of the weights for each season is 1.0
    np.testing.assert_allclose(
        weights.groupby('time.season').sum().values, np.ones(4))

    # Calculate the weighted average
    with xr.set_options(keep_attrs=True):
        return (ds * weights).groupby('time.season').sum(dim='time',
                                                         skipna=False)
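
The same weighting can be built without the external get_dpm helper by using xarray's built-in dt.days_in_month accessor; a sketch for standard-calendar data:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2001-12-31", freq="MS")
ds = xr.Dataset({"tas": ("time", np.random.rand(time.size))}, {"time": time})

month_length = ds.time.dt.days_in_month
weights = (month_length.groupby("time.season") /
           month_length.groupby("time.season").sum())
np.testing.assert_allclose(weights.groupby("time.season").sum().values,
                           np.ones(4))

with xr.set_options(keep_attrs=True):
    season_mean = (ds * weights).groupby("time.season").sum(dim="time",
                                                            skipna=False)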
Example #23
def _mean_bias_removal_quick(hind, bias, dim):
    """Quick removal of mean bias over all initializations.

    Args:
        hind (xr.object): hindcast.
        bias (xr.object): bias.
        dim (str): Time dimension name in bias.

    Returns:
        xr.object: bias removed hind

    """
    with xr.set_options(keep_attrs=True):
        bias_removed_hind = (hind.groupby(f"{dim}.dayofyear") -
                             bias.groupby(f"{dim}.dayofyear").mean())
    bias_removed_hind.attrs = hind.attrs
    return bias_removed_hind
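
A toy invocation of the helper above, with hypothetical daily data (the helper restores hind's attributes explicitly):

import numpy as np
import pandas as pd
import xarray as xr

init = pd.date_range("2000-01-01", periods=10, freq="D")
hind = xr.Dataset({"t": ("init", np.random.rand(10))},
                  coords={"init": init}, attrs={"source": "toy"})
bias = xr.Dataset({"t": ("init", np.full(10, 0.5))}, coords={"init": init})

corrected = _mean_bias_removal_quick(hind, bias, dim="init")
assert corrected.attrs == hind.attrs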
Example #24
def test_adapt_freq(use_dask):
    time = pd.date_range("1990-01-01", "2020-12-31", freq="D")
    prvals = np.random.randint(0, 100, size=(time.size, 3))
    pr = xr.DataArray(
        prvals,
        coords={
            "time": time,
            "lat": [0, 1, 2]
        },
        dims=("time", "lat"),
        attrs={"units": "mm d-1"},
    )

    if use_dask:
        pr = pr.chunk({"lat": 1})
    group = Grouper("time.month")
    with xr.set_options(keep_attrs=True):
        prsim = xr.where(pr < 20, pr / 20, pr)
        prref = xr.where(pr < 10, pr / 20, pr)
    sim_ad, pth, dP0 = adapt_freq(prref, prsim, thresh="1 mm d-1", group=group)

    # Where the input is considered zero
    input_zeros = sim_ad.where(prsim <= 1)

    # The proportion of corrected values (time.size * 3 * 0.2 is the theoretical number of values under 1 in prsim)
    dP0_out = (input_zeros > 1).sum() / (time.size * 3 * 0.2)
    np.testing.assert_allclose(dP0_out, 0.5, atol=0.1)

    # Assert that corrected values were generated in the open interval (1, 20 + tol)
    corrected = (input_zeros.where(input_zeros > 1).stack(
        flat=["lat", "time"]).reset_index("flat").dropna("flat"))
    assert ((corrected < 20.1) & (corrected > 1)).all()

    # Assert that non-corrected values are untouched
    # Again we add a 0.5 tol because of randomness.
    xr.testing.assert_equal(
        sim_ad.where(prsim > 20.1),
        prsim.where(prsim > 20.5).transpose("lat", "time"),
    )
    # Assert that Pth and dP0 are approx the good values
    np.testing.assert_allclose(pth, 20, rtol=0.05)
    np.testing.assert_allclose(dP0, 0.5, atol=0.14)
    assert sim_ad.units == "mm d-1"
    assert sim_ad.attrs["references"].startswith("Themeßl")
    assert pth.units == "mm d-1"
Example #25
def replace_nans_with_zeroes(ds, verbose=True):
    '''
    Replaces NaN values with zeroes for each variable
    within an xarray Dataset.

    Args:
    ----
        ds : xarray Dataset
            The input dataset, containing one or more data variables.

    Keyword Args (optional):
    ------------------------
        verbose : boolean
            Set this switch to print out the variable name, as well
            as the min and max of the variable.  This will illustrate
            the replacement of NaNs with zeroes.
    '''

    # Keep all netCDF attributes
    with xr.set_options(keep_attrs=True):

        # Loop over all variables in the Dataset
        for v in ds.data_vars.keys():

            # OPTIONAL STEP:
            # Xarray will try to convert missing values to NaNs,
            # so you may need to replace these with zeroes.
            #
            # If your netCDF files represent e.g. emissions,
            # or other physical quantities, you may want to
            # replace these with zeros, so that NaNs won't
            # get read into atmospheric models, etc.
            ds[v] = ds[v].fillna(0.0)

            # OPTIONAL: Print min & max for each variable
            # Comment out if you wish
            if verbose:
                print('{} : {} {}'.format(v, np.min(ds[v].values),
                                          np.max(ds[v].values)))

    # Return the modified Dataset
    return ds
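
A quick check of the function above on a toy Dataset (the variable name and units are made up for illustration):

import numpy as np
import xarray as xr

ds = xr.Dataset({"emis": ("x", [1.0, np.nan, 3.0])})
ds.emis.attrs["units"] = "kg m-2 s-1"

ds = replace_nans_with_zeroes(ds, verbose=False)
assert float(ds.emis[1]) == 0.0
assert ds.emis.attrs["units"] == "kg m-2 s-1"  # attrs preserved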
Example #26
def multiply_area(dset, area=None):
    """
    DESCRIPTION:
    ===========
        Multiply gridded data by its area. If the dataset does not
        contain an area variable, the area dataarray needs to be provided.

    """
    if area is None:
        try:
            area = dset.area.values
        except AttributeError:
            raise ValueError('dataset does not contain a data variable called area')

    with xr.set_options(keep_attrs=True):
        area_multiplied = dset[dset.varName] * area

    return area_multiplied
Example #27
def add_tendency(state: Any, tendency: State, dt: float) -> State:
    """Given state and tendency prediction, return updated state.
    Returned state only includes variables updated by ML model."""

    with xr.set_options(keep_attrs=True):
        updated = {}
        for name_ in tendency:
            name = str(name_)
            try:
                state_name = str(TENDENCY_TO_STATE_NAME[name])
            except KeyError:
                raise KeyError(
                    f"Tendency variable '{name}' does not have an entry mapping it "
                    "to a corresponding state variable to add to. "
                    "Existing tendencies with mappings to state are "
                    f"{list(TENDENCY_TO_STATE_NAME.keys())}")
            updated[state_name] = state[state_name] + tendency[name] * dt
    return updated  # type: ignore
Example #28
def test_repr_HC(
    hind_ds_initialized_1d,
    hist_ds_uninitialized_1d,
    observations_ds_1d,
    display_style,
):
    """Test html repr."""
    with xr.set_options(display_style=display_style):
        he = HindcastEnsemble(hind_ds_initialized_1d)
        display(he)
        he = he.add_uninitialized(hist_ds_uninitialized_1d)
        display(he)
        he = he.add_observations(observations_ds_1d)
        display(he)
        # no uninit
        he = HindcastEnsemble(hind_ds_initialized_1d)
        he = he.add_observations(observations_ds_1d)
        display(he)
Example #29
def zonal_bias_3d(diag_arg: DiagArg):
    logger.info("Preparing zonal mean bias (3d)")
    prognostic, verification, grid = (
        diag_arg.prediction,
        diag_arg.verification,
        diag_arg.grid,
    )
    zonal_means = xr.Dataset()
    common_vars = list(
        set(prognostic.data_vars).intersection(verification.data_vars))
    for var in common_vars:
        logger.info(f"Computing zonal+time mean biases (3d) for {var}")
        with xr.set_options(keep_attrs=True):
            zm_bias = zonal_mean(bias(verification[[var]], prognostic[[var]]),
                                 grid.lat)
            zm_bias_time_mean = time_mean(zm_bias)[var].load()
            zonal_means[var] = zm_bias_time_mean
    return zonal_means
Example #30
def zonal_mean_bias_hovmoller(diag_arg: DiagArg):

    logger.info(f"Preparing zonal mean biases (2d)")
    prognostic, verification, grid = (
        diag_arg.prediction,
        diag_arg.verification,
        diag_arg.grid,
    )
    common_vars = list(
        set(prognostic.data_vars).intersection(verification.data_vars))
    zonal_means = xr.Dataset()
    for var in common_vars:
        logger.info(f"Computing zonal mean biases (2d) over time for {var}")
        with xr.set_options(keep_attrs=True):
            zonal_means[var] = zonal_mean(
                bias(verification[[var]], prognostic[[var]]),
                grid.lat)[var].load()
    return zonal_means
Example #31
def test_repr_of_dataarray(dataarray) -> None:
    formatted = fh.array_repr(dataarray)
    assert "dim_0" in formatted
    # has an expanded data section
    assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 1
    # coords and attrs don't have any items, so they'll be disabled and collapsed
    assert (formatted.count(
        "class='xr-section-summary-in' type='checkbox' disabled >") == 2)

    with xr.set_options(display_expand_data=False):
        formatted = fh.array_repr(dataarray)
        assert "dim_0" in formatted
        # the data section is now collapsed
        assert formatted.count(
            "class='xr-array-in' type='checkbox' checked>") == 0
        # coords and attrs don't have any items, so they'll be disabled and collapsed
        assert (formatted.count(
            "class='xr-section-summary-in' type='checkbox' disabled >") == 2)
Example #32
def test_amount2rate(pr_series):
    pr = pr_series(np.ones(365 + 366 + 365), start="2019-01-01")
    am = rate2amount(pr)

    np.testing.assert_allclose(amount2rate(am), pr)

    with xr.set_options(keep_attrs=True):
        am_ms = am.resample(time="MS").sum()
        am_m = am.resample(time="M").sum()

        pr_ms = amount2rate(am_ms)
        np.testing.assert_allclose(pr_ms, 1)
        pr_m = amount2rate(am_m)
        np.testing.assert_allclose(pr_m, 1)

        am_ys = am.resample(time="YS").sum()
        pr_ys = amount2rate(am_ys)
        np.testing.assert_allclose(pr_ys, 1)
Example #33
def remove_bias(fcst, bias, method, init_dim="init_date"):
    """Remove model bias.

    Parameters
    ----------
    fcst : xarray DataArray
        Forecast array with initial date and lead time dimensions
    bias : xarray DataArray
        Bias array
    method : {'additive', 'multiplicative'}
        Bias removal method
    init_dim: str, default 'init_date'
        Name of the initial date dimension in fcst

    Returns
    -------
    fcst_bc : xarray DataArray
        Bias corrected forecast array

    Raises
    ------
    ValueError
        For invalid method
    """

    if method == "additive":
        op = operator.sub
    elif method == "multiplicative":
        op = operator.truediv
    else:
        raise ValueError(f"Unrecognised bias removal method {method}")

    with xr.set_options(keep_attrs=True):
        fcst_bc = op(fcst.groupby(f"{init_dim}.month"), bias).drop("month")

    fcst_bc.attrs["bias_correction_method"] = bias.attrs[
        "bias_correction_method"]
    try:
        fcst_bc.attrs["bias_correction_period"] = bias.attrs[
            "bias_correction_period"]
    except KeyError:
        pass

    return fcst_bc
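
A toy invocation of remove_bias with a monthly bias climatology (dimension names follow the docstring; the numbers are made up):

import numpy as np
import pandas as pd
import xarray as xr

init = pd.date_range("2000-01-01", periods=3, freq="MS")
fcst = xr.DataArray(np.ones((3, 2)),
                    coords={"init_date": init, "lead_time": [0, 1]},
                    dims=("init_date", "lead_time"))
bias = xr.DataArray(np.full((12, 2), 0.1),
                    coords={"month": np.arange(1, 13), "lead_time": [0, 1]},
                    dims=("month", "lead_time"),
                    attrs={"bias_correction_method": "additive"})

fcst_bc = remove_bias(fcst, bias, "additive")
np.testing.assert_allclose(fcst_bc, 0.9)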
Example #34
    def test_dump_ds(self):
        self.maxDiff = None
        with xr.set_options(display_width=80):
            result = self.invoke_cli(["dump", TEST_NC_FILE])

        # xarray v0.16.0 changed line truncation behaviour (see
        # https://github.com/pydata/xarray/issues/3759 ), so this test checks
        # the output against two expected values to cover both pre- and
        # post-change behaviour.
        output_template = (
            "<xarray.Dataset>\n"
            "Dimensions:        (bnds: 2, lat: 180, lon: 360, time: 5)\n"
            "Coordinates:\n"
            "  * lon            (lon) float64 -179.5 -178.5 -177.5 ... 177.5 178.5 179.5\n"
            "  * lat            (lat) float64 -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5\n"
            "  * time           (time) datetime64[ns] 2010-01-01T12:00:00 ... 2010-01-05T1%s\n"
            "    lon_bnds       (lon, bnds) float64 ...\n"
            "    lat_bnds       (lat, bnds) float64 ...\n"
            "    time_bnds      (time, bnds) datetime64[ns] ...\n"
            "Dimensions without coordinates: bnds\n"
            "Data variables:\n"
            "    precipitation  (time, lat, lon) float64 ...\n"
            "    temperature    (time, lat, lon) float64 ...\n"
            "    soil_moisture  (time, lat, lon) float64 ...\n"
            "Attributes:\n"
            "    Conventions:           CF-1.7\n"
            "    title:                 Test Cube\n"
            "    time_coverage_start:   2010-01-01T00:00:00\n"
            "    time_coverage_end:     2010-01-06T00:00:00\n"
            "    geospatial_lon_min:    -180.0\n"
            "    geospatial_lon_max:    180.0\n"
            "    geospatial_lon_units:  degrees_east\n"
            "    geospatial_lat_min:    -90.0\n"
            "    geospatial_lat_max:    90.0\n"
            "    geospatial_lat_units:  degrees_north\n"
        )

        possible_outputs = [
            output_template % '2:00:00',  # for xarray v0.15.1 and below
            output_template % '...'  # for xarray v0.16.0 and above
        ]

        self.assertIn(result.output, possible_outputs)
        self.assertEqual(0, result.exit_code)
Example #35
def temperature_seasonality(tas: xarray.DataArray) -> xarray.DataArray:
    r"""ANUCLIM temperature seasonality (coefficient of variation).

    The annual temperature coefficient of variation expressed in percent. Calculated as the standard deviation
    of temperature values for a given year expressed as a percentage of the mean of those temperatures.

    Parameters
    ----------
    tas : xarray.DataArray
      Mean temperature [℃] or [K] at daily, weekly, or monthly frequency.

    Returns
    -------
    xarray.DataArray
      The Coefficient of Variation of mean temperature values expressed in percent.

    Examples
    --------
    The following would compute for each grid cell of file `tas.day.nc` the annual temperature seasonality:

    >>> import xclim.indices as xci
    >>> t = xr.open_dataset(path_to_tas_file).tas
    >>> tday_seasonality = xci.temperature_seasonality(t)
    >>> t_weekly = xci.tg_mean(t, freq='7D')
    >>> tweek_seasonality = xci.temperature_seasonality(t_weekly)

    Notes
    -----
    For this calculation, the mean in kelvin is used. This avoids the possibility of having to
    divide by zero, but it does mean that the values are usually quite small.

    According to the ANUCLIM user guide (https://fennerschool.anu.edu.au/files/anuclim61.pdf, ch. 6), input
    values should be at a weekly (or monthly) frequency. However, the xclim.indices implementation here will also
    calculate the result from input data at daily frequency. Weekly or monthly input values, if desired, should be
    calculated prior to calling the function.
    """
    tas = convert_units_to(tas, "K")

    with xarray.set_options(keep_attrs=True):
        seas = 100 * _anuclim_coeff_var(tas)

    seas.attrs["units"] = "%"
    return seas
Example #36
def _multiplicative_std_correction(hind, spread, dim, obs=None):
    """Quick removal of std bias over all initializations without cross validation.

    Args:
        hind (xr.Dataset): hindcast.
        spread (xr.Dataset): model spread.
        dim (str): Time dimension name in bias.
        obs (xr.Dataset): observations

    Returns:
        xr.Dataset: bias removed hind

    """
    seasonality = OPTIONS["seasonality"]
    if seasonality == "weekofyear":
        # convert to datetime for weekofyear operations
        hind = convert_cftime_to_datetime_coords(hind, "init")
        spread = convert_cftime_to_datetime_coords(spread, "init")
        obs = convert_cftime_to_datetime_coords(obs, "time")

    init_groupby = f"init.{seasonality}"
    obs_groupby = f"time.{seasonality}"

    with xr.set_options(keep_attrs=True):
        model_mean_spread = spread.groupby(init_groupby).mean()
        model_member_mean = hind.mean("member").groupby(init_groupby).mean()
        # assume no trend here
        obs_spread = obs.groupby(obs_groupby).std()

        # z distribution
        init_z = (hind.groupby(init_groupby) -
                  model_member_mean).groupby(init_groupby) / model_mean_spread

        # scale with obs_spread and model mean
        init_std_corrected = (init_z.groupby(init_groupby) * obs_spread
                              ).groupby(init_groupby) + model_member_mean

    init_std_corrected.attrs = hind.attrs
    # convert back to CFTimeIndex if needed
    if isinstance(init_std_corrected.init.to_index(), pd.DatetimeIndex):
        init_std_corrected = convert_time_index(init_std_corrected, "init",
                                                "hindcast")
    return init_std_corrected
Example #37
def test_repr_of_dataset(dataset) -> None:
    formatted = fh.dataset_repr(dataset)
    # coords, attrs, and data_vars are expanded
    assert (formatted.count(
        "class='xr-section-summary-in' type='checkbox'  checked>") == 3)
    assert "&lt;U4" in formatted or "&gt;U4" in formatted
    assert "&lt;IA&gt;" in formatted

    with xr.set_options(
            display_expand_coords=False,
            display_expand_data_vars=False,
            display_expand_attrs=False,
    ):
        formatted = fh.dataset_repr(dataset)
        # coords, attrs, and data_vars are collapsed
        assert (formatted.count(
            "class='xr-section-summary-in' type='checkbox'  checked>") == 0)
        assert "&lt;U4" in formatted or "&gt;U4" in formatted
        assert "&lt;IA&gt;" in formatted
Example #38
    def test_array_repr(self):
        ds = xr.Dataset(coords={"foo": [1, 2, 3], "bar": [1, 2, 3]})
        ds[(1, 2)] = xr.DataArray([0], dims="test")
        actual = formatting.array_repr(ds[(1, 2)])
        expected = dedent("""\
        <xarray.DataArray (1, 2) (test: 1)>
        array([0])
        Dimensions without coordinates: test""")

        assert actual == expected

        with xr.set_options(display_expand_data=False):
            actual = formatting.array_repr(ds[(1, 2)])
            expected = dedent("""\
            <xarray.DataArray (1, 2) (test: 1)>
            0
            Dimensions without coordinates: test""")

            assert actual == expected
Example #39
def load_dataset(filelist):
    # Load the dataset and fix the time coordinate
    # Input: list of file names *.nc to load

    with xr.set_options(enable_cftimeindex=True):
        ds = xr.open_mfdataset(filelist, autoclose=True, decode_times=False)

    time2 = cftime.num2date(ds['time'][:],
                            units=ds['time'].units,
                            calendar=ds['time'].calendar,
                            only_use_cftime_datetimes=True)

    ds['time'].values = time2

    ds = convert_units(ds)

    return ds
Example #40
def test_assert_has_data_for_time_cftime_datetimes(calendar, date_type):
    time_bounds = np.array([[0, 2], [2, 4], [4, 6]])
    nv = np.array([0, 1])
    time = np.array([1, 3, 5])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 0002-01-02 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds[TIME_STR].attrs['calendar'] = calendar
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)

    with warnings.catch_warnings(record=True):
        with xr.set_options(enable_cftimeindex=True):
            ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = date_type(2, 1, 2)
    end_date = date_type(2, 1, 8)

    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = date_type(2, 1, 1)
    end_date_bad = date_type(2, 1, 9)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #41
def _prep_time_data(ds):
    """Prepare time coordinate information in Dataset for use in aospy.

    1. If the Dataset contains a time bounds coordinate, add attributes
       representing the true beginning and end dates of the time interval used
       to construct the Dataset
    2. If the Dataset contains a time bounds coordinate, overwrite the time
       coordinate values with the averages of the time bounds at each timestep
    3. Decode the times into np.datetime64 objects for time indexing

    Parameters
    ----------
    ds : Dataset
        Pre-processed Dataset with time coordinate renamed to
        internal_names.TIME_STR

    Returns
    -------
    Dataset
        The processed Dataset
    """
    ds = times.ensure_time_as_index(ds)
    if TIME_BOUNDS_STR in ds:
        ds = times.ensure_time_avg_has_cf_metadata(ds)
        ds[TIME_STR] = times.average_time_bounds(ds)
    else:
        logging.warning("dt array not found.  Assuming equally spaced "
                        "values in time, even though this may not be "
                        "the case")
        ds = times.add_uniform_time_weights(ds)
    # Suppress the "enable_cftimeindex is a no-op" warning; we'll keep setting
    # it for now to maintain backwards compatibility for older xarray versions.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        with xr.set_options(enable_cftimeindex=True):
            ds = xr.decode_cf(ds, decode_times=True, decode_coords=False,
                              mask_and_scale=True)
    return ds
Example #42
def test_invalid_option_raises():
    with pytest.raises(ValueError):
        xarray.set_options(not_a_valid_options=True)
Example #43
def test_arithmetic_join():
    with pytest.raises(ValueError):
        xarray.set_options(arithmetic_join='invalid')
    with xarray.set_options(arithmetic_join='exact'):
        assert OPTIONS['arithmetic_join'] == 'exact'
Example #44
def test_enable_cftimeindex():
    with pytest.raises(ValueError):
        xarray.set_options(enable_cftimeindex=None)
    with pytest.warns(FutureWarning, match='no-op'):
        with xarray.set_options(enable_cftimeindex=True):
            assert OPTIONS['enable_cftimeindex']