Esempio n. 1
0
def compute(ds: DatasetLike.TYPE,
            expr: str,
            var: VarName.TYPE,
            copy: bool = False,
            _ctx: dict = None,
            monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Evaluate the math expression *expr* and store its result as a new variable named *var*.

    The names usable in *expr* are the entries of the ``'value_cache'`` mapping found in
    *_ctx* (if any), overlaid by the data variables of *ds* (if given).

    :param ds: The primary dataset. If omitted, all variables need to be prefixed by their dataset resource names.
    :param expr: Math expression in which all *ds* variables may be used by name.
    :param var: The new variable's name.
    :param copy: Whether to copy all variables from *ds*.
    :param _ctx: Optional context dictionary. Its ``'value_cache'`` entry, when present,
           seeds the namespace in which *expr* is evaluated.
    :param monitor: An optional progress monitor.
    :return: A new dataset with the new variable.
    """
    namespace = {}
    if _ctx is not None and 'value_cache' in _ctx:
        namespace.update(_ctx['value_cache'])
    if ds is not None:
        # Dataset variables are applied last, so they win over equally named cached values.
        namespace.update(ds.data_vars)

    with monitor.observing("Computing variable"):
        result = safe_eval(expr, local_namespace=namespace)
        result.name = var

    if ds is None or not copy:
        # Without a dataset to copy from, the result is the only variable.
        return xr.Dataset(data_vars={var: result})

    extended_ds = ds.copy()
    extended_ds[var] = result
    return extended_ds
Esempio n. 2
0
def long_term_average(ds: DatasetLike.TYPE,
                      var: VarNamesLike.TYPE = None,
                      monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Average the values of the given dataset over all years ('mean over years').

    The result is a climatological dataset with the same temporal resolution as
    the input: a daily input dataset yields a daily climatology consisting of 365
    days, a monthly input dataset yields a monthly climatology, etc.

    Seasonal input datasets must denote matching seasons by the same date in every
    year, e.g. the first date of each quarter. The output is then a seasonal
    climatology in which each season carries the same date as in the input.

    For further information on climatological datasets, see
    http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#climatological-statistics

    :param ds: A dataset to average
    :param var: If given, only these variables will be preserved in the resulting dataset
    :param monitor: A progress monitor
    :return: A climatological long term average dataset
    :raises ValidationError: If the time coordinate is not of type datetime64[ns], or if
            the dataset has no 'time_coverage_resolution' attribute.
    """
    ds = DatasetLike.convert(ds)

    # The averaging helpers rely on a proper datetime64[ns] time coordinate.
    time_dtype = ds.time.dtype
    if time_dtype != 'datetime64[ns]':
        raise ValidationError(
            'Long term average operation expects a dataset with the'
            ' time coordinate of type datetime64[ns], but received'
            ' {}. Running the normalize operation on this'
            ' dataset may help'.format(time_dtype))

    try:
        t_resolution = ds.attrs['time_coverage_resolution']
    except KeyError:
        raise ValidationError(
            'Could not determine temporal resolution. Running'
            ' the adjust_temporal_attrs operation beforehand may'
            ' help.')

    var_names = VarNamesLike.convert(var)
    # Work on a copy so that the caller's dataset object is left alone.
    retset = ds.copy()
    if var_names:
        retset = select_var(retset, var_names)

    # Pick the averaging strategy that matches the temporal resolution.
    if t_resolution == 'P1D':
        averager = _lta_daily
    elif t_resolution == 'P1M':
        averager = _lta_monthly
    else:
        averager = _lta_general
    return averager(retset, monitor)
Esempio n. 3
0
def reduce(ds: DatasetLike.TYPE,
           var: VarNamesLike.TYPE = None,
           dim: DimNamesLike.TYPE = None,
           method: str = 'mean',
           monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Reduce the given variables of the given dataset along the given dimensions.
    If no variables are given, all data variables of the dataset will be reduced.
    If no dimensions are given, every variable is reduced along all of its
    dimensions.

    :param ds: Dataset to reduce
    :param var: Variables in the dataset to reduce
    :param dim: Dataset dimensions along which to reduce
    :param method: Reduction method, one of 'min', 'max', 'mean', 'median' or 'sum'.
           NaN values are ignored by all of them. An unknown name results in a ``KeyError``.
    :param monitor: A progress monitor
    :return: A copy of the dataset in which the selected variables have been reduced
    """
    ufuncs = {
        'min': np.nanmin,
        'max': np.nanmax,
        'mean': np.nanmean,
        'median': np.nanmedian,
        'sum': np.nansum
    }

    ds = DatasetLike.convert(ds)

    if not var:
        var = list(ds.data_vars.keys())
    var_names = VarNamesLike.convert(var)

    if not dim:
        # Take the dataset's dimensions, not its coordinates: a dimension that
        # has no coordinate variable must still be reduced when "all
        # dimensions" are requested.
        dim = list(ds.dims)
    else:
        dim = DimNamesLike.convert(dim)

    retset = ds.copy()

    # Start the monitor once for the whole operation and account one unit of
    # work per variable, instead of restarting it for every variable.
    with monitor.starting("Reduce dataset", total_work=len(var_names)):
        for var_name in var_names:
            # Only reduce along the requested dimensions this variable actually has.
            intersection = [
                value for value in dim if value in retset[var_name].dims
            ]
            with monitor.child(1).observing("Reduce"):
                retset[var_name] = retset[var_name].reduce(ufuncs[method],
                                                           dim=intersection,
                                                           keep_attrs=True)

    return retset
Esempio n. 4
0
def long_term_average(ds: DatasetLike.TYPE,
                      var: VarNamesLike.TYPE = None,
                      monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Create a 'mean over years' dataset from the given input dataset.

    The values are averaged over all years, producing a climatological dataset
    with the same resolution as the input. E.g. a daily input dataset will create
    a daily climatology consisting of 365 days, a monthly input dataset will
    create a monthly climatology, etc.

    For seasonal input datasets, the seasons must match over all years and be
    denoted by the same date each year (e.g. the first date of each quarter).
    Each season of the resulting seasonal climatology is denoted by the same
    date as in the input dataset.

    For further information on climatological datasets, see
    http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#climatological-statistics

    :param ds: A dataset to average
    :param var: If given, only these variables will be preserved in the resulting dataset
    :param monitor: A progress monitor
    :return: A climatological long term average dataset
    :raises ValidationError: If the time coordinate is not of type datetime64[ns], or if
            the dataset has no 'time_coverage_resolution' attribute.
    """
    ds = DatasetLike.convert(ds)

    # Reject datasets whose time axis has not been normalized to datetime64[ns].
    actual_dtype = ds.time.dtype
    if actual_dtype != 'datetime64[ns]':
        message = ('Long term average operation expects a dataset with the'
                   ' time coordinate of type datetime64[ns], but received'
                   ' {}. Running the normalize operation on this'
                   ' dataset may help')
        raise ValidationError(message.format(actual_dtype))

    try:
        t_resolution = ds.attrs['time_coverage_resolution']
    except KeyError:
        raise ValidationError('Could not determine temporal resolution. Running'
                              ' the adjust_temporal_attrs operation beforehand may'
                              ' help.')

    selected = VarNamesLike.convert(var)
    # Copy first, then optionally narrow down to the requested variables.
    retset = ds.copy()
    if selected:
        retset = select_var(retset, selected)

    # Daily and monthly data have dedicated implementations; anything else
    # goes through the general one.
    if t_resolution == 'P1D':
        lta_impl = _lta_daily
    elif t_resolution == 'P1M':
        lta_impl = _lta_monthly
    else:
        lta_impl = _lta_general
    return lta_impl(retset, monitor)
Esempio n. 5
0
def compute_dataset(ds: DatasetLike.TYPE,
                    script: str,
                    copy: bool = False,
                    _ctx: dict = None,
                    monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Compute a new dataset from the given Python *script*.

    The argument *script* must be valid Python code or a single expression comprising at least one
    value assignment of the form {name} = {expr}. Multiple assignments can be done on multiple lines
    or on a single line separated by semicolons.

    {expr} may reference variables in the given context dataset *ds* or other resources and their
    variables from the current workflow.
    In the latter case, use the dot operator to select a variable from a dataset resource.

    Any new variable in *script* whose name does not begin with an underscore ('_') and
    that has an appropriate data type will be added to the new dataset.

    The following packages are available in the *script*:

    * ``geopandas``, ``gpd``: The ``geopandas`` top-level package (http://geopandas.org/)
    * ``math``: The standard Python ``math`` library (https://docs.python.org/3/library/math.html)
    * ``numpy``, ``np``: The ``numpy`` top-level package (https://docs.scipy.org/doc/numpy/reference/)
    * ``pandas``, ``pd``: The ``pandas`` top-level package (http://pandas.pydata.org/pandas-docs/stable/api.html)
    * ``scipy``, ``sp``: The ``scipy`` top-level package (https://docs.scipy.org/doc/scipy/reference/)
    * ``xarray``, ``xr``: The ``xarray`` top-level package (http://xarray.pydata.org/en/stable/api.html)

    :param ds: Optional context dataset. If provided, all variables of this dataset are
           directly accessible in the *script*.
           If omitted, all variables (series) of other dataset (data frame) resources need to be prefixed
           by their resource name.
    :param script: Valid Python expression comprising at least one assignment of the form {name} = {expr}.
    :param copy: Whether to copy all variables from *ds*.
    :param _ctx: Optional context dictionary that is handed on to the script execution.
    :param monitor: An optional progress monitor.
    :return: A new dataset object.
    """
    # Value types that are passed to the script execution helper as acceptable results.
    accepted_types = (xr.DataArray, np.ndarray, float, int)
    data_vars = _exec_script(script, accepted_types, _ctx, ds, monitor)

    if ds is None or not copy:
        # Nothing to carry over: the result holds the script's variables only.
        return xr.Dataset(data_vars=data_vars)

    new_ds = ds.copy()
    for name, data in data_vars.items():
        new_ds[name] = data
    return new_ds
Esempio n. 6
0
def compute_dataset(ds: DatasetLike.TYPE,
                    script: str,
                    copy: bool = False,
                    _ctx: dict = None,
                    monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Run the given Python *script* and collect the variables it creates into a new dataset.

    *script* must be valid Python code or a single expression comprising at least one
    value assignment of the form {name} = {expr}. Multiple assignments can be placed on multiple
    lines or on a single line separated by semicolons.

    {expr} may reference variables in the given context dataset *ds* or other resources and their
    variables from the current workflow.
    In the latter case, use the dot operator to select a variable from a dataset resource.

    Any new variable in *script* whose name does not begin with an underscore ('_') and
    that has an appropriate data type will be added to the new dataset.

    The following packages are available in the *script*:

    * ``geopandas``, ``gpd``: The ``geopandas`` top-level package (http://geopandas.org/)
    * ``math``: The standard Python ``math`` library (https://docs.python.org/3/library/math.html)
    * ``numpy``, ``np``: The ``numpy`` top-level package (https://docs.scipy.org/doc/numpy/reference/)
    * ``pandas``, ``pd``: The ``pandas`` top-level package (http://pandas.pydata.org/pandas-docs/stable/api.html)
    * ``scipy``, ``sp``: The ``scipy`` top-level package (https://docs.scipy.org/doc/scipy/reference/)
    * ``xarray``, ``xr``: The ``xarray`` top-level package (http://xarray.pydata.org/en/stable/api.html)

    :param ds: Optional context dataset. If provided, all variables of this dataset are
           directly accessible in the *script*.
           If omitted, all variables (series) of other dataset (data frame) resources need to be prefixed
           by their resource name.
    :param script: Valid Python expression comprising at least one assignment of the form {name} = {expr}.
    :param copy: Whether to copy all variables from *ds*.
    :param _ctx: Optional context dictionary that is handed on to the script execution.
    :param monitor: An optional progress monitor.
    :return: A new dataset object.
    """
    result_types = (xr.DataArray, np.ndarray, float, int)
    computed_vars = _exec_script(script, result_types, _ctx, ds, monitor)

    keep_original_vars = ds is not None and copy
    if not keep_original_vars:
        return xr.Dataset(data_vars=computed_vars)

    # Start from a copy of the context dataset and add the computed variables to it.
    combined_ds = ds.copy()
    for var_name, values in computed_vars.items():
        combined_ds[var_name] = values
    return combined_ds
Esempio n. 7
0
def reduce(ds: DatasetLike.TYPE,
           var: VarNamesLike.TYPE = None,
           dim: DimNamesLike.TYPE = None,
           method: str = 'mean',
           monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Reduce the given variables of the given dataset along the given dimensions.
    If no variables are given, all data variables of the dataset will be reduced.
    If no dimensions are given, every variable is reduced along all of its
    dimensions.

    :param ds: Dataset to reduce
    :param var: Variables in the dataset to reduce
    :param dim: Dataset dimensions along which to reduce
    :param method: Reduction method, one of 'min', 'max', 'mean', 'median' or 'sum'.
           NaN values are ignored by all of them. An unknown name results in a ``KeyError``.
    :param monitor: A progress monitor
    :return: A copy of the dataset in which the selected variables have been reduced
    """
    ufuncs = {'min': np.nanmin, 'max': np.nanmax, 'mean': np.nanmean,
              'median': np.nanmedian, 'sum': np.nansum}

    ds = DatasetLike.convert(ds)

    if not var:
        var = list(ds.data_vars.keys())
    var_names = VarNamesLike.convert(var)

    if not dim:
        # Use the dataset's dimensions instead of its coordinates, otherwise
        # dimensions without a coordinate variable would never be reduced.
        dim = list(ds.dims)
    else:
        dim = DimNamesLike.convert(dim)

    retset = ds.copy()

    # The monitor is started exactly once; each variable contributes one unit
    # of work through its own child monitor.
    with monitor.starting("Reduce dataset", total_work=len(var_names)):
        for var_name in var_names:
            # Restrict the requested dimensions to those present in this variable.
            intersection = [value for value in dim if value in retset[var_name].dims]
            with monitor.child(1).observing("Reduce"):
                retset[var_name] = retset[var_name].reduce(ufuncs[method],
                                                           dim=intersection,
                                                           keep_attrs=True)

    return retset
Esempio n. 8
0
def long_term_average(ds: DatasetLike.TYPE,
                      var: VarNamesLike.TYPE = None,
                      monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Compute the long term average of a monthly dataset.

    The monthly values are averaged over the time range covered by the dataset,
    i.e. all January values are averaged, all February values are averaged, etc.
    The result has twelve time slices, each containing the mean of the respective
    monthly values.

    For further information on climatological datasets, see
    http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#climatological-statistics

    :param ds: A monthly dataset to average
    :param var: If given, only these variables will be preserved in the resulting dataset
    :param monitor: A progress monitor
    :return: A climatological long term average dataset
    :raises ValueError: If the time coordinate is not of type datetime64[ns], if the
            dataset has no 'time_coverage_resolution' attribute, or if that attribute
            is not 'P1M'.
    """
    ds = DatasetLike.convert(ds)

    # The time coordinate must already be normalized to datetime64[ns].
    time_dtype = ds.time.dtype
    if time_dtype != 'datetime64[ns]':
        raise ValueError(
            'Long term average operation expects a dataset with the'
            ' time coordinate of type datetime64[ns], but received'
            ' {}. Running the normalize operation on this'
            ' dataset may help'.format(time_dtype))

    # Only monthly datasets are supported by this implementation.
    try:
        resolution = ds.attrs['time_coverage_resolution']
    except KeyError:
        raise ValueError('Could not determine temporal resolution. Running'
                         ' the adjust_temporal_attrs operation beforehand may'
                         ' help.')
    if resolution != 'P1M':
        raise ValueError(
            'Long term average operation expects a monthly dataset'
            ' running temporal aggregation on this dataset'
            ' beforehand may help.')

    var_names = VarNamesLike.convert(var)
    # Work on a copy, optionally restricted to the requested variables.
    retset = ds.copy()
    if var_names:
        retset = select_var(retset, var_names)

    # First and last time stamps of the input; used for the climatology bounds.
    time_min = pd.Timestamp(ds.time.values[0])
    time_max = pd.Timestamp(ds.time.values[-1])

    total_work = 100

    with monitor.starting('LTA', total_work=total_work):
        monitor.progress(work=0)
        # One twelfth of the total work is passed on as 'step' for each month group.
        monthly_groups = retset.groupby('time.month', squeeze=False)
        retset = monthly_groups.apply(_mean,
                                      monitor=monitor,
                                      step=total_work / 12)

    # Make the return dataset CF compliant: the grouping dimension 'month'
    # becomes 'time' again, labelled with month starts of the first year.
    retset = retset.rename({'month': 'time'})
    retset['time'] = pd.date_range('{}-01-01'.format(time_min.year),
                                   freq='MS',
                                   periods=12)

    # Every one of the twelve time steps gets the same (min, max) bounds pair.
    bounds_data = np.tile([time_min, time_max], (12, 1))
    retset['climatology_bounds'] = xr.DataArray(data=bounds_data,
                                                dims=['time', 'nv'],
                                                name='climatology_bounds')
    retset.time.attrs = ds.time.attrs
    retset.time.attrs['climatology'] = 'climatology_bounds'

    # Record the averaging in each variable's 'cell_methods' attribute,
    # appending to an existing entry if there is one.
    for name in retset.data_vars:
        if 'cell_methods' in retset[name].attrs:
            retset[name].attrs['cell_methods'] = \
                retset[name].attrs['cell_methods'] + ' time: mean over years'
        else:
            retset[name].attrs['cell_methods'] = 'time: mean over years'

    return retset
Esempio n. 9
0
def compute(ds: DatasetLike.TYPE,
            script: str,
            copy: bool = False,
            _ctx: dict = None,
            monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Compute a new dataset from the given Python *script*.

    The argument *script* must be valid Python code or a single expression comprising at least one
    value assignment of the form <name> = <expr>. Multiple assignments can be done on multiple lines
    or on a single line separated by semicolons.

    <expr> may reference variables in the given context dataset *ds* or other resources and their
    variables from the current workflow.
    In the latter case, use the dot operator to select a variable from a dataset resource.

    Every variable of type data array that is new, or that has been rebound to a different
    object by the *script*, will be added to the new dataset.

    The following packages are available in the code:

    * ``np``: The ``numpy`` top-level package (https://docs.scipy.org/doc/numpy/reference/)
    * ``pd``: The ``pandas`` top-level package (http://pandas.pydata.org/pandas-docs/stable/api.html)
    * ``xr``: The ``xarray`` top-level package (http://xarray.pydata.org/en/stable/api.html)
    * ``xu``: The ``xarray.ufuncs`` package (http://xarray.pydata.org/en/stable/api.html#universal-functions)

    Note, in contrast to the ``np`` package, all the math functions defined in ``xu`` will preserve
    variable attributes.

    :param ds: Optional context dataset. All variables of this dataset are directly accessible in the *script*.
               If omitted, all variables need to be prefixed by their dataset resource names.
    :param script: Valid Python expression comprising at least one assignment of the form <name> = <expr>.
    :param copy: Whether to copy all variables from *ds*.
    :param _ctx: Optional context dictionary. Its ``'value_cache'`` entry, when present,
               seeds the namespace in which the *script* is executed.
    :param monitor: An optional progress monitor.
    :return: A new dataset.
    """
    # Namespace as it looks before the script runs; kept to detect what the script changed.
    namespace_before = {}
    if _ctx is not None and 'value_cache' in _ctx:
        namespace_before.update(_ctx['value_cache'])
    if ds is not None:
        namespace_before.update(ds.data_vars)
    for alias, package in (('np', np), ('pd', pd), ('xr', xr), ('xu', xu)):
        namespace_before[alias] = package

    # The script works on its own copy of the namespace.
    namespace_after = dict(namespace_before)
    with monitor.observing("Executing script"):
        safe_exec(script, local_namespace=namespace_after)

    data_vars = {}
    for name, value in namespace_after.items():
        if not isinstance(value, (xr.DataArray, xr.Variable)):
            continue
        if name in namespace_before and value is namespace_before[name]:
            # Still the very same object as before the script ran: not a result.
            continue
        value.name = name
        data_vars[name] = value

    if ds is None or not copy:
        return xr.Dataset(data_vars=data_vars)

    new_ds = ds.copy()
    for name, value in data_vars.items():
        new_ds[name] = value
    return new_ds