Example #1
0
def pearson_correlation_scalar(ds_x: DatasetLike.TYPE,
                               ds_y: DatasetLike.TYPE,
                               var_x: VarName.TYPE,
                               var_y: VarName.TYPE,
                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Performs a simple correlation analysis on two data variables and returns
    a correlation coefficient and the corresponding p_value.

    Positive correlation implies that as x grows, so does y. Negative
    correlation implies that as x increases, y decreases.

    For more information how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: Data frame {'corr_coef': correlation coefficient, 'p_value': probability value}
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    if (array_x.dims != array_y.dims):
        raise ValidationError('Both datasets should feature the same'
                              ' dimensionality. Currently provided ds_x[var_x] '
                              f'has {array_x.dims}, provided ds_y[var_y]'
                              f' has {array_y.dims}')

    for dim in array_x.dims:
        if len(array_x[dim]) != len(array_y[dim]):
            raise ValidationError('All dimensions of both provided data variables'
                                  f' must be the same length. Currently {dim} of ds_x[var_x]'
                                  f' has {len(array_x[dim])} values, while'
                                  f' {dim} of ds_y[var_y] has {len(array_y[dim])} values.'
                                  ' You may want to try to coregister the datasets beforehand.')

    n_vals = 1
    for dim in array_x.dims:
        n_vals = n_vals * len(array_x[dim])

    if n_vals < 3:
        raise ValidationError('There should be no less than 3 values in both data variables'
                              f' to perform the correlation. Currently there are {n_vals} values')

    with monitor.observing("Calculate Pearson correlation"):
        cc, pv = pearsonr(array_x.stack(z=array_x.dims), array_y.stack(z=array_y.dims))

    return pd.DataFrame({'corr_coef': [cc], 'p_value': [pv]})
Example #2
0
    def test_convert(self):
        expected = 'aa'
        actual = VarName.convert('aa')
        self.assertEqual(actual, expected)

        with self.assertRaises(ValidationError) as err:
            VarName.convert(['aa', 'bb', 'cc'])
        self.assertEqual(str(err.exception), 'Variable name expected.')
        self.assertEqual(None, VarName.convert(None))
Example #3
0
    def test_convert(self):
        expected = 'aa'
        actual = VarName.convert('aa')
        self.assertEqual(actual, expected)

        with self.assertRaises(ValueError) as err:
            VarName.convert(['aa', 'bb', 'cc'])
        self.assertTrue('cannot convert' in str(err.exception))
        self.assertEqual(None, VarName.convert(None))
Example #4
0
    def test_convert(self):
        expected = 'aa'
        actual = VarName.convert('aa')
        self.assertEqual(actual, expected)

        with self.assertRaises(ValidationError) as err:
            VarName.convert(['aa', 'bb', 'cc'])
        self.assertEqual(str(err.exception), 'Variable name expected.')
        self.assertEqual(None, VarName.convert(None))
Example #5
0
def pearson_correlation_scalar(
        ds_x: DatasetLike.TYPE,
        ds_y: DatasetLike.TYPE,
        var_x: VarName.TYPE,
        var_y: VarName.TYPE,
        monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Performs a simple correlation analysis on two timeseries and returns
    a correlation coefficient and the corresponding p_value.

    Positive correlation implies that as x grows, so does y. Negative
    correlation implies that as x increases, y decreases.

    For more information how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: {'corr_coef': correlation coefficient, 'p_value': probability value}
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    if ((len(array_x.dims) != len(array_y.dims)) and (len(array_x.dims) != 1)):
        raise ValidationError('To calculate simple correlation, both provided'
                              ' datasets should be simple 1d timeseries. To'
                              ' create a map of correlation coefficients, use'
                              ' pearson_correlation operation instead.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValidationError(
            'The length of the time dimension differs between'
            ' the given datasets. Can not perform the calculation'
            ', please review operation documentation.')

    if len(array_x['time']) < 3:
        raise ValidationError(
            'The length of the time dimension should not be less'
            ' than three to run the calculation.')

    with monitor.observing("Calculate Pearson correlation"):
        cc, pv = pearsonr(array_x.values, array_y.values)

    return pd.DataFrame({'corr_coef': [cc], 'p_value': [pv]})
Example #6
0
def plot_contour(ds: xr.Dataset,
                 var: VarName.TYPE,
                 time: TimeLike.TYPE = None,
                 indexers: DictLike.TYPE = None,
                 title: str = None,
                 filled: bool = True,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a contour plot of a variable given by dataset *ds* and variable name *var*.

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param time: time slice index to plot, can be a string "YYYY-MM-DD" or an integer number
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "layer=4".
    :param title: an optional title
    :param filled: whether the regions between two contours shall be filled
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")
    var = ds[var_name]

    time = TimeLike.convert(time)
    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)

    var_data = _get_var_data(var, indexers, time=time)
    if filled:
        var_data.plot.contourf(ax=ax, **properties)
    else:
        var_data.plot.contour(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #7
0
def plot_hist(ds: xr.Dataset,
              var: VarName.TYPE,
              indexers: DictLike.TYPE = None,
              title: str = None,
              properties: DictLike.TYPE = None,
              file: str = None) -> Figure:
    """
    Plot a variable, optionally save the figure in a file.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lon=12.6, layer=3, time='2012-05-02'".
    :param title: an optional title
    :param properties: optional histogram plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.hist.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """

    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")

    var = ds[var]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)
    figure.tight_layout()

    var_data = _get_var_data(var, indexers)
    var_data.plot.hist(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #8
0
def plot_hist(ds: xr.Dataset,
              var: VarName.TYPE,
              indexers: DictLike.TYPE = None,
              title: str = None,
              properties: DictLike.TYPE = None,
              file: str = None) -> Figure:
    """
    Plot a variable, optionally save the figure in a file.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lon=12.6, layer=3, time='2012-05-02'".
    :param title: an optional title
    :param properties: optional histogram plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.hist.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValidationError("Missing name for 'var'")

    var = ds[var]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)
    figure.tight_layout()

    var_data = get_var_data(var, indexers)
    var_data.plot.hist(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #9
0
def data_frame_max(df: DataFrameLike.TYPE, var: VarName.TYPE) -> pd.DataFrame:
    """
    Select the first record of a data frame for which the given variable value is maximal.

    :param df: The data frame or dataset.
    :param var: The variable.
    :return: A new, one-record data frame.
    """
    data_frame = DataFrameLike.convert(df)
    var_name = VarName.convert(var)
    row_index = data_frame[var_name].idxmax()
    row_frame = data_frame.loc[[row_index]]
    return _maybe_convert_to_geo_data_frame(data_frame, row_frame)
Example #10
0
def data_frame_max(df: DataFrameLike.TYPE, var: VarName.TYPE) -> pd.DataFrame:
    """
    Select the first record of a data frame for which the given variable value is maximal.

    :param df: The data frame or dataset.
    :param var: The variable.
    :return: A new, one-record data frame.
    """
    data_frame = DataFrameLike.convert(df)
    var_name = VarName.convert(var)
    row_index = data_frame[var_name].idxmax()
    row_frame = data_frame.loc[[row_index]]
    return _maybe_convert_to_geo_data_frame(data_frame, row_frame)
Example #11
0
def plot_contour(ds: xr.Dataset,
                 var: VarName.TYPE,
                 indexers: DictLike.TYPE = None,
                 title: str = None,
                 filled: bool = True,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a contour plot of a variable given by dataset *ds* and variable name *var*.

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "layer=4".
    :param title: an optional title
    :param filled: whether the regions between two contours shall be filled
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name = VarName.convert(var)
    if not var_name:
        raise ValidationError("Missing name for 'var'")
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    figure = plt.figure(figsize=(8, 4))
    ax = figure.add_subplot(111)

    var_data = get_var_data(var, indexers)
    if filled:
        var_data.plot.contourf(ax=ax, **properties)
    else:
        var_data.plot.contour(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #12
0
def plot(ds: xr.Dataset,
         var: VarName.TYPE,
         indexers: DictLike.TYPE = None,
         title: str = None,
         properties: DictLike.TYPE = None,
         file: str = None) -> Figure:
    """
    Create a 1D/line or 2D/image plot of a variable given by dataset *ds* and variable name *var*.

    :param ds: Dataset or Dataframe that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lat=12.4, time='2012-05-02'".
    :param title: an optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    ds = DatasetLike.convert(ds)

    var_name = VarName.convert(var)
    if not var_name:
        raise ValueError("Missing value for 'var'")
    var = ds[var_name]

    indexers = DictLike.convert(indexers)
    properties = DictLike.convert(properties) or {}

    figure = plt.figure()
    ax = figure.add_subplot(111)

    var_data = _get_var_data(var, indexers)
    var_data.plot(ax=ax, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #13
0
File: index.py Project: whigg/cate
def _generic_index_calculation(
        ds: xr.Dataset,
        var: VarName.TYPE,
        region: PolygonLike.TYPE,
        window: int,
        file: str,
        name: str,
        threshold: float = None,
        monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    A generic index calculation. Where an index is defined as an anomaly
    against the given reference of a moving average of the given window size of
    the given given region of the given variable of the given dataset.

    :param ds: Dataset from which to calculate the index
    :param var: Variable from which to calculate index
    :param region: Spatial subset from which to calculate the index
    :param window: Window size for the moving average
    :param file: Path to the reference file
    :param threshold: Absolute threshold that indicates an ENSO event
    :param name: Name of the index
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries
    """
    var = VarName.convert(var)
    region = PolygonLike.convert(region)

    with monitor.starting("Calculate the index", total_work=2):
        ds = select_var(ds, var)
        ds_subset = subset_spatial(ds, region)
        anom = anomaly_external(ds_subset, file, monitor=monitor.child(1))
        with monitor.child(1).observing("Calculate mean"):
            ts = anom.mean(dim=['lat', 'lon'])
        df = pd.DataFrame(data=ts[var].values,
                          columns=[name],
                          index=ts.time.values)
        retval = df.rolling(window=window, center=True).mean().dropna()

    if threshold is None:
        return retval

    retval['El Nino'] = pd.Series((retval[name] > threshold),
                                  index=retval.index)
    retval['La Nina'] = pd.Series((retval[name] < -threshold),
                                  index=retval.index)
    return retval
Example #14
0
def _generic_index_calculation(ds: xr.Dataset,
                               var: VarName.TYPE,
                               region: PolygonLike.TYPE,
                               window: int,
                               file: str,
                               name: str,
                               threshold: float = None,
                               monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    A generic index calculation. Where an index is defined as an anomaly
    against the given reference of a moving average of the given window size of
    the given given region of the given variable of the given dataset.

    :param ds: Dataset from which to calculate the index
    :param var: Variable from which to calculate index
    :param region: Spatial subset from which to calculate the index
    :param window: Window size for the moving average
    :param file: Path to the reference file
    :param threshold: Absolute threshold that indicates an ENSO event
    :param name: Name of the index
    :param monitor: a progress monitor.
    :return: A dataset that contains the index timeseries
    """
    var = VarName.convert(var)
    region = PolygonLike.convert(region)

    with monitor.starting("Calculate the index", total_work=2):
        ds = select_var(ds, var)
        ds_subset = subset_spatial(ds, region)
        anom = anomaly_external(ds_subset, file, monitor=monitor.child(1))
        with monitor.child(1).observing("Calculate mean"):
            ts = anom.mean(dim=['lat', 'lon'])
        df = pd.DataFrame(data=ts[var].values, columns=[name], index=ts.time)
        retval = df.rolling(window=window, center=True).mean().dropna()

    if threshold is None:
        return retval

    retval['El Nino'] = pd.Series((retval[name] > threshold),
                                  index=retval.index)
    retval['La Nina'] = pd.Series((retval[name] < -threshold),
                                  index=retval.index)
    return retval
Example #15
0
def plot(ds: xr.Dataset,
         var: VarName.TYPE,
         index: DictLike.TYPE = None,
         file: str = None) -> None:
    """
    Plot a variable, optionally save the figure in a file.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: Dataset that contains the variable named by *var*.
    :param var: The name of the variable to plot
    :param index: Optional index into the variable's data array. The *index* is a dictionary
                  that maps the variable's dimension names to constant labels. For example,
                  ``lat`` and ``lon`` are given in decimal degrees, while a ``time`` value may be provided as
                  datetime object or a date string. *index* may also be a comma-separated string of key-value pairs,
                  e.g. "lat=12.4, time='2012-05-02'".
    :param file: path to a file in which to save the plot
    """

    var = VarName.convert(var)
    var = ds[var]

    index = DictLike.convert(index)

    try:
        if index:
            var_data = var.sel(**index)
        else:
            var_data = var
    except ValueError:
        var_data = var

    fig = plt.figure(figsize=(16, 8))
    var_data.plot()
    if file:
        fig.savefig(file)
Example #16
0
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables of two variables given by datasets *ds1*, *ds2* and the
    variable names *var1*, *var2*.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The *indexers1* is a dictionary
           or comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lat=12.4, time='2012-05-02'".
    :param indexers2: Optional indexers into data array *var2*.
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """

    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    if not var_name1:
        raise ValueError("Missing value for 'var1'")
    if not var_name2:
        raise ValueError("Missing value for 'var2'")
    var1 = ds1[var_name1]
    var2 = ds2[var_name2]

    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    properties = DictLike.convert(properties) or {}

    try:
        if indexers1:
            var_data1 = var1.sel(method='nearest', **indexers1)
            if not indexers2:
                indexers2 = indexers1

            var_data2 = var2.sel(method='nearest', **indexers2)
            remaining_dims = list(set(var1.dims) ^ set(indexers1.keys()))
            min_dim = max(var_data1[remaining_dims[0]].min(),
                          var_data2[remaining_dims[0]].min())
            max_dim = min(var_data1[remaining_dims[0]].max(),
                          var_data2[remaining_dims[0]].max())
            print(min_dim, max_dim)
            var_data1 = var_data1.where(
                (var_data1[remaining_dims[0]] >= min_dim) &
                (var_data1[remaining_dims[0]] <= max_dim),
                drop=True)
            var_data2 = var_data2.where(
                (var_data2[remaining_dims[0]] >= min_dim) &
                (var_data2[remaining_dims[0]] <= max_dim),
                drop=True)
            print(var_data1)
            print(var_data2)
            if len(remaining_dims) is 1:
                print(remaining_dims)
                indexer3 = {
                    remaining_dims[0]: var_data1[remaining_dims[0]].data
                }
                var_data2.reindex(method='nearest', **indexer3)
            else:
                print("Err!")
        else:
            var_data1 = var1
            var_data2 = var2
    except ValueError:
        var_data1 = var1
        var_data2 = var2

    figure = plt.figure(figsize=(12, 8))
    ax = figure.add_subplot(111)

    # var_data1.plot(ax = ax, **properties)
    ax.plot(var_data1.values, var_data2.values, '.', **properties)
    # var_data1.plot(ax=ax, **properties)
    xlabel_txt = "".join(", " + str(key) + " = " + str(value)
                         for key, value in indexers1.items())
    xlabel_txt = var_name1 + xlabel_txt
    ylabel_txt = "".join(", " + str(key) + " = " + str(value)
                         for key, value in indexers2.items())
    ylabel_txt = var_name2 + ylabel_txt
    ax.set_xlabel(xlabel_txt)
    ax.set_ylabel(ylabel_txt)
    figure.tight_layout()

    if title:
        ax.set_title(title)

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #17
0
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             index: DictLike.TYPE = None,
             time: Union[str, int] = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             file: str = None) -> None:
    """
    Plot the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no time index is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: xr.Dataset to plot
    :param var: variable name in the dataset to plot
    :param index: Optional index into the variable's data array. The *index* is a dictionary
                  that maps the variable's dimension names to constant labels. For example,
                  ``lat`` and ``lon`` are given in decimal degrees, while a ``time`` value may be provided as
                  datetime object or a date string. *index* may also be a comma-separated string of key-value pairs,
                  e.g. "lat=12.4, time='2012-05-02'".
    :param time: time slice index to plot
    :param region: Region to plot
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param file: path to a file in which to save the plot
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError('Only raster datasets are currently '
                                  'supported')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)

    var = ds[var_name]
    index = DictLike.convert(index)

    # 0 is a valid index, hence test if time is None
    if time is not None and isinstance(time, int) and 'time' in var.coords:
        time = var.coords['time'][time]

    if time:
        if not index:
            index = dict()
        index['time'] = time

    for dim_name in var.dims:
        if dim_name not in ('lat', 'lon'):
            if not index:
                index = dict()
            if dim_name not in index:
                index[dim_name] = 0

    if region is None:
        lat_min = -90.0
        lat_max = 90.0
        lon_min = -180.0
        lon_max = 180.0
    else:
        region = PolygonLike.convert(region)
        lon_min, lat_min, lon_max, lat_max = region.bounds

    if not _check_bounding_box(lat_min, lat_max, lon_min, lon_max):
        raise ValueError(
            'Provided plot extents do not form a valid bounding box '
            'within [-180.0,+180.0,-90.0,+90.0]')
    extents = [lon_min, lon_max, lat_min, lat_max]

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValueError('illegal projection')

    try:
        if index:
            var_data = var.sel(**index)
        else:
            var_data = var
    except ValueError:
        var_data = var

    fig = plt.figure(figsize=(16, 8))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents)
    else:
        ax.set_global()

    ax.coastlines()
    var_data.plot.contourf(ax=ax, transform=proj)
    if file:
        fig.savefig(file)
Example #18
0
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             time: TimeLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             properties: DictLike.TYPE = None,
             file: str = None) -> Figure:
    """
    Create a geographic map plot for the variable given by dataset *ds* and variable name *var*.

    Plots the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no *time* is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "layer=4".
    :param time: time slice index to plot, can be a string "YYYY-MM-DD" or an integer number
    :param region: Region to plot
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError(
            'Only gridded datasets are currently supported')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    time = TimeLike.convert(time)
    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    region = PolygonLike.convert(region)
    if region:
        lon_min, lat_min, lon_max, lat_max = region.bounds
        if not _check_bounding_box(lat_min, lat_max, lon_min, lon_max):
            raise ValueError(
                'Provided plot extents do not form a valid bounding box '
                'within [-180.0,+180.0,-90.0,+90.0]')
        extents = [lon_min, lon_max, lat_min, lat_max]

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValueError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents)
    else:
        ax.set_global()

    ax.coastlines()
    var_data = _get_var_data(var,
                             indexers,
                             time=time,
                             remaining_dims=('lon', 'lat'))
    var_data.plot.contourf(ax=ax, transform=proj, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #19
0
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             contour_plot: bool = False,
             properties: DictLike.TYPE = None,
             file: str = None) -> object:
    """
    Create a geographic map plot for the variable given by dataset *ds* and variable name *var*.

    Plots the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no *time* is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "layer=4".
    :param region: Region to plot
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If true plot a filled contour plot of data, otherwise plots a pixelated colormesh
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    if not isinstance(ds, xr.Dataset):
        raise ValidationError('Only gridded datasets are currently supported.')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib can not plot datasets with less than these dimensions with
        # contourf and pcolormesh methods
        raise ValidationError(
            'The minimum dataset spatial dimensions to create a map'
            ' plot are (2,2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValidationError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()
    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    # transform keyword is for the coordinate our data is in, which in case of a
    # 'normal' lat/lon dataset is PlateCarree.
    if contour_plot:
        var_data.plot.contourf(ax=ax,
                               transform=ccrs.PlateCarree(),
                               subplot_kws={'projection': proj},
                               **properties)
    else:
        var_data.plot.pcolormesh(ax=ax,
                                 transform=ccrs.PlateCarree(),
                                 subplot_kws={'projection': proj},
                                 **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        try:
            figure.savefig(file)
        except MemoryError:
            raise MemoryError(
                'Not enough memory to save the plot. Try using a different file format'
                ' or enabling contour_plot.')

    return figure if not in_notebook() else ax
Example #20
0
def pearson_correlation(ds_x: DatasetLike.TYPE,
                        ds_y: DatasetLike.TYPE,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Perform Pearson correlation on two datasets and produce a lon/lat map of
    correlation coefficients and the correspoding p_values.

    In case two 3D lon/lat/time datasets are provided, pixel by pixel
    correlation will be performed. It is also possible two pro
    Perform Pearson correlation analysis on two time/lat/lon datasets and
    produce a lat/lon map of correlation coefficients and p_values of
    underlying timeseries in the provided datasets.

    The lat/lon definition of both datasets has to be the same. The length of
    the time dimension should be equal, but not neccessarily have the same
    definition. E.g., it is possible to correlate different times of the same
    area.

    There are 'x' and 'y' datasets. Positive correlations imply that as x
    grows, so does y. Negative correlations imply that as x increases, y
    decreases.

    For more information how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: a dataset containing a map of correlation coefficients and p_values
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    # Further validate inputs
    if array_x.dims == array_y.dims:
        if len(array_x.dims) != 3 or len(array_y.dims) != 3:
            raise ValidationError('A correlation coefficient map can only be produced'
                                  ' if both provided datasets are 3D datasets with'
                                  ' lon/lat/time dimensionality, or if a combination'
                                  ' of a 3D lon/lat/time dataset and a 1D timeseries'
                                  ' is provided.')

        if array_x.values.shape != array_y.values.shape:
            raise ValidationError(f'The provided variables {var_x} and {var_y} do not have the'
                                  ' same shape, Pearson correlation can not be'
                                  ' performed. Please review operation'
                                  ' documentation')

        if (not ds_x['lat'].equals(ds_y['lat']) or not ds_x['lon'].equals(ds_y['lon'])):
            raise ValidationError('When performing a pixel by pixel correlation the'
                                  ' datasets have to have the same lat/lon'
                                  ' definition. Consider running coregistration'
                                  ' first')

    elif (((len(array_x.dims) == 3) and (len(array_y.dims) != 1))
          or ((len(array_x.dims) == 1) and (len(array_y.dims) != 3))
          or ((len(array_x.dims) != 3) and (len(array_y.dims) == 1))
          or ((len(array_x.dims) != 1) and (len(array_y.dims) == 3))):
        raise ValidationError('A correlation coefficient map can only be produced'
                              ' if both provided datasets are 3D datasets with'
                              ' lon/lat/time dimensionality, or if a combination'
                              ' of a 3D lon/lat/time dataset and a 1D timeseries'
                              ' is provided.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValidationError('The length of the time dimension differs between'
                              ' the given datasets. Can not perform the calculation'
                              ', please review operation documentation.')

    if len(array_x['time']) < 3:
        raise ValidationError('The length of the time dimension should not be less'
                              ' than three to run the calculation.')

    # Do pixel by pixel correlation
    retset = _pearsonr(array_x, array_y, monitor)
    retset.attrs['Cate_Description'] = f'Correlation between {var_y} {var_x}'

    return adjust_spatial_attrs(retset)
Example #21
0
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 type: str = '2D Histogram',
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables of two variables given by datasets *ds1*, *ds2* and the
    variable names *var1*, *var2*.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The *indexers1* is a dictionary
           or comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lat=12.4, time='2012-05-02'".
    :param indexers2: Optional indexers into data array *var2*.
    :param type: The plot type.
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    properties = DictLike.convert(properties) or {}

    datasets = ds1, ds2
    var_names = var_name1, var_name2
    vars = [None, None]
    for i in (0, 1):
        try:
            vars[i] = datasets[i][var_names[i]]
        except KeyError as e:
            raise ValidationError(f'"{var_names[i]}" is not a variable in dataset given by "ds{i+1}"') from e

    var_dim_names = set(vars[0].dims), set(vars[1].dims)
    indexer_dim_names = set(indexers1.keys()), set(indexers2.keys())

    if set(var_dim_names[0]).isdisjoint(var_dim_names[1]):
        raise ValidationError('"var1" and "var2" have no dimensions in common:'
                              f' {var_dim_names[0]} and {var_dim_names[1]}.')

    for i in (0, 1):
        if indexer_dim_names[i] and not (indexer_dim_names[i] < var_dim_names[i]):
            raise ValidationError(f'"indexers{i+1}" must be a subset of the dimensions of "var{i+1}",'
                                  f' but {indexer_dim_names[i]} is not a subset of {var_dim_names[i]}.')

    rem_dim_names1 = var_dim_names[0] - indexer_dim_names[0]
    rem_dim_names2 = var_dim_names[1] - indexer_dim_names[1]
    if rem_dim_names1 != rem_dim_names2:
        raise ValidationError('Remaining dimensions of data from "var1" must be equal to'
                              f' remaining dimensions of data from "var2",'
                              f' but {rem_dim_names1} is not equal to {rem_dim_names2}.'
                              ' You may need to use the indexers correctly.')

    indexers = indexers1, indexers2
    labels = [None, None]
    for i in (0, 1):
        # Note, long_name can be really long, too long.
        # name = vars[i].attrs.get('long_name', var_names[i])
        name = var_names[i]
        units = vars[i].attrs.get('units', '-')
        labels[i] = f'{name} ({units})'
        if indexers[i]:
            try:
                vars[i] = vars[i].sel(method='nearest', **indexers[i])
            except (KeyError, ValueError, TypeError) as e:
                raise ValidationError(f'"indexers{i+1}" is not valid for "var{i+1}": {e}') from e
            labels[i] += " at " + ",".join(f"{key} = {value}" for key, value in indexers[i].items())

    shape1 = vars[0].shape
    shape2 = vars[1].shape
    if shape1 != shape2:
        raise ValidationError('Remaining shape of data from "var1" must be equal to'
                              ' remaining shape of data from "var2",'
                              f' but {shape1} is not equal to {shape2}.'
                              ' You may need to use the "coregister" operation first.')

    figure = plt.figure(figsize=(8, 8))
    ax = figure.add_subplot(111)

    try:
        x = vars[0].values.flatten()
        y = vars[1].values.flatten()
    except MemoryError as e:
        raise ValidationError('Out of memory. Try using a data subset'
                              ' or specify indexers to reduce number of dimensions.') from e

    default_cmap = 'Reds'

    if type == 'Point':
        ax.grid(color='grey', linestyle='-', linewidth=0.25, alpha=0.5)
        if 'alpha' not in properties:
            properties['alpha'] = 0.25
        if 'markerfacecolor' not in properties:
            properties['markerfacecolor'] = '#880000'
        if 'markeredgewidth' not in properties:
            properties['markeredgewidth'] = 0.0
        if 'markersize' not in properties:
            properties['markersize'] = 5.0
        ax.plot(x, y, '.', **properties)
    elif type == '2D Histogram':
        if 'cmap' not in properties:
            properties['cmap'] = default_cmap
        if 'bins' not in properties:
            properties['bins'] = (256, 256)
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        if 'range' not in properties:
            xrange = np.nanpercentile(x, [0, 100])
            yrange = np.nanpercentile(y, [0, 100])
            properties['range'] = [xrange, yrange]
        h, xedges, yedges, pc = ax.hist2d(x, y, **properties)
        figure.colorbar(pc, ax=ax, cmap=properties['cmap'])
    elif type == 'Hexbin':
        if 'cmap' not in properties:
            properties['cmap'] = default_cmap
        if 'gridsize' not in properties:
            properties['gridsize'] = (64, 64)
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        x = np.ma.masked_invalid(x, copy=False)
        y = np.ma.masked_invalid(y, copy=False)
        collection = ax.hexbin(x, y, **properties)
        figure.colorbar(collection, ax=ax, cmap=properties['cmap'])

    ax.set_xlabel(labels[0])
    ax.set_ylabel(labels[1])

    if title:
        ax.set_title(title)

    # see https://matplotlib.org/tutorials/intermediate/tight_layout_guide.html
    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #22
0
def plot_scatter(ds1: xr.Dataset,
                 ds2: xr.Dataset,
                 var1: VarName.TYPE,
                 var2: VarName.TYPE,
                 indexers1: DictLike.TYPE = None,
                 indexers2: DictLike.TYPE = None,
                 type: str = '2D Histogram',
                 title: str = None,
                 properties: DictLike.TYPE = None,
                 file: str = None) -> Figure:
    """
    Create a scatter plot of two variables of two variables given by datasets *ds1*, *ds2* and the
    variable names *var1*, *var2*.

    :param ds1: Dataset that contains the variable named by *var1*.
    :param ds2: Dataset that contains the variable named by *var2*.
    :param var1: The name of the first variable to plot
    :param var2: The name of the second variable to plot
    :param indexers1: Optional indexers into data array *var1*. The *indexers1* is a dictionary
           or comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "lat=12.4, time='2012-05-02'".
    :param indexers2: Optional indexers into data array *var2*.
    :param type: The plot type.
    :param title: optional plot title
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5), label='Sea Surface Temperature'"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/devdocs/api/_as_gen/matplotlib.patches.Patch.html#matplotlib.patches.Patch
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    var_name1 = VarName.convert(var1)
    var_name2 = VarName.convert(var2)
    indexers1 = DictLike.convert(indexers1) or {}
    indexers2 = DictLike.convert(indexers2) or {}
    properties = DictLike.convert(properties) or {}

    datasets = ds1, ds2
    var_names = var_name1, var_name2
    vars = [None, None]
    for i in (0, 1):
        try:
            vars[i] = datasets[i][var_names[i]]
        except KeyError as e:
            raise ValidationError(
                f'"{var_names[i]}" is not a variable in dataset given by "ds{i+1}"'
            ) from e

    var_dim_names = set(vars[0].dims), set(vars[1].dims)
    indexer_dim_names = set(indexers1.keys()), set(indexers2.keys())

    if set(var_dim_names[0]).isdisjoint(var_dim_names[1]):
        raise ValidationError('"var1" and "var2" have no dimensions in common:'
                              f' {var_dim_names[0]} and {var_dim_names[1]}.')

    for i in (0, 1):
        if indexer_dim_names[i] and not (indexer_dim_names[i] <
                                         var_dim_names[i]):
            raise ValidationError(
                f'"indexers{i+1}" must be a subset of the dimensions of "var{i+1}",'
                f' but {indexer_dim_names[i]} is not a subset of {var_dim_names[i]}.'
            )

    rem_dim_names1 = var_dim_names[0] - indexer_dim_names[0]
    rem_dim_names2 = var_dim_names[1] - indexer_dim_names[1]
    if rem_dim_names1 != rem_dim_names2:
        raise ValidationError(
            'Remaining dimensions of data from "var1" must be equal to'
            f' remaining dimensions of data from "var2",'
            f' but {rem_dim_names1} is not equal to {rem_dim_names2}.'
            ' You may need to use the indexers correctly.')

    indexers = indexers1, indexers2
    labels = [None, None]
    for i in (0, 1):
        # Note, long_name can be really long, too long.
        # name = vars[i].attrs.get('long_name', var_names[i])
        name = var_names[i]
        units = vars[i].attrs.get('units', '-')
        labels[i] = f'{name} ({units})'
        if indexers[i]:
            try:
                vars[i] = vars[i].sel(method='nearest', **indexers[i])
            except (KeyError, ValueError, TypeError) as e:
                raise ValidationError(
                    f'"indexers{i+1}" is not valid for "var{i+1}": {e}') from e
            labels[i] += " at " + ",".join(
                f"{key} = {value}" for key, value in indexers[i].items())

    shape1 = vars[0].shape
    shape2 = vars[1].shape
    if shape1 != shape2:
        raise ValidationError(
            'Remaining shape of data from "var1" must be equal to'
            ' remaining shape of data from "var2",'
            f' but {shape1} is not equal to {shape2}.'
            ' You may need to use the "coregister" operation first.')

    figure = plt.figure(figsize=(8, 8))
    ax = figure.add_subplot(111)

    try:
        x = vars[0].values.flatten()
        y = vars[1].values.flatten()
    except MemoryError as e:
        raise ValidationError(
            'Out of memory. Try using a data subset'
            ' or specify indexers to reduce number of dimensions.') from e

    default_cmap = 'Reds'

    if type == 'Point':
        ax.grid(color='grey', linestyle='-', linewidth=0.25, alpha=0.5)
        if 'alpha' not in properties:
            properties['alpha'] = 0.25
        if 'markerfacecolor' not in properties:
            properties['markerfacecolor'] = '#880000'
        if 'markeredgewidth' not in properties:
            properties['markeredgewidth'] = 0.0
        if 'markersize' not in properties:
            properties['markersize'] = 5.0
        ax.plot(x, y, '.', **properties)
    elif type == '2D Histogram':
        if 'cmap' not in properties:
            properties['cmap'] = default_cmap
        if 'bins' not in properties:
            properties['bins'] = (256, 256)
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        if 'range' not in properties:
            xrange = np.nanpercentile(x, [0, 100])
            yrange = np.nanpercentile(y, [0, 100])
            properties['range'] = [xrange, yrange]
        h, xedges, yedges, pc = ax.hist2d(x, y, **properties)
        figure.colorbar(pc, ax=ax, cmap=properties['cmap'])
    elif type == 'Hexbin':
        if 'cmap' not in properties:
            properties['cmap'] = default_cmap
        if 'gridsize' not in properties:
            properties['gridsize'] = (64, 64)
        if 'norm' not in properties:
            properties['norm'] = matplotlib.colors.LogNorm()
        x = np.ma.masked_invalid(x, copy=False)
        y = np.ma.masked_invalid(y, copy=False)
        collection = ax.hexbin(x, y, **properties)
        figure.colorbar(collection, ax=ax, cmap=properties['cmap'])

    ax.set_xlabel(labels[0])
    ax.set_ylabel(labels[1])

    if title:
        ax.set_title(title)

    # see https://matplotlib.org/tutorials/intermediate/tight_layout_guide.html
    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #23
0
def pearson_correlation(ds_x: DatasetLike.TYPE,
                        ds_y: DatasetLike.TYPE,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Perform Pearson correlation on two datasets and produce a lon/lat map of
    correlation coefficients and the correspoding p_values.

    In case two 3D lon/lat/time datasets are provided, pixel by pixel
    correlation will be performed. It is also possible two pro
    Perform Pearson correlation analysis on two time/lat/lon datasets and
    produce a lat/lon map of correlation coefficients and p_values of
    underlying timeseries in the provided datasets.

    The lat/lon definition of both datasets has to be the same. The length of
    the time dimension should be equal, but not neccessarily have the same
    definition. E.g., it is possible to correlate different times of the same
    area.

    There are 'x' and 'y' datasets. Positive correlations imply that as x
    grows, so does y. Negative correlations imply that as x increases, y
    decreases.

    For more information how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: a dataset containing a map of correlation coefficients and p_values
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    # Further validate inputs
    if array_x.dims == array_y.dims:
        if len(array_x.dims) != 3 or len(array_y.dims) != 3:
            raise ValueError(
                'A correlation coefficient map can only be produced'
                ' if both provided datasets are 3D datasets with'
                ' lon/lat/time dimensionality, or if a combination'
                ' of a 3D lon/lat/time dataset and a 1D timeseries'
                ' is provided.')

        if array_x.values.shape != array_y.values.shape:
            raise ValueError('The provided variables {} and {} do not have the'
                             ' same shape, Pearson correlation can not be'
                             ' performed. Please review operation'
                             ' documentation'.format(var_x, var_y))

        if (not ds_x['lat'].equals(ds_y['lat'])
                or not ds_x['lon'].equals(ds_y['lon'])):
            raise ValueError('When performing a pixel by pixel correlation the'
                             ' datasets have to have the same lat/lon'
                             ' definition. Consider running coregistration'
                             ' first')

    elif (((len(array_x.dims) == 3) and (len(array_y.dims) != 1))
          or ((len(array_x.dims) == 1) and (len(array_y.dims) != 3))
          or ((len(array_x.dims) != 3) and (len(array_y.dims) == 1))
          or ((len(array_x.dims) != 1) and (len(array_y.dims) == 3))):
        raise ValueError('A correlation coefficient map can only be produced'
                         ' if both provided datasets are 3D datasets with'
                         ' lon/lat/time dimensionality, or if a combination'
                         ' of a 3D lon/lat/time dataset and a 1D timeseries'
                         ' is provided.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValueError('The length of the time dimension differs between'
                         ' the given datasets. Can not perform the calculation'
                         ', please review operation documentation.')

    if len(array_x['time']) < 3:
        raise ValueError('The length of the time dimension should not be less'
                         ' than three to run the calculation.')

    # Do pixel by pixel correlation
    retset = _pearsonr(array_x, array_y, monitor)
    retset.attrs['Cate_Description'] = 'Correlation between {} {}'.format(
        var_y, var_x)

    return adjust_spatial_attrs(retset)
Example #24
0
def pearson_correlation_scalar(
        ds_x: DatasetLike.TYPE,
        ds_y: DatasetLike.TYPE,
        var_x: VarName.TYPE,
        var_y: VarName.TYPE,
        monitor: Monitor = Monitor.NONE) -> pd.DataFrame:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Performs a simple correlation analysis on two data variables and returns
    a correlation coefficient and the corresponding p_value.

    Positive correlation implies that as x grows, so does y. Negative
    correlation implies that as x increases, y decreases.

    For more information how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param monitor: a progress monitor.
    :return: Data frame {'corr_coef': correlation coefficient, 'p_value': probability value}
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    if (array_x.dims != array_y.dims):
        raise ValidationError(
            'Both datasets should feature the same'
            ' dimensionality. Currently provided ds_x[var_x] '
            f'has {array_x.dims}, provided ds_y[var_y]'
            f' has {array_y.dims}')

    for dim in array_x.dims:
        if len(array_x[dim]) != len(array_y[dim]):
            raise ValidationError(
                'All dimensions of both provided data variables'
                f' must be the same length. Currently {dim} of ds_x[var_x]'
                f' has {len(array_x[dim])} values, while'
                f' {dim} of ds_y[var_y] has {len(array_y[dim])} values.'
                ' You may want to try to coregister the datasets beforehand.')

    n_vals = 1
    for dim in array_x.dims:
        n_vals = n_vals * len(array_x[dim])

    if n_vals < 3:
        raise ValidationError(
            'There should be no less than 3 values in both data variables'
            f' to perform the correlation. Currently there are {n_vals} values'
        )

    with monitor.observing("Calculate Pearson correlation"):
        cc, pv = pearsonr(array_x.stack(z=array_x.dims),
                          array_y.stack(z=array_y.dims))

    return pd.DataFrame({'corr_coef': [cc], 'p_value': [pv]})
Example #25
0
def animate_map(ds: xr.Dataset,
                var: VarName.TYPE = None,
                animate_dim: str = 'time',
                interval: int = 200,
                true_range: bool = False,
                indexers: DictLike.TYPE = None,
                region: PolygonLike.TYPE = None,
                projection: str = 'PlateCarree',
                central_lon: float = 0.0,
                title: str = None,
                contour_plot: bool = False,
                cmap_params: DictLike.TYPE = None,
                plot_properties: DictLike.TYPE = None,
                file: str = None,
                monitor: Monitor = Monitor.NONE) -> HTML:
    """
    Create a geographic map animation for the variable given by dataset *ds* and variable name *var*.

    Creates an animation of the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is animated.
    It is also possible to set extents of the animation. If no extents
    are given, a global animation is created.

    The following file formats for saving the animation are supported: html

    :param ds: the dataset containing the variable to animate
    :param var: the variable's name
    :param animate_dim: Dimension to animate, if none given defaults to time.
    :param interval: Delay between frames in milliseconds. Defaults to 200.
    :param true_range: If True, calculates colormap and colorbar configuration parameters from the
    whole dataset. Can potentially take a lot of time. Defaults to False, in which case the colormap
    is calculated from the first frame.
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "layer=4".
    :param region: Region to animate
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If true plot a filled contour plot of data, otherwise plots a pixelated colormesh
    :param cmap_params: optional additional colormap configuration parameters,
           e.g. "vmax=300, cmap='magma'"
           For full reference refer to
           http://xarray.pydata.org/en/stable/generated/xarray.plot.contourf.html
    :param plot_properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the animation
    :param monitor: A progress monitor.
    :return: An animation in HTML format
    """
    if not isinstance(ds, xr.Dataset):
        raise NotImplementedError('Only gridded datasets are currently supported')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)

    try:
        var = ds[var_name]
    except KeyError:
        raise ValidationError('Provided variable name "{}" does not exist in the given dataset'.format(var_name))

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(plot_properties) or {}
    cmap_params = DictLike.convert(cmap_params) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib can not plot datasets with less than these dimensions with
        # contourf and pcolormesh methods
        raise ValidationError('The minimum dataset spatial dimensions to create a map'
                              ' plot are (2,2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValidationError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()

    if not animate_dim:
        animate_dim = 'time'

    indexers[animate_dim] = var[animate_dim][0]

    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    with monitor.starting("animate", len(var[animate_dim]) + 3):
        if true_range:
            data_min, data_max = _get_min_max(var, monitor=monitor)
        else:
            data_min, data_max = _get_min_max(var_data, monitor=monitor)

        cmap_params = determine_cmap_params(data_min, data_max, **cmap_params)
        plot_kwargs = {**properties, **cmap_params}

        # Plot the first frame to set-up the axes with the colorbar properly
        # transform keyword is for the coordinate our data is in, which in case of a
        # 'normal' lat/lon dataset is PlateCarree.
        if contour_plot:
            var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(), subplot_kws={'projection': proj},
                                   add_colorbar=True, **plot_kwargs)
        else:
            var_data.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), subplot_kws={'projection': proj},
                                     add_colorbar=True, **plot_kwargs)
        if title:
            ax.set_title(title)
        figure.tight_layout()
        monitor.progress(1)

        def run(value):
            ax.clear()
            if extents:
                ax.set_extent(extents, ccrs.PlateCarree())
            else:
                ax.set_global()
            ax.coastlines()
            indexers[animate_dim] = value
            var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))
            var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(), subplot_kws={'projection': proj},
                                   add_colorbar=False, **plot_kwargs)
            if title:
                ax.set_title(title)
            monitor.progress(1)
            return ax
        anim = animation.FuncAnimation(figure, run, [i for i in var[animate_dim]],
                                       interval=interval, blit=False, repeat=False)
        anim_html = anim.to_jshtml()

        # Prevent the animation for running after it's finished
        del anim

        # Delete the rogue temp-file
        try:
            os.remove('None0000000.png')
        except FileNotFoundError:
            pass

        if file:
            with open(file, 'w') as outfile:
                outfile.write(anim_html)
                monitor.progress(1)

    return HTML(anim_html)
Example #26
0
def plot_hovmoeller(ds: xr.Dataset,
                    var: VarName.TYPE = None,
                    x_axis: DimName.TYPE = None,
                    y_axis: DimName.TYPE = None,
                    method: str = 'mean',
                    contour: bool = True,
                    title: str = None,
                    file: str = None,
                    monitor: Monitor = Monitor.NONE,
                    **kwargs) -> Figure:
    """
    Create a Hovmoeller plot of the given dataset. Dimensions other than
    the ones defined as x and y axis will be aggregated using the given
    method to produce the plot.

    :param ds: Dataset to plot
    :param var: Name of the variable to plot
    :param x_axis: Dimension to show on x axis
    :param y_axis: Dimension to show on y axis
    :param method: Aggregation method
    :param contour: Whether to produce a contour plot
    :param title: Plot title
    :param file: path to a file in which to save the plot
    :param monitor: A progress monitor
    :param kwargs: Keyword arguments to pass to underlying xarray plotting fuction
    """
    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    if not x_axis:
        x_axis = var.dims[0]
    else:
        x_axis = DimName.convert(x_axis)

    if not y_axis:
        try:
            y_axis = var.dims[1]
        except IndexError:
            raise ValidationError(
                'Given dataset variable should have at least two dimensions.')
    else:
        y_axis = DimName.convert(y_axis)

    if x_axis == y_axis:
        raise ValidationError('Dimensions should differ between plot axis.')

    dims = list(var.dims)
    try:
        dims.remove(x_axis)
        dims.remove(y_axis)
    except ValueError:
        raise ValidationError(
            'Given dataset variable: {} does not feature requested dimensions:\
 {}, {}.'.format(var_name, x_axis, y_axis))

    ufuncs = {
        'min': np.nanmin,
        'max': np.nanmax,
        'mean': np.nanmean,
        'median': np.nanmedian,
        'sum': np.nansum
    }

    with monitor.starting("Plot Hovmoeller", total_work=100):
        monitor.progress(5)
        with monitor.child(90).observing("Aggregate"):
            var = var.reduce(ufuncs[method], dim=dims)
        monitor.progress(5)

    figure = plt.figure()
    ax = figure.add_subplot(111)
    if x_axis == 'time':
        figure.autofmt_xdate()

    if contour:
        var.plot.contourf(ax=ax, x=x_axis, y=y_axis, **kwargs)
    else:
        var.plot.pcolormesh(ax=ax, x=x_axis, y=y_axis, **kwargs)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None
Example #27
0
 def test_accepts(self):
     self.assertTrue(VarName.accepts('aa'))
     self.assertFalse(VarName.accepts(['aa', 'bb', 'cc']))
     self.assertFalse(VarName.accepts(1.0))
Example #28
0
def pearson_correlation(ds_x: xr.Dataset,
                        ds_y: xr.Dataset,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        file: str = None,
                        corr_type: str = 'pixel_by_pixel') -> xr.Dataset:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    For more information how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    The provided variables have to have the same shape, but depending on the
    type of variables and chosen correlation type, not necessarily the same
    definition for all dimensions. E.g., it is possible to correlate two
    datasets of the same area at different times.

    If two 1D or 2D variables are provided, a single pair of correlation
    coefficient and p_value will be calculated and returned, as well as
    optionally saved in a text file.

    In case 3D time/lat/lon variables are provided, a correlation will be
    perfomed according to the given correlation type. In case a pixel_by_pixel
    correlation is chosen, the datasets have to have the same lat/lon
    definition, so that a 2D lat/lon map of correlation coefficients, as well
    as p_values can be constructed.

    There are 'x' and 'y' datasets. Positive correlations imply that as x
    grows, so does y. Negative correlations imply that as x increases, y
    decreases.

    :param ds_y: The 'y' dataset
    :param ds_x: The 'x' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'dependent' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'variable' dataset
    :param file: Filepath variable. If given, this is where the results will be saved in a text file.
    :param corr_type: Correlation type to use for 3D time/lat/lon variables.
    """
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    if len(array_x.dims) > 3 or len(array_y.dims) > 3:
        raise NotImplementedError(
            'Pearson correlation for multi-dimensional variables is not yet implemented.'
        )

    if array_x.values.shape != array_y.values.shape:
        raise ValueError(
            'The provided variables {} and {} do not have the same shape, '
            'Pearson correlation can not be performed.'.format(var_x, var_y))

    # Perform a simple Pearson correlation that returns just a coefficient and
    # a p_value.
    if len(array_x.dims) < 3:
        return _pearson_simple(ds_x, ds_y, var_x, var_y, file)

    if corr_type != 'pixel_by_pixel':
        raise NotImplementedError(
            'Only pixel by pixel Pearson correlation is currently implemented for '
            'time/lat/lon dataset variables.')

    if (not ds_x['lat'].equals(ds_y['lat'])
            or not ds_x['lon'].equals(ds_y['lon'])):
        raise ValueError(
            'When performing a pixel by pixel correlation the datasets have to have the same '
            'lat/lon definition.')

    # Do pixel by pixel correlation

    lat = ds_x['lat']
    lon = ds_y['lon']

    corr_coef = np.zeros([len(lat), len(lon)])
    p_value = np.zeros([len(lat), len(lon)])

    for lai in range(0, len(lat)):
        for loi in range(0, len(lon)):
            x = array_x.isel(lat=lai, lon=loi).values
            y = array_y.isel(lat=lai, lon=loi).values
            corr_coef[lai, loi], p_value[lai, loi] = pearsonr(x, y)

    retset = xr.Dataset({
        'corr_coef': (['lat', 'lon'], corr_coef),
        'p_value': (['lat', 'lon'], p_value),
        'lat': lat,
        'lon': lon
    })
    retset.attrs['Cate_Description'] = 'Correlation between {} {}'.format(
        var_y, var_x)

    if file:
        with open(file, "w") as text_file:
            print(retset, file=text_file)
            print(retset['corr_coef'], file=text_file)
            print(retset['p_value'], file=text_file)

    return retset
Example #29
0
 def test_format(self):
     self.assertEqual('aa', VarName.format('aa'))
Example #30
0
 def test_accepts(self):
     self.assertTrue(VarName.accepts('aa'))
     self.assertFalse(VarName.accepts(['aa', 'bb', 'cc']))
     self.assertFalse(VarName.accepts(1.0))
Example #31
0
def plot_map(ds: xr.Dataset,
             var: VarName.TYPE = None,
             indexers: DictLike.TYPE = None,
             region: PolygonLike.TYPE = None,
             projection: str = 'PlateCarree',
             central_lon: float = 0.0,
             title: str = None,
             contour_plot: bool = False,
             properties: DictLike.TYPE = None,
             file: str = None) -> object:
    """
    Create a geographic map plot for the variable given by dataset *ds* and variable name *var*.

    Plots the given variable from the given dataset on a map with coastal lines.
    In case no variable name is given, the first encountered variable in the
    dataset is plotted. In case no *time* is given, the first time slice
    is taken. It is also possible to set extents of the plot. If no extents
    are given, a global plot is created.

    The plot can either be shown using pyplot functionality, or saved,
    if a path is given. The following file formats for saving the plot
    are supported: eps, jpeg, jpg, pdf, pgf, png, ps, raw, rgba, svg,
    svgz, tif, tiff

    :param ds: the dataset containing the variable to plot
    :param var: the variable's name
    :param indexers: Optional indexers into data array of *var*. The *indexers* is a dictionary
           or a comma-separated string of key-value pairs that maps the variable's dimension names
           to constant labels. e.g. "layer=4".
    :param region: Region to plot
    :param projection: name of a global projection, see http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
    :param central_lon: central longitude of the projection in degrees
    :param title: an optional title
    :param contour_plot: If true plot a filled contour plot of data, otherwise plots a pixelated colormesh
    :param properties: optional plot properties for Python matplotlib,
           e.g. "bins=512, range=(-1.5, +1.5)"
           For full reference refer to
           https://matplotlib.org/api/lines_api.html and
           https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.contourf.html
    :param file: path to a file in which to save the plot
    :return: a matplotlib figure object or None if in IPython mode
    """
    if not isinstance(ds, xr.Dataset):
        raise ValidationError('Only gridded datasets are currently supported.')

    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    indexers = DictLike.convert(indexers) or {}
    properties = DictLike.convert(properties) or {}

    extents = None
    bounds = handle_plot_polygon(region)
    if bounds:
        lon_min, lat_min, lon_max, lat_max = bounds
        extents = [lon_min, lon_max, lat_min, lat_max]

    if len(ds.lat) < 2 or len(ds.lon) < 2:
        # Matplotlib can not plot datasets with less than these dimensions with
        # contourf and pcolormesh methods
        raise ValidationError('The minimum dataset spatial dimensions to create a map'
                              ' plot are (2,2)')

    # See http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html#
    if projection == 'PlateCarree':
        proj = ccrs.PlateCarree(central_longitude=central_lon)
    elif projection == 'LambertCylindrical':
        proj = ccrs.LambertCylindrical(central_longitude=central_lon)
    elif projection == 'Mercator':
        proj = ccrs.Mercator(central_longitude=central_lon)
    elif projection == 'Miller':
        proj = ccrs.Miller(central_longitude=central_lon)
    elif projection == 'Mollweide':
        proj = ccrs.Mollweide(central_longitude=central_lon)
    elif projection == 'Orthographic':
        proj = ccrs.Orthographic(central_longitude=central_lon)
    elif projection == 'Robinson':
        proj = ccrs.Robinson(central_longitude=central_lon)
    elif projection == 'Sinusoidal':
        proj = ccrs.Sinusoidal(central_longitude=central_lon)
    elif projection == 'NorthPolarStereo':
        proj = ccrs.NorthPolarStereo(central_longitude=central_lon)
    elif projection == 'SouthPolarStereo':
        proj = ccrs.SouthPolarStereo(central_longitude=central_lon)
    else:
        raise ValidationError('illegal projection: "%s"' % projection)

    figure = plt.figure(figsize=(8, 4))
    ax = plt.axes(projection=proj)
    if extents:
        ax.set_extent(extents, ccrs.PlateCarree())
    else:
        ax.set_global()

    ax.coastlines()
    var_data = get_var_data(var, indexers, remaining_dims=('lon', 'lat'))

    # transform keyword is for the coordinate our data is in, which in case of a
    # 'normal' lat/lon dataset is PlateCarree.
    if contour_plot:
        var_data.plot.contourf(ax=ax, transform=ccrs.PlateCarree(), subplot_kws={'projection': proj}, **properties)
    else:
        var_data.plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), subplot_kws={'projection': proj}, **properties)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        try:
            figure.savefig(file)
        except MemoryError:
            raise MemoryError('Not enough memory to save the plot. Try using a different file format'
                              ' or enabling contour_plot.')

    return figure if not in_notebook() else ax
Example #32
0
 def test_format(self):
     self.assertEqual('aa', VarName.format('aa'))
Example #33
0
def plot_hovmoeller(ds: xr.Dataset,
                    var: VarName.TYPE = None,
                    x_axis: DimName.TYPE = None,
                    y_axis: DimName.TYPE = None,
                    method: str = 'mean',
                    contour: bool = True,
                    title: str = None,
                    file: str = None,
                    monitor: Monitor = Monitor.NONE,
                    **kwargs) -> Figure:
    """
    Create a Hovmoeller plot of the given dataset. Dimensions other than
    the ones defined as x and y axis will be aggregated using the given
    method to produce the plot.

    :param ds: Dataset to plot
    :param var: Name of the variable to plot
    :param x_axis: Dimension to show on x axis
    :param y_axis: Dimension to show on y axis
    :param method: Aggregation method
    :param contour: Whether to produce a contour plot
    :param title: Plot title
    :param file: path to a file in which to save the plot
    :param monitor: A progress monitor
    :param kwargs: Keyword arguments to pass to underlying xarray plotting fuction
    """
    var_name = None
    if not var:
        for key in ds.data_vars.keys():
            var_name = key
            break
    else:
        var_name = VarName.convert(var)
    var = ds[var_name]

    if not x_axis:
        x_axis = var.dims[0]
    else:
        x_axis = DimName.convert(x_axis)

    if not y_axis:
        try:
            y_axis = var.dims[1]
        except IndexError:
            raise ValidationError('Given dataset variable should have at least two dimensions.')
    else:
        y_axis = DimName.convert(y_axis)

    if x_axis == y_axis:
        raise ValidationError('Dimensions should differ between plot axis.')

    dims = list(var.dims)
    try:
        dims.remove(x_axis)
        dims.remove(y_axis)
    except ValueError:
        raise ValidationError('Given dataset variable: {} does not feature requested dimensions:\
 {}, {}.'.format(var_name, x_axis, y_axis))

    ufuncs = {'min': np.nanmin, 'max': np.nanmax, 'mean': np.nanmean,
              'median': np.nanmedian, 'sum': np.nansum}

    with monitor.starting("Plot Hovmoeller", total_work=100):
        monitor.progress(5)
        with monitor.child(90).observing("Aggregate"):
            var = var.reduce(ufuncs[method], dim=dims)
        monitor.progress(5)

    figure = plt.figure()
    ax = figure.add_subplot(111)
    if x_axis == 'time':
        figure.autofmt_xdate()

    if contour:
        var.plot.contourf(ax=ax, x=x_axis, y=y_axis, **kwargs)
    else:
        var.plot.pcolormesh(ax=ax, x=x_axis, y=y_axis, **kwargs)

    if title:
        ax.set_title(title)

    figure.tight_layout()

    if file:
        figure.savefig(file)

    return figure if not in_notebook() else None