예제 #1
0
 def test_mare(self):
     """Check ``mrae`` against a published reference value and ``mare``
     against ``mape``.

     The sample data and the expected MRAE of roughly 0.348 come from
     https://support.numxl.com/hc/en-us/articles/115001223363-MRAE-Mean-Relative-Absolute-Error
     """
     # The first column is handed to RegressionMetrics as its first argument
     # and the second column as its second argument.
     data = np.array([
         [-2.9, -2.95],
         [-2.83, -2.7],
         [-0.95, -1.00],
         [-0.88, -0.68],
         [1.21, 1.50],
         [-1.67, -1.00],
         [0.83, 0.90],
         [-0.27, -0.37],
         [1.36, 1.26],
         [-0.34, -0.54],
         [0.48, 0.58],
         [-2.83, -2.13],
         [-0.95, -0.75],
         [-0.88, -0.89],
         [1.21, 1.25],
         [-1.67, -1.65],
         [-2.99, -3.20],
         [1.24, 1.29],
         [0.64, 0.60],
     ])
     errs = RegressionMetrics(data[:, 0], data[:, 1])

     # (actual, desired) order so that a failure report labels the values
     # correctly; the comparison itself is symmetric.
     np.testing.assert_almost_equal(errs.mrae(), 0.348, decimal=2)

     # Compare floats with a tolerance: exact ``==`` can fail on harmless
     # rounding differences between the two computations.
     np.testing.assert_allclose(errs.mare() * 100.0, errs.mape())
예제 #2
0
        def f(**kwargs):
            """Objective function: fit an XGBoost regressor and return its mse.

            The keyword arguments are used as the ``xgboostregressor``
            parameters after the objective is forced to ``reg:squarederror``
            and the dictionary has been passed through ``Jsonize``. The mse is
            computed on the predictions for ``model.test_indices``.
            """
            kwargs.update(objective='reg:squarederror')
            xgb_params = Jsonize(kwargs)()

            settings = {
                "inputs": inputs,
                "outputs": outputs,
                "lookback": 1,
                "batches": "2d",
                "val_data": "same",
                "test_fraction": 0.3,
                "model": {"xgboostregressor": xgb_params},
                "transformation": None,
                "data": data,
                "prefix": 'testing',
                "verbosity": 0,
            }
            model = Model(**settings)
            model.fit(indices="random")

            observed, predicted = model.predict(indices=model.test_indices,
                                                prefix='test')
            mse = RegressionMetrics(observed, predicted).mse()
            print(f"Validation mse {mse}")
            return mse
예제 #3
0
    def fn(**suggestion):
        """Objective function: fit an XGBoost regressor and return its mse.

        ``suggestion`` is used unchanged as the ``xgboostregressor``
        parameters. The mse is computed on the predictions for
        ``model.test_indices``.
        """
        settings = {
            "inputs": inputs,
            "outputs": outputs,
            "model": {"xgboostregressor": suggestion},
            "data": data,
            "prefix": f'test_{algorithm}_xgboost_{backend}',
            "verbosity": 0,
        }
        model = Model(**settings)
        model.fit(indices="random")

        observed, predicted = model.predict(indices=model.test_indices,
                                            prefix='test')
        return RegressionMetrics(observed, predicted).mse()
예제 #4
0
        def fn(**suggestion):
            """Objective function: fit an XGBoost regressor and return its mse.

            ``suggestion`` is used unchanged as the ``xgboostregressor``
            parameters. The mse is computed on the predictions for
            ``model.test_indices`` and is printed before being returned.
            """
            settings = {
                "inputs": inputs,
                "outputs": outputs,
                "model": {"xgboostregressor": suggestion},
                "data": data,
                "prefix": 'test_tpe_xgboost',
                "verbosity": 0,
            }
            model = Model(**settings)
            model.fit(indices="random")

            observed, predicted = model.predict(indices=model.test_indices,
                                                prefix='test')
            mse = RegressionMetrics(observed, predicted).mse()
            print(f"Validation mse {mse}")
            return mse
예제 #5
0
def taylor_plot(trues: dict,
                simulations: dict,
                axis_locs: dict = None,
                cont_kws: dict = None,
                grid_kws: dict = None,
                leg_kws: dict = None,
                axis_fontdict=None,
                axis_kws: dict = None,
                **kwargs) -> None:
    """
    Helper function to plot [Taylor's](https://doi.org/10.1029/2000JD900719) plot.

    One Taylor diagram is drawn per scenario, all on a single figure. The
    figure is optionally saved, then shown and closed.

    Arguments:
        trues dict :
            a dictionary whose keys are scenarios and whose values are the
            true/observed arrays of those scenarios. Every array must contain
            more than one value.
        simulations dict :
            A dictionary with the same keys (scenarios) as `trues` whose values
            are also dictionaries. Each sub-dictionary maps model/simulation
            names to the simulated array of that model. All scenarios must
            contain the same number of models.
        axis_locs dict :
            dictionary defining axis orientation of figure. For example with two
            scenarios named 'scenario1' and 'scenario2', if we want to plot two
            plots in one column, then this argument will be
                      {'scenario1': 211,
                       'scenario2': 212}.
            Default is None, in which case a layout is looked up from the
            number of scenarios.
        cont_kws dict :
            keyword arguments related to contours. Following args can be used:
                - levels level of contours, default 5 (the legacy key `level`
                    is still accepted)
                - colors color of contours, default '0.5'
                - label_fs fontsize of labels, default 10
                - label_fmt format of labels, default '%.1f'
                - linewidths float or sequence of floats, default 1.5
                - linestyles {None, 'solid', 'dashed', 'dashdot', 'dotted'}
                - inline passed to `clabel`, default 1
            https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.contour.html

        grid_kws dict :
            keyword arguments related to grid. If None, no grid is added.
            All entries are forwarded to `add_grid` of the diagram, e.g.
                - which {'major', 'minor', 'both'}
                - axis {'both', 'x', 'y'},
            any kwargs from https://matplotlib.org/3.3.3/api/_as_gen/matplotlib.axes.Axes.grid.html

        leg_kws dict :
            keyword arguments forwarded to `fig.legend`:
                - loc, when `leg_kws` is None it is `center` for four
                    scenarios and `upper right` otherwise
                - fontsize int or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}
                - numpoints int, default: rcParams["legend.numpoints"] (default: 1)
                - markerscale float, default: rcParams["legend.markerscale"] (default: 1.0)
            https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.legend.html
            example leg_kws = {'loc': 'upper right', 'numpoints': 1, 'fontsize': 15, 'markerscale': 1}

        axis_fontdict dict :
            dictionary defining properties of left, bottom and top axis labels
            ```python
            axis_fontdict = {'left': {'fontsize': 20, 'color': 'k', 'ticklabel_fs': 14},
                             'bottom': {'fontsize': 20, 'color': 'g', 'ticklabel_fs': 14},
                             'top': {'fontsize': 20, 'color': 'k', 'ticklabel_fs': 14}}
            ```
            The user can define properties of either one or all axis. The
            dictionary passed by the caller is not modified.

        axis_kws dict :
            dictionary containing general parameters related to axis:
                - title_fontsize: int, fontsize of the per-scenario axis
                    title, default 14. The title is only drawn when there is
                    more than one scenario.

        kwargs dict :
            Following keyword arguments are optional:
                - add_ith_interval: bool, default False. The legacy
                    misspelling `add_idth_interval` is still accepted.

                - plot_bias: bool, if True, the markers will be triangles
                    pointing up when `pbias` of the model is >= 0 and down
                    otherwise. Ignored when `sim_marker` is a string or dict.

                - ref_color: str, color of reference dot, default 'r'

                - sim_marker : marker to use for simulations. It can be any valid
                    marker for matplotlib axis/plot. If None, then counting is used.
                    If string, then same marker is used for all simulations. If dict,
                    keys of dict should match with names of models in `simulations` dictionary.

                - true_label: label to use for `trues`. Default is 'Reference'.

                - intervals: list, if add_ith_interval is True, then this argument is used. It
                    must be list of lists or list of tuples, where the inner tuple/list must
                    consist of two values one each for x and y.

                - colors: 2d numpy array, defining colors. The first dimension
                    should be equal to number of models.

                - extend: bool, default False, if True, will plot negative correlation

                - save: bool, default True, if True, will save the plot

                - name: str, file name used when saving, default 'taylor.png'

                - title: str, figure title, default "". Pass None to skip
                    setting the title.

                - bbox_inches: forwarded to `plt.savefig`, default None

                - figsize: tuple defining figsize, default is (11,8).
    return:
        None

    raises:
        AssertionError: if an array in `trues` has fewer than two values, if
            `trues` and `simulations` differ in length, if the scenarios do
            not all have the same number of models or if `axis_fontdict` is
            not a dictionary.
        KeyError: if a scenario of `trues` is missing from `simulations` or
            from `axis_locs`.

    Example
    ---------
    ```python
    >>>import numpy as np
    >>>from AI4Water.utils import taylor_plot
    >>>np.random.seed(92)
    >>>taylor_plot(trues={'site1': np.random.normal(20, 40, 10)},
    ...            simulations={
    ...             "site1":
    ...                    {"LSTM": np.random.normal(20, 40, 10),
    ...                     "CNN": np.random.normal(20, 40, 10),
    ...                     "TCN": np.random.normal(20, 40, 10),
    ...                     "CNN-LSTM": np.random.normal(20, 40, 10)}
    ...            },
    ...    cont_kws={'colors': 'blue', 'linewidths': 1.0, 'linestyles': 'dotted'},
    ...    grid_kws={'axis': 'x', 'color': 'g', 'lw': 1.0},
    ...    axis_fontdict={'left': {'fontsize': 20, 'color': 'k', 'ticklabel_fs': 14},
    ...             'bottom': {'fontsize': 20, 'color': 'k', 'ticklabel_fs': 14},
    ...             'top': {'fontsize': 20, 'color': 'g', 'ticklabel_fs': 14}},
    ...    leg_kws={'fontsize': 16, 'markerscale': 2}
    ...            )
    ```
    """
    scenarios = trues.keys()

    # pformat (not pprint) so that the lengths end up in the message instead
    # of being printed to stdout and replaced by ``None``.
    assert all(len(array) > 1 for array in trues.values()), (
        "one or more array in true values has less than 2 values\n"
        + pprint.pformat({key: len(array) for key, array in trues.items()},
                         width=20))

    # The documented key is 'add_ith_interval'; the misspelt key which earlier
    # versions looked up is kept as a fallback for existing callers.
    add_ith_interval = kwargs.get('add_ith_interval',
                                  kwargs.get('add_idth_interval', False))
    ref_color = kwargs.get('ref_color', 'r')
    intervals = kwargs.get('intervals', [])
    colors = kwargs.get('colors', COLORS)
    extend = kwargs.get('extend', False)
    save = kwargs.get('save', True)
    name = kwargs.get('name', 'taylor.png')
    plot_bias = kwargs.get('plot_bias', False)
    title = kwargs.get('title', "")
    figsize = kwargs.get("figsize", (11, 8))  # width and height respectively
    bbox_inches = kwargs.get("bbox_inches", None)
    sim_marker = kwargs.get("sim_marker", None)
    true_label = kwargs.get("true_label", "Reference")

    if axis_locs is None:
        axis_locs = dict(zip(scenarios, RECTS[len(scenarios)]))

    n_plots = len(trues)
    assert n_plots == len(simulations), (
        "trues and simulations must contain the same number of scenarios")

    sims = list(simulations.values())
    models = len(sims[0])

    for m in sims:
        assert len(m) == models, (
            "all scenarios must contain the same number of models")

    def msg(key, where="simulations"):
        return f"Scenario {key} does not match any of the provided scenarios in {where}"

    for scen in scenarios:
        if scen not in simulations:
            raise KeyError(msg(scen))
        if scen not in axis_locs:
            raise KeyError(msg(scen, "axis_locs"))

    def get_marker(er, idx, _name):
        """Return (marker, markersize, linestyle) of one simulation."""
        ls = ''
        ms = 10

        # A user supplied marker takes precedence over everything else. The
        # full triple must be returned because the caller unpacks three values.
        if isinstance(sim_marker, str):
            return sim_marker, ms, ls
        if isinstance(sim_marker, dict):
            return sim_marker[_name], ms, ls

        if plot_bias:
            return ("^" if er.pbias() >= 0.0 else "v"), ms, ls

        # default: number the simulations starting from 1
        return '$%d$' % (idx + 1), ms, ls

    plt.close('all')
    fig = plt.figure(figsize=figsize)
    if title is not None:
        fig.suptitle(title, fontsize=18)

    if axis_fontdict is None:
        axis_fontdict = {}
    else:
        assert isinstance(axis_fontdict, dict)
        # shallow copy so that the dictionary of the caller is not modified
        axis_fontdict = dict(axis_fontdict)
    for k in ['left', 'right', 'bottom', 'top']:
        axis_fontdict.setdefault(k, {})

    add_grid = grid_kws is not None
    if grid_kws is None:
        grid_kws = dict()

    if cont_kws is None:
        cont_kws = dict()
    # 'levels' is the documented key, 'level' the one read by earlier versions
    contour_levels = cont_kws.get('levels', cont_kws.get('level', 5))

    # Read once, without popping, so that every scenario gets the same title
    # fontsize and the dictionary of the caller is left untouched.
    title_fontsize = (axis_kws or {}).get('title_fontsize', 14)

    for season in scenarios:

        dia = TaylorDiagram(trues[season],
                            fig=fig,
                            rect=axis_locs[season],
                            label=true_label,
                            axis_fontdict=axis_fontdict,
                            extend=extend)

        # Colour the reference point
        dia.samplePoints[0].set_color(ref_color)

        if add_ith_interval:
            for interval in intervals:
                dia.ax.plot(*interval, color='k')

        # Add samples to Taylor diagram
        for idx, (model_name, model) in enumerate(simulations[season].items()):
            er = RegressionMetrics(trues[season], model)
            stddev = np.std(model)
            corrcoef = er.corr_coeff()

            marker, ms, ls = get_marker(er, idx, model_name)

            dia.add_sample(
                stddev,
                corrcoef,
                marker=marker,
                ms=ms,
                ls=ls,
                mfc=colors[idx],
                mec=colors[idx],
                label=model_name)

        # Add RMS contours, and label them
        contours = dia.add_contours(levels=contour_levels,
                                    colors=cont_kws.get('colors', '0.5'),
                                    linewidths=cont_kws.get('linewidths', 1.5),
                                    linestyles=cont_kws.get('linestyles', None))
        dia.ax.clabel(contours,
                      inline=cont_kws.get('inline', 1),
                      fontsize=cont_kws.get('label_fs', 10),
                      fmt=cont_kws.get('label_fmt', '%.1f'))

        if add_grid:
            dia.add_grid(**grid_kws)  # Add grid
            dia._ax.axis[:].major_ticks.set_tick_out(True)  # Put ticks outward

        # Tricky: ax is the polar ax (used for plots), _ax is the
        # container (used for layout)
        if len(scenarios) > 1:
            dia._ax.set_title(season.capitalize(),
                              fontdict={'fontsize': title_fontsize})

    # Add a figure legend. For loc option, place x,y tuple inside [ ].
    # Can also use special options here:
    # http://matplotlib.sourceforge.net/users/legend_guide.html

    if leg_kws is None:
        position = "center" if len(scenarios) == 4 else "upper right"
        leg_kws = {'loc': position}

    # The legend entries are taken from the diagram of the last scenario.
    fig.legend(dia.samplePoints, [p.get_label() for p in dia.samplePoints],
               **leg_kws)

    fig.tight_layout()

    if save:
        plt.savefig(name, dpi=400, bbox_inches=bbox_inches)
    plt.show()
    plt.close('all')
    return
예제 #6
0
from AI4Water.utils import tf_losses
from AI4Water.utils.SeqMetrics import RegressionMetrics


import tensorflow as tf
import tensorflow.keras.backend as K

# Module-level fixtures shared by all tests below.
# NOTE(review): reset_graph presumably clears the TensorFlow graph state
# before the tensors are created; confirm against tf_losses.
tf_losses.reset_graph()
# Two unseeded arrays of 10 uniform random values in [0, 1); the values differ
# on every run.
_true = np.random.random(10)
pred = np.random.random(10)

# float32 tensor versions of the same data, used as inputs of the tf losses
t = tf.convert_to_tensor(_true, dtype=tf.float32)
p = tf.convert_to_tensor(pred, dtype=tf.float32)

# numpy based metrics built from the very same arrays, used as the reference
np_errors = RegressionMetrics(_true, pred)

class test_errors(unittest.TestCase):
    """Compare the losses of ``tf_losses`` with the metrics of
    ``RegressionMetrics`` on the same data, to four decimal places."""

    def test_corr_coeff(self):
        """The correlation coefficient must agree between both versions."""
        expected = np_errors.corr_coeff()
        actual = K.eval(tf_losses.corr_coeff(t, p))
        self.assertAlmostEqual(expected, actual, places=4)

    def test_r2(self):
        """``tf_r2`` is compared directly with ``r2``."""
        expected = np_errors.r2()
        actual = K.eval(tf_losses.tf_r2(t, p))
        self.assertAlmostEqual(expected, actual, places=4)  # TODO why not minus 1.

    def test_nse(self):
        """``nse`` is expected to equal one minus ``tf_nse``."""
        expected = np_errors.nse()
        actual = 1.0 - K.eval(tf_losses.tf_nse(t, p))
        self.assertAlmostEqual(expected, actual, places=4)

    def test_kge(self):
        """``kge`` is expected to equal one minus ``tf_kge``."""
        expected = np_errors.kge()
        actual = 1.0 - K.eval(tf_losses.tf_kge(t, p))
        self.assertAlmostEqual(expected, actual, places=4)
예제 #7
0
import os
import unittest
import site   # so that AI4Water directory is in path
site.addsitedir(os.path.dirname(os.path.dirname(__file__)) )

from AI4Water.utils.SeqMetrics import RegressionMetrics
from AI4Water.utils.SeqMetrics.utils import plot_metrics

import numpy as np


# Module-level fixtures shared by the tests below.
# Two unseeded (20, 1) arrays of uniform random values in [0, 1); the values
# differ on every run.
t = np.random.random((20, 1))
p = np.random.random((20, 1))

er = RegressionMetrics(t, p)

# Result of calculate_all() for the arrays above; this is what gets plotted.
all_errors = er.calculate_all()

# NOTE(review): judging by the name, these are attributes of RegressionMetrics
# which are not metrics themselves. The list is not referenced in the visible
# part of this file; confirm whether it is still needed.
not_metrics = ['calculate_all',
               'stats',
               "treat_arrays",
               "scale_free_metrics",
               "scale_dependent_metrics",
               "composite_metrics",
               "relative_metrics",
               "percentage_metrics"]

class test_errors(unittest.TestCase):
    """Smoke test for ``plot_metrics``."""

    def test_radial_pots(self):
        """Plot all the calculated metrics as bars, at most 50 per figure.

        The test passes as long as ``plot_metrics`` does not raise.
        """
        plot_options = {'plot_type': 'bar', 'max_metrics_per_fig': 50}
        plot_metrics(all_errors, **plot_options)