Example #1
0
    def test_random_idx_with_nan_inputs_outputs(self):
        """
        Test that when nans are present in both inputs and outputs and random
        indices are used, the x, y data is correctly made.

        Input nans are imputed with backfill via ``input_nans``; rows with nan
        outputs are excluded, so no train/test index may point at or beyond
        the tail of the data occupied by nan output rows.
        """
        df = get_df_with_nans(inputs=True, outputs=True, frac=0.1)

        model = Model(inputs=['in1', 'in2'],
                      outputs=['out1'],
                      transformation=None,
                      val_data='same',
                      test_fraction=0.3,
                      epochs=1,
                      data=df,
                      input_nans={'fillna': {
                          'method': 'bfill'
                      }},
                      verbosity=1)

        model.fit(indices='random')

        x, _, y = model.train_data(indices=model.train_indices)

        # Every usable index must lie before data-length minus the number of
        # nan output rows. Use unittest assertions (not bare `assert`, which
        # is stripped under `python -O`) for consistency with the rest of the
        # test class.
        usable_rows = model.data.shape[0] - int(
            model.data[model.out_cols].isna().sum())
        self.assertLess(int(np.max(model.test_indices)), usable_rows)
        self.assertLess(int(np.max(model.train_indices)), usable_rows)
        return
Example #2
0
def run_same_train_val_data(**kwargs):
    """Fit a model on ``nasdaq_df`` with validation data shared with training.

    All ``kwargs`` are forwarded to ``Model.fit``. Returns the (inputs,
    targets) pair produced by ``train_data`` for the fitted model's own
    train indices.
    """
    model = Model(data=nasdaq_df,
                  val_data="same",
                  test_fraction=0.2,
                  epochs=1,
                  verbosity=0)
    model.fit(**kwargs)

    inputs, _, targets = model.train_data(indices=model.train_indices)
    return inputs, targets
Example #3
0
    def test_random_idx_with_nan_in_outputs(self):
        """
        With nans only in the output column and random indices, verify that
        the correct (non-nan) examples are assigned for training and testing
        when ``val_data`` is 'same'.
        """
        # presumably `frac` is the fraction of nan output values — verify
        # against get_df_with_nans; only the non-nan rows are usable.
        df = get_df_with_nans(inputs=False, outputs=True, frac=0.8)

        model = Model(inputs=['in1', 'in2'],
                      outputs=['out1'],
                      transformation=None,
                      val_data='same',
                      test_fraction=0.3,
                      epochs=1,
                      data=df,
                      verbosity=0)

        model.fit(indices='random')
        idx5 = [50, 0, 72, 153, 39, 31, 170, 8]  # expected last 8 train indices
        self.assertTrue(np.allclose(idx5, model.train_indices[-8:]))

        x, _, y = model.train_data(indices=model.train_indices)

        eighth_non_nan_val_4m_st = df['out1'][df['out1'].notnull()].iloc[8]
        # the last training index is 8, so the last y value must be the 8th
        # (0-based) non-nan output value
        self.assertAlmostEqual(float(y[-1]), eighth_non_nan_val_4m_st)

        # checking that x values are also correct: raw row 86 of df holds the
        # inputs paired with the 8th non-nan output value
        eighth_non_nan_val_4m_st = df[['in1',
                                       'in2']][df['out1'].notnull()].iloc[8]
        self.assertTrue(
            np.allclose(df[['in1', 'in2']].iloc[86], eighth_non_nan_val_4m_st))
        self.assertTrue(np.allclose(x[0][-1, -1], eighth_non_nan_val_4m_st))

        xx, _, yy = model.test_data(indices=model.test_indices)
        # the test index at position 2 is 10, so yy[2] must be the 10th
        # (0-based) non-nan output value, with matching input rows
        self.assertEqual(model.test_indices[2], 10)
        self.assertAlmostEqual(float(yy[2]),
                               df['out1'][df['out1'].notnull()].iloc[10])
        self.assertTrue(
            np.allclose(xx[0][2, -1],
                        df[['in1', 'in2']][df['out1'].notnull()].iloc[10]))

        # no usable index may point past data-length minus the nan-output count
        assert np.max(model.test_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        assert np.max(model.train_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        return
Example #4
0
# How to use AI4Water for regression problems

import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes

from AI4Water import Model

# Assemble the diabetes dataset into a single DataFrame with the
# regression target appended as the final column.
data_class = load_diabetes()
cols = data_class['feature_names'] + ['target']
df = pd.DataFrame(
    np.column_stack([data_class['data'], data_class['target']]),
    columns=cols)

# A shallow decision tree keeps this example fast; no feature
# transformation and no validation split are used.
model = Model(data=df,
              inputs=data_class['feature_names'],
              outputs=['target'],
              lookback=1,
              batches="2d",
              val_fraction=0.0,
              model={'DecisionTreeRegressor': {
                  "max_depth": 3,
                  "criterion": "mae"
              }},
              transformation=None)

h = model.fit()

x, _, y = model.train_data()