def test_random_idx_with_nan_inputs_outputs(self):
    """
    Test that when nans are present in both inputs and outputs and random
    indices are used, the x, y training data is correctly constructed.

    Input nans are imputed via backward-fill (``input_nans``) while rows with
    nan outputs are excluded, so every selected train/test index must lie
    below the count of non-nan output rows.
    """
    df = get_df_with_nans(inputs=True, outputs=True, frac=0.1)
    model = Model(inputs=['in1', 'in2'],
                  outputs=['out1'],
                  transformation=None,
                  val_data='same',
                  test_fraction=0.3,
                  epochs=1,
                  data=df,
                  input_nans={'fillna': {'method': 'bfill'}},
                  verbosity=1)

    model.fit(indices='random')
    x, _, y = model.train_data(indices=model.train_indices)

    # Indices are drawn only from rows whose output is non-nan, so no index
    # may reach into the nan-output tail of the data.
    n_valid = model.data.shape[0] - int(model.data[model.out_cols].isna().sum())
    assert np.max(model.test_indices) < n_valid
    assert np.max(model.train_indices) < n_valid
    return
def run_same_train_val_data(**kwargs):
    """Fit a Model on nasdaq_df with val_data='same' and return its training (x, y)."""
    model = Model(data=nasdaq_df,
                  val_data="same",
                  test_fraction=0.2,
                  epochs=1,
                  verbosity=0)
    model.fit(**kwargs)
    # train_data yields (inputs, ignored, outputs); only x and y are returned.
    train_x, _, train_y = model.train_data(indices=model.train_indices)
    return train_x, train_y
def test_random_idx_with_nan_in_outputs(self):
    # Testing that if the output contains nans and we use random indices, then
    # correct examples are assigned for training and testing, given that
    # val_data is 'same'.
    df = get_df_with_nans(inputs=False, outputs=True, frac=0.8)
    model = Model(inputs=['in1', 'in2'],
                  outputs=['out1'],
                  transformation=None,
                  val_data='same',
                  test_fraction=0.3,
                  epochs=1,
                  data=df,
                  verbosity=0)
    model.fit(indices='random')
    idx5 = [50, 0, 72, 153, 39, 31, 170, 8]  # expected last 8 train indices
    self.assertTrue(np.allclose(idx5, model.train_indices[-8:]))
    x, _, y = model.train_data(indices=model.train_indices)
    eighth_non_nan_val_4m_st = df['out1'][df['out1'].notnull()].iloc[8]
    # The last training index is 8, so the last y value must be the output
    # value at the 8th non-nan position.
    self.assertAlmostEqual(float(y[-1]), eighth_non_nan_val_4m_st)
    # checking that x values are also correct
    eighth_non_nan_val_4m_st = df[['in1', 'in2']][df['out1'].notnull()].iloc[8]
    self.assertTrue(
        np.allclose(df[['in1', 'in2']].iloc[86], eighth_non_nan_val_4m_st))
    self.assertTrue(np.allclose(x[0][-1, -1], eighth_non_nan_val_4m_st))
    xx, _, yy = model.test_data(indices=model.test_indices)
    # model.test_indices[2] is expected to be 10, so yy[2] must equal the
    # output value at the 10th non-nan position.
    # NOTE(review): the original comment claimed "the second test index is 9",
    # which did not match the assertions below.
    self.assertEqual(model.test_indices[2], 10)
    self.assertAlmostEqual(float(yy[2]),
                           df['out1'][df['out1'].notnull()].iloc[10])
    self.assertTrue(
        np.allclose(xx[0][2, -1],
                    df[['in1', 'in2']][df['out1'].notnull()].iloc[10]))
    # No selected index may point at a nan-output row: indices are bounded by
    # the count of non-nan output rows.
    assert np.max(model.test_indices) < (
        model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
    assert np.max(model.train_indices) < (
        model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
    return
#How to use AI4Water for regression problems import pandas as pd import numpy as np from sklearn.datasets import load_diabetes from AI4Water import Model data_class = load_diabetes() cols = data_class['feature_names'] + ['target'] df = pd.DataFrame(np.concatenate( [data_class['data'], data_class['target'].reshape(-1, 1)], axis=1), columns=cols) model = Model( data=df, inputs=data_class['feature_names'], outputs=['target'], lookback=1, batches="2d", val_fraction=0.0, model={'DecisionTreeRegressor': { "max_depth": 3, "criterion": "mae" }}, transformation=None) h = model.fit() x, _, y = model.train_data()