def test_random_idx_with_nan_inputs_outputs(self):
    """
    Test that when nans are present in both inputs and outputs and we use
    random indices, the x, y data are built correctly.
    """
    df = get_df_with_nans(inputs=True, outputs=True, frac=0.1)
    model = Model(inputs=['in1', 'in2'],
                  outputs=['out1'],
                  transformation=None,
                  val_data='same',
                  test_fraction=0.3,
                  epochs=1,
                  data=df,
                  input_nans={'fillna': {'method': 'bfill'}},
                  verbosity=1)
    model.fit(indices='random')
    x, _, y = model.train_data(indices=model.train_indices)

    # for i in range(100):
    #     idx = model.train_indices[i]
    #     df_x = df[['in1', 'in2']].iloc[idx]
    #     if idx > model.lookback and int(df_x.isna().sum()) == 0:
    #         self.assertAlmostEqual(float(df['out1'].iloc[idx]), y[i], 6)
    #         self.assertTrue(np.allclose(df[['in1', 'in2']].iloc[idx], x[0][i, -1]))

    # no index may point beyond the last non-nan target value
    assert np.max(model.test_indices) < (
        model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
    assert np.max(model.train_indices) < (
        model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
    return
def f(**kwargs):
    kwargs['objective'] = 'reg:squarederror'
    kwargs = Jsonize(kwargs)()
    model = Model(inputs=inputs,
                  outputs=outputs,
                  lookback=1,
                  batches="2d",
                  val_data="same",
                  test_fraction=0.3,
                  model={"xgboostregressor": kwargs},
                  transformation=None,
                  data=data,
                  prefix='testing',
                  verbosity=0)
    model.fit(indices="random")
    t, p = model.predict(indices=model.test_indices, prefix='test')
    mse = RegressionMetrics(t, p).mse()
    print(f"Validation mse {mse}")
    return mse
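# A minimal sketch of how an objective like `f` above might be driven by a
# plain random search. The search space below is illustrative (standard
# xgboost parameters), not taken from the original; only `f` itself is
# assumed to be defined as above.
import random

space = {
    "n_estimators": [50, 100, 200],
    "max_depth": [3, 5, 7],
    "learning_rate": [0.01, 0.1, 0.3],
}

best_mse, best_params = float("inf"), None
for _ in range(5):  # a handful of random trials
    params = {k: random.choice(v) for k, v in space.items()}
    mse = f(**params)
    if mse < best_mse:
        best_mse, best_params = mse, params
print(f"best mse {best_mse} with {best_params}")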
def objective_fn(**suggestion):
    print(suggestion, 'suggestion')
    model = Model(model={
                      'layers': {
                          'lstm': {'config': {'units': 64,
                                              'activation': suggestion['activation'],
                                              'dropout': 0.2,
                                              'recurrent_dropout': 0.2}}
                      }},
                  inputs=inputs,
                  outputs=outputs,
                  lookback=int(suggestion['lookback']),
                  lr=float(suggestion['lr']),
                  batch_size=int(suggestion['batch_size']),
                  data=data['224206'],
                  verbosity=0,
                  epochs=500,
                  prefix=_suffix)
    h = model.fit()
    return np.min(h.history['val_loss'])
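# An illustrative single evaluation of this objective; the parameter values
# below are assumptions for demonstration, not tuned values from the original.
val_loss = objective_fn(activation='tanh',
                        lookback=12,
                        lr=1e-4,
                        batch_size=32)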
def run_same_train_val_data(**kwargs):
    model = Model(data=nasdaq_df,
                  val_data="same",
                  test_fraction=0.2,
                  epochs=1,
                  verbosity=0)
    model.fit(**kwargs)
    x, _, y = model.train_data(indices=model.train_indices)
    return x, y
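# Hypothetical usage; the kwargs are forwarded verbatim to model.fit(), so
# the same `indices='random'` idiom used elsewhere in this file applies here.
x, y = run_same_train_val_data(indices='random')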
def fn(**suggestion):
    model = Model(inputs=inputs,
                  outputs=outputs,
                  model={"xgboostregressor": suggestion},
                  data=data,
                  prefix=f'test_{algorithm}_xgboost_{backend}',
                  verbosity=0)
    model.fit(indices="random")
    t, p = model.predict(indices=model.test_indices, prefix='test')
    mse = RegressionMetrics(t, p).mse()
    return mse
def fn(**suggestion):
    model = Model(inputs=inputs,
                  outputs=outputs,
                  model={"xgboostregressor": suggestion},
                  data=data,
                  prefix='test_tpe_xgboost',
                  verbosity=0)
    model.fit(indices="random")
    t, p = model.predict(indices=model.test_indices, prefix='test')
    mse = RegressionMetrics(t, p).mse()
    print(f"Validation mse {mse}")
    return mse
def test_multi_out_nans(self):
    """
    Test that when multiple outputs are the target and they contain nans,
    those nans are ignored during loss calculation.
    """
    # the nan-ignoring loss requires tf >= 2.3
    if int(''.join(tf.__version__.split('.')[0:2])) < 23 or int(tf.__version__[0]) < 2:
        warnings.warn(
            f"test with ignoring nans in labels cannot be run with tf version {tf.__version__}")
    else:
        df = get_df_with_nans(200, inputs=False, outputs=True,
                              output_cols=['out1', 'out2'], frac=0.5)
        layers = {
            "Flatten": {"config": {}},
            "Dense": {"config": {"units": 2}},
            "Reshape": {"config": {"target_shape": (2, 1)}}
        }
        model = Model(allow_nan_labels=True,
                      model={'layers': layers},
                      inputs=['in1', 'in2'],
                      outputs=['out1', 'out2'],
                      epochs=10,
                      verbosity=0,
                      data=df)
        history = model.fit()
        self.assertTrue(np.abs(np.sum(history.history['nse'])) > 0.0)
        self.assertTrue(np.abs(np.sum(history.history['val_nse'])) > 0.0)
    return
def build_model(layers, lookback):
    model = Model(model={'layers': layers},
                  inputs=inputs,
                  outputs=outputs,
                  lookback=lookback,
                  data=data,
                  verbosity=0)
    return model
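# A hypothetical call, assuming `inputs`, `outputs` and `data` are defined as
# above; the layers dict follows the config pattern used throughout this file.
layers = {
    "LSTM": {"config": {"units": 16}},
    "Dense": {"config": {"units": 1}},
    "Reshape": {"config": {"target_shape": (1, 1)}},
}
model = build_model(layers, lookback=5)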
def build_and_run(outputs, transformation=None, indices=None):
    model = Model(model={"layers": make_layers(len(outputs['inp_1d']))},
                  lookback=lookback,
                  inputs={"inp_1d": inp_1d, "inp_2d": inp_2d},
                  outputs=outputs,
                  data={'inp_1d': make_1d(outputs['inp_1d']), 'inp_2d': data_2d},
                  transformation=transformation,
                  epochs=2,
                  verbosity=0)
    model.fit(indices=indices)
    return model.predict(indices=model.test_indices if indices else None)
def build_and_run(transformation, data, inputs, outputs):
    model = Model(data=data,
                  inputs=inputs,
                  outputs=outputs,
                  transformation=transformation,
                  verbosity=0)
    tr_data, sc = model.normalize(model.data,
                                  transformation=model.config['transformation'],
                                  key='5')
    pred, true = model.denormalize_data(inputs=tr_data[inputs],
                                        true=tr_data[outputs],
                                        predicted=tr_data[outputs],
                                        scaler_key='5',
                                        in_cols=model.in_cols,
                                        out_cols=model.out_cols,
                                        transformation=model.config['transformation'])
    return pred
def test_nan_labels1(self):
    if int(''.join(tf.__version__.split('.')[0:2])) < 23 or int(tf.__version__[0]) < 2:
        warnings.warn(
            f"test with ignoring nans in labels cannot be run with tf version {tf.__version__}")
    else:
        df = get_df_with_nans(500, inputs=False, outputs=True,
                              output_cols=['out1', 'out2'], frac=0.9)
        layers = {
            "Flatten": {"config": {}},
            "Dense": {"config": {"units": 2}},
            "Reshape": {"config": {"target_shape": (2, 1)}}
        }
        model = Model(allow_nan_labels=1,
                      transformation=None,
                      model={'layers': layers},
                      inputs=['in1', 'in2'],
                      outputs=['out1', 'out2'],
                      epochs=10,
                      verbosity=0,
                      data=df.copy())
        history = model.fit(indices='random')
        # train and test indices must not overlap
        self.assertFalse(any(np.isin(model.train_indices, model.test_indices)))
        self.assertTrue(np.abs(np.sum(history.history['val_nse'])) > 0.0)
    return
def run_class_test(method):
    problem = "classification" if method.lower().startswith("class") else "regression"

    if method not in [
            "STACKINGREGRESSOR",
            "VOTINGREGRESSOR",
            "LOGISTICREGRESSIONCV",  # has convergence issues
            "RIDGE_REGRESSION",
            "MULTIOUTPUTREGRESSOR",
            "REGRESSORCHAIN",
            "REGRESSORMIXIN",
            # classification methods
            "STACKINGCLASSIFIER",
            "VOTINGCLASSIFIER",
            "CLASSIFIERCHAIN",
            "CLASSIFIERMIXIN",
            "MULTIOUTPUTCLASSIFIER",
            "CHECK_CLASSIFICATION_TARGETS",
            "IS_CLASSIFIER"
    ]:
        kwargs = {}
        if "CATBOOST" in method:
            kwargs = {'iterations': 2}
        elif "TPOT" in method.upper():
            kwargs = {'generations': 2, 'population_size': 2}

        print(f"testing {method}")

        model = Model(inputs=data_reg['feature_names'] if problem == "regression" else data_class['feature_names'],
                      outputs=['target'],
                      val_fraction=0.2,
                      problem=problem,
                      transformation=None,
                      data=df_reg if problem == "regression" else data_class,
                      model={method: kwargs},
                      verbosity=0)
        return model.fit()
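# A hypothetical driver for run_class_test; the method names below follow the
# uppercase naming scheme of the skip list above but are illustrative guesses,
# not a verified list from the original.
for method in ["RANDOMFORESTREGRESSOR", "DECISIONTREEREGRESSOR"]:
    run_class_test(method)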
def build_model(**kwargs):
    model = Model(data=data1,
                  verbosity=0,
                  batch_size=batch_size,
                  lookback=lookback,
                  transformation=None,
                  epochs=1,
                  **kwargs)
    return model
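# Hypothetical usage; `in_cols` and `out_cols` here refer to whatever input
# and output columns `data1` provides elsewhere in this file.
model = build_model(inputs=in_cols, outputs=out_cols)
history = model.fit()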
def test_add_output_layer2(self):
    # check if it reshapes the output correctly
    model = Model(model={'layers': {'lstm': 64, 'Dense': 1}},
                  data=load_nasdaq(),
                  verbosity=0)
    self.assertEqual(model._model.outputs[0].shape[1], model.outs)
    self.assertEqual(model._model.outputs[0].shape[-1], model.forecast_len)
    return
def test_add_output_layer1(self):
    # check that it adds both the Dense layer and the Reshape correctly
    model = Model(model={'layers': {'lstm': 64}},
                  data=load_nasdaq(),
                  verbosity=0)
    self.assertEqual(model._model.outputs[0].shape[1], model.outs)
    self.assertEqual(model._model.outputs[0].shape[-1], model.forecast_len)
    return
def test_as_fns(self):
    layers = {}
    for idx, act_fn in enumerate(['tanh', 'relu', 'elu', 'leakyrelu', 'crelu',
                                  'selu', 'relu6', 'sigmoid', 'hardsigmoid', 'swish']):
        layers["Dense_" + str(idx)] = {'config': {'units': 1, 'activation': act_fn}}

    layers["reshape"] = {'config': {'target_shape': (1, 1)}}

    model = Model(epochs=2,
                  lookback=1,
                  model={'layers': layers},
                  data=df,
                  verbosity=0)
    history = model.fit()

    # expected val_loss values, keyed by `version + '_' + os.name`
    val = {
        '21_nt': [0.8971164431680119, 0.7911620726129243],
        '23_nt': [0.10781528055667877, 0.09552989155054092],
        '24_nt': [0.10781528055667877, 0.09552989155054092],
        '23_posix': [0.10781528055667877, 0.09552989155054092],
        '24_posix': [0.10781528055667877, 0.09552989155054092],
        '21_posix': [0.10688107734841351, 0.0938945620801094],
        '20_posix': [0.8971164431680119, 0.10688107734841351]
    }

    if int(tf.__version__.split('.')[0]) > 1:
        for t, p in zip(history.history['val_loss'], val[version + '_' + os.name]):
            self.assertAlmostEqual(t, p, 2)
    return
def test_as_layers(self):
    layers = {}
    for lyr in ['PRELU', "RELU", "TANH", "ELU", "LEAKYRELU", "THRESHOLDRELU", "SELU",
                'sigmoid', 'hardsigmoid', 'crelu', 'relu6', 'softmax', 'softplus',
                'softsign', 'swish']:
        layers[lyr] = {'config': {}}

    layers["Dense"] = {'config': {'units': 1}}
    layers["reshape"] = {'config': {'target_shape': (1, 1)}}

    model = Model(epochs=2,
                  lookback=1,
                  model={'layers': layers},
                  data=df,
                  verbosity=0)

    val = {
        '21_nt': [0.09297575600513237, 0.09400989675627566],
        '23_posix': [0.0870760977268219, 0.1053781732916832],
        '24_posix': [0.0870760977268219, 0.1053781732916832],
        '21_posix': [0.09297575600513237, 0.095427157656984],
        '20_posix': [0.09297575600513237, 0.095427157656984],
        '23_nt': [0.0870760977268219, 0.1053781732916832],
        '24_nt': [0.0870760977268219, 0.1053781732916832]
    }

    history = model.fit()

    if int(tf.__version__.split('.')[0]) > 1:
        for t, p in zip(history.history['val_loss'], val[version + '_' + os.name]):
            self.assertAlmostEqual(t, p, 2)
    return
def test_datetimeindex(self):
    # makes sure that when using datetime_index=True during prediction,
    # the returned values are in the correct order
    model = Model(data=data1,
                  inputs=in_cols,
                  outputs=out_cols,
                  epochs=2,
                  model={'layers': {
                      "LSTM": {"config": {"units": 2}},
                      "Dense": {"config": {"units": 1}},
                      "Reshape": {"config": {"target_shape": (1, 1)}}
                  }},
                  lookback=lookback,
                  verbosity=0)
    model.fit(indices="random")
    t, p = model.predict(indices=model.train_indices, use_datetime_index=True)

    # the values in t must match the corresponding indices after adding
    # 10000, because the y column starts from 10000
    for i in range(100):
        self.assertEqual(int(t[i]), model.train_indices[i] + 10000)
    return
def test_add_no_output_layer(self):
    # check that it does not add layers when it does not have to
    model = Model(model={'layers': {
                      'lstm': 64,
                      'Dense': 1,
                      'Reshape': {'target_shape': (1, 1)}
                  }},
                  data=load_nasdaq(),
                  verbosity=0)
    self.assertEqual(model._model.outputs[0].shape[1], model.outs)
    self.assertEqual(model._model.outputs[0].shape[-1], model.forecast_len)
    return
def test_as_layer(self):
    layers = {
        "Input": {"config": {"shape": (params['total_time_steps'], params['num_inputs'])}},
        "TemporalFusionTransformer": {"config": params},
        "lambda": {"config": tf.keras.layers.Lambda(lambda _x: _x[Ellipsis, num_encoder_steps:, :])},
        "TimeDistributed": {"config": {}},
        "Dense": {"config": {"units": output_size * len(quantiles)}}
    }
    model = Model(model={'layers': layers},
                  inputs=['inp1', 'inp2', 'inp3', 'inp4', 'inp5'],
                  outputs=['out1', 'out2', 'out3'],
                  verbosity=0)
    h = model._model.fit(x=x, y=y, validation_split=0.3)
    # TODO, this h['loss'] is different than what we got from the other test
    # np.testing.assert_almost_equal(h.history['loss'][0], 0.4319019560303007)
    num_paras = np.sum([np.prod(v.get_shape().as_list()) for v in model._model.trainable_variables])
    self.assertEqual(num_paras, 7411)
    return
def test_random_idx_with_nan_in_outputs(self):
    # testing that if the output contains nans and we use random indices,
    # then the correct examples are assigned for training and testing,
    # given val_data is 'same'.
    df = get_df_with_nans(inputs=False, outputs=True, frac=0.8)
    model = Model(inputs=['in1', 'in2'],
                  outputs=['out1'],
                  transformation=None,
                  val_data='same',
                  test_fraction=0.3,
                  epochs=1,
                  data=df,
                  verbosity=0)
    model.fit(indices='random')

    idx5 = [50, 0, 72, 153, 39, 31, 170, 8]  # last 8 train indices
    self.assertTrue(np.allclose(idx5, model.train_indices[-8:]))

    x, _, y = model.train_data(indices=model.train_indices)
    eighth_non_nan_val_4m_st = df['out1'][df['out1'].notnull()].iloc[8]
    # the last training index is 8, so the last y value must be the non-nan
    # value at position 8
    self.assertAlmostEqual(float(y[-1]), eighth_non_nan_val_4m_st)

    # checking that the x values are also correct
    eighth_non_nan_val_4m_st = df[['in1', 'in2']][df['out1'].notnull()].iloc[8]
    self.assertTrue(np.allclose(df[['in1', 'in2']].iloc[86], eighth_non_nan_val_4m_st))
    self.assertTrue(np.allclose(x[0][-1, -1], eighth_non_nan_val_4m_st))

    xx, _, yy = model.test_data(indices=model.test_indices)
    # the third test index is 10, so yy[2] must be the non-nan value at position 10
    self.assertEqual(model.test_indices[2], 10)
    self.assertAlmostEqual(float(yy[2]), df['out1'][df['out1'].notnull()].iloc[10])
    self.assertTrue(np.allclose(xx[0][2, -1], df[['in1', 'in2']][df['out1'].notnull()].iloc[10]))

    assert np.max(model.test_indices) < (
        model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
    assert np.max(model.train_indices) < (
        model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
    return
def test_ml_random_indices(self):
    model = Model(inputs=data_reg['feature_names'],
                  outputs=["target"],
                  lookback=1,
                  batches="2d",
                  val_fraction=0.0,
                  val_data="same",
                  test_fraction=0.3,
                  category="ML",
                  problem="regression",
                  model={"xgboostregressor": {}},
                  transformation=None,
                  data=df_reg,
                  verbosity=0)
    model.fit(indices="random")
    trtt, trp = model.predict(indices=model.train_indices, prefix='train')
    t, p = model.predict(indices=model.test_indices, prefix='test')
    self.assertGreater(len(t), 1)
    self.assertGreater(len(trtt), 1)
    return
"config": { "units": units, "return_sequences": True } }, "Flatten": { "config": {} }, "Dense": { "config": { "units": outs } }, } model = Model(model={'layers': layers}, lookback=lookback, epochs=epochs, batch_size=batch_size, inputs=[f'in_{i}' for i in range(ins)], outputs=['out'], data=None) x = np.random.random((examples, lookback, ins)) y = np.random.random((examples, outs, 1)) model.fit(data=(x, y)) model.plot_layer_outputs(data=(x, y)) model.plot_act_grads(data=(x, y)) model.plot_weights() model.plot_weight_grads(data=(x, y))
# the class and method headers are implied by `model.KModel = CustomModel`
# below; the body follows the standard tf.keras custom train_step pattern
class CustomModel(tf.keras.Model):

    def train_step(self, data):
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}


df = load_nasdaq()

model = Model(batch_size=32,
              lookback=1,
              lr=8.95e-5,
              data=df)
model.KModel = CustomModel

history = model.fit(indices='random')
y, obs = model.predict()
# This example shows how to build a Model using the `from_config` class method.
# First we train and save a simple model, then load it from its config file.

import os

from AI4Water import Model
from AI4Water.utils.datasets import load_nasdaq
from AI4Water.utils.utils import find_best_weight

df = load_nasdaq()

model = Model(lookback=1,
              epochs=2,
              data=df,
              )
history = model.fit(indices='random')
w_path = model.path

# for clarity, delete the model, although it is overwritten anyway
del model

# Load the `Model` from the checkpoint; provide the complete path of the config file
cpath = os.path.join(w_path, "config.json")
model = Model.from_config(cpath, data=df)

w_file = find_best_weight(os.path.join(w_path, "weights"))  # file name of the best weights
model.load_weights(w_file)

x, y = model.predict(indices=model.test_indices, use_datetime_index=False)
# Put the channel and spatial attention of the CBAM model into a time-series prediction model

from AI4Water import Model
from AI4Water.utils.datasets import arg_beach

layers = {
    "Conv1D": {"config": {"filters": 64, "kernel_size": 7}},
    "MaxPool1D": {"config": {}},
    "ChannelAttention": {"config": {"conv_dim": "1d", "in_planes": 32}},
    "SpatialAttention": {"config": {"conv_dim": "1d"}},
    "Flatten": {"config": {}},
    "Dense": {"config": {"units": 1}},
    "Reshape": {"config": {"target_shape": (1, 1)}}
}

model = Model(model={'layers': layers},
              lookback=10,
              data=arg_beach())

history = model.fit(indices="random")
# (the opening of this snippet is truncated in the original; the first entry
# is assumed to be an LSTM layer named 'lstm_1', following the recurrent_dropout
# in its config and the indexed 'sigmoid_2' key that comes next)
layers = {
    'lstm_1': {
        'config': {
            'units': 32,
            'activation': 'relu',
            'dropout': 0.4,
            'recurrent_dropout': 0.5,
            'name': 'lstm_1'
        }
    },
    'sigmoid_2': {'config': {}},
    'Dense': {'config': {'units': 1}}
}

model = Model(data=df,
              batch_size=16,
              lookback=lookback,
              inputs=input_features,
              outputs=outputs,
              model={'layers': layers},
              lr=0.0001)

# This model is built only to showcase how to build a multi-layer model by
# manipulating the config, hence fit/predict stay commented out
# history = model.fit(indices='random')
# y, obs = model.predict(st=0, use_datetime_index=False, marker='.', linestyle='')
# How to use AI4Water for classification problems

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer

from AI4Water import Model

data_class = load_breast_cancer()
cols = data_class['feature_names'].tolist() + ['target']
df = pd.DataFrame(np.concatenate([data_class['data'], data_class['target'].reshape(-1, 1)], axis=1),
                  columns=cols)

model = Model(data=df,
              inputs=data_class['feature_names'].tolist(),
              outputs=['target'],
              val_fraction=0.0,
              model={"DecisionTreeClassifier": {"max_depth": 4, "random_state": 313}},
              transformation=None,
              problem="classification")

h = model.fit()
model.view_model()
# How to use AI4Water for regression problems

import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes

from AI4Water import Model

data_class = load_diabetes()
cols = data_class['feature_names'] + ['target']
df = pd.DataFrame(np.concatenate([data_class['data'], data_class['target'].reshape(-1, 1)], axis=1),
                  columns=cols)

model = Model(data=df,
              inputs=data_class['feature_names'],
              outputs=['target'],
              lookback=1,
              batches="2d",
              val_fraction=0.0,
              model={'DecisionTreeRegressor': {"max_depth": 3, "criterion": "mae"}},
              transformation=None)

h = model.fit()
x, _, y = model.train_data()
"Dense_0": { 'units': 64, 'activation': 'relu' }, "Flatten": {}, "Dense_3": { 'units': 1 }, } } df = arg_beach() input_features = list(df.columns)[0:-1] # column in dataframe to bse used as output/target outputs = list(df.columns)[-1] model = Model(data=df, batch_size=16, lookback=1, model=mlp_model, inputs=input_features, outputs=[outputs], lr=0.0001) history = model.fit(indices='random') y, obs = model.predict() model.view_model(st=0)