コード例 #1
0
ファイル: test_utils.py プロジェクト: AtrCheema/AI4Water
    def test_random_idx_with_nan_inputs_outputs(self):
        """
        Test that when nans are present in inputs and outputs and we use random indices, then x,y data is correctly made.
        """

        # 10% of the values in both input and output columns are NaN
        df = get_df_with_nans(inputs=True, outputs=True, frac=0.1)

        model = Model(inputs=['in1', 'in2'],
                      outputs=['out1'],
                      transformation=None,  # keep raw values so rows map directly to df
                      val_data='same',
                      test_fraction=0.3,
                      epochs=1,
                      data=df,
                      # NaNs in input columns are backfilled before batching
                      input_nans={'fillna': {
                          'method': 'bfill'
                      }},
                      verbosity=1)

        model.fit(indices='random')

        x, _, y = model.train_data(indices=model.train_indices)

        # for i in range(100):
        #     idx = model.train_indices[i]
        #     df_x = df[['in1', 'in2']].iloc[idx]
        #     if idx > model.lookback and int(df_x.isna().sum()) == 0:
        #         self.assertAlmostEqual(float(df['out1'].iloc[idx]), y[i], 6)
        #         self.assertTrue(np.allclose(df[['in1', 'in2']].iloc[idx], x[0][i, -1]))

        # indices are positions within the NaN-free target series, so no index
        # may reach past (total rows - number of NaN targets)
        assert np.max(model.test_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        assert np.max(model.train_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        return
コード例 #2
0
        def f(**kwargs):
            """Train an xgboost regressor with *kwargs* and return its test-set MSE."""
            # pin the learning objective; the remaining kwargs come from the optimizer
            kwargs['objective'] = 'reg:squarederror'
            kwargs = Jsonize(kwargs)()

            _model = Model(
                inputs=inputs,
                outputs=outputs,
                lookback=1,
                batches="2d",
                val_data="same",
                test_fraction=0.3,
                model={"xgboostregressor": kwargs},
                transformation=None,
                data=data,
                prefix='testing',
                verbosity=0)
            _model.fit(indices="random")

            true, pred = _model.predict(indices=_model.test_indices, prefix='test')
            mse = RegressionMetrics(true, pred).mse()
            print(f"Validation mse {mse}")
            return mse
コード例 #3
0
ファイル: compare_hpos.py プロジェクト: AtrCheema/AI4Water
    def objective_fn(**suggestion):
        """Build an LSTM model from *suggestion* and return its best validation loss."""
        print(suggestion, 'suggestion')

        # hyper-parameters proposed by the optimizer drive the LSTM config
        lstm_cfg = {
            'units': 64,
            'activation': suggestion['activation'],
            'dropout': 0.2,
            'recurrent_dropout': 0.2
        }

        model = Model(
            model={'layers': {'lstm': {'config': lstm_cfg}}},
            inputs=inputs,
            outputs=outputs,
            lookback=int(suggestion['lookback']),
            lr=float(suggestion['lr']),
            batch_size=int(suggestion['batch_size']),
            data=data['224206'],
            verbosity=0,
            epochs=500,
            prefix=_suffix)

        h = model.fit()
        # the optimizer minimizes the best val_loss seen across epochs
        return np.min(h.history['val_loss'])
コード例 #4
0
ファイル: test_utils.py プロジェクト: AtrCheema/AI4Water
def run_same_train_val_data(**kwargs):
    """Fit a model on nasdaq data with val_data='same'; return its (x, y) training arrays."""
    _model = Model(
        data=nasdaq_df,
        val_data="same",
        test_fraction=0.2,
        epochs=1,
        verbosity=0)
    _model.fit(**kwargs)

    x, _, y = _model.train_data(indices=_model.train_indices)
    return x, y
コード例 #5
0
    def fn(**suggestion):
        """Evaluate one xgboost hyper-parameter *suggestion*; return test-set MSE."""
        model = Model(
            inputs=inputs,
            outputs=outputs,
            model={"xgboostregressor": suggestion},
            data=data,
            prefix=f'test_{algorithm}_xgboost_{backend}',
            verbosity=0)
        model.fit(indices="random")

        true, pred = model.predict(indices=model.test_indices, prefix='test')
        return RegressionMetrics(true, pred).mse()
コード例 #6
0
        def fn(**suggestion):
            """Objective for TPE search: train xgboost with *suggestion*, return test MSE."""
            _m = Model(
                inputs=inputs,
                outputs=outputs,
                model={"xgboostregressor": suggestion},
                data=data,
                prefix='test_tpe_xgboost',
                verbosity=0)
            _m.fit(indices="random")

            true, pred = _m.predict(indices=_m.test_indices, prefix='test')
            mse = RegressionMetrics(true, pred).mse()
            print(f"Validation mse {mse}")
            return mse
コード例 #7
0
ファイル: test_utils.py プロジェクト: AtrCheema/AI4Water
    def test_multi_out_nans(self):
        """
        Test that when multiple outputs are the target and they contain nans, then we ignore these nans during
        loss calculation.
        """
        # NaN-aware loss needs tf >= 2.3.  NOTE(review): joining the first two
        # version fields breaks for double-digit minors ("2.10" -> 210) --
        # confirm whether a tuple comparison was intended here.
        if int(''.join(tf.__version__.split('.')[0:2])) < 23 or int(
                tf.__version__[0]) < 2:
            warnings.warn(
                f"test with ignoring nan in labels can not be done in tf version {tf.__version__}"
            )
        else:
            # half of the values in both target columns are NaN
            df = get_df_with_nans(200,
                                  inputs=False,
                                  outputs=True,
                                  output_cols=['out1', 'out2'],
                                  frac=0.5)

            # Flatten -> Dense(2) -> Reshape emits one value per output column,
            # shaped (2, 1)
            layers = {
                "Flatten": {
                    "config": {}
                },
                "Dense": {
                    "config": {
                        "units": 2
                    }
                },
                "Reshape": {
                    "config": {
                        "target_shape": (2, 1)
                    }
                }
            }

            # allow_nan_labels=True should mask NaN targets out of the loss
            model = Model(allow_nan_labels=True,
                          model={'layers': layers},
                          inputs=['in1', 'in2'],
                          outputs=['out1', 'out2'],
                          epochs=10,
                          verbosity=0,
                          data=df)

            history = model.fit()

            # training must still produce non-zero NSE metrics despite the NaNs
            self.assertTrue(np.abs(np.sum(history.history['nse'])) > 0.0)
            self.assertTrue(np.abs(np.sum(history.history['val_nse'])) > 0.0)
            return
コード例 #8
0
def build_model(layers, lookback):
    """Construct (without training) a Model with the given layer spec and lookback."""
    return Model(
        model={'layers': layers},
        inputs=inputs,
        outputs=outputs,
        lookback=lookback,
        data=data,
        verbosity=0)
コード例 #9
0
ファイル: test_models.py プロジェクト: AtrCheema/AI4Water
def build_and_run(outputs, transformation=None, indices=None):
    """Build a two-input model for *outputs*, fit it, and return its test predictions."""
    net = make_layers(len(outputs['inp_1d']))
    model = Model(
        model={"layers": net},
        lookback=lookback,
        inputs={"inp_1d": inp_1d, "inp_2d": inp_2d},
        outputs=outputs,
        data={'inp_1d': make_1d(outputs['inp_1d']), 'inp_2d': data_2d},
        transformation=transformation,
        epochs=2,
        verbosity=0)
    model.fit(indices=indices)
    # with index-based splitting, predict on the held-out test indices;
    # otherwise fall back to the default split
    return model.predict(indices=model.test_indices if indices else None)
コード例 #10
0
def build_and_run(transformation, data, inputs, outputs):
    """Normalize the model's data with *transformation*, denormalize it back, and return the result."""
    model = Model(
        data=data,
        inputs=inputs,
        outputs=outputs,
        transformation=transformation,
        verbosity=0)

    cfg_transformation = model.config['transformation']
    # normalize the whole frame under scaler key '5'
    tr_data, sc = model.normalize(
        model.data, transformation=cfg_transformation, key='5')

    # round-trip: denormalize the just-normalized values with the same key
    pred, true = model.denormalize_data(
        inputs=tr_data[inputs],
        true=tr_data[outputs],
        predicted=tr_data[outputs],
        scaler_key='5',
        in_cols=model.in_cols,
        out_cols=model.out_cols,
        transformation=cfg_transformation)
    return pred
コード例 #11
0
ファイル: test_utils.py プロジェクト: AtrCheema/AI4Water
    def test_nan_labels1(self):
        # With allow_nan_labels=1 and 90% NaN targets, training should still
        # proceed and train/test indices must not overlap.
        # NOTE(review): the version join breaks for double-digit minors
        # ("2.10" -> 210) -- verify the intended minimum is tf 2.3.
        if int(''.join(tf.__version__.split('.')[0:2])) < 23 or int(
                tf.__version__[0]) < 2:
            warnings.warn(
                f"test with ignoring nan in labels can not be done in tf version {tf.__version__}"
            )
        else:
            # 90% of the values in both target columns are NaN
            df = get_df_with_nans(500,
                                  inputs=False,
                                  outputs=True,
                                  output_cols=['out1', 'out2'],
                                  frac=0.9)

            # Flatten -> Dense(2) -> Reshape emits one value per output column
            layers = {
                "Flatten": {
                    "config": {}
                },
                "Dense": {
                    "config": {
                        "units": 2
                    }
                },
                "Reshape": {
                    "config": {
                        "target_shape": (2, 1)
                    }
                }
            }

            model = Model(allow_nan_labels=1,
                          transformation=None,
                          model={'layers': layers},
                          inputs=['in1', 'in2'],
                          outputs=['out1', 'out2'],
                          epochs=10,
                          verbosity=0,
                          data=df.copy())

            history = model.fit(indices='random')

            # random splitting must keep train and test indices disjoint
            self.assertFalse(
                any(np.isin(model.train_indices, model.test_indices)))
            self.assertTrue(np.abs(np.sum(history.history['val_nse'])) > 0.0)
            return
コード例 #12
0
ファイル: test_ml_methods.py プロジェクト: AtrCheema/AI4Water
def run_class_test(method):
    """Build and fit the sklearn-style model named *method*.

    Returns the fit history, or (implicitly) None when *method* is in the
    unsupported list below.
    """

    # names starting with "class" are classification, everything else regression
    problem = "classification" if method.lower().startswith(
        "class") else "regression"

    # meta/ensemble estimators and helpers needing extra constructor arguments
    # are skipped entirely
    if method not in [
            "STACKINGREGRESSOR",
            "VOTINGREGRESSOR",
            "LOGISTICREGRESSIONCV",  # has convergence issues
            "RIDGE_REGRESSION",
            "MULTIOUTPUTREGRESSOR",
            "REGRESSORCHAIN",
            "REGRESSORMIXIN",
            # classifications methods
            "STACKINGCLASSIFIER",
            "VOTINGCLASSIFIER",
            "CLASSIFIERCHAIN",
            "CLASSIFIERMIXIN",
            "MULTIOUTPUTCLASSIFIER",
            "CHECK_CLASSIFICATION_TARGETS",
            "IS_CLASSIFIER"
    ]:

        # give slow libraries tiny budgets so the test stays fast
        kwargs = {}
        if "CATBOOST" in method:
            kwargs = {'iterations': 2}
        elif "TPOT" in method.upper():
            kwargs = {'generations': 2, 'population_size': 2}

        print(f"testing {method}")

        model = Model(inputs=data_reg['feature_names'] if problem
                      == "regression" else data_class['feature_names'],
                      outputs=['target'],
                      val_fraction=0.2,
                      problem=problem,
                      transformation=None,
                      data=df_reg if problem == "regression" else data_class,
                      model={method: kwargs},
                      verbosity=0)

        return model.fit()
コード例 #13
0
ファイル: test_utils.py プロジェクト: AtrCheema/AI4Water
def build_model(**kwargs):
    """Return an untrained Model on data1; extra kwargs are forwarded to Model."""
    return Model(
        data=data1,
        verbosity=0,
        batch_size=batch_size,
        lookback=lookback,
        transformation=None,
        epochs=1,
        **kwargs)
コード例 #14
0
ファイル: test_models.py プロジェクト: AtrCheema/AI4Water
    def test_add_output_layer2(self):
        """The builder should reshape the final Dense output to (outs, forecast_len)."""
        cfg = {'layers': {'lstm': 64, 'Dense': 1}}
        model = Model(model=cfg, data=load_nasdaq(), verbosity=0)

        out_shape = model._model.outputs[0].shape
        self.assertEqual(out_shape[1], model.outs)
        self.assertEqual(out_shape[-1], model.forecast_len)
        return
コード例 #15
0
ファイル: test_models.py プロジェクト: AtrCheema/AI4Water
    def test_add_output_layer1(self):
        """With no Dense in the spec, the builder must add one and reshape it correctly."""
        cfg = {'layers': {'lstm': 64}}
        model = Model(model=cfg, data=load_nasdaq(), verbosity=0)

        out_shape = model._model.outputs[0].shape
        self.assertEqual(out_shape[1], model.outs)
        self.assertEqual(out_shape[-1], model.forecast_len)
        return
コード例 #16
0
    def test_as_fns(self):
        # Build a stack of Dense layers, one per activation function, so every
        # activation is exercised as a layer argument rather than a layer type.
        layers = {}
        for idx, act_fn in enumerate([
                'tanh', 'relu', 'elu', 'leakyrelu', 'crelu', 'selu', 'relu6',
                'sigmoid', 'hardsigmoid', 'swish'
        ]):

            layers["Dense_" + str(idx)] = {
                'config': {
                    'units': 1,
                    'activation': act_fn
                }
            }

        layers["reshape"] = {'config': {'target_shape': (1, 1)}}

        model = Model(epochs=2,
                      lookback=1,
                      model={'layers': layers},
                      data=df,
                      verbosity=0)

        history = model.fit()
        # golden val_loss values keyed by "<tf-major-minor>_<os.name>";
        # NOTE(review): any new tf version/OS combination needs an entry here
        val = {
            '21_nt': [0.8971164431680119, 0.7911620726129243],
            '23_nt': [0.10781528055667877, 0.09552989155054092],
            '24_nt': [0.10781528055667877, 0.09552989155054092],
            '23_posix': [0.10781528055667877, 0.09552989155054092],
            '24_posix': [0.10781528055667877, 0.09552989155054092],
            '21_posix': [0.10688107734841351, 0.0938945620801094],
            '20_posix': [0.8971164431680119, 0.10688107734841351]
        }

        # the recorded values only hold for tf 2.x
        if int(tf.__version__.split('.')[0]) > 1:
            for t, p in zip(history.history['val_loss'],
                            val[version + '_' + os.name]):
                self.assertAlmostEqual(t, p, 2)
        return
コード例 #17
0
    def test_as_layers(self):
        # Exercise each activation as a standalone layer type (rather than as
        # an argument to Dense), then compare val_loss against recorded values.

        layers = {}

        for lyr in [
                'PRELU', "RELU", "TANH", "ELU", "LEAKYRELU", "THRESHOLDRELU",
                "SELU", 'sigmoid', 'hardsigmoid', 'crelu', 'relu6', 'softmax',
                'softplus', 'softsign', 'swish'
        ]:
            layers[lyr] = {'config': {}}

        layers["Dense"] = {'config': {'units': 1}}
        layers["reshape"] = {'config': {'target_shape': (1, 1)}}

        model = Model(epochs=2,
                      lookback=1,
                      model={'layers': layers},
                      data=df,
                      verbosity=0)

        # golden val_loss values keyed by "<tf-major-minor>_<os.name>";
        # NOTE(review): any new tf version/OS combination needs an entry here
        val = {
            '21_nt': [0.09297575600513237, 0.09400989675627566],
            '23_posix': [0.0870760977268219, 0.1053781732916832],
            '24_posix': [0.0870760977268219, 0.1053781732916832],
            '21_posix': [0.09297575600513237, 0.095427157656984],
            '20_posix': [0.09297575600513237, 0.095427157656984],
            '23_nt': [0.0870760977268219, 0.1053781732916832],
            '24_nt': [0.0870760977268219, 0.1053781732916832]
        }

        history = model.fit()
        # the recorded values only hold for tf 2.x
        if int(tf.__version__.split('.')[0]) > 1:
            for t, p in zip(history.history['val_loss'],
                            val[version + '_' + os.name]):
                self.assertAlmostEqual(t, p, 2)
        return
コード例 #18
0
ファイル: test_utils.py プロジェクト: AtrCheema/AI4Water
    def test_datetimeindex(self):
        # makes sure that using datetime_index=True during prediction, the returned values are in correct order

        # minimal LSTM -> Dense -> Reshape network; the prediction values
        # themselves are not checked, only their ordering
        model = Model(data=data1,
                      inputs=in_cols,
                      outputs=out_cols,
                      epochs=2,
                      model={
                          'layers': {
                              "LSTM": {
                                  "config": {
                                      "units": 2
                                  }
                              },
                              "Dense": {
                                  "config": {
                                      "units": 1
                                  }
                              },
                              "Reshape": {
                                  "config": {
                                      "target_shape": (1, 1)
                                  }
                              }
                          }
                      },
                      lookback=lookback,
                      verbosity=0)

        model.fit(indices="random")
        t, p = model.predict(indices=model.train_indices,
                             use_datetime_index=True)
        # the values in t must match the corresponding indices after adding 10000, because y column starts from 100000
        # NOTE(review): data1's target appears to be constructed as index+10000
        # elsewhere in this file -- confirm against the fixture definition
        for i in range(100):
            self.assertEqual(int(t[i]), model.train_indices[i] + 10000)
        return
コード例 #19
0
ファイル: test_models.py プロジェクト: AtrCheema/AI4Water
    def test_add_no_output_layer(self):
        """A spec already ending in Dense + Reshape must not get an extra output layer."""
        cfg = {
            'layers': {
                'lstm': 64,
                'Dense': 1,
                'Reshape': {'target_shape': (1, 1)}
            }
        }
        model = Model(model=cfg, data=load_nasdaq(), verbosity=0)

        out_shape = model._model.outputs[0].shape
        self.assertEqual(out_shape[1], model.outs)
        self.assertEqual(out_shape[-1], model.forecast_len)
        return
コード例 #20
0
    def test_as_layer(self):
        # Use TemporalFusionTransformer as an ordinary layer inside a Model.
        # `params`, `num_encoder_steps`, `output_size`, `quantiles`, `x` and `y`
        # are module-level fixtures defined outside this method.
        layers = {
            "Input": {"config": {"shape": (params['total_time_steps'], params['num_inputs'])}},
            "TemporalFusionTransformer": {"config": params},
            # keep only the decoder part of the sequence
            "lambda": {"config": tf.keras.layers.Lambda(lambda _x: _x[Ellipsis, num_encoder_steps:, :])},
            "TimeDistributed": {"config": {}},
            "Dense": {"config": {"units": output_size * len(quantiles)}}
        }
        model = Model(model={'layers':layers},
                      inputs=['inp1', 'inp2', 'inp3', 'inp4', 'inp5'],
                      outputs=['out1', 'out2', 'out3'],
                      verbosity=0)
        h = model._model.fit(x=x,y=y, validation_split=0.3)  # TODO, this h['loss'] is different than what we got from other test
        #np.testing.assert_almost_equal(h.history['loss'][0], 0.4319019560303007)

        # the trainable-parameter count pins the architecture regardless of loss
        num_paras = np.sum([np.prod(v.get_shape().as_list()) for v in model._model.trainable_variables])
        self.assertEqual(num_paras, 7411)
        return
コード例 #21
0
ファイル: test_utils.py プロジェクト: AtrCheema/AI4Water
    def test_random_idx_with_nan_in_outputs(self):
        # testing that if output contains nans and we use random indices, then correct examples are assinged
        # for training and testing given val_data is 'same'.
        # 80% of target values are NaN, so only ~20% of rows are usable examples
        df = get_df_with_nans(inputs=False, outputs=True, frac=0.8)

        model = Model(inputs=['in1', 'in2'],
                      outputs=['out1'],
                      transformation=None,  # keep raw values so rows map directly to df
                      val_data='same',
                      test_fraction=0.3,
                      epochs=1,
                      data=df,
                      verbosity=0)

        model.fit(indices='random')
        # hard-coded expectation: the random split is deterministic (seeded),
        # so the last 8 training indices are known in advance
        idx5 = [50, 0, 72, 153, 39, 31, 170, 8]  # last 8 train indices
        self.assertTrue(np.allclose(idx5, model.train_indices[-8:]))

        x, _, y = model.train_data(indices=model.train_indices)

        eighth_non_nan_val_4m_st = df['out1'][df['out1'].notnull()].iloc[8]
        # the last training index is 8, so the last y value must be 8th non-nan value
        self.assertAlmostEqual(float(y[-1]), eighth_non_nan_val_4m_st)

        # checking that x values are also correct
        eighth_non_nan_val_4m_st = df[['in1',
                                       'in2']][df['out1'].notnull()].iloc[8]
        self.assertTrue(
            np.allclose(df[['in1', 'in2']].iloc[86], eighth_non_nan_val_4m_st))
        self.assertTrue(np.allclose(x[0][-1, -1], eighth_non_nan_val_4m_st))

        xx, _, yy = model.test_data(indices=model.test_indices)
        # the second test index is 9, so second value of yy must be 9th non-nan value
        self.assertEqual(model.test_indices[2], 10)
        self.assertAlmostEqual(float(yy[2]),
                               df['out1'][df['out1'].notnull()].iloc[10])
        self.assertTrue(
            np.allclose(xx[0][2, -1],
                        df[['in1', 'in2']][df['out1'].notnull()].iloc[10]))

        # indices are positions within the NaN-free target series, so no index
        # may reach past (total rows - number of NaN targets)
        assert np.max(model.test_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        assert np.max(model.train_indices) < (
            model.data.shape[0] - int(model.data[model.out_cols].isna().sum()))
        return
コード例 #22
0
ファイル: test_ml_methods.py プロジェクト: AtrCheema/AI4Water
    def test_ml_random_indices(self):
        """An ML (xgboost) model fit with random indices must predict on both splits."""
        model = Model(
            inputs=data_reg['feature_names'],
            outputs=["target"],
            lookback=1,
            batches="2d",
            val_fraction=0.0,
            val_data="same",
            test_fraction=0.3,
            category="ML",
            problem="regression",
            model={"xgboostregressor": {}},
            transformation=None,
            data=df_reg,
            verbosity=0)

        model.fit(indices="random")
        train_true, _ = model.predict(indices=model.train_indices, prefix='train')
        test_true, _ = model.predict(indices=model.test_indices, prefix='test')
        # both splits must be non-trivial
        self.assertGreater(len(test_true), 1)
        self.assertGreater(len(train_true), 1)
        return
コード例 #23
0
        "config": {
            "units": units,
            "return_sequences": True
        }
    },
    "Flatten": {
        "config": {}
    },
    "Dense": {
        "config": {
            "units": outs
        }
    },
}

model = Model(model={'layers': layers},
              lookback=lookback,
              epochs=epochs,
              batch_size=batch_size,
              inputs=[f'in_{i}' for i in range(ins)],
              outputs=['out'],
              data=None)  # data is supplied directly to fit() below

# random (examples, lookback, ins) inputs and (examples, outs, 1) targets
x = np.random.random((examples, lookback, ins))
y = np.random.random((examples, outs, 1))
model.fit(data=(x, y))

# visualize activations, gradients and weights of the fitted model
model.plot_layer_outputs(data=(x, y))
model.plot_act_grads(data=(x, y))
model.plot_weights()
model.plot_weight_grads(data=(x, y))
コード例 #24
0
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y,
                                      y_pred,
                                      regularization_losses=self.losses)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}


df = load_nasdaq()

model = Model(batch_size=32, lookback=1, lr=8.95e-5, data=df)

# swap in the custom training-step implementation before fitting
model.KModel = CustomModel

history = model.fit(indices='random')

y, obs = model.predict()
コード例 #25
0
# this example shows how to build the Models from `from_checkout` class method
# first we will train and save a simple model and load it from config file

import os

from AI4Water import Model
from AI4Water.utils.datasets import load_nasdaq
from AI4Water.utils.utils import find_best_weight

df = load_nasdaq()

model = Model(lookback=1, epochs=2,
              data=df,
              )

history = model.fit(indices='random')

w_path = model.path
# for clarity, delete the model, although it is overwritten
del model

# Load the `Model` from checkpoint, provide the checkpoint
cpath = os.path.join(w_path, "config.json") # "provide complete path of config file"
model = Model.from_config(cpath, data=df)

w_file = find_best_weight(os.path.join(w_path, "weights"))  # The file name of weights
model.load_weights(w_file)
x, y = model.predict(indices=model.test_indices, use_datetime_index=False)
コード例 #26
0
ファイル: cbam_model.py プロジェクト: AtrCheema/AI4Water
# Put channel and spatial attention of CBAM model for time-series prediction


from AI4Water import Model
from AI4Water.utils.datasets import arg_beach


# Conv1D front-end followed by CBAM-style channel and spatial attention,
# then a Dense head producing a single (1, 1)-shaped output per example
layers = {
    "Conv1D": {"config": {"filters": 64, "kernel_size": 7}},
    "MaxPool1D": {"config": {}},
    "ChannelAttention": {"config": {"conv_dim": "1d", "in_planes": 32}},
    "SpatialAttention": {"config": {"conv_dim": "1d"}},

    "Flatten": {"config": {}},
    "Dense": {"config": {"units": 1}},
    "Reshape": {"config": {"target_shape": (1,1)}}
}

model = Model(
    model={'layers':layers},
    lookback=10,  # each example spans the previous 10 time steps
    data=arg_beach())

history = model.fit(indices="random")
コード例 #27
0
        'config': {
            'units': 32,
            'activation': 'relu',
            'dropout': 0.4,
            'recurrent_dropout': 0.5,
            'name': 'lstm_1'
        }
    },
    'sigmoid_2': {
        'config': {}
    },
    'Dense': {
        'config': {
            'units': 1
        }
    }
}

# build (but do not train) the multi-layer model defined by `layers` above
model = Model(data=df,
              batch_size=16,
              lookback=lookback,
              inputs=input_features,
              outputs=outputs,
              model={'layers': layers},
              lr=0.0001)

# This model is built only to showcase how to build multi layer model by manipulating config
# history = model.fit(indices='random')

#y, obs = model.predict(st=0, use_datetime_index=False, marker='.', linestyle='')
コード例 #28
0
#How to use AI4Water for classification problems

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer

from AI4Water import Model

# assemble features and target into one DataFrame, target as the last column
data_class = load_breast_cancer()
cols = data_class['feature_names'].tolist() + ['target']
df = pd.DataFrame(np.concatenate(
    [data_class['data'], data_class['target'].reshape(-1, 1)], axis=1),
                  columns=cols)

model = Model(
    data=df,
    inputs=data_class['feature_names'].tolist(),
    outputs=['target'],
    val_fraction=0.0,  # use all non-test data for training
    model={"DecisionTreeClassifier": {
        "max_depth": 4,
        "random_state": 313
    }},
    transformation=None,
    problem="classification")

h = model.fit()

model.view_model()
コード例 #29
0
#How to use AI4Water for regression problems

import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes

from AI4Water import Model

# assemble features and target into one DataFrame, target as the last column
data_class = load_diabetes()
cols = data_class['feature_names'] + ['target']
df = pd.DataFrame(np.concatenate(
    [data_class['data'], data_class['target'].reshape(-1, 1)], axis=1),
                  columns=cols)

model = Model(
    data=df,
    inputs=data_class['feature_names'],
    outputs=['target'],
    lookback=1,
    batches="2d",
    val_fraction=0.0,  # use all non-test data for training
    # NOTE(review): criterion="mae" is deprecated in newer scikit-learn in
    # favour of "absolute_error" -- confirm the pinned sklearn version
    model={'DecisionTreeRegressor': {
        "max_depth": 3,
        "criterion": "mae"
    }},
    transformation=None)

h = model.fit()

x, _, y = model.train_data()
コード例 #30
0
        "Dense_0": {
            'units': 64,
            'activation': 'relu'
        },
        "Flatten": {},
        "Dense_3": {
            'units': 1
        },
    }
}

df = arg_beach()

# every column except the last is a model input
input_features = list(df.columns)[0:-1]

# column in dataframe to be used as output/target
outputs = list(df.columns)[-1]

# build and train the MLP defined by `mlp_model` above
model = Model(data=df,
              batch_size=16,
              lookback=1,
              model=mlp_model,
              inputs=input_features,
              outputs=[outputs],
              lr=0.0001)

history = model.fit(indices='random')

y, obs = model.predict()
model.view_model(st=0)