def _unnormalized_errors(linear_reg, normalizer, features, labels):
    """
    Description
       ---
          Predicts on `features`, maps both the predictions and `labels` back through
          normalizer.unnormalize_y, and scores them with linear_reg.eval_loss.


    Inputs
       ---
              linear_reg: Model object exposing test(features=...) and eval_loss(pred, actual)
              normalizer: Normalization object exposing unnormalize_y
                features: Normalized feature matrix to predict on
                  labels: Normalized labels matching `features`


    Returns
       ---
          Whatever linear_reg.eval_loss returns; callers in this file unpack it as (rmse, mae).

    """
    pred = linear_reg.test(features=features)

    # Unnormalize predictions and labels before scoring
    pred = normalizer.unnormalize_y(pred)
    actual_y = normalizer.unnormalize_y(labels)

    return linear_reg.eval_loss(pred, actual_y)


def train_model(data_path,
                model_path,
                norm_path,
                test_size=0.05,
                shuffle=True,
                lr=0.003,
                minibatch_size=2048,
                epochs=30,
                lambd=0.001,
                testing=False,
                loading=False,
                plot_start=1,
                plot_end=5000):
    """
    Description
       ---
          Trains a normalized (min-max) linear regression model, given the data from data_path.  Model will be saved
          to model_path and the normalization object to norm_path.  If testing is True, nothing is trained or saved:
          the normalization object is loaded from norm_path, the model is restored from model_path and it is only
          evaluated on the test split.


    Inputs
       ---
              data_path: Path for the process data (CSV).  First column should be labels
             model_path: Path for the model saving (or restoring, when testing / loading).
              norm_path: Path for the normalization object.
              test_size: Size of the test split, forwarded to train_test_split.  (1 - test_size) is passed to the
                         model as train_size, so a fraction between 0 and 1 is expected.
                shuffle: Boolean, shuffle the data for training?  Breaks time correlation of data
                     lr: Learning rate of the model, higher learning rate results in faster, more unstable learning.
         minibatch_size: Size of batches for stochastic / minibatch gradient descent
                 epochs: Number of passes through the whole data
                  lambd: Regularization term
                testing: Training or testing?
                loading: If you want to load an old model for further training (ignored when testing)
             plot_start: Index for the start of the validation plot
               plot_end: Index for the end of the validation plot


    Returns
       ---
                    raw_data: Data used for model building (values of the CSV, labels in column 0)
               heading_names: Headings of the raw data
                  linear_reg: Linear regression object
              weights_biases: Weights and biases of the model
       min_max_normalization: Normalization object used to scale the data

    """

    raw_data = pd.read_csv(data_path)

    heading_names = list(raw_data)
    raw_data = raw_data.values

    # Every column except the first (the label) is a feature
    num_features = raw_data.shape[1] - 1

    print('There are {} feature(s) and {} label(s) with {} examples.'.format(
        num_features, 1, raw_data.shape[0]))

    # Train / Test split
    train_x, test_x, train_y, test_y = train_test_split(raw_data[:, 1:],
                                                        raw_data[:, 0],
                                                        test_size=test_size,
                                                        shuffle=shuffle,
                                                        random_state=42)

    # Reshape for TensorFlow; labels must be column vectors so they can be
    # concatenated with the features below
    train_x = train_x.reshape(-1, num_features)
    test_x = test_x.reshape(-1, num_features)

    train_y = train_y.reshape(-1, 1)
    test_y = test_y.reshape(-1, 1)

    # Normalization: reuse the saved normalizer when testing, otherwise build
    # a new one from the training split only
    if testing:
        min_max_normalization = load(norm_path)

    else:
        min_max_normalization = MinMaxNormalization(
            np.concatenate([train_y, train_x], axis=1))

    training_data = min_max_normalization(
        np.concatenate([train_y, train_x], axis=1))
    testing_data = min_max_normalization(
        np.concatenate([test_y, test_x], axis=1))

    # Reshape for TensorFlow (column 0 is the label again)
    train_x = training_data[:, 1:].reshape(-1, num_features)
    test_x = testing_data[:, 1:].reshape(-1, num_features)

    train_y = training_data[:, 0].reshape(-1, 1)
    test_y = testing_data[:, 0].reshape(-1, 1)

    # Test cases for NaN values
    assert (not np.isnan(train_x).any())
    assert (not np.isnan(test_x).any())

    assert (not np.isnan(train_y).any())
    assert (not np.isnan(test_y).any())

    # NOTE(review): the '{:2f}' specs below mean "minimum width 2, default
    # 6 decimals"; '{:.2f}' may have been intended.  Left unchanged so the
    # printed output stays the same.
    with tf.Session() as sess:

        # Build linear regression object
        linear_reg = LinearRegression(sess,
                                      train_x,
                                      train_y,
                                      test_x,
                                      test_y,
                                      lr=lr,
                                      minibatch_size=minibatch_size,
                                      train_size=(1 - test_size),
                                      epochs=epochs,
                                      lambd=lambd)

        # If testing, just run it
        if testing:
            # Restore model
            linear_reg.saver.restore(sess, save_path=model_path)

            # Predict, unnormalize and evaluate loss on the test split
            rmse, mae = _unnormalized_errors(linear_reg,
                                             min_max_normalization, test_x,
                                             test_y)

            print('Test RMSE: {:2f} | Test MAE: {:2f}'.format(rmse, mae))

        else:

            # Load old model for further training
            if loading:
                # Was `Model_path` (undefined name -> NameError)
                linear_reg.saver.restore(sess, model_path)

            else:
                # Global variables initializer
                sess.run(linear_reg.init)

            for epoch in range(linear_reg.epochs):

                for i in range(linear_reg.total_batch_number):

                    # Mini-batch gradient descent
                    batch_start = i * linear_reg.minibatch_size
                    batch_end = batch_start + linear_reg.minibatch_size
                    minibatch_x = train_x[batch_start:batch_end, :]
                    minibatch_y = train_y[batch_start:batch_end, :]

                    # Optimize machine learning model
                    linear_reg.train(features=minibatch_x, labels=minibatch_y)

                    # Record loss.  The return value is discarded;
                    # presumably loss_check stores it internally -- confirm.
                    if i % 10 == 0:
                        linear_reg.loss_check(features=train_x,
                                              labels=train_y)

                    # Evaluate train and test losses
                    if i % 150 == 0:
                        current_loss = linear_reg.loss_check(features=train_x,
                                                             labels=train_y)

                        train_rmse, _ = _unnormalized_errors(
                            linear_reg, min_max_normalization, train_x,
                            train_y)

                        test_rmse, _ = _unnormalized_errors(
                            linear_reg, min_max_normalization, test_x, test_y)

                        print(
                            'Epoch: {} | Loss: {:2f} | Train RMSE: {:2f} | Test RMSE: {:2f}'
                            .format(epoch, current_loss, train_rmse,
                                    test_rmse))

            # Save model
            linear_reg.saver.save(sess, model_path)
            print("Model saved at: {}".format(model_path))

            # Save normalizer
            save(min_max_normalization, norm_path)
            print("Normalization saved at: {}".format(norm_path))

            # Final test
            test_rmse, test_mae = _unnormalized_errors(linear_reg,
                                                       min_max_normalization,
                                                       test_x, test_y)
            print('Final Test Results:  Test RMSE: {:2f} | Test MAE: {:2f}'.
                  format(test_rmse, test_mae))

        # Both modes finish the same way: read out the parameters and plot
        # predictions over the original (non-scrambled) data
        weights_biases = linear_reg.weights_and_biases()

        # Non-scrambled data plot
        seq_pred(session=sess,
                 model=linear_reg.z,
                 features=linear_reg.X,
                 normalizer=min_max_normalization,
                 data=raw_data,
                 time_start=plot_start,
                 time_end=plot_end,
                 adv_plot=False)

    return raw_data, heading_names, linear_reg, weights_biases, min_max_normalization
        save_path = saver.save(sess, Args["model_path"])
        print("Model was saved in {}".format(save_path))

    # Output weights: fetch the current value of W from the session
    weights = sess.run(W)

    # Predictions of the model output z for the test features
    predictions = sess.run(z, feed_dict={x: test_X})

    # Unnormalize data: multiply by denominator[0, 0] and add col_min[0, 0],
    # applied identically to the predictions and to test_y
    predictions = np.multiply(predictions,
                              min_max_normalization.denominator[0, 0])
    predictions = predictions + min_max_normalization.col_min[0, 0]

    test_y = np.multiply(test_y, min_max_normalization.denominator[0, 0])
    test_y = test_y + min_max_normalization.col_min[0, 0]

    # RMSE & MAE Calc
    RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(test_y, predictions))))
    MAE_loss = np.mean(np.abs(np.subtract(test_y, predictions)))

    print('Test RMSE: {} | Test MAE: {}'.format(RMSE_loss, MAE_loss))

    # Non-scrambled data plot
    seq_pred(sess, raw_data, min_max_normalization, 1, 5999, err_plot=False)

    # Pickle normalization; the context manager closes the file even if
    # pickle.dump raises (the handle used to be closed by hand)
    with open('normalization/ls.pickle', 'wb') as pickle_out:
        pickle.dump(min_max_normalization, pickle_out)
                    .format(epoch, current_loss, train_loss, test_loss))

    # Save the session's variables to Args["model_path"] only when the
    # 'save_graph' option is set
    if Args['save_graph']:
        save_path = saver.save(sess, Args["model_path"])
        print("Model was saved in {}".format(save_path))

    # Output weights: fetch the current values of W and b from the session
    weights = sess.run(W)
    biases = sess.run(b)

    # Predictions of the model output z for the test features
    # (test_y is fed to y as well)
    predictions = sess.run(z, feed_dict={x: test_X, y: test_y})

    # RMSE & MAE Calc (no unnormalization is applied in this variant)
    RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(test_y, predictions))))
    MAE_loss = np.mean(np.abs(np.subtract(test_y, predictions)))

    print('RMSE: {} | MAE: {}'.format(RMSE_loss, MAE_loss))

    # Visualization of what it looks like; no normalizer is passed
    seq_pred(session=sess,
             model=z,
             features=x,
             normalizer=None,
             data=raw_data,
             time_start=1,
             time_end=2500,
             adv_plot=False,
             xlabel='Time',
             ylabel='Discharge Pressure')
Beispiel #4
0
    # Fetch the current values of W and b from the session
    weights = sess.run(W)
    biases = sess.run(b)

    # Predictions of the model output z for the training features
    # (train_y is fed to y as well)
    predictions = sess.run(z, feed_dict={x: train_X, y: train_y})

    # Unnormalize data
    predictions = min_max_normalization.unnormalize_y(predictions)
    train_y = min_max_normalization.unnormalize_y(train_y)

    # RMSE & MAE Calc
    RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(train_y, predictions))))
    MAE_loss = np.mean(np.abs(np.subtract(train_y, predictions)))

    print('RMSE: {} | MAE: {}'.format(RMSE_loss, MAE_loss))

    # Visualization of what it looks like
    seq_pred(sess,
             z,
             x,
             raw_data,
             min_max_normalization,
             0,
             3000,
             adv_plot=False)

    # Pickle normalization; the context manager closes the file even if
    # pickle.dump raises (the handle used to be closed by hand)
    with open('normalization/ls.pickle', 'wb') as pickle_out:
        pickle.dump(min_max_normalization, pickle_out)
Beispiel #5
0
        save_path = saver.save(sess, Args["model_path"])
        print("Model was saved in {}".format(save_path))

    # Output weights: fetch the current value of W from the session
    weights = sess.run(W)

    # Predictions of the model output z for the training features
    # (train_y is fed to y as well)
    predictions = sess.run(z, feed_dict={x: train_X, y: train_y})

    # Unnormalize data: multiply by denominator[0, 0] and add col_min[0, 0]
    predictions = np.multiply(predictions,
                              min_max_normalization.denominator[0, 0])
    predictions = predictions + min_max_normalization.col_min[0, 0]

    # NOTE(review): train_X is rebound to the unnormalized *labels* here
    # (computed from train_y), so the original feature matrix is no longer
    # reachable under this name.  Kept as-is because code outside this view
    # may rely on the rebinding -- confirm and consider a separate name.
    train_X = np.multiply(train_y, min_max_normalization.denominator[0, 0])
    train_X = train_X + min_max_normalization.col_min[0, 0]

    # RMSE & MAE Calc (train_X holds the unnormalized labels at this point)
    RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(train_X, predictions))))
    MAE_loss = np.mean(np.abs(np.subtract(train_X, predictions)))

    print('RMSE: {} | MAE: {}'.format(RMSE_loss, MAE_loss))

    # Visualization of what it looks like
    seq_pred(sess, z, raw_data, min_max_normalization, 1, 5000, adv_plot=True)

    # Pickle normalization; the context manager closes the file even if
    # pickle.dump raises (the handle used to be closed by hand)
    with open('normalization/ls.pickle', 'wb') as pickle_out:
        pickle.dump(min_max_normalization, pickle_out)