def train_model(data_path, model_path, norm_path, test_size=0.05, shuffle=True,
                lr=0.003, minibatch_size=2048, epochs=30, lambd=0.001,
                testing=False, loading=False, plot_start=1, plot_end=5000):
    """
    Description
    ---
    Trains a normalized (min-max) linear regression model, given the data from
    data_path.  Model will be saved to model_path.  Advanced settings are set
    above.

    Inputs
    ---
    data_path: Path for the process data.  First column should be labels
    model_path: Path for the model saving.
    norm_path: Path for the normalization object.
    test_size: Fraction of the data held out for testing (0-1).
    shuffle: Boolean, shuffle the data for training?  Breaks time correlation
             of data
    lr: Learning rate of the model, higher learning rate results in faster,
        more unstable learning.
    minibatch_size: Size of batches for stochastic / minibatch gradient descent
    epochs: Number of passes through the whole data
    lambd: Regularization term
    testing: Training or testing?
    loading: If you want to load an old model for further training
    plot_start: Index for the start of the validation plot
    plot_end: Index for the end of the validation plot

    Returns
    ---
    raw_data: Data used for model building
    heading_names: Headings of the raw data
    linear_reg: Linear regression object
    weights_biases: Weights and biases of the model
    min_max_normalization: Fitted (or loaded) normalization object

    Raises
    ---
    ValueError: If the normalized data contains NaN values.
    """
    raw_data = pd.read_csv(data_path)
    heading_names = list(raw_data)
    raw_data = raw_data.values

    print('There are {} feature(s) and {} label(s) with {} examples.'.format(
        raw_data.shape[1] - 1, 1, raw_data.shape[0]))

    # Train / test split; fixed random_state keeps the split reproducible
    train_x, test_x, train_y, test_y = train_test_split(raw_data[:, 1:],
                                                        raw_data[:, 0],
                                                        test_size=test_size,
                                                        shuffle=shuffle,
                                                        random_state=42)

    # Reshape for TensorFlow: features as (n, d), labels as (n, 1)
    train_x = train_x.reshape(-1, raw_data.shape[1] - 1)
    test_x = test_x.reshape(-1, raw_data.shape[1] - 1)
    train_y = train_y.reshape(-1, 1)
    test_y = test_y.reshape(-1, 1)

    # Normalization: re-use the saved normalizer when testing so the scale
    # matches the one the model was trained with; otherwise fit on train data
    if testing:
        min_max_normalization = load(norm_path)
    else:
        min_max_normalization = MinMaxNormalization(
            np.concatenate([train_y, train_x], axis=1))

    training_data = min_max_normalization(
        np.concatenate([train_y, train_x], axis=1))
    testing_data = min_max_normalization(
        np.concatenate([test_y, test_x], axis=1))

    # Split the normalized [label | features] matrices back apart
    train_x = training_data[:, 1:].reshape(-1, raw_data.shape[1] - 1)
    test_x = testing_data[:, 1:].reshape(-1, raw_data.shape[1] - 1)
    train_y = training_data[:, 0].reshape(-1, 1)
    test_y = testing_data[:, 0].reshape(-1, 1)

    # Explicit NaN check (assert would be silently stripped under python -O)
    for array_name, array in (('train_x', train_x), ('test_x', test_x),
                              ('train_y', train_y), ('test_y', test_y)):
        if np.isnan(array).any():
            raise ValueError(
                'NaN values found in {} after normalization.'.format(array_name))

    with tf.Session() as sess:
        # Build linear regression object
        linear_reg = LinearRegression(sess, train_x, train_y, test_x, test_y,
                                      lr=lr, minibatch_size=minibatch_size,
                                      train_size=(1 - test_size), epochs=epochs,
                                      lambd=lambd)

        if testing:
            # Restore model and evaluate on the held-out set only
            linear_reg.saver.restore(sess, save_path=model_path)

            # Predict testing values
            pred = linear_reg.test(test_x)

            # Unnormalize before computing errors in original units
            pred = min_max_normalization.unnormalize_y(pred)
            test_y = min_max_normalization.unnormalize_y(test_y)

            # Evaluate loss
            rmse, mae = linear_reg.eval_loss(pred, test_y)
            print('Test RMSE: {:2f} | Test MAE: {:2f}'.format(rmse, mae))

            weights_biases = linear_reg.weights_and_biases()

            # Non-scrambled data plot
            seq_pred(session=sess, model=linear_reg.z, features=linear_reg.X,
                     normalizer=min_max_normalization, data=raw_data,
                     time_start=plot_start, time_end=plot_end, adv_plot=False)
        else:
            if loading:
                # Load old model for further training.
                # Bug fix: was `Model_path` (undefined name) — use the
                # `model_path` parameter.
                linear_reg.saver.restore(sess, model_path)
            else:
                # Global variables initializer
                sess.run(linear_reg.init)

            for epoch in range(linear_reg.epochs):
                for i in range(linear_reg.total_batch_number):
                    # Mini-batch gradient descent
                    batch_index = i * linear_reg.minibatch_size
                    minibatch_x = train_x[batch_index:batch_index +
                                          linear_reg.minibatch_size, :]
                    minibatch_y = train_y[batch_index:batch_index +
                                          linear_reg.minibatch_size, :]

                    # Optimize machine learning model
                    linear_reg.train(features=minibatch_x, labels=minibatch_y)

                    # Record loss (loss_check presumably logs internally)
                    if i % 10 == 0:
                        _ = linear_reg.loss_check(features=train_x,
                                                  labels=train_y)

                    # Evaluate train and test losses periodically
                    if i % 150 == 0:
                        current_loss = linear_reg.loss_check(features=train_x,
                                                             labels=train_y)

                        train_pred = linear_reg.test(features=train_x)
                        # Unnormalize data
                        train_pred = min_max_normalization.unnormalize_y(
                            train_pred)
                        actual_y = min_max_normalization.unnormalize_y(train_y)
                        # Evaluate error
                        train_rmse, train_mae = linear_reg.eval_loss(
                            train_pred, actual_y)

                        test_pred = linear_reg.test(features=test_x)
                        # Unnormalize data
                        test_pred = min_max_normalization.unnormalize_y(
                            test_pred)
                        actual_y = min_max_normalization.unnormalize_y(test_y)
                        test_rmse, test_mae = linear_reg.eval_loss(
                            test_pred, actual_y)

                        print('Epoch: {} | Loss: {:2f} | Train RMSE: {:2f}'
                              ' | Test RMSE: {:2f}'
                              .format(epoch, current_loss, train_rmse,
                                      test_rmse))

            # Save model
            linear_reg.saver.save(sess, model_path)
            print("Model saved at: {}".format(model_path))

            # Save normalizer so testing runs can reproduce the scaling
            save(min_max_normalization, norm_path)
            print("Normalization saved at: {}".format(norm_path))

            # Final test
            test_pred = linear_reg.test(features=test_x)
            # Unnormalize data
            test_pred = min_max_normalization.unnormalize_y(test_pred)
            actual_y = min_max_normalization.unnormalize_y(test_y)
            test_rmse, test_mae = linear_reg.eval_loss(test_pred, actual_y)
            print('Final Test Results: Test RMSE: {:2f} | Test MAE: {:2f}'.
                  format(test_rmse, test_mae))

            weights_biases = linear_reg.weights_and_biases()

            # Non-scrambled data plot
            seq_pred(session=sess, model=linear_reg.z, features=linear_reg.X,
                     normalizer=min_max_normalization, data=raw_data,
                     time_start=plot_start, time_end=plot_end, adv_plot=False)

    return raw_data, heading_names, linear_reg, weights_biases, \
        min_max_normalization
# NOTE(review): script-tail fragment — `saver`, `sess`, `W`, `z`, `x`,
# `test_X`, `test_y`, `Args`, `min_max_normalization`, `raw_data` and
# `seq_pred` are defined in an out-of-view portion of this file.
# Evaluates a trained model on the test set, prints RMSE/MAE, plots, and
# pickles the normalizer.

# Save the TF checkpoint to the configured path
save_path = saver.save(sess, Args["model_path"])
print("Model was saved in {}".format(save_path))

# Output weights
weights = sess.run(W)

# Predictions on the test features (no labels needed in the feed)
predictions = sess.run(z, feed_dict={x: test_X})

# Unnormalize data: invert min-max scaling using the label column's
# (column 0) denominator and minimum
predictions = np.multiply(predictions, min_max_normalization.denominator[0, 0])
predictions = predictions + min_max_normalization.col_min[0, 0]
test_y = np.multiply(test_y, min_max_normalization.denominator[0, 0])
test_y = test_y + min_max_normalization.col_min[0, 0]

# RMSE & MAE Calc (in original, unnormalized units)
RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(test_y, predictions))))
MAE_loss = np.mean(np.abs(np.subtract(test_y, predictions)))
print('Test RMSE: {} | Test MAE: {}'.format(RMSE_loss, MAE_loss))

# Non-scrambled data plot over time steps 1..5999
seq_pred(sess, raw_data, min_max_normalization, 1, 5999, err_plot=False)

# Pickle normalization so test-time runs can reproduce the scaling
pickle_out = open('normalization/ls.pickle', 'wb')
pickle.dump(min_max_normalization, pickle_out)
pickle_out.close()
# NOTE(review): fragment begins mid-statement — this `.format(...)` is the
# tail of a training-progress `print` whose opening is out of view.
            .format(epoch, current_loss, train_loss, test_loss))

# Optionally persist the graph/checkpoint when configured to do so
if Args['save_graph']:
    save_path = saver.save(sess, Args["model_path"])
    print("Model was saved in {}".format(save_path))

# Output weights
weights = sess.run(W)
biases = sess.run(b)

# Predictions on the test set (y fed but presumably unused by `z` itself —
# TODO confirm against the graph definition)
predictions = sess.run(z, feed_dict={x: test_X, y: test_y})

# RMSE & MAE Calc — NOTE(review): no unnormalization here, unlike the other
# script tails in this file; verify whether this model trains on raw data
RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(test_y, predictions))))
MAE_loss = np.mean(np.abs(np.subtract(test_y, predictions)))
print('RMSE: {} | MAE: {}'.format(RMSE_loss, MAE_loss))

# Visualization of what it looks like (no normalizer passed)
seq_pred(session=sess, model=z, features=x, normalizer=None, data=raw_data,
         time_start=1, time_end=2500, adv_plot=False, xlabel='Time',
         ylabel='Discharge Pressure')
# NOTE(review): script-tail fragment — `sess`, `W`, `b`, `z`, `x`, `y`,
# `train_X`, `train_y`, `min_max_normalization`, `raw_data` and `seq_pred`
# are defined in an out-of-view portion of this file.
# Evaluates the model on its own TRAINING data (fit diagnostics, not a
# generalization estimate), plots, and pickles the normalizer.

# Output weights
weights = sess.run(W)
biases = sess.run(b)

# Predictions on the training set
predictions = sess.run(z, feed_dict={x: train_X, y: train_y})

# Unnormalize data back to original units before computing errors
predictions = min_max_normalization.unnormalize_y(predictions)
train_y = min_max_normalization.unnormalize_y(train_y)

# RMSE & MAE Calc
RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(train_y, predictions))))
MAE_loss = np.mean(np.abs(np.subtract(train_y, predictions)))
print('RMSE: {} | MAE: {}'.format(RMSE_loss, MAE_loss))

# Visualization of what it looks like, time steps 0..3000
seq_pred(sess, z, x, raw_data, min_max_normalization, 0, 3000, adv_plot=False)

# Pickle normalization for reuse at test time
pickle_out = open('normalization/ls.pickle', 'wb')
pickle.dump(min_max_normalization, pickle_out)
pickle_out.close()
# NOTE(review): script-tail fragment — `saver`, `sess`, `W`, `z`, `x`, `y`,
# `train_X`, `train_y`, `Args`, `min_max_normalization`, `raw_data` and
# `seq_pred` are defined in an out-of-view portion of this file.

# Save the TF checkpoint to the configured path
save_path = saver.save(sess, Args["model_path"])
print("Model was saved in {}".format(save_path))

# Output weights
weights = sess.run(W)

# Predictions on the training set
predictions = sess.run(z, feed_dict={x: train_X, y: train_y})

# Unnormalize data: invert min-max scaling via the label column's stats
predictions = np.multiply(predictions, min_max_normalization.denominator[0, 0])
predictions = predictions + min_max_normalization.col_min[0, 0]
# NOTE(review): the next two lines CLOBBER the feature matrix `train_X` with
# the unnormalized labels (`train_y`).  The RMSE/MAE below are therefore
# computed against actual labels (numerically correct), but `train_X` no
# longer holds features afterwards — a separate variable (e.g. `actual_y`)
# was presumably intended; verify before reusing `train_X` downstream.
train_X = np.multiply(train_y, min_max_normalization.denominator[0, 0])
train_X = train_X + min_max_normalization.col_min[0, 0]

# RMSE & MAE Calc
RMSE_loss = np.sqrt(np.mean(np.square(np.subtract(train_X, predictions))))
MAE_loss = np.mean(np.abs(np.subtract(train_X, predictions)))
print('RMSE: {} | MAE: {}'.format(RMSE_loss, MAE_loss))

# Visualization of what it looks like — NOTE(review): this positional call
# omits the `features` argument the L6 variant passes; confirm against
# seq_pred's signature.
seq_pred(sess, z, raw_data, min_max_normalization, 1, 5000, adv_plot=True)

# Pickle normalization for reuse at test time
pickle_out = open('normalization/ls.pickle', 'wb')
pickle.dump(min_max_normalization, pickle_out)
pickle_out.close()