# Train on the requested dataset and plot predictions for the chosen method.
# NOTE(review): fname, company, dataset, method, epochs, seq_len and
# global_start_time are expected to be defined earlier in the script.
X_train, y_train, X_test, y_test = lstm.load_data(fname, seq_len, True)
print('> Data Loaded. Compiling...')

# layers: [input_dim, LSTM-1 units, LSTM-2 units, output_dim]
model = lstm.build_model([1, 50, 100, 1])
model.fit(
    X_train,
    y_train,
    batch_size=512,
    nb_epoch=epochs,
    validation_split=0.05)

fig_name = company + '_' + dataset + '_' + method
print(fig_name)

# The three prediction methods are mutually exclusive, so chain them with
# elif instead of three independent ifs.  The duration print stays between
# predict and plot in each branch to preserve the original output order.
if method == 'window':
    predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 50)
    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results_multiple(predictions, y_test, 50, method, dataset, company)
elif method == 'sequence':
    predictions = lstm.predict_sequence_full(model, X_test, seq_len)
    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results(predictions, y_test, method, dataset, company)
elif method == 'point':
    predictions = lstm.predict_point_by_point(model, X_test)
    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results(predictions, y_test, method, dataset, company)
# Main run thread: train a stacked LSTM on sp500.csv and plot the
# point-by-point predictions against the test series.
if __name__ == '__main__':
    global_start_time = time.time()
    epochs, seq_len = 2, 50

    print('> Loading data... ')
    X_train, y_train, X_test, y_test = lstm.load_data('sp500.csv', seq_len, True)
    print('> Data Loaded. Compiling...')

    model = lstm.build_model([1, 50, 100, 1])
    model.fit(
        X_train,
        y_train,
        batch_size=512,
        nb_epoch=epochs,
        validation_split=0.05)

    # Multi-sequence predictions are computed even though only the
    # point-by-point ones are plotted below.
    predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 50)
    predicted = lstm.predict_point_by_point(model, X_test)

    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results(predicted, y_test)
def predict(batch_size, nb_epoch, timestep, hidden_state, layers=(1,),
            save=False, predict_multiple=False, prediction_len=1,
            predict_full=False):
    """Train an LSTM on ./input/sp500.csv and predict its test split.

    Parameters
    ----------
    batch_size, nb_epoch : training hyper-parameters passed to Keras fit().
    timestep : window length used when slicing the series into sequences.
    hidden_state : LSTM width for the fallback single-layer model.
    layers : layer-size spec; len(layers) of 3/4/5 selects a 1/2/3-layer
        model, anything else falls back to [1, hidden_state, 1].
        (Default changed from the mutable list [1] to the tuple (1,) —
        identical fallback behaviour, no shared mutable default.)
    save : when True, persist y_test and the predictions under ./data/.
    predict_multiple : also produce multi-step predictions of prediction_len.
    predict_full : also produce a full-sequence prediction.

    Returns
    -------
    (y_test, predictions) : de-normalised test targets and point predictions.
    """
    # Load data; y_test_restorer holds the per-window scale factors used to
    # undo the normalisation below.
    normalise = True
    X_train, y_train, X_test, y_test, y_test_restorer = lstm.load_data(
        './input/sp500.csv', timestep, normalise)
    # print() calls instead of the original Python-2 print statements, to be
    # consistent with the rest of the file (same output text).
    print("X_train length" + str(len(X_train)))
    print("X_test length" + str(len(X_test)))

    # Build Model: len(layers) selects the architecture depth.
    if len(layers) == 3:
        model = lstm.build_model_single_layer_LSTM(layers)
    elif len(layers) == 4:
        model = lstm.build_model_double_layer_LSTM(layers)
    elif len(layers) == 5:
        model = lstm.build_model_triple_layer_LSTM(layers)
    else:
        model = lstm.build_model_single_layer_LSTM([1, hidden_state, 1])
    model.compile(loss='mse', optimizer='rmsprop')

    # Train the model and persist its weights/architecture.
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              validation_split=0.05)
    model.save('./data/modelMetaData.h5')

    # Predict test data with the trained model.
    predictions = lstm.predict_point_by_point(model, X_test)
    if predict_multiple:
        predictions_multiple = lstm.predict_sequences_multiple(
            model, X_test, window_size=timestep, prediction_len=prediction_len)
    if predict_full:
        predictions_full = lstm.predict_sequence_full(model, X_test,
                                                      window_size=timestep)

    if normalise:
        # Restore the normalised data and predictions to the original scale.
        for i in range(len(y_test)):
            y_test[i] = (y_test[i] + 1) * float(y_test_restorer[i])
            predictions[i] = (predictions[i] + 1) * float(y_test_restorer[i])
        if predict_multiple:
            col = len(predictions_multiple[0])
            tmp = np.asarray(predictions_multiple).reshape(1, -1)
            # NOTE(review): reshape(1, -1) yields a single row, so this loop
            # runs once and scales everything by y_test_restorer[0]; it looks
            # like per-element scaling was intended — confirm before changing.
            for i in range(len(tmp)):
                tmp[i] = (tmp[i] + 1) * float(y_test_restorer[i])
            predictions_multiple = tmp.reshape(-1, col).tolist()
        if predict_full:
            for i in range(len(predictions_full)):
                predictions_full[i] = (predictions_full[i] + 1) * float(
                    y_test_restorer[i])

    # Save predictions and the test data for further experiments.
    if save:
        np.save('./data/y_test', y_test)
        np.save('./data/predictions', predictions)
        if predict_multiple:
            np.save('./data/predictions_multi', predictions_multiple)
        if predict_full:
            np.save('./data/predictions_full', predictions_full)
    return y_test, predictions
def predict():
    """Train an LSTM on ibermansa.csv, evaluate TF metric tensors in a
    session, and return four result plots.

    Returns (im1, im2, im3, im4): plot handles for point predictions,
    de-normalised point predictions, multi-sequence predictions, and
    de-normalised full-sequence predictions.
    """
    global_start_time = time.time()
    epochs = 10
    seq_len = 10
    num_predict = 5  # prediction length per multi-step sequence
    print('> Loading data... ')
    # X_train, y_train, X_test, Y_test = lstm.load_data('sp500_2.csv', seq_len, True)
    # X_train_, y_train_, X_test_, Y_test_ = lstm.load_data('sp500_2.csv', seq_len, False)
    # Two loads: normalised data for training, raw data for de-normalising.
    X_train, y_train, X_test, Y_test = lstm.load_data('ibermansa.csv',
                                                      seq_len, True)
    X_train_, y_train_, X_test_, Y_test_ = lstm.load_data(
        'ibermansa.csv', seq_len, False)
    print('> Data Loaded. Compiling...')

    model = lstm.build_model([1, seq_len, 100, 1])
    model.fit(X_train, y_train, batch_size=100, nb_epoch=epochs,
              validation_split=0.40)

    # Multi-step sequences plus a flattened full prediction series.
    predictions2, full_predicted = lstm.predict_sequences_multiple(
        model, X_test, seq_len, num_predict)
    # predictions = lstm.predict_sequence_full(model, X_test, seq_len)
    predictions = lstm.predict_point_by_point(model, X_test, Y_test,
                                              batch_size=100)
    # sequence_length = seq_len + 1
    # result = []
    # for index in range(len(predictions) - sequence_length):
    #     result.append(predictions[index: index + sequence_length])
    # result = lstm.unnormalise_windows(result)
    # predictions = np.array(result)
    # result = []
    # for index in range(len(Y_test) - sequence_length):
    #     result.append(Y_test[index: index + sequence_length])
    # result = lstm.unnormalise_windows(result)
    # Y_test = np.array(result)
    # Y_test = Y_test+Y_test_.astype(np.float)
    # Y_test = Y_test.astype(np.float)[:296]
    # aux = predictions[:]+Y_test_
    # print(aux)
    # mape = mean_absolute_percentage_error(Y_test[-42:-1], np.array(predictions2)[:,0])
    # mse = mean_squared_error(Y_test[-42:-1],np.array(predictions2)[:,0])
    # mae = mean_absolute_percentage_error(Y_test[-42:-1],np.array(predictions2)[:,0])
    # These are symbolic TF tensors, evaluated in the session below.
    # NOTE(review): mae reuses mean_absolute_percentage_error, so it prints
    # MAPE, not MAE — confirm whether that is intended.
    mape = mean_absolute_percentage_error(Y_test[-2050:-1],
                                          full_predicted[0:-1])
    mse = mean_squared_error(Y_test[-2050:-1], full_predicted[0:-1])
    mae = mean_absolute_percentage_error(Y_test[-2050:-1],
                                         full_predicted[0:-1])
    # msle = mean_squared_logarithmic_error(Y_test, predictions)
    # print(mape)
    # TF1-era API (deprecated; tf.global_variables_initializer in later TF1).
    init_op = tf.initialize_all_variables()
    # def weighted_mape_tf(Y_test,predictions):
    #     tot = tf.reduce_sum(Y_test)
    #     tot = tf.clip_by_value(tot, clip_value_min=1,clip_value_max=1000)
    #     wmape = tf.realdiv(tf.reduce_sum(tf.abs(tf.subtract(Y_test,predictions))),tot)*100#/tot
    #     return(wmape)
    # mape = weighted_mape_tf(Y_test,predictions)

    # Run the graph to materialise the metric tensors.
    with tf.Session() as sess:
        sess.run(init_op)
        print('mape -> {} '.format(sess.run(mape)))
        print('mse -> {}'.format(sess.run(mse)))
        print('mae -> {} '.format(sess.run(mae)))
        # print ('msle -> {} %'.format(sess.run(msle)))

    print('Training duration (s) : ', time.time() - global_start_time)
    print(predictions)
    im1 = plot_results(predictions, Y_test)
    # Adding Y_test_ shifts normalised predictions back to the raw scale.
    im2 = plot_results(np.array(Y_test_) + np.array(predictions), Y_test_)
    im3 = plot_results_multiple(predictions2, Y_test, num_predict)
    im4 = plot_results(
        np.array(Y_test_)[-118:-1] + np.array(full_predicted)[-118:-1],
        Y_test_)
    return im1, im2, im3, im4
# Scale the raw values, train a multivariate LSTM, and plot per-feature
# predictions against the true test data.
# NOTE(review): scaler, values and seq_len are defined earlier in the file.
scaled = scaler.fit_transform(values)
scaled = pd.DataFrame(scaled)
X_train, y_train, X_test, y_test = lstm.data_pre(scaled, seq_len)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# fit model
global_start_time = time.time()
print('> Data Loaded. Compiling...')
# layers [X_input feature dim, LSTM[1].unit, LSTM[2].unit, output_dim]
model = lstm.build_model(layers=[X_train.shape[-1], 20, 20,
                                 X_train.shape[-1]],
                         sequence_length=seq_len)
model = lstm.fit_model(X_train, y_train, model, batch_size=64, nb_epoch=1000,
                       validation_split=0.2)
print('Training duration (s) : ', time.time() - global_start_time)

# Predict, then undo the scaling so plots are in original units.
# NOTE(review): predict_point_by_point is called here without the model
# argument used elsewhere in the project — confirm the signature.
predicted = lstm.predict_point_by_point(X_test)
predicted = scaler.inverse_transform(predicted)
y_test = scaler.inverse_transform(y_test)

# One subplot per feature (assumes at least 6 feature columns — confirm).
fig = plt.figure(facecolor='white')
for i in range(6):
    ax = fig.add_subplot(2, 3, (i+1))
    ax.plot(y_test[:, i], label='True Data')
    ax.plot(predicted[:, i], label='Predict')
    ax.legend()
plt.show()

# Second figure (loop body continues beyond this chunk of the file).
fig = plt.figure(facecolor='white')
for i in range(4):
    ax = fig.add_subplot(2, 2, (i+1))
# Stage 1: train a wide two-layer LSTM, plot its loss curves, and
# de-normalise its predictions back to prices for the second-stage model.
# NOTE(review): X_train, y_train, X_test, epochs, orig_data, seq_len and
# global_start_time come from earlier in the file.
model = lstm.build_model([1, 400,400, 1])
hist = model.fit(
    X_train,
    y_train,
    batch_size=128,
    nb_epoch=epochs,
    validation_split=0.05)
# 'test' here is actually the 5% validation split held out by fit().
plt.plot(hist.history['loss'], label = 'train')
plt.plot(hist.history['val_loss'], label = 'test')
plt.legend()

predicted_model1_train = lstm.predict_point_by_point(model, X_train)
predicted = lstm.predict_point_by_point(model, X_test)

## denomalize data: predictions are (price/base - 1), so (p + 1) * base
## restores prices; orig_data is sliced to align train/test windows.
predicted_model1_train_price = (predicted_model1_train+1)*orig_data[:len(predicted_model1_train)]
predicted_price = (predicted+1)*orig_data[len(predicted_model1_train):-seq_len]
print('Training duration (s) : ', time.time() - global_start_time)
y_test_price = orig_data[len(predicted_model1_train)+seq_len:]

# Stage 2: build a lagged dataset that includes stage-1 predictions.
lag = 1
[X_train_2, y_train_2, X_test_2, y_test_2] = lstm.load_data_2(
    orig_data, X_train, X_test, seq_len, lag, predicted,
    predicted_model1_train)
print('> Data_2 Loaded. Compiling...')
# Evaluate a saved LSTM point-by-point on the test split, collect the
# points whose absolute error exceeds a threshold, and report RMSE.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Hide messy TensorFlow warnings

model = load_model("lstm.h5")
seq_len = 400
data = Dataloader(seq_len,
                  'merged-dataset-with-noise-and-seasonal(datetime).csv',
                  'Value')
x_train, y_train = data.get_train_data()
x_test, y_test = data.get_test_data()
#print (x_train.shape)

print('\n[Model] Predicting point by point')
predicted_data = lstm.predict_point_by_point(model, x_test, y_test,
                                             x_train, y_train)

# Per-point absolute errors; x/y collect the indices and true values of
# outlier points.  (The original initialised errordata twice; once is
# enough.)
ERROR_THRESHOLD = 0.5  # NOTE(review): magic outlier cutoff — confirm scale
x = []
y = []
errordata = []
for i, (target, prediction) in enumerate(zip(y_test, predicted_data)):
    error = abs(target - prediction)
    errordata.append(error)
    if error > ERROR_THRESHOLD:
        x.append(i)
        y.append(target)

mse = mean_squared_error(predicted_data, y_test)
rmse = sqrt(mse)
print('RMSE: %f' % rmse)