def train(ticker):
    """Download daily OHLC data for *ticker* from Alpha Vantage, train a
    small LSTM on the scaled OHLC average, and save the model to disk.

    Side effects: writes temp/data/<ticker>.csv and temp/models/<ticker>.h5.
    Returns the string "<ticker> Trained" on success.
    """
    import os

    # NOTE(review): the API key is hard-coded in source; move it to an
    # environment variable or config file before shipping.
    url = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol="
    nextP = "&outputsize=full&apikey=T69K620H31T06293&datatype=csv"
    r = requests.get(url + ticker + nextP)
    # Fail loudly on HTTP errors instead of silently writing an error page
    # to disk and training on it.
    r.raise_for_status()

    path = 'temp/data/' + ticker + '.csv'
    pathm = 'temp/models/' + ticker + '.h5'
    # Make sure the target directories exist so open()/model.save() work
    # on a fresh checkout.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    os.makedirs(os.path.dirname(pathm), exist_ok=True)
    with open(path, 'wb') as f:
        f.write(r.content)

    # FOR REPRODUCIBILITY
    np.random.seed(7)

    # IMPORTING DATASET (CSV columns 1-4 = open, high, low, close).
    dataset = pd.read_csv(path, usecols=[1, 2, 3, 4])
    # Alpha Vantage returns newest-first; reverse to chronological order.
    dataset = dataset.reindex(index=dataset.index[::-1])

    # The model is trained on the per-row OHLC average.
    OHLC_avg = dataset.mean(axis=1)

    # PREPARATION OF TIME SERIES DATASET: column vector scaled to [0, 1].
    OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg), 1))
    scaler = MinMaxScaler(feature_range=(0, 1))
    OHLC_avg = scaler.fit_transform(OHLC_avg)

    # TRAIN-TEST SPLIT (75% / 25%, chronological).
    train_len = int(len(OHLC_avg) * 0.75)
    train_OHLC = OHLC_avg[0:train_len, :]
    test_OHLC = OHLC_avg[train_len:len(OHLC_avg), :]

    # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1).
    trainX, trainY = preprocessing.new_dataset(train_OHLC, 1)
    testX, testY = preprocessing.new_dataset(test_OHLC, 1)

    # RESHAPING to (samples, timesteps=1, features) as the LSTM expects.
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    step_size = 1

    # LSTM MODEL: two stacked LSTMs feeding a linear output unit.
    model = Sequential()
    model.add(LSTM(32, input_shape=(1, step_size), return_sequences=True))
    model.add(LSTM(16))
    model.add(Dense(1))
    model.add(Activation('linear'))

    # MODEL COMPILING AND TRAINING
    # Try SGD, adam, adagrad and compare!!!
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    model.fit(trainX, trainY, epochs=5, batch_size=1, verbose=2)
    model.save(pathm)
    return ticker + " Trained"
# NOTE(review): fragment — this chunk has no visible enclosing definition;
# obs, close_val, OHLC_avg, plt, MinMaxScaler, preprocessing and the Keras
# layers are defined elsewhere in the file.
plt.plot(obs, close_val, 'g', label = 'Closing price')
plt.legend(loc = 'upper right')
plt.show()

# PREPARATION OF TIME SERIES DATASET: column vector scaled to [0, 1]
OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg),1)) # 1664
scaler = MinMaxScaler(feature_range=(0, 1))
OHLC_avg = scaler.fit_transform(OHLC_avg)

# TRAIN-TEST SPLIT (75% / 25%, chronological; the length names are then
# reused for the actual slice arrays)
train_OHLC = int(len(OHLC_avg) * 0.75)
test_OHLC = len(OHLC_avg) - train_OHLC
train_OHLC, test_OHLC = OHLC_avg[0:train_OHLC,:], OHLC_avg[train_OHLC:len(OHLC_avg),:]

# TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1)
trainX, trainY = preprocessing.new_dataset(train_OHLC, 1)
testX, testY = preprocessing.new_dataset(test_OHLC, 1)

# RESHAPING TRAIN AND TEST DATA to (samples, timesteps=1, features)
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
step_size = 1

# LSTM MODEL: two stacked LSTM layers feeding a linear output unit
model = Sequential()
model.add(LSTM(32, input_shape=(1, step_size), return_sequences = True))
model.add(LSTM(16))
model.add(Dense(1))
model.add(Activation('linear'))
print(model.summary())
# Calculate average of Low and High values OHLC_avg = np.reshape(OHLC_avg, (len(OHLC_avg), 2)) #Scale the values scaler = MinMaxScaler(feature_range=(0, 1)) OHLC_avg = scaler.fit_transform(OHLC_avg) # TRAIN-TEST SPLIT train_OHLC = int(len(OHLC_avg) * 0.75) test_OHLC = len(OHLC_avg) - train_OHLC train_datetime = int(len(datetime) * 0.75) test_datetime = len(datetime) - train_datetime train_OHLC, test_OHLC = OHLC_avg[0:train_OHLC, :], OHLC_avg[ train_OHLC:len(OHLC_avg), :] train_datetime, test_datetime = datetime[ 0:train_datetime, :], datetime[train_datetime:len(datetime), :] # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1) trainX, trainY = preprocessing.new_dataset(train_OHLC, 2) testX, testY = preprocessing.new_dataset(test_OHLC, 2) train_datetimeX, train_datetimeY = preprocessing.new_dataset( train_datetime, 2) test_datetimeX, test_datetimeY = preprocessing.new_dataset( test_datetime, 2) # RESHAPING TRAIN AND TEST DATA trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1])) train_datetime = np.reshape( train_datetime, (train_datetime.shape[0], 1, train_datetime.shape[1])) test_datetime = np.reshape( test_datetime, (test_datetime.shape[0], 1, test_datetime.shape[1])) step_size = 2
def net(close_val, step):
    """Iteratively predict *step* future closing values.

    Each iteration trains a fresh LSTM on the (growing) close_val series,
    takes the last test-window prediction as the next future value, appends
    it to the series, and collects it into the returned list.

    close_val: pandas object of closing prices exposing .values and .append
        (assumed DataFrame/Series — TODO confirm at the caller).
    step: number of future values to generate.

    Returns a list of one-element pd.Series, one per predicted step.
    """
    nextValues = []
    for i in range(step):
        # PREPARATION OF TIME SERIES DATASET — rescaled every iteration
        # because close_val grows by one value per loop.
        OHLC_avg = np.reshape(close_val.values, (len(close_val), 1))
        scaler = MinMaxScaler(feature_range=(0, 1))
        OHLC_avg = scaler.fit_transform(OHLC_avg)

        # TRAIN-TEST SPLIT: 99% train, only the tail held out.
        train_len = int(len(OHLC_avg) * .99)
        train_OHLC = OHLC_avg[0:train_len, :]
        test_OHLC = OHLC_avg[train_len:len(OHLC_avg), :]

        # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1)
        trainX, trainY = preprocessing.new_dataset(train_OHLC, 1)
        testX, testY = preprocessing.new_dataset(test_OHLC, 1)

        # RESHAPING to (samples, timesteps=1, features)
        trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
        step_size = 1

        # LSTM MODEL — rebuilt from scratch on every iteration.
        model = Sequential()
        model.add(LSTM(32, input_shape=(1, step_size), return_sequences=True))
        model.add(LSTM(16))
        model.add(Dense(1))
        model.add(Activation('linear'))

        # MODEL COMPILING AND TRAINING
        # Try SGD, adam, adagrad and compare!!!
        model.compile(loss='mean_squared_error', optimizer='adagrad')
        model.fit(trainX, trainY, epochs=5, batch_size=1, verbose=2)

        # PREDICTION
        trainPredict = model.predict(trainX)
        testPredict = model.predict(testX)

        # DE-NORMALIZING so the RMSEs and the appended value are in price units.
        trainPredict = scaler.inverse_transform(trainPredict)
        trainY = scaler.inverse_transform([trainY])
        testPredict = scaler.inverse_transform(testPredict)
        testY = scaler.inverse_transform([testY])

        # TRAINING RMSE
        trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
        print('Train RMSE: %.2f' % (trainScore))
        # TEST RMSE
        testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
        print('Test RMSE: %.2f' % (testScore))

        # (Dead code removed: trainPredictPlot/testPredictPlot buffers and a
        # stray bare OHLC_avg expression were computed but never used here.)

        # PREDICT FUTURE VALUES
        last_val = pd.Series(testPredict[-1])
        # BUG FIX: the original called close_val.append(last_val) and
        # discarded the result — pandas append is NOT in-place, so the
        # series never actually grew and every loop retrained on the same
        # data. Assign the result back.
        # NOTE(review): Series/DataFrame.append is removed in pandas >= 2.0;
        # switch to pd.concat once close_val's exact type is confirmed.
        close_val = close_val.append(last_val)
        nextValues.append(last_val)
    return nextValues
# PREPARATION OF TIME SERIES DATASE OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg), 1)) # 1664 scaler = MinMaxScaler(feature_range=(0, 1)) OHLC_avg = scaler.fit_transform(OHLC_avg) # TRAIN-TEST SPLIT train_OHLC = int(len(OHLC_avg) * 0.75) test_OHLC = len(OHLC_avg) - train_OHLC train_OHLC, test_OHLC = OHLC_avg[0:train_OHLC, :], OHLC_avg[ train_OHLC:len(OHLC_avg), :] step_size = 60 # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1) trainX, trainY = preprocessing.new_dataset(train_OHLC, step_size) testX, testY = preprocessing.new_dataset(test_OHLC, step_size) # RESHAPING TRAIN AND TEST DATA trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1)) testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1)) # LSTM MODEL model = Sequential() model.add(LSTM(32, input_shape=(step_size, 1), return_sequences=True)) model.add(Dropout(0.02)) # model.add(LSTM(16, return_sequences=True)) # model.add(Dropout(0.02)) model.add(LSTM(16)) model.add(Dropout(0.02)) model.add(Dense(1))
def pred(ticker):
    """Load the saved LSTM for *ticker*, predict the next-day value from the
    cached CSV, store last/next values in redis, and return a status dict.

    Reads temp/data/<ticker>.csv and temp/models/<ticker>.h5 (both written
    by train()).
    """
    path = 'temp/data/' + ticker + '.csv'
    pathm = 'temp/models/' + ticker + '.h5'

    # IMPORTING DATASET (columns 1-4 = open, high, low, close), reversed to
    # chronological order to match how the model was trained.
    dataset = pd.read_csv(path, usecols=[1, 2, 3, 4])
    dataset = dataset.reindex(index=dataset.index[::-1])

    # The model was trained on the per-row OHLC average.
    OHLC_avg = dataset.mean(axis=1)

    # PREPARATION OF TIME SERIES DATASET: column vector scaled to [0, 1].
    OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg), 1))
    scaler = MinMaxScaler(feature_range=(0, 1))
    OHLC_avg = scaler.fit_transform(OHLC_avg)

    # TRAIN-TEST SPLIT — must mirror train() (75/25) so the test window
    # covers the same most-recent rows.
    train_len = int(len(OHLC_avg) * 0.75)
    test_OHLC = OHLC_avg[train_len:len(OHLC_avg), :]

    # TIME-SERIES DATASET and reshape to (samples, timesteps=1, features).
    testX, testY = preprocessing.new_dataset(test_OHLC, 1)
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    model = load_model(pathm)

    # PREDICTION on the test window only. (Dead code removed: the original
    # also predicted the train window, computed RMSEs whose prints were
    # commented out, and built plot buffers that were never plotted.)
    testPredict = model.predict(testX)
    testPredict = scaler.inverse_transform(testPredict)

    # PREDICT FUTURE VALUES
    last_val = testPredict[-1]
    # NOTE(review): last_val / last_val is always 1.0, so a constant is fed
    # to the model and next_val is used as a multiplier on last_val. Kept
    # as-is (it matches the sibling functions) — confirm this is intended.
    last_val_scaled = last_val / last_val
    next_val = model.predict(np.reshape(last_val_scaled, (1, 1, 1)))
    finobj = {
        # np.asscalar() was removed in NumPy 1.23 — use ndarray.item().
        'last': last_val.item(),
        'next': (last_val * next_val).item()
    }
    # r is presumably a module-level Redis client — confirm at import site.
    r.set(ticker + "last", finobj['last'])
    r.set(ticker + 'next', finobj['next'])
    return {'result': ticker + " Values Stored"}
def lstm_neural_network(historical_data, look_back=240):
    """Train a small LSTM on the closing price of *historical_data* and
    predict the next candle.

    historical_data: DataFrame with at least Date, Open, High, Low, Close
        and Volume columns (Date and Volume are dropped).
    look_back: input window length fed to the network (default 240).

    Side effects: prints RMSEs and the last/next candle values, and shows a
    matplotlib figure. Returns (last_candle_value, next_candle_value) as
    Python floats.
    """
    # FOR REPRODUCIBILITY
    np.random.seed(7)

    # IMPORTING DATASET — keep the OHLC columns only, oldest-first.
    dataset = historical_data.drop(['Date', 'Volume'], axis=1)
    dataset = dataset.reindex(index=dataset.index[::-1])

    # Despite the name, the modelled series is the Close column.
    OHLC_avg = dataset[['Close']]

    # PREPARATION OF TIME SERIES DATASET: column vector scaled to [0, 1].
    OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg), 1))
    scaler = MinMaxScaler(feature_range=(0, 1))
    OHLC_avg = scaler.fit_transform(OHLC_avg)

    # TRAIN-TEST SPLIT (75% / 25%, chronological).
    train_len = int(len(OHLC_avg) * 0.75)
    train_OHLC = OHLC_avg[0:train_len, :]
    test_OHLC = OHLC_avg[train_len:len(OHLC_avg), :]

    # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1).
    trainX, trainY = preprocessing.new_dataset(train_OHLC, look_back)
    testX, testY = preprocessing.new_dataset(test_OHLC, look_back)

    # RESHAPING to (samples, timesteps=1, features=look_back).
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    # LSTM MODEL (a commented-out two-layer variant was removed).
    model = Sequential()
    model.add(LSTM(25, input_shape=(1, look_back)))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    model.fit(trainX, trainY, epochs=1000, batch_size=240, verbose=2)

    # PREDICTION
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # DE-NORMALIZING FOR PLOTTING
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])

    # TRAINING RMSE
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train RMSE: %.2f' % (trainScore))
    # TEST RMSE
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test RMSE: %.2f' % (testScore))

    # CREATING SHIFTED DATASETS TO PLOT TRAIN AND TEST PREDICTIONS.
    trainPredictPlot = np.empty_like(OHLC_avg)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict
    testPredictPlot = np.empty_like(OHLC_avg)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(trainPredict) + (look_back * 2) + 1:len(OHLC_avg) - 1, :] = testPredict

    # DE-NORMALIZING MAIN DATASET
    OHLC_avg = scaler.inverse_transform(OHLC_avg)

    # PLOT OF MAIN OHLC VALUES, TRAIN PREDICTIONS AND TEST PREDICTIONS
    plt.plot(OHLC_avg, 'g', label='original dataset')
    plt.plot(trainPredictPlot, 'r', label='training set')
    plt.plot(testPredictPlot, 'b', label='predicted stock price/test set')
    plt.legend(loc='upper right')
    plt.xlabel('Time in Days')
    plt.ylabel('OHLC Value of Apple Stocks')

    # PREDICT FUTURE VALUES from the last look_back test predictions.
    last_val = testPredict[-240:]
    print(last_val.shape)
    # NOTE(review): fit_transform here RE-FITS the scaler on the prediction
    # tail, changing its scale relative to the training data — confirm this
    # is intended before relying on the de-normalized outputs.
    last_val_scaled = scaler.fit_transform(last_val)
    next_val = model.predict(np.reshape(last_val_scaled, (1, 1, 240)))
    last_val_dnormalized = scaler.inverse_transform(last_val)
    next_val_dnormalized = scaler.inverse_transform(next_val)
    # np.asscalar() was removed in NumPy 1.23 — use ndarray.item().
    print("Last Candle Value: {}".format(last_val_dnormalized[-1].item()))
    print("Next Candle value: {}".format(next_val_dnormalized[-1].item()))
    # NOTE(review): plotting a single scalar draws no visible line; this
    # matches the original behavior but is probably not what was intended.
    plt.plot(last_val_dnormalized[-1].item(), 'y', label='next candle prediction')
    plt.show()
    return last_val_dnormalized[-1].item(), next_val_dnormalized[-1].item()
def LSTMPredict(Stockname):
    """Train an LSTM on the <Stockname>_History_Price MySQL table and print
    train/test error plus last/next day value predictions.

    Stockname: table-name prefix. It is whitelist-validated before being
        interpolated into SQL, because identifiers cannot be bound as query
        parameters.

    Side effects: shows two matplotlib figures and prints metrics.
    """
    # SECURITY: Stockname is spliced directly into the SQL text below.
    # Reject anything that is not a plain identifier to prevent injection.
    if not Stockname.isidentifier():
        raise ValueError("invalid stock name: %r" % (Stockname,))

    # FOR REPRODUCIBILITY
    np.random.seed(7)

    # NOTE(review): DB credentials are hard-coded; move to configuration.
    engine = create_engine(
        'mysql+mysqlconnector://root:123456@localhost:3306/TESTDB')
    df = pd.read_sql_query('SELECT * FROM ' + Stockname + '_History_Price',
                           engine)

    # IMPORTING DATASET — OHLC columns coerced to numeric, oldest-first.
    dataset = df[['Open_Price', 'High_Price', 'Low_Price', 'Close_Price']]
    dataset = dataset.apply(pd.to_numeric)
    dataset = dataset.reindex(index=dataset.index[::-1])

    # CREATING OWN INDEX FOR FLEXIBILITY (x-axis for the indicator plot).
    obs = np.arange(1, len(dataset) + 1, 1)

    # TAKING DIFFERENT INDICATORS FOR PREDICTION
    OHLC_avg = dataset.mean(axis=1)
    HLC_avg = dataset[['High_Price', 'Low_Price', 'Close_Price']].mean(axis=1)
    close_val = dataset[['Close_Price']]

    # PLOTTING ALL INDICATORS IN ONE PLOT
    plt.plot(obs, OHLC_avg, 'r', label='OHLC avg')
    plt.plot(obs, HLC_avg, 'b', label='HLC avg')
    plt.plot(obs, close_val, 'g', label='Closing price')
    plt.legend(loc='upper right')
    plt.show()

    # PREPARATION OF TIME SERIES DATASET: column vector scaled to [0, 1].
    OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg), 1))
    scaler = MinMaxScaler(feature_range=(0, 1))
    OHLC_avg = scaler.fit_transform(OHLC_avg)

    # TRAIN-TEST SPLIT (75% / 25%, chronological).
    train_len = int(len(OHLC_avg) * 0.75)
    train_OHLC = OHLC_avg[0:train_len, :]
    test_OHLC = OHLC_avg[train_len:len(OHLC_avg), :]

    # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1).
    trainX, trainY = preprocessing.new_dataset(train_OHLC, 1)
    testX, testY = preprocessing.new_dataset(test_OHLC, 1)

    # RESHAPING to (samples, timesteps=1, features).
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    step_size = 1

    # LSTM MODEL
    model = Sequential()
    model.add(LSTM(32, input_shape=(1, step_size), return_sequences=True))
    model.add(LSTM(16))
    model.add(Dense(1))
    model.add(Activation('linear'))

    # MODEL COMPILING AND TRAINING
    # Try SGD, adam, adagrad and compare!!!
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

    # PREDICTION
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # DE-NORMALIZING FOR PLOTTING
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])

    # TRAINING RMSE
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train RMSE: %.2f' % (trainScore))
    # TEST RMSE and MAE
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test RMSE: %.2f' % (testScore))
    print(mean_absolute_error(testY[0], testPredict[:, 0]))

    # CREATING SHIFTED DATASETS TO PLOT TRAIN AND TEST PREDICTIONS.
    # (Dead code removed: avg/av means of testPredict were never used.)
    trainPredictPlot = np.empty_like(OHLC_avg)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[step_size:len(trainPredict) + step_size, :] = trainPredict
    testPredictPlot = np.empty_like(OHLC_avg)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(trainPredict) + (step_size * 2) + 1:len(OHLC_avg) - 1, :] = testPredict

    # DE-NORMALIZING MAIN DATASET
    OHLC_avg = scaler.inverse_transform(OHLC_avg)

    # PLOT OF MAIN OHLC VALUES, TRAIN PREDICTIONS AND TEST PREDICTIONS
    plt.plot(OHLC_avg, 'g', label='original dataset')
    plt.plot(trainPredictPlot, 'r', label='training set')
    plt.plot(testPredictPlot, 'b', label='predicted stock price/test set')
    plt.legend(loc='upper right')
    plt.xlabel('Time in Days')
    plt.ylabel('OHLC Value of Stocks')
    plt.show()

    # PREDICT FUTURE VALUES
    last_val = testPredict[-1]
    # NOTE(review): last_val / last_val is always 1.0; next_val then acts
    # as a multiplier on last_val. Kept as-is — confirm this is intended.
    last_val_scaled = last_val / last_val
    next_val = model.predict(np.reshape(last_val_scaled, (1, 1, 1)))
    # np.asscalar() was removed in NumPy 1.23 — use ndarray.item().
    print("Last Day Value:", last_val.item())
    print("Next Day Value:", (last_val * next_val).item())
def StockPricePrediction():
    """Train an LSTM on IDBI.csv OHLC data and return an HTML snippet with
    two base64-embedded PNG charts: the actual prices and the predictions.
    """
    # IMPORTING IMPORTANT LIBRARIES (kept function-local as in the original;
    # unused load_model import dropped).
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np
    import math
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import mean_squared_error
    from keras.models import Sequential
    from keras.layers import Dense, Activation
    from keras.layers import LSTM
    import preprocessing
    import io
    import base64

    # FOR REPRODUCIBILITY — fixed seed so repeated runs train identically.
    np.random.seed(7)

    # IMPORTING DATASET (columns 1-4 = Open, High, Low, Close), reversed to
    # chronological order.
    dataset = pd.read_csv('IDBI.csv', usecols=[1, 2, 3, 4])
    dataset = dataset.reindex(index=dataset.index[::-1])

    # CREATING OWN INDEX FOR FLEXIBILITY: 1..len(dataset), x-axis for plot 1.
    obs = np.arange(1, len(dataset) + 1, 1)

    # TAKING DIFFERENT INDICATORS FOR PREDICTION
    OHLC_avg = dataset.mean(axis=1)  # per-row average of O/H/L/C
    HLC_avg = dataset[['High', 'Low', 'Close']].mean(axis=1)
    close_val = dataset[['Close']]

    # PREPARATION OF TIME SERIES DATASET: column vector scaled to [0, 1].
    OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg), 1))
    scaler = MinMaxScaler(feature_range=(0, 1))
    OHLC_avg = scaler.fit_transform(OHLC_avg)

    # TRAIN-TEST SPLIT: 75% train / 25% test, chronological.
    train_len = int(len(OHLC_avg) * 0.75)
    train_OHLC = OHLC_avg[0:train_len, :]
    test_OHLC = OHLC_avg[train_len:len(OHLC_avg), :]

    # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1).
    trainX, trainY = preprocessing.new_dataset(train_OHLC, 1)
    testX, testY = preprocessing.new_dataset(test_OHLC, 1)

    # RESHAPING to (samples, timesteps=1, features).
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    step_size = 1

    # LSTM MODEL
    model = Sequential()
    model.add(LSTM(32, input_shape=(1, step_size), return_sequences=True))
    model.add(LSTM(16))
    model.add(Dense(1))
    model.add(Activation('linear'))

    # MODEL COMPILING AND TRAINING
    # Try SGD, adam, adagrad and compare!!!
    model.compile(loss='mean_squared_error', optimizer='adagrad')
    model.fit(trainX, trainY, epochs=5, batch_size=1, verbose=2)

    # PREDICTION
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # DE-NORMALIZING FOR PLOTTING
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])

    # TRAINING / TEST RMSE
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train RMSE: %.2f' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test RMSE: %.2f' % (testScore))

    # CREATING SHIFTED DATASETS TO PLOT TRAIN AND TEST PREDICTIONS.
    trainPredictPlot = np.empty_like(OHLC_avg)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[step_size:len(trainPredict) + step_size, :] = trainPredict
    testPredictPlot = np.empty_like(OHLC_avg)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(trainPredict) + (step_size * 2) + 1:len(OHLC_avg) - 1, :] = testPredict

    # DE-NORMALIZING MAIN DATASET
    OHLC_avg = scaler.inverse_transform(OHLC_avg)

    # PLOT 1: ALL INDICATORS. BUG FIX: the original reused one BytesIO for
    # both charts without truncating it, which can leave trailing bytes of
    # the first PNG inside the second; each chart now gets its own buffer.
    img1 = io.BytesIO()
    plt.figure()
    plt.plot(obs, OHLC_avg, 'r', label='OHLC avg')
    plt.plot(obs, HLC_avg, 'b', label='HLC avg')
    plt.plot(obs, close_val, 'g', label='Closing price')
    plt.legend(loc='upper left')
    plt.xlabel('Time in Days')
    plt.ylabel('OHLC Value of Apple Stocks')
    plt.savefig(img1, format='png')
    img1.seek(0)
    plot_url1 = base64.b64encode(img1.getvalue()).decode()

    # PLOT 2: ORIGINAL vs TRAIN/TEST PREDICTIONS. BUG FIX: the original
    # never started a new figure, so plot 1's lines bled into this chart.
    img2 = io.BytesIO()
    plt.figure()
    plt.plot(OHLC_avg, 'g', label='original dataset')
    plt.plot(trainPredictPlot, 'r', label='training set')
    plt.plot(testPredictPlot, 'b', label='predicted stock price/test set')
    plt.legend(loc='upper left')
    plt.xlabel('Time in Days')
    plt.ylabel('OHLC Value of Apple Stocks')
    plt.savefig(img2, format='png')
    img2.seek(0)
    plot_url2 = base64.b64encode(img2.getvalue()).decode()

    # BUG FIX: '<hl>' was not a valid HTML tag — '<hr>' was clearly intended.
    # NOTE: the original also had future-value prediction code AFTER this
    # return statement; it was unreachable and has been removed.
    return '<h1>IDBI Bank - Actual Stock Price</h1><img src="data:image/png;base64,{}"><hr><h1>IDBI Bank Predicted Stock Price</h1><img src="data:image/png;base64,{}">'.format(plot_url1, plot_url2)
# plt.plot(close_val, 'g', label = 'Closing price') # plt.legend(loc = 'upper right') # plt.show() # PREPARATION OF TIME SERIES DATASE OHLC_avg = np.reshape(OHLC_avg.values, (len(OHLC_avg),1)) # 1664 scaler = MinMaxScaler(feature_range=(0, 1)) OHLC_avg = scaler.fit_transform(OHLC_avg) # TRAIN-TEST SPLIT train_OHLC = int(len(OHLC_avg) * 0.70) test_OHLC = len(OHLC_avg) - train_OHLC train_OHLC, test_OHLC = OHLC_avg[0:train_OHLC,:], OHLC_avg[train_OHLC:len(OHLC_avg),:] # TIME-SERIES DATASET (FOR TIME T, VALUES FOR TIME T+1) trainX, trainY = preprocessing.new_dataset(train_OHLC, 1) testX, testY = preprocessing.new_dataset(test_OHLC, 1) # RESHAPING TRAIN AND TEST DATA trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1])) step_size = 1 # LSTM MODEL model = Sequential() model.add(LSTM(32, input_shape=(1, step_size), return_sequences = True)) model.add(LSTM(16)) model.add(Dense(1)) model.add(Activation('linear')) # MODEL COMPILING AND TRAINING