def make_prediction(self, coin_id, TOTAL_DATA_SIZE):
    """Train a small stacked-LSTM on hourly mid prices and predict forward.

    Fetches hourly high/low series for `coin_id`, builds a 3-layer LSTM
    graph (TF 1.x style), trains for a fixed number of epochs, then runs
    autoregressive one-step predictions over the test window.

    Returns (predicted_value, actual_value), or (0, 0) when not enough
    history is available.
    NOTE(review): both returned values are in MinMax-scaled space — the
    scaler's transform is never inverted before returning; confirm callers
    expect scaled values.
    """
    logging.info("Start Prediction for coin {}...".format(coin_id))
    start_time = time.time()

    # Last 24 hourly points are held out as the test window.
    TEST_DATA_SIZE = 24
    TRAIN_DATA_SIZE = TOTAL_DATA_SIZE - TEST_DATA_SIZE

    # Step 1: Get Source Data — hourly highs/lows, trimmed to the most
    # recent TOTAL_DATA_SIZE points. `ccdata.get_one` is a project helper
    # defined elsewhere.
    high_prices = ccdata.get_one(coin_id, "1h", "high")
    low_prices = ccdata.get_one(coin_id, "1h", "low")
    high_prices = high_prices[-TOTAL_DATA_SIZE:]
    low_prices = low_prices[-TOTAL_DATA_SIZE:]
    high_prices = np.array(high_prices).astype(float)
    low_prices = np.array(low_prices).astype(float)
    mid_prices = (high_prices + low_prices) / 2.0

    if len(mid_prices) <= TRAIN_DATA_SIZE:
        # If the data is not enough, just return empty values
        return 0, 0

    train_data = mid_prices[:TRAIN_DATA_SIZE]
    test_data = mid_prices[TRAIN_DATA_SIZE:]

    # Step 2: Scale the data to [0, 1]. The scaler is fitted on the FULL
    # series (train + test), then applied to both splits.
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(mid_prices.reshape(-1, 1))
    train_data = scaler.transform(train_data.reshape(-1, 1)).reshape(-1)
    test_data = scaler.transform(test_data.reshape(-1, 1)).reshape(-1)

    # Smoothing the train data with an exponential moving average
    # (in place; gamma weights the newest observation).
    EMA = 0.0
    gamma = 0.1
    for ti in range(TRAIN_DATA_SIZE):
        EMA = gamma * train_data[ti] + (1 - gamma) * EMA
        train_data[ti] = EMA
    all_mid_data = np.concatenate([train_data, test_data], axis=0)

    # ---- LSTM graph hyperparameters ----
    D = 1                      # input dimensionality (one price per step)
    num_unrollings = 50        # truncated-BPTT window length
    batch_size = 40
    num_nodes = [20, 20, 15]   # units per LSTM layer
    n_layers = len(num_nodes)
    dropout = 0.2

    # Fresh graph per call — this method rebuilds the model every time.
    tf.reset_default_graph()

    # One placeholder pair per unrolled time step.
    train_inputs, train_outputs = [], []
    for ui in range(num_unrollings):
        train_inputs.append(
            tf.placeholder(tf.float32, shape=[batch_size, D],
                           name='train_inputs_%d' % ui))
        train_outputs.append(
            tf.placeholder(tf.float32, shape=[batch_size, 1],
                           name='train_outputs_%d' % ui))

    lstm_cells = [
        tf.contrib.rnn.LSTMCell(
            num_units=num_nodes[li],
            state_is_tuple=True,
            initializer=tf.contrib.layers.xavier_initializer())
        for li in range(n_layers)
    ]
    # Dropout applies only during training; the prediction path below uses
    # the undropped `multi_cell` built from the same underlying cells.
    drop_lstm_cells = [
        tf.contrib.rnn.DropoutWrapper(lstm,
                                      input_keep_prob=1.0,
                                      output_keep_prob=1.0 - dropout,
                                      state_keep_prob=1.0 - dropout)
        for lstm in lstm_cells
    ]
    drop_multi_cell = tf.contrib.rnn.MultiRNNCell(drop_lstm_cells)
    multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)

    # Final linear regression layer mapping last LSTM output to one price.
    w = tf.get_variable('w',
                        shape=[num_nodes[-1], 1],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable('b', initializer=tf.random_uniform([1], -0.1, 0.1))

    # Per-layer cell/hidden state kept in non-trainable variables so state
    # carries across training steps.
    c, h = [], []
    initial_state = []
    for li in range(n_layers):
        c.append(
            tf.Variable(tf.zeros([batch_size, num_nodes[li]]),
                        trainable=False))
        h.append(
            tf.Variable(tf.zeros([batch_size, num_nodes[li]]),
                        trainable=False))
        initial_state.append(tf.contrib.rnn.LSTMStateTuple(c[li], h[li]))

    # Stack the per-step placeholders into a time-major tensor
    # [num_unrollings, batch_size, D] for dynamic_rnn.
    all_inputs = tf.concat([tf.expand_dims(t, 0) for t in train_inputs],
                           axis=0)
    all_lstm_outputs, state = tf.nn.dynamic_rnn(
        drop_multi_cell,
        all_inputs,
        initial_state=tuple(initial_state),
        time_major=True,
        dtype=tf.float32)
    all_lstm_outputs = tf.reshape(
        all_lstm_outputs, [batch_size * num_unrollings, num_nodes[-1]])
    all_outputs = tf.nn.xw_plus_b(all_lstm_outputs, w, b)
    split_outputs = tf.split(all_outputs, num_unrollings, axis=0)

    logging.debug('Defining training Loss')
    # MSE-style loss; the control dependency persists the RNN state back
    # into the c/h variables before the loss is computed.
    loss = 0.0
    with tf.control_dependencies(
            [tf.assign(c[li], state[li][0]) for li in range(n_layers)] +
            [tf.assign(h[li], state[li][1]) for li in range(n_layers)]):
        for ui in range(num_unrollings):
            loss += tf.reduce_mean(
                0.5 * (split_outputs[ui] - train_outputs[ui])**2)

    logging.debug('Learning rate decay operations')
    # global_step is bumped manually (inc_gstep) when validation loss
    # stalls; each bump halves the learning rate (staircase decay),
    # floored at tf_min_learning_rate.
    global_step = tf.Variable(0, trainable=False)
    inc_gstep = tf.assign(global_step, global_step + 1)
    tf_learning_rate = tf.placeholder(shape=None, dtype=tf.float32)
    tf_min_learning_rate = tf.placeholder(shape=None, dtype=tf.float32)
    learning_rate = tf.maximum(
        tf.train.exponential_decay(tf_learning_rate,
                                   global_step,
                                   decay_steps=1,
                                   decay_rate=0.5,
                                   staircase=True),
        tf_min_learning_rate)

    # Optimizer: Adam with global-norm gradient clipping at 5.0.
    logging.debug('TF Optimization operations')
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients, v = zip(*optimizer.compute_gradients(loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
    optimizer = optimizer.apply_gradients(zip(gradients, v))
    logging.debug('All done')

    logging.debug('Defining prediction related TF functions')
    # Single-sample (batch of 1) prediction path with its own state
    # variables, so inference does not disturb the training state.
    sample_inputs = tf.placeholder(tf.float32, shape=[1, D])
    sample_c, sample_h, initial_sample_state = [], [], []
    for li in range(n_layers):
        sample_c.append(
            tf.Variable(tf.zeros([1, num_nodes[li]]), trainable=False))
        sample_h.append(
            tf.Variable(tf.zeros([1, num_nodes[li]]), trainable=False))
        initial_sample_state.append(
            tf.contrib.rnn.LSTMStateTuple(sample_c[li], sample_h[li]))
    reset_sample_states = tf.group(
        *[
            tf.assign(sample_c[li], tf.zeros([1, num_nodes[li]]))
            for li in range(n_layers)
        ], *[
            tf.assign(sample_h[li], tf.zeros([1, num_nodes[li]]))
            for li in range(n_layers)
        ])
    sample_outputs, sample_state = tf.nn.dynamic_rnn(
        multi_cell,
        tf.expand_dims(sample_inputs, 0),
        initial_state=tuple(initial_sample_state),
        time_major=True,
        dtype=tf.float32)
    with tf.control_dependencies([
            tf.assign(sample_c[li], sample_state[li][0])
            for li in range(n_layers)
    ] + [
            tf.assign(sample_h[li], sample_state[li][1])
            for li in range(n_layers)
    ]):
        sample_prediction = tf.nn.xw_plus_b(
            tf.reshape(sample_outputs, [1, -1]), w, b)
    logging.debug('All done')

    # ---- Run LSTM ----
    epochs = 15
    valid_summary = 1          # validate every epoch
    n_predict_once = 25        # autoregressive horizon per test point
    train_seq_length = train_data.size
    train_mse_ot = []          # training MSE over time
    test_mse_ot = []           # test MSE over time
    predictions_over_time = []
    session = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    loss_nondecrease_count = 0
    # If test loss fails to improve this many validations in a row,
    # halve the learning rate.
    loss_nondecrease_threshold = 2
    logging.debug('Initialized')
    average_loss = 0
    # DataGeneratorSeq is defined elsewhere in the project; presumably it
    # yields num_unrollings (input, label) batch pairs per call — verify.
    data_gen = DataGeneratorSeq(train_data, batch_size, num_unrollings)
    x_axis_seq = []
    # With step 25 > TEST_DATA_SIZE (24), this yields a single test point.
    test_points_seq = np.arange(TRAIN_DATA_SIZE,
                                TRAIN_DATA_SIZE + TEST_DATA_SIZE,
                                25).tolist()

    for ep in range(epochs):
        logging.debug("Start epochs {}".format(ep))

        # ========================= Training =====================================
        for step in range(train_seq_length // batch_size):
            u_data, u_labels = data_gen.unroll_batches()
            feed_dict = {}
            for ui, (dat, lbl) in enumerate(zip(u_data, u_labels)):
                feed_dict[train_inputs[ui]] = dat.reshape(-1, 1)
                feed_dict[train_outputs[ui]] = lbl.reshape(-1, 1)
            feed_dict.update({
                tf_learning_rate: 0.0001,
                tf_min_learning_rate: 0.000001
            })
            _, l = session.run([optimizer, loss], feed_dict=feed_dict)
            average_loss += l
            # print("step={}".format(step))

        # ============================ Validation ==============================
        if (ep + 1) % valid_summary == 0:
            average_loss = average_loss / (
                valid_summary * (train_seq_length // batch_size))

            # The average loss
            # NOTE(review): this inner check is redundant — the outer
            # branch already guarantees the same condition.
            if (ep + 1) % valid_summary == 0:
                logging.debug('Average loss at step %d: %f' %
                              (ep + 1, average_loss))

            train_mse_ot.append(average_loss)
            average_loss = 0  # reset loss

            predictions_seq = []
            mse_test_loss_seq = []

            # ===================== Updating State and Making Predicitons ========================
            for w_i in test_points_seq:
                mse_test_loss = 0.0
                our_predictions = []

                if (ep + 1) - valid_summary == 0:
                    # Only calculate x_axis values in the first validation epoch
                    x_axis = []

                # Feed in the recent past behavior of stock prices
                # to make predictions from that point onwards
                # (warms up the sample-state variables; outputs discarded).
                for tr_i in range(w_i - num_unrollings + 1, w_i - 1):
                    current_price = all_mid_data[tr_i]
                    feed_dict[sample_inputs] = np.array(
                        current_price).reshape(1, 1)
                    _ = session.run(sample_prediction, feed_dict=feed_dict)

                feed_dict = {}
                current_price = all_mid_data[w_i - 1]
                feed_dict[sample_inputs] = np.array(current_price).reshape(
                    1, 1)

                # Make predictions for this many steps
                # Each prediction uses previous prediciton as it's current input
                for pred_i in range(n_predict_once):
                    # Out of index range
                    # NOTE(review): once out of range every later pred_i is
                    # too — `break` would be equivalent and clearer.
                    if w_i + pred_i >= TRAIN_DATA_SIZE + TEST_DATA_SIZE:
                        continue

                    pred = session.run(sample_prediction,
                                       feed_dict=feed_dict)
                    # NOTE(review): np.asscalar is deprecated in NumPy;
                    # pred.item() is the modern equivalent.
                    our_predictions.append(np.asscalar(pred))
                    feed_dict[sample_inputs] = np.asarray(pred).reshape(
                        -1, 1)

                    if (ep + 1) - valid_summary == 0:
                        # Only calculate x_axis values in the first validation epoch
                        x_axis.append(w_i + pred_i)

                    mse_test_loss += 0.5 * (
                        pred - all_mid_data[w_i + pred_i])**2

                session.run(reset_sample_states)

                predictions_seq.append(np.array(our_predictions))

                # NOTE(review): divides by the full horizon even when the
                # range guard above skipped some steps.
                mse_test_loss /= n_predict_once
                mse_test_loss_seq.append(mse_test_loss)

                if (ep + 1) - valid_summary == 0:
                    x_axis_seq.append(x_axis)

            current_test_mse = np.mean(mse_test_loss_seq)

            # Learning rate decay logic: count consecutive validations
            # whose test MSE is worse than the best seen so far.
            if len(test_mse_ot) > 0 and current_test_mse > min(test_mse_ot):
                loss_nondecrease_count += 1
            else:
                loss_nondecrease_count = 0

            if loss_nondecrease_count > loss_nondecrease_threshold:
                session.run(inc_gstep)
                loss_nondecrease_count = 0
                logging.debug('Decreasing learning rate by 0.5')

            test_mse_ot.append(current_test_mse)
            logging.debug('Test MSE: %.5f' % np.mean(mse_test_loss_seq))
            predictions_over_time.append(predictions_seq)
            logging.debug('Finished Predictions')

    session.close()
    logging.debug('Training finished.')

    # Last prediction of the last test point of the last epoch, compared
    # against the last (scaled) observed mid price.
    predicted_value = predictions_over_time[-1][-1][-1]
    actual_value = all_mid_data[-1]
    delta_value = actual_value - predicted_value
    elapsed_time = time.time() - start_time
    logging.info(
        "Prediction for coin {} result: Predicted={}, Actual={}, Delta={}, Elapsed Time={}"
        .format(coin_id, predicted_value, actual_value, delta_value,
                elapsed_time))
    return predicted_value, actual_value
def get_last_n_price(coin_id, n, dtype="close"):
    """Return the last `n` daily prices for `coin_id`, or None if unavailable.

    `dtype` selects the price field (e.g. "close", "high", "low").
    Returns None when the data source has no series for the coin or the
    series is shorter than `n`.
    """
    coin_prices = ccdata.get_one(coin_id, "D", dtype)
    # Bug fix: the original tested `coin_prices.any() == None`, which is
    # always False (any() returns a bool) and raises AttributeError when
    # the source returns None. Use an explicit identity check instead.
    if coin_prices is None or len(coin_prices) < n:
        return None
    return coin_prices[-n:]
def lstm_display_run():
    """Plot the saved best-epoch LSTM test predictions over the EOS mid-price series.

    Rebuilds the same scaled/EMA-smoothed mid-price curve used at training
    time, overlays the stored predictions for the best epoch, and returns
    the matplotlib `plt` module for the caller to show or save.
    """
    highs = ccdata.get_one("EOS", "1h", "high")
    lows = ccdata.get_one("EOS", "1h", "low")

    # Prediction artifacts saved by an earlier training run.
    predictions_over_time = np.load(
        "ccdatastore/Strategy/LSTM/predictions.npy", allow_pickle=True)
    x_axis_seq = np.load("ccdatastore/Strategy/LSTM/x_axis_seq.npy",
                         allow_pickle=True)

    mids = (highs + lows) / 2.0

    # Same train/test split point as the training pipeline.
    split = 1000
    train_part = mids[:split]
    test_part = mids[split:]

    # Min-max scale both segments with a scaler fitted on the full series.
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(mids.reshape(-1, 1))
    train_part = scaler.transform(train_part.reshape(-1, 1)).reshape(-1)
    test_part = scaler.transform(test_part.reshape(-1, 1)).reshape(-1)

    # In-place exponential-moving-average smoothing of the training segment.
    gamma = 0.1
    ema = 0.0
    for idx in range(split):
        ema = gamma * train_part[idx] + (1 - gamma) * ema
        train_part[idx] = ema

    all_mid_data = np.concatenate([train_part, test_part], axis=0)

    best_prediction_epoch = 18
    plt.figure(figsize=(18, 18))
    plt.subplot(2, 1, 1)
    plt.plot(range(mids.shape[0]), all_mid_data, color='b')
    for xval, yval in zip(x_axis_seq,
                          predictions_over_time[best_prediction_epoch]):
        plt.plot(xval, yval, color='r')
    plt.title('Best Test Predictions Over Time', fontsize=18)
    plt.xlabel('Date', fontsize=18)
    plt.ylabel('Mid Price', fontsize=18)
    plt.xlim(100, 2000)
    return plt
def get_current_price(coin_id):
    """Return the most recent daily close price for `coin_id`, or None.

    Returns None when the data source has no series for the coin or the
    series is empty.
    """
    coin_prices = ccdata.get_one(coin_id, "D", "close")
    # Bug fix: the original tested `coin_prices.any() == None`, which is
    # always False (any() returns a bool) and raises AttributeError when
    # the source returns None. Use an explicit identity check instead.
    if coin_prices is None or len(coin_prices) == 0:
        return None
    return coin_prices[-1]