def train(lstm, data, bestEpoch, bestLoss):
    """Run offline training, checkpointing whenever the mean epoch loss improves.

    Args:
        lstm: model wrapper exposing setBatch / get / train / save.
        data: training data consumed via Batcher.getNextTrainBatch.
        bestEpoch: number of epochs already completed (supports resuming).
        bestLoss: best mean epoch loss seen so far (checkpoint threshold).
    """
    print("\nOFFLINE TRAINING")
    for epoch in range(Constants.offlineTrainEpochs - bestEpoch):
        print("\nEPOCH", bestEpoch + epoch + 1, "/", Constants.offlineTrainEpochs)
        epochLosses, epochAccuracies = [], []
        cursor = 0
        epochComplete = False
        while not epochComplete:
            x, y, cursor = Batcher.getNextTrainBatch(data, cursor)
            lstm.setBatch(x, y, Constants.learningRate, Constants.dropout)
            loss, accuracy = lstm.get(['loss', 'accuracy'])
            epochLosses.append(loss)
            epochAccuracies.append(accuracy)
            lstm.train()
            # The batcher signals a completed pass by wrapping the cursor to 0.
            if cursor == 0:
                epochComplete = True
        epochLoss = sum(epochLosses) / len(epochLosses)
        epochAccuracy = sum(epochAccuracies) / len(epochAccuracies)
        if epochLoss < bestLoss:
            # BUG FIX: remember the new best loss so a later, worse epoch
            # cannot overwrite a better checkpoint (previously bestLoss was
            # never updated inside the loop).
            bestLoss = epochLoss
            lstm.save()
            # BUG FIX: persist the ABSOLUTE epoch number — consistent with the
            # banner above and with the resume arithmetic in range(); the old
            # code saved the relative `epoch + 1`, which corrupted resumes.
            # Also close the pickle files deterministically instead of
            # leaking the handles returned by open().
            with open(Constants.modelDir + "bestLoss.p", "wb") as lossFile:
                pk.dump(epochLoss, lossFile)
            with open(Constants.modelDir + "bestEpoch.p", "wb") as epochFile:
                pk.dump(bestEpoch + epoch + 1, epochFile)
        print("Loss:", epochLoss)
        print("Acc: ", "%.2f" % (epochAccuracy * 100) + "%")
def next_fold(self):
    """Set up batchers on the next validation fold.

    Returns the (train, test) batchers for that fold.
    """
    fold_train_inds, fold_test_inds = self.get_train_val_inds()

    # Mean of the training split, used to normalise both batchers.
    mean = 0
    if self.new_batch:
        print('calculating train mean')
        probe = Batcher.Batcher(self.batch_sz, self.metadata, fold_train_inds,
                                self.mass_headers, self.calc_headers, root_dir,
                                self.attr2onehot, new_batch=self.new_batch)
        mean = probe.get_train_mean()
        print(mean)

    # Note: only the train batcher is built with new_batch; the test batcher
    # reuses whatever was already cached.
    train_batcher = Batcher.Batcher(self.batch_sz, self.metadata,
                                    fold_train_inds, self.mass_headers,
                                    self.calc_headers, root_dir,
                                    self.attr2onehot, mean=mean,
                                    new_batch=self.new_batch)
    test_batcher = Batcher.Batcher(self.batch_sz, self.metadata,
                                   fold_test_inds, self.mass_headers,
                                   self.calc_headers, root_dir,
                                   self.attr2onehot, mean=mean)

    self.curr_fold += 1
    return train_batcher, test_batcher
cost = map["cost"] model = map["model"] inputs = map["inputs"] params = map["params"] print "Computing cost..." cost_func = theano.function(inputs=inputs, outputs=cost) print "Computing update..." updates = model.cmp_grad(alpha, cost) print "Compution gradient step..." sgd_step = theano.function(inputs=inputs, updates=updates) print "Descent..." batcher = Batcher(params) batch = batcher.get_batch(batch_size) last_cost = cost_func(batch[0], batch[1]) best_cost = last_cost while batcher.epoch < max_epoch: batch = batcher.get_batch(batch_size) if batcher.epoch_percentage==0: last_cost = cost_func(batch[0], batch[1]) map["curve"] += [last_cost] if best_cost > last_cost: best_cost = last_cost file = open(name+".pkl", 'wb') pickle.dump(map, file, -1) sgd_step(batch[0], batch[1]) sys.stdout.write('\r%d%% of epoch %d completed. Best cost: %f, last cost %f'%(batcher.epoch_percentage, batcher.epoch, best_cost, last_cost))
# NOTE(review): this chunk begins mid-function — the first part (through
# `return best`) is the tail of `valid_cost` (named from the call below);
# its `def` line is outside this view, so indentation is reconstructed.
    # Evaluate validation cost over 10 batches, threading the recurrent
    # memory state through cost_func.
    batcher.reset()
    memory = np.zeros((model.depth, batch_size, params[0]), dtype=config.floatX)
    compilation = []
    for i in range(10):
        batch = batcher.get_batch(batch_size)
        c, memory = cost_func(batch[0], memory, batch[1])
        compilation += [c]
    map["curve"] += [np.asarray(compilation)]
    time = batcher.get_time()
    # Shows only the first 4 per-batch costs to keep the status line short.
    sys.stdout.write('\r%d mins %d secs: valid cost = %s'%(time[0], time[1], np.asarray(compilation)[:4].tolist()))
    if np.mean(compilation) < best:
        best = np.mean(compilation)
        # Checkpoint the whole experiment dict when validation improves.
        file = open(name+".pkl", 'wb')
        pickle.dump(map, file, -1)
        file.close()
    return best

# Top-level descent loop: validate, then train for 10 batches per "epoch",
# carrying the recurrent memory through sgd_step.
print "Descent..."
batcher = Batcher()
valid = batcher.valid
# Initial best validation cost; assumes real costs fall below 10000 —
# TODO confirm against the cost scale, otherwise no checkpoint is written.
best = 10000
for n in range(max_epoch):
    best = valid_cost(best)
    batcher.reset()
    memory = np.zeros((model.depth, batch_size, params[0]), dtype=config.floatX)
    for i in range(10):
        batch = batcher.get_batch(batch_size)
        memory = sgd_step(batch[0], memory, batch[1])
print ""
cost = map["cost"] model = map["model"] inputs = map["inputs"] params = map["params"] print "Computing cost..." cost_func = theano.function(inputs=inputs, outputs=cost) print "Computing update..." updates = model.cmp_grad(alpha, cost) print "Compution gradient step..." sgd_step = theano.function(inputs=inputs, updates=updates) print "Descent..." batcher = Batcher(params) batch = batcher.get_batch(batch_size) last_cost = cost_func(batch[0], batch[1]) best_cost = last_cost while batcher.epoch < max_epoch: batch = batcher.get_batch(batch_size) if batcher.epoch_percentage == 0: last_cost = cost_func(batch[0], batch[1]) map["curve"] += [last_cost] if best_cost > last_cost: best_cost = last_cost file = open(name + ".pkl", 'wb') pickle.dump(map, file, -1) sgd_step(batch[0], batch[1]) sys.stdout.write( '\r%d%% of epoch %d completed. Best cost: %f, last cost %f' %
# NOTE(review): this chunk begins mid-function — the first part (through
# `return best`) is the tail of `valid_cost` (named from the call below);
# its `def` and the preceding reset/memory setup are outside this view,
# so indentation is reconstructed.
    # Evaluate validation cost over 10 batches, threading the recurrent
    # memory state through cost_func.
    for i in range(10):
        batch = batcher.get_batch(batch_size)
        c, memory = cost_func(batch[0], memory, batch[1])
        compilation += [c]
    map["curve"] += [np.asarray(compilation)]
    time = batcher.get_time()
    # Shows only the first 4 per-batch costs to keep the status line short.
    sys.stdout.write('\r%d mins %d secs: valid cost = %s' % (time[0], time[1], np.asarray(compilation)[:4].tolist()))
    if np.mean(compilation) < best:
        best = np.mean(compilation)
        # Checkpoint the whole experiment dict when validation improves.
        file = open(name + ".pkl", 'wb')
        pickle.dump(map, file, -1)
        file.close()
    return best

# Top-level descent loop: validate, then train for 10 batches per "epoch",
# carrying the recurrent memory through sgd_step.
print "Descent..."
batcher = Batcher()
valid = batcher.valid
# Initial best validation cost; assumes real costs fall below 10000 —
# TODO confirm against the cost scale, otherwise no checkpoint is written.
best = 10000
for n in range(max_epoch):
    best = valid_cost(best)
    batcher.reset()
    memory = np.zeros((model.depth, batch_size, params[0]), dtype=config.floatX)
    for i in range(10):
        batch = batcher.get_batch(batch_size)
        memory = sgd_step(batch[0], memory, batch[1])
print ""
def simulate(lstm, data, prices, ticker):
    """Online trading simulation: repeatedly fine-tune the LSTM on the next
    window, predict direction for the following day, and plot the price
    series and cumulative percentage returns live with matplotlib.

    Args:
        lstm: model wrapper exposing setBatch / get / train.
        data: feature array; data.shape[0] rows, windowed by the Batcher.
        prices: raw price series (trimmed below to the simulated span).
        ticker: ticker symbol used only in the plot title.
    """
    # Keep only the simulated span; targets are prices shifted forward by
    # the prediction window.
    prices = prices[-(Constants.onlineLength + Constants.predictionWindow):]
    targets = prices[Constants.predictionWindow:]

    # PREPARE PLOTS
    red = "#D32F2F"
    blue = "#039BE5"
    black = "#424242"
    sb.set()
    sb.set_context("talk")
    sb.set_style("dark")
    plt.ion()  # interactive mode so the figure updates inside the loop
    figure, (pricesPlot, returnsPlot) = plt.subplots(2, 1)
    pricesPlot.set_xlim(0, 100)
    pricesPlot.set_ylim(min(prices) - 10, max(prices) + 10)
    pricesPlot.set_title("{} Stock Price (Last {} Days)".format(
        ticker, Constants.onlineLength))
    pricesPlot.set_ylabel("Price")
    returnsPlot.set_xlim(0, 100)
    returnsPlot.set_ylim(-100, 100)
    returnsPlot.set_title("LSTM Model Cumulative Percentage Returns")
    returnsPlot.set_xlabel("Days")
    returnsPlot.set_ylabel("Returns (%)")
    returnsPlot.plot([0, 100], [0, 0], c=black)  # zero-return baseline

    # SIMULATION
    pricesX, pricesY = [], []
    returnsX, returnsY = [], []
    cumulativeReturns = 0
    trainLosses, trainAccuracies = [], []
    testLosses, testAccuracies = [], []
    cursor = 0
    dataComplete = False
    while not dataComplete:
        print("\nPREDICTION:", cursor + 1, '/',
              data.shape[0] - Constants.sequenceLength + 1)

        # TRAIN: fine-tune on the batch at the current cursor (cursor is
        # not advanced by this call — only the predict=True call below
        # returns a new cursor).
        x, y = Batcher.getNextOnlineBatch(data, cursor)
        lstm.setBatch(x, y, Constants.learningRate, Constants.dropout)
        for epoch in range(Constants.onlineTrainEpochs):
            lstm.train()
        trainLoss, trainAccuracy = lstm.get(['loss', 'accuracy'])
        trainLosses.append(trainLoss)
        trainAccuracies.append(trainAccuracy)

        # TEST: predict on the next window; this call advances the cursor
        # (wrapping it to 0 when the data is exhausted).
        x, y, cursor = Batcher.getNextOnlineBatch(data, cursor, predict=True)
        lstm.setBatch(x, y)
        testLoss, testAccuracy, labels, predictions = lstm.get(
            ['loss', 'accuracy', 'labels', 'roundedPredictions'])
        testLosses.append(testLoss)
        testAccuracies.append(testAccuracy)
        print("Train Loss: ", sum(trainLosses) / len(trainLosses))
        # NOTE(review): this string was split by the file-chunking artifact;
        # rejoined to match the parallel "Test Acc: " print below.
        print(
            "Train Acc: ",
            "%.2f" % ((sum(trainAccuracies) / len(trainAccuracies)) * 100) + "%")
        print("\nTest Loss: ", sum(testLosses) / len(testLosses))
        print(
            "Test Acc: ",
            "%.2f" % ((sum(testAccuracies) / len(testAccuracies)) * 100) + "%")
        if cursor == 0:
            dataComplete = True

        # UPDATE PLOTS
        # Magnitude of the day's move; sign follows prediction correctness
        # (wrong direction call -> negative return).
        dayReturn = abs(
            ((targets[cursor] - prices[cursor]) / prices[cursor]) * 100)
        if labels[0][0] != predictions[0][0]:
            dayReturn = -dayReturn
        cumulativeReturns += dayReturn
        print("\nDay Return:\t ", "%.2f" % dayReturn + "%")
        print("Cumulative Return: ", "%.2f" % cumulativeReturns + "%")
        # Widen the returns axis if the curve escapes the initial +/-100 band.
        if cumulativeReturns > 100:
            returnsPlot.set_ylim(-100, cumulativeReturns + 10)
        if cumulativeReturns < -100:
            returnsPlot.set_ylim(cumulativeReturns - 10, 100)
        if cursor != 0:
            pricesX.append(cursor)
            pricesY.append(prices[cursor])
            returnsX.append(cursor)
            returnsY.append(cumulativeReturns)
        else:
            # Cursor has wrapped: plot the final point at the last real index.
            pricesX.append(Constants.sequenceLength)
            pricesY.append(prices[Constants.sequenceLength])
            returnsX.append(Constants.sequenceLength)
            returnsY.append(cumulativeReturns)
        pricesPlot.plot(pricesX, pricesY, c=blue)
        returnsPlot.plot(returnsX, returnsY, c=red)
        plt.pause(0.01)
        if cursor == 0:
            plt.savefig(Constants.workingDir + "Simulation Plot")
            # Keep the interactive window alive after the run ends; the
            # process must be killed manually.
            while True:
                plt.pause(1)