import datetime
import pandas as pd
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy
from Callback import ValidationCallback
from SpEnv import SpEnv


class DeepQTrading:

    def __init__(self, model, explorations, trainSize, validationSize, testSize,
                 outputFile, begin, end, nbActions, operationCost=0):
        self.policy = EpsGreedyQPolicy()
        self.explorations = explorations
        self.nbActions = nbActions
        self.model = model
        self.memory = SequentialMemory(limit=10000, window_length=50)
        self.agent = DQNAgent(model=self.model, policy=self.policy,
                              nb_actions=self.nbActions, memory=self.memory,
                              nb_steps_warmup=400, target_model_update=1e-1,
                              enable_double_dqn=True, enable_dueling_network=True)
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
        #Save the initial (random) weights so every walk can restart from them
        self.agent.save_weights("q.weights", overwrite=True)
        self.currentStartingPoint = begin
        self.trainSize = trainSize
        self.validationSize = validationSize
        self.testSize = testSize
        #A walk spans the training, validation and test windows
        self.walkSize = trainSize + validationSize + testSize
        self.endingPoint = end
        #Load the hourly S&P 500 dataset and keep only its Datetime index;
        #the dates delimit the train, validation and test windows of each walk
        self.sp = pd.read_csv('./dataset/sp500Hour.csv')
        self.sp['Datetime'] = pd.to_datetime(self.sp['Date'] + ' ' + self.sp['Time'])
        self.sp = self.sp.set_index('Datetime')
        self.sp = self.sp.drop(['Date', 'Time'], axis=1)
        self.sp = self.sp.index
        self.operationCost = operationCost
        #Callbacks that collect metrics for training, validation and test
        self.trainer = ValidationCallback()
        self.validator = ValidationCallback()
        self.tester = ValidationCallback()
        self.outputFile = open(outputFile, "w+")
        self.outputFile.write(
            "date,trainAccuracy,trainCoverage,trainReward,"
            "validationAccuracy,validationCoverage,validationReward,"
            "testAccuracy,testCoverage,testReward\n")

    def run(self):
        env = " "
        iteration = 0
        #Walk forward until the next walk would run past the final date
        while self.currentStartingPoint + self.walkSize <= self.endingPoint:
            iteration += 1
            #Rebuild memory and agent so each walk restarts from the saved random weights
            del self.memory
            del self.agent
            self.memory = SequentialMemory(limit=10000, window_length=50)
            self.agent = DQNAgent(model=self.model, policy=self.policy,
                                  nb_actions=self.nbActions, memory=self.memory,
                                  nb_steps_warmup=400, target_model_update=1e-1,
                                  enable_double_dqn=True, enable_dueling_network=True)
            self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
            self.agent.load_weights("q.weights")

            #Locate the index of the starting date; if that exact timestamp is
            #missing from the dataset, shift the starting point forward by one hour
            minLimit = None
            while minLimit is None:
                try:
                    minLimit = self.sp.get_loc(self.currentStartingPoint)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)
            maxLimit = None
            while maxLimit is None:
                try:
                    maxLimit = self.sp.get_loc(self.currentStartingPoint + self.trainSize)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)

            date = self.currentStartingPoint
            for eps in self.explorations:
                self.policy.eps = eps[0]
                del env
                env = SpEnv(operationCost=self.operationCost,
                            minLimit=minLimit, maxLimit=maxLimit)
                for _ in range(0, eps[1]):
                    self.trainer.reset()
                    #TODO: nb_steps is expressed in steps; it should be expressed
                    #in episodes instead
                    self.agent.fit(env, nb_steps=self.trainSize.days - 65,
                                   visualize=False, callbacks=[self.trainer], verbose=0)
                    env.resetEnv()
                    (_, trainCoverage, trainAccuracy, trainReward,
                     _, _, _, _, _, _) = self.trainer.getInfo()
                    print(str(iteration) + " TRAIN: acc: " + str(trainAccuracy) +
                          " cov: " + str(trainCoverage) + " rew: " + str(trainReward))

            #The validation window starts where the training window ends
            minLimit = maxLimit
            maxLimit = None
            while maxLimit is None:
                try:
                    maxLimit = self.sp.get_loc(self.currentStartingPoint +
                                               self.trainSize + self.validationSize)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)
            del env
            env = SpEnv(operationCost=self.operationCost,
                        minLimit=minLimit, maxLimit=maxLimit)
            self.agent.test(env, nb_episodes=self.validationSize.days - 10,
                            visualize=False, callbacks=[self.validator], verbose=0)
            (_, validCoverage, validAccuracy, validReward,
             _, _, _, _, _, _) = self.validator.getInfo()
            print(str(iteration) + " VALID: acc: " + str(validAccuracy) +
                  " cov: " + str(validCoverage) + " rew: " + str(validReward))
            self.validator.reset()

            #The test window starts where the validation window ends
            minLimit = maxLimit
            maxLimit = None
            while maxLimit is None:
                try:
                    maxLimit = self.sp.get_loc(self.currentStartingPoint + self.trainSize +
                                               self.validationSize + self.testSize)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)
            del env
            env = SpEnv(operationCost=self.operationCost,
                        minLimit=minLimit, maxLimit=maxLimit)
            #Note: the episode count is derived from validationSize here, as in the source
            self.agent.test(env, nb_episodes=self.validationSize.days - 10,
                            visualize=False, callbacks=[self.tester], verbose=0)
            (_, testCoverage, testAccuracy, testReward,
             _, _, _, _, _, _) = self.tester.getInfo()
            print(str(iteration) + " TEST: acc: " + str(testAccuracy) +
                  " cov: " + str(testCoverage) + " rew: " + str(testReward))
            self.tester.reset()
            print(" ")

            self.outputFile.write(
                str(date) + "," +
                str(trainAccuracy) + "," + str(trainCoverage) + "," + str(trainReward) + "," +
                str(validAccuracy) + "," + str(validCoverage) + "," + str(validReward) + "," +
                str(testAccuracy) + "," + str(testCoverage) + "," + str(testReward) + "\n")

            #The next walk starts one test-window later
            self.currentStartingPoint += self.testSize

    def end(self):
        import os
        self.outputFile.close()
        os.remove("q.weights")
import SpEnv
from Callback import ValidationCallback
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy
from datetime import datetime
import sys

#Training window: 2006-2015; validation window: 2016 (row offsets into the hourly dataset)
trainEnv = SpEnv.SpEnv(operationCost=0, minLimit=13378, maxLimit=74336)
validationEnv = SpEnv.SpEnv(operationCost=0, minLimit=74336, maxLimit=80500)

validator = ValidationCallback()
trainer = ValidationCallback()
nb_actions = trainEnv.action_space.n

#Q-network: a flattened 50-step window fed through three LeakyReLU dense layers,
#with one linear output per action
model = Sequential()
model.add(Flatten(input_shape=(50, 4, 68)))
model.add(Dense(512, activation='linear'))
model.add(LeakyReLU(alpha=.001))
model.add(Dense(1024, activation='linear'))
model.add(LeakyReLU(alpha=.001))
model.add(Dense(512, activation='linear'))
model.add(LeakyReLU(alpha=.001))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
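#A possible continuation of the script above: wiring the model into a
#double/dueling DQN and running it on the two environments, mirroring the
#pattern used by the DeepQTrading classes in this file set. This is a sketch,
#not the original script; nb_steps and nb_episodes are illustrative values.
memory = SequentialMemory(limit=10000, window_length=50)
policy = EpsGreedyQPolicy(eps=0.2)
agent = DQNAgent(model=model, policy=policy, nb_actions=nb_actions,
                 memory=memory, nb_steps_warmup=400, target_model_update=1e-1,
                 enable_double_dqn=True, enable_dueling_network=True)
agent.compile(Adam(lr=1e-3), metrics=['mae'])
agent.fit(trainEnv, nb_steps=10000, visualize=False, callbacks=[trainer], verbose=0)
agent.test(validationEnv, nb_episodes=250, visualize=False, callbacks=[validator], verbose=0)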
import datetime
from math import floor

import pandas as pd
import telegram
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy
from Callback import ValidationCallback
from SpEnv import SpEnv

#MK is the market file prefix of the dataset (e.g. "sp500"); it is defined at
#module level in the original source.


class DeepQTrading:
    #Class constructor
    #model: Keras model considered
    #explorations: a vector of pairs (epsilon, iterations): the probability of random
    #  actions and how many training iterations to run with it (the algorithm is run
    #  several times, i.e. several iterations)
    #trainSize: size of the training set
    #validationSize: size of the validation set
    #testSize: size of the testing set
    #outputFile: prefix of the files where results are printed
    #begin: initial date
    #end: final date
    #nbActions: number of decisions (0-Hold, 1-Long, 2-Short)
    #nOutput: number of walks (Tonio set it to 20, but in reality there are 5 walks)
    #operationCost: price of a transaction
    #telegramToken: token of the bot that sends progress messages
    #telegramChatID: ID of the message receiver in Telegram
    #ensemble.py runs the ensemble

    def __init__(self, model, explorations, trainSize, validationSize, testSize,
                 outputFile, begin, end, nbActions, nOutput=1, operationCost=0,
                 telegramToken="", telegramChatID=""):

        #If the telegram token of the bot and the telegram ID of the receiver are
        #given, prepare the bot to send messages; otherwise disable Telegram output
        if telegramToken != "" and telegramChatID != "":
            self.chatID = telegramChatID
            self.telegramOutput = True
            try:
                self.bot = telegram.Bot(token=telegramToken)
            except Exception:
                print("Error with Telegram Bot")
        else:
            self.telegramOutput = False

        #Define the policy, explorations, actions and model as received by parameters
        self.policy = EpsGreedyQPolicy()
        self.explorations = explorations
        self.nbActions = nbActions
        self.model = model

        #Define the memory
        self.memory = SequentialMemory(limit=10000, window_length=1)

        #Instantiate the agent with the parameters received
        self.agent = DQNAgent(model=self.model, policy=self.policy,
                              nb_actions=self.nbActions, memory=self.memory,
                              nb_steps_warmup=200, target_model_update=1e-1,
                              enable_double_dqn=True, enable_dueling_network=True)

        #Compile the agent with the Adam optimizer and the mean absolute error metric
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])

        #Save the initial (random) weights of the agent in the q.weights file
        self.agent.save_weights("q.weights", overwrite=True)

        #Define the current starting point as the initial date
        self.currentStartingPoint = begin

        #Training, validation and testing sizes as informed by the caller
        #Train: five years; validation: 6 months; test: 6 months
        self.trainSize = trainSize
        self.validationSize = validationSize
        self.testSize = testSize

        #The walk size is simply the sum of the train, validation and test sizes
        self.walkSize = trainSize + validationSize + testSize

        #Define the ending point as the final date (January 1st of 2010)
        self.endingPoint = end

        #Read the hourly dataset; kept as a DataFrame for building the ensemble files
        self.dates = pd.read_csv('./dataset/' + MK + 'Hour.csv')

        #Read the hourly dataset again, index it by Datetime, and keep only the index:
        #date and time are used to delimit train, validation and test for each walk
        self.sp = pd.read_csv('./dataset/' + MK + 'Hour.csv')
        self.sp['Datetime'] = pd.to_datetime(self.sp['Date'] + ' ' + self.sp['Time'])
        self.sp = self.sp.set_index('Datetime')
        self.sp = self.sp.drop(['Time', 'Date'], axis=1)
        self.sp = self.sp.index

        #Operation cost is the cost of long and short operations; it is zero here
        self.operationCost = operationCost

        #Callbacks that show the results for each episode of training, validation and test
        self.trainer = ValidationCallback()
        self.validator = ValidationCallback()
        self.tester = ValidationCallback()

        #Initiate one output file per walk and write the header row
        self.outputFile = []
        for i in range(0, nOutput):
            self.outputFile.append(open(outputFile + str(i + 1) + ".csv", "w+"))
            self.outputFile[i].write(
                "Iteration,"
                "trainAccuracy,trainCoverage,trainReward,trainLong%,trainShort%,"
                "trainLongAcc,trainShortAcc,trainLongPrec,trainShortPrec,"
                "validationAccuracy,validationCoverage,validationReward,"
                "validationLong%,validationShort%,validationLongAcc,"
                "validationShortAcc,validLongPrec,validShortPrec,"
                "testAccuracy,testCoverage,testReward,testLong%,testShort%,"
                "testLongAcc,testShortAcc,testLongPrec,testShortPrec\n")

    def run(self):
        #Initiate the environments
        trainEnv = validEnv = testEnv = " "
        iteration = -1

        #While not all the dates were processed (i.e., while not all walks are
        #finished); a walk spans train+validation+test, and currentStartingPoint
        #begins at the begin date
        while self.currentStartingPoint + self.walkSize <= self.endingPoint:

            #Iteration counts the walks
            iteration += 1

            #Notify the receiver of the current walk
            if self.telegramOutput:
                self.bot.send_message(chat_id=self.chatID,
                                      text="Walk " + str(iteration + 1) + " started.")

            #Rebuild the memory and the agent so each walk restarts from the
            #random weights saved in the constructor
            del self.memory
            del self.agent
            self.memory = SequentialMemory(limit=10000, window_length=1)
            self.agent = DQNAgent(model=self.model, policy=self.policy,
                                  nb_actions=self.nbActions, memory=self.memory,
                                  nb_steps_warmup=200, target_model_update=1e-1,
                                  enable_double_dqn=True, enable_dueling_network=True)
            self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
            self.agent.load_weights("q.weights")

            ######################################## TRAINING STAGE ########################################
            #trainMinLimit is the index of the initial date; if that exact timestamp
            #cannot be found, shift the starting point forward by one hour and retry
            trainMinLimit = None
            while trainMinLimit is None:
                try:
                    trainMinLimit = self.sp.get_loc(self.currentStartingPoint)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)

            #trainMaxLimit is the index of the initial date plus the training size,
            #shifted forward in the same way if the timestamp is missing
            trainMaxLimit = None
            while trainMaxLimit is None:
                try:
                    trainMaxLimit = self.sp.get_loc(self.currentStartingPoint + self.trainSize)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)

            ######################################## VALIDATION STAGE ########################################
            #The validation window starts right after the training window
            validMinLimit = trainMaxLimit + 1

            #validMaxLimit is the index of begin + train size + validation size
            validMaxLimit = None
            while validMaxLimit is None:
                try:
                    validMaxLimit = self.sp.get_loc(self.currentStartingPoint +
                                                    self.trainSize + self.validationSize)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)

            ######################################## TESTING STAGE ########################################
            #The test window starts right after the validation window
            testMinLimit = validMaxLimit + 1

            #testMaxLimit is the index of begin + train size + validation size + test size
            testMaxLimit = None
            while testMaxLimit is None:
                try:
                    testMaxLimit = self.sp.get_loc(self.currentStartingPoint + self.trainSize +
                                                   self.validationSize + self.testSize)
                except KeyError:
                    self.currentStartingPoint += datetime.timedelta(hours=1)

            #Prepare the validation and test ensemble frames, indexed by the distinct
            #dates inside each window, for saving the predictions later
            ensambleValid = pd.DataFrame(
                index=self.dates[validMinLimit:validMaxLimit].loc[:, 'Date']
                .drop_duplicates().tolist())
            ensambleTest = pd.DataFrame(
                index=self.dates[testMinLimit:testMaxLimit].loc[:, 'Date']
                .drop_duplicates().tolist())
            ensambleValid.index.name = 'Date'
            ensambleTest.index.name = 'Date'

            #Explorations are epochs
            for eps in self.explorations:

                #The policy epsilon is, e.g., 0.2: random actions happen with 20% probability
                self.policy.eps = eps[0]

                #There will be eps[1] iterations (e.g. 100)
                for i in range(0, eps[1]):

                    #Define the training, validation and testing environments
                    #with their respective callbacks
                    del trainEnv
                    trainEnv = SpEnv(operationCost=self.operationCost,
                                     minLimit=trainMinLimit, maxLimit=trainMaxLimit,
                                     callback=self.trainer)
                    del validEnv
                    validEnv = SpEnv(operationCost=self.operationCost,
                                     minLimit=validMinLimit, maxLimit=validMaxLimit,
                                     callback=self.validator, ensamble=ensambleValid,
                                     columnName="iteration" + str(i))
                    del testEnv
                    testEnv = SpEnv(operationCost=self.operationCost,
                                    minLimit=testMinLimit, maxLimit=testMaxLimit,
                                    callback=self.tester, ensamble=ensambleTest,
                                    columnName="iteration" + str(i))

                    #Reset the callbacks and the training environment
                    self.trainer.reset()
                    self.validator.reset()
                    self.tester.reset()
                    trainEnv.resetEnv()

                    #Train the agent
                    self.agent.fit(trainEnv,
                                   nb_steps=floor(self.trainSize.days - self.trainSize.days * 0.2),
                                   visualize=False, verbose=0)

                    #Get the info from the train callback and print it
                    (_, trainCoverage, trainAccuracy, trainReward, trainLongPerc,
                     trainShortPerc, trainLongAcc, trainShortAcc, trainLongPrec,
                     trainShortPrec) = self.trainer.getInfo()
                    print(str(i) + " TRAIN: acc: " + str(trainAccuracy) +
                          " cov: " + str(trainCoverage) + " rew: " + str(trainReward))

                    #Test the agent on validation data
                    validEnv.resetEnv()
                    self.agent.test(validEnv,
                                    nb_episodes=floor(self.validationSize.days -
                                                      self.validationSize.days * 0.2),
                                    visualize=False, verbose=0)
                    (_, validCoverage, validAccuracy, validReward, validLongPerc,
                     validShortPerc, validLongAcc, validShortAcc, validLongPrec,
                     validShortPrec) = self.validator.getInfo()
                    print(str(i) + " VALID: acc: " + str(validAccuracy) +
                          " cov: " + str(validCoverage) + " rew: " + str(validReward))

                    #Test the agent on testing data
                    #(note: the episode count is derived from validationSize, as in the source)
                    testEnv.resetEnv()
                    self.agent.test(testEnv,
                                    nb_episodes=floor(self.validationSize.days -
                                                      self.validationSize.days * 0.2),
                                    visualize=False, verbose=0)
                    (_, testCoverage, testAccuracy, testReward, testLongPerc,
                     testShortPerc, testLongAcc, testShortAcc, testLongPrec,
                     testShortPrec) = self.tester.getInfo()
                    print(str(i) + " TEST: acc: " + str(testAccuracy) +
                          " cov: " + str(testCoverage) + " rew: " + str(testReward))
                    print(" ")

                    #Write the iteration metrics of this walk to its file
                    self.outputFile[iteration].write(
                        str(i) + "," +
                        str(trainAccuracy) + "," + str(trainCoverage) + "," + str(trainReward) + "," +
                        str(trainLongPerc) + "," + str(trainShortPerc) + "," +
                        str(trainLongAcc) + "," + str(trainShortAcc) + "," +
                        str(trainLongPrec) + "," + str(trainShortPrec) + "," +
                        str(validAccuracy) + "," + str(validCoverage) + "," + str(validReward) + "," +
                        str(validLongPerc) + "," + str(validShortPerc) + "," +
                        str(validLongAcc) + "," + str(validShortAcc) + "," +
                        str(validLongPrec) + "," + str(validShortPrec) + "," +
                        str(testAccuracy) + "," + str(testCoverage) + "," + str(testReward) + "," +
                        str(testLongPerc) + "," + str(testShortPerc) + "," +
                        str(testLongAcc) + "," + str(testShortAcc) + "," +
                        str(testLongPrec) + "," + str(testShortPrec) + "\n")

            #Close the file of this walk
            self.outputFile[iteration].close()

            #For the next walk, the current starting point moves forward by the test
            #size: the training data of the next walk starts 6 months after the
            #training data of the previous walk
            self.currentStartingPoint += self.testSize

            #Save the validation and test predictions for the ensemble processing later
            ensambleValid.to_csv("./Output/ensemble/walk" + str(iteration) + "ensemble_valid.csv")
            ensambleTest.to_csv("./Output/ensemble/walk" + str(iteration) + "ensemble_test.csv")

    #Function to end the agent: close the files where the results were written
    def end(self):
        for outputFile in self.outputFile:
            outputFile.close()
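#A minimal usage sketch for the class above (assumptions: `model` is a Keras
#model built elsewhere whose input matches window_length=1 and the SpEnv
#observation shape, the dataset files exist, and the dates and sizes shown are
#illustrative, not values from the original).
import datetime

dqt = DeepQTrading(
    model=model,
    explorations=[(0.2, 100)],                     #20% random actions, 100 iterations per walk
    trainSize=datetime.timedelta(days=5 * 365),    #five years of training data
    validationSize=datetime.timedelta(days=180),   #six months of validation data
    testSize=datetime.timedelta(days=180),         #six months of test data
    outputFile="./Output/walk",                    #produces walk1.csv, walk2.csv, ...
    begin=datetime.datetime(2000, 1, 1),
    end=datetime.datetime(2010, 1, 1),
    nbActions=3,                                   #0-Hold, 1-Long, 2-Short
    nOutput=5,                                     #one output file per walk
    operationCost=0,
    telegramToken="",                              #leave empty to disable Telegram output
    telegramChatID="")
dqt.run()
dqt.end()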
import os

import pandas as pd
from keras.optimizers import Adam, Adamax, Adadelta, Adagrad, Nadam, RMSprop, SGD
#RAdam is assumed to come from the keras-rectified-adam package
from keras_radam import RAdam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy
from Callback import ValidationCallback
from SpEnv import SpEnv


class DeepQTrading:
    #Class constructor
    #model: Keras model considered
    #nbActions: number of decisions (0-Hold, 1-Long, 2-Short)
    #explorations_iterations: a vector of pairs containing (i) the probability of
    #  random actions and (ii) how many iterations to run with it (the algorithm
    #  is run several times, i.e. several iterations)
    #outputFile: name of the folder where the training metrics are printed
    #ensembleFolderName: name of the folder where the predictions are printed
    #optimizer: optimizer to use

    def __init__(self, model, nbActions, explorations_iterations, outputFile,
                 ensembleFolderName, optimizer="adamax"):
        self.ensembleFolderName = ensembleFolderName
        self.policy = EpsGreedyQPolicy()
        self.explorations_iterations = explorations_iterations
        self.nbActions = nbActions
        self.model = model

        #Define the memory
        self.memory = SequentialMemory(limit=10000, window_length=1)

        #Instantiate the agent with the parameters received
        self.agent = DQNAgent(model=self.model, policy=self.policy,
                              nb_actions=self.nbActions, memory=self.memory,
                              nb_steps_warmup=200, target_model_update=1e-1,
                              enable_double_dqn=True, enable_dueling_network=True)

        #Compile the agent with the optimizer given as parameter
        if optimizer == "adamax":
            self.agent.compile(Adamax(), metrics=['mae'])
        elif optimizer == "adadelta":
            self.agent.compile(Adadelta(), metrics=['mae'])
        elif optimizer == "sgd":
            self.agent.compile(SGD(), metrics=['mae'])
        elif optimizer == "rmsprop":
            self.agent.compile(RMSprop(), metrics=['mae'])
        elif optimizer == "nadam":
            self.agent.compile(Nadam(), metrics=['mae'])
        elif optimizer == "adagrad":
            self.agent.compile(Adagrad(), metrics=['mae'])
        elif optimizer == "adam":
            self.agent.compile(Adam(), metrics=['mae'])
        elif optimizer == "radam":
            self.agent.compile(RAdam(total_steps=5000, warmup_proportion=0.1, min_lr=1e-5),
                               metrics=['mae'])

        #Save the initial (random) weights of the agent in the q.weights file
        self.agent.save_weights("q.weights", overwrite=True)

        #Load the data; note that the validation split reads the training file,
        #as in the original source
        self.train_data = pd.read_csv('./dataset/jpm/train_data.csv')
        self.validation_data = pd.read_csv('./dataset/jpm/train_data.csv')
        self.test_data = pd.read_csv('./dataset/jpm/test_data.csv')

        #Callbacks that show the results for each iteration of training, validation and test
        self.trainer = ValidationCallback()
        self.validator = ValidationCallback()
        self.tester = ValidationCallback()
        self.outputFileName = outputFile

    def run(self):
        #Initiate the environments
        trainEnv = validEnv = testEnv = " "

        if not os.path.exists(self.outputFileName):
            os.makedirs(self.outputFileName)

        file_name = self.outputFileName + "/results-agent-training.csv"
        self.outputFile = open(file_name, "w+")

        #Write the header row of the csv
        self.outputFile.write(
            "Iteration,"
            "trainAccuracy,trainCoverage,trainReward,trainLong%,trainShort%,"
            "trainLongAcc,trainShortAcc,trainLongPrec,trainShortPrec,"
            "validationAccuracy,validationCoverage,validationReward,"
            "validationLong%,validationShort%,validationLongAcc,"
            "validationShortAcc,validLongPrec,validShortPrec,"
            "testAccuracy,testCoverage,testReward,testLong%,testShort%,"
            "testLongAcc,testShortAcc,testLongPrec,testShortPrec\n")

        #Prepare the validation and test ensemble frames, indexed by the distinct
        #date_time values, for saving the predictions later
        ensambleValid = pd.DataFrame(
            index=self.validation_data.loc[:, 'date_time'].drop_duplicates().tolist())
        ensambleTest = pd.DataFrame(
            index=self.test_data.loc[:, 'date_time'].drop_duplicates().tolist())
        ensambleValid.index.name = 'date_time'
        ensambleTest.index.name = 'date_time'

        #Explorations are the epochs considered, i.e. how many times the agent plays the game
        for eps in self.explorations_iterations:

            #The policy epsilon is eps[0]: random actions happen with probability eps[0]
            self.policy.eps = eps[0]

            #There will be eps[1] iterations (e.g. 25)
            for i in range(0, eps[1]):

                #Define the training, validation and testing environments
                #with their respective callbacks
                del trainEnv
                trainEnv = SpEnv(data=self.train_data, callback=self.trainer)
                del validEnv
                validEnv = SpEnv(data=self.validation_data, ensamble=ensambleValid,
                                 callback=self.validator, columnName="iteration" + str(i))
                del testEnv
                testEnv = SpEnv(data=self.test_data, callback=self.tester,
                                ensamble=ensambleTest, columnName="iteration" + str(i))

                #Reset the callbacks and the training environment
                self.trainer.reset()
                self.validator.reset()
                self.tester.reset()
                trainEnv.resetEnv()

                #Train the agent; the agent receives one environment as input
                self.agent.fit(trainEnv, nb_steps=len(self.train_data),
                               visualize=False, verbose=0)

                #Get the info from the train callback and print it
                (_, trainCoverage, trainAccuracy, trainReward, trainLongPerc,
                 trainShortPerc, trainLongAcc, trainShortAcc, trainLongPrec,
                 trainShortPrec) = self.trainer.getInfo()
                print("Iteration " + str(i + 1) + " TRAIN: accuracy: " + str(trainAccuracy) +
                      " coverage: " + str(trainCoverage) + " reward: " + str(trainReward))

                #Test the agent on validation data
                validEnv.resetEnv()
                self.agent.test(validEnv, nb_episodes=len(self.validation_data),
                                visualize=False, verbose=0)
                (_, validCoverage, validAccuracy, validReward, validLongPerc,
                 validShortPerc, validLongAcc, validShortAcc, validLongPrec,
                 validShortPrec) = self.validator.getInfo()
                print("Iteration " + str(i + 1) + " VALIDATION: accuracy: " + str(validAccuracy) +
                      " coverage: " + str(validCoverage) + " reward: " + str(validReward))

                #Test the agent on testing data
                testEnv.resetEnv()
                self.agent.test(testEnv, nb_episodes=len(self.test_data),
                                visualize=False, verbose=0)
                (_, testCoverage, testAccuracy, testReward, testLongPerc,
                 testShortPerc, testLongAcc, testShortAcc, testLongPrec,
                 testShortPrec) = self.tester.getInfo()
                print("Iteration " + str(i + 1) + " TEST: accuracy: " + str(testAccuracy) +
                      " coverage: " + str(testCoverage) + " reward: " + str(testReward))
                print(" ")

                #Write the metrics of this iteration to the csv file
                self.outputFile.write(
                    str(i) + "," +
                    str(trainAccuracy) + "," + str(trainCoverage) + "," + str(trainReward) + "," +
                    str(trainLongPerc) + "," + str(trainShortPerc) + "," +
                    str(trainLongAcc) + "," + str(trainShortAcc) + "," +
                    str(trainLongPrec) + "," + str(trainShortPrec) + "," +
                    str(validAccuracy) + "," + str(validCoverage) + "," + str(validReward) + "," +
                    str(validLongPerc) + "," + str(validShortPerc) + "," +
                    str(validLongAcc) + "," + str(validShortAcc) + "," +
                    str(validLongPrec) + "," + str(validShortPrec) + "," +
                    str(testAccuracy) + "," + str(testCoverage) + "," + str(testReward) + "," +
                    str(testLongPerc) + "," + str(testShortPerc) + "," +
                    str(testLongAcc) + "," + str(testShortAcc) + "," +
                    str(testLongPrec) + "," + str(testShortPrec) + "\n")

        #Close the file
        self.outputFile.close()

        #Save the validation and test predictions for the ensemble processing later
        if not os.path.exists("./Output/ensemble/" + self.ensembleFolderName):
            os.makedirs("./Output/ensemble/" + self.ensembleFolderName)
        ensambleValid.to_csv("./Output/ensemble/" + self.ensembleFolderName + "/ensemble_valid.csv")
        ensambleTest.to_csv("./Output/ensemble/" + self.ensembleFolderName + "/ensemble_test.csv")

    #Function to end the agent
    def end(self):
        print("FINISHED")
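#A minimal usage sketch of the class above (assumptions: `model` is a Keras
#model matching the jpm dataset's observation shape; the values shown are
#illustrative, not values from the original).
dqt = DeepQTrading(
    model=model,
    nbActions=3,                           #0-Hold, 1-Long, 2-Short
    explorations_iterations=[(0.2, 25)],   #20% random actions, 25 iterations
    outputFile="./Output/jpm",             #folder for results-agent-training.csv
    ensembleFolderName="jpm",              #folder under ./Output/ensemble/
    optimizer="adam")
dqt.run()
dqt.end()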