Example #1
    def __init__(self,
                 model,
                 nbActions,
                 explorations_iterations,
                 outputFile,
                 ensembleFolderName,
                 optimizer="adamax"):

        self.ensembleFolderName = ensembleFolderName
        self.policy = EpsGreedyQPolicy()
        self.explorations_iterations = explorations_iterations
        self.nbActions = nbActions
        self.model = model
        #Define the memory
        self.memory = SequentialMemory(limit=10000, window_length=1)
        #Instantiate the agent with parameters received
        self.agent = DQNAgent(model=self.model,
                              policy=self.policy,
                              nb_actions=self.nbActions,
                              memory=self.memory,
                              nb_steps_warmup=200,
                              target_model_update=1e-1,
                              enable_double_dqn=True,
                              enable_dueling_network=True)

        #Compile the agent with the optimizer given as parameter
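        #Note: the standard optimizers here are assumed to come from keras.optimizers; RAdam (with
        #total_steps/warmup_proportion/min_lr) is assumed to come from the keras-rectified-adam package (keras_radam)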
        if optimizer == "adamax":
            self.agent.compile(Adamax(), metrics=['mae'])
        if optimizer == "adadelta":
            self.agent.compile(Adadelta(), metrics=['mae'])
        if optimizer == "sgd":
            self.agent.compile(SGD(), metrics=['mae'])
        if optimizer == "rmsprop":
            self.agent.compile(RMSprop(), metrics=['mae'])
        if optimizer == "nadam":
            self.agent.compile(Nadam(), metrics=['mae'])
        if optimizer == "adagrad":
            self.agent.compile(Adagrad(), metrics=['mae'])
        if optimizer == "adam":
            self.agent.compile(Adam(), metrics=['mae'])
        if optimizer == "radam":
            self.agent.compile(RAdam(total_steps=5000,
                                     warmup_proportion=0.1,
                                     min_lr=1e-5),
                               metrics=['mae'])

        #Save the weights of the agents in the q.weights file
        #Save random weights
        self.agent.save_weights("q.weights", overwrite=True)

        #Load data
        self.train_data = pd.read_csv('./dataset/jpm/train_data.csv')
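        #Note: as written, the validation set below is read from the same train_data.csv as the training set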
        self.validation_data = pd.read_csv('./dataset/jpm/train_data.csv')
        self.test_data = pd.read_csv('./dataset/jpm/test_data.csv')

        #Call the callback for training, validation and test in order to show results for each iteration
        self.trainer = ValidationCallback()
        self.validator = ValidationCallback()
        self.tester = ValidationCallback()
        self.outputFileName = outputFile
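
For context, here is a minimal, hedged sketch of how this constructor might be driven (the surrounding class is shown in full as DeepQTrading in Example #7; the module name, network shape, feature count and all parameter values below are illustrative assumptions, not taken from the project):

#Usage sketch (assumptions: the class is DeepQTrading in DeepQTrading.py; the 68-feature
#observation and the exploration schedule are hypothetical)
from keras.models import Sequential
from keras.layers import Dense, Flatten

from DeepQTrading import DeepQTrading  #assumed module name

nbActions = 3  #0-Hold, 1-Long, 2-Short

model = Sequential()
model.add(Flatten(input_shape=(1, 68)))  #window_length=1 in SequentialMemory, hypothetical 68 features
model.add(Dense(35, activation='relu'))
model.add(Dense(nbActions, activation='linear'))

agent = DeepQTrading(model=model,
                     nbActions=nbActions,
                     explorations_iterations=[(0.2, 25)],  #(exploration rate, iterations) per entry
                     outputFile="./Output/csv/walk1",
                     ensembleFolderName="walk1",
                     optimizer="adam")
agent.run()
agent.end()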
Example #2
 def __init__(self,
              model,
              explorations,
              trainSize,
              validationSize,
              testSize,
              outputFile,
              begin,
              end,
              nbActions,
              operationCost=0):
     self.policy = EpsGreedyQPolicy()
     self.explorations = explorations
     self.nbActions = nbActions
     self.model = model
     self.memory = SequentialMemory(limit=10000, window_length=50)
     self.agent = DQNAgent(model=self.model,
                           policy=self.policy,
                           nb_actions=self.nbActions,
                           memory=self.memory,
                           nb_steps_warmup=400,
                           target_model_update=1e-1,
                           enable_double_dqn=True,
                           enable_dueling_network=True)
     self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
     self.agent.save_weights("q.weights", overwrite=True)
     self.currentStartingPoint = begin
     self.trainSize = trainSize
     self.validationSize = validationSize
     self.testSize = testSize
     self.walkSize = trainSize + validationSize + testSize
     self.endingPoint = end
     self.sp = pd.read_csv('./dataset/sp500Hour.csv')
     self.sp['Datetime'] = pd.to_datetime(self.sp['Date'] + ' ' +
                                          self.sp['Time'])
     self.sp = self.sp.set_index('Datetime')
     self.sp = self.sp.drop(['Date', 'Time'], axis=1)
     self.sp = self.sp.index
     self.operationCost = operationCost
     self.trainer = ValidationCallback()
     self.validator = ValidationCallback()
     self.tester = ValidationCallback()
     self.outputFile = open(outputFile, "w+")
     self.outputFile.write(
         "date,trainAccuracy,trainCoverage,trainReward,validationAccuracy,validationCoverage,validationReward,testAccuracy,testCoverage,testReward\n"
     )
Example #3
class DeepQTrading:
    def __init__(self,
                 model,
                 explorations,
                 trainSize,
                 validationSize,
                 testSize,
                 outputFile,
                 begin,
                 end,
                 nbActions,
                 operationCost=0):
        self.policy = EpsGreedyQPolicy()
        self.explorations = explorations
        self.nbActions = nbActions
        self.model = model
        self.memory = SequentialMemory(limit=10000, window_length=50)
        self.agent = DQNAgent(model=self.model,
                              policy=self.policy,
                              nb_actions=self.nbActions,
                              memory=self.memory,
                              nb_steps_warmup=400,
                              target_model_update=1e-1,
                              enable_double_dqn=True,
                              enable_dueling_network=True)
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
        self.agent.save_weights("q.weights", overwrite=True)
        self.currentStartingPoint = begin
        self.trainSize = trainSize
        self.validationSize = validationSize
        self.testSize = testSize
        self.walkSize = trainSize + validationSize + testSize
        self.endingPoint = end
        self.sp = pd.read_csv('./dataset/sp500Hour.csv')
        self.sp['Datetime'] = pd.to_datetime(self.sp['Date'] + ' ' +
                                             self.sp['Time'])
        self.sp = self.sp.set_index('Datetime')
        self.sp = self.sp.drop(['Date', 'Time'], axis=1)
        self.sp = self.sp.index
        self.operationCost = operationCost
        self.trainer = ValidationCallback()
        self.validator = ValidationCallback()
        self.tester = ValidationCallback()
        self.outputFile = open(outputFile, "w+")
        self.outputFile.write(
            "date,trainAccuracy,trainCoverage,trainReward,validationAccuracy,validationCoverage,validationReward,testAccuracy,testCoverage,testReward\n"
        )

    def run(self):
        env = " "

        iteration = 0

        while (self.currentStartingPoint + self.walkSize <= self.endingPoint):
            iteration += 1

            del (self.memory)
            del (self.agent)
            self.memory = SequentialMemory(limit=10000, window_length=50)
            self.agent = DQNAgent(model=self.model,
                                  policy=self.policy,
                                  nb_actions=self.nbActions,
                                  memory=self.memory,
                                  nb_steps_warmup=400,
                                  target_model_update=1e-1,
                                  enable_double_dqn=True,
                                  enable_dueling_network=True)
            self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
            self.agent.load_weights("q.weights")

            minLimit = None
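            #Locate the dataset index of the current starting point; if that exact timestamp is missing,
            #advance by one hour and retry (datetime.timedelta positional args are days, seconds,
            #microseconds, milliseconds, minutes, hours, weeks, so timedelta(0,0,0,0,0,1,0) is one hour)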

            while (minLimit is None):
                try:
                    minLimit = self.sp.get_loc(self.currentStartingPoint)
                except:
                    self.currentStartingPoint += datetime.timedelta(
                        0, 0, 0, 0, 0, 1, 0)
            maxLimit = None

            while (maxLimit is None):
                try:
                    maxLimit = self.sp.get_loc(self.currentStartingPoint +
                                               self.trainSize)
                except:
                    self.currentStartingPoint += datetime.timedelta(
                        0, 0, 0, 0, 0, 1, 0)

            date = self.currentStartingPoint
            for eps in self.explorations:
                self.policy.eps = eps[0]
                del (env)
                env = SpEnv(operationCost=self.operationCost,
                            minLimit=minLimit,
                            maxLimit=maxLimit)

                for _ in range(0, eps[1]):
                    self.trainer.reset()
                    self.agent.fit(
                        env,
                        nb_steps=self.trainSize.days - 65,
                        visualize=False,
                        callbacks=[self.trainer],
                        verbose=0
                    )  #problem with nb_steps (should try to run this in episodes instead)
                    env.resetEnv()

            (_, trainCoverage, trainAccuracy, trainReward, _, _, _, _, _,
             _) = self.trainer.getInfo()
            print(
                str(iteration) + " TRAIN:  acc: " + str(trainAccuracy) +
                " cov: " + str(trainCoverage) + " rew: " + str(trainReward))

            minLimit = maxLimit

            maxLimit = None
            while (maxLimit is None):
                try:
                    maxLimit = self.sp.get_loc(self.currentStartingPoint +
                                               self.trainSize +
                                               self.validationSize)
                except:
                    self.currentStartingPoint += datetime.timedelta(
                        0, 0, 0, 0, 0, 1, 0)
            del (env)
            env = SpEnv(operationCost=self.operationCost,
                        minLimit=minLimit,
                        maxLimit=maxLimit)

            self.agent.test(env,
                            nb_episodes=self.validationSize.days - 10,
                            visualize=False,
                            callbacks=[self.validator],
                            verbose=0)
            (_, validCoverage, validAccuracy, validReward, _, _, _, _, _,
             _) = self.validator.getInfo()
            print(
                str(iteration) + " VALID:  acc: " + str(validAccuracy) +
                " cov: " + str(validCoverage) + " rew: " + str(validReward))

            self.validator.reset()

            minLimit = maxLimit

            maxLimit = None
            while (maxLimit is None):
                try:
                    maxLimit = self.sp.get_loc(self.currentStartingPoint +
                                               self.trainSize +
                                               self.validationSize +
                                               self.testSize)
                except:
                    self.currentStartingPoint += datetime.timedelta(
                        0, 0, 0, 0, 0, 1, 0)

            del (env)
            env = SpEnv(operationCost=self.operationCost,
                        minLimit=minLimit,
                        maxLimit=maxLimit)

            self.agent.test(env,
                            nb_episodes=self.validationSize.days - 10,
                            visualize=False,
                            callbacks=[self.tester],
                            verbose=0)
            (_, testCoverage, testAccuracy, testReward, _, _, _, _, _,
             _) = self.tester.getInfo()
            print(
                str(iteration) + " TEST:  acc: " + str(testAccuracy) +
                " cov: " + str(testCoverage) + " rew: " + str(testReward))

            self.tester.reset()

            print(" ")

            self.outputFile.write(
                str(date) + "," + str(trainAccuracy) + "," +
                str(trainCoverage) + "," + str(trainReward) + "," +
                str(validAccuracy) + "," + str(validCoverage) + "," +
                str(validReward) + "," + str(testAccuracy) + "," +
                str(testCoverage) + "," + str(testReward) + "\n")
            self.currentStartingPoint += self.testSize

    def end(self):
        import os
        self.outputFile.close()
        os.remove("q.weights")
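
Below is a hedged usage sketch for this walk-forward variant. The dates, window sizes and network are illustrative assumptions only; the class expects begin/end to fall inside sp500Hour.csv's date range and the three sizes to behave like datetime.timedelta (they are added to datetimes and their .days attribute is read):

#Usage sketch (all values are hypothetical, chosen only to satisfy the constructor's interface;
#assumes the DeepQTrading class above is defined in, or importable into, this module)
import datetime
from keras.models import Sequential
from keras.layers import Dense, Flatten

nbActions = 3
model = Sequential()
model.add(Flatten(input_shape=(50, 4)))  #window_length=50 in SequentialMemory, hypothetical 4 features per hour
model.add(Dense(64, activation='relu'))
model.add(Dense(nbActions, activation='linear'))

dqt = DeepQTrading(model=model,
                   explorations=[(0.2, 10)],                     #(exploration rate, epochs) per entry
                   trainSize=datetime.timedelta(days=360),
                   validationSize=datetime.timedelta(days=180),
                   testSize=datetime.timedelta(days=180),
                   outputFile="results.csv",
                   begin=datetime.datetime(2006, 1, 2, 9, 0),    #should exist (or nearly) in the dataset index
                   end=datetime.datetime(2010, 1, 1),
                   nbActions=nbActions,
                   operationCost=0)
dqt.run()
dqt.end()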
Example #4
from Callback import ValidationCallback
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy
from datetime import datetime
import sys
import SpEnv

#2006-2015   2016
trainEnv = SpEnv.SpEnv(operationCost=0, minLimit=13378, maxLimit=74336)
validationEnv = SpEnv.SpEnv(operationCost=0, minLimit=74336, maxLimit=80500)

validator = ValidationCallback()
trainer = ValidationCallback()

nb_actions = trainEnv.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(50, 4, 68)))
model.add(Dense(512, activation='linear'))
model.add(LeakyReLU(alpha=.001))
model.add(Dense(1024, activation='linear'))
model.add(LeakyReLU(alpha=.001))
model.add(Dense(512, activation='linear'))
model.add(LeakyReLU(alpha=.001))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
class DeepQTrading:
    
    #Class constructor
    #model: Keras model considered
    #explorations: a vector of pairs containing (i) the probability of random predictions (exploration rate) and (ii) how many 
    #epochs will be run by the algorithm (the algorithm is run several times, i.e. several iterations)
    #trainSize: size of the training set
    #validationSize: size of the validation set
    #testSize: size of the testing set 
    #outputFile: name of the file to print results
    #begin: Initial date
    #end: final date
    #nbActions: number of decisions (0-Hold 1-Long 2-Short) 
    #nOutput: number of walks (Tonio set it to 20, but in reality there are 5 walks)
    #operationCost: cost of each transaction
    #telegramToken: token used for the bot that will send messages
    #telegramChatID: ID of the message receiver in Telegram
    #ensemble.py runs the ensemble
    def __init__(self, model, explorations, trainSize, validationSize, testSize, outputFile, begin, end, nbActions, nOutput=1, operationCost=0,telegramToken="",telegramChatID=""):
        
        #If the telegram token for the bot and the telegram chat id of the receiver are given,
        #set up the bot; print an error if it cannot be created
        if(telegramToken!="" and telegramChatID!=""):
            self.chatID=telegramChatID
            self.telegramOutput=True
            try:
                self.bot = telegram.Bot(token=telegramToken)
            except:
                print("Error with Telegram Bot")
        
        #Otherwise disable Telegram output (no bot is created, so run() will not try to send messages)
        else:
            self.telegramOutput=False

        #Define the policy, explorations, actions and model as received by parameters
        self.policy = EpsGreedyQPolicy()
        self.explorations=explorations
        self.nbActions=nbActions
        self.model=model

        #Define the memory
        self.memory = SequentialMemory(limit=10000, window_length=1)

        #Instantiate the agent with parameters received
        self.agent = DQNAgent(model=self.model, policy=self.policy,  nb_actions=self.nbActions, memory=self.memory, nb_steps_warmup=200, target_model_update=1e-1,
                                    enable_double_dqn=True,enable_dueling_network=True)
        
        #Compile the agent with the adam optimizer and with the mean absolute error metric
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])

        #Save the weights of the agents in the q.weights file
        #Save random weights
        self.agent.save_weights("q.weights", overwrite=True)

        #Define the current starting point as the initial date
        self.currentStartingPoint = begin

        #Define the training, validation and testing size as informed by the call
        #Train: five years
        #Validation: 6 months
        #Test: 6 months
        self.trainSize=trainSize
        self.validationSize=validationSize
        self.testSize=testSize
        
        #The walk size is simply summing up the train, validation and test sizes
        self.walkSize=trainSize+validationSize+testSize
        
        #Define the ending point as the final date (January 1st of 2010)
        self.endingPoint=end

        #Read the hourly dataset
        #We join data from different files
        #Here read hour 
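        #MK is assumed to be a module-level market-name string (e.g. "sp500", matching the sp500Hour.csv used in the other examples)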
        self.dates= pd.read_csv('./dataset/'+MK+'Hour.csv')

        #Read the hourly dataset
        self.sp = pd.read_csv('./dataset/'+MK+'Hour.csv')
        #Convert the pandas format to date and time format
        self.sp['Datetime'] = pd.to_datetime(self.sp['Date'] + ' ' + self.sp['Time'])
        #Set an index to Datetime on the pandas loaded dataset. Register will be indexes through this value
        self.sp = self.sp.set_index('Datetime')
        #Drop Time and Date from the Dataset
        self.sp = self.sp.drop(['Time','Date'], axis=1)
        #Just the index will be important, because date and time will be used to define the train, validation and test 
        #for each walk
        self.sp = self.sp.index

        #Receives the operation cost which is 0
        #Operation cost is the cost for long and short. It is defined as zero
        self.operationCost = operationCost
        
        #Call the callback for training, validation and test in order to show the results for each episode 
        self.trainer=ValidationCallback()
        self.validator=ValidationCallback()
        self.tester=ValidationCallback()
        
        #Initiate the output file
        self.outputFile=[]
        
        #Write in the file
        for i in range(0,nOutput):
            
          
            self.outputFile.append(open(outputFile+str(i+1)+".csv", "w+"))

            #Write the fields in the file
            self.outputFile[i].write(
            "Iteration,"+
            "trainAccuracy,"+
            "trainCoverage,"+
            "trainReward,"+
            "trainLong%,"+
            "trainShort%,"+
            "trainLongAcc,"+
            "trainShortAcc,"+
            "trainLongPrec,"+
            "trainShortPrec,"+

            "validationAccuracy,"+
            "validationCoverage,"+
            "validationReward,"+
            "validationLong%,"+
            "validationShort%,"+
            "validationLongAcc,"+
            "validationShortAcc,"+
            "validLongPrec,"+
            "validShortPrec,"+
            
            "testAccuracy,"+
            "testCoverage,"+
            "testReward,"+
            "testLong%,"+
            "testShort%,"+
            "testLongAcc,"+
            "testShortAcc,"+
            "testLongPrec,"+
            "testShortPrec\n")
        

    def run(self):

        #Initiate the training, validation and testing environment placeholders
        trainEnv=validEnv=testEnv=" "

        iteration=-1

        #While we did not pass through all the dates (i.e., while all the walks were not finished)
        #walk size is train+validation+test size
        #currentStarting point begins with begin date
        while(self.currentStartingPoint+self.walkSize <= self.endingPoint):

            #Each iteration corresponds to one walk
            iteration+=1

            #Send to the receiver the current walk
            if(self.telegramOutput):
                self.bot.send_message(chat_id=self.chatID, text="Walk "+str(iteration + 1 )+" started.")
            
            #Empty the memory and agent
            del(self.memory)
            del(self.agent)

            #Define the memory and agent
            #Memory is Sequential
            self.memory = SequentialMemory(limit=10000, window_length=1)
            #Agent is initiated as passed through parameters
            self.agent = DQNAgent(model=self.model, policy=self.policy,  nb_actions=self.nbActions, memory=self.memory, nb_steps_warmup=200, target_model_update=1e-1,
                                    enable_double_dqn=True,enable_dueling_network=True)
            #Compile the agent with Adam initialization
            self.agent.compile(Adam(lr=1e-3), metrics=['mae'])
            
            #Reload the weights saved in the constructor (random initialization), so every walk starts from the same starting weights
            self.agent.load_weights("q.weights")
            
            ########################################TRAINING STAGE########################################################
            
            #trainMinLimit is the dataset index of the current starting point
            #If that exact timestamp is not in the index, advance the starting point by one hour and try again
            trainMinLimit=None
            while(trainMinLimit is None):
                try:
                    trainMinLimit = self.sp.get_loc(self.currentStartingPoint)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)

            #trainMaxLimit is the dataset index of the starting point plus the training size
            #If that exact timestamp is not in the index, advance the starting point by one hour and try again
            trainMaxLimit=None
            while(trainMaxLimit is None):
                try:
                    trainMaxLimit = self.sp.get_loc(self.currentStartingPoint+self.trainSize)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)
            
            ########################################VALIDATION STAGE#######################################################
            
            #The validation period starts right after the end of the training period
            validMinLimit=trainMaxLimit+1

            #validMaxLimit is the dataset index of the starting point plus the training and validation sizes
            #If that exact timestamp is not in the index, advance the starting point by one hour and try again
            validMaxLimit=None
            while(validMaxLimit is None):
                try:
                    validMaxLimit = self.sp.get_loc(self.currentStartingPoint+self.trainSize+self.validationSize)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)

            ########################################TESTING STAGE######################################################## 
            #The testing period starts right after the end of the validation period
            testMinLimit=validMaxLimit+1

            #testMaxLimit is the dataset index of the starting point plus the training, validation and test sizes
            #If that exact timestamp is not in the index, advance the starting point by one hour and try again
            testMaxLimit=None
            while(testMaxLimit is None):
                try:
                    testMaxLimit = self.sp.get_loc(self.currentStartingPoint+self.trainSize+self.validationSize+self.testSize)
                except:
                    self.currentStartingPoint+=datetime.timedelta(0,0,0,0,0,1,0)

            #Separate the validation and testing data according to the limits found before
            #Prepare the validation and testing frames for saving them later 
            ensambleValid=pd.DataFrame(index=self.dates[validMinLimit:validMaxLimit].loc[:,'Date'].drop_duplicates().tolist())
            ensambleTest=pd.DataFrame(index=self.dates[testMinLimit:testMaxLimit].loc[:,'Date'].drop_duplicates().tolist())
            
            #Put the name of the index for validation and testing
            ensambleValid.index.name='Date'
            ensambleTest.index.name='Date'
            
           
            #Each entry of explorations pairs an exploration rate with a number of epochs
            for eps in self.explorations:

                #The policy epsilon is eps[0] (e.g. 0.2, so random actions are taken with 20% probability)
                self.policy.eps = eps[0]

                #Run eps[1] epochs (e.g. 100)
                for i in range(0,eps[1]):
                    
                    del(trainEnv)

                    #Define the training, validation and testing environments with their respective callbacks
                    trainEnv = SpEnv(operationCost=self.operationCost,minLimit=trainMinLimit,maxLimit=trainMaxLimit,callback=self.trainer)
                    del(validEnv)
                    validEnv=SpEnv(operationCost=self.operationCost,minLimit=validMinLimit,maxLimit=validMaxLimit,callback=self.validator,ensamble=ensambleValid,columnName="iteration"+str(i))
                    del(testEnv)
                    testEnv=SpEnv(operationCost=self.operationCost,minLimit=testMinLimit,maxLimit=testMaxLimit,callback=self.tester,ensamble=ensambleTest,columnName="iteration"+str(i))

                    #Reset the callback
                    self.trainer.reset()
                    self.validator.reset()
                    self.tester.reset()

                    #Reset the training environment
                    trainEnv.resetEnv()
                    #Train the agent
                    self.agent.fit(trainEnv,nb_steps=floor(self.trainSize.days-self.trainSize.days*0.2),visualize=False,verbose=0)
                    #Get the info from the train callback
                    (_,trainCoverage,trainAccuracy,trainReward,trainLongPerc,trainShortPerc,trainLongAcc,trainShortAcc,trainLongPrec,trainShortPrec)=self.trainer.getInfo()
                    #Print Callback values on the screen
                    print(str(i) + " TRAIN:  acc: " + str(trainAccuracy)+ " cov: " + str(trainCoverage)+ " rew: " + str(trainReward))

                    #Reset the validation environment
                    validEnv.resetEnv()
                    #Test the agent on validation data
                    self.agent.test(validEnv,nb_episodes=floor(self.validationSize.days-self.validationSize.days*0.2),visualize=False,verbose=0)
                    #Get the info from the validation callback
                    (_,validCoverage,validAccuracy,validReward,validLongPerc,validShortPerc,validLongAcc,validShortAcc,validLongPrec,validShortPrec)=self.validator.getInfo()
                    #Print callback values on the screen
                    print(str(i) + " VALID:  acc: " + str(validAccuracy)+ " cov: " + str(validCoverage)+ " rew: " + str(validReward))

                    #Reset the testing environment
                    testEnv.resetEnv()
                    #Test the agent on testing data
                    self.agent.test(testEnv,nb_episodes=floor(self.validationSize.days-self.validationSize.days*0.2),visualize=False,verbose=0)
                    #Get the info from the testing callback
                    (_,testCoverage,testAccuracy,testReward,testLongPerc,testShortPerc,testLongAcc,testShortAcc,testLongPrec,testShortPrec)=self.tester.getInfo()
                    #Print callback values on the screen
                    print(str(i) + " TEST:  acc: " + str(testAccuracy)+ " cov: " + str(testCoverage)+ " rew: " + str(testReward))
                    print(" ")
                    
                    #write the walk data on the text file
                    self.outputFile[iteration].write(
                        str(i)+","+
                        str(trainAccuracy)+","+
                        str(trainCoverage)+","+
                        str(trainReward)+","+
                        str(trainLongPerc)+","+
                        str(trainShortPerc)+","+
                        str(trainLongAcc)+","+
                        str(trainShortAcc)+","+
                        str(trainLongPrec)+","+
                        str(trainShortPrec)+","+
                        
                        str(validAccuracy)+","+
                        str(validCoverage)+","+
                        str(validReward)+","+
                        str(validLongPerc)+","+
                        str(validShortPerc)+","+
                        str(validLongAcc)+","+
                        str(validShortAcc)+","+
                        str(validLongPrec)+","+
                        str(validShortPrec)+","+
                        
                        str(testAccuracy)+","+
                        str(testCoverage)+","+
                        str(testReward)+","+
                        str(testLongPerc)+","+
                        str(testShortPerc)+","+
                        str(testLongAcc)+","+
                        str(testShortAcc)+","+
                        str(testLongPrec)+","+
                        str(testShortPrec)+"\n")

            #Close the file                
            self.outputFile[iteration].close()

            #For the next walk, the current starting point will be the current starting point + the test size
            #It means that, for the next walk, the training data will start 6 months after the training data of 
            #the previous walk   
            self.currentStartingPoint+=self.testSize

            #Write validation and Testing Data into files
            #Save the files for processing later with the ensemble
            ensambleValid.to_csv("./Output/ensemble/walk"+str(iteration)+"ensemble_valid.csv")
            ensambleTest.to_csv("./Output/ensemble/walk"+str(iteration)+"ensemble_test.csv")

    #Function to end the Agent
    def end(self):
        import os 

        #Close the files where the results were written 
        for outputFile in self.outputFile:
            outputFile.close()
Example #7
class DeepQTrading:

    #Class constructor
    #model: Keras model considered
    #explorations_iterations: a vector containing (i) the probability of random predictions and (ii) how many iterations will be
    #run by the algorithm (the algorithm is run several times, i.e. several iterations)
    #outputFile: name of the file to print metrics of the training
    #ensembleFolderName: name of the file to print predictions
    #optimizer: optimizer to run

    def __init__(self,
                 model,
                 nbActions,
                 explorations_iterations,
                 outputFile,
                 ensembleFolderName,
                 optimizer="adamax"):

        self.ensembleFolderName = ensembleFolderName
        self.policy = EpsGreedyQPolicy()
        self.explorations_iterations = explorations_iterations
        self.nbActions = nbActions
        self.model = model
        #Define the memory
        self.memory = SequentialMemory(limit=10000, window_length=1)
        #Instantiate the agent with parameters received
        self.agent = DQNAgent(model=self.model,
                              policy=self.policy,
                              nb_actions=self.nbActions,
                              memory=self.memory,
                              nb_steps_warmup=200,
                              target_model_update=1e-1,
                              enable_double_dqn=True,
                              enable_dueling_network=True)

        #Compile the agent with the optimizer given as parameter
        if optimizer == "adamax":
            self.agent.compile(Adamax(), metrics=['mae'])
        if optimizer == "adadelta":
            self.agent.compile(Adadelta(), metrics=['mae'])
        if optimizer == "sgd":
            self.agent.compile(SGD(), metrics=['mae'])
        if optimizer == "rmsprop":
            self.agent.compile(RMSprop(), metrics=['mae'])
        if optimizer == "nadam":
            self.agent.compile(Nadam(), metrics=['mae'])
        if optimizer == "adagrad":
            self.agent.compile(Adagrad(), metrics=['mae'])
        if optimizer == "adam":
            self.agent.compile(Adam(), metrics=['mae'])
        if optimizer == "radam":
            self.agent.compile(RAdam(total_steps=5000,
                                     warmup_proportion=0.1,
                                     min_lr=1e-5),
                               metrics=['mae'])

        #Save the weights of the agents in the q.weights file
        #Save random weights
        self.agent.save_weights("q.weights", overwrite=True)

        #Load data
        self.train_data = pd.read_csv('./dataset/jpm/train_data.csv')
        self.validation_data = pd.read_csv('./dataset/jpm/train_data.csv')
        self.test_data = pd.read_csv('./dataset/jpm/test_data.csv')

        #Call the callback for training, validation and test in order to show results for each iteration
        self.trainer = ValidationCallback()
        self.validator = ValidationCallback()
        self.tester = ValidationCallback()
        self.outputFileName = outputFile

    def run(self):
        #Initiate the training, validation and testing environment placeholders
        trainEnv = validEnv = testEnv = " "

        if not os.path.exists(self.outputFileName):
            os.makedirs(self.outputFileName)

        file_name = self.outputFileName + "/results-agent-training.csv"

        self.outputFile = open(file_name, "w+")
        #write the first row of the csv
        self.outputFile.write("Iteration," + "trainAccuracy," +
                              "trainCoverage," + "trainReward," +
                              "trainLong%," + "trainShort%," +
                              "trainLongAcc," + "trainShortAcc," +
                              "trainLongPrec," + "trainShortPrec," +
                              "validationAccuracy," + "validationCoverage," +
                              "validationReward," + "validationLong%," +
                              "validationShort%," + "validationLongAcc," +
                              "validationShortAcc," + "validLongPrec," +
                              "validShortPrec," + "testAccuracy," +
                              "testCoverage," + "testReward," + "testLong%," +
                              "testShort%," + "testLongAcc," +
                              "testShortAcc," + "testLongPrec," +
                              "testShortPrec\n")

        #Prepare the validation and testing frames for saving them later
        ensambleValid = pd.DataFrame(
            index=self.validation_data.loc[:, 'date_time'].drop_duplicates().tolist())
        ensambleTest = pd.DataFrame(
            index=self.test_data.loc[:, 'date_time'].drop_duplicates().tolist())

        #Put the name of the index for validation and testing
        ensambleValid.index.name = 'date_time'
        ensambleTest.index.name = 'date_time'

        #Explorations are epochs considered, or how many times the agent will play the game.
        for eps in self.explorations_iterations:

            #The policy epsilon is eps[0] (the exploration rate), so random actions are taken with probability eps[0]
            self.policy.eps = eps[0]

            #Run eps[1] iterations (e.g. 25)
            for i in range(0, eps[1]):

                del (trainEnv)
                #Define the training, validation and testing environments with their respective callbacks
                trainEnv = SpEnv(data=self.train_data, callback=self.trainer)

                del (validEnv)
                validEnv = SpEnv(data=self.validation_data,
                                 ensamble=ensambleValid,
                                 callback=self.validator,
                                 columnName="iteration" + str(i))

                del (testEnv)
                testEnv = SpEnv(data=self.test_data,
                                callback=self.tester,
                                ensamble=ensambleTest,
                                columnName="iteration" + str(i))

                #Reset the callback
                self.trainer.reset()
                self.validator.reset()
                self.tester.reset()

                #Reset the training environment
                trainEnv.resetEnv()

                #Train the agent
                #The agent receives as input one environment
                self.agent.fit(trainEnv,
                               nb_steps=len(self.train_data),
                               visualize=False,
                               verbose=0)

                #Get the info from the train callback
                (_, trainCoverage, trainAccuracy, trainReward, trainLongPerc,
                 trainShortPerc, trainLongAcc, trainShortAcc, trainLongPrec,
                 trainShortPrec) = self.trainer.getInfo()

                print("Iteration " + str(i + 1) + " TRAIN:  accuracy: " +
                      str(trainAccuracy) + " coverage: " + str(trainCoverage) +
                      " reward: " + str(trainReward))

                #Reset the validation environment
                validEnv.resetEnv()
                #Test the agent on validation data
                self.agent.test(validEnv,
                                nb_episodes=len(self.validation_data),
                                visualize=False,
                                verbose=0)

                #Get the info from the validation callback
                (_, validCoverage, validAccuracy, validReward, validLongPerc,
                 validShortPerc, validLongAcc, validShortAcc, validLongPrec,
                 validShortPrec) = self.validator.getInfo()
                #Print callback values on the screen
                print("Iteration " + str(i + 1) + " VALIDATION:  accuracy: " +
                      str(validAccuracy) + " coverage: " + str(validCoverage) +
                      " reward: " + str(validReward))

                #Reset the testing environment
                testEnv.resetEnv()
                #Test the agent on testing data
                self.agent.test(testEnv,
                                nb_episodes=len(self.test_data),
                                visualize=False,
                                verbose=0)
                #Get the info from the testing callback
                (_, testCoverage, testAccuracy, testReward, testLongPerc,
                 testShortPerc, testLongAcc, testShortAcc, testLongPrec,
                 testShortPrec) = self.tester.getInfo()
                #Print callback values on the screen
                print("Iteration " + str(i + 1) + " TEST:  acc: " +
                      str(testAccuracy) + " cov: " + str(testCoverage) +
                      " rew: " + str(testReward))
                print(" ")

                #write the metrics in a text file
                self.outputFile.write(
                    str(i) + "," + str(trainAccuracy) + "," +
                    str(trainCoverage) + "," + str(trainReward) + "," +
                    str(trainLongPerc) + "," + str(trainShortPerc) + "," +
                    str(trainLongAcc) + "," + str(trainShortAcc) + "," +
                    str(trainLongPrec) + "," + str(trainShortPrec) + "," +
                    str(validAccuracy) + "," + str(validCoverage) + "," +
                    str(validReward) + "," + str(validLongPerc) + "," +
                    str(validShortPerc) + "," + str(validLongAcc) + "," +
                    str(validShortAcc) + "," + str(validLongPrec) + "," +
                    str(validShortPrec) + "," + str(testAccuracy) + "," +
                    str(testCoverage) + "," + str(testReward) + "," +
                    str(testLongPerc) + "," + str(testShortPerc) + "," +
                    str(testLongAcc) + "," + str(testShortAcc) + "," +
                    str(testLongPrec) + "," + str(testShortPrec) + "\n")

        #Close the file
        self.outputFile.close()

        if not os.path.exists("./Output/ensemble/" + self.ensembleFolderName):
            os.makedirs("./Output/ensemble/" + self.ensembleFolderName)

        ensambleValid.to_csv("./Output/ensemble/" + self.ensembleFolderName +
                             "/ensemble_valid.csv")
        ensambleTest.to_csv("./Output/ensemble/" + self.ensembleFolderName +
                            "/ensemble_test.csv")

    #Function to end the Agent
    def end(self):
        print("FINISHED")