def trainAgent(trainDf, backTestDf, genPath, saveAgentDir, saveAgentName):
    """Fit a composite opener/buyer/seller DQN agent and print the elapsed time.

    trainDf / backTestDf -- data handed to the training and back-test CompositeEnv.
    genPath -- file the three MultiScalerDiffGenerator instances are loaded from.
    saveAgentDir / saveAgentName -- forwarded to fit_agent as saveDir / saveName.

    Returns the value produced by CompositeAgent.fit_agent.

    NOTE(review): hkFeatList and saveDir are not parameters of this function;
    they are expected to exist at module level -- confirm.
    """
    startTime = datetime.now()
    print("Start train time: {}".format(startTime))

    generatorFile = "{}".format(genPath)
    # Each sub-agent role gets its own separately loaded generator instance.
    openerGen, buyerGen, sellerGen = [
        MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(generatorFile)
        for _ in range(3)
    ]

    # Settings common to the training and the back-test environment.
    envSettings = dict(startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                       stoplossPuncts=100, takeprofitPuncts=300,
                       renderFlag=True, renderDir=saveDir)
    trainEnv = CompositeEnv(trainDf, openerGen, buyerGen, sellerGen,
                            **dict(envSettings, renderName="trainDealsPlot"))
    backTestEnv = CompositeEnv(backTestDf, openerGen, buyerGen, sellerGen,
                               **dict(envSettings, renderName="backTestDealsPlot"))

    # State and action sizes are read from the training environment.
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05, epsilon=1,
                           epsilon_decay=0.9997, learning_rate=0.0002)
    # Buyer and seller are configured with the same hyper-parameters.
    closerSettings = dict(memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05, epsilon=1,
                          epsilon_decay=1.0, learning_rate=0.0002)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          **closerSettings)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           **closerSettings)

    compositeAgent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    lastSaveEp = compositeAgent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=4, nWarmUp=0,
                                          uniformEps=False, synEps=True, plotScores=False, saveBest=True,
                                          saveFreq=1, saveDir=saveAgentDir, saveName=saveAgentName)

    elapsed = datetime.now() - startTime
    print("Training finished. Total time: {}".format(elapsed))
    return lastSaveEp
Exemple #2
0
def useAgent(preprocDf, hkFeatList, saveDir, genPath, agentName,
             timeConstraint):
    """Run a saved composite agent on the tail of preprocDf and plot cumulative reward.

    preprocDf -- data source; only its last 232 rows (``tail(232)``) are used.
    hkFeatList -- feature list given to every MultiScalerDiffGenerator.
    saveDir -- directory the agent is loaded from and the env renders into.
    genPath -- file the generators are loaded from.
    agentName -- name passed to CompositeAgent.load_agent.
    timeConstraint -- forwarded to CompositeAgent.use_agent.

    Prints the time spent in use_agent, calls reset_keras(), then shows a
    matplotlib plot of the running sum of the values use_agent returned.
    """
    useCpu(nThreads=8, nCores=8)

    testDf = preprocDf.tail(232)

    # One separately loaded generator per role. The setFitMode(False) calls
    # are disabled in this variant.
    openerGen, buyerGen, sellerGen = [
        MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
        for _ in range(3)
    ]

    testEnv = CompositeEnv(testDf, openerGen, buyerGen, sellerGen,
                           startDeposit=300, lotSize=0.1, lotCoef=100000,
                           spread=18, spreadCoef=0.00001,
                           renderFlag=True, renderDir=saveDir,
                           renderName="testDealsPlot")

    # State and action sizes are read from the environment, role by role.
    roleAgents = [
        DQNAgent(testEnv.observation_space[role], testEnv.action_space[role].n)
        for role in ("opener", "buyer", "seller")
    ]
    agent = CompositeAgent(*roleAgents)
    agent = agent.load_agent(saveDir, agentName, dropSupportModel=True)
    print("start using agent")

    startTime = datetime.now()
    dealsStatistics = agent.use_agent(testEnv, timeConstraint=timeConstraint)
    elapsed = datetime.now() - startTime
    print("Use time: {}".format(elapsed))
    reset_keras()

    # Running total of the per-deal values, one point per deal.
    runningTotal = 0
    cumulativeReward = []
    for dealReward in dealsStatistics:
        runningTotal += dealReward
        cumulativeReward.append(runningTotal)
    plt.plot(list(range(len(cumulativeReward))), cumulativeReward)
    plt.show()
Exemple #3
0
def collectStatistics(symbol, nExperiment, trainDf, backTestDf, testDf, startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001):
    """Train for 20 single-episode rounds, dumping deal statistics after each round.

    After every round the agent is run on the train, forward-test and
    back-test environments (in that order). The accumulated per-round results
    are rewritten with joblib to ``./trainDealsStatistics_<symbol>_<nExperiment>.pkl``,
    ``./testDealsStatistics_...`` and ``./backDealsStatistics_...``.

    Returns None.
    """
    reset_keras()
    generatorFile = "./MSDiffGen.pkl"
    openerGen, buyerGen, sellerGen = [
        MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator(generatorFile)
        for _ in range(3)
    ]

    # All three environments share the same trading settings and generators.
    envSettings = dict(startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                       spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    trainEnv = CompositeEnv(trainDf, openerGen, buyerGen, sellerGen, **envSettings)
    backTestEnv = CompositeEnv(backTestDf, openerGen, buyerGen, sellerGen, **envSettings)
    forwardTestEnv = CompositeEnv(testDf, openerGen, buyerGen, sellerGen, **envSettings)

    # State and action sizes are read from the training environment.
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05, epsilon=1,
                           epsilon_decay=0.9995, learning_rate=0.0005)
    closerSettings = dict(memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05, epsilon=1,
                          epsilon_decay=1.0, learning_rate=0.0005)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          **closerSettings)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           **closerSettings)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    trainDealsStatistics = []
    testDealsStatistics = []
    backDealsStatistics = []
    # (file-name template, accumulated results) in the order they are dumped.
    dumpTargets = (
        ("trainDealsStatistics_{}_{}.pkl", trainDealsStatistics),
        ("testDealsStatistics_{}_{}.pkl", testDealsStatistics),
        ("backDealsStatistics_{}_{}.pkl", backDealsStatistics),
    )
    for roundIdx in range(20):
        # Only the very first round starts a fresh fit; later rounds continue it.
        agent.fit_agent(env=trainEnv, backTestEnv=None, nEpisodes=1, nWarmUp=0,
                        uniformEps=False, synEps=True, plotScores=True, saveBest=False, saveFreq=111,
                        continueFit=roundIdx > 0)

        trainDealsStatistics.append(agent.use_agent(trainEnv))
        testDealsStatistics.append(agent.use_agent(forwardTestEnv))
        backDealsStatistics.append(agent.use_agent(backTestEnv))

        for nameTemplate, collected in dumpTargets:
            with open("./" + nameTemplate.format(symbol, nExperiment), mode="wb") as dealsFile:
                joblib.dump(collected, dealsFile)
def createGenerator(df, featureList, saveDir, saveName):
    """Fit a MultiScalerDiffGenerator on df and save it to '<saveDir><saveName>.pkl'.

    saveDir and saveName are concatenated with no separator in between, so
    saveDir has to carry its own trailing separator when it names a directory.
    Returns None.
    """
    generatorSettings = dict(nDiffs=1, nPoints=32, flatStack=False, fitOnStep=False)
    generator = MultiScalerDiffGenerator(featureList=featureList, **generatorSettings)
    generator.setFitMode(True)
    # globalFit's return value is the object that gets saved.
    fittedGenerator = generator.globalFit(df)
    targetPath = "{}{}.pkl".format(saveDir, saveName)
    fittedGenerator.saveGenerator(targetPath)
def evaluateAgent(backTestDf, genPath, agentDir, agentName):
    """Run a saved composite agent on backTestDf and return the summed deal results.

    backTestDf -- data handed to the CompositeEnv.
    genPath -- file the three generators are loaded from.
    agentDir / agentName -- passed to CompositeAgent.load_agent.

    Returns the sum of the values returned by use_agent (0 when it is empty).

    NOTE(review): hkFeatList and saveDir (used as renderDir) are not
    parameters; they are expected to exist at module level -- confirm.
    """
    generatorFile = "{}".format(genPath)
    openerGen, buyerGen, sellerGen = [
        MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(generatorFile)
        for _ in range(3)
    ]

    testEnv = CompositeEnv(backTestDf, openerGen, buyerGen, sellerGen,
                           startDeposit=300, lotSize=0.1, lotCoef=100000,
                           spread=18, spreadCoef=0.00001,
                           stoplossPuncts=100, takeprofitPuncts=300,
                           renderFlag=True, renderDir=saveDir,
                           renderName="testDealsPlot")

    # State and action sizes are read from the environment, role by role.
    roleAgents = [
        DQNAgent(testEnv.observation_space[role], testEnv.action_space[role].n)
        for role in ("opener", "buyer", "seller")
    ]
    agent = CompositeAgent(*roleAgents).load_agent(agentDir, agentName)
    print("start using agent")

    totalReward = 0
    for dealReward in agent.use_agent(testEnv):
        totalReward += dealReward
    return totalReward
Exemple #6
0
def createGenerators(df, priceFeatList):
    """Fit a MultiScalerDiffGenerator (nPoints=200) on df and save it to ./MSDiffGen.pkl.

    Returns None.
    """
    generatorSettings = dict(nDiffs=1, nPoints=200, flatStack=False, fitOnStep=False)
    generator = MultiScalerDiffGenerator(featureList=priceFeatList, **generatorSettings)
    generator.setFitMode(True)
    # globalFit's return value is the object that gets saved.
    fittedGenerator = generator.globalFit(df)
    fittedGenerator.saveGenerator("./MSDiffGen.pkl")
Exemple #7
0
def collectStatistics(testDf,
                      startDeposit=300,
                      lotSize=0.1,
                      lotCoef=100000,
                      spread=18,
                      spreadCoef=0.00001):
    """Run the "checkpoint_composite" agent from ./ on testDf and return its deal statistics.

    The trading parameters are forwarded unchanged to CompositeEnv. The
    generators are loaded from ./MSDiffGen.pkl; the setFitMode(False) calls
    are disabled in this variant.

    Returns whatever CompositeAgent.use_agent returns.
    """
    generatorFile = "./MSDiffGen.pkl"
    openerGen, buyerGen, sellerGen = [
        MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator(generatorFile)
        for _ in range(3)
    ]

    envSettings = dict(startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                       spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    testEnv = CompositeEnv(testDf, openerGen, buyerGen, sellerGen, **envSettings)

    # State and action sizes are read from the environment, role by role.
    roleAgents = [
        DQNAgent(testEnv.observation_space[role], testEnv.action_space[role].n)
        for role in ("opener", "buyer", "seller")
    ]
    # The checkpoint is loaded here; "best_composite" is the alternative name
    # used by other snippets.
    agent = CompositeAgent(*roleAgents).load_agent("./", "checkpoint_composite")
    print("start using agent")
    return agent.use_agent(testEnv)
def evaluateAgent(backTestDf):
    """Run the "best_composite" agent from ./ on backTestDf and return the summed deal results.

    Generators are loaded from ./MSDiffGen.pkl with the module-level
    hkFeatList. Returns the sum of the values returned by use_agent (0 when
    it is empty).
    """
    generatorFile = "./MSDiffGen.pkl"
    openerGen, buyerGen, sellerGen = [
        MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(generatorFile)
        for _ in range(3)
    ]

    testEnv = CompositeEnv(backTestDf, openerGen, buyerGen, sellerGen,
                           startDeposit=300, lotSize=0.1, lotCoef=100000,
                           spread=18, spreadCoef=0.00001,
                           renderFlag=True)

    # State and action sizes are read from the environment, role by role.
    roleAgents = [
        DQNAgent(testEnv.observation_space[role], testEnv.action_space[role].n)
        for role in ("opener", "buyer", "seller")
    ]
    agent = CompositeAgent(*roleAgents).load_agent("./", "best_composite")
    print("start using agent")

    totalReward = 0
    for dealReward in agent.use_agent(testEnv):
        totalReward += dealReward
    return totalReward
Exemple #9
0
def createGenerators(df, priceFeatList):
    """Apply the history modifiers to df, then fit and save a generator to ./MSDiffGen.pkl.

    df is passed through VSASpread, HeikenAshiMod and EnergyMod (in that
    order) before a MultiScalerDiffGenerator (nPoints=32) is fitted on the
    result. Returns None.
    """
    # Each modifier receives the output of the previous one.
    df = VSASpread().modHistory(df)
    df = HeikenAshiMod().modHistory(df)
    df = EnergyMod().modHistory(df, featList=["open", "close", "low", "high"])

    generatorSettings = dict(nDiffs=1, nPoints=32, flatStack=False, fitOnStep=False)
    generator = MultiScalerDiffGenerator(featureList=priceFeatList, **generatorSettings)
    generator.setFitMode(True)
    # globalFit's return value is the object that gets saved.
    fittedGenerator = generator.globalFit(df)
    fittedGenerator.saveGenerator("./MSDiffGen.pkl")
# Script section: load symbol history, enrich it, fit and save a composite
# generator, then reload that generator once per agent role.
# Relies on symbol, timeframe and hkFeatList being defined earlier in the module.
#dataUpdater.fullUpdate(terminal, symbol, timeframe, startDate="2015-01-01 00:00:00")
df = SymbolDataManager().getData(symbol, timeframe)

########
# Chain the three history modifiers; each one receives the previous output.
historyMod = VSASpread()
modDf = historyMod.modHistory(df)
historyMod = HeikenAshiMod()
modDf = historyMod.modHistory(modDf)
historyMod = EnergyMod()
modDf = historyMod.modHistory(modDf, featList=["open", "close", "low", "high"])
########

# Two generators that differ only in nDiffs (1 and 2).
genList = []
for i in range(2):
    gen = MultiScalerDiffGenerator(featureList=hkFeatList, nDiffs=i+1, nPoints = 96, flatStack = False, fitOnStep = False)
    genList.append( gen )

# Fit the composite on the enriched history and persist it.
# Unlike the single-generator snippets, the return value of globalFit is not
# used here; the object is saved as-is.
compositeGenerator = CompositeGenerator( genList=genList, flatStack=False)
compositeGenerator.setFitMode(True)
compositeGenerator.globalFit(modDf)
compositeGenerator.saveGenerator("./CompositeMSDiffGen.pkl")

################################
# train agent
################################
# Each role loads its own copy of the generator that was just saved.
openerPriceDiffGenerator = CompositeGenerator().loadGenerator("./CompositeMSDiffGen.pkl")
buyerPriceDiffGenerator = CompositeGenerator().loadGenerator("./CompositeMSDiffGen.pkl")
sellerPriceDiffGenerator = CompositeGenerator().loadGenerator("./CompositeMSDiffGen.pkl")

# Last 7200 rows, then the first 5200 of those
# (assuming modDf is a pandas DataFrame -- TODO confirm).
trainDf = modDf.tail(7200).head(5200)
Exemple #11
0
# Script section: load symbol history, enrich it, fit and save a single
# MultiScalerDiffGenerator, then reload it once per agent role.
# Relies on dataManager, symbol, timeframe and hkFeatList being defined
# earlier in the module.
#dataUpdater.fullUpdate(terminal, symbol, timeframe, startDate="2008-01-01 00:00:00")
df = dataManager.getData(symbol, timeframe)
#df = df.tail(110000)

########
# Chain the three history modifiers; each one receives the previous output.
historyMod = VSASpread()
modDf = historyMod.modHistory(df)
historyMod = HeikenAshiMod()
modDf = historyMod.modHistory(modDf)
historyMod = EnergyMod()
modDf = historyMod.modHistory(modDf, featList=["open", "close", "low", "high"])
########

# Fit on the enriched history; the object returned by globalFit is the one saved.
priceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList,
                                              nDiffs=1,
                                              nPoints=256,
                                              flatStack=False,
                                              fitOnStep=False)
priceDiffGenerator.setFitMode(True)
priceDiffGenerator = priceDiffGenerator.globalFit(modDf)
priceDiffGenerator.saveGenerator("./MSDiffGen.pkl")

################################
# train agent
################################
# Each role loads its own copy of the generator that was just saved.
openerPriceDiffGenerator = MultiScalerDiffGenerator(
    featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
buyerPriceDiffGenerator = MultiScalerDiffGenerator(
    featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
sellerPriceDiffGenerator = MultiScalerDiffGenerator(
    featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
def useAgent(symbol, timeframe, terminal, dataUpdater, dataManager, hkFeatList,
             saveDir, genPath, agentName, timeConstraint):
    """Run a saved composite agent inside a RealCompositeEnv and print the time taken.

    symbol, timeframe, terminal, dataUpdater, dataManager -- forwarded
        positionally to RealCompositeEnv.
    hkFeatList -- feature list given to every MultiScalerDiffGenerator.
    saveDir / agentName -- passed to CompositeAgent.load_agent.
    genPath -- file the generators are loaded from.
    timeConstraint -- forwarded to CompositeAgent.use_agent.

    Calls reset_keras() at the end and returns None.
    """
    useCpu(nThreads=8, nCores=8)

    roleGenerators = [
        MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
        for _ in range(3)
    ]
    # Fit mode is switched off on every generator after all three are loaded.
    for generator in roleGenerators:
        generator.setFitMode(False)
    openerGen, buyerGen, sellerGen = roleGenerators

    testEnv = RealCompositeEnv(symbol, timeframe, terminal, dataUpdater, dataManager,
                               openerGen, buyerGen, sellerGen,
                               startDeposit=300, lotSize=0.1, lotCoef=100000,
                               stoplossPuncts=60, takeprofitPuncts=120,
                               renderFlag=True)

    # State and action sizes are read from the environment, role by role.
    roleAgents = [
        DQNAgent(testEnv.observation_space[role], testEnv.action_space[role].n)
        for role in ("opener", "buyer", "seller")
    ]
    agent = CompositeAgent(*roleAgents)
    agent = agent.load_agent(saveDir, agentName, dropSupportModel=True)
    print("start using agent")

    startTime = datetime.now()
    agent.use_agent(testEnv, timeConstraint=timeConstraint)
    elapsed = datetime.now() - startTime
    print("Use time: {}".format(elapsed))
    reset_keras()
    featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
# Script section: load the remaining W2V generators for the opener, combine
# them into one composite generator, and load stub generators for the
# buyer/seller roles.
# openerPriceDiffGenerator (first entry of the composite list below) is
# assigned by the statement that ends just above this span.
openerEnergyDiffGenerator = W2VDiffGenerator(
    featureList=energyFeatList).loadGenerator("./w2vEnergyDiffGen.pkl")
# NOTE(review): this "Hk" generator is constructed with energyFeatList, the
# same list as the energy generator above -- confirm this is intended.
openerHkDiffGenerator = W2VDiffGenerator(
    featureList=energyFeatList).loadGenerator("./w2vHkDiffGen.pkl")
openerVSADiffGenerator = W2VDiffGenerator(
    featureList=vsaFeatList).loadGenerator("./w2vVSADiffGen.pkl")
openerVolumeDiffGenerator = W2VDiffGenerator(
    featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
# The opener sees all five generators through a single composite.
openerCompositeGenerator = W2VCompositeGenerator([
    openerPriceDiffGenerator, openerEnergyDiffGenerator, openerHkDiffGenerator,
    openerVSADiffGenerator, openerVolumeDiffGenerator
],
                                                 flatStack=False)

# Buyer and seller each load their own copy of ./stubGen.pkl.
buyerPriceDiffGenerator = MultiScalerDiffGenerator(
    featureList=priceFeatList).loadGenerator("./stubGen.pkl")
sellerPriceDiffGenerator = MultiScalerDiffGenerator(
    featureList=priceFeatList).loadGenerator("./stubGen.pkl")

# Successive tail/head cuts: last 470000 rows, then the last 446500 of those,
# then the first 423000 (assuming modDf is a pandas DataFrame -- TODO confirm).
trainDf = modDf.tail(470000).tail(446500).head(423000)
trainEnv = CompositeEnv(trainDf,
                        openerCompositeGenerator,
                        buyerPriceDiffGenerator,
                        sellerPriceDiffGenerator,
                        startDeposit=300,
                        lotSize=0.1,
                        lotCoef=100000,
                        spread=18,
                        spreadCoef=0.00001,
                        stoplossPuncts=100,
                        takeprofitPuncts=200,
Exemple #14
0
def trainAgent(trainDf,
               backTestDf,
               startDeposit=300,
               lotSize=0.1,
               lotCoef=100000,
               spread=18,
               spreadCoef=0.00001):
    """Fit a composite opener/buyer/seller DQN agent for 3 episodes.

    trainDf / backTestDf -- data handed to the training and back-test CompositeEnv.
    startDeposit, lotSize, lotCoef, spread, spreadCoef -- forwarded unchanged
        to both environments.

    Generators are loaded from ./MSDiffGen.pkl with the module-level
    priceFeatList. Returns the value produced by CompositeAgent.fit_agent.
    """
    generatorFile = "./MSDiffGen.pkl"
    openerGen, buyerGen, sellerGen = [
        MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator(generatorFile)
        for _ in range(3)
    ]

    # Both environments share the same trading settings and generators.
    envSettings = dict(startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                       spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    trainEnv = CompositeEnv(trainDf, openerGen, buyerGen, sellerGen, **envSettings)
    backTestEnv = CompositeEnv(backTestDf, openerGen, buyerGen, sellerGen, **envSettings)

    # State and action sizes are read from the training environment.
    openerAgent = DQNAgent(trainEnv.observation_space["opener"],
                           trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200,
                           epsilon_min=0.05, epsilon=1, epsilon_decay=0.9999,
                           learning_rate=0.0002)
    # Buyer and seller are configured with the same hyper-parameters.
    closerSettings = dict(memorySize=5000, batch_size=200, train_start=300,
                          epsilon_min=0.05, epsilon=1, epsilon_decay=1.0,
                          learning_rate=0.0002)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"],
                          trainEnv.action_space["buyer"].n,
                          **closerSettings)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"],
                           trainEnv.action_space["seller"].n,
                           **closerSettings)

    compositeAgent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    return compositeAgent.fit_agent(env=trainEnv, backTestEnv=backTestEnv,
                                    nEpisodes=3, nWarmUp=0,
                                    uniformEps=False, synEps=True,
                                    plotScores=True, saveBest=False, saveFreq=1)
def trainAgent(trainDf, backTestDf):
    """Fit a composite opener/buyer/seller DQN agent for 5 episodes and report the time.

    trainDf / backTestDf -- data handed to the training and back-test CompositeEnv.

    Generators are loaded from ./MSDiffGen.pkl with the module-level
    hkFeatList. Prints the total training time and returns the value
    produced by CompositeAgent.fit_agent.
    """
    # Start the clock here: the closing report subtracts this value, and it
    # was previously never assigned inside the function.
    startTime = datetime.now()

    # Each sub-agent role gets its own separately loaded generator instance.
    openerPriceDiffGenerator = MultiScalerDiffGenerator(
        featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(
        featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(
        featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")

    trainEnv = CompositeEnv(trainDf,
                            openerPriceDiffGenerator,
                            buyerPriceDiffGenerator,
                            sellerPriceDiffGenerator,
                            startDeposit=300,
                            lotSize=0.1,
                            lotCoef=100000,
                            spread=18,
                            spreadCoef=0.00001,
                            renderFlag=True)
    backTestEnv = CompositeEnv(backTestDf,
                               openerPriceDiffGenerator,
                               buyerPriceDiffGenerator,
                               sellerPriceDiffGenerator,
                               startDeposit=300,
                               lotSize=0.1,
                               lotCoef=100000,
                               spread=18,
                               spreadCoef=0.00001,
                               renderFlag=True)
    # State and action sizes are read from the training environment.
    openerAgent = DQNAgent(trainEnv.observation_space["opener"],
                           trainEnv.action_space["opener"].n,
                           memorySize=2500,
                           batch_size=100,
                           train_start=200,
                           epsilon_min=0.05,
                           epsilon=1,
                           epsilon_decay=0.9994,
                           learning_rate=0.001)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"],
                          trainEnv.action_space["buyer"].n,
                          memorySize=5000,
                          batch_size=200,
                          train_start=300,
                          epsilon_min=0.05,
                          epsilon=1,
                          epsilon_decay=0.9999,
                          learning_rate=0.001)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"],
                           trainEnv.action_space["seller"].n,
                           memorySize=5000,
                           batch_size=200,
                           train_start=300,
                           epsilon_min=0.05,
                           epsilon=1,
                           epsilon_decay=0.9999,
                           learning_rate=0.001)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    # agent  = agent.load_agent("./", "best_composite")
    # agent  = agent.load_agent("./", "checkpoint_composite")
    lastSaveEp = agent.fit_agent(env=trainEnv,
                                 backTestEnv=backTestEnv,
                                 nEpisodes=5,
                                 nWarmUp=0,
                                 uniformEps=False,
                                 synEps=True,
                                 plotScores=True,
                                 saveBest=True,
                                 saveFreq=1)

    endTime = datetime.now()
    print("Training finished. Total time: {}".format(endTime - startTime))
    return lastSaveEp