def trainAgent(trainDf, backTestDf, genPath, saveAgentDir, saveAgentName):
    startTime = datetime.now()
    print("Start train time: {}".format(startTime))

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                            startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                            stoplossPuncts=100, takeprofitPuncts=300, renderFlag=True,
                            renderDir=saveAgentDir, renderName="trainDealsPlot")
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                               startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                               stoplossPuncts=100, takeprofitPuncts=300, renderFlag=True,
                               renderDir=saveAgentDir, renderName="backTestDealsPlot")

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9997, learning_rate=0.0002)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=4, nWarmUp=0,
                                 uniformEps=False, synEps=True, plotScores=False, saveBest=True,
                                 saveFreq=1, saveDir=saveAgentDir, saveName=saveAgentName)

    endTime = datetime.now()
    print("Training finished. Total time: {}".format(endTime - startTime))
    return lastSaveEp
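# Usage sketch for trainAgent (illustrative only): it assumes a preprocessed frame is
# already available and that hkFeatList is defined at module level; the split sizes,
# generator pickle path, save directory and agent name are placeholders, though
# "MSDiffGen.pkl" and "best_composite" mirror names used elsewhere in this module.
def runTrainingSketch(preprocDf):
    trainSlice = preprocDf.tail(20000).head(16000)
    backTestSlice = preprocDf.tail(20000).tail(4000)
    lastSavedEpisode = trainAgent(trainSlice, backTestSlice, genPath="./MSDiffGen.pkl",
                                  saveAgentDir="./", saveAgentName="best_composite")
    print("last saved episode: {}".format(lastSavedEpisode))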
def useAgent(symbol, timeframe, terminal, dataUpdater, dataManager, hkFeatList,
             saveDir, genPath, agentName, timeConstraint):
    ###############################
    # use agent on live terminal data
    ###############################
    useCpu(nThreads=8, nCores=8)

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    openerPriceDiffGenerator.setFitMode(False)
    buyerPriceDiffGenerator.setFitMode(False)
    sellerPriceDiffGenerator.setFitMode(False)

    testEnv = RealCompositeEnv(symbol, timeframe, terminal, dataUpdater, dataManager,
                               openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                               startDeposit=300, lotSize=0.1, lotCoef=100000,
                               stoplossPuncts=60, takeprofitPuncts=120, renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent(saveDir, agentName, dropSupportModel=True)
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    startTime = datetime.now()
    agent.use_agent(testEnv, timeConstraint=timeConstraint)
    endTime = datetime.now()
    print("Use time: {}".format(endTime - startTime))
    reset_keras()
def collectStatistics(testDf, startDeposit=300, lotSize=0.1, lotCoef=100000,
                      spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    buyerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    sellerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    openerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    buyerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    sellerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    openerPriceDiffGenerator.setFitMode(False)
    buyerPriceDiffGenerator.setFitMode(False)
    sellerPriceDiffGenerator.setFitMode(False)
    openerVolumeDiffGenerator.setFitMode(False)
    buyerVolumeDiffGenerator.setFitMode(False)
    sellerVolumeDiffGenerator.setFitMode(False)

    openerCompositeGenerator = W2VCompositeGenerator([openerPriceDiffGenerator, openerVolumeDiffGenerator],
                                                     flatStack=False)
    buyerCompositeGenerator = W2VCompositeGenerator([buyerPriceDiffGenerator, buyerVolumeDiffGenerator],
                                                    flatStack=False)
    sellerCompositeGenerator = W2VCompositeGenerator([sellerPriceDiffGenerator, sellerVolumeDiffGenerator],
                                                     flatStack=False)

    testEnv = CompositeEnv(testDf, openerCompositeGenerator, buyerCompositeGenerator, sellerCompositeGenerator,
                           startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                           spread=spread, spreadCoef=spreadCoef, renderFlag=True)

    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent("./", "best_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)
    return dealsStatistics
def useAgent(preprocDf, hkFeatList, saveDir, genPath, agentName, timeConstraint):
    ###############################
    # use agent on test
    ###############################
    useCpu(nThreads=8, nCores=8)
    testDf = preprocDf.tail(232)

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    #openerPriceDiffGenerator.setFitMode(False)
    #buyerPriceDiffGenerator.setFitMode(False)
    #sellerPriceDiffGenerator.setFitMode(False)

    testEnv = CompositeEnv(testDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           renderFlag=True, renderDir=saveDir, renderName="testDealsPlot")

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent(saveDir, agentName, dropSupportModel=True)
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    startTime = datetime.now()
    dealsStatistics = agent.use_agent(testEnv, timeConstraint=timeConstraint)
    endTime = datetime.now()
    print("Use time: {}".format(endTime - startTime))
    reset_keras()

    sumRew = 0
    cumulativeReward = []
    for i in range(len(dealsStatistics)):
        sumRew += dealsStatistics[i]
        cumulativeReward.append(sumRew)
    plt.plot([x for x in range(len(cumulativeReward))], cumulativeReward)
    plt.show()
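# The cumulative-reward loop in useAgent above can also be expressed with numpy;
# this optional helper is a minimal equivalent sketch (it assumes matplotlib.pyplot
# is already imported as plt, as in the rest of this module).
import numpy as np

def plotCumulativeReward(dealsStatistics):
    # running sum of per-deal rewards, then a simple line plot of the resulting curve
    cumulativeReward = np.cumsum(dealsStatistics)
    plt.plot(range(len(cumulativeReward)), cumulativeReward)
    plt.show()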
def collectStatistics(symbol, nExperiment, trainDf, backTestDf, testDf, startDeposit=300,
                      lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001):
    reset_keras()

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                            startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                            spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                               startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                               spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    forwardTestEnv = CompositeEnv(testDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                                  startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                                  spread=spread, spreadCoef=spreadCoef, renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9995, learning_rate=0.0005)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=1.0, learning_rate=0.0005)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=1.0, learning_rate=0.0005)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    continueFit = False
    trainDealsStatistics = []
    testDealsStatistics = []
    backDealsStatistics = []
    for i in range(20):
        agent.fit_agent(env=trainEnv, backTestEnv=None, nEpisodes=1, nWarmUp=0,
                        uniformEps=False, synEps=True, plotScores=True, saveBest=False,
                        saveFreq=111, continueFit=continueFit)
        continueFit = True
        trainDealsStatistics.append(agent.use_agent(trainEnv))
        testDealsStatistics.append(agent.use_agent(forwardTestEnv))
        backDealsStatistics.append(agent.use_agent(backTestEnv))

        with open("./" + "trainDealsStatistics_{}_{}.pkl".format(symbol, nExperiment), mode="wb") as dealsFile:
            joblib.dump(trainDealsStatistics, dealsFile)
        with open("./" + "testDealsStatistics_{}_{}.pkl".format(symbol, nExperiment), mode="wb") as dealsFile:
            joblib.dump(testDealsStatistics, dealsFile)
        with open("./" + "backDealsStatistics_{}_{}.pkl".format(symbol, nExperiment), mode="wb") as dealsFile:
            joblib.dump(backDealsStatistics, dealsFile)
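# Sketch of reading the dumped statistics back for offline analysis; joblib is the
# same library used for dumping above, and the prefix/symbol/experiment values are
# placeholders that follow the file-name pattern used in collectStatistics.
def loadDealsStatistics(prefix, symbol, nExperiment):
    with open("./{}_{}_{}.pkl".format(prefix, symbol, nExperiment), mode="rb") as dealsFile:
        return joblib.load(dealsFile)

# Example: per-iteration total reward on the forward-test set (illustrative values).
# testStats = loadDealsStatistics("testDealsStatistics", "EURUSD", 0)
# iterationTotals = [sum(deals) for deals in testStats]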
def evaluateAgent(backTestDf, genPath, agentDir, agentName):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator(genPath)

    testEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           stoplossPuncts=100, takeprofitPuncts=300, renderFlag=True,
                           renderDir=agentDir, renderName="testDealsPlot")

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent(agentDir, agentName)
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)

    sumRew = 0
    for i in range(len(dealsStatistics)):
        sumRew += dealsStatistics[i]
    return sumRew
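# Checkpoint-selection sketch built on evaluateAgent: score several saved agents on
# the same back-test frame and keep the name with the highest total reward. The paths
# are placeholders; the agent names are the ones used elsewhere in this module.
def pickBestCheckpoint(backTestDf, genPath="./MSDiffGen.pkl", agentDir="./"):
    candidateNames = ["best_composite", "checkpoint_composite"]
    scores = {name: evaluateAgent(backTestDf, genPath, agentDir, name) for name in candidateNames}
    bestName = max(scores, key=scores.get)
    print("best checkpoint: {} with reward {}".format(bestName, scores[bestName]))
    return bestName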
def collectStatistics(testDf, startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    #openerPriceDiffGenerator.setFitMode(False)
    #buyerPriceDiffGenerator.setFitMode(False)
    #sellerPriceDiffGenerator.setFitMode(False)

    testEnv = CompositeEnv(testDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                           spread=spread, spreadCoef=spreadCoef, renderFlag=True)

    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    #agent = agent.load_agent("./", "best_composite")
    agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)
    return dealsStatistics
def evaluateAgent(backTestDf):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")

    testEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
    buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
    sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    agent = agent.load_agent("./", "best_composite")
    # agent = agent.load_agent("./", "checkpoint_composite")

    print("start using agent")
    dealsStatistics = agent.use_agent(testEnv)

    sumRew = 0
    for i in range(len(dealsStatistics)):
        sumRew += dealsStatistics[i]
    return sumRew
def trainAgent(trainDf, startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    buyerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    sellerPriceDiffGenerator = W2VDiffGenerator(featureList=priceFeatList).loadGenerator("./w2vPriceDiffGen.pkl")
    openerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    buyerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")
    sellerVolumeDiffGenerator = W2VDiffGenerator(featureList=volumeFeatList).loadGenerator("./w2vVolumeDiffGen.pkl")

    openerCompositeGenerator = W2VCompositeGenerator([openerPriceDiffGenerator, openerVolumeDiffGenerator],
                                                     flatStack=False)
    buyerCompositeGenerator = W2VCompositeGenerator([buyerPriceDiffGenerator, buyerVolumeDiffGenerator],
                                                    flatStack=False)
    sellerCompositeGenerator = W2VCompositeGenerator([sellerPriceDiffGenerator, sellerVolumeDiffGenerator],
                                                     flatStack=False)

    trainEnv = CompositeEnv(trainDf, openerCompositeGenerator, buyerCompositeGenerator, sellerCompositeGenerator,
                            startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                            spread=spread, spreadCoef=spreadCoef, renderFlag=True)

    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=500, batch_size=100, train_start=200, epsilon_min=0.1,
                           epsilon=1, epsilon_decay=0.9994)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=1000, batch_size=200, train_start=300, epsilon_min=0.1,
                          epsilon=1, epsilon_decay=0.9999)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=1000, batch_size=200, train_start=300, epsilon_min=0.1,
                           epsilon=1, epsilon_decay=0.9999)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    agent.fit_agent(env=trainEnv, nEpisodes=15, nWarmUp=0, uniformEps=False,
                    plotScores=True, saveBest=True, saveFreq=2)
                        takePos=1, maxLoss=20000, maxTake=20000,
                        stoplossPuncts=20000, takeprofitPuncts=20000,
                        riskPoints=110, riskLevels=5, parallelOpener=False,
                        renderDir="./", renderName="back_plot")

# get size of state and action from environment
openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                       memorySize=40000, batch_size=20, train_start=20000, epsilon_min=0.2,
                       epsilon=1, discount_factor=0.99, epsilon_decay=0.9999, learning_rate=0.0001)
buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                      memorySize=40000, batch_size=20, train_start=10000, epsilon_min=0.2,
                      epsilon=1, discount_factor=0.99, epsilon_decay=0.9999, learning_rate=0.0001)
sellerAgent = DQNAgent(trainEnv.observation_space["seller"],
openerCompositeGenerator = W2VCompositeGenerator([openerEnergyScalerGenerator, openerEnergyDiffGenerator],
                                                 flatStack=False)
buyerCompositeGenerator = W2VCompositeGenerator([buyerEnergyScalerGenerator, buyerEnergyDiffGenerator],
                                                flatStack=False)
sellerCompositeGenerator = W2VCompositeGenerator([sellerEnergyScalerGenerator, sellerEnergyDiffGenerator],
                                                 flatStack=False)

testEnv = CompositeEnv(testDf, openerCompositeGenerator, buyerCompositeGenerator, sellerCompositeGenerator,
                       startDeposit=300, lotSize=0.01, lotCoef=100000, renderFlag=True)

# get size of state and action from environment
openerAgent = DQNAgent(testEnv.observation_space["opener"], testEnv.action_space["opener"].n)
buyerAgent = DQNAgent(testEnv.observation_space["buyer"], testEnv.action_space["buyer"].n)
sellerAgent = DQNAgent(testEnv.observation_space["seller"], testEnv.action_space["seller"].n)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
agent = agent.load_agent("./", "best_composite")

print("start using agent")
agent.use_agent(testEnv)

"""realEnv = RealEnv(symbol=symbol, timeframe=timeframe, terminal=terminal,
                  dataUpdater=dataUpdater, dataManager=dataManager,
                  featureFactory=featureFactory, obsFeatList=obsFeatList)
agent.use_agent(realEnv)"""
def trainAgent(trainDf, backTestDf):
    startTime = datetime.now()

    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                            startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                            renderFlag=True)
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                               startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                               renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9994, learning_rate=0.001)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=0.9999, learning_rate=0.001)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9999, learning_rate=0.001)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
    # agent = agent.load_agent("./", "best_composite")
    # agent = agent.load_agent("./", "checkpoint_composite")

    lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=5, nWarmUp=0,
                                 uniformEps=False, synEps=True, plotScores=True, saveBest=True, saveFreq=1)

    endTime = datetime.now()
    print("Training finished. Total time: {}".format(endTime - startTime))
    return lastSaveEp
buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")
sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=hkFeatList).loadGenerator("./MSDiffGen.pkl")

trainDf = modDf.tail(470000).tail(120000).tail(100000)
trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                        startDeposit=300, lotSize=0.1, lotCoef=100000, spread=20, spreadCoef=0.00001,
                        stoplossPuncts=100, takeprofitPuncts=200, renderFlag=True,
                        renderDir="./", renderName="train_plot")

backTestDf = modDf.tail(470000).tail(120000).head(20000)
#backTestDf = modDf.head(50000).tail(3192).head(1192)
backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=20, spreadCoef=0.00001,
                           stoplossPuncts=100, takeprofitPuncts=200, renderFlag=True,
                           renderDir="./", renderName="back_plot")

# get size of state and action from environment.
openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                       memorySize=5000, batch_size=200, train_start=1200, epsilon_min=0.66,
                       epsilon=1, epsilon_decay=0.999, learning_rate=0.001, discount_factor=0.0)
buyerAgent = StubAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                       memorySize=10, batch_size=1, train_start=2, epsilon_min=0.01,
                       epsilon=1, epsilon_decay=0.999, learning_rate=0.0001)
sellerAgent = StubAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                        memorySize=10, batch_size=1, train_start=2, epsilon_min=0.01,
                        epsilon=1, epsilon_decay=0.999, learning_rate=0.0001)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

###################################
#agent = agent.load_agent("./", "best_composite")
#agent = agent.load_agent("./", "checkpoint_composite")
###################################

lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=30, nWarmUp=1,
                             uniformEps=False, synEps=False, plotScores=False, saveBest=True, saveFreq=1)
print("data updated") print("start train") ################################ # train agent ################################ trainDf = df[:int(len(df) * 0.9)] trainEnv = FeatureGenEnv(trainDf, featureFactory, startDeposit=10, lotSize=0.01, lotCoef=10, renderFlag=False) # get size of state and action from environment state_size = trainEnv.observation_space.shape[0] action_size = trainEnv.action_space.n agent = DQNAgent(state_size, action_size, epsilon_decay=0.999) agent.fit_agent(env=trainEnv, nEpisodes=300, plotScores=True, saveFreq=10) #TODO: add save best only agent.save_agent("./", "test_agent") print("agent saved") ############################### # use agent ############################### testDF = df[int(len(df) * 0.9):] featureFactory = ScalerGenerator(featureList=obsFeatList, fitOnStep=True) featureFactory.globalFit(df[:int(len(df) * 0.9)]) testEnv = FeatureGenEnv(testDF, featureFactory, startDeposit=10, lotSize=0.01,
                        startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                        stoplossPuncts=100, takeprofitPuncts=200, renderFlag=True,
                        renderDir="./", renderName="back_plot")

# get size of state and action from environment.
openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                       memorySize=600, batch_size=500, train_start=550, epsilon_min=0.2,
                       epsilon=1, epsilon_decay=0.9999, learning_rate=0.001)
buyerAgent = StubAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                       memorySize=600, batch_size=500, train_start=550, epsilon_min=0.2,
                       epsilon=1, epsilon_decay=0.9999, learning_rate=0.001)
sellerAgent = StubAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
#dataUpdater.fullUpdate(terminal, symbol, timeframe, startDate="2015-01-01 00:00:00")
df = SymbolDataManager().getData(symbol, timeframe)

featureScaler = FeatureScaler()
df = featureScaler.extractFeature(df, featList=obsFeatList)
print("data updated")
print("start train")

################################
# train agent
################################
trainDf = df[:int(len(df) * 0.9)]
trainEnv = SimpleEnv(trainDf, obsFeatList=obsFeatList, renderFlag=False)

# get size of state and action from environment
state_size = trainEnv.observation_space.shape[0]
action_size = trainEnv.action_space.n

agent = DQNAgent(state_size, action_size, epsilon_decay=0.999)
agent.fit_agent(env=trainEnv, nEpisodes=1, plotScores=True, saveFreq=1)
#TODO: add save best only
agent.save_agent("./", "test_agent")
print("agent saved")

###############################
# use agent
###############################
testDF = df[int(len(df) * 0.98):]
testEnv = SimpleEnv(testDF, obsFeatList=obsFeatList, renderFlag=True)

state_size = testEnv.observation_space.shape[0]
action_size = testEnv.action_space.n

print("loading agent")
agent = DQNAgent(state_size, action_size).load_agent(".", "test_agent")
print("start using agent")
################################
# train agent
################################
trainDf = df[:int(len(df) * 0.9)]
trainEnv = CompositeEnv(trainDf, openerFeatureFactory, buyerFeatureFactory, sellerFeatureFactory,
                        startDeposit=300, lotSize=0.01, lotCoef=100000, renderFlag=True)

# get size of state and action from environment
openerAgent = DQNAgent(trainEnv.observation_space["opener"][0], trainEnv.action_space["opener"].n,
                       memorySize=2000, batch_size=1000, train_start=1200, epsilon_decay=0.999)
buyerAgent = DQNAgent(trainEnv.observation_space["buyer"][0], trainEnv.action_space["buyer"].n,
                      memorySize=2000, batch_size=1000, train_start=1200, epsilon_decay=0.999)
sellerAgent = DQNAgent(trainEnv.observation_space["seller"][0], trainEnv.action_space["seller"].n,
                       memorySize=2000, batch_size=1000, train_start=1200, epsilon_decay=0.999)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
################################
# train agent
################################
openerPriceDiffGenerator = CompositeGenerator().loadGenerator("./CompositeMSDiffGen.pkl")
buyerPriceDiffGenerator = CompositeGenerator().loadGenerator("./CompositeMSDiffGen.pkl")
sellerPriceDiffGenerator = CompositeGenerator().loadGenerator("./CompositeMSDiffGen.pkl")

trainDf = modDf.tail(7200).head(5200)
trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                        startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                        renderFlag=True)

backTestDf = modDf.tail(9200).head(2200)
backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                           startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001,
                           renderFlag=True)

# get size of state and action from environment
openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                       memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                       epsilon=1, epsilon_decay=0.9999, learning_rate=0.0001)
buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                      memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                      epsilon=1, epsilon_decay=1.0, learning_rate=0.0001)
sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                       memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                       epsilon=1, epsilon_decay=1.0, learning_rate=0.0001)
agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)
#agent = agent.load_agent("./", "best_composite")
#agent = agent.load_agent("./", "checkpoint_composite")

lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=25, nWarmUp=0,
                             uniformEps=False, synEps=True, plotScores=True, saveBest=True, saveFreq=1)

endTime = datetime.now()
print("Training finished. Total time: {}".format(endTime - startTime))
def trainAgent(trainDf, backTestDf, startDeposit=300, lotSize=0.1, lotCoef=100000, spread=18, spreadCoef=0.00001):
    openerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    buyerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")
    sellerPriceDiffGenerator = MultiScalerDiffGenerator(featureList=priceFeatList).loadGenerator("./MSDiffGen.pkl")

    trainEnv = CompositeEnv(trainDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                            startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                            spread=spread, spreadCoef=spreadCoef, renderFlag=True)
    backTestEnv = CompositeEnv(backTestDf, openerPriceDiffGenerator, buyerPriceDiffGenerator, sellerPriceDiffGenerator,
                               startDeposit=startDeposit, lotSize=lotSize, lotCoef=lotCoef,
                               spread=spread, spreadCoef=spreadCoef, renderFlag=True)

    # get size of state and action from environment
    openerAgent = DQNAgent(trainEnv.observation_space["opener"], trainEnv.action_space["opener"].n,
                           memorySize=2500, batch_size=100, train_start=200, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=0.9999, learning_rate=0.0002)
    buyerAgent = DQNAgent(trainEnv.observation_space["buyer"], trainEnv.action_space["buyer"].n,
                          memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                          epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    sellerAgent = DQNAgent(trainEnv.observation_space["seller"], trainEnv.action_space["seller"].n,
                           memorySize=5000, batch_size=200, train_start=300, epsilon_min=0.05,
                           epsilon=1, epsilon_decay=1.0, learning_rate=0.0002)
    agent = CompositeAgent(openerAgent, buyerAgent, sellerAgent)

    lastSaveEp = agent.fit_agent(env=trainEnv, backTestEnv=backTestEnv, nEpisodes=3, nWarmUp=0,
                                 uniformEps=False, synEps=True, plotScores=True, saveBest=False, saveFreq=1)
    return lastSaveEp