# Imports assumed by the snippets below. The environments (Market, RW,
# AR1, MonthlySnP), MaximizeReturnTask, the RRL learner, SignLayer,
# createDataset, and the performance helpers imported as pE are
# project-local modules and are not shown here; the remaining RL classes
# (ActionValueNetwork, Reinforce, Maze, MazeTask, ActionValueTable,
# EpsilonGreedyAgent, QLambda) come from the PyBrain version in use.
import math
import pylab
import scipy.stats as st
import matplotlib.pyplot as plt
from matplotlib import pyplot
from numpy import append, array, cumsum, insert, log, mean, sign
from pandas import DataFrame
from pybrain.structure import (RecurrentNetwork, LinearLayer, TanhLayer,
                               BiasUnit, FullConnection)
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import ContinuousExperiment


def main():
    inData = createDataset()
    env = MarketEnvironment(inData)
    task = MaximizeReturnTask(env)
    numIn = min(env.worldState.shape)

    # Recurrent network: linear input layer plus bias unit feeding a sign
    # output layer, with a recurrent connection so F(t-1) feeds into F(t).
    net = RecurrentNetwork()
    net.addInputModule(BiasUnit(name='bias'))
    #net.addOutputModule(TanhLayer(1, name='out'))
    net.addOutputModule(SignLayer(1, name='out'))
    net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
    net.addInputModule(LinearLayer(numIn, name='in'))
    net.addConnection(FullConnection(net['in'], net['out'], name='c1'))
    net.addConnection(FullConnection(net['bias'], net['out'], name='c2'))
    net.sortModules()

    # remove bias (set weight to 0)
    #initialParams = append(array([0.0]), net._params[1:])
    #net._setParameters(initialParams)
    # earlier parameter sets kept for reference:
    #net._setParameters([0.0, -0.05861005, 1.64281513, 0.98302613])
    #net._setParameters([0., 1.77132063, 1.3843613, 4.73725269])
    #net._setParameters([0.0, -0.95173719, 1.92989266, 0.06837472])
    net._setParameters([0.0, 1.29560957, -1.14727503, -1.80005888, 0.66351325, 1.19240189])

    ts = env.ts
    learner = RRL(numIn + 2, ts)  # ENAC() #Q_LinFA(2,1)
    agent = LearningAgent(net, learner)
    exp = ContinuousExperiment(task, agent)

    print(net._params)
    exp.doInteractionsAndLearn(len(ts) - 1)
    print(net._params)

    # assemble results for evaluation and plotting
    outData = DataFrame(inData['RETURNS'] / 100)
    outData['ts'] = [i / 100 for i in ts]
    outData['cum_log_ts'] = cumsum([log(1 + i) for i in outData['ts']])
    outData['Action_Hist'] = env.actionHistory
    outData['trading rets'] = pE.calculateTradingReturn(outData['Action_Hist'], outData['ts'])
    outData['cum_log_rets'] = cumsum([log(1 + x) for x in outData['trading rets']])

    paramHist = learner.paramHistory
    plt.figure(0)
    for i in range(len(net._params)):
        plt.plot(paramHist[i])
    plt.draw()

    print(pE.percentOfOutperformedMonths(outData['trading rets'], outData['ts']))
    #ax1.plot(sign(actionHist), 'r')

    plt.figure(1)
    outData['cum_log_ts'].plot(secondary_y=True)
    outData['cum_log_rets'].plot(secondary_y=True)
    outData['Action_Hist'].plot()
    plt.draw()
    plt.show()
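
# SignLayer is not part of stock PyBrain. A minimal sketch of a custom
# layer with the behaviour used above (an assumption about the project's
# actual class, not its implementation), built on PyBrain's NeuronLayer
# extension hooks:
from pybrain.structure.modules.neuronlayer import NeuronLayer


class SignLayerSketch(NeuronLayer):
    """One-unit layer emitting sign(input): a hard {-1, 0, +1} position."""

    def _forwardImplementation(self, inbuf, outbuf):
        outbuf[:] = sign(inbuf)

    def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        # sign() is flat almost everywhere, so pass the error straight
        # through; the RRL learner computes its own gradients anyway.
        inerr[:] = outerr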
def learn(client):
    av_table = ActionValueNetwork(4, 1)
    learner = Reinforce()
    agent = LearningAgent(av_table, learner)
    env = CarEnvironment(client)
    task = CarTask(env)
    experiment = ContinuousExperiment(task, agent)
    while True:
        experiment.doInteractionsAndLearn(1)
        agent.learn()
def main():
    numIterations = 200
    terminal_EMA_SharpeRatio = [0 for i in range(numIterations)]
    numTrades = [0 for i in range(numIterations)]
    sharpe_first_half = [0 for i in range(numIterations)]
    sharpe_sec_half = [0 for i in range(numIterations)]
    sharpe_ratio_total = [0 for i in range(numIterations)]

    for i in range(numIterations):
        env = RWEnvironment(2000)
        task = MaximizeReturnTask(env)
        numIn = min(env.worldState.shape)

        net = RecurrentNetwork()
        net.addInputModule(BiasUnit(name='bias'))
        net.addOutputModule(SignLayer(1, name='out'))
        net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
        net.addInputModule(LinearLayer(numIn, name='in'))
        net.addConnection(FullConnection(net['in'], net['out'], name='c1'))
        net.addConnection(FullConnection(net['bias'], net['out'], name='c2'))
        net.sortModules()

        ts = env.ts
        learner = RRL(numIn + 2, ts)  # ENAC() #Q_LinFA(2,1)
        agent = LearningAgent(net, learner)
        exp = ContinuousExperiment(task, agent)

        # performance tracking
        exp.doInteractionsAndLearn(len(ts) - 1)
        #print(net._params)
        terminal_EMA_SharpeRatio[i] = learner.ema_sharpeRatio[-1]
        rs = pE.calculateTradingReturn(env.actionHistory, ts)
        sharpe_first_half[i] = pE.annualisedSharpe(rs[:len(ts) // 2])
        sharpe_sec_half[i] = pE.annualisedSharpe(rs[len(ts) // 2:])
        sharpe_ratio_total[i] = pE.annualisedSharpe(rs)
        numTrades[i] = learner.numTrades
        print(net._params)

    # each epoch is 2000 observations, so halve the mean for trades per 1000
    print("average number of trades per 1000 observations is {}".format(mean(numTrades) / 2))
    print("mean Sharpe ratios are {} with standard error {}, and {} with standard error {}".format(
        mean(sharpe_first_half), st.sem(sharpe_first_half),
        mean(sharpe_sec_half), st.sem(sharpe_sec_half)))
    print("average Sharpe ratio for each entire epoch is {} with standard error {}".format(
        mean(sharpe_ratio_total), st.sem(sharpe_ratio_total)))

    fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True)
    l1 = ax[0].hist(sharpe_first_half, bins=20)
    ax[0].set_title('Annualised Sharpe Ratio (t=0:1000)')
    l2 = ax[1].hist(sharpe_sec_half, bins=20)
    ax[1].set_title('Annualised Sharpe Ratio (t=1001:2000)')
    plt.show()
    #plt.hist(numTrades, bins=20)
    #plt.plot(terminal_EMA_SharpeRatio)
    #plt.show()

    # plot the final epoch in detail
    actionHist = env.actionHistory
    ts = [t / 100 for t in ts]
    cum_log_r = cumsum([log(1 + ts[i]) for i in range(len(ts))])
    cum_log_R = cumsum([log(1 + (actionHist[i] * ts[i])) for i in range(len(ts))])

    fig, axes = plt.subplots(3, sharex=True)
    ln1 = axes[0].plot(cum_log_r, label='Buy and Hold')
    ln2 = axes[0].plot(cum_log_R, label='Trading Agent')
    lns = ln1 + ln2
    labs = [l.get_label() for l in lns]
    axes[0].legend(lns, labs, loc='upper left')
    axes[0].set_ylabel("Cumulative Log Returns")
    axes[0].set_title("Artificial Series")
    ln3 = axes[1].plot(actionHist, 'r', label='Trades')
    axes[1].set_ylabel("F(t)")
    axes[2].plot(learner.ema_sharpeRatio)
    axes[2].set_ylabel("EMA Sharpe Ratio")
    plt.show()
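
# pE.annualisedSharpe above is a project-local helper. A minimal sketch
# of the usual definition (an assumption about its implementation),
# scaling the per-period ratio by sqrt(252) for daily observations:
from numpy import sqrt, std


def annualised_sharpe_sketch(rets, rf=0.0, periods_per_year=252):
    """Annualised Sharpe ratio of a series of per-period returns."""
    excess = [r - rf for r in rets]
    return sqrt(periods_per_year) * mean(excess) / std(excess)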
envmatrix = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 1, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])
env = Maze(envmatrix, (7, 7))
# create task
task = MazeTask(env)
# create the ActionValueTable
table = ActionValueTable(81, 4)
table.initialize(1)
# create agent with controller and learner
agent = EpsilonGreedyAgent(table, QLambda(4))
experiment = ContinuousExperiment(task, agent)

pylab.gray()
pylab.ion()
for i in range(100000):
    experiment.doInteractionsAndLearn()
    if i % 100 == 0:
        pylab.pcolor(table.values.max(1).reshape(9, 9))
        pylab.draw()
    agent.reset()
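
# The pcolor plot above shows the value of the best of the 4 actions in
# each of the 81 (9x9) maze states. The greedy policy itself is the
# argmax over actions; a small helper for inspecting it (a sketch that
# reuses the same table.values array the plot reads):
from numpy import argmax


def greedy_policy(values):
    """Index of the best action for each maze state, on the 9x9 grid."""
    return argmax(values, axis=1).reshape(9, 9)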
net.addConnection(FullConnection(net['in'], net['out'], name='c1'))
net.addConnection(FullConnection(net['bias'], net['out'], name='c2'))
net.sortModules()

net._setParameters([-0.0, 1.8, 1.6])
print(net._params)
#print(net.activate(0.5))
#print(net.activate(0.6))
#net.activate(2)

env = AR1Environment(2000)
task = MaximizeReturnTask(env)
learner = RRL()  # ENAC() #Q_LinFA(2,1)
agent = LearningAgent(net, learner)
exp = ContinuousExperiment(task, agent)

ts = env.ts.tolist()
exp.doInteractionsAndLearn(1999)
print(net._params)

actionHist = env.actionHistory
pyplot.plot(ts[0])
pyplot.plot(actionHist)
pyplot.show()

#snp_rets = env.importSnP().tolist()[0]
#print(snp_rets.tolist()[0])
#pyplot.plot(snp_rets)
#pyplot.show()
def main():
    inData = createDataset()
    env = MarketEnvironment(inData)
    task = MaximizeReturnTask(env)
    numIn = min(env.worldState.shape)

    net = RecurrentNetwork()
    net.addInputModule(BiasUnit(name='bias'))
    net.addOutputModule(SignLayer(1, name='out'))
    net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
    net.addInputModule(LinearLayer(numIn, name='in'))
    net.addConnection(FullConnection(net['in'], net['out'], name='c1'))
    net.addConnection(FullConnection(net['bias'], net['out'], name='c2'))
    net.sortModules()

    # earlier parameter sets kept for reference:
    #net._setParameters([1.89523389, 2.41243781, -0.37355216, 0.60550426, 1.29560957, -1.14727503, -1.80005888, 0.66351325, 1.91905451])
    #net._setParameters([1.07300605, 2.37801446, -0.28118081, -0.78715898, 0.13367809, 0.31757825, -1.23956247, 1.90411791, 0.95458375])
    #net._setParameters([1.35840492, 1.87785682, -0.15779415, -0.79786631, 0.13380422, 0.0067797, -1.28202562, 2.38574234, 0.909462])
    #net._setParameters([0.36062235, 1.70329005, 2.24180157, 0.34832656, 0.31775365, -0.60400026, -0.44850303, 1.50005529, -0.99986366])
    net._setParameters([1.15741417, 1.70427034, 1.05050831, -0.47303435, -0.87220272, -1.44743793, 0.93697461, 2.77489952, 0.27374758])

    ts = env.ts
    learner = RRL(numIn + 2, ts)  # ENAC() #Q_LinFA(2,1)
    agent = LearningAgent(net, learner)
    exp = ContinuousExperiment(task, agent)

    # in-sample learning: repeated passes over the first in_sample_len steps
    in_sample_len = 500
    print("Before in sample {}".format(net._params))
    for i in range(100):
        exp.doInteractionsAndLearn(in_sample_len)
        learner.reset()
        agent.reset()
        env.reset()

    # out of sample, online learning
    print("Before oos {}".format(net._params))
    exp.doInteractionsAndLearn(len(ts) - 1)
    print("After oos {}".format(net._params))

    # performance evaluation
    dfIndex = inData['RETURNS'].index
    rf = 0  # inData['Fed Fund Target']
    outDataOOS = pE.outData(ts, env.actionHistory, dfIndex, startIndex=in_sample_len)
    sharpe_oos = pE.annualisedSharpe(outDataOOS['trading rets'], rf)
    drawDown_oos = pE.maximumDrawdown(outDataOOS['trading rets'])
    numOutperformedMonths_oos = pE.percentOfOutperformedMonths(outDataOOS['trading rets'], outDataOOS['ts'])
    traderReturn = math.exp(outDataOOS['cum_log_rets'][-1]) - 1
    benchmarkReturn = math.exp(outDataOOS['cum_log_ts'].values[-1]) - 1
    print("oos sharpe: {}, \noos drawdown: {} \noos percent outperformed months {}\noos trader return {}".format(
        sharpe_oos, drawDown_oos, numOutperformedMonths_oos, traderReturn))

    # parameter evolution, labelled by input
    paramHist = learner.paramHistory
    inData.rename(columns={'RETURNS': 'r(t-1)'}, inplace=True)
    lbs = insert(inData.columns.values, 0, 'Bias')
    lbs = append(lbs, 'F(t-1)')
    plt.figure(0)
    for i in range(len(net._params)):
        if i < 7:
            plt.plot(paramHist[i], label=lbs[i])
        else:
            plt.plot(paramHist[i], '--', label=lbs[i])
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3)
    plt.draw()

    fig, axes = plt.subplots(nrows=2, ncols=1)
    plotFrame = outDataOOS[['cum_log_ts', 'cum_log_rets']]
    plotFrame.columns = ['Buy and Hold', 'Trading Agent']
    plotFrame.plot(ax=axes[0])
    outDataOOS['Action_Hist'].plot(ax=axes[1], color='r')
    plt.draw()
    plt.show()
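
# pE.maximumDrawdown is also project-local. A hedged sketch of the
# standard definition: the largest peak-to-trough fall of the cumulative
# return curve built from the per-period trading returns.
def maximum_drawdown_sketch(rets):
    """Maximum drawdown of a per-period return series, as a fraction of the peak."""
    curve, peak, max_dd = 1.0, 1.0, 0.0
    for r in rets:
        curve *= 1.0 + r
        peak = max(peak, curve)
        max_dd = max(max_dd, (peak - curve) / peak)
    return max_dd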
from matplotlib import pyplot

# Single linear input layer with a bias unit feeding a single tanh
# output layer; the linear layer's weights are what gets optimised.
net = RecurrentNetwork()
net.addInputModule(BiasUnit(name='bias'))
net.addOutputModule(TanhLayer(1, name='out'))
net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
net.addInputModule(LinearLayer(1, name='in'))
net.addConnection(FullConnection(net['in'], net['out'], name='c1'))
net.addConnection(FullConnection(net['bias'], net['out'], name='c2'))
net.sortModules()

net._setParameters([0, 10, 1.259])
print(net._params)

env = MonthlySnPEnvironment()
task = MaximizeReturnTask(env)
learner = RRL()  # ENAC() #Q_LinFA(2,1)
agent = LearningAgent(net, learner)
exp = ContinuousExperiment(task, agent)

ts = env.ts.tolist()
exp.doInteractionsAndLearn(795)
print(net._params)

# scale the +/-1 positions so they are visible against the return series
actionHist = sign(env.actionHistory) / 20
pyplot.plot(ts[0])
pyplot.plot(actionHist)
pyplot.show()
env = MarketEnvironment()
task = MaximizeReturnTask(env)
numIn = min(env.worldState.shape)

net = RecurrentNetwork()
net.addInputModule(BiasUnit(name='bias'))
net.addOutputModule(TanhLayer(1, name='out'))
net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
net.addInputModule(LinearLayer(numIn, name='in'))
net.addConnection(FullConnection(net['in'], net['out'], name='c1'))
net.addConnection(FullConnection(net['bias'], net['out'], name='c2'))
net.sortModules()
#net._setParameters([-0.1749362, 2.10162725, 0.10726541, 1.67949447, -1.51793343, 2.01329702, 1.57673461])

ts = env.ts
learner = RRL(numIn + 2, ts)  # ENAC() #Q_LinFA(2,1)
agent = LearningAgent(net, learner)
exp = ContinuousExperiment(task, agent)

exp.doInteractionsAndLearn(10000)
print(net._params)

# cumulative log returns of buy-and-hold vs. the sign-thresholded agent,
# with the raw positions on a secondary axis
actionHist = env.actionHistory
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot(cumsum([log(1 + x) for x in ts]))
ax1.plot(cumsum([log(1 + (x * sign(y))) for x, y in zip(ts, actionHist)]), 'g')
ax2.plot(actionHist, 'r')
plt.show()
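
# pE.calculateTradingReturn, used throughout these scripts, is
# project-local. A hedged sketch of the usual RRL-style definition (an
# assumption about its implementation): the position F(t-1) earns the
# period return r(t), less an optional proportional cost delta charged
# on position changes.
def calculate_trading_return_sketch(action_hist, ts, delta=0.0):
    """R(t) = F(t-1) * r(t) - delta * |F(t) - F(t-1)|."""
    rets, prev = [], 0.0
    for f, r in zip(action_hist, ts):
        rets.append(prev * r - delta * abs(f - prev))
        prev = f
    return rets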