Example #1
def reset(self):
    LearningAgent.reset(self)
    self._temperature = self.init_temperature
    self._expl_proportion = self.init_exploration
    self.learner.reset()
    self._oaro = None
    self.newEpisode()
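For context, this reset() overrides PyBrain's LearningAgent.reset(). Below is a minimal sketch of the kind of class the method could live in; the class name and constructor defaults are hypothetical, and only LearningAgent, reset() and newEpisode() are standard PyBrain, while the temperature/exploration attributes are assumptions taken from the snippet itself.

from pybrain.rl.agents import LearningAgent

class AnnealingLearningAgent(LearningAgent):  # hypothetical name, for illustration only
    def __init__(self, module, learner, init_temperature=1.0, init_exploration=0.1):
        LearningAgent.__init__(self, module, learner)
        self.init_temperature = init_temperature    # assumed: initial softmax temperature
        self.init_exploration = init_exploration    # assumed: initial exploration proportion
        self._oaro = None                           # assumed: last observation/action/reward record

    def reset(self):
        # Restore the initial exploration settings, clear the learner's state,
        # and open a fresh episode (as in the snippet above).
        LearningAgent.reset(self)
        self._temperature = self.init_temperature
        self._expl_proportion = self.init_exploration
        self.learner.reset()
        self._oaro = None
        self.newEpisode()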
Example #2
File: aiplayer.py Project: SkyWox/hackcu (the same function also appears in the minorl/hackcu fork)
def mlDriver(cv, stateTransfer, actionTransfer):
    # parameter setup
    # dimensionality of state argument (could be less than stateTransfer)
    stateDim = 352
    # number of moves possible
    numMoves = 361
    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    while True:
        experiment.doEpisodes(10)
        print "Done with experiments"
        agent.learn()
        print "Learned"
        agent.reset()
        print "Cycled"
Example #3
def main():
    inData=createDataset()
    env = MarketEnvironment(inData)
    task = MaximizeReturnTask(env)
    numIn=min(env.worldState.shape)

    net=RecurrentNetwork()
    net.addInputModule(BiasUnit(name='bias'))
    net.addOutputModule(SignLayer(1,name='out'))
    net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
    net.addInputModule(LinearLayer(numIn,name='in'))
    net.addConnection(FullConnection(net['in'],net['out'],name='c1'))
    net.addConnection(FullConnection(net['bias'],net['out'],name='c2'))
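    # sortModules() sorts the modules topologically and builds the network's
    # parameter vector; PyBrain requires it before the net is used or its
    # parameters are overwritten below.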
    net.sortModules()
    ###net._setParameters([1.89523389,  2.41243781, -0.37355216, 0.60550426, 1.29560957, -1.14727503, -1.80005888, 0.66351325, 1.91905451])
    ###net._setParameters([ 1.07300605, 2.37801446, -0.28118081, -0.78715898, 0.13367809, 0.31757825,-1.23956247, 1.90411791, 0.95458375])
    ##net._setParameters([1.35840492,1.87785682, -0.15779415, -0.79786631, 0.13380422, 0.0067797, -1.28202562, 2.38574234, 0.909462])
    ###net._setParameters([ 0.36062235, 1.70329005, 2.24180157, 0.34832656, 0.31775365, -0.60400026, -0.44850303, 1.50005529, -0.99986366])
    net._setParameters([ 1.15741417, 1.70427034, 1.05050831, -0.47303435, -0.87220272, -1.44743793,  0.93697461, 2.77489952, 0.27374758])
    ts=env.ts
    learner = RRL(numIn+2,ts) # ENAC() #Q_LinFA(2,1)
    agent = LearningAgent(net,learner)
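    # ContinuousExperiment supports online learning: doInteractionsAndLearn()
    # lets the agent update after every single interaction rather than only
    # between episodes.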
    exp = ContinuousExperiment(task,agent)


    # in sample learning
    in_sample_len=500
    print("Before in sample {}".format(net._params))
    for i in range(100):
        exp.doInteractionsAndLearn(in_sample_len)
        learner.reset()
        agent.reset()
        env.reset()

    # out of sample, online learning
    print("Before oos {}".format(net._params))
    exp.doInteractionsAndLearn(len(ts)-1)
    print("After oos {}".format(net._params))

    #performance evaluation
    dfIndex=inData['RETURNS'].index
    rf=0  # inData['Fed Fund Target']
    outDataOOS=pE.outData(ts,env.actionHistory,dfIndex,startIndex=in_sample_len)
    sharpe_oos=pE.annualisedSharpe(outDataOOS['trading rets'],rf)
    drawDown_oos=pE.maximumDrawdown(outDataOOS['trading rets'])
    numOutperformedMonths_oos=pE.percentOfOutperformedMonths(outDataOOS['trading rets'],outDataOOS['ts'])
    traderReturn=math.exp(outDataOOS['cum_log_rets'][-1])-1
    benchmarkReturn=math.exp(outDataOOS['cum_log_ts'].values[-1])-1
    print( "oos sharpe: {}, \noos drawdown: {} \noos percent outperformed months {}\noos trader return {}".format(sharpe_oos, drawDown_oos, numOutperformedMonths_oos,traderReturn))

    paramHist=learner.paramHistory
    inData.rename(columns={'RETURNS': 'r(t-1)'},inplace=True)
    lbs=insert(inData.columns.values,0,'Bias')
    lbs=append(lbs,'F(t-1)')
    plt.figure(0)
    for i in range(len(net._params)):
        if i<7:
            plt.plot(paramHist[i],label=lbs[i])
        else:
            plt.plot(paramHist[i],'--',label=lbs[i])
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.1),ncol=3)
    plt.draw()



    fig, axes = plt.subplots(nrows=2,ncols=1)
    plotFrame=outDataOOS[['cum_log_ts','cum_log_rets']]
    plotFrame.columns=['Buy and Hold','Trading Agent']
    plotFrame.plot(ax=axes[0])
    outDataOOS['Action_Hist'].plot(ax=axes[1],color='r')


    plt.draw()
    plt.show()
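The script above assumes a number of imports that are not shown. Below is a likely preamble, reconstructed rather than taken from the original file: only the math, numpy, matplotlib and PyBrain imports are standard, while the finance/RRL pieces (MarketEnvironment, MaximizeReturnTask, RRL, SignLayer, pE, createDataset) are project-specific and their module paths here are guesses.

import math
import matplotlib.pyplot as plt
from numpy import insert, append

from pybrain.structure import RecurrentNetwork, LinearLayer, BiasUnit, FullConnection
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments.continuous import ContinuousExperiment

# Project-specific pieces, named after their usage above (module paths are hypothetical):
# from marketenvironment import MarketEnvironment
# from maximizereturntask import MaximizeReturnTask
# from rrl import RRL
# from signlayer import SignLayer
# import performanceevaluation as pE
# from data import createDataset

The script would also normally end with an `if __name__ == '__main__': main()` guard so that main() runs when the file is executed directly.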