def simulateMore(agent, lrAgent, environment, nTrials):
    '''
    DEFINITION: continues an existing simulation, playing nTrials additional
    trials with the already-configured agents and environment.

    INPUTS:
    agent: the prediction agent (returned unchanged; updated in place by play)
    lrAgent: the learning-rate agent whose play() drives each trial
    environment: the task environment the agents act in
    nTrials: number of additional trials to play

    RETURNS: (agent, lrAgent, environment) after the extra trials.
    '''
    start = time.time()  # execution timer for the progress display
    for trial in xrange(nTrials):
        # agents[-1].play()  # use this if you want to run with constant learning rate
        lrAgent.play()
        showProgress(nTrials, trial + 1, time.time(), start)
    return (agent, lrAgent, environment)
outputPath = path + 'statistics/' begin = time.time() prob = array([.55, .65, .75, .85, .95]) vol = array([0.0,0.001,0.005,0.01,0.05,0.1]) lRates = [0.01,0.11,0.21,0.31,0.41,0.51, 0.61, 0.71, 0.81, 0.91] nEpisodes = 50 meanSquareError = np.zeros((len(prob), len(vol),len(lRates), nEpisodes)) rightEstimate = np.zeros((len(prob), len(vol), len(lRates),nEpisodes)) rightPrediction = np.zeros((len(prob), len(vol), len(lRates),nEpisodes)) rewardedTrials = np.zeros((len(prob), len(vol), len(lRates),nEpisodes)) totalIter = len(prob)*len(vol)*nEpisodes*len(lRates) n=1 #iterations counter for v in xrange(len(vol)): for p in xrange(len(prob)): environment = loadArrangeVar(prob[p], vol[v], path,'environment') for r in xrange(len(lRates)): for e in xrange(nEpisodes): agent = loadCtLbdEpisodeVar(prob[p], vol[v], lRates[r],e, path,'agent') meanSquareError[p][v][r][e] = mean(agent.err**2) rightEstimate[p][v][r][e] = np.sum(around(agent.x[1:]) == around(environment.history)) / float(environment.history.size)*100 #x has a shape of nTrials+1 and history of nTrials. That is because after the last trial the agent learns the value of x for the next rightPrediction[p][v][r][e] = np.sum(around(agent.x[0:-1]) == around(environment.history)) / float(environment.history.size)*100 rewardedTrials[p][v][r][e] = float(np.sum(agent.r))/agent.x.size*100 #Calculates how often the agent was rewarded within the episode showProgress(totalIter, n, time.time(), begin) n+=1 variables = {'meanSquareError':meanSquareError, 'rightEstimate':rightEstimate, 'rewardedTrials': rewardedTrials, 'rightPrediction':rightPrediction} saveAllVars(outputPath,variables) print 'Calculation finished in ', (time.time()-begin), 'seconds.'
def simulate (vol,prob,x0,Q0,lr,gamma,nTrials,nEpisodes,pPolicy,lrLearner,lrPolicy,fixSeed=True,saveOutput=False,path=None): ''' DEFINITION: runs the simulation for the agent with changing learning rates INPUTS: prob: list of probabilities to run vol: list of volatilities to run x0: initial value for the first agent Q0: initial value for the second agent lr: learning rate for the second agent gamma: gamma value (second agent) nTrials: number of trials nEpisodes: number of episodes pPolicy: string with the name of the policy for the prediction agent (first agent) lrLearner: string with the kind of algorithm is to be used to learn the Q-values lrPolicy: string with the name of the policy for the learning rate agent (second agent) fixSeed: (boolean) if true, the seed of the random number generation is fixed and the results can be repeated saveOutput: (boolean) if true the output will be saved at the path path: path for saving simulation data ''' now = datetime.now() n=1 #iterations counter begin = time.time() print 'Simulation started. Date:', now.strftime("%Y-%m-%d %H:%M:%S") if saveOutput: name = path + 'output' + now.strftime("%Y-%m-%d_%H-%M") + '.txt' f=open(name, 'w') print >>f,'Simulation started. 
Date:', now.strftime("%Y-%m-%d %H:%M") print >>f, 'Run parameters:' print >>f, 'x0 =',x0 print >>f, 'Q0 =',Q0 print >>f, 'lr =',lr print >>f, 'gamma =',gamma print >>f, 'vol =', vol print >>f, 'prob = ', prob print >>f, 'nTrials = ', nTrials print >>f, 'nEpisodes = ', nEpisodes print >>f, 'pPolicy=', pPolicy print >>f, 'lrPolicy = ', lrPolicy print >>f, 'lrLearner = ', lrLearner totalIter = nTrials*nEpisodes*len(prob)*len(vol) for v in vol: for p in prob: for e in xrange(nEpisodes): if fixSeed: seed(e) # this makes episode e have the same pseudo-random numbers for different prob and vol #set up environment = PRL(p,v) agent = PredictionAgent(pPolicy,environment,lrLearner=None,x0=x0) lrAgent = RateAgent(lrPolicy,lrLearner,agent,lr=lr, x0=Q0, gamma = gamma) agent.lrLearner = lrAgent #execution for t in xrange(nTrials): #agents[-1].play() #use this if you want to run with constant learning rate lrAgent.play() showProgress(totalIter, n, time.time(),begin) n+=1 #saving information for this episode variables = {'agent':agent,'lrAgent':lrAgent} if saveOutput: saveAllEpisodeVariables(p,v,e,path,variables) if saveOutput: saveArrangeVar(p, v, environment, path, 'environment') print 'Finished',nEpisodes,'episodes of',nTrials,'trials for vol =',v,'prob =', p, 'at time %.1f' %(time.time()-begin) if saveOutput: print >>f,'Finished',nEpisodes,'episodes of',nTrials,'trials for vol =',v,'prob =', p, 'at time %.1f' %(time.time()-begin) print 'Calculation finished at',datetime.now().strftime("%Y-%m-%d %H:%M:%S"),'in %.1f' %(time.time()-begin), 'seconds.' if saveOutput: print >>f,'Calculation finished at',datetime.now().strftime("%Y-%m-%d %H:%M%S"),'in %.1f' %(time.time()-begin), 'seconds.' if saveOutput: f.close() return (agent,lrAgent, environment)