# One rehearsal step: a learning episode, plus a performance episode and a
# plot refresh on every 50th rehearsal.  `irehearsal` is expected to come
# from an enclosing loop that is not part of this fragment.

# Learn.
# ------
r = exp.doEpisodes(1)
# Discounted reward.  Nothing else in this fragment reads it; it fed the
# optional debug output (cumreward, len(r[0]) and
# exp.agent.learner.learningRate) that used to be printed here.
cumreward = exp.task.getTotalReward()

is_performance_round = irehearsal % 50 == 0
if is_performance_round:
    # Perform (no learning).
    # ----------------------
    # The performance agent drives the experiment for exactly one episode.
    exp.agent = performance_agent
    r = exp.doEpisodes(1)

    # Record the reward of the performance episode and report it.
    perform_cumreward = task.getTotalReward()
    perform_cumrewards.append(perform_cumreward)
    nsteps = len(r[0])
    print("PERFORMANCE: cumreward:", perform_cumreward, "nsteps:", nsteps)

    # Hand the experiment back to the learning agent.
    performance_agent.reset()
    exp.agent = agent

    # NOTE(review): the plot refresh is assumed to belong to the
    # performance round (perform_cumrewards only changes here) -- confirm.
    ax1.cla()
    ax1.plot(perform_cumrewards, ".--")
    # Wheel trajectories.
    update_wheel_trajectories()
    # Pause briefly after the redraw.
    plt.pause(0.001)
# One rehearsal step: a learning episode, plus a performance episode and a
# plot refresh on every 50th rehearsal.  `irehearsal` is expected to come
# from an enclosing loop that is not part of this fragment.

# Learn.
# ------
r = exp.doEpisodes(1)
# Discounted reward.  Not read again in this fragment; it fed the optional
# debug output (cumreward, len(r[0]) and exp.agent.learner.learningRate)
# that used to be printed here.
cumreward = exp.task.getTotalReward()

if irehearsal % 50 == 0:
    # Perform (no learning).
    # ----------------------
    # Swap out the agent.
    exp.agent = performance_agent

    # Perform.
    r = exp.doEpisodes(1)
    perform_cumreward = task.getTotalReward()
    perform_cumrewards.append(perform_cumreward)
    # A single pre-formatted argument keeps the output text identical to the
    # old `print a, b, c, d` statement while being valid syntax on Python 3;
    # it also prints the same line on Python 2, where a multi-argument
    # print(...) would show a tuple instead.
    print(
        "PERFORMANCE: cumreward: %s nsteps: %s"
        % (perform_cumreward, len(r[0]))
    )

    # Swap back the learning agent.
    performance_agent.reset()
    exp.agent = agent

    # NOTE(review): the plot refresh is assumed to belong to the
    # performance round (perform_cumrewards only changes here) -- confirm.
    ax1.cla()
    ax1.plot(perform_cumrewards, ".--")
    # Wheel trajectories.
    update_wheel_trajectories()
    # Pause briefly after the redraw.
    plt.pause(0.001)
# Training loop: one episode per iteration; every 15th iteration also runs
# an episode with max_agent, appends a statistics record to a text file and
# refreshes the plots.
for iteration in range(100000):
    # Episode with the agent currently installed on the experiment.
    r = exp.doEpisodes(1)
    # Not read again inside this loop.
    cumreward = exp.task.getTotalReward()

    # Iterations that are not a multiple of 15 stop here.
    if iteration % 15 != 0:
        continue

    # Episode with max_agent installed on the experiment.
    exp.agent = max_agent
    r = exp.doEpisodes(1)
    perform_cumreward = task.getTotalReward()
    perform_cumrewards.append(perform_cumreward)
    print("PERFORMANCE: cumreward:", perform_cumreward, "nsteps:", len(r[0]))

    # One space-separated record per evaluation: max, mean, median and last
    # entry of the environment's yf history, then the episode reward, the
    # iteration index and the learning rate.
    stats = task.env.get_yfhist()
    # NOTE(review): exp.agent is max_agent at this point, so the rate logged
    # below is read from max_agent's learner, not agent's -- confirm intent.
    record = " ".join(
        str(value)
        for value in (
            np.max(stats),
            np.mean(stats),
            np.median(stats),
            stats[-1],
            perform_cumreward,
            iteration,
            exp.agent.learner.learningRate,
        )
    )
    # Append mode: records from earlier evaluations are kept.
    with open("res/lspi_30-50.txt", "a") as log_file:
        log_file.write(record + "\n")

    # Hand the experiment back to the learning agent.
    max_agent.reset()
    exp.agent = agent

    # NOTE(review): the plot refresh is assumed to belong to the evaluation
    # branch (perform_cumrewards only changes here) -- confirm.
    ax1.cla()
    ax1.plot(perform_cumrewards, ".--")
    # Wheel trajectories.
    update_wheel_trajectories()
    # Pause briefly after the redraw.
    plt.pause(0.001)