Example 1
0
    # Learn for one episode with the learning agent.
    # ------
    r = exp.doEpisodes(1)
    # Discounted reward.
    # NOTE(review): assumes exp.task accumulates reward over the episode just
    # run — confirm against the Experiment/Task implementation.
    cumreward = exp.task.getTotalReward()
    #print 'cumreward: %.4f; nsteps: %i; learningRate: %.4f' % (
    #        cumreward, len(r[0]), exp.agent.learner.learningRate)

    # Every 50 rehearsals, evaluate the current policy without learning.
    if irehearsal % 50 == 0:
        # Perform (no learning).
        # ----------------------
        # Swap out the agent.
        exp.agent = performance_agent

        # Perform.
        r = exp.doEpisodes(1)
        # NOTE(review): reads the module-level `task`, not exp.task —
        # presumably the same object; verify.
        perform_cumreward = task.getTotalReward()
        perform_cumrewards.append(perform_cumreward)
        print('PERFORMANCE: cumreward:', perform_cumreward, 'nsteps:',
              len(r[0]))

        # Swap back the learning agent.
        performance_agent.reset()
        exp.agent = agent

        # Redraw the evaluation-reward curve from scratch.
        ax1.cla()
        ax1.plot(perform_cumrewards, '.--')
        # Wheel trajectories.
        update_wheel_trajectories()

        # Give matplotlib a chance to process GUI events so the figure updates.
        plt.pause(0.001)
Example 2
0
    # Learn for one episode with the learning agent.
    # ------
    r = exp.doEpisodes(1)
    # Discounted reward accumulated over the episode just run.
    cumreward = exp.task.getTotalReward()

    # Every 50 rehearsals, evaluate the current policy without learning.
    if irehearsal % 50 == 0:
        # Perform (no learning).
        # ----------------------
        # Swap out the agent.
        exp.agent = performance_agent

        # Perform.
        r = exp.doEpisodes(1)
        # NOTE(review): reads the module-level `task`, not exp.task —
        # presumably the same object; verify.
        perform_cumreward = task.getTotalReward()
        perform_cumrewards.append(perform_cumreward)
        # Fixed: was a Python-2 `print` statement (a SyntaxError under
        # Python 3); now uses the print() function like the rest of the file.
        print('PERFORMANCE: cumreward:', perform_cumreward, 'nsteps:',
              len(r[0]))

        # Swap back the learning agent.
        performance_agent.reset()
        exp.agent = agent

        # Redraw the evaluation-reward curve from scratch.
        ax1.cla()
        ax1.plot(perform_cumrewards, ".--")
        # Wheel trajectories.
        update_wheel_trajectories()

        # Give matplotlib a chance to process GUI events so the figure updates.
        plt.pause(0.001)
Example 3
0
# Main training loop: learn on every iteration, evaluate every 15th.
for iteration in range(100000):
    #print("ITERATION :  " , iteration)
    # Run one learning episode and fetch its accumulated reward.
    r = exp.doEpisodes(1)
    cumreward = exp.task.getTotalReward()
    #print 'cumreward: %.4f; nsteps: %i; learningRate: %.4f' % (cumreward, len(r[0]), exp.agent.learner.learningRate)
    # Every 15 iterations, evaluate the greedy/max agent without learning.
    if iteration % 15 == 0:
        # Temporarily swap in the evaluation agent.
        exp.agent = max_agent
        r = exp.doEpisodes(1)
        # NOTE(review): reads the module-level `task`, not exp.task —
        # presumably the same object; verify.
        perform_cumreward = task.getTotalReward()
        perform_cumrewards.append(perform_cumreward)
        print('PERFORMANCE: cumreward:', perform_cumreward, 'nsteps:',
              len(r[0]))
        # Summarize the environment's yf history for this evaluation episode.
        # NOTE(review): get_yfhist() semantics are opaque from here — assumed
        # to return a numeric sequence; confirm against the env class.
        stats = (task.env.get_yfhist())
        new_stats = [
            np.max(stats),
            np.mean(stats),
            np.median(stats), stats[-1], perform_cumreward, iteration,
            exp.agent.learner.learningRate
        ]
        new_stats = [str(s) for s in new_stats]
        # Append one space-separated stats line per evaluation to the log file.
        with open("res/lspi_30-50.txt", "a") as myfile:
            myfile.write(" ".join(new_stats) + "\n")

        # Reset the evaluation agent and restore the learning agent.
        max_agent.reset()
        exp.agent = agent
        # Redraw the evaluation-reward curve from scratch.
        ax1.cla()
        ax1.plot(perform_cumrewards, '.--')
        # Wheel trajectories.
        update_wheel_trajectories()
        # Give matplotlib a chance to process GUI events so the figure updates.
        plt.pause(0.001)