Example 1
                                           np.array([last_xf, last_yf]))
        delta_dist = dist_to_goal - dist_to_goal_last
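        # Shaped reward: penalize the change in tilt and, weakly, any increase
        # in distance to the goal.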
        return -delta_tilt - delta_dist * 0.01


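# Set up the LSPI balance task and the linear function-approximation learner;
# both use a reward discount of 0.8.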
task = LSPI_task()
learner = LSPI(9, 20)
task.rewardDiscount = 0.8
learner.rewardDiscount = 0.8

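# Learning agent: explores with epsilon-greedy action selection.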
agent = LinearFA_Agent(learner)
agent.epsilonGreedy = True
exp = EpisodicExperiment(task, agent)
learner.learningRateDecay = 3000
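# A second agent that shares the same learner but acts greedily and does not
# learn, used to evaluate the current policy.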
max_agent = LinearFA_Agent(learner)
max_agent.learning = False
max_agent.greedy = True

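# Record the wheel ground-contact trajectories and prepare an interactive
# two-panel figure (ax2 will hold the wheel trajectories).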
task.env.saveWheelContactTrajectories(True)
plt.ion()
plt.figure(figsize=(8, 4))

ax1 = plt.subplot(1, 2, 1)
ax2 = plt.subplot(1, 2, 2)


def update_wheel_trajectories():
    # Draw the front (red) and back (blue) wheel contact-point paths on ax2.
    ax2.plot(task.env.get_xfhist(), task.env.get_yfhist(), 'r')
    ax2.plot(task.env.get_xbhist(), task.env.get_ybhist(), 'b')
    ax2.axis('equal')
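
Nothing above actually runs the experiment or refreshes the plots. The loop below is only a minimal sketch of how these pieces are typically driven; it assumes PyBrain's EpisodicExperiment API, where doEpisodes(n) returns a list of per-episode reward lists and the experiment's agent attribute can be reassigned. The episode count and evaluation cadence are illustrative, not taken from the original.

# Illustrative training/evaluation loop (episode counts are arbitrary).
performance = []
for episode in range(100):
    # One learning episode with the exploring agent.
    exp.doEpisodes(1)

    # Evaluate the current greedy policy with the non-learning agent.
    exp.agent = max_agent
    rewards = exp.doEpisodes(1)
    performance.append(sum(rewards[0]))
    exp.agent = agent

    # Refresh the live plots.
    ax1.clear()
    ax1.plot(performance)
    update_wheel_trajectories()
    plt.pause(0.001)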