# Example 1: train a DDPG-style agent on LunarLanderContinuous-v2
# Training loop: run `episodes` episodes, learning online at every step,
# checkpointing after each episode, and tracking a trailing-100 average score.
env = gym.make('LunarLanderContinuous-v2')

# alpha/beta are the actor/critic learning rates; tau is the soft-update rate.
# NOTE(review): input_dims=[8] / n_actions=2 match LunarLanderContinuous-v2's
# observation and action spaces — confirm if the env ever changes.
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)

episodes = 1000

np.random.seed(42)  # reproducible exploration noise / replay sampling

tau_hist = []    # soft-update rate recorded per episode
score_hist = []  # total undiscounted reward per episode
for i in range(episodes):
    done = False
    score = 0
    state = env.reset()  # old gym API: reset() returns the observation only
    while not done:
        act = agent.choose_action(state)
        next_state, reward, done, _ = env.step(act)
        # Store the transition with done as 0/1 for the terminal mask.
        agent.store(state, act, reward, next_state, int(done))
        agent.learn()
        score += reward
        state = next_state

    agent.save_models()  # checkpoint every episode
    score_hist.append(score)
    tau_hist.append(agent.tau)
    avg_score = np.mean(score_hist[-100:])
    # Bug fix: the original string concatenation had no separators and
    # printed e.g. "episode 1score -120.00average score ...".
    print(f'episode {i + 1} score {score:.2f} average score {avg_score:.2f}')

# NOTE(review): `episodes` (an int) is rebound to an ndarray here; kept as-is
# in case later code relies on it, but a fresh name would be clearer.
episodes = np.arange(1, episodes + 1)
plot_curve(episodes, score_hist, tau_hist)
                  batch_size=64,
                  gamma=0.99)

    # Evaluation loop: restore a trained agent and run 200 rendered episodes,
    # tracking per-episode score and a trailing-128-episode average.
    agent.load_models()
    np.random.seed(0)  # reproducible action noise

    score_history = []  # total undiscounted reward per episode
    for i in range(200):
        obs = env.reset()  # old gym API: reset() returns the observation only
        done = False
        score = 0
        step = 0  # steps taken in this episode
        while not done:
            step += 1
            act = agent.choose_action(obs)
            new_state, reward, done, info = env.step(act)
            # NOTE(review): the agent keeps storing transitions and learning
            # during evaluation — presumably intentional online fine-tuning;
            # confirm before using these scores as a fixed-policy benchmark.
            agent.remember(obs, act, reward, new_state, int(done))
            agent.learn()
            score += reward
            obs = new_state
            env.render()
        score_history.append(score)

        # Bug fix: the final figure is a step count, so say "steps", not
        # "episode"; also consolidated the dated %-formatting into f-strings.
        print(f'episode {i} score {score:.2f}',
              f'trailing 128 games avg {np.mean(score_history[-128:]):.3f}',
              f'finished after {step} steps')