# Train a DDPG-style agent on LunarLanderContinuous-v2, checkpointing each
# episode and plotting score / tau histories at the end.
env = gym.make('LunarLanderContinuous-v2')
agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
              env=env, batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=2)

episodes = 1000
np.random.seed(42)  # reproducible numpy-driven randomness (e.g. action noise)

tau_hist = []
score_hist = []
for i in range(episodes):
    done = False
    score = 0
    state = env.reset()
    while not done:
        act = agent.choose_action(state)
        # Old 4-tuple gym step API (obs, reward, done, info).
        next_state, reward, done, _ = env.step(act)
        agent.store(state, act, reward, next_state, int(done))
        agent.learn()  # one learning update per environment step
        score += reward
        state = next_state
    # NOTE(review): checkpointing every single episode is I/O-heavy; kept
    # as-is to preserve the original behavior.
    agent.save_models()
    score_hist.append(score)
    tau_hist.append(agent.tau)
    avg_score = np.mean(score_hist[-100:])  # trailing 100-episode average
    # Fixed: the original string concatenation had no separators and printed
    # e.g. 'episode 1score -120.33average score -120.33'.
    print(f'episode {i + 1} score {score:.2f} average score {avg_score:.2f}')

# Use a distinct name for the x-axis array instead of shadowing the
# integer `episodes` episode count.
episode_axis = np.arange(1, episodes + 1)
plot_curve(episode_axis, score_hist, tau_hist)
batch_size=64, gamma=0.99) agent.load_models() np.random.seed(0) score_history = [] for i in range(200): obs = env.reset() done = False score = 0 step = 0 while not done: step += 1 # print(obs) act = agent.choose_action(obs) # print(act) new_state, reward, done, info = env.step(act) agent.remember(obs, act, reward, new_state, int(done)) agent.learn() score += reward obs = new_state env.render() score_history.append(score) # if i % 25 == 0: # agent.save_models() print('episode ', i, 'score %.2f' % score, 'trailing 128 games avg %.3f' % np.mean(score_history[-128:]), 'finished after ', step, ' episode')