agent = Agent(
    # ... earlier keyword arguments not shown ...
    batch_size=64, layer1_size=256,
    layer2_size=128, n_actions=3)
# agent.load_models()
# np.random.seed(1)

score_history = []
for i in range(50):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        print(act)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
        # env.render()
    score_history.append(score)
    if i % 10 == 0:
        agent.save_models()
        env.render()
    print('episode ', i, 'score %.2f' % score,
          'trailing 25 games avg %.3f' % np.mean(score_history[-25:]))
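This loop assumes an Agent that wraps a replay memory behind remember() and learn(). For reference, here is a minimal sketch of such a buffer, assuming NumPy storage and uniform sampling; the class and method names are illustrative, not the original implementation:

import numpy as np

class ReplayBuffer:
    """Fixed-size circular buffer of (state, action, reward, next_state, done)."""
    def __init__(self, max_size, input_dims, n_actions):
        self.mem_size = max_size
        self.mem_cntr = 0
        self.state_memory = np.zeros((max_size, *input_dims), dtype=np.float32)
        self.new_state_memory = np.zeros((max_size, *input_dims), dtype=np.float32)
        self.action_memory = np.zeros((max_size, n_actions), dtype=np.float32)
        self.reward_memory = np.zeros(max_size, dtype=np.float32)
        self.terminal_memory = np.zeros(max_size, dtype=np.float32)

    def store_transition(self, state, action, reward, new_state, done):
        idx = self.mem_cntr % self.mem_size  # overwrite the oldest entries first
        self.state_memory[idx] = state
        self.new_state_memory[idx] = new_state
        self.action_memory[idx] = action
        self.reward_memory[idx] = reward
        self.terminal_memory[idx] = 1 - int(done)  # 0 masks the terminal bootstrap
        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size)
        return (self.state_memory[batch], self.action_memory[batch],
                self.reward_memory[batch], self.new_state_memory[batch],
                self.terminal_memory[batch])

Storing 1 - done (rather than done itself) lets the learner multiply the mask directly into the target value, zeroing the bootstrap term on terminal transitions.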
        next_state, reward, done, _ = env.step(action)
        agent.memory.add(state, action, reward, next_state, done)
        state = next_state
        score += reward
        agent.optimize()

    score_history.append(score)
    avg_score: float = np.mean(score_history[-100:])
    avg_history.append(avg_score)

    if avg_score > best_score:
        best_score = avg_score
        agent.save_models(data_path)
        print(f'Episode:{i}'
              f'\t ACC. Rewards: {score:3.2f}'
              f'\t AVG. Rewards: {avg_score:3.2f}'
              f'\t *** MODEL SAVED! ***')
    else:
        print(f'Episode:{i}'
              f'\t ACC. Rewards: {score:3.2f}'
              f'\t AVG. Rewards: {avg_score:3.2f}')

    episode_info = {
        'Episode': i,
        'Total Episodes': n_games,
        'Episodic Summed Rewards': score,
        'Moving Mean of Episodic Rewards': avg_score,
    }
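The episode_info dict collects per-episode metrics, but its consumer is not shown in this excerpt. A minimal sketch of one way to persist it, assuming a CSV log on disk; log_episode and the file path are hypothetical, not part of the original code:

import csv
import os

def log_episode(info: dict, path: str = 'training_log.csv') -> None:
    """Append one episode's metrics to a CSV file, writing the header once."""
    write_header = not os.path.exists(path)
    with open(path, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(info.keys()))
        if write_header:
            writer.writeheader()
        writer.writerow(info)

# e.g., at the end of each episode:
log_episode(episode_info)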