import gym
import numpy as np

# Agent is the project's agent class, assumed to be defined/imported elsewhere.
def collect_trajectories(env: gym.Env, agent: Agent, n_games: int = 10) -> np.ndarray:
    """Roll out n_games episodes and stack every visited state into one array."""
    state_history: list[np.ndarray] = []  # accumulated across all games, not reset per game
    for _ in range(n_games):
        state = env.reset()
        done: bool = False
        while not done:
            state_history.append(state)
            action = agent.choose_action(state)
            next_state, _, done, _ = env.step(action)
            state = next_state
    return np.vstack(state_history)
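# Usage sketch for collect_trajectories. Assumptions, not part of the original:
# 'Pendulum-v1' as the task, and the Agent constructor signature taken from the
# testing snippet below. The classic 4-tuple gym step API these loops already
# rely on is assumed throughout.
env = gym.make('Pendulum-v1')
agent = Agent(env=env, n_games=10, training=False)
states = collect_trajectories(env, agent, n_games=10)
print(states.shape)  # (total_steps_across_games, state_dim)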
import copy
from typing import List

import numpy as np
from tqdm import tqdm

# Init. agent in evaluation mode and load the trained weights.
agent = Agent(env=env, n_games=n_games, training=False)
agent.load_models(data_path)

score_history: List[np.float32] = []
for _ in tqdm(range(n_games), desc='Testing', total=n_games):
    score = 0
    done = False
    # Initial reset of environment.
    state = env.reset()
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.memory.add(state, action, reward, next_state, done)
        state = copy.deepcopy(next_state)
        score += reward
    score_history.append(score)

print(f'Test Analysis:\n'
      f'Mean: {np.mean(score_history)}\n'
      f'Std: {np.std(score_history)}')  # np.std is the standard deviation, not the variance
test_data.append({'Test Score': score_history})
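# If the collected scores need to be written out, one option (the file name and
# layout below are assumptions, not part of the original script) is to convert
# the NumPy scalars to plain floats so the standard json module can serialize them.
import json

serializable = [{key: [float(s) for s in scores] for key, scores in entry.items()}
                for entry in test_data]
with open('test_results.json', 'w') as f:
    json.dump(serializable, f, indent=2)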
agent = Agent(env=env, batch_size=64, layer1_size=256,
              layer2_size=128, n_actions=3)
# agent.load_models()
# np.random.seed(1)

score_history = []
for i in range(50):
    obs = env.reset()
    done = False
    score = 0
    while not done:
        act = agent.choose_action(obs)
        # print(act)
        new_state, reward, done, info = env.step(act)
        # Store the transition, then take one learning step per environment step.
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state
        # env.render()
    score_history.append(score)
    # Checkpoint (and render) every 10 episodes.
    if i % 10 == 0:
        agent.save_models()
        env.render()
    print('episode ', i, 'score %.2f' % score,
          'trailing 25 games avg %.3f' % np.mean(score_history[-25:]))
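# All three snippets assume an Agent exposing choose_action, remember (or
# memory.add), learn, save_models, and load_models. A minimal stand-in that
# satisfies this interface; every method body here is a placeholder assumption,
# not the project's actual implementation:
class RandomAgent:
    """Drop-in stub that samples uniformly from the action space."""

    def __init__(self, env):
        self.env = env

    def choose_action(self, state):
        # Ignore the state and pick a random action (placeholder policy).
        return self.env.action_space.sample()

    def remember(self, state, action, reward, next_state, done):
        pass  # a real agent would push the transition into a replay buffer

    def learn(self):
        pass  # a real agent would sample a batch and update its networks

    def save_models(self):
        pass  # a real agent would checkpoint its network weights

    def load_models(self, path):
        pass  # a real agent would restore weights from path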