def simulate(env: Env, agent: Agent, monitor: Monitor, episodes: int) -> None:
    for episode in range(episodes):
        agent.episode_start()
        done = False
        obs = env.reset()
        while not done:
            action = agent.select_action(obs)
            next_obs, reward, done, _ = env.step(action)
            transition = Transition(obs, action, reward, next_obs, done)
            agent.store_transition(transition)
            monitor.store_transition(env, transition)
            obs = next_obs
        agent.episode_end()
        monitor.episode_end(episode, episodes)
    env.close()
    monitor.simulation_end()
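To make the interfaces concrete, here is a minimal sketch of how simulate could be driven end to end. Everything in it is hypothetical scaffolding, not part of the original code: CoinFlipEnv, RandomAgent, and PrintMonitor are toy stand-ins for the Env, Agent, and Monitor types, and Transition is assumed to be a plain namedtuple matching the fields simulate constructs.

import random
from collections import namedtuple

# Assumed shape of Transition; the article's real definition may differ.
Transition = namedtuple('Transition', ['obs', 'action', 'reward', 'next_obs', 'done'])

class CoinFlipEnv:
    """Toy one-step environment: reward 1 if the action matches a coin flip."""
    def reset(self):
        self.target = random.randrange(2)
        return self.target

    def step(self, action):
        # Returns the same (obs, reward, done, info) 4-tuple that simulate() unpacks.
        return 0, float(action == self.target), True, {}

    def close(self):
        pass

class RandomAgent:
    """Agent that ignores observations and acts uniformly at random."""
    def __init__(self, n_actions):
        self.n_actions = n_actions

    def episode_start(self):
        pass

    def select_action(self, obs):
        return random.randrange(self.n_actions)

    def store_transition(self, transition):
        pass  # a learning agent would buffer the transition here

    def episode_end(self):
        pass

class PrintMonitor:
    """Monitor that tracks the per-episode return and prints it."""
    def __init__(self):
        self.score = 0.0

    def store_transition(self, env, transition):
        self.score += transition.reward

    def episode_end(self, episode, episodes):
        print(f'episode {episode + 1}/{episodes}: score {self.score:.1f}')
        self.score = 0.0

    def simulation_end(self):
        print('simulation finished')

simulate(CoinFlipEnv(), RandomAgent(n_actions=2), PrintMonitor(), episodes=3)

Because simulate only ever calls these methods, swapping in a real environment, a learning agent, or a richer monitor requires no change to the loop itself.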
                  replace_target=10000)

if load_checkpoint:
    agent.load_models()

scores = []
num_games = 10000
score = 0

# Fill the replay buffer with random-action transitions before learning starts.
print("Loading up the agent's memory with random driving")
while agent.mem_cntr < 5000:
    done = False
    observation = env._reset()
    while not done:
        action = np.random.choice(agent.n_actions)
        observation_, reward, done = env.step(agent.action_space[action])
        agent.store_transition(observation, action, reward, observation_, int(done))
        observation = observation_
print("Done with random driving. Learning...")

history_file = 'tmp/' + experiment_name + '/ep_{}.pkl'
writer = csv.writer(open('tmp/' + experiment_name + '/scores.csv', 'w'), delimiter=',')
writer.writerow(['episode', 'score', 'epsilon'])

for i in range(num_games):
    done = False
    # Report a moving average over the last ten episodes.
    if i % 10 == 0 and i > 0:
        avg_score = np.mean(scores[max(0, i - 10):(i + 1)])
        print('episode: ', i, 'score: ', score, ' average score %.3f' % avg_score,