Esempio n. 1
0
def simulate(env: Env, agent: Agent, monitor: Monitor, episodes: int) -> None:
    """Run *episodes* complete episodes of *agent* in *env*.

    Every transition is recorded in both the agent (for learning) and the
    monitor (for reporting).  After the final episode the environment is
    closed and the monitor is notified that the simulation has ended.
    """
    for episode in range(episodes):
        agent.episode_start()
        state = env.reset()
        finished = False
        while not finished:
            chosen = agent.select_action(state)
            next_state, reward, finished, _ = env.step(chosen)
            step = Transition(state, chosen, reward, next_state, finished)
            # Both consumers see the exact same transition record.
            agent.store_transition(step)
            monitor.store_transition(env, step)
            state = next_state
        agent.episode_end()
        monitor.episode_end(episode, episodes)
    env.close()
    monitor.simulation_end()
Esempio n. 2
0
                  replace_target=10000)
    if load_checkpoint:
        agent.load_models()
    scores = []
    num_games = 10000
    score = 0

    print("Loading up the agent's memory with random driving")

    while agent.mem_cntr < 5000:
        done = False
        observation = env._reset()
        while not done:
            action = np.random.choice(list(range(agent.n_actions)))
            observation_, reward, done = env.step(agent.action_space[action])
            agent.store_transition(observation, action, reward, observation_,
                                   int(done))
            observation = observation_

    print("Done with random driving. Learning...")

    history_file = 'tmp/' + experiment_name + '/ep_{}.pkl'
    writer = csv.writer(open('tmp/' + experiment_name + 'scores.csv', 'w'),
                        delimiter=',')
    writer.writerow(['episode', 'score', 'epsilon'])

    for i in range(num_games):
        done = False
        if i % 10 == 0 and i > 0:
            avg_score = np.mean(scores[max(0, i - 10):(i + 1)])
            print('episode: ', i, 'score: ', score,
                  ' average score %.3f' % avg_score,