Example #1
def pipe_worker2(pipe, params, is_train, idx_range=[0]):
    envs_queue = deque()
    for idx in idx_range:
        env = DoomEnvironment(params,
                              idx=idx,
                              is_train=is_train,
                              use_shaping=params.use_shaping,
                              fixed_scenario=True)
        obs = env.reset()
        envs_queue.append((obs, env))

    obs, cur_env = envs_queue.pop()

    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(cur_env.reset())
        else:
            obs, reward, done, info = cur_env.step(action)

            if done:
                envs_queue.append((obs, cur_env))
                obs, cur_env = envs_queue.popleft()

            pipe.send((obs, reward, done, info))
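# For context, a minimal sketch of how pipe_worker2 could be driven from the
# parent process. The Pipe/Process wiring and the integer action encoding are
# assumptions about the surrounding codebase, not part of the example above.
import multiprocessing as mp

def drive_pipe_worker(params, is_train=True):
    # One end of the duplex pipe goes to the worker, the other stays here.
    parent_conn, child_conn = mp.Pipe()
    proc = mp.Process(target=pipe_worker2,
                      args=(child_conn, params, is_train),
                      kwargs={'idx_range': [0, 1]})
    proc.start()

    parent_conn.send('reset')      # worker replies with the initial observation
    obs = parent_conn.recv()

    parent_conn.send(0)            # an action; worker replies with (obs, reward, done, info)
    obs, reward, done, info = parent_conn.recv()

    parent_conn.send(None)         # None makes the worker loop exit
    proc.join()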
Example #2
def evaluate_saved_model():
    params = parse_game_args()

    env = DoomEnvironment(params, is_train=True)
    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        params.model_checkpoint), 'The model could not be loaded'
    # map_location remaps storages to the CPU so a GPU-saved checkpoint loads without CUDA
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])

    base_filename = params.model_checkpoint.split('.')[0].split('/')[1]

    agent = BaseAgent(actor_critic, params)

    for i in range(params.num_mazes_test):
        env = DoomEnvironment(params, idx=i, is_train=True)
        movie_name = 'videos/{}_rollout_{:04d}.mp4'.format(base_filename, i)
        print('Creating movie {}'.format(movie_name))
        make_movie(agent, env, movie_name, params)
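# Note: the base_filename line above assumes a one-level 'dir/name.ext'
# checkpoint path. A small equivalent sketch that tolerates deeper paths
# (os.path based; the helper name is an assumption, not from the original):
import os

def checkpoint_basename(path):
    # 'models/run3/best.pth.tar' -> 'best'
    return os.path.basename(path).split('.')[0]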
Example #3
def pipe_worker2(pipe, params, is_train, idx_range=[0]):

    envs_queue = deque()
    for idx in idx_range:
        env = DoomEnvironment(params,
                              idx=idx,
                              is_train=is_train,
                              use_shaping=params.use_shaping,
                              fixed_scenario=True)
        obs = env.reset()
        envs_queue.append((obs, env))

    obs, cur_env = envs_queue.pop()

    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(cur_env.reset())

        elif action == 'depth_trim':
            pipe.send(cur_env.get_depth()[2:-2, 2:-2])

        elif action == 'depth':
            pipe.send(cur_env.get_depth())

        elif action == 'ego_depth':
            pipe.send(cur_env.get_ego_depth())

        elif action == 'ego_depth_trim':
            pipe.send(cur_env.get_ego_depth()[2:-2, 2:-2])

        elif action == 'deltas':
            pipe.send(cur_env.get_player_deltas())

        elif action == 'positions':
            pipe.send(cur_env.get_player_position())

        elif action == 'origins':
            pipe.send(cur_env.get_player_origins())

        elif action == 'pos_deltas_origins':
            pipe.send(cur_env.get_player_pos_delta_origin())

        elif action == 'loops':
            pipe.send(cur_env.get_loop())

        else:
            obs, reward, done, info = cur_env.step(action)

            if done:
                envs_queue.append((obs, cur_env))
                obs, cur_env = envs_queue.popleft()

            pipe.send((obs, reward, done, info))
Example #4
def pipe_worker(pipe, params, is_train, idx=0):
    env = DoomEnvironment(params,
                          idx=idx,
                          is_train=is_train,
                          use_shaping=params.use_shaping)
    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(env.reset())
        else:
            obs, reward, done, info = env.step(action)
            pipe.send((obs, reward, done, info))
Example #5
def eval_model(model,
               params,
               logger,
               step,
               train_iters,
               num_games,
               movie=True,
               is_train=False):
    agent = BaseAgent(model, params)

    print('agent created')
    if params.multimaze:
        accumulated_rewards = 0
        for env_idx in range(params.num_environments):
            env = DoomEnvironment(params, idx=env_idx, is_train=is_train)
            mean_reward = eval_agent_multi(agent,
                                           env,
                                           logger,
                                           params,
                                           step,
                                           train_iters,
                                           num_games=10,
                                           env_idx=env_idx,
                                           movie=movie)
            accumulated_rewards += mean_reward
        logger.write(
            'Step: {:04d}, Iter: {:08d} Eval mean reward: {:.3f}'.
            format(step, train_iters,
                   accumulated_rewards / params.num_environments))
    else:
        env = DoomEnvironment(params)
        print('eval agent')
        eval_agent(agent,
                   env,
                   logger,
                   params,
                   step,
                   train_iters,
                   num_games,
                   movie=movie)
Example #6
    def __init__(self, env_id, num_envs, num_processes, params):

        self.in_queues = [mp.Queue() for _ in range(num_envs)]
        self.out_queues = [mp.Queue() for _ in range(num_envs)]
        self.workers = []

        for in_queue, out_queue in zip(self.in_queues, self.out_queues):
            print('Creating environment')
            process = mp.Process(target=worker,
                                 args=(in_queue, out_queue, params))
            self.workers.append(process)
            process.start()

        #print('There are {} workers'.format(len(self.workers)))

        assert env_id == 'doom', 'Multiprocessing is only implemented for the Doom environment'
        tmp_env = DoomEnvironment(params)
        self.num_actions = tmp_env.num_actions
        self.obs_shape = (3, params.screen_height, params.screen_width)
        self.prep = False  # Observations already in CxHxW order
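    # A hedged sketch of how this class might fan actions out over the queues
    # created above; the step method and its return format are assumptions
    # inferred from the worker protocol, not shown in the original snippet.
    def step(self, actions):
        # Send one action per worker, then collect (obs, reward, done, info)
        # tuples in the order the queues were created.
        for in_queue, action in zip(self.in_queues, actions):
            in_queue.put(action)
        return [out_queue.get() for out_queue in self.out_queues]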
Example #7
def worker(in_queue, out_queue, params):
    env = DoomEnvironment(params)
    while True:
        action = in_queue.get()
        if action is None:
            break
        elif action == 'reset':
            out_queue.put(env.reset())
        elif action == 'depth_trim':
            out_queue.put(env.get_depth()[2:-2, 2:-2])
        elif action == 'depth':
            out_queue.put(env.get_depth())
        else:
            obs, reward, done, info = env.step(action)
            out_queue.put((obs, reward, done, info))
Example #8
def evaluate_saved_model():
    params = parse_game_args()
    env = DoomEnvironment(params)

    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = EgoMap0_Policy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        params.model_checkpoint), 'The model could not be loaded'
    # map_location remaps storages to the CPU so a GPU-saved checkpoint loads without CUDA
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])

    agent = BaseAgent(actor_critic, params)

    for i in range(10):
        movie_name = 'sixitem_egomap_attention_tests_pca_{:04d}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(agent, env, movie_name, params)
Example #9
    def __init__(self, env_id, num_envs, num_processes, params):
        if env_id == 'doom':
            # for the doom scenarios
            self.envs = [DoomEnvironment(params) for i in range(num_envs)]
            self.num_actions = self.envs[0].num_actions
            self.obs_shape = (3, params.screen_height, params.screen_width)
            self.prep = False  # Observations already in CxHxW order
        elif env_id == 'home':
            assert 0, 'HoME has not been implemented yet'
        else:
            # if testing on Atari games such as Pong etc
            self.envs = [
                wrap_deepmind(make_atari(env_id)) for i in range(num_envs)
            ]
            observation_space = self.envs[0].observation_space
            obs_shape = observation_space.shape
            observation_space = Box(observation_space.low[0, 0, 0],
                                    observation_space.high[0, 0, 0],
                                    [obs_shape[2], obs_shape[1], obs_shape[0]])
            action_space = self.envs[0].action_space

            self.num_actions = action_space.n
            self.obs_shape = observation_space.shape
            self.prep = True
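    # Hypothetical helper (name and call site are assumptions) illustrating what
    # the `prep` flag implies: Atari frames arrive as HxWxC numpy arrays and
    # need transposing to the CxHxW layout the Doom environments already use.
    def _maybe_transpose(self, obs):
        return obs.transpose(2, 0, 1) if self.prep else obs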
Example #10
def eval_model(model, params, logger, step, train_iters, num_games):
    env = DoomEnvironment(params)
    agent = BaseAgent(model, params)

    eval_agent(agent, env, logger, params, step, train_iters, num_games)
Example #11
    clip.write_videofile('{}eval{:04d}_{:05.0f}.mp4'.format(
        output_dir, step, score * 100))


if __name__ == '__main__':
    # Test to improve movie with action probs, values etc

    params = parse_game_args()
    params.norm_obs = False
    params.recurrent_policy = True
    envs = MultiEnvs(params.simulator, 1, 1, params)
    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])
    model = CNNPolicy(obs_shape[0], envs.num_actions, params.recurrent_policy,
                      obs_shape)
    env = DoomEnvironment(params)
    agent = BaseAgent(model, params)

    env.reset()
    agent.reset()

    rewards = []
    obss = []
    actions = []
    action_probss = []
    values = []

    while not env.is_episode_finished():
        obs = env.get_observation()
        # action = agent.get_action(obs, epsilon=0.0)
        action, value, action_probs = agent.get_action_value_and_probs(
Example #12
def pipe_worker(pipe, params, is_train, idx=0):
    env = DoomEnvironment(params,
                          idx=idx,
                          is_train=is_train,
                          use_shaping=params.use_shaping)
    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(env.reset())

        elif action == 'depth_trim':
            pipe.send(env.get_depth()[2:-2, 2:-2])

        elif action == 'depth':
            pipe.send(env.get_depth())

        elif action == 'ego_depth':
            pipe.send(env.get_ego_depth())

        elif action == 'ego_depth_trim':
            pipe.send(env.get_ego_depth()[2:-2, 2:-2])

        elif action == 'deltas':
            pipe.send(env.get_player_deltas())

        elif action == 'positions':
            pipe.send(env.get_player_position())

        elif action == 'origins':
            pipe.send(env.get_player_origins())

        elif action == 'pos_deltas_origins':
            pipe.send(env.get_player_pos_delta_origin())

        elif action == 'loops':
            pipe.send(env.get_loop())
        else:
            obs, reward, done, info = env.step(action)
            pipe.send((obs, reward, done, info))
Example #13
def worker(in_queue, out_queue, params, is_train, idx=0):
    env = DoomEnvironment(params,
                          idx=idx,
                          is_train=is_train,
                          use_shaping=params.use_shaping)
    while True:
        action = in_queue.get()
        if action is None:
            break
        elif action == 'reset':
            out_queue.put(env.reset())

        elif action == 'depth_trim':
            out_queue.put(env.get_depth()[2:-2, 2:-2])

        elif action == 'depth':
            out_queue.put(env.get_depth())

        elif action == 'ego_depth':
            out_queue.put(env.get_ego_depth())

        elif action == 'ego_depth_trim':
            out_queue.put(env.get_ego_depth()[2:-2, 2:-2])

        elif action == 'deltas':
            out_queue.put(env.get_player_deltas())

        elif action == 'positions':
            out_queue.put(env.get_player_position())

        elif action == 'origins':
            out_queue.put(env.get_player_origins())

        elif action == 'pos_deltas_origins':
            out_queue.put(env.get_player_pos_delta_origin())

        else:
            obs, reward, done, info = env.step(action)
            out_queue.put((obs, reward, done, info))
Example #14
def gen_classic(selh, file, scenario=False, model="model_final"):
    params = parse_game_args()


    # Load the scenario
    if not scenario:
        params.scenario = "custom_scenario003.cfg"
    else:
        params.scenario = scenario

    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")  # hard-coded to CPU

    num_actions = env.num_actions

    # Load the base model
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)

    checkpoint = torch.load('models/' + model + '.pth.tar', map_location=lambda storage, loc: storage)

    """Remplacement des clefs du dictionnaire qui posent problème"""

    checkpoint['model']["dist.linear.weight"] = checkpoint['model']["dist_linear.weight"]
    del checkpoint['model']["dist_linear.weight"]
    checkpoint['model']["dist.linear.bias"] = checkpoint['model']["dist_linear.bias"]
    del checkpoint['model']["dist_linear.bias"]

    network.load_state_dict(checkpoint['model'])

    agent = BaseAgent(network, params)

    ERU = {'env': env, 'agent': agent}

    # Load the checkpoints
    num_checkpoints = [98, 98, 159]
    checkpoints = [1]*sum(num_checkpoints)
    networks = [1]*sum(num_checkpoints)
    agents = [1]*sum(num_checkpoints)
    ERUs = [1]*sum(num_checkpoints)

    for i in range(len(num_checkpoints)):
        for j in range(num_checkpoints[i]):
            iter = i*num_checkpoints[0]+j

           # if i==0:
           #     checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 88))
            #else:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 1))

            checkpoints[i*num_checkpoints[0]+j] = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage)

            """Remplacement des clefs du dictionnaire qui posent problème"""

            checkpoints[iter]['model']["dist.linear.weight"] = checkpoints[iter]['model']["dist_linear.weight"]
            del checkpoints[iter]['model']["dist_linear.weight"]
            checkpoints[iter]['model']["dist.linear.bias"] = checkpoints[iter]['model']["dist_linear.bias"]
            del checkpoints[iter]['model']["dist_linear.bias"]

            networks[iter] = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
            networks[iter].load_state_dict(checkpoints[iter]['model'])

            agents[iter] = BaseAgent(networks[iter], params)

            ERUs[iter] = {'env': env, 'agent': agents[iter]}

            ERUs[iter]['env'].reset()

    selhs = []
    for i in range(sum(num_checkpoints)):
        selh = tsne_1d_projection(127)
        selh = torch.from_numpy(selh).type(torch.FloatTensor)
        selh = Variable(selh, volatile=True)
        selhs.append(selh)


    scores = []
    hiddens = []
    inputs = []
    actions = []

    # Loop to collect observations from the base model

    obss = []
    actions = []

    for i in range(50):
        obs = ERU['env'].get_observation()
        action, value, action_probs, grads = ERU['agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)
        ERU['env'].make_action(int(action))
        obss.append(obs)
        actions.append(action)


    # Loop to evaluate the checkpoints on the situations gathered from the base model

    for i in range(sum(num_checkpoints)):

        for obs2 in obss:
            action, value, action_probs, grads = ERUs[i]['agent'].get_action_value_and_probs_zeroes(obs2, selhs[i], epsilon=0.0)

        hidden = ERUs[i]['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]

        h = h.split(',')
        hiddens.append(h)

        ERU['env'].make_action(int(action))

    im = Image.new('P', (sum(num_checkpoints), 128))
    for i in range(len(hiddens)):
        for j in range(len(hiddens[i])):
            value = int((float(hiddens[i][j])+1)*255/2)
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("timeline.png")

    im = Image.new('P', (sum(num_checkpoints)-1, 128))
    for i in range(len(hiddens)-1):
        for j in range(len(hiddens[i])):
            value = int((abs(float(hiddens[i][j])-float(hiddens[i+1][j])))*255*1.5)
            if value>255:
                value=255
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("variation.png")
Example #15
            rewards.append(reward)

            actions.append(action)
            action_probss.append(action_probs)
            values.append(value)
            k += 1

            # if k > 500:
            #     break
        return ego_reads


if __name__ == '__main__':

    params = parse_game_args()
    env = DoomEnvironment(params)

    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = EgoMap0_Policy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        params.model_checkpoint), 'The model could not be loaded'
    # map_location remaps storages to the CPU so a GPU-saved checkpoint loads without CUDA
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])

    agent = BaseAgent(actor_critic, params)
Example #16
def gen_classic(selh, file):
    params = parse_game_args()
    params.scenario = "health_gathering_supreme.cfg"
    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")  # hard-coded to CPU

    num_actions = env.num_actions
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)

    checkpoint = torch.load('models/' + "health_gathering_supreme" +
                            '.pth.tar',
                            map_location=lambda storage, loc: storage)
    network.load_state_dict(checkpoint['model'])

    agent = BaseAgent(network, params)

    ERU = {'env': env, 'agent': agent}

    selh = torch.from_numpy(selh).type(torch.FloatTensor)

    selh = Variable(selh, volatile=True)

    ERU['env'].set_seed(randint(0, 999999999))
    ERU['env'].reset()

    scores = []
    hiddens = []
    inputs = []
    saliencies = []
    actions = []
    probabilities = []
    health = []
    positions = []
    orientations = []
    velocities = []
    items = []
    fov = []

    w = 0

    while not ERU['env'].is_episode_finished():
        observation = io.BytesIO()

        obs = ERU['env'].get_observation()
        temp = ERU['env'].state.screen_buffer
        Image.fromarray(temp.transpose(1, 2, 0)).save(observation,
                                                      format="JPEG")
        action, value, action_probs, grads = ERU[
            'agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)

        hidden = ERU['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]

        h = h.split(',')
        probs = ""
        for elem in action_probs[0]:
            probs += str(elem) + ","
        probs = probs[:-1]

        probs = probs.split(',')
        sa = io.BytesIO()

        t = Image.fromarray(grads, 'L')

        t.save(sa, format="JPEG")

        scores.append(str(round(ERU['env'].game.get_total_reward(), 2)))
        hiddens.append(h)
        inputs.append(base64.b64encode(observation.getvalue()))
        saliencies.append(base64.b64encode(sa.getvalue()))
        actions.append(str(action))
        probabilities.append(probs)
        health.append(ERU['env'].get_health())
        positions.append(ERU['env'].get_pos())
        orientations.append(ERU['env'].get_ori())
        velocities.append(ERU['env'].get_velo())
        items.append(ERU['env'].get_item())
        fov.append(ERU['env'].get_fov())

        ERU['env'].make_action(int(action))
        print('Iteration', w, '/525')
        w += 1

    result = {
        'episode0': {
            'inputs': inputs,
            'actions': actions,
            'probabilities': probabilities,
            'saliencies': saliencies,
            'scores': scores,
            'positions': positions,
            'health': health,
            'hiddens': hiddens,
            'orientations': orientations,
            'velocities': velocities,
            'items': items,
            'fov': fov
        }
    }
    with open(file, 'w') as f:
        ujson.dump(result, f, indent=4, sort_keys=True)
    return result
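# A small, assumed usage sketch showing how the episode file written above
# could be read back; the keys mirror the `result` dict built in gen_classic.
import ujson

def load_episode(path):
    with open(path) as f:
        data = ujson.load(f)
    episode = data['episode0']
    return episode['inputs'], episode['actions'], episode['scores']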
Example #17
from doom_a2c.arguments import parse_game_args

#####################################################################
# This script presents SPECTATOR mode. In SPECTATOR mode you play and
# your agent can learn from it.
# Configuration is loaded from the "../../scenarios/<SCENARIO_NAME>.cfg" file.
# 
# To see the scenario description go to "../../scenarios/README.md"
#####################################################################

params = parse_game_args()
params.scenario = 'mino_maze_simple.cfg'
params.limit_actions = True
params.show_window = True
params.no_reward_average = True
env = DoomEnvironment(params, use_shaping=True)


env.reset()
episodes = 10
inv_action_map = {tuple(v): k for k, v in env.action_map.items()}
for i in range(episodes):
    print("Episode #" + str(i + 1))

    rewards = []
    done = False
    while not done:

        action = env.game.get_last_action()
        action_bool = tuple(bool(a) for a in action)