def pipe_worker2(pipe, params, is_train, idx_range=[0]):
    envs_queue = deque()
    for idx in idx_range:
        env = DoomEnvironment(params, idx=idx, is_train=is_train,
                              use_shaping=params.use_shaping, fixed_scenario=True)
        obs = env.reset()
        envs_queue.append((obs, env))

    obs, cur_env = envs_queue.pop()

    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(cur_env.reset())
        else:
            obs, reward, done, info = cur_env.step(action)
            if done:
                envs_queue.append((obs, cur_env))
                obs, cur_env = envs_queue.popleft()
            pipe.send((obs, reward, done, info))
def evaluate_saved_model():
    params = parse_game_args()
    env = DoomEnvironment(params, is_train=True)
    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(params.model_checkpoint), 'The model could not be loaded'
    # This lambda stuff is required otherwise it will try and load on GPU
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])

    base_filename = params.model_checkpoint.split('.')[0].split('/')[1]

    agent = BaseAgent(actor_critic, params)

    for i in range(params.num_mazes_test):
        env = DoomEnvironment(params, idx=i, is_train=True)
        movie_name = 'videos/{}_rollout_{:0004}.mp4'.format(base_filename, i)
        print('Creating movie {}'.format(movie_name))
        make_movie(agent, env, movie_name, params)
def pipe_worker2(pipe, params, is_train, idx_range=[0]):
    envs_queue = deque()
    for idx in idx_range:
        env = DoomEnvironment(params, idx=idx, is_train=is_train,
                              use_shaping=params.use_shaping, fixed_scenario=True)
        obs = env.reset()
        envs_queue.append((obs, env))

    obs, cur_env = envs_queue.pop()

    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(cur_env.reset())
        elif action == 'depth_trim':
            pipe.send(cur_env.get_depth()[2:-2, 2:-2])
        elif action == 'depth':
            pipe.send(cur_env.get_depth())
        elif action == 'ego_depth':
            pipe.send(cur_env.get_ego_depth())
        elif action == 'ego_depth_trim':
            pipe.send(cur_env.get_ego_depth()[2:-2, 2:-2])
        elif action == 'deltas':
            pipe.send(cur_env.get_player_deltas())
        elif action == 'positions':
            pipe.send(cur_env.get_player_position())
        elif action == 'origins':
            pipe.send(cur_env.get_player_origins())
        elif action == 'pos_deltas_origins':
            pipe.send(cur_env.get_player_pos_delta_origin())
        elif action == 'loops':
            pipe.send(cur_env.get_loop())
        else:
            obs, reward, done, info = cur_env.step(action)
            if done:
                # Rotate to the next environment in the queue when an episode ends.
                envs_queue.append((obs, cur_env))
                obs, cur_env = envs_queue.popleft()
            pipe.send((obs, reward, done, info))
def pipe_worker(pipe, params, is_train, idx=0):
    env = DoomEnvironment(params, idx=idx, is_train=is_train,
                          use_shaping=params.use_shaping)

    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(env.reset())
        else:
            obs, reward, done, info = env.step(action)
            pipe.send((obs, reward, done, info))
def eval_model(model, params, logger, step, train_iters, num_games, movie=True, is_train=False):
    agent = BaseAgent(model, params)
    print('agent created')

    if params.multimaze:
        accumulated_rewards = 0
        for env_idx in range(params.num_environments):
            env = DoomEnvironment(params, idx=env_idx, is_train=is_train)
            mean_reward = eval_agent_multi(agent, env, logger, params, step, train_iters,
                                           num_games=10, env_idx=env_idx, movie=movie)
            accumulated_rewards += mean_reward
        logger.write('Step: {:0004}, Iter: {:000000008} Eval mean reward: {:0003.3f}'.format(
            step, train_iters, accumulated_rewards / params.num_environments))
    else:
        env = DoomEnvironment(params)
        print('eval agent')
        eval_agent(agent, env, logger, params, step, train_iters, num_games, movie=movie)
def __init__(self, env_id, num_envs, num_processes, params):
    self.in_queues = [mp.Queue() for _ in range(num_envs)]
    self.out_queues = [mp.Queue() for _ in range(num_envs)]

    self.workers = []
    for in_queue, out_queue in zip(self.in_queues, self.out_queues):
        print('Creating environment')
        process = mp.Process(target=worker, args=(in_queue, out_queue, params))
        self.workers.append(process)
        process.start()
    # print('There are {} workers'.format(len(self.workers)))

    assert env_id == 'doom', 'Multiprocessing only implemented for doom environment'
    tmp_env = DoomEnvironment(params)
    self.num_actions = tmp_env.num_actions
    self.obs_shape = (3, params.screen_height, params.screen_width)
    self.prep = False  # Observations already in CxHxW order
def worker(in_queue, out_queue, params):
    env = DoomEnvironment(params)

    while True:
        action = in_queue.get()
        if action is None:
            break
        elif action == 'reset':
            out_queue.put(env.reset())
        elif action == 'depth_trim':
            out_queue.put(env.get_depth()[2:-2, 2:-2])
        elif action == 'depth':
            out_queue.put(env.get_depth())
        else:
            obs, reward, done, info = env.step(action)
            out_queue.put((obs, reward, done, info))
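# A minimal sketch (assumed, not taken from the repository) of how the parent
# process could drive these queue-based workers: one action is put on each
# in_queue, then the (obs, reward, done, info) tuples are collected from the
# out_queues in the same order. The helper name `step_all` is hypothetical.
def step_all(in_queues, out_queues, actions):
    for in_queue, action in zip(in_queues, actions):
        in_queue.put(action)                                   # dispatch one action per worker
    results = [out_queue.get() for out_queue in out_queues]    # blocking collect, same order
    obs, rewards, dones, infos = zip(*results)
    return obs, rewards, dones, infos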
def evaluate_saved_model():
    params = parse_game_args()
    env = DoomEnvironment(params)
    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = EgoMap0_Policy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(params.model_checkpoint), 'The model could not be loaded'
    # This lambda stuff is required otherwise it will try and load on GPU
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])
    agent = BaseAgent(actor_critic, params)

    for i in range(10):
        movie_name = 'sixitem_egomap_attention_tests_pca_{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(agent, env, movie_name, params)
def __init__(self, env_id, num_envs, num_processes, params):
    if env_id == 'doom':  # for the doom scenarios
        self.envs = [DoomEnvironment(params) for i in range(num_envs)]
        self.num_actions = self.envs[0].num_actions
        self.obs_shape = (3, params.screen_height, params.screen_width)
        self.prep = False  # Observations already in CxHxW order
    elif env_id == 'home':
        assert 0, 'HoME has not been implemented yet'
    else:  # if testing on Atari games such as Pong etc
        self.envs = [wrap_deepmind(make_atari(env_id)) for i in range(num_envs)]
        observation_space = self.envs[0].observation_space
        obs_shape = observation_space.shape
        observation_space = Box(observation_space.low[0, 0, 0],
                                observation_space.high[0, 0, 0],
                                [obs_shape[2], obs_shape[1], obs_shape[0]])
        action_space = self.envs[0].action_space
        self.num_actions = action_space.n
        self.obs_shape = observation_space.shape
        self.prep = True
def eval_model(model, params, logger, step, train_iters, num_games):
    env = DoomEnvironment(params)
    agent = BaseAgent(model, params)
    eval_agent(agent, env, logger, params, step, train_iters, num_games)
    clip.write_videofile('{}eval{:0004}_{:00005.0f}.mp4'.format(
        output_dir, step, score * 100))


if __name__ == '__main__':
    # Test to improve movie with action probs, values etc
    params = parse_game_args()
    params.norm_obs = False
    params.recurrent_policy = True
    envs = MultiEnvs(params.simulator, 1, 1, params)
    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])
    model = CNNPolicy(obs_shape[0], envs.num_actions, params.recurrent_policy, obs_shape)
    env = DoomEnvironment(params)
    agent = BaseAgent(model, params)

    env.reset()
    agent.reset()
    rewards = []
    obss = []
    actions = []
    action_probss = []
    values = []

    while not env.is_episode_finished():
        obs = env.get_observation()
        # action = agent.get_action(obs, epsilon=0.0)
        action, value, action_probs = agent.get_action_value_and_probs(
def pipe_worker(pipe, params, is_train, idx=0):
    env = DoomEnvironment(params, idx=idx, is_train=is_train,
                          use_shaping=params.use_shaping)

    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(env.reset())
        elif action == 'depth_trim':
            pipe.send(env.get_depth()[2:-2, 2:-2])
        elif action == 'depth':
            pipe.send(env.get_depth())
        elif action == 'ego_depth':
            pipe.send(env.get_ego_depth())
        elif action == 'ego_depth_trim':
            pipe.send(env.get_ego_depth()[2:-2, 2:-2])
        elif action == 'deltas':
            pipe.send(env.get_player_deltas())
        elif action == 'positions':
            pipe.send(env.get_player_position())
        elif action == 'origins':
            pipe.send(env.get_player_origins())
        elif action == 'pos_deltas_origins':
            pipe.send(env.get_player_pos_delta_origin())
        elif action == 'loops':
            pipe.send(env.get_loop())
        else:
            obs, reward, done, info = env.step(action)
            pipe.send((obs, reward, done, info))
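# A minimal sketch (assumed, not part of the repository) of how a parent process
# could launch pipe_worker and drive it over a multiprocessing Pipe. It only uses
# the commands the worker above understands: 'reset', special string queries such
# as 'depth', an integer action to step, and None to shut the worker down. The
# helper name `launch_pipe_worker` is hypothetical; with the spawn start method,
# call it from under an `if __name__ == '__main__':` guard.
import multiprocessing as mp

def launch_pipe_worker(params, is_train=True, idx=0):
    parent_conn, child_conn = mp.Pipe()
    process = mp.Process(target=pipe_worker, args=(child_conn, params, is_train, idx))
    process.start()

    parent_conn.send('reset')                    # ask the worker to reset its environment
    obs = parent_conn.recv()                     # initial observation

    parent_conn.send(0)                          # step with action index 0
    obs, reward, done, info = parent_conn.recv()

    parent_conn.send(None)                       # tell the worker to exit its loop
    process.join()
    return obs, reward, done, info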
def worker(in_queue, out_queue, params, is_train, idx=0):
    env = DoomEnvironment(params, idx=idx, is_train=is_train,
                          use_shaping=params.use_shaping)

    while True:
        action = in_queue.get()
        if action is None:
            break
        elif action == 'reset':
            out_queue.put(env.reset())
        elif action == 'depth_trim':
            out_queue.put(env.get_depth()[2:-2, 2:-2])
        elif action == 'depth':
            out_queue.put(env.get_depth())
        elif action == 'ego_depth':
            out_queue.put(env.get_ego_depth())
        elif action == 'ego_depth_trim':
            out_queue.put(env.get_ego_depth()[2:-2, 2:-2])
        elif action == 'deltas':
            out_queue.put(env.get_player_deltas())
        elif action == 'positions':
            out_queue.put(env.get_player_position())
        elif action == 'origins':
            out_queue.put(env.get_player_origins())
        elif action == 'pos_deltas_origins':
            out_queue.put(env.get_player_pos_delta_origin())
        else:
            obs, reward, done, info = env.step(action)
            out_queue.put((obs, reward, done, info))
def gen_classic(selh, file, scenario=False, model="model_final"):
    params = parse_game_args()

    # Load the scenario
    if not scenario:
        params.scenario = "custom_scenario003.cfg"
    else:
        params.scenario = scenario
    env = DoomEnvironment(params)
    device = torch.device("cuda" if False else "cpu")
    num_actions = env.num_actions

    # Load the base model
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
    checkpoint = torch.load('models/' + model + '.pth.tar',
                            map_location=lambda storage, loc: storage)
    # Rename the state-dict keys that cause problems
    checkpoint['model']["dist.linear.weight"] = checkpoint['model']["dist_linear.weight"]
    del checkpoint['model']["dist_linear.weight"]
    checkpoint['model']["dist.linear.bias"] = checkpoint['model']["dist_linear.bias"]
    del checkpoint['model']["dist_linear.bias"]
    network.load_state_dict(checkpoint['model'])
    agent = BaseAgent(network, params)
    ERU = {'env': env, 'agent': agent}

    # Load the checkpoints
    num_checkpoints = [98, 98, 159]
    checkpoints = [1] * sum(num_checkpoints)
    networks = [1] * sum(num_checkpoints)
    agents = [1] * sum(num_checkpoints)
    ERUs = [1] * sum(num_checkpoints)
    for i in range(len(num_checkpoints)):
        for j in range(num_checkpoints[i]):
            iter = i * num_checkpoints[0] + j
            # if i == 0:
            #     checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 88))
            # else:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 1))
            checkpoints[iter] = torch.load(checkpoint_filename,
                                           map_location=lambda storage, loc: storage)
            # Rename the state-dict keys that cause problems
            checkpoints[iter]['model']["dist.linear.weight"] = checkpoints[iter]['model']["dist_linear.weight"]
            del checkpoints[iter]['model']["dist_linear.weight"]
            checkpoints[iter]['model']["dist.linear.bias"] = checkpoints[iter]['model']["dist_linear.bias"]
            del checkpoints[iter]['model']["dist_linear.bias"]
            networks[iter] = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
            networks[iter].load_state_dict(checkpoints[iter]['model'])
            agents[iter] = BaseAgent(networks[iter], params)
            ERUs[iter] = {'env': env, 'agent': agents[iter]}
            ERUs[iter]['env'].reset()

    selhs = []
    for i in range(sum(num_checkpoints)):
        selh = tsne_1d_projection(127)
        selh = torch.from_numpy(selh).type(torch.FloatTensor)
        selh = Variable(selh, volatile=True)
        selhs.append(selh)

    scores = []
    hiddens = []
    inputs = []
    actions = []

    # Loop to collect observations from the base model
    obss = []
    actions = []
    for i in range(50):
        obs = ERU['env'].get_observation()
        action, value, action_probs, grads = ERU['agent'].get_action_value_and_probs_zeroes(
            obs, selh, epsilon=0.0)
        ERU['env'].make_action(int(action))
        obss.append(obs)
        actions.append(action)

    # Loop to evaluate the checkpoints on the situations produced by the base model
    for i in range(sum(num_checkpoints)):
        for obs2 in obss:
            action, value, action_probs, grads = ERUs[i]['agent'].get_action_value_and_probs_zeroes(
                obs2, selhs[i], epsilon=0.0)
            hidden = ERUs[i]['agent'].model.get_gru_h()
            h = ''
            for elem in hidden[0][0]:
                h += str(elem) + ","
            h = h[:-1]
            h = h.split(',')
            hiddens.append(h)
            ERU['env'].make_action(int(action))

    im = Image.new('P', (sum(num_checkpoints), 128))
    for i in range(len(hiddens)):
        for j in range(len(hiddens[i])):
            value = int((float(hiddens[i][j]) + 1) * 255 / 2)
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
im.save("timeline.png") im = Image.new('P', (sum(num_checkpoints)-1, 128)) for i in range(len(hiddens)-1): for j in range(len(hiddens[i])): value = int((abs(float(hiddens[i][j])-float(hiddens[i+1][j])))*255*1.5) if value>255: value=255 im.putpixel((i, j), (value, value, value, 255)) im.show() im.save("variation.png")
        rewards.append(reward)
        actions.append(action)
        action_probss.append(action_probs)
        values.append(value)
        k += 1
        # if k > 500:
        #     break

    return ego_reads


if __name__ == '__main__':
    params = parse_game_args()
    env = DoomEnvironment(params)
    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = EgoMap0_Policy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(params.model_checkpoint), 'The model could not be loaded'
    # This lambda stuff is required otherwise it will try and load on GPU
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])
    agent = BaseAgent(actor_critic, params)
def gen_classic(selh, file):
    params = parse_game_args()
    params.scenario = "health_gathering_supreme.cfg"
    env = DoomEnvironment(params)
    device = torch.device("cuda" if False else "cpu")
    num_actions = env.num_actions

    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
    checkpoint = torch.load('models/' + "health_gathering_supreme" + '.pth.tar',
                            map_location=lambda storage, loc: storage)
    network.load_state_dict(checkpoint['model'])
    agent = BaseAgent(network, params)
    ERU = {'env': env, 'agent': agent}

    selh = torch.from_numpy(selh).type(torch.FloatTensor)
    selh = Variable(selh, volatile=True)

    ERU['env'].set_seed(randint(0, 999999999))
    ERU['env'].reset()

    scores = []
    hiddens = []
    inputs = []
    saliencies = []
    actions = []
    probabilities = []
    health = []
    positions = []
    orientations = []
    velocities = []
    items = []
    fov = []

    w = 0
    while not ERU['env'].is_episode_finished():
        observation = io.BytesIO()
        obs = ERU['env'].get_observation()
        temp = ERU['env'].state.screen_buffer
        Image.fromarray(temp.transpose(1, 2, 0)).save(observation, format="JPEG")

        action, value, action_probs, grads = ERU['agent'].get_action_value_and_probs_zeroes(
            obs, selh, epsilon=0.0)

        hidden = ERU['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]
        h = h.split(',')

        probs = ""
        for elem in action_probs[0]:
            probs += str(elem) + ","
        probs = probs[:-1]
        probs = probs.split(',')

        sa = io.BytesIO()
        t = Image.fromarray(grads, 'L')
        t.save(sa, format="JPEG")

        scores.append(str(round(ERU['env'].game.get_total_reward(), 2)))
        hiddens.append(h)
        inputs.append(base64.b64encode(observation.getvalue()))
        saliencies.append(base64.b64encode(sa.getvalue()))
        actions.append(str(action))
        probabilities.append(probs)
        health.append(ERU['env'].get_health())
        positions.append(ERU['env'].get_pos())
        orientations.append(ERU['env'].get_ori())
        velocities.append(ERU['env'].get_velo())
        items.append(ERU['env'].get_item())
        fov.append(ERU['env'].get_fov())

        ERU['env'].make_action(int(action))
        print('Iteration', w, '/525')
        w += 1

    result = {
        'episode0': {
            'inputs': inputs,
            'actions': actions,
            'probabilities': probabilities,
            'saliencies': saliencies,
            'scores': scores,
            'positions': positions,
            'health': health,
            'hiddens': hiddens,
            'orientations': orientations,
            'velocities': velocities,
            'items': items,
            'fov': fov
        }
    }
    with open(file, 'w') as f:
        ujson.dump(result, f, indent=4, sort_keys=True)
    return result
from doom_a2c.arguments import parse_game_args

#####################################################################
# This script presents SPECTATOR mode. In SPECTATOR mode you play and
# your agent can learn from it.
# Configuration is loaded from "../../scenarios/<SCENARIO_NAME>.cfg" file.
#
# To see the scenario description go to "../../scenarios/README.md"
#####################################################################

params = parse_game_args()
params.scenario = 'mino_maze_simple.cfg'
params.limit_actions = True
params.show_window = True
params.no_reward_average = True
env = DoomEnvironment(params, use_shaping=True)
env.reset()

episodes = 10

inv_action_map = {tuple(v): k for k, v in env.action_map.items()}

for i in range(episodes):
    print("Episode #" + str(i + 1))
    rewards = []
    done = False
    while not done:
        action = env.game.get_last_action()
        action_bool = tuple(bool(a) for a in action)