def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"

    # The map_location lambda is required, otherwise torch tries to load the weights on the GPU.
    checkpoint = torch.load("0.pth.tar", map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

    for i in range(1, 64):  # for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=False)
        movie_name = 'videos/3 - 12-04-21 - base/big_combs_test{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)

def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    name = "Final on enclave"
    args.scenario_dir = "scenarios_transfer_learning/scenes/"

    # The map_location lambda is required, otherwise torch tries to load the weights on the GPU.
    checkpoint = torch.load("final.pth.tar", map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

    args.scenario = "custom_scenario000.cfg"
    env = DoomEnvironment(args, is_train=False)
    movie_name = '/home/adam/Bureau/Visuels/0 - Rollout faits main/{}.mp4'.format(name)
    print('Creating movie {}'.format(movie_name))
    make_movie(policy, env, movie_name, args)

def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=False)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    results = []
    for model in range(0, 2):
        # The map_location lambda is required, otherwise torch tries to load the weights on the GPU.
        checkpoint = torch.load(str(model) + ".pth.tar", map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

        results.append([])
        for i in range(args.num_mazes_test):
            env = DoomEnvironment(args, idx=i, is_train=False)
            results[model].append(get_results(policy, env, args))
            print(i)

        success_rate = 0
        average_reward = 0
        average_time = 0
        for res in results[model]:
            if res[1] < 525:  # episodes shorter than 525 steps count as successes
                success_rate += 1
                average_time += res[1]
            average_reward += res[0]
        if success_rate != 0:
            average_time /= success_rate
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        print(success_rate, average_reward, average_time)

    # Compare completion times of the two checkpoints on the levels the second one finished.
    time_diff = 0
    finished_levels = 0
    for i in range(args.num_mazes_test):
        if results[1][i][1] < 525:
            finished_levels += 1
            time_diff += results[1][i][1] - results[0][i][1]
    print(time_diff / finished_levels)

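# --- Sketch (assumption, not in the original source): throughout these scripts,
# --- get_results(policy, env, args) is read as returning (episode_reward,
# --- episode_length), with a level counted as solved when the episode ends before
# --- the 525-step timeout. A small hypothetical helper making that rule explicit:
def summarize(results, timeout=525):
    solved = [r for r in results if r[1] < timeout]
    success_rate = len(solved) / len(results)
    average_reward = sum(r[0] for r in results) / len(results)
    # average completion time is only meaningful over the solved levels
    average_time = sum(r[1] for r in solved) / len(solved) if solved else 0
    return success_rate, average_reward, average_time
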
def pipe_worker(pipe, params, is_train, idx=0):
    env = DoomEnvironment(params, idx=idx, is_train=is_train, use_shaping=params.use_shaping)
    while True:
        action = pipe.recv()
        if action is None:  # sentinel: shut the worker down
            break
        elif action == 'reset':
            pipe.send(env.reset())
        else:
            obs, reward, done, info = env.step(action)
            pipe.send((obs, reward, done, info))

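# --- Usage sketch (not in the original source): pipe_worker is meant to run in a
# --- child process, with the parent driving it through one end of a
# --- multiprocessing.Pipe. Assumes `params` is the parsed-args object the rest of
# --- this code builds with parse_a2c_args().
from multiprocessing import Pipe, Process

def run_one_episode_through_pipe(params):
    parent_end, child_end = Pipe()
    worker = Process(target=pipe_worker, args=(child_end, params, True))
    worker.start()
    parent_end.send('reset')                # worker replies with the first observation
    obs = parent_end.recv()
    done = False
    while not done:
        parent_end.send(0)                  # always take action 0; a policy would go here
        obs, reward, done, info = parent_end.recv()
    parent_end.send(None)                   # sentinel understood by pipe_worker: shut down
    worker.join()
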
def evaluate_saved_model():
    args = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda is required, otherwise torch tries to load the weights on the GPU.
    checkpoint = torch.load(args.model_checkpoint, map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=True)
        movie_name = 'videos/rollout_{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)

def pipe_worker2(pipe, params, is_train, idx_range=(0,)):
    # Round-robin over a set of fixed-scenario environments: when one episode
    # ends, the finished env goes to the back of the queue and the next one
    # becomes current.
    envs_queue = deque()
    for idx in idx_range:
        env = DoomEnvironment(params, idx=idx, is_train=is_train,
                              use_shaping=params.use_shaping, fixed_scenario=True)
        obs = env.reset()
        envs_queue.append((obs, env))
    obs, cur_env = envs_queue.pop()

    while True:
        action = pipe.recv()
        if action is None:  # sentinel: shut the worker down
            break
        elif action == 'reset':
            # bug fix: was env.reset(), a stale reference to the last env built
            pipe.send(cur_env.reset())
        else:
            obs, reward, done, info = cur_env.step(action)
            if done:
                envs_queue.append((obs, cur_env))
                obs, cur_env = envs_queue.popleft()
            pipe.send((obs, reward, done, info))

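# --- Usage sketch (not in the original source): splitting a range of maze indices
# --- across several pipe_worker2 processes, one Pipe per worker. `params` and the
# --- helper name are assumptions for illustration.
def spawn_workers(params, num_workers, mazes_per_worker):
    parent_ends = []
    for w in range(num_workers):
        parent_end, child_end = Pipe()
        idx_range = range(w * mazes_per_worker, (w + 1) * mazes_per_worker)
        Process(target=pipe_worker2,
                args=(child_end, params, True, list(idx_range)),
                daemon=True).start()
        parent_ends.append(parent_end)
    return parent_ends  # send actions / 'reset' / None on these, recv transitions back
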
        self._dqn = DQN('deathmatch', n_actions, epsilon)  # TODO: shouldn't be 2 ** n_actions
        self._drqn = DRQN('deathmatch', n_actions, epsilon)

    def forward(self, x_screens, hidden):
        # TODO: add a _detection_ layer to the DRQN model
        q_values, hidden = self._drqn(x_screens, hidden)
        detection = np.random.rand()
        # TODO: the Arnold paper proposes using the DQN only during evaluation,
        # when there are no detected enemies or the agent has no ammo left
        if detection > 0.0:  # without sigmoid
            q_values = self._dqn(x_screens)
        return detection, hidden, q_values

    def sample_actions(self):
        # pretty much the same as in DRQN;
        # maybe this can even delegate to DRQN.sample_actions()
        pass


def agent():
    return 0


if __name__ == '__main__':
    cfg = 'scenarios/deathmatch_shotgun.cfg'
    doom = DoomEnvironment(cfg, True, 4)
    while True:
        doom.step(agent())

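# --- Sketch (assumption, not this repo's actual DRQN.sample_actions): what an
# --- epsilon-greedy sampler over the q_values returned by forward() above could
# --- look like. The function name and signature are hypothetical.
import torch

def sample_actions_eps_greedy(q_values, epsilon):
    # q_values: tensor of shape (batch, n_actions)
    greedy = q_values.argmax(dim=1)                                 # best action per row
    random_actions = torch.randint(q_values.size(1), greedy.shape)  # uniform fallback
    explore = torch.rand(greedy.shape) < epsilon                    # True -> explore
    return torch.where(explore, random_actions, greedy)
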
from torch import load

from doom_environment import DoomEnvironment
from utils import watch_agent
from models import agent
from time import sleep

# from hyperparameters import hp_basic_test as hp
from hyperparameters import hp_d_cor_test as hp
# from hyperparameters import hp_def_c_test as hp
# from hyperparameters import hp_h_gth_test as hp


if __name__ == '__main__':
    print('---------------------------- vizDoom watching script ---------------------------')
    test_env = DoomEnvironment('scenarios/' + hp.scenario + '.cfg', False, hp.test_skiprate)
    test_env.make_visible()

    policy_net = agent[hp.agent](hp.scenario, 2 ** test_env.get_n_buttons(), hp.epsilon)
    policy_net.load_state_dict(load(
        'logs/' + hp.scenario + '/' + hp.agent + '/model.pth',
        map_location=lambda storage, loc: storage)['policy_net_state'])
    # policy_net.eval()
    policy_net.train()

    print('scenario: {}, agent: {}'.format(hp.scenario, hp.agent))
    print('loaded model: {}'.format('logs/' + hp.scenario + '/' + hp.agent + '/model.pth'))
    print('agent\'s epsilon: {}'.format(hp.epsilon))
    print('------------------------------- watch the model --------------------------------')
    print('n_episodes: {}'.format(hp.n_episodes))
    for episode in range(hp.n_episodes):
        reward, shaped = watch_agent(hp.scenario, policy_net, test_env)
        print('Episode {} done, reward: {}, shaped: {}'.format(episode, reward, shaped))
        sleep(1.0)

def evaluate_saved_model(models, models_dir):
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"

    # Build the evaluation environments for each test set.
    def make_envs(scenario_dir, count):
        args.scenario_dir = scenario_dir
        args.scenario = "custom_scenario_test{:003}.cfg"
        return [DoomEnvironment(args, idx=i, is_train=False) for i in range(count)]

    little_comb_env = make_envs("scenarios_transfer_learning/little_combs_test/", 50)
    big_comb_env = make_envs("scenarios_transfer_learning/big_combs_test/", 50)
    classic_env = make_envs("scenarios_transfer_learning/mazes_classic_test/", 50)
    medium_comb_env = make_envs("scenarios_transfer_learning/medium_combs_test/", 16)

    obs_shape = (3, args.screen_height, args.screen_width)
    policy = CNNPolicy(obs_shape, args).to(device)

    resultat = []
    for model in models:
        # The map_location lambda is required, otherwise torch tries to load the weights on the GPU.
        checkpoint = torch.load(models_dir + "/checkpoint_" + str(model) + ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()
        resultat.append(model)

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

        # Evaluate on each test set in turn; the repeated per-set bookkeeping of
        # the original is factored into one loop.
        test_sets = [("Classic levels", classic_env),
                     ("Little combs", little_comb_env),
                     ("Big combs", big_comb_env),
                     ("Other levels", medium_comb_env)]
        for set_name, envs in test_sets:
            results = [get_results(policy, env, args) for env in envs]
            print("{} evaluation is done".format(set_name))

            success_rate = 0
            average_reward = 0
            for res in results:
                if res[1] < 525:  # episodes shorter than 525 steps count as successes
                    success_rate += 1
                average_reward += res[0]
            # Bug fix: divide by the size of this test set; the original divided
            # by args.num_mazes_test even for the 16-maze medium set.
            success_rate /= len(envs)
            average_reward /= len(envs)
            resultat.append([success_rate, average_reward])

        print("Checkpoint " + str(model) + " has been evaluated")

    print(resultat)

import torch

from doom_environment import DoomEnvironment
from models import agent
# Assumed module paths for this repo's replay memory and trainer (not shown in the excerpt).
from memory import ReplayMemory
from trainer import Trainer

from hyperparameters import hp_h_gth_train as hp


if __name__ == '__main__':
    print('---------------------------- vizDoom training script ---------------------------')
    print('scenario: {}, agent: {}'.format(hp.scenario, hp.agent))
    print('\ntraining parameters:')
    print('n_epoch: {}, steps_per_epoch: {}, play_steps: {}'.format(
        hp.n_epoch, hp.steps_per_epoch, hp.play_steps))
    print('batch_size: {}, time_size: {}, not_update: {}'.format(
        hp.batch_size, hp.time_size, hp.not_update))
    print('tests_per_epoch: {}'.format(hp.tests_per_epoch))

    train_env = DoomEnvironment('scenarios/' + hp.scenario + '.cfg', False, hp.train_skiprate)
    test_env = DoomEnvironment('scenarios/' + hp.scenario + '.cfg', False, hp.test_skiprate)
    er = ReplayMemory(hp.replay_size, hp.screen_size)

    policy_net = agent[hp.agent](hp.scenario, 2 ** train_env.get_n_buttons())
    target_net = agent[hp.agent](hp.scenario, 2 ** train_env.get_n_buttons())
    optimizer = torch.optim.RMSprop(policy_net.parameters(), hp.learning_rate)

    trainer = Trainer(scenario=hp.scenario,
                      cuda=hp.cuda,
                      environment=train_env,
                      test_environment=test_env,
                      experience_replay=er,
                      policy_net=policy_net,
                      target_net=target_net,
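
# --- Sketch (assumption, not this repo's Trainer internals): the policy_net /
# --- target_net pair above implies the standard DQN target-network sync, which a
# --- trainer typically performs periodically (presumably every hp.not_update
# --- steps here), roughly:
#
#     target_net.load_state_dict(policy_net.state_dict())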