def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: work out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"

    # The map_location lambda is required, otherwise torch will try to load the checkpoint on the GPU.
    checkpoint = torch.load("0.pth.tar", map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

    # for i in range(args.num_mazes_test):
    for i in range(1, 64):
        env = DoomEnvironment(args, idx=i, is_train=False)
        movie_name = 'videos/3 - 12-04-21 - base/big_combs_test{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: work out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    name = "Final on enclave"
    args.scenario_dir = "scenarios_transfer_learning/scenes/"

    # The map_location lambda is required, otherwise torch will try to load the checkpoint on the GPU.
    checkpoint = torch.load("final.pth.tar", map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

    args.scenario = "custom_scenario000.cfg"
    env = DoomEnvironment(args, is_train=False)
    movie_name = '/home/adam/Bureau/Visuels/0 - Rollout faits main/{}.mp4'.format(name)
    print('Creating movie {}'.format(movie_name))
    make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: work out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=False)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    results = []
    for model in range(0, 2):
        # The map_location lambda is required, otherwise torch will try to load the checkpoint on the GPU.
        checkpoint = torch.load(str(model) + ".pth.tar", map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

        results.append([])
        for i in range(args.num_mazes_test):
            env = DoomEnvironment(args, idx=i, is_train=False)
            results[model].append(get_results(policy, env, args))
            print(i)

        # An episode counts as a success if it finished before the 525-step timeout.
        success_rate = 0
        average_reward = 0
        average_time = 0
        for res in results[model]:
            if res[1] < 525:
                success_rate += 1
                average_time += res[1]
            average_reward += res[0]
        if success_rate != 0:
            average_time /= success_rate
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        print(success_rate, average_reward, average_time)

    # Average completion-time difference between the two models, on levels the second model finished.
    time_diff = 0
    finished_levels = 0
    for i in range(args.num_mazes_test):
        if results[1][i][1] < 525:
            finished_levels += 1
            time_diff += results[1][i][1] - results[0][i][1]
    print(time_diff / finished_levels)
def evaluate_saved_model():
    args = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

    # The map_location lambda is required, otherwise torch will try to load the checkpoint on the GPU.
    checkpoint = torch.load(args.model_checkpoint, map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=True)
        movie_name = 'videos/rollout_{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
        else:
            return obs

    def step(self, actions):
        new_obs = []
        rewards = []
        dones = []
        infos = []
        for action, pipe in zip(actions, self.parent_pipes):
            pipe.send(action)
        for pipe in self.parent_pipes:
            obs, reward, done, info = pipe.recv()
            new_obs.append(self.prep_obs(obs))
            rewards.append(reward)
            dones.append(done)
            infos.append(info)
        return np.stack(new_obs), rewards, dones, infos


if __name__ == '__main__':
    args = parse_a2c_args()
    args.scenario_dir = '../scenarios/'
    mp_test_envs = MultiEnv(args.simulator, args.num_environments, 1, args)
    mp_test_envs.reset()
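    # Hedged extension of the smoke test above: step the environments once with
    # dummy actions. This assumes each worker expects a single discrete action
    # index per environment (an assumption; the exact action format is defined
    # by the worker process, which is not shown here).
    dummy_actions = [0] * args.num_environments
    new_obs, rewards, dones, infos = mp_test_envs.step(dummy_actions)
    print(new_obs.shape, rewards, dones)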
def evaluate_saved_model():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_updates = int(args.num_frames) // args.num_steps // args.num_environments

    # The writer outputs to the ./runs/ directory by default.
    writer = torch.utils.tensorboard.SummaryWriter()

    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    obs_shape = train_envs.obs_shape

    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    obs = little_combs_test_envs.reset()

    num_checkpoints = 355

    for j in range(num_checkpoints):
        # Evaluate every 8th checkpoint
        if j % 8 == 0:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/FINAL/checkpoint_{}.pth.tar'.format(str(j + 1))
            agent.load_model(checkpoint_filename)

            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(medium_combs_test_envs, j, total_num_steps)

            writer.add_scalar("Reward classic levels", mean_rewards_classic, (j + 1) * 100)
            writer.add_scalar("Reward little combs levels", mean_rewards_little, (j + 1) * 100)
            writer.add_scalar("Reward medium combs levels", mean_rewards_medium, (j + 1) * 100)

        print(j)
def train():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_updates = int(args.num_frames) // args.num_steps // args.num_environments

    # Create the train and test environments with multiple processes
    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    test_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=False)

    # The writer outputs to the ./runs/ directory by default.
    writer = torch.utils.tensorboard.SummaryWriter()

    obs_shape = train_envs.obs_shape

    # The agent's policy network and the A2C training algorithm
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/base_line.pth.tar'.format(output_dir)
        agent.load_model(checkpoint_filename)
        start_j = 0  # (int(checkpoint_idx) // args.num_steps // args.num_environments) + 1

    obs = train_envs.reset()
    start = time.time()
    nb_of_saves = 0

    for j in range(start_j, num_updates):
        print("------", j / num_updates * 100, "-------")

        # Evaluate the model's performance
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(medium_combs_test_envs, j, total_num_steps)
            # succes_classic = sum([1 if i != 525 else 0 for i in game_times_classic]) / 16
            # succes_little = sum([1 if i != 525 else 0 for i in game_times_little]) / 16
            # succes_medium = sum([1 if i != 525 else 0 for i in game_times_medium]) / 16

            writer.add_scalar("Reward classic levels", mean_rewards_classic, j)
            writer.add_scalar("Reward little combs levels", mean_rewards_little, j)
            writer.add_scalar("Reward medium combs levels", mean_rewards_medium, j)
            # writer.add_scalar("Success rate classic levels", succes_classic, j)
            # writer.add_scalar("Success rate little combs levels", succes_little, j)
            # writer.add_scalar("Success rate medium combs levels", succes_medium, j)

        for step in range(args.num_steps):
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)

        report = agent.update(obs)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = start_j * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))
            logging.info(report.format(j, total_num_steps, FPS))

        if j % args.model_save_rate == 0:
            nb_of_saves += 1
            agent.save_policy2(nb_of_saves, args, output_dir)

    # cancel the env processes
    train_envs.cancel()
    test_envs.cancel()
def evaluate_saved_model(models, models_dir):
    args = parse_a2c_args()
    # TODO: work out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else "cpu"

    # Create the test environments

    # Little-comb test environments
    little_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        little_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Big-comb test environments
    big_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        big_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Classic test environments
    classic_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        classic_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Other levels: medium-comb test environments
    medium_comb_env = []
    for i in range(16):
        args.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        medium_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    obs_shape = (3, args.screen_height, args.screen_width)
    policy = CNNPolicy(obs_shape, args).to(device)

    resultat = []
    for model in models:
        # The map_location lambda is required, otherwise torch will try to load the checkpoint on the GPU.
        checkpoint = torch.load(models_dir + "/checkpoint_" + str(model) + ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()
        resultat.append(model)

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

        # Evaluation on the classic levels
        results = []
        for i in range(50):
            env = classic_env[i]
            results.append(get_results(policy, env, args))
        print("Classic levels evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        # Evaluation on the little combs
        results = []
        for i in range(50):
            env = little_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Little combs evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        # Evaluation on the big combs
        results = []
        for i in range(50):
            env = big_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Big combs evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        # Evaluation on the other (medium-comb) levels
        results = []
        for i in range(16):
            env = medium_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Other levels evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        print("Checkpoint " + str(model) + " has been evaluated")

    print(resultat)
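# A hedged usage sketch: the checkpoint indices and directory below are
# hypothetical examples; the function only assumes files named
# "checkpoint_<index>.pth.tar" inside models_dir.
if __name__ == '__main__':
    evaluate_saved_model([1, 100, 200], "/path/to/checkpoints")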
def train():
    args = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_updates = int(args.num_frames) // args.num_steps // args.num_environments

    # Create the train and test environments with multiple processes
    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)
    test_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=False)

    obs_shape = train_envs.obs_shape

    # The agent's policy network and the A2C training algorithm
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/checkpoint_{}.pth.tar'.format(output_dir, checkpoint_idx)
        agent.load_model(checkpoint_filename)
        start_j = (int(checkpoint_idx) // args.num_steps // args.num_environments) + 1

    obs = train_envs.reset()
    start = time.time()

    for j in range(start_j, num_updates):
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards, game_times = agent.evaluate(test_envs, j, total_num_steps)
            logging.info(mean_rewards)
            logging.info(game_times)

        for step in range(args.num_steps):
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)

        report = agent.update(obs)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = start_j * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))
            logging.info(report.format(j, total_num_steps, FPS))

        if j % args.model_save_rate == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            agent.save_policy(total_num_steps, args, output_dir)

    # cancel the env processes
    train_envs.cancel()
    test_envs.cancel()
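# A minimal entry point, assuming this module is meant to be run as a script
# (an assumption; the original launch code is not shown here).
if __name__ == '__main__':
    train()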