def evaluate_saved_model():
    params = parse_game_args()
    env = DoomEnvironment(params, is_train=True)
    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(params.model_checkpoint), 'The model could not be loaded'
    # This lambda stuff is required otherwise it will try and load on GPU
    checkpoint = torch.load(params.model_checkpoint, map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])

    base_filename = params.model_checkpoint.split('.')[0].split('/')[1]

    agent = BaseAgent(actor_critic, params)

    for i in range(params.num_mazes_test):
        env = DoomEnvironment(params, idx=i, is_train=True)
        movie_name = 'videos/{}_rollout_{:0004}.mp4'.format(base_filename, i)
        print('Creating movie {}'.format(movie_name))
        make_movie(agent, env, movie_name, params)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    name = "Final on enclave"
    args.scenario_dir = "scenarios_transfer_learning/scenes/"

    checkpoint = torch.load("final.pth.tar", map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'
    # This lambda stuff is required otherwise it will try and load on GPU

    args.scenario = "custom_scenario000.cfg"
    env = DoomEnvironment(args, is_train=False)
    movie_name = '/home/adam/Bureau/Visuels/0 - Rollout faits main/{}.mp4'.format(name)
    print('Creating movie {}'.format(movie_name))
    make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"

    checkpoint = torch.load("0.pth.tar", map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'
    # This lambda stuff is required otherwise it will try and load on GPU

    for i in range(1, 64):  # for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=False)
        movie_name = 'videos/3 - 12-04-21 - base/big_combs_test{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=False)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    results = []
    for model in range(0, 2):
        checkpoint = torch.load(str(model) + ".pth.tar", map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'
        # This lambda stuff is required otherwise it will try and load on GPU

        results.append([])
        for i in range(args.num_mazes_test):
            env = DoomEnvironment(args, idx=i, is_train=False)
            results[model].append(get_results(policy, env, args))
            print(i)

        # Per-checkpoint statistics: an episode is counted as a success if it
        # finishes before the 525-step time limit
        success_rate = 0
        average_reward = 0
        average_time = 0
        for res in results[model]:
            if res[1] < 525:
                success_rate += 1
                average_time += res[1]
            average_reward += res[0]
        if success_rate != 0:
            average_time /= success_rate
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        print(success_rate, average_reward, average_time)

    # Compare completion times of the two checkpoints on the levels the second one finished
    time_diff = 0
    finished_levels = 0
    for i in range(args.num_mazes_test):
        if results[1][i][1] < 525:
            finished_levels += 1
            time_diff += results[1][i][1] - results[0][i][1]
    print(time_diff / finished_levels)
def evaluate_saved_model():
    args = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    checkpoint = torch.load(args.model_checkpoint, map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'
    # This lambda stuff is required otherwise it will try and load on GPU

    for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=True)
        movie_name = 'videos/rollout_{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_updates = int(args.num_frames) // args.num_steps // args.num_environments

    # Writer will output to ./runs/ directory by default
    writer = torch.utils.tensorboard.SummaryWriter()

    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    obs_shape = train_envs.obs_shape

    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    obs = little_combs_test_envs.reset()

    num_checkpoints = 355
    for j in range(num_checkpoints):
        if j % 8 == 0:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/FINAL/checkpoint_{}.pth.tar'.format(str(j + 1))
            agent.load_model(checkpoint_filename)

            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(medium_combs_test_envs, j, total_num_steps)

            writer.add_scalar("Reward classic levels", mean_rewards_classic, (j + 1) * 100)
            writer.add_scalar("Reward little combs levels", mean_rewards_little, (j + 1) * 100)
            writer.add_scalar("Reward medium combs levels", mean_rewards_medium, (j + 1) * 100)

            print(j)
import argparse
import os
import random
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from dataset import ImageFolder
from models import CNNPolicy

model = CNNPolicy(3)
model.load_state_dict(torch.load('checkpoint.pth.tar')['state_dict'])
model.eval()
# model.load_state_dict(torch.load('model_best.pth.tar')['state_dict'])

random.seed(123)
idxs = list(range(10100))
random.shuffle(idxs)

data_set = ImageFolder('../gym-duckietown/images', return_path=True)
test_loader = torch.utils.data.DataLoader(data_set,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=1,
                                          sampler=idxs[10000:10100])

output_html = ''
    clip = ImageSequenceClip(observations, fps=int(30 / params.frame_skip))
    output_dir = logger.get_eval_output()
    clip.write_videofile('{}eval{:0004}_{:00005.0f}.mp4'.format(output_dir, step, score * 100))


if __name__ == '__main__':
    # Test to improve movie with action probs, values etc
    params = parse_game_args()
    params.norm_obs = False
    params.recurrent_policy = True

    envs = MultiEnvs(params.simulator, 1, 1, params)
    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])

    model = CNNPolicy(obs_shape[0], envs.num_actions, params.recurrent_policy, obs_shape)
    env = DoomEnvironment(params)
    agent = BaseAgent(model, params)

    env.reset()
    agent.reset()
    rewards = []
    obss = []
    actions = []
    action_probss = []
    values = []

    while not env.is_episode_finished():
        obs = env.get_observation()
        # action = agent.get_action(obs, epsilon=0.0)
def gen_classic(selh, file, scenario=False, model="model_final"):
    params = parse_game_args()

    # Load the scenario
    if not scenario:
        params.scenario = "custom_scenario003.cfg"
    else:
        params.scenario = scenario
    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")
    num_actions = env.num_actions
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)

    # Load the base model
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
    checkpoint = torch.load('models/' + model + '.pth.tar', map_location=lambda storage, loc: storage)

    # Rename the state-dict keys that cause problems
    checkpoint['model']["dist.linear.weight"] = checkpoint['model']["dist_linear.weight"]
    del checkpoint['model']["dist_linear.weight"]
    checkpoint['model']["dist.linear.bias"] = checkpoint['model']["dist_linear.bias"]
    del checkpoint['model']["dist_linear.bias"]

    network.load_state_dict(checkpoint['model'])
    agent = BaseAgent(network, params)
    ERU = {'env': env, 'agent': agent}

    # Load the checkpoints
    num_checkpoints = [98, 98, 159]
    checkpoints = [1] * sum(num_checkpoints)
    networks = [1] * sum(num_checkpoints)
    agents = [1] * sum(num_checkpoints)
    ERUs = [1] * sum(num_checkpoints)

    for i in range(len(num_checkpoints)):
        for j in range(num_checkpoints[i]):
            iter = i * num_checkpoints[0] + j
            # if i == 0:
            #     checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 88))
            # else:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 1))
            checkpoints[i * num_checkpoints[0] + j] = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage)

            # Rename the state-dict keys that cause problems
            checkpoints[iter]['model']["dist.linear.weight"] = checkpoints[iter]['model']["dist_linear.weight"]
            del checkpoints[iter]['model']["dist_linear.weight"]
            checkpoints[iter]['model']["dist.linear.bias"] = checkpoints[iter]['model']["dist_linear.bias"]
            del checkpoints[iter]['model']["dist_linear.bias"]

            networks[iter] = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
            networks[iter].load_state_dict(checkpoints[iter]['model'])
            agents[iter] = BaseAgent(networks[iter], params)
            ERUs[iter] = {'env': env, 'agent': agents[iter]}
            ERUs[iter]['env'].reset()

    selhs = []
    for i in range(sum(num_checkpoints)):
        selh = tsne_1d_projection(127)
        selh = torch.from_numpy(selh).type(torch.FloatTensor)
        selh = Variable(selh, volatile=True)
        selhs.append(selh)

    scores = []
    hiddens = []
    inputs = []
    actions = []

    # Loop to collect the base model's observations
    obss = []
    actions = []
    for i in range(50):
        obs = ERU['env'].get_observation()
        action, value, action_probs, grads = ERU['agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)
        ERU['env'].make_action(int(action))
        obss.append(obs)
        actions.append(action)

    # Loop to evaluate the checkpoints on the situations encountered by the base model
    for i in range(sum(num_checkpoints)):
        for obs2 in obss:
            action, value, action_probs, grads = ERUs[i]['agent'].get_action_value_and_probs_zeroes(obs2, selhs[i], epsilon=0.0)
            hidden = ERUs[i]['agent'].model.get_gru_h()
            h = ''
            for elem in hidden[0][0]:
                h += str(elem) + ","
            h = h[:-1]
            h = h.split(',')
            hiddens.append(h)
            ERU['env'].make_action(int(action))

    # Visualise the GRU hidden states over checkpoints (one column per checkpoint)
    im = Image.new('P', (sum(num_checkpoints), 128))
    for i in range(len(hiddens)):
        for j in range(len(hiddens[i])):
            value = int((float(hiddens[i][j]) + 1) * 255 / 2)
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("timeline.png")

    # Visualise the variation of hidden states between consecutive checkpoints
    im = Image.new('P', (sum(num_checkpoints) - 1, 128))
    for i in range(len(hiddens) - 1):
        for j in range(len(hiddens[i])):
            value = int(abs(float(hiddens[i][j]) - float(hiddens[i + 1][j])) * 255 * 1.5)
            if value > 255:
                value = 255
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("variation.png")
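# The `dist_linear` -> `dist.linear` key renaming above is written out inline twice.
# A minimal sketch of a reusable helper (hypothetical name `remap_dist_keys`), assuming
# only those two legacy keys ever need renaming:
def remap_dist_keys(state_dict):
    """Rename legacy `dist_linear.*` keys to the `dist.linear.*` names CNNPolicy expects."""
    for suffix in ("weight", "bias"):
        old_key = "dist_linear.{}".format(suffix)
        new_key = "dist.linear.{}".format(suffix)
        if old_key in state_dict:
            state_dict[new_key] = state_dict.pop(old_key)
    return state_dict

# Example usage (sketch): network.load_state_dict(remap_dist_keys(checkpoint['model']))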
def train():
    # define params
    params = parse_game_args()
    logger = Logger(params)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_updates = int(params.num_frames) // params.num_steps // params.num_environments

    # environments
    envs = MultiEnvsMPPipes(params.simulator, params.num_environments, 1, params)

    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])

    evaluator = Evaluator(params)

    print('creating model')
    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params).to(device)
    print('model created')

    start_j = 0
    if params.reload_model:
        checkpoint_idx = params.reload_model.split(',')[1]
        checkpoint_filename = '{}models/checkpoint_{}.pth.tar'.format(params.output_dir, checkpoint_idx)
        assert os.path.isfile(checkpoint_filename), 'The model could not be found {}'.format(checkpoint_filename)
        logger.write('Loading model{}'.format(checkpoint_filename))

        if device == 'cuda':
            # The checkpoint will try to load onto the GPU storage unless specified
            checkpoint = torch.load(checkpoint_filename)
        else:
            checkpoint = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage)
        actor_critic.load_state_dict(checkpoint['model'])
        start_j = (int(checkpoint_idx) // params.num_steps // params.num_environments) + 1

    print('creating optimizer')
    optimizer = optim.RMSprop([p for p in actor_critic.parameters() if p.requires_grad],
                              params.learning_rate,
                              eps=params.eps,
                              alpha=params.alpha,
                              momentum=params.momentum)

    if params.reload_model:
        optimizer.load_state_dict(checkpoint['optimizer'])

    rollouts = RolloutStorage(params.num_steps, params.num_environments, obs_shape, actor_critic.state_size, params)
    current_obs = torch.zeros(params.num_environments, *obs_shape)

    # For frame stacking
    def update_current_obs(obs):
        shape_dim0 = envs.obs_shape[0]
        obs = torch.from_numpy(obs).float()
        if params.num_stack > 1:
            current_obs[:, :-shape_dim0] = current_obs[:, shape_dim0:]
        current_obs[:, -shape_dim0:] = obs

    print('getting first obs')
    obs = envs.reset()
    print('update current obs')
    update_current_obs(obs)
    rollouts.observations[0].copy_(current_obs)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([params.num_environments, 1])
    final_rewards = torch.zeros([params.num_environments, 1])

    current_obs = current_obs.to(device)
    rollouts.set_device(device)

    print('Starting training loop')
    start = time.time()
    print(num_updates)

    for j in range(start_j, num_updates):
        # STARTING no grad scope
        with torch.no_grad():
            if j % params.eval_freq == 0 and not params.skip_eval:
                print('Evaluating model')
                if params.simulator == 'doom':
                    actor_critic.eval()
                    total_num_steps = (j + 1) * params.num_environments * params.num_steps
                    # eval_model(actor_critic, params, logger, j, total_num_steps, params.eval_games)
                    evaluator.evaluate(actor_critic, params, logger, j, total_num_steps, params.eval_games)
                    actor_critic.train()

            # =============================================================================
            # Take steps in the environment
            # =============================================================================
            for step in range(params.num_steps):
                # Sample actions
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step],
                    rollouts.states[step],
                    rollouts.masks[step])
                cpu_actions = action.squeeze(1).cpu().numpy()

                # Observe reward and next obs
                obs, reward, done, info = envs.step(cpu_actions)
                reward = torch.from_numpy(np.expand_dims(np.stack(reward), 1)).float()
                episode_rewards += reward

                # If done then create masks to clean the history of observations.
                masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])
                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards
                episode_rewards *= masks
                masks = masks.to(device)

                if current_obs.dim() == 4:
                    current_obs *= masks.unsqueeze(2).unsqueeze(2)
                else:
                    current_obs *= masks

                update_current_obs(obs)
                rollouts.insert(step, current_obs, states, action, action_log_prob, value, reward, masks)

            # =============================================================================
            # Compute discounted returns, re-step through the environment
            # =============================================================================
            next_value = actor_critic(rollouts.observations[-1],
                                      rollouts.states[-1],
                                      rollouts.masks[-1])[0]
            rollouts.compute_returns(next_value, params.use_gae, params.gamma, params.tau)
        # FINISHED no grad scope

        model_output = actor_critic.evaluate_actions(
            rollouts.observations[:-1].view(-1, *obs_shape),
            rollouts.states[0].view(-1, actor_critic.state_size),
            rollouts.masks[:-1].view(-1, 1),
            rollouts.actions.view(-1, 1))
        values, action_log_probs, dist_entropy, states = model_output

        values = values.view(params.num_steps, params.num_environments, 1)
        action_log_probs = action_log_probs.view(params.num_steps, params.num_environments, 1)

        advantages = rollouts.returns[:-1] - values
        value_loss = advantages.pow(2).mean()
        action_loss = -(advantages.detach() * action_log_probs).mean()

        optimizer.zero_grad()
        loss = value_loss * params.value_loss_coef + action_loss - dist_entropy * params.entropy_coef
        loss.backward()
        # clip_grad_norm is the pre-1.0 PyTorch name; newer versions use clip_grad_norm_
        nn.utils.clip_grad_norm(actor_critic.parameters(), params.max_grad_norm)
        optimizer.step()
        rollouts.after_update()

        if j % params.model_save_rate == 0:
            total_num_steps = (j + 1) * params.num_environments * params.num_steps
            checkpoint = {
                'step': step,
                'params': params,
                'model': actor_critic.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            filepath = logger.output_dir + 'models/'
            torch.save(checkpoint, '{}checkpoint_{:00000000010}.pth.tar'.format(filepath, total_num_steps))

        if j % params.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * params.num_environments * params.num_steps
            save_num_steps = (start_j) * params.num_environments * params.num_steps
            logger.write(
                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}"
                .format(j, total_num_steps,
                        int((total_num_steps - save_num_steps) / (end - start)),
                        final_rewards.mean(), final_rewards.median(),
                        final_rewards.min(), final_rewards.max(),
                        dist_entropy.item(), value_loss.item(), action_loss.item()))

    evaluator.cancel()
    envs.cancel()
def gen_classic(selh, file):
    params = parse_game_args()
    params.scenario = "health_gathering_supreme.cfg"
    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")
    num_actions = env.num_actions

    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
    checkpoint = torch.load('models/' + "health_gathering_supreme" + '.pth.tar',
                            map_location=lambda storage, loc: storage)
    network.load_state_dict(checkpoint['model'])
    agent = BaseAgent(network, params)
    ERU = {'env': env, 'agent': agent}

    selh = torch.from_numpy(selh).type(torch.FloatTensor)
    selh = Variable(selh, volatile=True)

    ERU['env'].set_seed(randint(0, 999999999))
    ERU['env'].reset()

    scores = []
    hiddens = []
    inputs = []
    saliencies = []
    actions = []
    probabilities = []
    health = []
    positions = []
    orientations = []
    velocities = []
    items = []
    fov = []

    w = 0
    while not ERU['env'].is_episode_finished():
        observation = io.BytesIO()
        obs = ERU['env'].get_observation()
        temp = ERU['env'].state.screen_buffer
        Image.fromarray(temp.transpose(1, 2, 0)).save(observation, format="JPEG")

        action, value, action_probs, grads = ERU['agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)

        hidden = ERU['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]
        h = h.split(',')

        probs = ""
        for elem in action_probs[0]:
            probs += str(elem) + ","
        probs = probs[:-1]
        probs = probs.split(',')

        sa = io.BytesIO()
        t = Image.fromarray(grads, 'L')
        t.save(sa, format="JPEG")

        scores.append(str(round(ERU['env'].game.get_total_reward(), 2)))
        hiddens.append(h)
        inputs.append(base64.b64encode(observation.getvalue()))
        saliencies.append(base64.b64encode(sa.getvalue()))
        actions.append(str(action))
        probabilities.append(probs)
        health.append(ERU['env'].get_health())
        positions.append(ERU['env'].get_pos())
        orientations.append(ERU['env'].get_ori())
        velocities.append(ERU['env'].get_velo())
        items.append(ERU['env'].get_item())
        fov.append(ERU['env'].get_fov())

        ERU['env'].make_action(int(action))
        print('Iteration', w, '/525')
        w += 1

    result = {
        'episode0': {
            'inputs': inputs,
            'actions': actions,
            'probabilities': probabilities,
            'saliencies': saliencies,
            'scores': scores,
            'positions': positions,
            'health': health,
            'hiddens': hiddens,
            'orientations': orientations,
            'velocities': velocities,
            'items': items,
            'fov': fov
        }
    }

    with open(file, 'w') as f:
        ujson.dump(result, f, indent=4, sort_keys=True)

    return result
        step, reward_list, time_list))


def write_movie(params, logger, observations, step, score, best_agent=True):
    observations = [o.transpose(1, 2, 0) for o in observations]
    clip = ImageSequenceClip(observations, fps=int(30 / params.frame_skip))
    output_dir = logger.get_eval_output()
    clip.write_videofile('{}eval{:0004}_{:00005.0f}.mp4'.format(output_dir, step, score * 100))
    if params.use_visdom:
        logger.add_video('{}eval{:0004}_{:00005.0f}.mp4'.format(output_dir, step, score * 100),
                         best_agent=best_agent)


if __name__ == '__main__':
    params = parse_game_args()
    params.norm_obs = False
    params.num_stack = 1
    params.recurrent_policy = True
    params.num_environments = 16
    params.scenario = 'scenario_3_item0.cfg'

    envs = MultiEnvsMPPipes(params.simulator, 1, 1, params)
    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])

    model = CNNPolicy(obs_shape[0], obs_shape, params)

    with torch.no_grad():
        eval_model_multi(model, params, 0, 0, 0, num_games=1000)
def train():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_updates = int(args.num_frames) // args.num_steps // args.num_environments

    # Create the train and test environments with multiple processes
    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator, args.num_environments, args2, is_train=False)

    test_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=False)

    # Writer will output to ./runs/ directory by default
    writer = torch.utils.tensorboard.SummaryWriter()

    obs_shape = train_envs.obs_shape

    # The agent's policy network and the A2C training algorithm
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/base_line.pth.tar'.format(output_dir)
        agent.load_model(checkpoint_filename)
        start_j = 0  # (int(checkpoint_idx) // args.num_steps // args.num_environments) + 1

    obs = train_envs.reset()
    start = time.time()
    nb_of_saves = 0

    for j in range(start_j, num_updates):
        print("------", j / num_updates * 100, "-------")

        # Evaluate the model's performance
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(medium_combs_test_envs, j, total_num_steps)

            # succes_classic = sum([1 if i != 525 else 0 for i in game_times_classic]) / 16
            # succes_little = sum([1 if i != 525 else 0 for i in game_times_little]) / 16
            # succes_medium = sum([1 if i != 525 else 0 for i in game_times_medium]) / 16

            writer.add_scalar("Reward classic levels", mean_rewards_classic, j)
            writer.add_scalar("Reward little combs levels", mean_rewards_little, j)
            writer.add_scalar("Reward medium combs levels", mean_rewards_medium, j)

            # writer.add_scalar("Success rate classic levels", succes_classic, j)
            # writer.add_scalar("Success rate little combs levels", succes_little, j)
            # writer.add_scalar("Success rate medium combs levels", succes_medium, j)

        for step in range(args.num_steps):
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)

        report = agent.update(obs)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = (start_j) * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))
            logging.info(report.format(j, total_num_steps, FPS))

        if j % args.model_save_rate == 0:
            nb_of_saves += 1
            agent.save_policy2(nb_of_saves, args, output_dir)

    # cancel the env processes
    train_envs.cancel()
    test_envs.cancel()
def main():
    global args, best_loss
    args = parser.parse_args()

    data_set = ImageFolder('../gym-duckietown/real_images', augment=args.augment)
    print(len(data_set))

    writer = SummaryWriter('log/')
    splits = json.load(open('splits.json'))

    train_loader = torch.utils.data.DataLoader(
        data_set, batch_size=args.batch_size, shuffle=False,
        num_workers=1, sampler=torch.utils.data.sampler.RandomSampler(list(range(72))))
    # val_loader = torch.utils.data.DataLoader(
    #     data_set, batch_size=args.batch_size, shuffle=False,
    #     num_workers=1, sampler=splits['val'])

    if args.use_model2:
        model = CNNPolicy2(3)
    else:
        model = CNNPolicy(3)

    if args.cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, optimizer, epoch)

        # evaluate on validation set
        loss = validate(train_loader, model)
        writer.add_scalar('val_loss', loss, epoch)

        # remember best loss and save checkpoint
        is_best = loss < best_loss
        best_loss = min(loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'optimizer': optimizer.state_dict(),
            }, is_best)
def evaluate_saved_model(models, models_dir):
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else

    # Create the environments
    # Little-combs test environments
    little_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        little_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Big-combs test environments
    big_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        big_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Classic test environments
    classic_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        classic_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Other levels (medium combs) test environments
    medium_comb_env = []
    for i in range(16):
        args.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        medium_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    obs_shape = (3, args.screen_height, args.screen_width)
    policy = CNNPolicy(obs_shape, args).to(device)

    resultat = []
    for model in models:
        checkpoint = torch.load(models_dir + "/checkpoint_" + str(model) + ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()
        resultat.append(model)

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'
        # This lambda stuff is required otherwise it will try and load on GPU

        # Evaluation on the classic levels
        results = []
        for i in range(50):
            env = classic_env[i]
            results.append(get_results(policy, env, args))
        print("Classic levels evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        # Evaluation on the little combs
        results = []
        for i in range(50):
            env = little_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Little combs evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        # Evaluation on the big combs
        results = []
        for i in range(50):
            env = big_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Big combs evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        # Evaluation on the other levels
        results = []
        for i in range(16):
            env = medium_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Other levels evaluation is done")

        success_rate = 0
        average_reward = 0
        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test
        resultat.append([success_rate, average_reward])

        print("Checkpoint " + str(model) + " has been evaluated")

    print(resultat)
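# The success-rate/average-reward block above is repeated once per level set. A minimal
# sketch of a helper that could factor it out (hypothetical name `summarize_results`),
# assuming `get_results` returns (total_reward, game_time) tuples as the indexing above
# suggests, and that a game_time of 525 marks an unfinished episode:
def summarize_results(results, num_mazes):
    """Return (success_rate, average_reward) for a list of (reward, game_time) tuples."""
    successes = sum(1 for reward, game_time in results if game_time < 525)
    average_reward = sum(reward for reward, _ in results) / num_mazes
    return successes / num_mazes, average_reward

# Example usage (sketch): resultat.append(list(summarize_results(results, args.num_mazes_test)))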
def train():
    args = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_updates = int(args.num_frames) // args.num_steps // args.num_environments

    # Create the train and test environments with multiple processes
    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)
    test_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=False)

    obs_shape = train_envs.obs_shape

    # The agent's policy network and the A2C training algorithm
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/checkpoint_{}.pth.tar'.format(output_dir, checkpoint_idx)
        agent.load_model(checkpoint_filename)
        start_j = (int(checkpoint_idx) // args.num_steps // args.num_environments) + 1

    obs = train_envs.reset()
    start = time.time()

    for j in range(start_j, num_updates):
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards, game_times = agent.evaluate(test_envs, j, total_num_steps)
            logging.info(mean_rewards)
            logging.info(game_times)

        for step in range(args.num_steps):
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)

        report = agent.update(obs)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = (start_j) * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))
            logging.info(report.format(j, total_num_steps, FPS))

        if j % args.model_save_rate == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            agent.save_policy(total_num_steps, args, output_dir)

    # cancel the env processes
    train_envs.cancel()
    test_envs.cancel()
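# Minimal entry-point sketch, assuming this training script is meant to be run directly
# and that parse_a2c_args() reads its configuration from the command line:
if __name__ == "__main__":
    train()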