def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: find out how to use the GPU
    device = torch.device("cpu")  # CPU only for now; could be "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
    checkpoint = torch.load("0.pth.tar",
                            map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda is required; without it torch.load would try to put the tensors back onto the GPU they were saved from

    for i in range(1, 64):  #for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=False)
        movie_name = 'videos/3 - 12-04-21 - base/big_combs_test{:0004}.mp4'.format(
            i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: find out how to use the GPU
    device = torch.device("cpu")  # CPU only for now; could be "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    name = "Final on enclave"
    args.scenario_dir = "scenarios_transfer_learning/scenes/"
    checkpoint = torch.load("final.pth.tar",
                            map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda is required; without it torch.load would try to put the tensors back onto the GPU they were saved from

    args.scenario = "custom_scenario000.cfg"
    env = DoomEnvironment(args, is_train=False)
    movie_name = '/home/adam/Bureau/Visuels/0 - Rollout faits main/{}.mp4'.format(
        name)
    print('Creating movie {}'.format(movie_name))
    make_movie(policy, env, movie_name, args)
def evaluate_saved_model():

    args = parse_a2c_args()
    # TODO: find out how to use the GPU
    device = torch.device("cpu")  # CPU only for now; could be "cuda" if torch.cuda.is_available() else "cpu"
    env = DoomEnvironment(args, is_train=False)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    results = []

    for model in range(0, 2):
        checkpoint = torch.load(str(model) + ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(
            args.model_checkpoint), 'The model could not be loaded'
        # The map_location lambda is required; without it torch.load would try to put the tensors back onto the GPU they were saved from

        results.append([])

        for i in range(args.num_mazes_test):
            env = DoomEnvironment(args, idx=i, is_train=False)
            results[model].append(get_results(policy, env, args))
            print(i)

        success_rate = 0
        average_reward = 0
        average_time = 0

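        # A game time of 525 appears to be the episode timeout used in these
        # scenarios (the commented-out success-rate code further down treats 525
        # as a failure), so any run shorter than 525 steps counts as a success.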
        for res in results[model]:
            if res[1] < 525:
                success_rate += 1
                average_time += res[1]
            average_reward += res[0]

        if success_rate != 0:
            average_time /= success_rate
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        print(success_rate, average_reward, average_time)

    time_diff = 0
    finished_levels = 0

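    # For every level that the second checkpoint (index 1) finished, accumulate
    # how much its completion time differs from the first checkpoint's time;
    # the printed value is the mean of that difference.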
    for i in range(args.num_mazes_test):
        if results[1][i][1] < 525:
            finished_levels += 1
            time_diff += results[1][i][1] - results[0][i][1]

    print(time_diff / finished_levels)
def evaluate_saved_model():  
    args = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'

    # The map_location lambda is required; without it torch.load would try to put the tensors back onto the GPU they were saved from
    checkpoint = torch.load(args.model_checkpoint, map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()
 
    for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=True)
        movie_name = 'videos/rollout_{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
    
    def step(self, actions):
        new_obs = []
        rewards = []
        dones = []
        infos = []

        # Fan out: send one action to each worker process
        for action, pipe in zip(actions, self.parent_pipes):
            pipe.send(action)

        # Fan in: collect the resulting transition from every worker
        for pipe in self.parent_pipes:
            obs, reward, done, info = pipe.recv()
            new_obs.append(self.prep_obs(obs))
            rewards.append(reward)
            dones.append(done)
            infos.append(info)

        return np.stack(new_obs), rewards, dones, infos
       

if __name__ == '__main__':
    
    args = parse_a2c_args()
    args.scenario_dir = '../scenarios/'
    

    mp_test_envs = MultiEnv(args.simulator, args.num_environments, 1, args)
    mp_test_envs.reset()
    
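    # A minimal smoke test one could add here (hypothetical sketch, not from the
    # original code): it assumes each worker accepts a plain integer action index.
    # actions = [0] * args.num_environments
    # obs, rewards, dones, infos = mp_test_envs.step(actions)
    # print(obs.shape, rewards, dones)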
def evaluate_saved_model():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_updates = int(
        args.num_frames) // args.num_steps // args.num_environments

    # Writer will output to ./runs/ directory by default
    writer = torch.utils.tensorboard.SummaryWriter()

    train_envs = MultiEnv(args.simulator,
                          args.num_environments,
                          args,
                          is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator,
                                 args.num_environments,
                                 args2,
                                 is_train=False)
    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)

    obs_shape = train_envs.obs_shape

    policy = CNNPolicy(obs_shape, args).to(device)

    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    obs = little_combs_test_envs.reset()

    num_checkpoints = 355

    for j in range(num_checkpoints):
        if j % 8 == 0:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/FINAL/checkpoint_{}.pth.tar'.format(
                str(j + 1))
            agent.load_model(checkpoint_filename)

            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(
                classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(
                little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(
                medium_combs_test_envs, j, total_num_steps)

            writer.add_scalar("Reward classic levels", mean_rewards_classic,
                              (j + 1) * 100)
            writer.add_scalar("Reward little combs levels",
                              mean_rewards_little, (j + 1) * 100)
            writer.add_scalar("Reward medium combs levels",
                              mean_rewards_medium, (j + 1) * 100)
            print(j)
def train():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_updates = int(
        args.num_frames) // args.num_steps // args.num_environments
    # Create the train and test environments with Multiple processes
    train_envs = MultiEnv(args.simulator,
                          args.num_environments,
                          args,
                          is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator,
                                 args.num_environments,
                                 args2,
                                 is_train=False)
    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)

    test_envs = MultiEnv(args.simulator,
                         args.num_environments,
                         args,
                         is_train=False)

    # Writer will output to ./runs/ directory by default
    writer = torch.utils.tensorboard.SummaryWriter()

    obs_shape = train_envs.obs_shape

    # The agent's policy network and training algorithm A2C
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/base_line.pth.tar'.format(output_dir)
        agent.load_model(checkpoint_filename)
        start_j = 0  #(int(checkpoint_idx) // args.num_steps // args.num_environments) + 1

    obs = train_envs.reset()
    start = time.time()
    nb_of_saves = 0

    for j in range(start_j, num_updates):
        print("------", j / num_updates * 100, "-------")

        # Evaluate the model's performance
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(
                classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(
                little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(
                medium_combs_test_envs, j, total_num_steps)

            # succes_classic = sum([1 if i!=525 else 0 for i in game_times_classic])/16
            #  succes_little = sum([1 if i!=525 else 0 for i in game_times_little])/16
            # succes_medium = sum([1 if i!=525 else 0 for i in game_times_medium])/16

            writer.add_scalar("Reward classic levels", mean_rewards_classic, j)
            writer.add_scalar("Reward little combs levels",
                              mean_rewards_little, j)
            writer.add_scalar("Reward medium combs levels",
                              mean_rewards_medium, j)
        # writer.add_scalar("Success rate classic levels", succes_classic, j)
        # writer.add_scalar("Success rate little combs levels", succes_little, j)
        # writer.add_scalar("Success rate medium combs levels", succes_medium, j)

        for step in range(args.num_steps):
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)

        report = agent.update(obs)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = (start_j) * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))

            logging.info(report.format(j, total_num_steps, FPS))

        if j % args.model_save_rate == 0:
            nb_of_saves += 1
            agent.save_policy2(nb_of_saves, args, output_dir)

    # cancel the env processes
    train_envs.cancel()
    test_envs.cancel()
def evaluate_saved_model(models, models_dir):

    args = parse_a2c_args()
    # TODO: find out how to use the GPU
    device = torch.device("cpu")  # CPU only for now; could be "cuda" if torch.cuda.is_available() else "cpu"

    # create the environments

    # create the little-combs environments

    little_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        little_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # create the big-combs environments

    big_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        big_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # create the classic environments

    classic_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        classic_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # create the environments for the other levels (medium combs)

    medium_comb_env = []
    for i in range(16):
        args.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        medium_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    resultat = []

    for model in models:
        checkpoint = torch.load(models_dir + "/checkpoint_" + str(model) +
                                ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        resultat.append(model)

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(
            args.model_checkpoint), 'The model could not be loaded'
        # The map_location lambda is required; without it torch.load would try to put the tensors back onto the GPU they were saved from

        # evaluation on the classic levels

        results = []

        for i in range(50):
            env = classic_env[i]
            results.append(get_results(policy, env, args))

        print("Classic levels evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= len(results)
        average_reward /= len(results)

        resultat.append([success_rate, average_reward])

        # evaluation on the little combs

        results = []

        for i in range(50):
            env = little_comb_env[i]
            results.append(get_results(policy, env, args))

        print("Little combs evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= len(results)
        average_reward /= len(results)

        resultat.append([success_rate, average_reward])

        # evaluation on the big combs

        results = []

        for i in range(50):
            env = big_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Big combs evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= len(results)
        average_reward /= len(results)

        resultat.append([success_rate, average_reward])

        # evaluation on the other levels (medium combs)

        results = []

        for i in range(16):
            env = medium_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Other levels evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= len(results)
        average_reward /= len(results)

        resultat.append([success_rate, average_reward])

        print("Checkpoint " + str(model) + " has been evaluated")

    print(resultat)
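# Hypothetical invocation of the batch evaluation above; the checkpoint indices
# and directory below are illustrative placeholders, not values from the original code.
# evaluate_saved_model(models=[8, 16, 24], models_dir="models")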
def train():
    args = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    num_updates = int(args.num_frames) // args.num_steps // args.num_environments
    # Create the train and test environments with Multiple processes
    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)
    test_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=False)
    
    obs_shape = train_envs.obs_shape
    
    # The agent's policy network and training algorithm A2C
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy, 
                     args.hidden_size,
                     value_weight=args.value_loss_coef, 
                     entropy_weight=args.entropy_coef, 
                     num_steps=args.num_steps, 
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)
    
    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/checkpoint_{}.pth.tar'.format(output_dir, checkpoint_idx)        
        agent.load_model(checkpoint_filename)
        start_j = (int(checkpoint_idx) // args.num_steps // args.num_environments) + 1
        
    obs = train_envs.reset()
    start = time.time()
    
    for j in range(start_j, num_updates):
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards, game_times = agent.evaluate(test_envs, j, total_num_steps)
            logging.info(mean_rewards)
            logging.info(game_times)
            
        for step in range(args.num_steps): 
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)
            
        report = agent.update(obs)
        
        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = (start_j) * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))
            
            logging.info(report.format(j, total_num_steps, FPS))  
        
        if j % args.model_save_rate == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            agent.save_policy(total_num_steps, args, output_dir)
        
    # cancel the env processes    
    train_envs.cancel()
    test_envs.cancel()