Example #1
def evaluate_saved_model():
    params = parse_game_args()

    env = DoomEnvironment(params, is_train=True)
    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        params.model_checkpoint), 'The model checkpoint could not be found'
    # The map_location lambda is needed so the checkpoint is not loaded onto the GPU
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])

    base_filename = params.model_checkpoint.split('.')[0].split('/')[1]

    agent = BaseAgent(actor_critic, params)

    for i in range(params.num_mazes_test):
        env = DoomEnvironment(params, idx=i, is_train=True)
        movie_name = 'videos/{}_rollout_{:0004}.mp4'.format(base_filename, i)
        print('Creating movie {}'.format(movie_name))
        make_movie(agent, env, movie_name, params)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  #"cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    name = "Final on enclave"
    args.scenario_dir = "scenarios_transfer_learning/scenes/"
    checkpoint = torch.load("final.pth.tar",
                            map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda is needed so the checkpoint is not loaded onto the GPU

    args.scenario = "custom_scenario000.cfg"
    env = DoomEnvironment(args, is_train=False)
    movie_name = '/home/adam/Bureau/Visuels/0 - Rollout faits main/{}.mp4'.format(
        name)
    print('Creating movie {}'.format(movie_name))
    make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  #"cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
    checkpoint = torch.load("0.pth.tar",
                            map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda is needed so the checkpoint is not loaded onto the GPU

    for i in range(1, 64):  #for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=False)
        movie_name = 'videos/3 - 12-04-21 - base/big_combs_test{:0004}.mp4'.format(
            i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
Example #4
def evaluate_saved_model():

    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=False)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    results = []

    for model in range(0, 2):
        checkpoint = torch.load(str(model) + ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(
            args.model_checkpoint), 'The model could not be loaded'
        # The map_location lambda is needed so the checkpoint is not loaded onto the GPU

        results.append([])

        for i in range(args.num_mazes_test):
            env = DoomEnvironment(args, idx=i, is_train=False)
            results[model].append(get_results(policy, env, args))
            print(i)

        success_rate = 0
        average_reward = 0
        average_time = 0

        for res in results[model]:
            if res[1] < 525:
                success_rate += 1
                average_time += res[1]
            average_reward += res[0]

        if success_rate != 0:
            average_time /= success_rate
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        print(success_rate, average_reward, average_time)

    time_diff = 0
    finished_levels = 0

    for i in range(args.num_mazes_test):
        if results[1][i][1] < 525:
            finished_levels += 1
            time_diff += results[1][i][1] - results[0][i][1]

    print(time_diff / finished_levels)
def evaluate_saved_model():  
    args = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model checkpoint could not be found'

    # The map_location lambda is needed so the checkpoint is not loaded onto the GPU
    checkpoint = torch.load(args.model_checkpoint, map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()
 
    for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=True)
        movie_name = 'videos/rollout_{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
Example #6
def evaluate_saved_model():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_updates = int(
        args.num_frames) // args.num_steps // args.num_environments

    # Writer will output to ./runs/ directory by default
    writer = torch.utils.tensorboard.SummaryWriter()

    train_envs = MultiEnv(args.simulator,
                          args.num_environments,
                          args,
                          is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator,
                                 args.num_environments,
                                 args2,
                                 is_train=False)
    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)

    obs_shape = train_envs.obs_shape

    policy = CNNPolicy(obs_shape, args).to(device)

    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    obs = little_combs_test_envs.reset()

    num_checkpoints = 355

    for j in range(num_checkpoints):
        if j % 8 == 0:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/FINAL/checkpoint_{}.pth.tar'.format(
                str(j + 1))
            agent.load_model(checkpoint_filename)

            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(
                classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(
                little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(
                medium_combs_test_envs, j, total_num_steps)

            writer.add_scalar("Reward classic levels", mean_rewards_classic,
                              (j + 1) * 100)
            writer.add_scalar("Reward little combs levels",
                              mean_rewards_little, (j + 1) * 100)
            writer.add_scalar("Reward medium combs levels",
                              mean_rewards_medium, (j + 1) * 100)
            print(j)
Example #7
import argparse
import os
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from dataset import ImageFolder
from models import CNNPolicy

import random

model = CNNPolicy(3)
model.load_state_dict(torch.load('checkpoint.pth.tar')['state_dict'])
model.eval()
# model.load_state_dict(torch.load('model_best.pth.tar')['state_dict'])

random.seed(123)
idxs = list(range(10100))
random.shuffle(idxs)

data_set = ImageFolder('../gym-duckietown/images', return_path=True)
test_loader = torch.utils.data.DataLoader(data_set,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=1,
                                          sampler=idxs[10000:10100])

output_html = ''
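
The snippet is cut off after `output_html = ''`. The lines below are a minimal sketch of how the held-out samples could be run through the model and collected into the HTML string; they assume `ImageFolder(..., return_path=True)` yields `(image, target, path)` tuples and use a hypothetical output file name, so treat them as an illustration rather than the original code.

# Sketch only: run the model over the held-out indices and record one HTML row per sample.
for img, target, path in test_loader:
    with torch.no_grad():
        pred = model(img)  # forward pass; CNNPolicy acts here as a small regression network
    output_html += '<div><img src="{}"/> pred: {} target: {}</div>\n'.format(
        path[0], pred.squeeze().tolist(), target.squeeze().tolist())

with open('eval_results.html', 'w') as f:  # hypothetical output location
    f.write(output_html)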
Example #8
    clip = ImageSequenceClip(observations, fps=int(30 / params.frame_skip))
    output_dir = logger.get_eval_output()
    clip.write_videofile('{}eval{:0004}_{:00005.0f}.mp4'.format(
        output_dir, step, score * 100))


if __name__ == '__main__':
    # Test to improve movie with action probs, values etc

    params = parse_game_args()
    params.norm_obs = False
    params.recurrent_policy = True
    envs = MultiEnvs(params.simulator, 1, 1, params)
    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])
    model = CNNPolicy(obs_shape[0], envs.num_actions, params.recurrent_policy,
                      obs_shape)
    env = DoomEnvironment(params)
    agent = BaseAgent(model, params)

    env.reset()
    agent.reset()

    rewards = []
    obss = []
    actions = []
    action_probss = []
    values = []

    while not env.is_episode_finished():
        obs = env.get_observation()
        # action = agent.get_action(obs, epsilon=0.0)
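        # The original example is truncated here. A possible continuation (a sketch, not
        # the author's code): act greedily, step the environment, and record the rollout.
        # It assumes make_action returns the step reward, as the ViZDoom API does.
        action = agent.get_action(obs, epsilon=0.0)
        reward = env.make_action(int(action))

        obss.append(obs)
        actions.append(action)
        rewards.append(reward)
    # values and action_probss would be filled the same way if the agent exposes them.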
Example #9
def gen_classic(selh, file, scenario = False, model="model_final"):
    params = parse_game_args()


    # Load the scenario
    if not scenario:
        params.scenario = "custom_scenario003.cfg"
    else:
        params.scenario = scenario

    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")

    num_actions = env.num_actions

    # Load the base model

    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)

    checkpoint = torch.load('models/' + model + '.pth.tar', map_location=lambda storage, loc: storage)

    """Remplacement des clefs du dictionnaire qui posent problème"""

    checkpoint['model']["dist.linear.weight"] = checkpoint['model']["dist_linear.weight"]
    del checkpoint['model']["dist_linear.weight"]
    checkpoint['model']["dist.linear.bias"] = checkpoint['model']["dist_linear.bias"]
    del checkpoint['model']["dist_linear.bias"]

    network.load_state_dict(checkpoint['model'])

    agent = BaseAgent(network, params)

    ERU = {'env': env, 'agent': agent}

    # Load the checkpoints
    num_checkpoints = [98, 98, 159]
    checkpoints = [1]*sum(num_checkpoints)
    networks = [1]*sum(num_checkpoints)
    agents = [1]*sum(num_checkpoints)
    ERUs = [1]*sum(num_checkpoints)

    for i in range(len(num_checkpoints)):
        for j in range(num_checkpoints[i]):
            iter = i*num_checkpoints[0]+j

           # if i==0:
           #     checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 88))
            #else:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 1))

            checkpoints[i*num_checkpoints[0]+j] = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage)

            """Remplacement des clefs du dictionnaire qui posent problème"""

            checkpoints[iter]['model']["dist.linear.weight"] = checkpoints[iter]['model']["dist_linear.weight"]
            del checkpoints[iter]['model']["dist_linear.weight"]
            checkpoints[iter]['model']["dist.linear.bias"] = checkpoints[iter]['model']["dist_linear.bias"]
            del checkpoints[iter]['model']["dist_linear.bias"]

            networks[iter] = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
            networks[iter].load_state_dict(checkpoints[iter]['model'])

            agents[iter] = BaseAgent(networks[iter], params)

            ERUs[iter] = {'env': env, 'agent': agents[iter]}

            ERUs[iter]['env'].reset()

    selhs = []
    for i in range(sum(num_checkpoints)):
        selh = tsne_1d_projection(127)
        selh = torch.from_numpy(selh).type(torch.FloatTensor)
        selh = Variable(selh, volatile=True)
        selhs.append(selh)


    scores = []
    hiddens = []
    inputs = []
    actions = []

    # Loop to collect the base model's observations

    obss = []
    actions = []

    for i in range(50):
        obs = ERU['env'].get_observation()
        action, value, action_probs, grads = ERU['agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)
        ERU['env'].make_action(int(action))
        obss.append(obs)
        actions.append(action)


    # Loop to evaluate the checkpoints on the situations encountered by the base model

    for i in range(sum(num_checkpoints)):

        for obs2 in obss:
            action, value, action_probs, grads = ERUs[i]['agent'].get_action_value_and_probs_zeroes(obs2, selhs[i], epsilon=0.0)

        hidden = ERUs[i]['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]

        h = h.split(',')
        hiddens.append(h)

        ERU['env'].make_action(int(action))

    im = Image.new('P', (sum(num_checkpoints), 128))
    for i in range(len(hiddens)):
        for j in range(len(hiddens[i])):
            value = int((float(hiddens[i][j])+1)*255/2)
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("timeline.png")

    im = Image.new('P', (sum(num_checkpoints)-1, 128))
    for i in range(len(hiddens)-1):
        for j in range(len(hiddens[i])):
            value = int((abs(float(hiddens[i][j])-float(hiddens[i+1][j])))*255*1.5)
            if value>255:
                value=255
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("variation.png")
Example #10
def train():
    # define params
    params = parse_game_args()
    logger = Logger(params)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_updates = int(
        params.num_frames) // params.num_steps // params.num_environments

    # environments

    envs = MultiEnvsMPPipes(params.simulator, params.num_environments, 1,
                            params)

    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])

    evaluator = Evaluator(params)
    print('creating model')
    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params).to(device)
    print('model created')
    start_j = 0

    if params.reload_model:
        checkpoint_idx = params.reload_model.split(',')[1]
        checkpoint_filename = '{}models/checkpoint_{}.pth.tar'.format(
            params.output_dir, checkpoint_idx)
        assert os.path.isfile(
            checkpoint_filename), 'The model could not be found {}'.format(
                checkpoint_filename)
        logger.write('Loading model{}'.format(checkpoint_filename))

        if device.type == 'cuda':  # The checkpoint will try to load onto the GPU storage unless specified
            checkpoint = torch.load(checkpoint_filename)
        else:
            checkpoint = torch.load(checkpoint_filename,
                                    map_location=lambda storage, loc: storage)
        actor_critic.load_state_dict(checkpoint['model'])

        start_j = (int(checkpoint_idx) // params.num_steps //
                   params.num_environments) + 1

    print('creating optimizer')
    optimizer = optim.RMSprop(
        [p for p in actor_critic.parameters() if p.requires_grad],
        params.learning_rate,
        eps=params.eps,
        alpha=params.alpha,
        momentum=params.momentum)

    if params.reload_model:
        optimizer.load_state_dict(checkpoint['optimizer'])

    rollouts = RolloutStorage(params.num_steps, params.num_environments,
                              obs_shape, actor_critic.state_size, params)

    current_obs = torch.zeros(params.num_environments, *obs_shape)

    # For Frame stacking
    def update_current_obs(obs):
        shape_dim0 = envs.obs_shape[0]
        obs = torch.from_numpy(obs).float()
        if params.num_stack > 1:
            current_obs[:, :-shape_dim0] = current_obs[:, shape_dim0:]
        current_obs[:, -shape_dim0:] = obs

    print('getting first obs')
    obs = envs.reset()
    print('update current obs')
    update_current_obs(obs)

    rollouts.observations[0].copy_(current_obs)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([params.num_environments, 1])
    final_rewards = torch.zeros([params.num_environments, 1])

    current_obs = current_obs.to(device)
    rollouts.set_device(device)

    print('Starting training loop')
    start = time.time()
    print(num_updates)

    for j in range(start_j, num_updates):
        # STARTING no grad scope
        with torch.no_grad():

            if j % params.eval_freq == 0 and not params.skip_eval:
                print('Evaluating model')
                if params.simulator == 'doom':
                    actor_critic.eval()
                    total_num_steps = (
                        j + 1) * params.num_environments * params.num_steps
                    #eval_model(actor_critic, params, logger, j, total_num_steps, params.eval_games)
                    evaluator.evaluate(actor_critic, params, logger, j,
                                       total_num_steps, params.eval_games)
                    actor_critic.train()

            # =============================================================================
            # Take steps in the environment
            # =============================================================================
            for step in range(params.num_steps):
                # Sample actions
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step], rollouts.states[step],
                    rollouts.masks[step])

                cpu_actions = action.squeeze(1).cpu().numpy()

                # Observe the reward and next obs
                obs, reward, done, info = envs.step(cpu_actions)

                reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                         1)).float()
                episode_rewards += reward

                # If done then create masks to clean the history of observations.
                masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                           for done_ in done])

                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards
                episode_rewards *= masks

                masks = masks.to(device)

                if current_obs.dim() == 4:
                    current_obs *= masks.unsqueeze(2).unsqueeze(2)
                else:
                    current_obs *= masks

                update_current_obs(obs)

                rollouts.insert(step, current_obs, states, action,
                                action_log_prob, value, reward, masks)

            # =============================================================================
            # Compute discounted returns, re-step through the environment
            # =============================================================================
            next_value = actor_critic(rollouts.observations[-1],
                                      rollouts.states[-1],
                                      rollouts.masks[-1])[0]

            rollouts.compute_returns(next_value, params.use_gae, params.gamma,
                                     params.tau)

        # FINISHED no grad scope
        model_output = actor_critic.evaluate_actions(
            rollouts.observations[:-1].view(-1, *obs_shape),
            rollouts.states[0].view(-1, actor_critic.state_size),
            rollouts.masks[:-1].view(-1, 1), rollouts.actions.view(-1, 1))

        values, action_log_probs, dist_entropy, states = model_output

        values = values.view(params.num_steps, params.num_environments, 1)
        action_log_probs = action_log_probs.view(params.num_steps,
                                                 params.num_environments, 1)
        advantages = rollouts.returns[:-1] - values

        value_loss = advantages.pow(2).mean()
        action_loss = -(advantages.detach() * action_log_probs).mean()

        optimizer.zero_grad()

        loss = value_loss * params.value_loss_coef + action_loss - dist_entropy * params.entropy_coef
        loss.backward()
        nn.utils.clip_grad_norm_(actor_critic.parameters(),
                                 params.max_grad_norm)

        optimizer.step()
        rollouts.after_update()

        if j % params.model_save_rate == 0:
            total_num_steps = (j +
                               1) * params.num_environments * params.num_steps
            checkpoint = {
                'step': step,
                'params': params,
                'model': actor_critic.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            filepath = logger.output_dir + 'models/'

            torch.save(
                checkpoint, '{}checkpoint_{:00000000010}.pth.tar'.format(
                    filepath, total_num_steps))

        if j % params.log_interval == 0:
            end = time.time()
            total_num_steps = (j +
                               1) * params.num_environments * params.num_steps
            save_num_steps = (
                start_j) * params.num_environments * params.num_steps
            logger.write(
                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}"
                .format(
                    j, total_num_steps,
                    int((total_num_steps - save_num_steps) / (end - start)),
                    final_rewards.mean(), final_rewards.median(),
                    final_rewards.min(), final_rewards.max(),
                    dist_entropy.item(), value_loss.item(),
                    action_loss.item()))

    evaluator.cancel()
    envs.cancel()
Example #11
def gen_classic(selh, file):
    params = parse_game_args()
    params.scenario = "health_gathering_supreme.cfg"
    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")

    num_actions = env.num_actions
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)

    checkpoint = torch.load('models/' + "health_gathering_supreme" +
                            '.pth.tar',
                            map_location=lambda storage, loc: storage)
    network.load_state_dict(checkpoint['model'])

    agent = BaseAgent(network, params)

    ERU = {'env': env, 'agent': agent}

    selh = torch.from_numpy(selh).type(torch.FloatTensor)

    selh = Variable(selh, volatile=True)

    ERU['env'].set_seed(randint(0, 999999999))
    ERU['env'].reset()

    scores = []
    hiddens = []
    inputs = []
    saliencies = []
    actions = []
    probabilities = []
    health = []
    positions = []
    orientations = []
    velocities = []
    items = []
    fov = []

    w = 0

    while not ERU['env'].is_episode_finished():
        observation = io.BytesIO()

        obs = ERU['env'].get_observation()
        temp = ERU['env'].state.screen_buffer
        Image.fromarray(temp.transpose(1, 2, 0)).save(observation,
                                                      format="JPEG")
        action, value, action_probs, grads = ERU[
            'agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)

        hidden = ERU['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]

        h = h.split(',')
        probs = ""
        for elem in action_probs[0]:
            probs += str(elem) + ","
        probs = probs[:-1]

        probs = probs.split(',')
        sa = io.BytesIO()

        t = Image.fromarray(grads, 'L')

        t.save(sa, format="JPEG")

        scores.append(str(round(ERU['env'].game.get_total_reward(), 2)))
        hiddens.append(h)
        inputs.append(base64.b64encode(observation.getvalue()))
        saliencies.append(base64.b64encode(sa.getvalue()))
        actions.append(str(action))
        probabilities.append(probs)
        health.append(ERU['env'].get_health())
        positions.append(ERU['env'].get_pos())
        orientations.append(ERU['env'].get_ori())
        velocities.append(ERU['env'].get_velo())
        items.append(ERU['env'].get_item())
        fov.append(ERU['env'].get_fov())

        ERU['env'].make_action(int(action))
        print('Iteration', w, '/525')
        w += 1

    result = {
        'episode0': {
            'inputs': inputs,
            'actions': actions,
            'probabilities': probabilities,
            'saliencies': saliencies,
            'scores': scores,
            'positions': positions,
            'health': health,
            'hiddens': hiddens,
            'orientations': orientations,
            'velocities': velocities,
            'items': items,
            'fov': fov
        }
    }
    with open(file, 'w') as f:
        ujson.dump(result, f, indent=4, sort_keys=True)
    return result


def write_movie(params, logger, observations, step, score, best_agent=True):
    observations = [o.transpose(1, 2, 0) for o in observations]
    clip = ImageSequenceClip(observations, fps=int(30 / params.frame_skip))
    output_dir = logger.get_eval_output()
    clip.write_videofile('{}eval{:0004}_{:00005.0f}.mp4'.format(
        output_dir, step, score * 100))
    if params.use_visdom:
        logger.add_video('{}eval{:0004}_{:00005.0f}.mp4'.format(
            output_dir, step, score * 100),
                         best_agent=best_agent)


if __name__ == '__main__':
    params = parse_game_args()
    params.norm_obs = False
    params.num_stack = 1
    params.recurrent_policy = True
    params.num_environments = 16
    params.scenario = 'scenario_3_item0.cfg'

    envs = MultiEnvsMPPipes(params.simulator, 1, 1, params)
    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])
    model = CNNPolicy(obs_shape[0], obs_shape, params)

    with torch.no_grad():
        eval_model_multi(model, params, 0, 0, 0, num_games=1000)
def train():
    args = parse_a2c_args()
    args2 = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    num_updates = int(
        args.num_frames) // args.num_steps // args.num_environments
    # Create the train and test environments with Multiple processes
    train_envs = MultiEnv(args.simulator,
                          args.num_environments,
                          args,
                          is_train=True)

    # Create the test environments for the classic levels
    args2.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
    args2.scenario = "custom_scenario_test{:003}.cfg"
    classic_test_envs = MultiEnv(args.simulator,
                                 args.num_environments,
                                 args2,
                                 is_train=False)
    # Create the test environments for the comb levels
    args2.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
    little_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)
    args2.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
    medium_combs_test_envs = MultiEnv(args.simulator,
                                      args.num_environments,
                                      args2,
                                      is_train=False)

    test_envs = MultiEnv(args.simulator,
                         args.num_environments,
                         args,
                         is_train=False)

    # Writer will output to ./runs/ directory by default
    writer = torch.utils.tensorboard.SummaryWriter()

    obs_shape = train_envs.obs_shape

    # The agent's policy network and training algorithm A2C
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy,
                     args.hidden_size,
                     value_weight=args.value_loss_coef,
                     entropy_weight=args.entropy_coef,
                     num_steps=args.num_steps,
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)

    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/base_line.pth.tar'.format(output_dir)
        agent.load_model(checkpoint_filename)
        start_j = 0  #(int(checkpoint_idx) // args.num_steps // args.num_environments) + 1

    obs = train_envs.reset()
    start = time.time()
    nb_of_saves = 0

    for j in range(start_j, num_updates):
        print("------", j / num_updates * 100, "-------")

        # Evaluate the model's performance
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards_classic, game_times_classic = agent.evaluate(
                classic_test_envs, j, total_num_steps)
            mean_rewards_little, game_times_little = agent.evaluate(
                little_combs_test_envs, j, total_num_steps)
            mean_rewards_medium, game_times_medium = agent.evaluate(
                medium_combs_test_envs, j, total_num_steps)

            # succes_classic = sum([1 if i!=525 else 0 for i in game_times_classic])/16
            #  succes_little = sum([1 if i!=525 else 0 for i in game_times_little])/16
            # succes_medium = sum([1 if i!=525 else 0 for i in game_times_medium])/16

            writer.add_scalar("Reward classic levels", mean_rewards_classic, j)
            writer.add_scalar("Reward little combs levels",
                              mean_rewards_little, j)
            writer.add_scalar("Reward medium combs levels",
                              mean_rewards_medium, j)
        # writer.add_scalar("Success rate classic levels", succes_classic, j)
        # writer.add_scalar("Success rate little combs levels", succes_little, j)
        # writer.add_scalar("Success rate medium combs levels", succes_medium, j)

        for step in range(args.num_steps):
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)

        report = agent.update(obs)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = (start_j) * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))

            logging.info(report.format(j, total_num_steps, FPS))

        if j % args.model_save_rate == 0:
            nb_of_saves += 1
            agent.save_policy2(nb_of_saves, args, output_dir)

    # cancel the env processes
    train_envs.cancel()
    test_envs.cancel()
Example #14
def main():
    global args, best_loss
    args = parser.parse_args()

    data_set = ImageFolder('../gym-duckietown/real_images',
                           augment=args.augment)
    print(len(data_set))

    writer = SummaryWriter('log/')

    splits = json.load(open('splits.json'))

    train_loader = torch.utils.data.DataLoader(
        data_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=1,
        sampler=torch.utils.data.sampler.RandomSampler(list(range(72))))
    #val_loader = torch.utils.data.DataLoader(
    #    data_set, batch_size=args.batch_size, shuffle=False,
    #    num_workers=1, sampler=splits['val'])

    if args.use_model2:
        model = CNNPolicy2(3)
    else:
        model = CNNPolicy(3)

    if args.cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, optimizer, epoch)
        # evaluate on validation set
        loss = validate(train_loader, model)

        writer.add_scalar('val_loss', loss, epoch)

        # remember best prec@1 and save checkpoint
        is_best = loss < best_loss
        best_loss = min(loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'optimizer': optimizer.state_dict(),
            }, is_best)
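
`main()` calls `save_checkpoint` and `adjust_learning_rate`, which are not shown in this excerpt. The body below is a minimal sketch of what `save_checkpoint` commonly looks like in PyTorch training scripts of this style; the file names and the helper body are conventional assumptions, not taken from the original repository, and it expects `torch` and `shutil` to be imported.

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # Persist the latest training state; keep a separate copy of the best model so far.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')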
Example #15
def evaluate_saved_model(models, models_dir):

    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else

    # Create the environments

    # Create the little-combs environments

    little_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        little_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Create the big-combs environments

    big_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        big_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Create the classic environments

    classic_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        classic_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # Create the environments for the other levels

    medium_comb_env = []
    for i in range(16):
        args.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        medium_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    resultat = []

    for model in models:
        checkpoint = torch.load(models_dir + "/checkpoint_" + str(model) +
                                ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        resultat.append(model)

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(
            args.model_checkpoint), 'The model could not be loaded'
        # The map_location lambda is needed so the checkpoint is not loaded onto the GPU

        # Evaluation on the classic levels

        results = []

        for i in range(50):
            env = classic_env[i]
            results.append(get_results(policy, env, args))

        print("Classic levels evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        # Evaluation on the little combs

        results = []

        for i in range(50):
            env = little_comb_env[i]
            results.append(get_results(policy, env, args))

        print("Little combs evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        # Evaluation on the big combs

        results = []

        for i in range(50):
            env = big_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Big combs evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        # Evaluation on the other levels

        results = []

        for i in range(16):
            env = medium_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Other levels evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        print("Checkpoint " + str(model) + " has been evaluated")

    print(resultat)
def train():
    args = parse_a2c_args()
    output_dir = initialize_logging(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    num_updates = int(args.num_frames) // args.num_steps // args.num_environments
    # Create the train and test environments with Multiple processes
    train_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=True)
    test_envs = MultiEnv(args.simulator, args.num_environments, args, is_train=False)
    
    obs_shape = train_envs.obs_shape
    
    # The agent's policy network and training algorithm A2C
    policy = CNNPolicy(obs_shape, args).to(device)
    agent = A2CAgent(policy, 
                     args.hidden_size,
                     value_weight=args.value_loss_coef, 
                     entropy_weight=args.entropy_coef, 
                     num_steps=args.num_steps, 
                     num_parallel=args.num_environments,
                     gamma=args.gamma,
                     lr=args.learning_rate,
                     opt_alpha=args.alpha,
                     opt_momentum=args.momentum,
                     max_grad_norm=args.max_grad_norm)
    
    start_j = 0
    if args.reload_model:
        checkpoint_idx = args.reload_model.split(',')[1]
        checkpoint_filename = '{}models/checkpoint_{}.pth.tar'.format(output_dir, checkpoint_idx)        
        agent.load_model(checkpoint_filename)
        start_j = (int(checkpoint_idx) // args.num_steps // args.num_environments) + 1
        
    obs = train_envs.reset()
    start = time.time()
    
    for j in range(start_j, num_updates):
        if not args.skip_eval and j % args.eval_freq == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            mean_rewards, game_times = agent.evaluate(test_envs, j, total_num_steps)
            logging.info(mean_rewards)
            logging.info(game_times)
            
        for step in range(args.num_steps): 
            action = agent.get_action(obs, step)
            obs, reward, done, info = train_envs.step(action)
            agent.add_rewards_masks(reward, done, step)
            
        report = agent.update(obs)
        
        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            save_num_steps = (start_j) * args.num_environments * args.num_steps
            FPS = int((total_num_steps - save_num_steps) / (end - start))
            
            logging.info(report.format(j, total_num_steps, FPS))  
        
        if j % args.model_save_rate == 0:
            total_num_steps = (j + 1) * args.num_environments * args.num_steps
            agent.save_policy(total_num_steps, args, output_dir)
        
    # cancel the env processes    
    train_envs.cancel()
    test_envs.cancel()
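
The training and evaluation functions above are only defined, never invoked, in these excerpts. A minimal entry point, assuming one of these snippets is executed directly as a script, would be:

if __name__ == '__main__':
    train()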