Example #1
0
def main(args, seed, episodes):
    """Evaluate an agent on ``args.env`` and return the evaluation logs.

    Seeds every randomness source, builds the environment and the agent,
    caps ``episodes`` at the number of available demos for demo-backed
    agents, then dispatches to the evaluation routine matching the agent
    type.
    """
    utils.seed(seed)

    # Build the environment and load the agent to evaluate.
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, None, None, args.argmax,
                             args.env)

    # Without a trained model the agent replays demos, so it cannot run
    # more episodes than there are demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Pick the evaluation routine appropriate for this agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent, args.env, seed, episodes,
                          return_obss_actions=True)
def main_test(args, seed, episodes):
    """Evaluate an agent on the test variant (``_Test-v0``) of ``args.env``.

    Same flow as ``main`` but targets the held-out test environment and
    never uses the batched obs/action-returning evaluator.
    """
    utils.seed(seed)

    # Build the *test* environment corresponding to the training env.
    env_name = args.env + "_Test-v0"
    env = gym.make(env_name)
    env.seed(seed)
    agent = utils.load_agent(env,
                             args.model,
                             argmax=args.argmax,
                             env_name=env_name)

    # Demo-backed agents (no model) cannot run more episodes than demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Dispatch on agent type.
    if isinstance(agent, utils.DemoAgent):
        return evaluate_demo_agent(agent, episodes)
    if isinstance(agent, utils.BotAgent):
        return evaluate(agent, env, episodes, False)
    return batch_evaluate(agent, env_name, seed, episodes)
Example #3
0
    def validate(self, episodes, verbose=True):
        """Run ``episodes`` validation episodes and return a list of logs.

        Re-seeds before every validation so results are comparable across
        calls, evaluates the model currently being trained on each
        configured environment, and restores training mode afterwards.
        """
        # Reset the seed every time so successive validations are consistent.
        utils.seed(self.args.val_seed)

        if verbose:
            logger.info("Validating the model")

        multi_env = getattr(self.args, 'multi_env', None)
        eval_env = self.env[0] if multi_env else self.env
        agent = utils.load_agent(eval_env,
                                 model_name=self.args.model,
                                 argmax=True)

        # Evaluate with the in-training model rather than a reloaded copy.
        agent.model = self.acmodel
        agent.model.eval()

        env_names = multi_env if multi_env else [self.args.env]
        logs = [
            batch_evaluate(agent, env_name, self.args.val_seed, episodes)
            for env_name in env_names
        ]
        agent.model.train()

        return logs
Example #4
0
    def __init__(self,
                 model,
                 env,
                 args,
                 distill_with_teacher,
                 reward_predictor=False):
        """Set up the trainer: store config, seed, and build the optimizer.

        Saves the given actor-critic model immediately, moves it to the GPU
        when available, and attaches an Adam optimizer with a StepLR
        schedule (learning rate decayed by 0.9 every 100 scheduler steps).

        Args:
            model: actor-critic model to train.
            env: environment instance the trainer will interact with.
            args: configuration namespace (seed, lr, optim_eps, model name).
            distill_with_teacher: whether to distill from a teacher policy.
            reward_predictor: whether a reward predictor is being trained.
        """
        self.args = args
        self.distill_with_teacher = distill_with_teacher
        self.reward_predictor = reward_predictor

        utils.seed(self.args.seed)
        self.env = env

        # Actor-critic model: persist it right away, then put it in train
        # mode on the GPU if one is available.
        self.acmodel = model
        utils.save_model(self.acmodel, args.model)
        self.acmodel.train()
        if torch.cuda.is_available():
            self.acmodel.cuda()

        self.optimizer = torch.optim.Adam(self.acmodel.parameters(),
                                          self.args.lr,
                                          eps=self.args.optim_eps)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=100,
                                                         gamma=0.9)

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        print("DEVICE", self.device)
Example #5
0
    def __init__(self, args):
        """Build the imitation-learning setup: env, learner model, teacher.

        Seeds all randomness sources, constructs the environment and a
        small actor-critic learner, pairs it with a Bot on the same
        environment, and prepares the Adam optimizer with a StepLR
        schedule.
        """
        self.args = args

        # seeding
        utils.seed(args.seed)

        self.env = gym.make(id=args.env)

        # NOTE(review): episode count and decay are hard-coded; the inline
        # comments suggest they were once read from args — confirm intent.
        self.episodes = 300  # args.episodes
        self.horizon = self.env.max_steps
        self.initial_decay = 0.99  # args.decay

        self.observation_preprocessor = utils.ObssPreprocessor(
            model_name=args.model,
            obs_space=self.env.observation_space,
            load_vocab_from=getattr(self.args, 'pretrained_model', None))
        # TODO: for now I am only running the small model
        self.model = models.ACModel(obs_space=self.env.observation_space,
                                    action_space=self.env.action_space)
        # Learner acts greedily (argmax) using the model being trained.
        self.learner = ModelAgent(
            model_or_name=self.model,
            obss_preprocessor=self.observation_preprocessor,
            argmax=True)
        # Scripted bot on the same env; presumably the teacher policy
        # providing supervision — verify against usage elsewhere.
        self.teacher = Bot(self.env)

        # Rollout data accumulated during training (filled in elsewhere).
        self.data = []

        # Persist the vocabulary and the initial model immediately.
        self.observation_preprocessor.vocab.save()
        utils.save_model(self.model, args.model)

        self.model.train()
        if torch.cuda.is_available():
            self.model.cuda()

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          self.args.lr,
                                          eps=self.args.optim_eps)
        # Decay the learning rate by 10% every 100 scheduler steps.
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=100,
                                                         gamma=0.9)
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        if self.device.type == 'cpu':
            print('running on cpu...')
def main(args, seed, episodes):
    """Evaluate an agent on crowd-sourced (Turk) missions and return logs.

    Loads the agent, reads (seed, original command, human command) triples
    from ``args.turk_file``, and evaluates either on the original missions
    or, when ``args.human`` is set, on the human-written ones (optionally
    projected through the sentences in ``args.proj_file``).
    """
    # Set seed for all randomness sources
    utils.seed(seed)

    # Define agent
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)
    # Demo-backed agents cannot run more episodes than available demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Using a projection requires a projection file. Raise explicitly
    # instead of assert so the check survives `python -O`.
    if args.proj is not None and args.proj_file is None:
        raise ValueError("--proj requires --proj_file to be set")

    if args.proj_file is not None:
        with open(args.proj_file, newline="") as reader:
            proj_sentences = reader.readlines()
    else:
        proj_sentences = None

    # Parse the Turk CSV: one row per annotated episode.
    seeds = []
    orig_missions = []
    missions = []
    with open(args.turk_file, newline="") as reader:
        csv_reader = csv.reader(reader)
        header = next(csv_reader)
        i_seed = header.index("Input.seed")
        i_orig_dir = header.index("Input.cmd")
        i_mission = header.index("Answer.command")
        for row in csv_reader:
            seeds.append(int(row[i_seed]))
            orig_missions.append(row[i_orig_dir])
            missions.append(row[i_mission])

    # Evaluate on the original missions, or on the human rewrites.
    if not args.human:
        logs = evaluate_fixed_seeds(agent, env, episodes, seeds, orig_missions)
    else:
        logs = evaluate_fixed_seeds(agent, env, episodes, seeds, orig_missions,
                                    missions, args.proj, proj_sentences)

    return logs
Example #7
0
def main(args, seed, episodes):
    """Evaluate an agent on ``args.env`` and return the evaluation logs."""
    utils.seed(seed)

    # Environment and agent under evaluation.
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)

    # Demo-backed agents cannot run more episodes than available demos.
    if args.model is None and args.episodes > len(agent.demos):
        episodes = len(agent.demos)

    # Model agents get the fast batched evaluator unless contiguous
    # episodes were requested; every other case runs sequentially.
    if isinstance(agent, utils.ModelAgent) and not args.contiguous_episodes:
        return batch_evaluate(agent, args.env, seed, episodes)
    return evaluate(agent, env, episodes, False)
Example #8
0
    # Keyboard-to-action mapping for manual control of the environment.
    action_map = {
        "LEFT": "left",
        "RIGHT": "right",
        "UP": "forward",
        "PAGE_UP": "pickup",
        "PAGE_DOWN": "drop",
        "SPACE": "toggle"
    }

    # Default seed: 0 when evaluating a trained model, 1 otherwise.
    if args.seed is None:
        args.seed = 0 if args.model is not None else 1

    # Set seed for all randomness sources

    utils.seed(args.seed)

    # Generate environment

    env = gym.make(args.env)
    env.seed(args.seed)
    # Burn through `shift` episodes so the run starts at a later layout.
    for _ in range(args.shift):
        env.reset()

    global obs
    # NOTE(review): unpacking (obs, info) implies the newer Gym reset API;
    # other snippets in this file use the single-return API — confirm the
    # gym version this snippet targets.
    obs, info = env.reset()
    print("Mission: {}".format(obs["mission"]))

    # Define agent and load trained model
    agent = machine.util.load_agent(env, args.model, args.argmax, args.env, args.vocab)
Example #9
0
def main():
    """Train an actor-critic agent with PPO and periodically evaluate it.

    Reads configuration from the module-level ``args``: builds one
    environment per worker process, restores or creates the model, runs
    PPO updates until ``args.frames`` frames are collected, logs progress
    to console / CSV / optional TensorBoard, and keeps a ``_best``
    checkpoint of the model with the highest validation success rate.
    """
    # Generate environments (one per worker, each with a distinct seed)
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name from the run configuration plus a timestamp suffix
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix}
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(**model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(**model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor; 'emb' architectures use integer tokens
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
    else:
        obss_preprocessor = utils.ImgInstrObssPreprocessor(args.model, envs[0].observation_space)

    # Define actor-critic model: resume if a checkpoint exists, otherwise
    # start from a pretrained model or from scratch
    acmodel = utils.load_model(args.model, raise_not_found=False)
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
        else:
            acmodel = ACModelImgInstr(obss_preprocessor.obs_space, envs[0].action_space,
                                      args.image_dim, args.memory_dim, args.instr_dim,
                                      not args.no_instr, not args.no_mem, args.arch)

    utils.save_model(acmodel, args.model)

    if torch.cuda.is_available():
        acmodel.cuda()

    # Define actor-critic algo

    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.beta1, args.beta2,
                                 args.gae_lambda,
                                 args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
                                 args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size, obss_preprocessor,
                                 reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are initialized compared to when we don't use that.
    # Thus, there starts to be a difference in the random state. If we want to avoid it, in order to make sure that
    # the results of supervised-loss-coef=0. and extra-binary-info=0 match, we need to reseed here.

    utils.seed(args.seed)

    # Restore training status

    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0,
                  'num_episodes': 0,
                  'num_frames': 0}

    # Define logger and Tensorboard writer and CSV writer

    header = (["update", "episodes", "frames", "FPS", "duration"]
              + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["success_rate"]
              + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    if args.tb:
        from tensorboardX import SummaryWriter

        writer = SummaryWriter(utils.get_log_dir(args.model))
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # we don't buffer data going in the csv log, cause we assume
    # that one update will take much longer that one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model

    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code), shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output(
            'cd {}; git diff'.format(babyai_code), shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        # Fixed message: this branch is about the diff, not the last commit.
        logger.info('Could not figure out the git diff')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model

    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env
    while status['num_frames'] < args.frames:
        # Update parameters

        update_start_time = time.time()
        logs = algo.update_parameters()
        update_end_time = time.time()

        status['num_frames'] += logs["num_frames"]
        status['num_episodes'] += logs['episodes_done']
        status['i'] += 1

        # Print logs

        if status['i'] % args.log_interval == 0:
            total_ellapsed_time = int(time.time() - total_start_time)
            fps = logs["num_frames"] / (update_end_time - update_start_time)
            duration = datetime.timedelta(seconds=total_ellapsed_time)
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            # An episode counts as a success when its return is positive.
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])
            num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

            data = [status['i'], status['num_episodes'], status['num_frames'],
                    fps, total_ellapsed_time,
                    *return_per_episode.values(),
                    success_per_episode['mean'],
                    *num_frames_per_episode.values(),
                    logs["entropy"], logs["value"], logs["policy_loss"], logs["value_loss"],
                    logs["loss"], logs["grad_norm"]]

            format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} | R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                          "S {:.2f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                          "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")

            logger.info(format_str.format(*data))
            if args.tb:
                assert len(header) == len(data)
                for key, value in zip(header, data):
                    writer.add_scalar(key, float(value), status['num_frames'])

            csv_writer.writerow(data)

        # Save obss preprocessor vocabulary and model

        if args.save_interval > 0 and status['i'] % args.save_interval == 0:
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
                utils.save_model(acmodel, args.model)

            # Testing the model before saving
            agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
            agent.model = acmodel
            agent.model.eval()
            logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
            agent.model.train()
            mean_return = np.mean(logs["return_per_episode"])
            success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])
            # Keep a separate "_best" checkpoint: prefer higher success
            # rate, break ties on mean return.
            save_model = False
            if success_rate > best_success_rate:
                best_success_rate = success_rate
                save_model = True
            elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
                best_mean_return = mean_return
                save_model = True
            if save_model:
                utils.save_model(acmodel, args.model + '_best')
                logger.info("Return {: .2f}; best model is saved".format(mean_return))
            else:
                logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))
Example #10
0
    def __init__(
        self,
        args,
    ):
        """Set up imitation learning: load demos, build env(s) and model.

        Supports two modes: multi-environment training (``args.multi_env``),
        where demos for each env are concatenated, and single-environment
        training. In both, training and validation demos are loaded and
        truncated to the requested episode counts, then the actor-critic
        model is restored (or created), saved, and given an Adam optimizer
        with a StepLR schedule.
        """
        self.args = args

        utils.seed(self.args.seed)

        # args.env is a list when training on multiple environments
        if getattr(args, 'multi_env', None):
            self.env = [gym.make(item) for item in args.multi_env]

            # Concatenate the requested number of train demos from each env.
            self.train_demos = []
            for demos, episodes in zip(args.multi_demos, args.multi_episodes):
                demos_path = utils.get_demos_path(demos,
                                                  None,
                                                  None,
                                                  valid=False)
                logger.info('loading {} of {} demos'.format(episodes, demos))
                train_demos = utils.load_demos(demos_path)
                logger.info('loaded demos')
                if episodes > len(train_demos):
                    raise ValueError(
                        "there are only {} train demos in {}".format(
                            len(train_demos), demos))
                self.train_demos.extend(train_demos[:episodes])
                logger.info('So far, {} demos loaded'.format(
                    len(self.train_demos)))

            # Validation uses the same per-env episode budget for every env.
            self.val_demos = []
            for demos, episodes in zip(args.multi_demos, [args.val_episodes] *
                                       len(args.multi_demos)):
                demos_path_valid = utils.get_demos_path(demos,
                                                        None,
                                                        None,
                                                        valid=True)
                logger.info('loading {} of {} valid demos'.format(
                    episodes, demos))
                valid_demos = utils.load_demos(demos_path_valid)
                logger.info('loaded demos')
                # Unlike training, a shortfall here is tolerated: just use
                # every available validation demo.
                if episodes > len(valid_demos):
                    logger.info(
                        'Using all the available {} demos to evaluate valid. accuracy'
                        .format(len(valid_demos)))
                self.val_demos.extend(valid_demos[:episodes])
                logger.info('So far, {} valid demos loaded'.format(
                    len(self.val_demos)))

            logger.info('Loaded all demos')

            # All envs are assumed to share observation/action spaces, so
            # the first one is representative.
            observation_space = self.env[0].observation_space
            action_space = self.env[0].action_space

        else:
            self.env = gym.make(self.args.env)

            demos_path = utils.get_demos_path(args.demos,
                                              args.env,
                                              args.demos_origin,
                                              valid=False)
            demos_path_valid = utils.get_demos_path(args.demos,
                                                    args.env,
                                                    args.demos_origin,
                                                    valid=True)
            # NOTE(review): the bare print statements below look like
            # leftover debugging output — consider removing or converting
            # to logger.debug.
            print("else")
            logger.info('loading demos')
            self.train_demos = utils.load_demos(demos_path)
            print(len(self.train_demos))
            print(self.train_demos[0])
            logger.info('loaded demos')
            if args.episodes:
                if args.episodes > len(self.train_demos):
                    raise ValueError("there are only {} train demos".format(
                        len(self.train_demos)))
                self.train_demos = self.train_demos[:args.episodes]

            self.val_demos = utils.load_demos(demos_path_valid)
            if args.val_episodes > len(self.val_demos):
                logger.info(
                    'Using all the available {} demos to evaluate valid. accuracy'
                    .format(len(self.val_demos)))
            self.val_demos = self.val_demos[:self.args.val_episodes]

            observation_space = self.env.observation_space
            action_space = self.env.action_space

            print("else")
        print(args.model)
        self.obss_preprocessor = utils.ObssPreprocessor(
            args.model, observation_space,
            getattr(self.args, 'pretrained_model', None))

        # Define actor-critic model: resume from checkpoint if present,
        # otherwise start from a pretrained model or from scratch.
        self.acmodel = utils.load_model(args.model, raise_not_found=False)
        if self.acmodel is None:
            if getattr(self.args, 'pretrained_model', None):
                self.acmodel = utils.load_model(args.pretrained_model,
                                                raise_not_found=True)
            else:
                self.acmodel = ACModel(self.obss_preprocessor.obs_space,
                                       action_space, args.image_dim,
                                       args.memory_dim, args.instr_dim,
                                       not self.args.no_instr,
                                       self.args.instr_arch,
                                       not self.args.no_mem, self.args.arch)
        # Persist the vocabulary and initial model immediately.
        self.obss_preprocessor.vocab.save()
        utils.save_model(self.acmodel, args.model)

        self.acmodel.train()
        if torch.cuda.is_available():
            self.acmodel.cuda()

        self.optimizer = torch.optim.Adam(self.acmodel.parameters(),
                                          self.args.lr,
                                          eps=self.args.optim_eps)
        # Decay the learning rate by 10% every 100 scheduler steps.
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=100,
                                                         gamma=0.9)

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
Example #11
0
def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the loaded agent and save successful episodes as demos.

    Runs episodes until ``n_episodes`` successful demonstrations are
    collected, packing each as (mission, compressed images, directions,
    actions). Failed or crashing episodes are skipped (or re-raised when
    ``args.on_exception == 'crash'``). Demos are saved periodically and
    again at the end.

    Args:
        n_episodes: number of successful demos to collect.
        valid: whether these are validation demos (affects the save path).
        seed: seed for all randomness sources and the environment.
        shift: number of initial episodes to skip before recording.
    """
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    env.seed(seed)
    # Burn through `shift` episodes so recording starts at a later layout.
    for i in range(shift):
        env.reset()

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    # Timestamp of the last progress log, used to estimate demos/second.
    checkpoint_time = time.time()

    while True:
        # Run the expert for one episode

        done = False
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

                obs = new_obs
            # Keep only successful episodes, optionally filtered by length.
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))

            if len(demos) >= n_episodes:
                break
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed")
                logger.info("mission failed")
        except Exception:
            # Best-effort mode: log the failure and move on to the next
            # episode unless configured to crash.
            if args.on_exception == 'crash':
                raise
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        # Periodic progress report with a throughput-based ETA.
        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos), demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("Demos saved")
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("Demos saved")
    print_demo_lengths(demos[-100:])
Example #12
0
def main(exp, argv):
    """Benchmark-style PPO training entry point driven by an experiment object.

    Parses PPO hyperparameters from ``argv``, builds environments and the
    actor-critic model, then runs PPO updates inside ``exp``'s iteration
    wrapper until ``--max-count`` frames are processed, logging loss and
    frame counts per iteration.

    NOTE(review): several attributes read below (seed, procs, env, frames,
    instr_arch, no_mem, arch, lr, beta1, beta2, entropy_coef, recurrence,
    optim_eps, batch_size, frames_per_proc, pretrained_model, model,
    image_dim, memory_dim, instr_dim, no_instr) are NOT defined by this
    parser — presumably added elsewhere (defaults merged into args), or
    this would fail at runtime; confirm.
    """
    os.environ["BABYAI_STORAGE"] = exp.results_directory()

    # Parse arguments
    parser = ArgumentParser()
    parser.add_argument("--algo",
                        default='ppo',
                        help="algorithm to use (default: ppo)")
    parser.add_argument("--discount",
                        type=float,
                        default=0.99,
                        help="discount factor (default: 0.99)")
    parser.add_argument("--reward-scale",
                        type=float,
                        default=20.,
                        help="Reward scale multiplier")
    parser.add_argument(
        "--gae-lambda",
        type=float,
        default=0.99,
        help="lambda coefficient in GAE formula (default: 0.99, 1 means no gae)"
    )
    parser.add_argument("--value-loss-coef",
                        type=float,
                        default=0.5,
                        help="value loss term coefficient (default: 0.5)")
    parser.add_argument("--max-grad-norm",
                        type=float,
                        default=0.5,
                        help="maximum norm of gradient (default: 0.5)")
    parser.add_argument("--clip-eps",
                        type=float,
                        default=0.2,
                        help="clipping epsilon for PPO (default: 0.2)")
    parser.add_argument("--ppo-epochs",
                        type=int,
                        default=4,
                        help="number of epochs for PPO (default: 4)")
    parser.add_argument(
        "--save-interval",
        type=int,
        default=50,
        help=
        "number of updates between two saves (default: 50, 0 means no saving)")
    parser.add_argument("--workers",
                        type=int,
                        default=8,
                        help="number of workers for PyTorch (default: 8)")
    parser.add_argument("--max-count",
                        type=int,
                        default=1000,
                        help="maximum number of frames to run for")
    parser.add_argument("--sample_duration",
                        type=float,
                        default=0.5,
                        help="sampling duration")
    parser.add_argument("--cuda",
                        action="store_true",
                        default=False,
                        help="whether to use cuda")
    args = parser.parse_args(argv)

    utils.seed(args.seed)

    torch_settings = init_torch(
        seed=args.seed,
        cuda=args.cuda,
        workers=args.workers,
    )

    # Generate environments (one per worker, each with a distinct seed)
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name from the run configuration plus a timestamp suffix
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix
    }
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(
        **model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(
        **model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor; 'emb' architectures use integer tokens
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(
            args.model, envs[0].observation_space, args.pretrained_model)
    else:
        obss_preprocessor = utils.ObssPreprocessor(args.model,
                                                   envs[0].observation_space,
                                                   args.pretrained_model)

    # Define actor-critic model
    # NOTE(review): checkpoint loading is disabled (commented out) and
    # acmodel is forced to None, so the branch below always runs —
    # presumably intentional for benchmarking; confirm.
    # acmodel = utils.load_model(args.model, raise_not_found=False)
    acmodel = None
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model,
                                       raise_not_found=True)
        else:
            acmodel = ACModel(obss_preprocessor.obs_space,
                              envs[0].action_space, args.image_dim,
                              args.memory_dim, args.instr_dim,
                              not args.no_instr, args.instr_arch,
                              not args.no_mem, args.arch)

    obss_preprocessor.vocab.save()
    # utils.save_model(acmodel, args.model)

    if torch_settings.cuda:
        acmodel.cuda()

    # Define actor-critic algo

    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(
            envs, acmodel, args.frames_per_proc, args.discount, args.lr,
            args.beta1, args.beta2, args.gae_lambda, args.entropy_coef,
            args.value_loss_coef, args.max_grad_norm, args.recurrence,
            args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size,
            obss_preprocessor, reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are initialized compared to when we don't use that.
    # Thus, there starts to be a difference in the random state. If we want to avoid it, in order to make sure that
    # the results of supervised-loss-coef=0. and extra-binary-info=0 match, we need to reseed here.

    utils.seed(args.seed)

    # Restore training status

    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

    # # Define logger and Tensorboard writer and CSV writer

    # header = (["update", "episodes", "frames", "FPS", "duration"]
    #         + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
    #         + ["success_rate"]
    #         + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
    #         + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    # if args.tb:
    #     from tensorboardX import SummaryWriter

    #     writer = SummaryWriter(utils.get_log_dir(args.model))
    # csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    # first_created = not os.path.exists(csv_path)
    # # we don't buffer data going in the csv log, cause we assume
    # # that one update will take much longer that one write to the log
    # csv_writer = csv.writer(open(csv_path, 'a', 1))
    # if first_created:
    #     csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model

    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code),
            shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output('cd {}; git diff'.format(babyai_code),
                                       shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model

    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env

    # The iteration wrapper controls run length via max_count/sample
    # duration instead of the frame budget used by other variants.
    wrapper = iteration_wrapper(
        exp,
        sync=torch_settings.sync,
        max_count=args.max_count,
        sample_duration=args.sample_duration,
    )

    # while status['num_frames'] < args.frames:
    while True:
        with wrapper() as it:
            # Update parameters
            if wrapper.done():
                break

            update_start_time = time.time()
            logs = algo.update_parameters()
            update_end_time = time.time()

            it.set_count(logs["num_frames"])
            it.log(loss=logs["loss"], )
Example #13
0
def main(args, seed, episodes):
    """Evaluate an agent on every environment listed in ``args.env``.

    Seeds all randomness sources, loads the agent for each environment,
    runs the appropriate evaluation routine, prints summary statistics and
    the worst (longest) episodes, and collects per-environment logs.

    :param args: parsed command-line arguments; ``args.env`` is an iterable
        of environment names.
    :param seed: seed applied to all randomness sources and each environment.
    :param episodes: number of evaluation episodes per environment (capped
        at the number of demos when no model is given).
    :return: dict mapping each environment name to its (synthesized) logs.
    """
    # Set seed for all randomness sources
    utils.seed(seed)

    # Keep track of results per task.
    results = {}

    for env_name in args.env:

        start_time = time.time()

        env = gym.make(env_name)
        env.seed(seed)

        # Define agent
        agent = utils.load_agent(env,
                                 args.model,
                                 args.demos,
                                 args.demos_origin,
                                 args.argmax,
                                 env_name,
                                 model_path=args.model_path)

        # BUG FIX: this check previously ran before `agent` was defined,
        # raising NameError whenever args.model was None.
        if args.model is None and args.episodes > len(agent.demos):
            # Set the number of episodes to be the number of demos
            episodes = len(agent.demos)

        # Evaluate with the routine matching the agent type.
        if isinstance(agent, utils.DemoAgent):
            logs = evaluate_demo_agent(agent, episodes)
        elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
            logs = evaluate(agent, env, episodes, False)
        else:
            logs = batch_evaluate(agent, env_name, seed, episodes)

        end_time = time.time()

        # Print logs
        num_frames = sum(logs["num_frames_per_episode"])
        fps = num_frames / (end_time - start_time)
        elapsed_time = int(end_time - start_time)
        duration = datetime.timedelta(seconds=elapsed_time)

        # Return/success summaries only exist when a trained model was given;
        # demo agents have no return signal.
        if args.model is not None:
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])

        num_frames_per_episode = utils.synthesize(
            logs["num_frames_per_episode"])

        if args.model is not None:
            print(
                "F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} | F:xsmM {:.1f} {:.1f} {} {}"
                .format(num_frames, fps, duration,
                        *return_per_episode.values(),
                        success_per_episode['mean'],
                        *num_frames_per_episode.values()))
        else:
            print(
                "F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}".format(
                    num_frames, fps, duration,
                    *num_frames_per_episode.values()))

        # Episodes sorted by descending frame count: the "worst" episodes.
        indexes = sorted(range(len(logs["num_frames_per_episode"])),
                         key=lambda k: -logs["num_frames_per_episode"][k])

        n = args.worst_episodes_to_show
        if n > 0:
            print("{} worst episodes:".format(n))
            for i in indexes[:n]:
                if 'seed_per_episode' in logs:
                    print(logs['seed_per_episode'][i])
                if args.model is not None:
                    print("- episode {}: R={}, F={}".format(
                        i, logs["return_per_episode"][i],
                        logs["num_frames_per_episode"][i]))
                else:
                    print("- episode {}: F={}".format(
                        i, logs["num_frames_per_episode"][i]))

        # Store results for this env.
        # BUG FIX: the return/success summaries were stored unconditionally
        # although they are only computed when args.model is not None, which
        # raised UnboundLocalError for demo-only runs.
        if args.model is not None:
            logs['return_per_episode'] = return_per_episode
            logs['success_per_episode'] = success_per_episode
        logs['num_frames_per_episode'] = num_frames_per_episode
        results[env_name] = logs

    return results
Exemple #14
0
    def __init__(self, args):
        """Set up a meta-learner for imitation learning on BabyAI tasks.

        Seeds all randomness sources, builds the environment, loads the
        training and validation demonstrations, constructs the actor-critic
        network plus a deep copy used for inner-loop ("fast") adaptation,
        and creates the inner (SGD) and outer (Adam) optimizers.

        :param args: parsed command-line arguments; must provide update_lr,
            meta_lr, task_num, seed, env, demos, demos_origin, val_episodes,
            model, image_dim, memory_dim, instr_dim, no_instr, instr_arch,
            no_mem and arch.
        """
        super(MetaLearner, self).__init__()

        # Inner-loop (per-task adaptation) and outer-loop (meta) learning
        # rates, and the number of tasks sampled per meta-update.
        self.update_lr = args.update_lr
        self.meta_lr = args.meta_lr
        self.task_num = args.task_num
        self.args = args

        # Seed before creating the environment so construction is reproducible.
        utils.seed(self.args.seed)

        self.env = gym.make(self.args.env)

        # Locate the train and validation demonstration files.
        demos_path = utils.get_demos_path(args.demos,
                                          args.env,
                                          args.demos_origin,
                                          valid=False)
        demos_path_valid = utils.get_demos_path(args.demos,
                                                args.env,
                                                args.demos_origin,
                                                valid=True)

        logger.info('loading demos')
        self.train_demos = utils.load_demos(demos_path)
        logger.info('loaded demos')
        # if args.episodes:
        #     if args.episodes > len(self.train_demos):
        #         raise ValueError("there are only {} train demos".format(len(self.train_demos)))
        # self.train_demos = self.train_demos[:args.episodes]

        self.val_demos = utils.load_demos(demos_path_valid)
        # if args.val_episodes > len(self.val_demos):
        #     logger.info('Using all the available {} demos to evaluate valid. accuracy'.format(len(self.val_demos)))
        # Cap the number of validation demos to keep validation cheap.
        self.val_demos = self.val_demos[:self.args.val_episodes]

        observation_space = self.env.observation_space
        action_space = self.env.action_space

        print(args.model)
        # Preprocessor turning raw observations into model inputs; also owns
        # the instruction vocabulary tied to args.model.
        self.obss_preprocessor = utils.ObssPreprocessor(
            args.model, observation_space,
            getattr(self.args, 'pretrained_model', None))

        # Define actor-critic model
        # self.net = utils.load_model(args.model, raise_not_found=False)
        # if self.net is None:
        #     if getattr(self.args, 'pretrained_model', None):
        #         self.net = utils.load_model(args.pretrained_model, raise_not_found=True)
        #     else:
        self.net = ACModel(self.obss_preprocessor.obs_space, action_space,
                           args.image_dim, args.memory_dim, args.instr_dim,
                           not self.args.no_instr, self.args.instr_arch,
                           not self.args.no_mem, self.args.arch)
        # Persist the (possibly extended) instruction vocabulary to disk.
        self.obss_preprocessor.vocab.save()
        # utils.save_model(self.net, args.model)
        # The fast net is a deep copy adapted in the inner loop; the original
        # net keeps the meta-parameters updated by the outer loop.
        self.fast_net = copy.deepcopy(self.net)
        self.net.train()
        self.fast_net.train()

        if torch.cuda.is_available():
            self.net.cuda()
            self.fast_net.cuda()

        # Inner-loop optimizer: plain SGD over the fast net's parameters.
        self.optimizer = torch.optim.SGD(self.fast_net.parameters(),
                                         lr=self.args.update_lr)
        # self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=100, gamma=0.9)

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        # Outer-loop (meta) optimizer: Adam over the meta-parameters.
        self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)
Exemple #15
0
def generate_demos(n_episodes, valid, seed, shift=0):
    """Generate and save `n_episodes` successful demonstrations for `args.env`.

    Relies on the module-level `args` namespace for the environment name,
    model/demo selection, pixel mode, filtering, logging and save options.
    Failed or crashing missions are retried until enough demos are collected.

    :param n_episodes: number of successful demos to collect.
    :param valid: whether demos belong to the validation split (affects the
        path returned by `utils.get_demos_path`).
    :param seed: base seed; demo i starts from `seed + i`.
    :param shift: unused in this body — NOTE(review): presumably kept for
        call-site compatibility; confirm against callers.
    """
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    use_pixels = args.pixels
    if use_pixels:
        # Observe RGB pixels of the partial view instead of the symbolic grid.
        env = RGBImgPartialObsWrapper(env)

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    # Timestamp of the last progress log; used to estimate demos/second.
    checkpoint_time = time.time()

    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break

        done = False
        if just_crashed:
            logger.info(
                "reset the environment to find a mission that the bot can solve"
            )
            # Extra reset advances the env RNG past the failing mission.
            env.reset()
        else:
            # Deterministic seeding: demo i always starts from seed + i.
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()

        # Per-episode trajectory buffers.
        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                if use_pixels:
                    # Pixel observations carry no discrete agent direction.
                    directions.append(None)
                else:
                    directions.append(obs['direction'])

                obs = new_obs
            # Keep only successful episodes, optionally filtered by length;
            # images are blosc-compressed to keep the demo file small.
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                just_crashed = False

            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception(
                        "mission failed, the seed is {}".format(seed +
                                                                len(demos)))
                just_crashed = True
                logger.info("mission failed")
        except (Exception, AssertionError):
            if args.on_exception == 'crash':
                raise
            # Best-effort mode: log the failure and retry with a fresh reset.
            just_crashed = True
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        # Periodic progress report with a simple throughput/ETA estimate.
        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        # Periodic checkpoint so a long run can be resumed/inspected.
        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])