Example No. 1
    assert_text = "Exactly one of --model, --demos-origin or --demos must be specified."
    assert int(args.model is None) + int(args.demos_origin is None) + int(
        args.demos is None) == 2, assert_text
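    # Sketch: assuming the arguments are built with argparse upstream (not shown
    # in this snippet), the same "exactly one of" constraint can be declared on
    # the parser instead of asserted afterwards:
    #
    #     group = parser.add_mutually_exclusive_group(required=True)
    #     group.add_argument("--model")
    #     group.add_argument("--demos-origin")
    #     group.add_argument("--demos")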
    if args.seed is None:
        args.seed = 0 if args.model is not None else 1

    start_time = time.time()
    logs = main(args, args.seed, args.episodes)
    end_time = time.time()

    # Print logs
    num_frames = sum(logs["num_frames_per_episode"])
    fps = num_frames / (end_time - start_time)
    elapsed_time = int(end_time - start_time)
    duration = datetime.timedelta(seconds=elapsed_time)
    return_per_episode = utils.synthesize(logs["return_per_episode"])
    success_per_episode = utils.synthesize(
        [1 if r > 0 else 0 for r in logs["return_per_episode"]])
    num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

    print(
        "F {} | FPS {:.0f} | D {} | R:xsmM {:.2f} {:.2f} {:.2f} {:.2f} | S {:.2f} | F:xsmM {:.1f} {:.1f} {} {}"
        .format(num_frames, fps, duration, *return_per_episode.values(),
                success_per_episode['mean'], *num_frames_per_episode.values()))
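    # The "*.values()" unpacking above assumes utils.synthesize returns an
    # ordered mapping of summary statistics; the "xsmM" (mean/std/min/max)
    # column labels suggest something like:
    #
    #     def synthesize(xs):
    #         return collections.OrderedDict([
    #             ("mean", numpy.mean(xs)), ("std", numpy.std(xs)),
    #             ("min", numpy.min(xs)), ("max", numpy.max(xs))])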

    indexes = sorted(range(len(logs["return_per_episode"])),
                     key=lambda k: logs["return_per_episode"][k])
    n = 10
    print("{} worst episodes:".format(n))
    for i in indexes[:n]:
        print("- episode {}: R={}, F={}".format(
Example No. 2
def main():
    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix}
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(**model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(**model_name_parts) if args.model else default_model_name
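    # For illustration (hypothetical values): with env=BabyAI-GoToLocal-v0,
    # algo=ppo, arch=cnn, instr_arch=gru, memory enabled and seed=1, the default
    # name would look like
    #     BabyAI-GoToLocal-v0_ppo_cnn_gru_mem_seed1_<timestamp>
    # where <timestamp> follows the "%y-%m-%d-%H-%M-%S" suffix above.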

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
    else:
        """
        obss_preprocessor = utils.ObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
        """
        obss_preprocessor = utils.ImgInstrObssPreprocessor(args.model, envs[0].observation_space)

    # Define actor-critic model
    acmodel = utils.load_model(args.model, raise_not_found=False)
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
        else:
            """
            acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
                              args.image_dim, args.memory_dim, args.instr_dim,
                              not args.no_instr, args.instr_arch, not args.no_mem, args.arch)
            """
            acmodel = ACModelImgInstr(obss_preprocessor.obs_space, envs[0].action_space,
                                      args.image_dim, args.memory_dim, args.instr_dim,
                                      not args.no_instr, not args.no_mem, args.arch)

    """
    obss_preprocessor.vocab.save()
    """
    utils.save_model(acmodel, args.model)

    if torch.cuda.is_available():
        acmodel.cuda()

    # Define actor-critic algo

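    # The four positional arguments of reshape_reward are presumably
    # (observation, action, reward, done); only the reward is used here,
    # scaled by args.reward_scale.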
    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.beta1, args.beta2,
                                 args.gae_lambda,
                                 args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
                                 args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size, obss_preprocessor,
                                 reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When extra binary information is used, more tensors (model parameters) are
    # initialized than when it is not, so the random state diverges between the
    # two configurations. Reseeding here ensures that runs with
    # supervised-loss-coef=0 and extra-binary-info=0 produce matching results.

    utils.seed(args.seed)
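    # Standalone sketch of why this works (not part of this script): once two
    # runs have consumed different amounts of randomness, reseeding
    # resynchronizes them, so every draw after the reseed is identical.
    #
    #     torch.manual_seed(0); _ = torch.rand(3)   # run A: 3 draws, run B: 5
    #     torch.manual_seed(0)                      # reseed both runs
    #     x = torch.rand(2)                         # same x in runs A and B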

    # Restore training status

    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0,
                  'num_episodes': 0,
                  'num_frames': 0}
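    # Note: status.json is rewritten in place further down. A crash-safe sketch
    # (same status dict, hypothetical tmp_path) would swap in a temporary file:
    #
    #     tmp_path = status_path + '.tmp'
    #     with open(tmp_path, 'w') as dst:
    #         json.dump(status, dst)
    #     os.replace(tmp_path, status_path)  # atomic rename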

    # Define logger and Tensorboard writer and CSV writer

    header = (["update", "episodes", "frames", "FPS", "duration"]
              + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["success_rate"]
              + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    if args.tb:
        from tensorboardX import SummaryWriter

        writer = SummaryWriter(utils.get_log_dir(args.model))
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # The CSV log is line-buffered (buffering=1) because one update is assumed
    # to take much longer than one write to the log; newline='' is the csv
    # module's documented requirement for files passed to csv.writer.
    csv_writer = csv.writer(open(csv_path, 'a', buffering=1, newline=''))
    if first_created:
        csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model

    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code), shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output(
            'cd {}; git diff'.format(babyai_code), shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the git diff')
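    # Equivalent sketch without shell=True (assuming git is on PATH): passing
    # the arguments as a list with cwd= avoids interpolating a path into a
    # shell command string.
    #
    #     last_commit = subprocess.check_output(
    #         ['git', 'log', '-n1'], cwd=babyai_code).decode('utf-8')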
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model

    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env
    while status['num_frames'] < args.frames:
        # Update parameters

        update_start_time = time.time()
        logs = algo.update_parameters()
        update_end_time = time.time()

        status['num_frames'] += logs["num_frames"]
        status['num_episodes'] += logs['episodes_done']
        status['i'] += 1

        # Print logs

        if status['i'] % args.log_interval == 0:
            total_elapsed_time = int(time.time() - total_start_time)
            fps = logs["num_frames"] / (update_end_time - update_start_time)
            duration = datetime.timedelta(seconds=total_elapsed_time)
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])
            num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

            data = [status['i'], status['num_episodes'], status['num_frames'],
                    fps, total_elapsed_time,
                    *return_per_episode.values(),
                    success_per_episode['mean'],
                    *num_frames_per_episode.values(),
                    logs["entropy"], logs["value"], logs["policy_loss"], logs["value_loss"],
                    logs["loss"], logs["grad_norm"]]

            format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} | R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                          "S {:.2f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                          "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")

            logger.info(format_str.format(*data))
            if args.tb:
                assert len(header) == len(data)
                for key, value in zip(header, data):
                    writer.add_scalar(key, float(value), status['num_frames'])

            csv_writer.writerow(data)

        # Save obss preprocessor vocabulary and model

        if args.save_interval > 0 and status['i'] % args.save_interval == 0:
            """
            obss_preprocessor.vocab.save()
            """
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
                utils.save_model(acmodel, args.model)

            # Testing the model before saving
            agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
            agent.model = acmodel
            agent.model.eval()
            logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
            agent.model.train()
            mean_return = np.mean(logs["return_per_episode"])
            success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])
            save_model = False
            if success_rate > best_success_rate:
                best_success_rate = success_rate
                # Reset the return baseline too, so later ties compare against a
                # return achieved at this success level rather than a stale one.
                best_mean_return = mean_return
                save_model = True
            elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
                best_mean_return = mean_return
                save_model = True
            if save_model:
                utils.save_model(acmodel, args.model + '_best')
                """
                obss_preprocessor.vocab.save(utils.get_vocab_path(args.model + '_best'))
                """
                logger.info("Return {: .2f}; best model is saved".format(mean_return))
            else:
                logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))
Example No. 3
    update_start_time = time.time()
    logs, theta = algo.update_parameters(fake_reward=0)
    if args.fake_reward > 0:
        logs, _ = algo.update_parameters(fake_reward=args.fake_reward, cached_theta=theta)
    update_end_time = time.time()

    num_frames += logs["num_frames"]
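    # Two parameter updates ran when fake_reward > 0 (the plain one plus the
    # fake-reward pass reusing cached_theta), so the counter advances by 2 below.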
    i += (1 if args.fake_reward == 0 else 2)

    # Print logs

    if i % args.log_interval == 0:
        total_elapsed_time = int(time.time() - total_start_time)
        fps = logs["num_frames"] / (update_end_time - update_start_time)
        duration = datetime.timedelta(seconds=total_elapsed_time)
        return_per_episode = utils.synthesize(logs["return_per_episode"])
        rreturn_per_episode = utils.synthesize(logs["reshaped_return_per_episode"])
        num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

        logger.info(
            "U {} | F {:06} | FPS {:04.0f} | D {} | rR:x̄σmM {: .2f} {: .2f} {: .2f} {: .2f} | F:x̄σmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | pL {: .3f} | vL {:.3f}"
                .format(i, num_frames, fps, duration,
                        *rreturn_per_episode.values(),
                        *num_frames_per_episode.values(),
                        logs["entropy"], logs["value"], logs["policy_loss"], logs["value_loss"]))
        if args.tb:
            writer.add_scalar("frames", num_frames, i)
            writer.add_scalar("FPS", fps, i)
            writer.add_scalar("duration", total_ellapsed_time, i)
            for key, value in return_per_episode.items():
                writer.add_scalar("return_" + key, value, i)
Example No. 4
def main(args, seed, episodes):
    # Set seed for all randomness sources
    utils.seed(seed)

    # Keep track of results per task.
    results = {}

    for env_name in args.env:

        start_time = time.time()

        env = gym.make(env_name)
        env.seed(seed)

        # Define agent
        agent = utils.load_agent(env,
                                 args.model,
                                 args.demos,
                                 args.demos_origin,
                                 args.argmax,
                                 env_name,
                                 model_path=args.model_path)

        if args.model is None and args.episodes > len(agent.demos):
            # Cap the number of episodes at the number of available demos
            episodes = len(agent.demos)

        # Evaluate
        if isinstance(agent, utils.DemoAgent):
            logs = evaluate_demo_agent(agent, episodes)
        elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
            logs = evaluate(agent, env, episodes, False)
        else:
            logs = batch_evaluate(agent, env_name, seed, episodes)
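        # Dispatch, as read from the branches above: demo agents replay their
        # stored demos, bot agents and contiguous-episode runs evaluate
        # sequentially in a single env, and everything else evaluates in
        # seeded batches.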

        end_time = time.time()

        # Print logs
        num_frames = sum(logs["num_frames_per_episode"])
        fps = num_frames / (end_time - start_time)
        elapsed_time = int(end_time - start_time)
        duration = datetime.timedelta(seconds=elapsed_time)

        if args.model is not None:
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])

        num_frames_per_episode = utils.synthesize(
            logs["num_frames_per_episode"])

        if args.model is not None:
            print(
                "F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} | S {:.3f} | F:xsmM {:.1f} {:.1f} {} {}"
                .format(num_frames, fps, duration,
                        *return_per_episode.values(),
                        success_per_episode['mean'],
                        *num_frames_per_episode.values()))
        else:
            print(
                "F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}".format(
                    num_frames, fps, duration,
                    *num_frames_per_episode.values()))

        indexes = sorted(range(len(logs["num_frames_per_episode"])),
                         key=lambda k: -logs["num_frames_per_episode"][k])

        n = args.worst_episodes_to_show
        if n > 0:
            print("{} worst episodes:".format(n))
            for i in indexes[:n]:
                if 'seed_per_episode' in logs:
                    print(logs['seed_per_episode'][i])
                if args.model is not None:
                    print("- episode {}: R={}, F={}".format(
                        i, logs["return_per_episode"][i],
                        logs["num_frames_per_episode"][i]))
                else:
                    print("- episode {}: F={}".format(
                        i, logs["num_frames_per_episode"][i]))

        # Store synthesized results for this env (return/success stats exist
        # only when a model was evaluated).
        if args.model is not None:
            logs['return_per_episode'] = return_per_episode
            logs['success_per_episode'] = success_per_episode
        logs['num_frames_per_episode'] = num_frames_per_episode
        results[env_name] = logs

    return results
Example No. 5
    logs = algo.update_parameters()
    update_end_time = time.time()

    status['num_frames'] += logs["num_frames"]
    status['num_episodes'] += logs['episodes_done']
    status['i'] += 1

    # Print logs

    if status['i'] % args.log_interval == 0:
        print(args.description)
        total_elapsed_time = int(time.time() - total_start_time)
        fps = logs["num_frames"] / (update_end_time - update_start_time)
        duration = datetime.timedelta(seconds=total_elapsed_time)
        return_per_episode = utils.synthesize(logs["return_per_episode"])
        success_per_episode = utils.synthesize([
            1 if r > 2 else 0 for r in logs["return_per_episode"]
        ])  # TODO: change based on dense rewards or not
        num_frames_per_episode = utils.synthesize(
            logs["num_frames_per_episode"])

        og_data = [
            status['i'], status['num_episodes'], status['num_frames'], fps,
            total_elapsed_time, *return_per_episode.values(),
            success_per_episode['mean'], *num_frames_per_episode.values(),
            logs["entropy"], logs["value"], logs["policy_loss"],
            logs["value_loss"], logs["loss"], logs["grad_norm"]
        ]

        data = [