def main(args):
    # Verify the arguments when we train on multiple environments
    # We intentionally don't check len(args.multi_env) itself, in case we need, for some reason, to validate on other envs
    if args.multi_env is not None:
        assert len(args.multi_demos) == len(args.multi_episodes)

    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    il_learn = ImitationLearning(args)

    # Define logger and Tensorboard writer
    header = ([
        "update", "frames", "FPS", "duration", "entropy", "policy_loss",
        "train_accuracy"
    ] + ["validation_accuracy"])
    if args.multi_env is None:
        header.extend(["validation_return", "validation_success_rate"])
    else:
        header.extend(
            ["validation_return_{}".format(env) for env in args.multi_env])
        header.extend([
            "validation_success_rate_{}".format(env) for env in args.multi_env
        ])

    if args.weigh_corrections:
        header.extend(["correction_weight_loss"])

    if args.compute_cic:
        header.extend(["val_cic"])

    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_writer = None
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # line-buffer the csv log (flush after every row): we assume that
    # one update takes much longer than one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Get the status path
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.model)

    il_learn.train(il_learn.train_demos, writer, csv_writer, status_path,
                   header)
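
A minimal sketch, not part of the original script, showing how the line-buffered log.csv written above could be read back for inspection (the log directory and column names come from the code above; with args.multi_env set, the success-rate columns are per-env instead):

import csv
import os

def read_training_log(log_dir):
    # log_dir would be utils.get_log_dir(args.model) for the run above
    with open(os.path.join(log_dir, 'log.csv')) as f:
        rows = list(csv.DictReader(f))
    for row in rows:
        print(row['update'], row.get('validation_success_rate'))
    return rows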
Example #2
def main(args):
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    il_learn = ImitationLearning(args)

    # Define logger and Tensorboard writer
    header = (["update", "frames", "FPS", "duration", "entropy", "policy_loss", "train_accuracy"]
              + ["validation_accuracy", "validation_return", "validation_success_rate"])
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # line-buffer the csv log (flush after every row): we assume that
    # one update takes much longer than one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Get the status path
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)
    train_demos = []

    # Generate the initial set of training demos
    if not args.dagger or args.dagger_start_with_bot_demos:
        train_demos += generate_demos(args.env, range(args.seed, args.seed + args.start_demos))
    # Seed at which evaluation will begin
    eval_seed = args.seed + args.start_demos
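    # (the initial demos above, if generated, use seeds args.seed .. args.seed + args.start_demos - 1)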

    model_name = args.model

    if args.dagger:
        mean_steps = get_bot_mean(args.env, args.episodes_to_evaluate_mean, args.seed)
    else:
        mean_steps = None

    for phase_no in range(0, args.phases):
        logger.info("Starting phase {} with {} demos".format(phase_no, len(train_demos)))

        if not args.finetune:
            # Create a new model to be trained from scratch
            logging.info("Creating new model to be trained from scratch")
            args.model = model_name + ('_phase_%d' % phase_no)
            il_learn = ImitationLearning(args)

        # Train the imitation learning agent
        if len(train_demos) > 0:
            il_learn.train(train_demos, writer, csv_writer, status_path, header, reset_status=True)

        # Stopping criterion
        valid_log = il_learn.validate(args.val_episodes)
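        # an episode counts as a success if its return is positive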
        success_rate = np.mean([1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info("Reached target success rate with {} demos, stopping".format(len(train_demos)))
            break

        eval_seed = grow_training_set(il_learn, train_demos, eval_seed, args.demo_grow_factor, args.num_eval_demos,
                                      args.dagger, mean_steps)
Example #3
                             args.value_loss_coef, args.max_grad_norm,
                             args.recurrence, args.optim_eps, args.clip_eps,
                             args.ppo_epochs, args.batch_size,
                             obss_preprocessor, reshape_reward)
else:
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# When extra binary information is used, more tensors (model parameters) are initialized than
# when it is not, so the random state starts to differ. To make sure that the results for
# supervised-loss-coef=0. and extra-binary-info=0 still match, we need to reseed here.

utils.seed(args.seed)

# Restore training status

status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
if os.path.exists(status_path):
    with open(status_path, 'r') as src:
        status = json.load(src)
else:
    status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

# Define logger and Tensorboard writer and CSV writer

header = (
    ["update", "episodes", "frames", "FPS", "duration"] +
    ["return_" + stat for stat in ['mean', 'std', 'min', 'max']] +
    ["success_rate_" + stat for stat in ['mean', 'std']] +
    ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']] +
    ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
if args.tb:
Example #4
def main():
    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix}
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(**model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(**model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
    else:
        """
        obss_preprocessor = utils.ObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model)
        """
        obss_preprocessor = utils.ImgInstrObssPreprocessor(args.model, envs[0].observation_space)

    # Define actor-critic model
    acmodel = utils.load_model(args.model, raise_not_found=False)
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
        else:
            """
            acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
                              args.image_dim, args.memory_dim, args.instr_dim,
                              not args.no_instr, args.instr_arch, not args.no_mem, args.arch)
            """
            acmodel = ACModelImgInstr(obss_preprocessor.obs_space, envs[0].action_space,
                                      args.image_dim, args.memory_dim, args.instr_dim,
                                      not args.no_instr, not args.no_mem, args.arch)

    """
    obss_preprocessor.vocab.save()
    """
    utils.save_model(acmodel, args.model)

    if torch.cuda.is_available():
        acmodel.cuda()

    # Define actor-critic algo

    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
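    # reshape_reward above simply scales the raw reward; the other positional
    # arguments (presumably observation, action, and done) are ignored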
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.beta1, args.beta2,
                                 args.gae_lambda,
                                 args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
                                 args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size, obss_preprocessor,
                                 reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When extra binary information is used, more tensors (model parameters) are initialized than
    # when it is not, so the random state starts to differ. To make sure that the results for
    # supervised-loss-coef=0. and extra-binary-info=0 still match, we need to reseed here.

    utils.seed(args.seed)

    # Restore training status

    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0,
                  'num_episodes': 0,
                  'num_frames': 0}

    # Define logger and Tensorboard writer and CSV writer

    header = (["update", "episodes", "frames", "FPS", "duration"]
              + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["success_rate"]
              + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    if args.tb:
        from tensorboardX import SummaryWriter

        writer = SummaryWriter(utils.get_log_dir(args.model))
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # line-buffer the csv log (flush after every row): we assume that
    # one update takes much longer than one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model

    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code), shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output(
            'cd {}; git diff'.format(babyai_code), shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the git diff')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model

    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env
    while status['num_frames'] < args.frames:
        # Update parameters

        update_start_time = time.time()
        logs = algo.update_parameters()
        update_end_time = time.time()

        status['num_frames'] += logs["num_frames"]
        status['num_episodes'] += logs['episodes_done']
        status['i'] += 1

        # Print logs

        if status['i'] % args.log_interval == 0:
            total_elapsed_time = int(time.time() - total_start_time)
            fps = logs["num_frames"] / (update_end_time - update_start_time)
            duration = datetime.timedelta(seconds=total_elapsed_time)
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])
            num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])
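            # utils.synthesize appears to return mean/std/min/max statistics, matching the
            # *_mean/std/min/max columns of the header above (the assert below only checks lengths)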

            data = [status['i'], status['num_episodes'], status['num_frames'],
                    fps, total_elapsed_time,
                    *return_per_episode.values(),
                    success_per_episode['mean'],
                    *num_frames_per_episode.values(),
                    logs["entropy"], logs["value"], logs["policy_loss"], logs["value_loss"],
                    logs["loss"], logs["grad_norm"]]

            format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} | R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                          "S {:.2f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                          "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")

            logger.info(format_str.format(*data))
            if args.tb:
                assert len(header) == len(data)
                for key, value in zip(header, data):
                    writer.add_scalar(key, float(value), status['num_frames'])

            csv_writer.writerow(data)

        # Save obss preprocessor vocabulary and model

        if args.save_interval > 0 and status['i'] % args.save_interval == 0:
            """
            obss_preprocessor.vocab.save()
            """
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
                utils.save_model(acmodel, args.model)

            # Testing the model before saving
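            # evaluate the current weights greedily (argmax action selection) on the test env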
            agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
            agent.model = acmodel
            agent.model.eval()
            logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
            agent.model.train()
            mean_return = np.mean(logs["return_per_episode"])
            success_rate = np.mean([1 if r > 0 else 0 for r in logs['return_per_episode']])
            save_model = False
            if success_rate > best_success_rate:
                best_success_rate = success_rate
                save_model = True
            elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
                best_mean_return = mean_return
                save_model = True
            if save_model:
                utils.save_model(acmodel, args.model + '_best')
                """
                obss_preprocessor.vocab.save(utils.get_vocab_path(args.model + '_best'))
                """
                logger.info("Return {: .2f}; best model is saved".format(mean_return))
            else:
                logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))
def main(args):
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    il_learn = ImitationLearning(args)

    # Define logger and Tensorboard writer
    header = ([
        "update", "frames", "FPS", "duration", "entropy", "policy_loss",
        "train_accuracy"
    ] + [
        "validation_accuracy", "validation_return", "validation_success_rate"
    ])
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # line-buffer the csv log (flush after every row): we assume that
    # one update takes much longer than one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)

    # Seed at which demo evaluation/generation will begin
    eval_seed = args.seed + len(il_learn.train_demos)
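    # (i.e. skip the seeds presumably already consumed when the loaded training demos were generated)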

    # Phase at which we start
    cur_phase = 0

    # Try to load the status (if resuming)
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
            eval_seed = status.get('eval_seed', eval_seed)
            cur_phase = status.get('cur_phase', cur_phase)

    model_name = args.model

    for phase_no in range(cur_phase, args.phases):
        logger.info("Starting phase {} with {} demos, eval_seed={}".format(
            phase_no, len(il_learn.train_demos), eval_seed))

        # Each phase trains a different model from scratch
        args.model = model_name + ('_phase_%d' % phase_no)
        il_learn = ImitationLearning(args)

        # Train the imitation learning agent
        if len(il_learn.train_demos) > 0:
            train_status_path = os.path.join(utils.get_log_dir(args.model),
                                             'status.json')
            il_learn.train(il_learn.train_demos, writer, csv_writer,
                           train_status_path, header)

        # Stopping criterion
        valid_log = il_learn.validate(args.val_episodes)
        success_rate = np.mean(
            [1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info(
                "Reached target success rate with {} demos, stopping".format(
                    len(il_learn.train_demos)))
            break

        eval_seed = grow_training_set(il_learn, il_learn.train_demos,
                                      eval_seed, args.demo_grow_factor,
                                      args.num_eval_demos)

        # Save the current demo generation seed
        with open(status_path, 'w') as dst:
            status = {'eval_seed': eval_seed, 'cur_phase': phase_no + 1}
            json.dump(status, dst)

        # Save the demos
        demos_path = utils.get_demos_path(args.demos,
                                          args.env,
                                          args.demos_origin,
                                          valid=False)
        print('saving demos to:', demos_path)
        utils.save_demos(il_learn.train_demos, demos_path)
Example #6
def main(exp, argv):
    os.environ["BABYAI_STORAGE"] = exp.results_directory()

    # Parse arguments
    parser = ArgumentParser()
    parser.add_argument("--algo",
                        default='ppo',
                        help="algorithm to use (default: ppo)")
    parser.add_argument("--discount",
                        type=float,
                        default=0.99,
                        help="discount factor (default: 0.99)")
    parser.add_argument("--reward-scale",
                        type=float,
                        default=20.,
                        help="Reward scale multiplier")
    parser.add_argument(
        "--gae-lambda",
        type=float,
        default=0.99,
        help="lambda coefficient in GAE formula (default: 0.99, 1 means no gae)"
    )
    parser.add_argument("--value-loss-coef",
                        type=float,
                        default=0.5,
                        help="value loss term coefficient (default: 0.5)")
    parser.add_argument("--max-grad-norm",
                        type=float,
                        default=0.5,
                        help="maximum norm of gradient (default: 0.5)")
    parser.add_argument("--clip-eps",
                        type=float,
                        default=0.2,
                        help="clipping epsilon for PPO (default: 0.2)")
    parser.add_argument("--ppo-epochs",
                        type=int,
                        default=4,
                        help="number of epochs for PPO (default: 4)")
    parser.add_argument(
        "--save-interval",
        type=int,
        default=50,
        help=
        "number of updates between two saves (default: 50, 0 means no saving)")
    parser.add_argument("--workers",
                        type=int,
                        default=8,
                        help="number of workers for PyTorch (default: 8)")
    parser.add_argument("--max-count",
                        type=int,
                        default=1000,
                        help="maximum number of frames to run for")
    parser.add_argument("--sample_duration",
                        type=float,
                        default=0.5,
                        help="sampling duration")
    parser.add_argument("--cuda",
                        action="store_true",
                        default=False,
                        help="whether to use cuda")
    args = parser.parse_args(argv)

    utils.seed(args.seed)

    torch_settings = init_torch(
        seed=args.seed,
        cuda=args.cuda,
        workers=args.workers,
    )

    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix
    }
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(
        **model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(
        **model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(
            args.model, envs[0].observation_space, args.pretrained_model)
    else:
        obss_preprocessor = utils.ObssPreprocessor(args.model,
                                                   envs[0].observation_space,
                                                   args.pretrained_model)

    # Define actor-critic model
    # acmodel = utils.load_model(args.model, raise_not_found=False)
    acmodel = None
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model,
                                       raise_not_found=True)
        else:
            acmodel = ACModel(obss_preprocessor.obs_space,
                              envs[0].action_space, args.image_dim,
                              args.memory_dim, args.instr_dim,
                              not args.no_instr, args.instr_arch,
                              not args.no_mem, args.arch)

    obss_preprocessor.vocab.save()
    # utils.save_model(acmodel, args.model)

    if torch_settings.cuda:
        acmodel.cuda()

    # Define actor-critic algo

    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(
            envs, acmodel, args.frames_per_proc, args.discount, args.lr,
            args.beta1, args.beta2, args.gae_lambda, args.entropy_coef,
            args.value_loss_coef, args.max_grad_norm, args.recurrence,
            args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size,
            obss_preprocessor, reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When extra binary information is used, more tensors (model parameters) are initialized than
    # when it is not, so the random state starts to differ. To make sure that the results for
    # supervised-loss-coef=0. and extra-binary-info=0 still match, we need to reseed here.

    utils.seed(args.seed)

    # Restore training status

    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

    # # Define logger and Tensorboard writer and CSV writer

    # header = (["update", "episodes", "frames", "FPS", "duration"]
    #         + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
    #         + ["success_rate"]
    #         + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
    #         + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    # if args.tb:
    #     from tensorboardX import SummaryWriter

    #     writer = SummaryWriter(utils.get_log_dir(args.model))
    # csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    # first_created = not os.path.exists(csv_path)
    # # line-buffer the csv log (flush after every row): we assume that
    # # one update takes much longer than one write to the log
    # csv_writer = csv.writer(open(csv_path, 'a', 1))
    # if first_created:
    #     csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model

    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code),
            shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output('cd {}; git diff'.format(babyai_code),
                                       shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the git diff')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model

    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env

    wrapper = iteration_wrapper(
        exp,
        sync=torch_settings.sync,
        max_count=args.max_count,
        sample_duration=args.sample_duration,
    )

    # while status['num_frames'] < args.frames:
    while True:
        with wrapper() as it:
            # Update parameters
            if wrapper.done():
                break

            update_start_time = time.time()
            logs = algo.update_parameters()
            update_end_time = time.time()

            it.set_count(logs["num_frames"])
            it.log(loss=logs["loss"], )
Example #7
                             args.batch_size,
                             obss_preprocessor,
                             reshape_reward,
                             savelog_missions=args.savelog_missions)
else:
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# When extra binary information is used, more tensors (model parameters) are initialized than
# when it is not, so the random state starts to differ. To make sure that the results for
# supervised-loss-coef=0. and extra-binary-info=0 still match, we need to reseed here.

utils.seed(args.seed)

# Restore training status

status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
if os.path.exists(status_path):
    with open(status_path, 'r') as src:
        status = json.load(src)
else:
    status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

# Define logger and Tensorboard writer and CSV writer

header = (
    ["update", "episodes", "frames", "FPS", "duration"] +
    ["return_" + stat
     for stat in ['mean', 'std', 'min', 'max']] + ["success_rate"] +
    ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']] +
    ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
Example #8
                            args.optim_alpha, args.optim_eps, obss_preprocessor, utils.reshape_reward)
elif args.algo == "ppo":
    algo = torch_rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.gae_tau,
                            args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
                            args.optim_eps, args.clip_eps, args.epochs, args.batch_size, obss_preprocessor,
                            utils.reshape_reward)
else:
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# Define logger and Tensorboard writer

logger = utils.get_logger(model_name)
if args.tb:
    from tensorboardX import SummaryWriter

    writer = SummaryWriter(utils.get_log_dir(model_name))

# Log command, availability of CUDA and model

logger.info(args)
logger.info("CUDA available: {}".format(torch.cuda.is_available()))
logger.info(acmodel)

# Train model

num_frames = 0
total_start_time = time.time()
i = 0

while num_frames < args.frames:
    # Update parameters
Example #9
    if optimizer is None:
        if pretrained[m]:
            algo.optimizers[m].load_state_dict(
                utils_sr.load_optimizer(pretrained[m],
                                        raise_not_found=True).state_dict())
    else:
        algo.optimizers[m].load_state_dict(optimizer.state_dict())

    utils_sr.save_optimizer(algo.optimizers[m], model_name)

# Restore training status.
status_paths = []
statuses = []
for m, model_name in enumerate(model_names):
    status_paths.append(
        os.path.join(utils.get_log_dir(model_name), "status.json"))
    if os.path.exists(status_paths[m]):
        with open(status_paths[m], 'r') as src:
            statuses.append(json.load(src))
    else:
        statuses.append({"i": 0, "num_episodes": 0, "num_frames": 0})

# Define logger and Tensorboard writer and CSV writer.
header = (
    ["update", "episodes", "frames", "FPS", "duration"] +
    ["return_" + stat
     for stat in ["mean", "std", "min", "max"]] + ["success_rate"] +
    ["num_frames_" + stat for stat in ["mean", "std", "min", "max"]] +
    ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
if args.tb:
    from tensorboardX import SummaryWriter
Example #10
                              args.gae_lambda,
                              args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
                              args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size, obss_preprocessor,
                              reshape_reward)
else:
    raise ValueError("Incorrect algorithm name: {}".format(args.algo))

# When extra binary information is used, more tensors (model parameters) are initialized than
# when it is not, so the random state starts to differ. To make sure that the results for
# supervised-loss-coef=0. and extra-binary-info=0 still match, we need to reseed here.

utils.seed(args.seed)

# Restore training status

status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
if os.path.exists(status_path):
    with open(status_path, 'r') as src:
        status = json.load(src)
else:
    status = {'i': 0,
              'num_episodes': 0,
              'num_frames': 0}

# Define loggers and Tensorboard writer and CSV writers

header = (["update", "episodes", "frames", "FPS", "duration"]
          + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
          + ["success_rate"]
          + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
          + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])