Beispiel #1
0
def main(cmdl):
    """Launch a Ray Tune hyperparameter search and pickle the trial results.

    The base experiment configuration (``default.yaml``) and the search
    configuration (``search.yaml``) are read from the ``cmdl.cfg`` folder.
    Trials are proposed by HyperOpt and pruned by ASHA, both maximising the
    reported ``criterion`` metric.

    Args:
        cmdl: parsed command line. This function reads ``cfg``, ``dev``,
            ``workers``, ``grace_steps`` and ``trials`` from it.
    """
    base_cfg = namespace_to_dict(read_config(Path(cmdl.cfg) / "default.yaml"))
    search_cfg = namespace_to_dict(read_config(Path(cmdl.cfg) / "search.yaml"))

    print(config_to_string(cmdl))
    print(config_to_string(dict_to_namespace(search_cfg)))

    # Initial points to evaluate and the space HyperOpt samples from.
    good_init, search_space = get_search_space(search_cfg)

    # Experiment name: <timestamp>_tune_<experiment>[_dev]
    timestamp = "{:%Y%b%d-%H%M%S}".format(datetime.now())
    experiment = base_cfg["experiment"]
    dev_suffix = "_dev" if cmdl.dev else ""
    search_name = "{timestep}_tune_{experiment_name}{dev}".format(
        timestep=timestamp, experiment_name=experiment, dev=dev_suffix
    )

    # Search algorithm and early-stopping scheduler share the objective.
    objective = {"metric": "criterion", "mode": "max"}
    hyperopt_search = HyperOptSearch(
        search_space,
        max_concurrent=cmdl.workers,
        points_to_evaluate=good_init,
        **objective,
    )
    scheduler = ASHAScheduler(
        time_attr="train_step",
        # upper bound, in `train_step` units, on the length of a trial
        max_t=base_cfg["training_steps"],
        # trials younger than this (in `train_step` units) are never stopped
        grace_period=cmdl.grace_steps,
        # number of ASHA brackets
        brackets=3,
        **objective,
    )

    analysis = tune.run(
        lambda x: tune_trial(x, base_cfg=base_cfg, get_objective=None),
        name=search_name,
        # config=search_space,
        search_alg=hyperopt_search,
        scheduler=scheduler,
        local_dir="./results",
        num_samples=cmdl.trials,
        trial_name_creator=trial2string,
        resources_per_trial={"cpu": 3},
    )

    # `key` is used as the folder the dataframe is written into.
    for key, trial_df in analysis.trial_dataframes.items():
        print("saving: ", key)
        trial_df.to_pickle(f"{key}/trial_df.pkl")
Beispiel #2
0
def main(cmdl):
    """Evaluate the checkpoints stored in an experiment folder.

    Depending on the command line flags this function either:

    * ``cmdl.build_val_dset``: builds a validation dataset with the
      checkpoint that has the highest stored ``"R/ep"`` and saves it under
      ``./val_dsets/``;
    * ``cmdl.offline_validation``: runs ``offline_validation`` for every
      checkpoint on a previously saved dataset;
    * otherwise: runs ``greedy_validation`` for every checkpoint and prints
      the average return.

    Checkpoints are the ``*.pth`` files in ``cmdl.experiment_path``; the
    third ``_``-separated field of the file stem is parsed as the step.
    """
    experiment_path = Path(cmdl.experiment_path)
    opt = read_config(experiment_path / "cfg.yaml")

    # (step, path) pairs ordered by step; the sort is stable, so ties keep
    # the order in which glob yielded them.
    chkpt_paths = sorted(
        (
            (int(ckpt.stem.split("_")[2]), ckpt)
            for ckpt in experiment_path.glob("*.pth")
        ),
        key=lambda pair: pair[0],
    )

    print(config_to_string(cmdl))
    print(config_to_string(opt))

    if cmdl.build_val_dset:
        # pick the checkpoint with the highest stored return
        perf = [(torch.load(path)["R/ep"], path) for _, path in chkpt_paths]
        best_score, path = max(perf, key=lambda x: x[0])
        print(f"Loading {path} with total return {best_score}.")

        env, policy = configure_eval(cmdl, opt, path)
        achlioptas = _get_achlioptas(8, 4)
        hash_fn = partial(_hash, decimals=cmdl.decimals, rnd_proj=achlioptas)
        val_dset = build_validation_dset(env, policy, opt.gamma, hash_fn)

        # record how the dataset was produced
        meta = val_dset["meta"]
        meta["agent"] = path
        meta["decimals"] = cmdl.decimals
        meta["rnd_proj"] = achlioptas
        for k, v in meta.items():
            print(f"{k:12}", v)

        torch.save(val_dset, f"./val_dsets/{env.spec.id}.pkl")
        return

    if cmdl.offline_validation:
        rlog.init(opt.experiment, path=opt.out_dir, tensorboard=True)
        log = rlog.getLogger(opt.experiment + ".off_valid")
        log.addMetrics([
            rlog.AvgMetric("V_step", metargs=["value", 1]),
            rlog.AvgMetric("off_mse", metargs=["off_mse", 1]),
        ])
        log.info("Loading dataset...")
        dset = torch.load(f"./val_dsets/{opt.env_name}.pkl")
        for step, path in chkpt_paths:
            env, policy = configure_eval(cmdl, opt, path)
            offline_validation(step, policy, dset, opt)
        return

    # default: greedy roll-outs for every checkpoint
    for step, path in chkpt_paths:
        env, policy = configure_eval(cmdl, opt, path)
        avg_return = greedy_validation(env, policy, opt.gamma)
        print("[{0:8d}]   R/ep={1:8.2f}.".format(step, avg_return))
Beispiel #3
0
def run(opt):
    """ Entry Point.

    Trains the agent for ``opt.env.episodes`` episodes and, every
    ``opt.valid_freq`` episodes, logs the training metrics, validates for
    ``opt.valid_episodes`` episodes and logs again.
    """
    rlog.init(opt.experiment, path=opt.out_dir, tensorboard=True)

    training_metrics = (
        rlog.AvgMetric("trn_R_ep", metargs=["trn_reward", "trn_done"]),
        rlog.AvgMetric("trn_loss", metargs=["trn_loss", 1]),
        rlog.FPSMetric("lrn_tps", metargs=["lrn_steps"]),
    )
    validation_metrics = (
        rlog.AvgMetric("val_R_ep", metargs=["reward", "done"]),
        rlog.AvgMetric("val_avg_step", metargs=[1, "done"]),
        rlog.FPSMetric("val_fps", metargs=["val_frames"]),
    )
    rlog.addMetrics(*training_metrics, *validation_metrics)

    opt = game_settings_(opt)
    env, agent = experiment_factory(opt)

    rlog.info(ioutil.config_to_string(opt))
    ioutil.save_config(opt, opt.out_dir)

    total_steps = 0
    for episode in range(1, opt.env.episodes + 1):
        total_steps = train_one_ep(
            env, agent, total_steps, opt.update_freq, opt.target_update_freq
        )

        # nothing else to do unless this is a validation episode
        if episode % opt.valid_freq != 0:
            continue

        # log once before validating and once after it
        rlog.traceAndLog(episode)
        validate(env, agent, opt.valid_episodes)
        rlog.traceAndLog(episode)
Beispiel #4
0
def run(opt):
    """ Entry point of the program.

    Trains an agent while the training environment's sticky-action
    probability follows ``opt.sticky_schedule`` (pairs of
    ``(epoch, probability)``). After every training epoch the agent is
    evaluated once per scheduled probability, each evaluation being logged
    by the logger associated with that schedule entry. If a ``replay.gz``
    file exists in ``opt.out_dir`` the run is resumed from the matching
    checkpoint.

    Args:
        opt: the experiment configuration.

    Raises:
        ValueError: if ``opt.sticky_schedule`` has no entry for epoch 1.
    """

    if __debug__:
        print(
            clr(
                "Code might have assertions. Use -O in liftoff when running stuff.",
                color="red",
                attrs=["bold"],
            ))

    ioutil.create_paths(opt)

    sticky_schedule = OrderedDict([(int(s), float(p))
                                   for (s, p) in opt.sticky_schedule])
    # An explicit check instead of `assert`: assertions are stripped by the
    # `-O` flag recommended above, and without an entry for epoch 1 the
    # schedule lookup in the training loop has nothing to select.
    if 1 not in sticky_schedule:
        raise ValueError(
            "opt.sticky_schedule must contain an entry for epoch 1.")

    rlog.init(opt.experiment, path=opt.out_dir, tensorboard=True)

    # one logger per schedule entry, keyed by the epoch at which it starts
    train_loggers = OrderedDict()
    for i, epoch in enumerate(sticky_schedule):
        train_loggers[epoch] = train_log = rlog.getLogger(
            f"{opt.experiment}.{i:d}")
        train_log.addMetrics(
            rlog.AvgMetric("trn_R_ep", metargs=["trn_reward", "trn_done"]),
            rlog.SumMetric("trn_ep_cnt", metargs=["trn_done"]),
            rlog.AvgMetric("trn_loss", metargs=["trn_loss", 1]),
            rlog.FPSMetric("trn_tps", metargs=["trn_steps"]),
            rlog.ValueMetric("trn_sticky_action_prob",
                             metargs=["trn_sticky_action_prob"]),
            rlog.FPSMetric("lrn_tps", metargs=["lrn_steps"]),
            rlog.AvgMetric("val_R_ep", metargs=["reward", "done"]),
            rlog.SumMetric("val_ep_cnt", metargs=["done"]),
            rlog.AvgMetric("val_avg_step", metargs=[1, "done"]),
            rlog.FPSMetric("val_fps", metargs=["val_frames"]),
            rlog.ValueMetric("val_sticky_action_prob",
                             metargs=["val_sticky_action_prob"]),
        )

    # Initialize the objects we will use during training.
    env, (replay, policy_improvement,
          policy_evaluation) = experiment_factory(opt)

    rlog.info("\n\n{}\n\n{}\n\n{}".format(env, replay,
                                          policy_evaluation.estimator))
    rlog.info("\n\n{}\n\n{}".format(policy_improvement, policy_evaluation))

    if opt.estimator.args.get("spectral", None) is not None:
        for k in policy_evaluation.estimator.get_spectral_norms().keys():
            # k = f"min{str(k)[1:]}"
            rlog.addMetrics(rlog.ValueMetric(k, metargs=[k]))

    # if we loaded a checkpoint
    if Path(opt.out_dir).joinpath("replay.gz").is_file():

        # Sometimes the experiment is interrupted while saving the replay
        # buffer and it gets corrupted. Therefore we attempt restoring
        # from the previous checkpoint and replay.
        try:
            idx = replay.load(Path(opt.out_dir) / "replay.gz")
            ckpt = ioutil.load_checkpoint(opt.out_dir, idx=idx)
            rlog.info(f"Loaded most recent replay (step {idx}).")
        except Exception:  # pylint: disable=broad-except
            # Deliberately broad, since the error raised for a corrupted
            # file is not known here, but not a bare `except:` so that
            # KeyboardInterrupt / SystemExit still stop the program.
            gc.collect()
            rlog.info("Last replay gzip is faulty.")
            idx = replay.load(Path(opt.out_dir) / "prev_replay.gz")
            ckpt = ioutil.load_checkpoint(opt.out_dir, idx=idx)
            rlog.info(f"Loading a previous snapshot (step {idx}).")

        # load state dicts
        ioutil.special_conv_uv_buffer_fix(policy_evaluation.estimator,
                                          ckpt["estimator_state"])
        policy_evaluation.estimator.load_state_dict(ckpt["estimator_state"])
        ioutil.special_conv_uv_buffer_fix(policy_evaluation.target_estimator,
                                          ckpt["target_estimator_state"])
        policy_evaluation.target_estimator.load_state_dict(
            ckpt["target_estimator_state"])
        policy_evaluation.optimizer.load_state_dict(ckpt["optim_state"])

        # advance the epsilon iterator by as many steps as already trained
        last_epsilon = None
        for _ in range(ckpt["step"]):
            last_epsilon = next(policy_improvement.epsilon)
        rlog.info(f"Last epsilon: {last_epsilon}.")
        # some counters
        last_epoch = ckpt["step"] // opt.train_step_cnt
        rlog.info(f"Resuming from epoch {last_epoch}.")
        start_epoch = last_epoch + 1
        steps = ckpt["step"]
    else:
        steps = 0
        start_epoch = 1
        # add some hardware and git info, log and save
        opt = ioutil.add_platform_info(opt)

    rlog.info("\n" + ioutil.config_to_string(opt))
    ioutil.save_config(opt, opt.out_dir)

    # Start training

    last_state = None  # used by train_one_epoch to know how to resume episode.
    for epoch in range(start_epoch, opt.epoch_cnt + 1):
        # the active schedule entry is the latest one not after this epoch
        last_sched_epoch = max(ep for ep in sticky_schedule if ep <= epoch)
        print(f"StickyActProb goes from {env.sticky_action_prob}"
              f" to {sticky_schedule[last_sched_epoch]}")
        env.sticky_action_prob = sticky_schedule[last_sched_epoch]
        crt_logger = train_loggers[last_sched_epoch]

        # train for opt.train_step_cnt steps
        steps, last_state = train_one_epoch(
            env,
            (replay, policy_improvement, policy_evaluation),
            opt.train_step_cnt,
            opt.update_freq,
            opt.target_update_freq,
            opt,
            crt_logger,
            total_steps=steps,
            last_state=last_state,
        )
        crt_logger.put(trn_sticky_action_prob=env.sticky_action_prob)
        crt_logger.traceAndLog(epoch * opt.train_step_cnt)

        # validate once per schedule entry (budget: opt.valid_step_cnt)
        for sched_epoch, eval_logger in train_loggers.items():
            # NOTE(review): the original author flagged that passing
            # `sticky_action_prob` through `get_env` "doesn't work"
            # (blaming the env wrappers), which is presumably why the
            # attribute is set again right below. Confirm before removing.
            eval_env = get_env(
                opt,
                mode="testing",
                sticky_action_prob=sticky_schedule[sched_epoch])
            eval_env.sticky_action_prob = sticky_schedule[sched_epoch]
            print(
                f"Evaluating on the env with sticky={eval_env.sticky_action_prob}."
            )
            validate(
                AGENTS[opt.agent.name]["policy_improvement"](
                    policy_improvement.estimator,
                    opt.action_cnt,
                    epsilon=opt.val_epsilon,
                ),
                eval_env,
                opt.valid_step_cnt,
                eval_logger,
            )
            eval_logger.put(
                val_sticky_action_prob=eval_env.sticky_action_prob, )
            eval_logger.traceAndLog(epoch * opt.train_step_cnt)

        # save the checkpoint
        if opt.agent.save:
            ioutil.checkpoint_agent(
                opt.out_dir,
                steps,
                estimator=policy_evaluation.estimator,
                target_estimator=policy_evaluation.target_estimator,
                optim=policy_evaluation.optimizer,
                cfg=opt,
                replay=replay,
                # the replay is requested only every 8th epoch and at the end
                save_replay=(epoch % 8 == 0 or epoch == opt.epoch_cnt),
            )
Beispiel #5
0
def run(opt):
    """ Entry point of the program.

    Alternates, for ``opt.epoch_cnt`` epochs, between training
    (``train_one_epoch``), validating on a freshly created testing
    environment and, when ``opt.agent.save`` is set, checkpointing. If a
    ``replay.gz`` file exists in ``opt.out_dir`` the run is resumed from the
    matching checkpoint.

    Args:
        opt: the experiment configuration.
    """

    if __debug__:
        print(
            clr(
                "Code might have assertions. Use -O in liftoff when running stuff.",
                color="red",
                attrs=["bold"],
            ))

    ioutil.create_paths(opt)

    rlog.init(opt.experiment,
              path=opt.out_dir,
              tensorboard=True,
              relative_time=True)
    rlog.addMetrics(
        rlog.AvgMetric("trn_R_ep", metargs=["trn_reward", "trn_done"]),
        rlog.SumMetric("trn_ep_cnt", metargs=["trn_done"]),
        rlog.AvgMetric("trn_loss", metargs=["trn_loss", 1]),
        rlog.FPSMetric("trn_tps", metargs=["trn_steps"]),
        rlog.FPSMetric("lrn_tps", metargs=["lrn_steps"]),
        rlog.AvgMetric("val_R_ep", metargs=["reward", "done"]),
        rlog.SumMetric("val_ep_cnt", metargs=["done"]),
        rlog.AvgMetric("val_avg_step", metargs=[1, "done"]),
        rlog.FPSMetric("val_fps", metargs=["val_frames"]),
    )

    # Initialize the objects we will use during training.
    env, (replay, policy_improvement,
          policy_evaluation) = experiment_factory(opt)

    # log a description of every component, one per paragraph
    guts = [
        env,
        replay,
        policy_evaluation.estimator,
        policy_evaluation.optimizer,
        policy_improvement,
        policy_evaluation,
    ]
    rlog.info(("\n\n{}" * len(guts)).format(*guts))

    if opt.estimator.args.get("spectral", None) is not None:
        for k in policy_evaluation.estimator.get_spectral_norms().keys():
            # k = f"min{str(k)[1:]}"
            rlog.addMetrics(rlog.ValueMetric(k, metargs=[k]))

    # if we loaded a checkpoint
    if Path(opt.out_dir).joinpath("replay.gz").is_file():

        # Sometimes the experiment is interrupted while saving the replay
        # buffer and it gets corrupted. Therefore we attempt restoring
        # from the previous checkpoint and replay.
        try:
            idx = replay.load(Path(opt.out_dir) / "replay.gz")
            ckpt = ioutil.load_checkpoint(opt.out_dir, idx=idx)
            rlog.info(f"Loaded most recent replay (step {idx}).")
        except Exception:  # pylint: disable=broad-except
            # Deliberately broad, since the error raised for a corrupted
            # file is not known here, but not a bare `except:` so that
            # KeyboardInterrupt / SystemExit still stop the program.
            gc.collect()
            rlog.info("Last replay gzip is faulty.")
            idx = replay.load(Path(opt.out_dir) / "prev_replay.gz")
            ckpt = ioutil.load_checkpoint(opt.out_dir, idx=idx)
            rlog.info(f"Loading a previous snapshot (step {idx}).")

        # load state dicts
        ioutil.special_conv_uv_buffer_fix(policy_evaluation.estimator,
                                          ckpt["estimator_state"])
        policy_evaluation.estimator.load_state_dict(ckpt["estimator_state"])
        ioutil.special_conv_uv_buffer_fix(policy_evaluation.target_estimator,
                                          ckpt["target_estimator_state"])
        policy_evaluation.target_estimator.load_state_dict(
            ckpt["target_estimator_state"])
        policy_evaluation.optimizer.load_state_dict(ckpt["optim_state"])

        # advance the epsilon iterator by as many steps as already trained
        last_epsilon = None
        for _ in range(ckpt["step"]):
            last_epsilon = next(policy_improvement.epsilon)
        rlog.info(f"Last epsilon: {last_epsilon}.")
        # some counters
        last_epoch = ckpt["step"] // opt.train_step_cnt
        rlog.info(f"Resuming from epoch {last_epoch}.")
        start_epoch = last_epoch + 1
        steps = ckpt["step"]
    else:
        steps = 0
        start_epoch = 1
        # add some hardware and git info, log and save
        opt = ioutil.add_platform_info(opt)

    rlog.info("\n" + ioutil.config_to_string(opt))
    ioutil.save_config(opt, opt.out_dir)

    # Start training

    last_state = None  # used by train_one_epoch to know how to resume episode.
    for epoch in range(start_epoch, opt.epoch_cnt + 1):

        # train for opt.train_step_cnt steps
        steps, last_state = train_one_epoch(
            env,
            (replay, policy_improvement, policy_evaluation),
            opt.train_step_cnt,
            opt.update_freq,
            opt.target_update_freq,
            opt,
            rlog.getRootLogger(),
            total_steps=steps,
            last_state=last_state,
        )
        rlog.traceAndLog(epoch * opt.train_step_cnt)

        # validate on a new testing env (budget: opt.valid_step_cnt), using
        # the training estimator wrapped with the validation epsilon
        validate(
            AGENTS[opt.agent.name]["policy_improvement"](
                policy_improvement.estimator,
                opt.action_cnt,
                epsilon=opt.val_epsilon),
            get_env(opt, mode="testing"),
            opt.valid_step_cnt,
            rlog.getRootLogger(),
        )
        rlog.traceAndLog(epoch * opt.train_step_cnt)

        # save the checkpoint
        if opt.agent.save:
            ioutil.checkpoint_agent(
                opt.out_dir,
                steps,
                estimator=policy_evaluation.estimator,
                target_estimator=policy_evaluation.target_estimator,
                optim=policy_evaluation.optimizer,
                cfg=opt,
                replay=replay,
                # the replay is requested only every 8th epoch and at the end
                save_replay=(epoch % 8 == 0 or epoch == opt.epoch_cnt),
            )
Beispiel #6
0
def run(opt):
    """ Run experiment. This function is being launched by liftoff.

    Optionally warms the model up on a separate partition returned by
    ``get_dsets`` and then trains on the full training set for
    ``opt.epochs`` epochs. After every epoch the validation and training
    statistics are traced and printed. Epoch numbering of the main phase
    continues from where the warm-up ended.
    """
    # logging
    trn_log, val_log = set_logger(opt)

    # model related stuff; the device is hard-coded to CUDA
    device = torch.device("cuda")
    trn_set, val_set, wmp_set = get_dsets(opt)
    model = get_model(opt, device)

    def make_optimizer():
        """Build a fresh optimizer of the configured kind for `model`."""
        optim_cls = getattr(optim, opt.optim.name)
        return optim_cls(model.parameters(), **vars(opt.optim.args))

    optimizer = make_optimizer()
    batch_size = opt.trn_loader.batch_size

    rlog.info(U.config_to_string(opt))
    rlog.info("Model: %s", str(model))
    rlog.info("Optimizer: %s \n", str(optimizer))

    def run_epoch(epoch, dset, optimizer):
        """Train one epoch on `dset`, then compute and log the statistics."""
        trn_loss, trn_acc = train(
            DataLoader(dset, **vars(opt.trn_loader)),
            model,
            optimizer,
            get_criterion(opt, model, len(dset) // batch_size),
            mc_samples=opt.trn_mcs,
        )

        val_stats = valid_stats(opt, model, val_set)
        trn_stats = train_stats(opt, model, dset)
        # overwrite with the values measured while training
        trn_stats["loss"] = trn_loss
        trn_stats["acc"] = trn_acc

        # to pickle and tensorboard
        val_log.trace(step=epoch, **val_stats)
        trn_log.trace(step=epoch, **trn_stats)

        # to console
        for logger, stats in ((trn_log, trn_stats), (val_log, val_stats)):
            logger.info(
                logger.fmt.format(epoch, stats["acc"], stats["loss"]))

        # extra logging
        model_stats(opt, epoch, model)

    has_warmup = wmp_set is not None

    # Warm-up the model on a partition of the training dataset
    if has_warmup:
        rlog.info("Warming-up on dset of size %d", len(wmp_set))
        for epoch in range(opt.warmup.epochs):
            run_epoch(epoch, wmp_set, optimizer)

        # maybe reset optimizer after warmup
        if opt.warmup.reset_optim:
            rlog.info("\nWarmup ended. Resetting optimizer.")
            optimizer = make_optimizer()

    # Train on the full training dataset, continuing the epoch count
    first_epoch = opt.warmup.epochs if has_warmup else 0
    rlog.info("\nTraining on dset: %s", str(trn_set))
    for epoch in range(first_epoch, first_epoch + opt.epochs):
        run_epoch(epoch, trn_set, optimizer)