예제 #1
0
def main(env, ctrl_type, ctrl_args, overrides, logdir, mode):
    """Build the config, attach value functions, teacher and policy, then run.

    Args:
        env: Environment identifier forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, converted to a DotMap.
        overrides: List of ``[config_path, value]`` overrides. The list is
            copied before the extra ``nrecord`` entry is added, so the
            caller's list is left unmodified.
        logdir: Log directory forwarded to ``create_config``.
        mode: Forwarded to ``create_config`` and to ``teacher.env.set_mode``.

    Raises:
        OSError: Propagated from ``os.makedirs`` when ``exp.logdir`` already
            exists.
    """
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})
    # Extend a copy rather than appending in place: mutating the argument
    # would leak this extra override into any later use of the caller's list.
    overrides = list(overrides) + [["exp_cfg.log_cfg.nrecord", "0"]]
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir, mode)

    if cfg.exp_cfg.exp_cfg.use_value:
        cfg.exp_cfg.exp_cfg.value = DeepValueFunction("value", cfg.val_cfg)
        cfg.exp_cfg.exp_cfg.value_target = DeepValueFunction(
            "target", cfg.val_cfg)

    if not cfg.exp_cfg.exp_cfg.load_samples:
        # NOTE(review): this reads value.model.sess even when use_value is
        # falsy and no value function was created above -- confirm that the
        # teacher tolerates whatever the config yields in that case.
        cfg.exp_cfg.exp_cfg.teacher = cfg.exp_cfg.sim_cfg.env.teacher(
            cfg.exp_cfg.exp_cfg.value.model.sess)
        cfg.exp_cfg.exp_cfg.teacher.env.set_mode(mode)

    # Mirror the value-function settings onto the controller config so the
    # policy built below sees them.
    cfg.ctrl_cfg.value_func = cfg.exp_cfg.exp_cfg.value
    cfg.ctrl_cfg.target_value_func = cfg.exp_cfg.exp_cfg.value_target
    cfg.ctrl_cfg.use_value = cfg.exp_cfg.exp_cfg.use_value

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)

    exp = MBExperiment(cfg.exp_cfg)

    cfg.pprint()
    os.makedirs(exp.logdir)
    # Keep a human-readable dump of the full config next to the results.
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
예제 #2
0
def main(env, ctrl_type, ctrl_args, overrides, logdir):
    """Create the config, build the policy, dump the config and run.

    Args:
        env: Environment identifier forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, converted to a DotMap.
        overrides: Config overrides forwarded to ``create_config``.
        logdir: Log directory forwarded to ``create_config``.
    """
    controller_kwargs = {name: value for name, value in ctrl_args}
    config = create_config(
        env, ctrl_type, DotMap(**controller_kwargs), overrides, logdir)
    config.pprint()

    if ctrl_type == "MPC":
        config.exp_cfg.exp_cfg.policy = MPC(config.ctrl_cfg)
    experiment = MBExperiment(config.exp_cfg)

    # The experiment decides the final log directory; create it and store a
    # readable snapshot of the config inside.
    run_dir = experiment.logdir
    os.makedirs(run_dir)
    config_path = os.path.join(run_dir, "config.txt")
    with open(config_path, "w") as out:
        out.write(pprint.pformat(config.toDict()))

    experiment.run_experiment()
예제 #3
0
def exp(steps_needed_to_solve, planning_horizon, logdir):
    """Run one MPC experiment on ``pointmass_u_wall`` with fixed overrides.

    Args:
        steps_needed_to_solve: Passed to ``create_config`` inside the config
            module kwargs.
        planning_horizon: Passed to ``create_config`` inside the config
            module kwargs.
        logdir: Log directory forwarded to ``create_config``.
    """
    tf.reset_default_graph()

    env = "pointmass_u_wall"
    ctrl_type = "MPC"
    # No controller arguments are supplied for this run.
    controller_args = DotMap()

    # The nrecord / nrecord_eval_mode logging options are not overridden
    # here; only the entries below are.
    overrides = [
        ["exp_cfg.log_cfg.neval", 5],
        ["exp_cfg.log_cfg.neval_eval_mode", 5],
        ["exp_cfg.exp_cfg.ntrain_iters", 3],
        ["ctrl_cfg.opt_cfg.plan_hor", 1],
        ["ctrl_cfg.opt_cfg.cfg.popsize", 5],
        ["ctrl_cfg.opt_cfg.cfg.num_elites", 2],
        ["ctrl_cfg.opt_cfg.cfg.max_iters", 1],
    ]
    config_module_kwargs = dict(
        steps_needed_to_solve=steps_needed_to_solve,
        planning_horizon=planning_horizon,
        task_horizon_factor=4,
    )

    config = create_config(env, ctrl_type, controller_args, overrides, logdir,
                           config_module_kwargs)
    config.pprint()

    if ctrl_type == "MPC":
        config.exp_cfg.exp_cfg.policy = MPC(config.ctrl_cfg)
    experiment = MBExperiment(config.exp_cfg)

    run_dir = experiment.logdir
    os.makedirs(run_dir)

    # Snapshot the full configuration, including the module kwargs, both as
    # pretty-printed text and as JSON.
    config_dict = config.toDict()
    config_dict['config_module_kwargs'] = config_module_kwargs
    variant_path = os.path.join(run_dir, "variant.json")
    with open(os.path.join(run_dir, "config.txt"), "w") as text_out:
        text_out.write(pprint.pformat(config_dict))
    with open(variant_path, "w") as json_out:
        json.dump(config_dict, json_out, indent=2, sort_keys=True,
                  cls=MyEncoder)
    save_git_info(run_dir)

    # Point the logger at the same directory for snapshots and tabular output.
    logger.set_snapshot_dir(run_dir)
    logger.add_tabular_output(os.path.join(run_dir, 'progress.csv'))
    logger.log_variant(variant_path, config_dict)

    print("log dir:", run_dir)

    experiment.run_experiment()
예제 #4
0
def main(env, ctrl_type, ctrl_args, overrides, logdir, args):
    """Create the config, build the policy, and run the experiment.

    Args:
        env: Environment identifier forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, converted to a DotMap.
        overrides: Config overrides forwarded to ``create_config``.
        logdir: Log directory forwarded to ``create_config``.
        args: Object whose ``train_policy`` attribute is coerced to ``bool``
            and passed to ``MBExperiment``.
    """
    controller_kwargs = {name: value for name, value in ctrl_args}
    config = create_config(
        env, ctrl_type, DotMap(**controller_kwargs), overrides, logdir)
    logger.info('\n' + pprint.pformat(config))

    if ctrl_type == "MPC":
        config.exp_cfg.exp_cfg.policy = MPC(config.ctrl_cfg)

    # Attach a shallow copy of the whole config to the experiment config.
    config.exp_cfg.misc = copy.copy(config)
    train_policy = bool(args.train_policy)
    experiment = MBExperiment(config.exp_cfg, train_policy=train_policy)

    run_dir = experiment.logdir
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)
    config_path = os.path.join(run_dir, "config.txt")
    with open(config_path, "w") as out:
        out.write(pprint.pformat(config.toDict()))

    experiment.run_experiment()
예제 #5
0
def main(env, ctrl_type, ctrl_args, overrides, logdir):
    """Create the config, build the policy, save config and git info, run.

    Args:
        env: Environment identifier forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, converted to a DotMap.
        overrides: Config overrides forwarded to ``create_config``.
        logdir: Log directory forwarded to ``create_config``.
    """
    controller_kwargs = {name: value for name, value in ctrl_args}
    config = create_config(
        env, ctrl_type, DotMap(**controller_kwargs), overrides, logdir)
    config.pprint()

    if ctrl_type == "MPC":
        config.exp_cfg.exp_cfg.policy = MPC(config.ctrl_cfg)
    experiment = MBExperiment(config.exp_cfg)

    run_dir = experiment.logdir
    os.makedirs(run_dir)

    # Store the config twice: pretty-printed text and sorted JSON.
    config_dict = config.toDict()
    with open(os.path.join(run_dir, "config.txt"), "w") as text_out:
        text_out.write(pprint.pformat(config_dict))
    with open(os.path.join(run_dir, "variant.json"), "w") as json_out:
        json.dump(config_dict, json_out, indent=2, sort_keys=True,
                  cls=MyEncoder)
    save_git_info(run_dir)

    experiment.run_experiment()
예제 #6
0
def main(
    env,
    ctrl_type,
    ctrl_args,
    overrides,
    model_dir,
    logdir,
    init_iter,
    last_iter,
    nrecord,
    rawdir,
):
    """Run an experiment from a pretrained model loaded from ``model_dir``.

    Args:
        env: Environment identifier forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, converted to a DotMap.
        overrides: List of ``[config_path, value]`` overrides. The list is
            copied before the extra entries are added, so the caller's list
            is left unmodified.
        model_dir: Value for the ``model_init_cfg.model_dir`` override.
        logdir: Log directory forwarded to ``create_config``.
        init_iter: Stringified into the ``exp_cfg.init_iter`` override.
        last_iter: Stringified into the ``exp_cfg.ntrain_iters`` override.
        nrecord: Stringified into the ``log_cfg.nrecord`` override.
        rawdir: Stringified into the ``log_cfg.rawdir`` override.

    Returns:
        None. Returns early, without running, when the log directory already
        exists and the user declines to overwrite it.
    """
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    # Build a new list rather than appending in place: mutating the argument
    # would leak these extra overrides into any later use of the caller's
    # list.
    overrides = list(overrides) + [
        ["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir],
        ["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"],
        ["ctrl_cfg.prop_cfg.model_pretrained", "True"],
        ["exp_cfg.exp_cfg.ninit_rollouts", "0"],
        ["exp_cfg.exp_cfg.init_iter", str(init_iter)],
        ["exp_cfg.exp_cfg.ntrain_iters", str(last_iter)],
        ["exp_cfg.log_cfg.nrecord", str(nrecord)],
        ["exp_cfg.log_cfg.rawdir", str(rawdir)],
    ]

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    if os.path.exists(exp.logdir):
        # An existing directory is reused only with explicit confirmation;
        # in that case config.txt below is overwritten.
        overwrite = user_prompt("{} already exists. Overwrite?".format(
            exp.logdir))
        if not overwrite:
            return
    else:
        os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
    print("Saved to")
    print(exp.logdir)
예제 #7
0
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    """Run a single iteration from a pretrained model in ``model_dir``.

    Args:
        env: Environment identifier forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, converted to a DotMap.
        overrides: List of ``[config_path, value]`` overrides. The list is
            copied before the extra entries are added, so the caller's list
            is left unmodified.
        model_dir: Value for the ``model_init_cfg.model_dir`` override.
        logdir: Log directory forwarded to ``create_config``.

    Raises:
        OSError: Propagated from ``os.makedirs`` when ``exp.logdir`` already
            exists.
    """
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    # Build a new list rather than appending in place: mutating the argument
    # would leak these extra overrides into any later use of the caller's
    # list.
    overrides = list(overrides) + [
        ["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir],
        ["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"],
        ["ctrl_cfg.prop_cfg.model_pretrained", "True"],
        ["exp_cfg.exp_cfg.ninit_rollouts", "0"],
        ["exp_cfg.exp_cfg.ntrain_iters", "1"],
        ["exp_cfg.log_cfg.nrecord", "1"],
    ]

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    # Keep a human-readable dump of the full config next to the results.
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
예제 #8
0
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    """Run one iteration of 200 rollouts from a model loaded from ``model_dir``.

    Args:
        env: Environment identifier forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, converted to a DotMap.
        overrides: List of ``[config_path, value]`` overrides. The list is
            copied before the extra entries are added, so the caller's list
            is left unmodified.
        model_dir: Used for the dynamics-model ``model_dir`` override and for
            ``val_cfg.model_init_cfg.model_dir``.
        logdir: Log directory forwarded to ``create_config``.

    Raises:
        OSError: Propagated from ``os.makedirs`` when ``exp.logdir`` already
            exists.
    """
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    # Build a new list rather than appending in place: mutating the argument
    # would leak these extra overrides into any later use of the caller's
    # list.
    overrides = list(overrides) + [
        ["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir],
        ["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"],
        ["ctrl_cfg.prop_cfg.model_pretrained", "True"],
        ["exp_cfg.exp_cfg.ninit_rollouts", "0"],
        ["exp_cfg.exp_cfg.ntrain_iters", "1"],
        ["exp_cfg.log_cfg.nrecord", "0"],
        ["exp_cfg.exp_cfg.nrollouts_per_iter", "200"],
        ["exp_cfg.log_cfg.neval", "200"],
    ]

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.val_cfg.model_init_cfg.load_model = True
    cfg.val_cfg.model_init_cfg.model_dir = model_dir

    # Both flags are forced to False immediately before being tested, so the
    # teacher and value-function branches never run as written; they are
    # kept as switches that can be flipped by editing the assignments.
    cfg.exp_cfg.exp_cfg.use_teacher = False
    if cfg.exp_cfg.exp_cfg.use_teacher:
        cfg.exp_cfg.exp_cfg.teacher = cfg.exp_cfg.sim_cfg.env.teacher()
    cfg.exp_cfg.exp_cfg.use_value = False
    if cfg.exp_cfg.exp_cfg.use_value:
        cfg.exp_cfg.exp_cfg.value = DeepValueFunction(cfg.val_cfg)

    cfg.exp_cfg.exp_cfg.ninit_rollouts = 0

    # NOTE(review): exp_cfg.value is never assigned while use_value is False,
    # so value_func receives whatever the config yields for an unset entry --
    # confirm this is what the controller expects.
    cfg.ctrl_cfg.value_func = cfg.exp_cfg.exp_cfg.value
    cfg.ctrl_cfg.use_value = cfg.exp_cfg.exp_cfg.use_value
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    os.makedirs(exp.logdir)
    # Keep a human-readable dump of the full config next to the results.
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()