def main(env, ctrl_type, ctrl_args, overrides, logdir, mode):
    """Build the experiment config, attach value functions/teacher, and run.

    Args:
        env: Environment identifier, forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, wrapped in a ``DotMap``.
        overrides: Sequence of ``[key, value]`` config overrides. It is not
            modified; an ``exp_cfg.log_cfg.nrecord = "0"`` entry is added to a
            copy.
        logdir: Log directory, forwarded to ``create_config``.
        mode: Forwarded to ``create_config`` and to the teacher environment's
            ``set_mode``.
    """
    ctrl_args = DotMap(**dict(ctrl_args))
    # Build a new list rather than appending, so the caller's list is left
    # untouched and repeated calls do not accumulate duplicate entries.
    overrides = list(overrides) + [["exp_cfg.log_cfg.nrecord", "0"]]
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir, mode)

    run_cfg = cfg.exp_cfg.exp_cfg

    if run_cfg.use_value:
        run_cfg.value = DeepValueFunction("value", cfg.val_cfg)
        run_cfg.value_target = DeepValueFunction("target", cfg.val_cfg)

    if not run_cfg.load_samples:
        # NOTE(review): the teacher is constructed from the value model's
        # session, so this branch presumably expects use_value to be enabled
        # -- confirm against the config defaults.
        run_cfg.teacher = cfg.exp_cfg.sim_cfg.env.teacher(
            run_cfg.value.model.sess)
        run_cfg.teacher.env.set_mode(mode)

    # Hand the same value-function objects and flag to the controller config.
    cfg.ctrl_cfg.value_func = run_cfg.value
    cfg.ctrl_cfg.target_value_func = run_cfg.value_target
    cfg.ctrl_cfg.use_value = run_cfg.use_value

    if ctrl_type == "MPC":
        run_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)
    cfg.pprint()

    # Raises if the directory already exists, exactly as before.
    os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
def main(env, ctrl_type, ctrl_args, overrides, logdir):
    """Create the configuration, write it to disk, and run the experiment.

    Args:
        env: Environment identifier, forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, wrapped in a ``DotMap``.
        overrides: Config overrides, forwarded to ``create_config``.
        logdir: Log directory, forwarded to ``create_config``.
    """
    controller_args = DotMap(**dict(ctrl_args))
    cfg = create_config(env, ctrl_type, controller_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    experiment = MBExperiment(cfg.exp_cfg)

    # Raises if the experiment directory already exists.
    os.makedirs(experiment.logdir)
    config_path = os.path.join(experiment.logdir, "config.txt")
    config_text = pprint.pformat(cfg.toDict())
    with open(config_path, "w") as config_file:
        config_file.write(config_text)

    experiment.run_experiment()
def exp(steps_needed_to_solve, planning_horizon, logdir):
    """Run one MPC experiment on ``pointmass_u_wall`` with fixed overrides.

    Resets the default TensorFlow graph, builds the config, writes
    ``config.txt`` and ``variant.json`` into the experiment's log directory,
    saves git info, sets up the logger outputs, and runs the experiment.

    Args:
        steps_needed_to_solve: Passed to ``create_config`` inside
            ``config_module_kwargs``.
        planning_horizon: Passed to ``create_config`` inside
            ``config_module_kwargs``.
        logdir: Log directory, forwarded to ``create_config``.
    """
    tf.reset_default_graph()

    env = "pointmass_u_wall"
    ctrl_type = "MPC"
    # No controller arguments are supplied, so the DotMap starts empty.
    controller_args = DotMap()

    # NOTE(review): the entries below are the active overrides; the
    # commented-out entries are retained from the original list -- confirm
    # the active set matches the intended run configuration.
    overrides = [
        # ["exp_cfg.log_cfg.nrecord", 1],
        # ["exp_cfg.log_cfg.neval", 5],
        # ["exp_cfg.log_cfg.nrecord_eval_mode", 1],
        # ["exp_cfg.log_cfg.neval_eval_mode", 5],
        # ["exp_cfg.log_cfg.nrecord", 2],
        ["exp_cfg.log_cfg.neval", 5],
        # ["exp_cfg.log_cfg.nrecord_eval_mode", 1],
        ["exp_cfg.log_cfg.neval_eval_mode", 5],
        ["exp_cfg.exp_cfg.ntrain_iters", 3],
        ["ctrl_cfg.opt_cfg.plan_hor", 1],
        ["ctrl_cfg.opt_cfg.cfg.popsize", 5],
        ["ctrl_cfg.opt_cfg.cfg.num_elites", 2],
        ["ctrl_cfg.opt_cfg.cfg.max_iters", 1],
    ]
    config_module_kwargs = dict(
        steps_needed_to_solve=steps_needed_to_solve,
        planning_horizon=planning_horizon,
        task_horizon_factor=4,
    )

    cfg = create_config(
        env, ctrl_type, controller_args, overrides, logdir,
        config_module_kwargs,
    )
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    experiment = MBExperiment(cfg.exp_cfg)

    # Raises if the experiment directory already exists.
    os.makedirs(experiment.logdir)

    # Record the module kwargs next to the resolved config.
    config_dict = cfg.toDict()
    config_dict['config_module_kwargs'] = config_module_kwargs

    with open(os.path.join(experiment.logdir, "config.txt"), "w") as txt_file:
        txt_file.write(pprint.pformat(config_dict))
    with open(os.path.join(experiment.logdir, "variant.json"), "w") as json_file:
        json.dump(config_dict, json_file, indent=2, sort_keys=True,
                  cls=MyEncoder)

    save_git_info(experiment.logdir)
    logger.set_snapshot_dir(experiment.logdir)
    logger.add_tabular_output(os.path.join(experiment.logdir, 'progress.csv'))
    logger.log_variant(
        os.path.join(experiment.logdir, 'variant.json'), config_dict)

    print("log dir:", experiment.logdir)
    experiment.run_experiment()
def main(env, ctrl_type, ctrl_args, overrides, logdir, args):
    """Create the configuration, log it, and run the experiment.

    Args:
        env: Environment identifier, forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, wrapped in a ``DotMap``.
        overrides: Config overrides, forwarded to ``create_config``.
        logdir: Log directory, forwarded to ``create_config``.
        args: Object whose ``train_policy`` attribute is coerced to ``bool``
            and passed to ``MBExperiment``.
    """
    ctrl_args = DotMap(**dict(ctrl_args))
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    logger.info('\n' + pprint.pformat(cfg))

    # Attach the MPC policy built from the controller config.
    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)

    # Give the experiment a shallow copy of the full config under ``misc``.
    cfg.exp_cfg.misc = copy.copy(cfg)
    exp = MBExperiment(cfg.exp_cfg, train_policy=bool(args.train_policy))

    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first; an already-existing directory is still reused as before.
    os.makedirs(exp.logdir, exist_ok=True)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
def main(env, ctrl_type, ctrl_args, overrides, logdir):
    """Create the configuration, save it as text and JSON, and run.

    Writes ``config.txt`` and ``variant.json`` into the experiment's log
    directory and saves git info there before starting the experiment.

    Args:
        env: Environment identifier, forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, wrapped in a ``DotMap``.
        overrides: Config overrides, forwarded to ``create_config``.
        logdir: Log directory, forwarded to ``create_config``.
    """
    controller_args = DotMap(**dict(ctrl_args))
    cfg = create_config(env, ctrl_type, controller_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    experiment = MBExperiment(cfg.exp_cfg)

    # Raises if the experiment directory already exists.
    os.makedirs(experiment.logdir)
    config_dict = cfg.toDict()

    with open(os.path.join(experiment.logdir, "config.txt"), "w") as txt_file:
        txt_file.write(pprint.pformat(config_dict))
    with open(os.path.join(experiment.logdir, "variant.json"), "w") as json_file:
        json.dump(config_dict, json_file, indent=2, sort_keys=True,
                  cls=MyEncoder)

    save_git_info(experiment.logdir)
    experiment.run_experiment()
def main(
    env,
    ctrl_type,
    ctrl_args,
    overrides,
    model_dir,
    logdir,
    init_iter,
    last_iter,
    nrecord,
    rawdir,
):
    """Run an experiment from a saved model over a range of iterations.

    Adds overrides that load the model from ``model_dir``, mark it as
    pretrained, set ``ninit_rollouts`` to ``"0"``, and set the iteration and
    recording keys from the arguments. If the experiment's log directory
    already exists the user is prompted, and the function returns without
    running when the prompt is declined.

    Args:
        env: Environment identifier, forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, wrapped in a ``DotMap``.
        overrides: Sequence of ``[key, value]`` config overrides. It is not
            modified; the extra entries are added to a copy.
        model_dir: Value for ``ctrl_cfg.prop_cfg.model_init_cfg.model_dir``.
        logdir: Log directory, forwarded to ``create_config``.
        init_iter: Stringified into ``exp_cfg.exp_cfg.init_iter``.
        last_iter: Stringified into ``exp_cfg.exp_cfg.ntrain_iters``.
        nrecord: Stringified into ``exp_cfg.log_cfg.nrecord``.
        rawdir: Stringified into ``exp_cfg.log_cfg.rawdir``.
    """
    ctrl_args = DotMap(**dict(ctrl_args))
    # Build a new list rather than appending, so the caller's list is left
    # untouched and repeated calls do not accumulate duplicate entries.
    overrides = list(overrides) + [
        ["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir],
        ["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"],
        ["ctrl_cfg.prop_cfg.model_pretrained", "True"],
        ["exp_cfg.exp_cfg.ninit_rollouts", "0"],
        ["exp_cfg.exp_cfg.init_iter", str(init_iter)],
        ["exp_cfg.exp_cfg.ntrain_iters", str(last_iter)],
        ["exp_cfg.log_cfg.nrecord", str(nrecord)],
        ["exp_cfg.log_cfg.rawdir", str(rawdir)],
    ]
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    if os.path.exists(exp.logdir):
        overwrite = user_prompt("{} already exists. Overwrite?".format(
            exp.logdir))
        if not overwrite:
            return
    else:
        os.makedirs(exp.logdir)

    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()

    print("Saved to")
    print(exp.logdir)
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    """Run a single-iteration experiment from a saved model.

    Adds overrides that load the model from ``model_dir``, mark it as
    pretrained, and set ``ninit_rollouts`` to ``"0"``, ``ntrain_iters`` to
    ``"1"`` and ``nrecord`` to ``"1"``.

    Args:
        env: Environment identifier, forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, wrapped in a ``DotMap``.
        overrides: Sequence of ``[key, value]`` config overrides. It is not
            modified; the extra entries are added to a copy.
        model_dir: Value for ``ctrl_cfg.prop_cfg.model_init_cfg.model_dir``.
        logdir: Log directory, forwarded to ``create_config``.
    """
    ctrl_args = DotMap(**dict(ctrl_args))
    # Build a new list rather than appending, so the caller's list is left
    # untouched and repeated calls do not accumulate duplicate entries.
    overrides = list(overrides) + [
        ["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir],
        ["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"],
        ["ctrl_cfg.prop_cfg.model_pretrained", "True"],
        ["exp_cfg.exp_cfg.ninit_rollouts", "0"],
        ["exp_cfg.exp_cfg.ntrain_iters", "1"],
        ["exp_cfg.log_cfg.nrecord", "1"],
    ]
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    if ctrl_type == "MPC":
        cfg.exp_cfg.exp_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    # Raises if the directory already exists, exactly as before.
    os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    """Run a single-iteration, 200-rollout experiment from a saved model.

    Adds overrides that load the dynamics model from ``model_dir``, mark it
    as pretrained, and set ``ninit_rollouts`` to ``"0"``, ``ntrain_iters`` to
    ``"1"``, ``nrecord`` to ``"0"``, and ``nrollouts_per_iter`` and ``neval``
    to ``"200"``. The value-function config is pointed at ``model_dir`` too,
    but teacher and value function are both switched off below.

    Args:
        env: Environment identifier, forwarded to ``create_config``.
        ctrl_type: Controller type; an ``MPC`` policy is built only when this
            equals ``"MPC"``.
        ctrl_args: Iterable of ``(key, value)`` pairs, wrapped in a ``DotMap``.
        overrides: Sequence of ``[key, value]`` config overrides. It is not
            modified; the extra entries are added to a copy.
        model_dir: Directory used for both the dynamics-model and
            value-model load settings.
        logdir: Log directory, forwarded to ``create_config``.
    """
    ctrl_args = DotMap(**dict(ctrl_args))
    # Build a new list rather than appending, so the caller's list is left
    # untouched and repeated calls do not accumulate duplicate entries.
    overrides = list(overrides) + [
        ["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir],
        ["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"],
        ["ctrl_cfg.prop_cfg.model_pretrained", "True"],
        ["exp_cfg.exp_cfg.ninit_rollouts", "0"],
        ["exp_cfg.exp_cfg.ntrain_iters", "1"],
        ["exp_cfg.log_cfg.nrecord", "0"],
        ["exp_cfg.exp_cfg.nrollouts_per_iter", "200"],
        ["exp_cfg.log_cfg.neval", "200"],
    ]
    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)

    cfg.val_cfg.model_init_cfg.load_model = True
    cfg.val_cfg.model_init_cfg.model_dir = model_dir

    run_cfg = cfg.exp_cfg.exp_cfg

    # Hard-coded switch: with use_teacher forced to False the branch below
    # never runs. Kept so the teacher can be re-enabled by flipping the flag.
    run_cfg.use_teacher = False
    if run_cfg.use_teacher:
        run_cfg.teacher = cfg.exp_cfg.sim_cfg.env.teacher()

    # Hard-coded switch: same pattern for the value function.
    run_cfg.use_value = False
    if run_cfg.use_value:
        run_cfg.value = DeepValueFunction(cfg.val_cfg)
    run_cfg.ninit_rollouts = 0

    # NOTE(review): with use_value forced to False, ``value`` is never
    # assigned above, so this forwards whatever create_config left under
    # that key -- confirm the controller ignores it when use_value is False.
    cfg.ctrl_cfg.value_func = run_cfg.value
    cfg.ctrl_cfg.use_value = run_cfg.use_value
    cfg.pprint()

    if ctrl_type == "MPC":
        run_cfg.policy = MPC(cfg.ctrl_cfg)
    exp = MBExperiment(cfg.exp_cfg)

    # Raises if the directory already exists, exactly as before.
    os.makedirs(exp.logdir)
    with open(os.path.join(exp.logdir, "config.txt"), "w") as f:
        f.write(pprint.pformat(cfg.toDict()))

    exp.run_experiment()