def __init__(self, params):
    """Initializes class instance.

    Argument:
        params (DotMap): A DotMap containing the following:
            .sim_cfg:
                .env (gym.env): Environment for this experiment.
                .task_hor (int): Task horizon.
                .stochastic (bool): (optional) If True, the agent adds noise to its actions.
                    Must provide noise_std (see below). Defaults to False.
                .noise_std (float): For stochastic agents, noise of the form N(0, noise_std^2 * I)
                    will be added to actions.
            .exp_cfg:
                .ntrain_iters (int): Number of training iterations to be performed.
                .nrollouts_per_iter (int): (optional) Number of rollouts done between training
                    iterations. Defaults to 1.
                .ninit_rollouts (int): (optional) Number of initial rollouts. Defaults to 1.
                .policy (controller): Policy that will be trained.
            .log_cfg:
                .logdir (str): Parent of the directory path where experiment data will be saved.
                    The experiment will be saved in logdir/<date+time of experiment start>.
                .nrecord (int): (optional) Number of rollouts to record for every iteration.
                    Defaults to 0.
                .neval (int): (optional) Number of rollouts for performance evaluation.
                    Defaults to 1.
    """
    # Reject configurations that are not currently supported (stochastic agents).
    assert params.sim_cfg.get("stochastic", False) == False

    self.env = get_required_argument(params.sim_cfg, "env", "Must provide environment.")
    self.task_hor = get_required_argument(params.sim_cfg, "task_hor", "Must provide task horizon.")
    self.agent = Agent(DotMap(env=self.env, noisy_actions=False))

    self.ntrain_iters = get_required_argument(
        params.exp_cfg, "ntrain_iters", "Must provide number of training iterations.")
    self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
    self.ninit_rollouts = params.exp_cfg.get("ninit_rollouts", 1)
    self.policy = get_required_argument(params.exp_cfg, "policy", "Must provide a policy.")

    self.logdir = os.path.join(
        get_required_argument(params.log_cfg, "logdir", "Must provide log parent directory."),
        strftime("%Y-%m-%d--%H:%M:%S", localtime()))
    self.nrecord = params.log_cfg.get("nrecord", 0)
    self.neval = params.log_cfg.get("neval", 1)
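
# --- Illustrative sketch (not part of the original source) -------------------
# A minimal example of how the `params` DotMap documented above might be
# assembled before being passed to this constructor. The horizon and iteration
# counts are hypothetical placeholders; only the field names and defaults come
# from the docstring above.
def _example_experiment_params(env, policy):
    from dotmap import DotMap

    params = DotMap()
    params.sim_cfg.env = env               # a gym.Env instance
    params.sim_cfg.task_hor = 100          # hypothetical task horizon
    params.exp_cfg.ntrain_iters = 50       # required
    params.exp_cfg.nrollouts_per_iter = 1  # optional, defaults to 1
    params.exp_cfg.ninit_rollouts = 1      # optional, defaults to 1
    params.exp_cfg.policy = policy         # the controller to be trained
    params.log_cfg.logdir = "log"          # experiment saved in log/<start time>
    params.log_cfg.nrecord = 0             # optional, defaults to 0
    params.log_cfg.neval = 1               # optional, defaults to 1
    return params
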
def main(env, ctrl_type, ctrl_args, overrides, model_dir, logdir):
    ctrl_args = DotMap(**{key: val for (key, val) in ctrl_args})

    # Force-load a pretrained model and run a single recorded rollout.
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.model_dir", model_dir])
    overrides.append(["ctrl_cfg.prop_cfg.model_init_cfg.load_model", "True"])
    overrides.append(["ctrl_cfg.prop_cfg.model_pretrained", "True"])
    overrides.append(["exp_cfg.exp_cfg.ninit_rollouts", "0"])
    overrides.append(["exp_cfg.exp_cfg.ntrain_iters", "1"])
    overrides.append(["exp_cfg.log_cfg.nrecord", "1"])

    cfg = create_config(env, ctrl_type, ctrl_args, overrides, logdir)
    cfg.pprint()

    env = get_required_argument(cfg.exp_cfg.sim_cfg, "env", "Must provide environment.")
    # 150 for Jaco
    task_hor = get_required_argument(cfg.exp_cfg.sim_cfg, "task_hor", "Must provide task horizon.")

    policy = MPC(cfg.ctrl_cfg)
    agent_sample(env, task_hor, policy, "transfer.mp4")
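
# --- Illustrative usage (not part of the original source) --------------------
# A sketch of how main() might be invoked directly. The environment name,
# controller argument key, and directories are hypothetical placeholders; the
# argument structure (key/value pairs for ctrl_args, dotted-path string
# overrides) mirrors how main() consumes them above.
def _example_transfer_run():
    main(
        env="halfcheetah",                                # hypothetical env name known to create_config
        ctrl_type="MPC",
        ctrl_args=[("opt-type", "CEM")],                  # (key, value) pairs folded into a DotMap
        overrides=[["ctrl_cfg.opt_cfg.plan_hor", "25"]],  # dotted-path overrides, given as strings
        model_dir="log/pretrained",                       # hypothetical directory holding a saved model
        logdir="log",
    )
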
def nn_constructor(self, model_init_cfg):
    ensemble_size = get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size")
    load_model = model_init_cfg.get("load_model", False)
    assert load_model is False, 'Has yet to support loading model'

    # * 2 because we output both the mean and the variance
    model = PtModel(ensemble_size, self.MODEL_IN, self.MODEL_OUT * 2).to(TORCH_DEVICE)
    model.optim = torch.optim.Adam(model.parameters(), lr=0.001)
    return model
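
# --- Illustrative sketch (not part of the original source) -------------------
# The output width above is MODEL_OUT * 2 because the network predicts a mean
# and a variance for each output dimension. One common way to consume such a
# doubled output (an assumption about the layout, not necessarily how PtModel
# arranges it) is to split it in half and treat the second half as a
# log-variance:
import torch

def _example_split_mean_logvar(net_output: torch.Tensor, model_out_dim: int):
    mean = net_output[..., :model_out_dim]
    logvar = net_output[..., model_out_dim:2 * model_out_dim]
    var = torch.exp(logvar)  # exponentiate so the predicted variance is positive
    return mean, var
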
def nn_constructor(self, model_init_cfg):
    ensemble_size = get_required_argument(model_init_cfg, "num_nets", "Must provide ensemble size")
    load_model = model_init_cfg.get("load_model", False)
    assert load_model is False, 'Has yet to support loading model'

    model = EnsembleModel(
        ensemble_size,
        in_features=self.MODEL_IN,
        out_features=self.MODEL_OUT * 2 + 1,
        hidden_size=self.MODEL_HIDDEN_SIZE,
        num_layers=len(self.MODEL_WEIGHT_DECAYS),
        weight_decays=self.MODEL_WEIGHT_DECAYS).to(TORCH_DEVICE)
    model.optim = torch.optim.Adam(model.parameters(), lr=0.001)
    return model
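
# --- Illustrative sketch (not part of the original source) -------------------
# How an nn_constructor like the ones above is typically wired into the
# controller's configuration: the constructor is stored as
# model_init_cfg.model_constructor and later called with model_init_cfg
# (see the MPC __init__ below). `config` is a hypothetical object exposing
# nn_constructor; the ensemble size is a placeholder value.
def _example_wire_model_constructor(config):
    from dotmap import DotMap

    model_init_cfg = DotMap()
    model_init_cfg.num_nets = 5                                # required ensemble size (hypothetical)
    model_init_cfg.load_model = False                          # loading a saved model is asserted off
    model_init_cfg.model_constructor = config.nn_constructor   # the function defined above
    return model_init_cfg
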
def __init__(self, params):
    """Creates class instance.

    Arguments:
        params
            .env (gym.env): Environment for which this controller will be used.
            .ac_ub (np.ndarray): (optional) An array of action upper bounds.
                Defaults to environment action upper bounds.
            .ac_lb (np.ndarray): (optional) An array of action lower bounds.
                Defaults to environment action lower bounds.
            .per (int): (optional) Determines how often the action sequence will be optimized.
                Defaults to 1 (reoptimizes at every call to act()).
            .prop_cfg
                .model_init_cfg (DotMap): A DotMap of initialization parameters for the model.
                    .model_constructor (func): A function which constructs an instance of this
                        model, given model_init_cfg.
                .model_train_cfg (dict): (optional) A DotMap of training parameters that will be
                    passed into the model every time it is trained. Defaults to an empty dict.
                .model_pretrained (bool): (optional) If True, assumes that the model has been
                    trained upon construction.
                .mode (str): Propagation method. Choose between [E, DS, TSinf, TS1, MM].
                    See https://arxiv.org/abs/1805.12114 for details.
                .npart (int): Number of particles used for DS, TSinf, TS1, and MM propagation
                    methods.
                .ign_var (bool): (optional) Determines whether or not the variance output of the
                    model will be ignored. Defaults to False unless deterministic propagation is
                    being used.
                .obs_preproc (func): (optional) A function which modifies observations (in a 2D
                    matrix) before they are passed into the model. Defaults to lambda obs: obs.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .obs_postproc (func): (optional) A function which returns vectors calculated from
                    the previous observations and model predictions, which will then be passed
                    into the provided cost function on observations.
                    Defaults to lambda obs, model_out: model_out.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .obs_postproc2 (func): (optional) A function which takes the vectors returned by
                    obs_postproc and (possibly) modifies them into the predicted observations for
                    the next time step. Defaults to lambda obs: obs.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .targ_proc (func): (optional) A function which takes current observations and next
                    observations and returns the array of targets (so that the model learns the
                    mapping obs -> targ_proc(obs, next_obs)).
                    Defaults to lambda obs, next_obs: next_obs.
                    Note: Only needs to process NumPy arrays.
                .continue_train (bool): (optional) Whether or not to continue training a
                    previously trained model. Defaults to False.
            .opt_cfg
                .mode (str): Internal optimizer that will be used. Choose between [CEM, DRO, DCEM].
                .cfg (DotMap): A map of optimizer initializer parameters.
                .plan_hor (int): The planning horizon that will be used in optimization.
                .obs_cost_fn (func): A function which computes the cost of every observation
                    in a 2D matrix.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .ac_cost_fn (func): A function which computes the cost of every action in a
                    2D matrix.
                .catastrophe_cost_fn (func): A function which computes the cost of a catastrophe.
                .no_catastrophe_pred (bool): Whether or not to train/use catastrophe prediction.
                .percentile (float): The percentile used for either catastrophic-state or
                    reward-based risk aversion.
    """
    self.dO, self.dU = params.env.observation_space.shape[0], params.env.action_space.shape[0]
    self.ac_ub, self.ac_lb = params.env.action_space.high, params.env.action_space.low
    self.ac_ub = np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub))
    self.ac_lb = np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb))
    self.update_fns = params.get("update_fns", [])
    self.per = params.get("per", 1)

    self.model_init_cig = params.prop_cfg.get("model_init_cfg", {})
    self.model_train_cfg = params.prop_cfg.get("model_train_cfg", {})
    self.prop_mode = get_required_argument(params.prop_cfg, "mode", "Must provide propagation method.")
    self.npart = get_required_argument(params.prop_cfg, "npart", "Must provide number of particles.")
    self.ign_var = params.prop_cfg.get("ign_var", False) or self.prop_mode == "E"

    self.obs_preproc = params.prop_cfg.get("obs_preproc", lambda obs: obs)
    self.obs_postproc = params.prop_cfg.get("obs_postproc", lambda obs, model_out: model_out)
    self.obs_postproc2 = params.prop_cfg.get("obs_postproc2", lambda next_obs: next_obs)
    self.targ_proc = params.prop_cfg.get("targ_proc", lambda obs, next_obs: next_obs)
    self.continue_train = params.prop_cfg.get("continue_train", False)

    self.opt_mode = get_required_argument(params.opt_cfg, "mode", "Must provide optimization method.")
    self.plan_hor = get_required_argument(params.opt_cfg, "plan_hor", "Must provide planning horizon.")
    self.obs_cost_fn = get_required_argument(params.opt_cfg, "obs_cost_fn", "Must provide cost on observations.")
    self.ac_cost_fn = get_required_argument(params.opt_cfg, "ac_cost_fn", "Must provide cost on actions.")
    self.catastrophe_cost_fn = get_required_argument(
        params.opt_cfg, "catastrophe_cost_fn", "Must provide cost on catastrophe.")
    self.no_catastrophe_pred = params.opt_cfg.get("no_catastrophe_pred", False)
    self.percentile = get_required_argument(
        params.opt_cfg, "percentile", "Must provide percentile used for optimizer.")

    if hasattr(params.env, "possible_actions"):  # Discrete case
        self.possible_actions = params.env.possible_actions

    # Set mode to training; switched to testing (adapting) later.
    self.mode = 'train'

    # Perform argument checks
    assert self.prop_mode == 'TSinf', 'Only TSinf propagation mode is supported.'
    assert self.npart % self.model_init_cig.num_nets == 0, "Number of particles must be a multiple of the ensemble size."

    # Create action sequence optimizer
    opt_cfg = params.opt_cfg.get("cfg", {})
    optim_map = {
        'CEM': CEMOptimizer,
        'DRO': DiscreteRandomOptimizer,
        'DCEM': DiscreteCEMOptimizer
    }
    self.optimizer = optim_map[self.opt_mode](
        sol_dim=self.plan_hor * self.dU,
        lower_bound=np.tile(self.ac_lb, [self.plan_hor]),
        upper_bound=np.tile(self.ac_ub, [self.plan_hor]),
        cost_function=self._compile_cost,
        **opt_cfg)

    # Controller state variables
    self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
    self.ac_buf = np.array([]).reshape(0, self.dU)
    self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2, [self.plan_hor])
    self.init_var = np.tile(np.square(self.ac_ub - self.ac_lb) / 16, [self.plan_hor])
    self.train_in = np.array([]).reshape(0, self.dU + self.obs_preproc(np.zeros([1, self.dO])).shape[-1])
    self.gravity_targs = np.array([]).reshape(0, 1)
    self.train_targs = np.array([]).reshape(
        0, self.targ_proc(np.zeros([1, self.dO]), np.zeros([1, self.dO])).shape[-1])

    print("Created an MPC controller, prop mode %s, %d particles. " % (self.prop_mode, self.npart)
          + ("Ignoring variance." if self.ign_var else ""))

    # Set up PyTorch model
    self.model = get_required_argument(
        params.prop_cfg.model_init_cfg, "model_constructor",
        "Must provide a model constructor.")(params.prop_cfg.model_init_cfg)
    self.logdir = None
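
# --- Illustrative sketch (not part of the original source) -------------------
# The docstring above requires obs_preproc/obs_postproc to handle both NumPy
# and PyTorch inputs. One way to satisfy that requirement (an assumption for
# illustration, not this repository's actual pre/post-processing) is to
# dispatch on the array type:
import numpy as np
import torch

def _example_obs_preproc(obs):
    # Hypothetical preprocessing: replace the first observation dim by its sin/cos.
    if isinstance(obs, np.ndarray):
        return np.concatenate([np.sin(obs[:, :1]), np.cos(obs[:, :1]), obs[:, 1:]], axis=-1)
    return torch.cat([torch.sin(obs[:, :1]), torch.cos(obs[:, :1]), obs[:, 1:]], dim=-1)

def _example_obs_postproc(obs, model_out):
    # Hypothetical postprocessing: the model predicts a delta on the raw state,
    # which works unchanged for both NumPy arrays and torch tensors.
    return obs + model_out

def _example_targ_proc(obs, next_obs):
    # Training targets are the state deltas; only needs to handle NumPy arrays.
    return next_obs - obs
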
def __init__(self, params):
    """Creates class instance.

    Arguments:
        params
            .env (gym.env): Environment for which this controller will be used.
            .ac_ub (np.ndarray): (optional) An array of action upper bounds.
                Defaults to environment action upper bounds.
            .ac_lb (np.ndarray): (optional) An array of action lower bounds.
                Defaults to environment action lower bounds.
            .per (int): (optional) Determines how often the action sequence will be optimized.
                Defaults to 1 (reoptimizes at every call to act()).
            .prop_cfg
                .model_init_cfg (DotMap): A DotMap of initialization parameters for the model.
                    .model_constructor (func): A function which constructs an instance of this
                        model, given model_init_cfg.
                .model_train_cfg (dict): (optional) A DotMap of training parameters that will be
                    passed into the model every time it is trained. Defaults to an empty dict.
                .model_pretrained (bool): (optional) If True, assumes that the model has been
                    trained upon construction.
                .mode (str): Propagation method. Choose between [E, DS, TSinf, TS1, MM].
                    See https://arxiv.org/abs/1805.12114 for details.
                .npart (int): Number of particles used for DS, TSinf, TS1, and MM propagation
                    methods.
                .ign_var (bool): (optional) Determines whether or not the variance output of the
                    model will be ignored. Defaults to False unless deterministic propagation is
                    being used.
                .obs_preproc (func): (optional) A function which modifies observations (in a 2D
                    matrix) before they are passed into the model. Defaults to lambda obs: obs.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .obs_postproc (func): (optional) A function which returns vectors calculated from
                    the previous observations and model predictions, which will then be passed
                    into the provided cost function on observations.
                    Defaults to lambda obs, model_out: model_out.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .obs_postproc2 (func): (optional) A function which takes the vectors returned by
                    obs_postproc and (possibly) modifies them into the predicted observations for
                    the next time step. Defaults to lambda obs: obs.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .targ_proc (func): (optional) A function which takes current observations and next
                    observations and returns the array of targets (so that the model learns the
                    mapping obs -> targ_proc(obs, next_obs)).
                    Defaults to lambda obs, next_obs: next_obs.
                    Note: Only needs to process NumPy arrays.
            .opt_cfg
                .mode (str): Internal optimizer that will be used. Choose between [CEM].
                .cfg (DotMap): A map of optimizer initializer parameters.
                .plan_hor (int): The planning horizon that will be used in optimization.
                .obs_cost_fn (func): A function which computes the cost of every observation
                    in a 2D matrix.
                    Note: Must be able to process both NumPy and PyTorch arrays.
                .ac_cost_fn (func): A function which computes the cost of every action in a
                    2D matrix.
            .log_cfg
                .save_all_models (bool): (optional) If True, saves models at every iteration.
                    Defaults to False (only the most recent model is saved).
                    Warning: Can be very memory-intensive.
                .log_traj_preds (bool): (optional) If True, saves the mean and variance of
                    predicted particle trajectories. Defaults to False.
                .log_particles (bool): (optional) If True, saves all predicted particle
                    trajectories. Defaults to False.
                    Note: Takes precedence over log_traj_preds.
                    Warning: Can be very memory-intensive.
    """
    super().__init__(params)

    self.dO, self.dU = params.env.observation_space.shape[0], params.env.action_space.shape[0]
    self.ac_ub, self.ac_lb = params.env.action_space.high, params.env.action_space.low
    self.ac_ub = np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub))
    self.ac_lb = np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb))
    self.update_fns = params.get("update_fns", [])
    self.per = params.get("per", 1)

    self.model_init_cig = params.prop_cfg.get("model_init_cfg", {})
    self.model_train_cfg = params.prop_cfg.get("model_train_cfg", {})
    self.prop_mode = get_required_argument(params.prop_cfg, "mode", "Must provide propagation method.")
    self.npart = get_required_argument(params.prop_cfg, "npart", "Must provide number of particles.")
    self.ign_var = params.prop_cfg.get("ign_var", False) or self.prop_mode == "E"

    self.obs_preproc = params.prop_cfg.get("obs_preproc", lambda obs: obs)
    self.obs_postproc = params.prop_cfg.get("obs_postproc", lambda obs, model_out: model_out)
    self.obs_postproc2 = params.prop_cfg.get("obs_postproc2", lambda next_obs: next_obs)
    self.targ_proc = params.prop_cfg.get("targ_proc", lambda obs, next_obs: next_obs)

    self.opt_mode = get_required_argument(params.opt_cfg, "mode", "Must provide optimization method.")
    self.plan_hor = get_required_argument(params.opt_cfg, "plan_hor", "Must provide planning horizon.")
    self.obs_cost_fn = get_required_argument(params.opt_cfg, "obs_cost_fn", "Must provide cost on observations.")
    self.ac_cost_fn = get_required_argument(params.opt_cfg, "ac_cost_fn", "Must provide cost on actions.")

    self.save_all_models = params.log_cfg.get("save_all_models", False)
    self.log_traj_preds = params.log_cfg.get("log_traj_preds", False)
    self.log_particles = params.log_cfg.get("log_particles", False)

    # Perform argument checks
    assert self.opt_mode == 'CEM'
    assert self.prop_mode == 'TSinf', 'Only TSinf propagation mode is supported.'
    assert self.npart % self.model_init_cig.num_nets == 0, "Number of particles must be a multiple of the ensemble size."

    # Create action sequence optimizer
    opt_cfg = params.opt_cfg.get("cfg", {})
    self.optimizer = CEMOptimizer(
        sol_dim=self.plan_hor * self.dU,
        lower_bound=np.tile(self.ac_lb, [self.plan_hor]),
        upper_bound=np.tile(self.ac_ub, [self.plan_hor]),
        cost_function=self._compile_cost,
        **opt_cfg)

    # Controller state variables
    self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
    self.ac_buf = np.array([]).reshape(0, self.dU)
    self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2, [self.plan_hor])
    self.init_var = np.tile(np.square(self.ac_ub - self.ac_lb) / 16, [self.plan_hor])
    self.train_in = np.array([]).reshape(0, self.dU + self.obs_preproc(np.zeros([1, self.dO])).shape[-1])
    self.train_targs = np.array([]).reshape(
        0, self.targ_proc(np.zeros([1, self.dO]), np.zeros([1, self.dO])).shape[-1])

    print("Created an MPC controller, prop mode %s, %d particles. " % (self.prop_mode, self.npart)
          + ("Ignoring variance." if self.ign_var else ""))

    if self.save_all_models:
        print("Controller will save all models. (Note: This may be memory-intensive.)")
    if self.log_particles:
        print("Controller is logging particle predictions (Note: This may be memory-intensive).")
        self.pred_particles = []
    elif self.log_traj_preds:
        print("Controller is logging trajectory prediction statistics (mean+var).")
        self.pred_means, self.pred_vars = [], []
    else:
        print("Trajectory prediction logging is disabled.")

    # Set up PyTorch model
    self.model = get_required_argument(
        params.prop_cfg.model_init_cfg, "model_constructor",
        "Must provide a model constructor.")(params.prop_cfg.model_init_cfg)
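
# --- Illustrative sketch (not part of the original source) -------------------
# A minimal log_cfg for the controller above; all three flags are optional and
# default to False, and log_particles takes precedence over log_traj_preds.
def _example_log_cfg():
    from dotmap import DotMap

    cfg = DotMap()
    cfg.save_all_models = False   # keep only the most recent model
    cfg.log_traj_preds = False    # don't log predicted-trajectory mean/variance
    cfg.log_particles = False     # don't log raw particle predictions
    return cfg
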
def __init__(self, params):
    """Initializes class instance.

    Argument:
        params (DotMap): A DotMap containing the following:
            .sim_cfg:
                .env (gym.env): Environment for this experiment.
                .task_hor (int): Task horizon.
                .test_percentile (float): Risk-aversion percentile used for testing.
                .record_video (bool): Whether to record training/adaptation iterations.
            .exp_cfg:
                .ntrain_iters (int): Number of training iterations to be performed.
                .nrollouts_per_iter (int): (optional) Number of rollouts done between training
                    iterations. Defaults to 1.
                .ninit_rollouts (int): (optional) Number of initial rollouts. Defaults to 1.
                .policy (controller): Policy that will be trained.
                .ntest_rollouts (int): Number of rollouts for measuring test performance.
                .nadapt_iters (int): (optional) Number of adaptation iterations to perform
                    (10 in the paper).
                .continue_train (bool): Whether to continue training from a load_model_dir.
                .test_domain (float): Environment domain used for adaptation/testing.
                .nrollout_per_itr (int): Number of rollouts per training iteration.
                .start_epoch (int): Which epoch to start training from; used for continuing to
                    train a previously trained model.
            .log_cfg:
                .logdir (str): Directory to log to.
                .suffix (str): Suffix to add to logdir.
    """
    # Reject configurations that are not currently supported (stochastic agents).
    assert params.sim_cfg.get("stochastic", False) == False

    self.env = get_required_argument(params.sim_cfg, "env", "Must provide environment.")
    self.task_hor = get_required_argument(params.sim_cfg, "task_hor", "Must provide task horizon.")
    self.ntrain_iters = get_required_argument(
        params.exp_cfg, "ntrain_iters", "Must provide number of training iterations.")
    self.test_percentile = params.sim_cfg.test_percentile
    self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
    self.ninit_rollouts = params.exp_cfg.get("ninit_rollouts", 1)
    self.ntest_rollouts = params.exp_cfg.get("ntest_rollouts", 1)
    self.nadapt_iters = params.exp_cfg.get("nadapt_iters", 0)
    self.policy = get_required_argument(params.exp_cfg, "policy", "Must provide a policy.")
    self.continue_train = params.exp_cfg.get("continue_train", False)
    self.test_domain = params.exp_cfg.get("test_domain", None)
    self.nrollout_per_itr = params.exp_cfg.get("nrollout_per_itr", 1)
    self.start_epoch = params.exp_cfg.get("start_epoch", 0)
    self.training_percentile = self.policy.percentile

    if self.continue_train:
        # Resume from a previously saved run: restore the controller's buffers.
        self.logdir = params.exp_cfg.load_model_dir
        self.policy.ac_buf = np.load(os.path.join(self.logdir, "ac_buf.npy"))
        self.policy.prev_sol = np.load(os.path.join(self.logdir, "prev_sol.npy"))
        self.policy.init_var = np.load(os.path.join(self.logdir, "init_var.npy"))
        self.policy.train_in = np.load(os.path.join(self.logdir, "train_in.npy"))
        self.policy.train_targs = np.load(os.path.join(self.logdir, "train_targs.npy"))

    self.logdir = os.path.join(
        get_required_argument(params.log_cfg, "logdir", "Must provide log parent directory."),
        strftime("%Y-%m-%d--%H-%M-%S", localtime()))
    self.suffix = params.log_cfg.get("suffix", None)
    if self.suffix is not None:
        self.logdir = self.logdir + '-' + self.suffix
    self.writer = SummaryWriter(self.logdir + '-tboard')
    self.record_video = params.sim_cfg.get("record_video", False)

    if self.test_domain is not None:
        self.env.test_domain = self.test_domain
        print("Setting test domain to: %0.3f" % self.env.test_domain)
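
# --- Illustrative sketch (not part of the original source) -------------------
# How the `params` DotMap for this adaptation experiment might be assembled.
# The horizon, percentile, iteration counts, and suffix are hypothetical
# placeholders; the field names and defaults mirror the docstring above.
def _example_adaptation_params(env, policy):
    from dotmap import DotMap

    params = DotMap()
    params.sim_cfg.env = env
    params.sim_cfg.task_hor = 100            # hypothetical task horizon
    params.sim_cfg.test_percentile = 50.0    # risk-aversion percentile used at test time
    params.sim_cfg.record_video = False
    params.exp_cfg.ntrain_iters = 50
    params.exp_cfg.policy = policy
    params.exp_cfg.ntest_rollouts = 1
    params.exp_cfg.nadapt_iters = 10         # 10 adaptation iterations, as in the paper
    params.exp_cfg.continue_train = False    # set True together with exp_cfg.load_model_dir
    params.exp_cfg.test_domain = None        # e.g. a float selecting the test-time domain
    params.log_cfg.logdir = "log"
    params.log_cfg.suffix = "adapt"          # hypothetical suffix appended to the logdir
    return params
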