コード例 #1
0
ファイル: template.py プロジェクト: jswang/handful-of-trials
 def nn_constructor(self, model_init_cfg):
     """Builds the dynamics-model ensemble described by ``model_init_cfg``.

     Looks up the model class and the ensemble size, instantiates the
     (empty) model, and returns it; layers are meant to be added by the
     user (see the commented-out example below).
     """
     model_class = get_required_argument(
         model_init_cfg, "model_class", "Must provide model class")
     num_nets = get_required_argument(
         model_init_cfg, "num_nets", "Must provide ensemble size")
     model = model_class(
         DotMap(name="model", num_networks=num_nets, sess=self.SESS))
     # Construct model below. For example:
     # model.add(FC(*args))
     # ...
     # model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
     return model
コード例 #2
0
 def gp_constructor(self, model_init_cfg):
     """Builds a Gaussian-process model from ``model_init_cfg``.

     Requires a model class, a kernel class and the number of inducing
     points; kernel arguments default to an empty dict.
     """
     model_class = get_required_argument(
         model_init_cfg, "model_class", "Must provide model class")
     gp_params = DotMap(
         name="model",
         kernel_class=get_required_argument(
             model_init_cfg, "kernel_class", "Must provide kernel class"),
         kernel_args=model_init_cfg.get("kernel_args", {}),
         num_inducing_points=get_required_argument(
             model_init_cfg, "num_inducing_points",
             "Must provide number of inducing points."),
         sess=self.SESS,
     )
     return model_class(gp_params)
コード例 #3
0
 def value_nn_constructor(self, name, model_init_cfg_val):
     """Builds the value-function network ensemble.

     When not loading a saved model, constructs three 500-unit swish
     hidden layers followed by a linear output layer, then finalizes the
     model with an Adam optimizer.
     """
     model_class = get_required_argument(
         model_init_cfg_val, "model_class", "Must provide model class")
     load_model = model_init_cfg_val.get("load_model", False)
     model = model_class(DotMap(
         name=name,
         num_networks=get_required_argument(
             model_init_cfg_val, "num_nets", "Must provide ensemble size"),
         sess=self.SESS,
         load_model=load_model,
         model_dir=model_init_cfg_val.get("model_dir", None),
     ))
     if not load_model:
         # Three swish hidden layers, then a linear output layer.
         model.add(FC(500, input_dim=self.VALUE_IN, activation='swish', weight_decay=0.0001))
         model.add(FC(500, activation='swish', weight_decay=0.00025))
         model.add(FC(500, activation='swish', weight_decay=0.00025))
         model.add(FC(self.VALUE_OUT, weight_decay=0.0005))
     model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001}, suffix="val")
     return model
コード例 #4
0
 def nn_constructor(self, model_init_cfg):
     """Builds the dynamics-model ensemble.

     When not loading a saved model, constructs four 200-unit swish
     hidden layers followed by a linear output layer, then finalizes the
     model with an Adam optimizer (lr=0.00075).
     """
     model_class = get_required_argument(
         model_init_cfg, "model_class", "Must provide model class")
     load_model = model_init_cfg.get("load_model", False)
     model = model_class(DotMap(
         name="model",
         num_networks=get_required_argument(
             model_init_cfg, "num_nets", "Must provide ensemble size"),
         sess=self.SESS,
         load_model=load_model,
         model_dir=model_init_cfg.get("model_dir", None),
     ))
     if not load_model:
         model.add(FC(200, input_dim=self.MODEL_IN, activation="swish", weight_decay=0.00025))
         # Three more identical hidden layers.
         for hidden_wd in (0.0005, 0.0005, 0.0005):
             model.add(FC(200, activation="swish", weight_decay=hidden_wd))
         model.add(FC(self.MODEL_OUT, weight_decay=0.00075))
     model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.00075})
     return model
コード例 #5
0
ファイル: template.py プロジェクト: zhanggaofeng1120/saved-rl
 def value_nn_constructor(self, name, model_init_cfg_val):
     """Template constructor for a value-function model.

     Instantiates the (empty) model from the configured class; layers are
     meant to be added by the user (see the commented-out example below).
     """
     model_class = get_required_argument(
         model_init_cfg_val, "model_class", "Must provide model class")
     init_params = DotMap(
         name=name,
         num_networks=get_required_argument(
             model_init_cfg_val, "num_nets", "Must provide ensemble size"),
         sess=self.SESS,
         load_model=model_init_cfg_val.get("load_model", False),
         model_dir=model_init_cfg_val.get("model_dir", None),
     )
     model = model_class(init_params)
     # Construct model below. For example:
     # model.add(FC(*args))
     # ...
     # model.finalize(tf.train.AdamOptimizer, {"learning_rate": 0.001})
     return model
コード例 #6
0
    def __init__(self, name, params):
        """Initializes the value-function controller.

        Arguments:
            name (str): Name forwarded to the model constructor.
            params (DotMap): Configuration containing the environment,
                optional action bounds, model construction/training
                settings (params.model_init_cfg_val), and logging options.
        """
        super().__init__(params)
        # Gym robotics envs expose a Dict observation space; use the flat
        # size of its 'observation' entry as the observation dimension.
        if params.gym_robotics:
            self.dO = params.env.observation_space.spaces[
                'observation'].low.size
        else:
            self.dO = params.env.observation_space.shape[0]
        self.ac_ub, self.ac_lb = params.env.action_space.high, params.env.action_space.low
        # User-provided bounds may only tighten the environment's bounds.
        self.ac_ub = np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub))
        self.ac_lb = np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb))
        self.update_fns = params.get("update_fns", [])
        self.per = params.get("per", 1)
        self.model = get_required_argument(
            params.model_init_cfg_val, "model_constructor",
            "Must provide a model constructor.")(name,
                                                 params.model_init_cfg_val)
        self.model_train_cfg = params.get("model_train_cfg", {})
        self.buffer_limit = params.get("val_buffer_size", None)
        self.ign_var = params.get("ign_var", False)

        # Observation/target transforms; all default to identity-like no-ops.
        self.obs_preproc = params.get("obs_preproc", lambda obs: obs)
        self.obs_postproc = params.get("obs_postproc",
                                       lambda obs, model_out: model_out)
        self.targ_proc = params.get("targ_proc", lambda val: val)

        self.save_all_models = params.log_cfg.get("save_all_models", False)
        opt_cfg = params.opt_cfg.get("cfg", {})  # kept: asserts opt_cfg exists (unused here)

        # Controller state variables
        self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
        self.train_in = np.array([]).reshape(
            0,
            self.obs_preproc(np.zeros([1, self.dO])).shape[-1])
        # NOTE(review): unlike train_in, train_targs starts out 1-D;
        # confirm downstream concatenation expects that shape.
        self.train_targs = np.array([])

        # Replay buffer of transitions used to fit the value function.
        self.buffer = {
            "obs": [],
            "cost": [],
            "next_obs": [],
            "val": [],
            "terminal": []
        }

        if self.model.is_tf_model:
            # Variable holding the current observation for compiled TF graphs.
            self.sy_cur_obs = tf.Variable(np.zeros(self.dO), dtype=tf.float32)
            self.model.sess.run(tf.variables_initializer([self.sy_cur_obs]))
        else:
            raise NotImplementedError()

        if self.save_all_models:
            # Fixed unbalanced parenthesis in the original message.
            print(
                "Value function will save all models. (Note: This may be memory-intensive.)"
            )
        else:
            print("Value function won't save all models")
コード例 #7
0
def bnn_constructor(model_init_cfg):
    """Constructs the Bayesian Neural Network model.

    model_init_cfg is a dotmap object containing:
        - model_name (str): Name of the model.
        - num_nets (int): Ensemble size.
        - model_in (int): Number of inputs to the model.
        - model_out (int): Number of outputs to the model.
        - n_layers (int): Number of hidden layers.
        - n_neurons (int): Number of neurons per hidden layer.
        - learning_rate (float): Learning rate.
        - wd_in (float): Weight decay for the input layer neurons.
        - wd_hid (float): Weight decay for the hidden layer neurons.
        - wd_out (float): Weight decay for the output layer neurons.
        - load_model (bool): (optional) If True, loads a saved model instead
          of building layers. Defaults to False.
        - model_dir (str): (optional) Directory to load the model from.

    Returns:
        BNN class object
    """

    cfg = tf.ConfigProto()
    cfg.gpu_options.allow_growth = True  # don't grab all GPU memory up front
    SESS = tf.Session(config=cfg)  # Tensorflow session
    model = BNN(
        DotMap(name=get_required_argument(model_init_cfg, "model_name",
                                          "Must provide model name"),
               num_networks=get_required_argument(
                   model_init_cfg, "num_nets", "Must provide ensemble size"),
               sess=SESS,
               load_model=model_init_cfg.get("load_model", False),
               model_dir=model_init_cfg.get("model_dir", None)))
    if not model_init_cfg.get("load_model", False):
        # Input layer, n_layers hidden swish layers, then a linear output.
        model.add(
            FC(model_init_cfg.n_neurons,
               input_dim=model_init_cfg.model_in,
               activation="swish",
               weight_decay=model_init_cfg.wd_in))
        for _ in range(model_init_cfg.n_layers):
            model.add(
                FC(model_init_cfg.n_neurons,
                   activation="swish",
                   weight_decay=model_init_cfg.wd_hid))
        model.add(
            FC(model_init_cfg.model_out, weight_decay=model_init_cfg.wd_out))
    model.finalize(tf.train.AdamOptimizer,
                   {"learning_rate": model_init_cfg.learning_rate})
    return model
コード例 #8
0
    def __init__(self, params):
        """Initializes class instance.

        Argument:
            params (DotMap): A DotMap containing the following:
                .sim_cfg:
                    .env (gym.env): Environment for this experiment
                    .task_hor (int): Task horizon
                    .stochastic (bool): (optional) If True, agent adds noise to its actions.
                        Must provide noise_std (see below). Defaults to False.
                    .noise_std (float): for stochastic agents, noise of the form N(0, noise_std^2I)
                        will be added.

                .exp_cfg:
                    .ntrain_iters (int): Number of training iterations to be performed.
                    .nrollouts_per_iter (int): (optional) Number of rollouts done between training
                        iterations. Defaults to 1.
                    .ninit_rollouts (int): (optional) Number of initial rollouts. Defaults to 1.
                    .policy (controller): Policy that will be trained.

                .log_cfg:
                    .logdir (str): Parent of directory path where experiment data will be saved.
                        Experiment will be saved in logdir/<date+time of experiment start>
                    .nrecord (int): (optional) Number of rollouts to record for every iteration.
                        Defaults to 0.
                    .neval (int): (optional) Number of rollouts for performance evaluation.
                        Defaults to 1.
        """
        self.env = get_required_argument(params.sim_cfg, "env",
                                         "Must provide environment.")
        self.task_hor = get_required_argument(params.sim_cfg, "task_hor",
                                              "Must provide task horizon.")
        # Stochastic agents add N(0, noise_std^2 I) noise to their actions.
        if params.sim_cfg.get("stochastic", False):
            self.agent = Agent(
                DotMap(
                    env=self.env,
                    noisy_actions=True,
                    noise_stddev=get_required_argument(
                        params.sim_cfg, "noise_std",
                        "Must provide noise standard deviation in the case of a stochastic environment."
                    )))
        else:
            self.agent = Agent(DotMap(env=self.env, noisy_actions=False))

        self.ntrain_iters = get_required_argument(
            params.exp_cfg, "ntrain_iters",
            "Must provide number of training iterations.")
        self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
        self.ninit_rollouts = params.exp_cfg.get("ninit_rollouts", 1)
        self.policy = get_required_argument(params.exp_cfg, "policy",
                                            "Must provide a policy.")

        # Experiment data lands in logdir/<start timestamp>.
        self.logdir = os.path.join(
            get_required_argument(params.log_cfg, "logdir",
                                  "Must provide log parent directory."),
            strftime("%Y-%m-%d--%H_%M_%S",
                     localtime())  # Underscores, not colons: valid on Windows
        )
        self.nrecord = params.log_cfg.get("nrecord", 0)
        self.neval = params.log_cfg.get("neval", 1)
コード例 #9
0
ファイル: TFGP.py プロジェクト: jswang/handful-of-trials
    def __init__(self, params):
        """Initializes class instance.

        Arguments:
            params
                .name (str): Model name
                .kernel_class (class): Kernel class
                .kernel_args (args): Kernel args; must include "input_dim"
                    and "output_dim"
                .num_inducing_points (int): Number of inducing points
                .sess (tf.compat.v1.Session): Tensorflow session
        """
        self.name = params.get("name", "GP")
        self.kernel_class = get_required_argument(
            params, "kernel_class", "Must provide kernel class.")
        self.kernel_args = params.get("kernel_args", {})
        self.num_inducing_points = get_required_argument(
            params, "num_inducing_points",
            "Must provide number of inducing points.")

        # Create a private session (with growing GPU memory) if none given.
        if params.get("sess", None) is None:
            config = tf.compat.v1.ConfigProto()
            config.gpu_options.allow_growth = True
            self._sess = tf.compat.v1.Session(config=config)
        else:
            self._sess = params.get("sess")

        with self._sess.as_default():
            with tf.compat.v1.variable_scope(self.name):
                # "output_dim" is consumed here and removed so it is not
                # passed to the kernel constructor below (mutates the
                # caller-supplied kernel_args dict).
                output_dim = self.kernel_args["output_dim"]
                del self.kernel_args["output_dim"]
                # SGPR is built on single-row zero placeholder data;
                # presumably real training data is assigned later —
                # TODO(review): confirm against callers.
                self.model = gpflow.models.SGPR(
                    np.zeros([1, self.kernel_args["input_dim"]]),
                    np.zeros([1, output_dim]),
                    kern=self.kernel_class(**self.kernel_args),
                    Z=np.zeros([
                        self.num_inducing_points, self.kernel_args["input_dim"]
                    ]))
                self.model.initialize()
コード例 #10
0
ファイル: halfcheetah.py プロジェクト: yuzhou42/POPLIN
    def nn_constructor(self, model_init_cfg, misc=None):
        """Constructs the dynamics-model ensemble.

        Arguments:
            model_init_cfg (DotMap): must contain "model_class", "num_nets",
                and "lr"; when building from scratch it must also contain
                "network_shape", "activation", and "weight_decays". May
                contain "load_model" and "model_dir".
            misc: extra options forwarded verbatim to the model constructor.

        Returns:
            The finalized model.
        """
        model = get_required_argument(
            model_init_cfg, "model_class", "Must provide model class")(DotMap(
                name="model",
                num_networks=get_required_argument(
                    model_init_cfg, "num_nets", "Must provide ensemble size"),
                sess=self.SESS,
                load_model=model_init_cfg.get("load_model", False),
                model_dir=model_init_cfg.get("model_dir", None),
                misc=misc))
        # Hoisted: the learning rate is needed both for building and for
        # finalizing, so fetch it once.
        learning_rate = get_required_argument(model_init_cfg, "lr",
                                              "learning rate missing!")
        if not model_init_cfg.get("load_model", False):
            # Fixed log message: this branch *does* build from scratch.
            print('building model from scratch')
            network_shape = get_required_argument(model_init_cfg,
                                                  "network_shape",
                                                  "network shape missing!")
            activation = get_required_argument(model_init_cfg, "activation",
                                               "activations missing!")
            weight_decays = get_required_argument(model_init_cfg,
                                                  "weight_decays",
                                                  "weight decays missing!")
            model = build_model(model, self.MODEL_IN, self.MODEL_OUT,
                                network_shape, activation, weight_decays,
                                learning_rate)
        model.finalize(tf.train.AdamOptimizer,
                       {"learning_rate": learning_rate})

        return model
コード例 #11
0
    def __init__(self, params):
        """Creates class instance.

        Arguments:
            params
                .env (gym.env): Environment for which this controller will be used.
                .update_fns (list<func>): A list of functions that will be invoked
                    (possibly with a tensorflow session) every time this controller is reset.
                .ac_ub (np.ndarray): (optional) An array of action upper bounds.
                    Defaults to environment action upper bounds.
                .ac_lb (np.ndarray): (optional) An array of action lower bounds.
                    Defaults to environment action lower bounds.
                .per (int): (optional) Determines how often the action sequence will be optimized.
                    Defaults to 1 (reoptimizes at every call to act()).
                .prop_cfg
                    .model_init_cfg (DotMap): A DotMap of initialization parameters for the model.
                        .model_constructor (func): A function which constructs an instance of this
                            model, given model_init_cfg.
                    .model_train_cfg (dict): (optional) A DotMap of training parameters that will be passed
                        into the model every time it is trained. Defaults to an empty dict.
                    .model_pretrained (bool): (optional) If True, assumes that the model
                        has been trained upon construction.
                    .mode (str): Propagation method. Choose between [E, DS, TSinf, TS1, MM].
                        See https://arxiv.org/abs/1805.12114 for details.
                    .npart (int): Number of particles used for DS, TSinf, TS1, and MM propagation methods.
                    .ign_var (bool): (optional) Determines whether or not variance output of the model
                        will be ignored. Defaults to False unless deterministic propagation is being used.
                    .obs_preproc (func): (optional) A function which modifies observations (in a 2D matrix)
                        before they are passed into the model. Defaults to lambda obs: obs.
                        Note: Must be able to process both NumPy and Tensorflow arrays.
                    .obs_postproc (func): (optional) A function which returns vectors calculated from
                        the previous observations and model predictions, which will then be passed into
                        the provided cost function on observations. Defaults to lambda obs, model_out: model_out.
                        Note: Must be able to process both NumPy and Tensorflow arrays.
                    .obs_postproc2 (func): (optional) A function which takes the vectors returned by
                        obs_postproc and (possibly) modifies it into the predicted observations for the
                        next time step. Defaults to lambda obs: obs.
                        Note: Must be able to process both NumPy and Tensorflow arrays.
                    .targ_proc (func): (optional) A function which takes current observations and next
                        observations and returns the array of targets (so that the model learns the mapping
                        obs -> targ_proc(obs, next_obs)). Defaults to lambda obs, next_obs: next_obs.
                        Note: Only needs to process NumPy arrays.
                .opt_cfg
                    .mode (str): Internal optimizer that will be used. Choose between [CEM, Random].
                    .cfg (DotMap): A map of optimizer initializer parameters.
                    .plan_hor (int): The planning horizon that will be used in optimization.
                    .obs_cost_fn (func): A function which computes the cost of every observation
                        in a 2D matrix.
                        Note: Must be able to process both NumPy and Tensorflow arrays.
                    .ac_cost_fn (func): A function which computes the cost of every action
                        in a 2D matrix.
                .log_cfg
                    .save_all_models (bool): (optional) If True, saves models at every iteration.
                        Defaults to False (only most recent model is saved).
                        Warning: Can be very memory-intensive.
                    .log_traj_preds (bool): (optional) If True, saves the mean and variance of predicted
                        particle trajectories. Defaults to False.
                    .log_particles (bool) (optional) If True, saves all predicted particles trajectories.
                        Defaults to False. Note: Takes precedence over log_traj_preds.
                        Warning: Can be very memory-intensive
        """
        super().__init__(params)
        self.dO, self.dU = params.env.observation_space.shape[
            0], params.env.action_space.shape[0]
        self.ac_ub, self.ac_lb = params.env.action_space.high, params.env.action_space.low
        # User-provided bounds may only tighten the environment's bounds.
        self.ac_ub = np.minimum(self.ac_ub, params.get("ac_ub", self.ac_ub))
        self.ac_lb = np.maximum(self.ac_lb, params.get("ac_lb", self.ac_lb))
        self.update_fns = params.get("update_fns", [])
        self.per = params.get("per", 1)

        self.model = get_required_argument(
            params.prop_cfg.model_init_cfg, "model_constructor",
            "Must provide a model constructor.")(
                params.prop_cfg.model_init_cfg)
        self.model_train_cfg = params.prop_cfg.get("model_train_cfg", {})
        self.prop_mode = get_required_argument(
            params.prop_cfg, "mode", "Must provide propagation method.")
        self.npart = get_required_argument(
            params.prop_cfg, "npart", "Must provide number of particles.")
        # Deterministic propagation ("E") always ignores model variance.
        self.ign_var = params.prop_cfg.get("ign_var",
                                           False) or self.prop_mode == "E"

        self.obs_preproc = params.prop_cfg.get("obs_preproc", lambda obs: obs)
        self.obs_postproc = params.prop_cfg.get(
            "obs_postproc", lambda obs, model_out: model_out)
        self.obs_postproc2 = params.prop_cfg.get("obs_postproc2",
                                                 lambda next_obs: next_obs)
        self.targ_proc = params.prop_cfg.get("targ_proc",
                                             lambda obs, next_obs: next_obs)

        self.opt_mode = get_required_argument(
            params.opt_cfg, "mode", "Must provide optimization method.")
        self.plan_hor = get_required_argument(
            params.opt_cfg, "plan_hor", "Must provide planning horizon.")
        self.obs_cost_fn = get_required_argument(
            params.opt_cfg, "obs_cost_fn",
            "Must provide cost on observations.")
        self.ac_cost_fn = get_required_argument(
            params.opt_cfg, "ac_cost_fn", "Must provide cost on actions.")

        self.save_all_models = params.log_cfg.get("save_all_models", False)
        self.log_traj_preds = params.log_cfg.get("log_traj_preds", False)
        self.log_particles = params.log_cfg.get("log_particles", False)

        # Perform argument checks
        if self.prop_mode not in ["E", "DS", "MM", "TS1", "TSinf"]:
            raise ValueError("Invalid propagation method.")
        if self.prop_mode in ["TS1", "TSinf"
                              ] and self.npart % self.model.num_nets != 0:
            raise ValueError(
                "Number of particles must be a multiple of the ensemble size.")
        if self.prop_mode == "E" and self.npart != 1:
            raise ValueError(
                "Deterministic propagation methods only need one particle.")

        # Create action sequence optimizer
        opt_cfg = params.opt_cfg.get("cfg", {})
        self.optimizer = MPC.optimizers[params.opt_cfg.mode](
            sol_dim=self.plan_hor * self.dU,
            lower_bound=np.tile(self.ac_lb, [self.plan_hor]),
            upper_bound=np.tile(self.ac_ub, [self.plan_hor]),
            tf_session=None if not self.model.is_tf_model else self.model.sess,
            **opt_cfg)

        # Controller state variables
        self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
        self.ac_buf = np.array([]).reshape(0, self.dU)
        # Initial solution: midpoint of the action box, repeated over the
        # horizon, with variance (range/4)^2 per dimension.
        self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2, [self.plan_hor])
        self.init_var = np.tile(
            np.square(self.ac_ub - self.ac_lb) / 16, [self.plan_hor])
        # Empty training sets, pre-shaped from the preprocessing functions.
        self.train_in = np.array([]).reshape(
            0, self.dU + self.obs_preproc(np.zeros([1, self.dO])).shape[-1])
        self.train_targs = np.array([]).reshape(
            0,
            self.targ_proc(np.zeros([1, self.dO]),
                           np.zeros([1, self.dO])).shape[-1])
        if self.model.is_tf_model:
            self.sy_cur_obs = tf.Variable(np.zeros(self.dO), dtype=tf.float32)
            self.ac_seq = tf.placeholder(shape=[1, self.plan_hor * self.dU],
                                         dtype=tf.float32)
            self.pred_cost, self.pred_traj = self._compile_cost(
                self.ac_seq, get_pred_trajs=True)
            self.optimizer.setup(self._compile_cost, True)
            self.model.sess.run(tf.variables_initializer([self.sy_cur_obs]))
        else:
            raise NotImplementedError()

        print("Created an MPC controller, prop mode %s, %d particles. " %
              (self.prop_mode, self.npart) +
              ("Ignoring variance." if self.ign_var else ""))

        if self.save_all_models:
            # Fixed unbalanced parenthesis in the original message.
            print(
                "Controller will save all models. (Note: This may be memory-intensive.)"
            )
        if self.log_particles:
            print(
                "Controller is logging particle predictions (Note: This may be memory-intensive)."
            )
            self.pred_particles = []
        elif self.log_traj_preds:
            print(
                "Controller is logging trajectory prediction statistics (mean+var)."
            )
            self.pred_means, self.pred_vars = [], []
        else:
            print("Trajectory prediction logging is disabled.")
コード例 #12
0
    def __init__(self, params):
        """Initializes class instance.

        Argument:
            params (DotMap): A DotMap containing the following:
                .sim_cfg:
                    .env (gym.env): Environment for this experiment
                    .task_hor (int): Task horizon
                    .stochastic (bool): (optional) If True, agent adds noise to its actions.
                        Must provide noise_std (see below). Defaults to False.
                    .noise_std (float): for stochastic agents, noise of the form N(0, noise_std^2I)
                        will be added.

                .exp_cfg:
                    .ntrain_iters (int): Number of training iterations to be performed.
                    .nrollouts_per_iter (int): (optional) Number of rollouts done between training
                        iterations. Defaults to 1.
                    .policy (controller): Policy that will be trained.
                    .value (model): Value function that will be trained.
                    .value_target (model): Target network for the value function.
                    .demo_low_cost (int): Minimum allowed cost for demonstrations
                    .demo_high_cost (int): Maximum allowed cost for demonstrations
                    .num_demos (int): Number of demonstrations
                    .ss_buffer_size (int): Size of buffer of safe states that density model is
                        trained on
                    .gym_robotics (bool): Indicates whether env is a gym robotics env, in which
                        case there are some small differences in data loading and environment
                        parameters
                .log_cfg:
                    .logdir (str): Parent of directory path where experiment data will be saved.
                        Experiment will be saved in logdir/<date+time of experiment start>
                    .nrecord (int): (optional) Number of rollouts to record for every iteration.
                        Defaults to 0.
                    .neval (int): (optional) Number of rollouts for performance evaluation.
                        Defaults to 1.
        """
        self.env = get_required_argument(params.sim_cfg, "env",
                                         "Must provide environment.")
        # Demonstration filtering / safe-set configuration.
        self.demo_low_cost = params.exp_cfg.demo_low_cost
        self.demo_high_cost = params.exp_cfg.demo_high_cost
        self.num_demos = params.exp_cfg.num_demos
        self.ss_buffer_size = params.exp_cfg.ss_buffer_size
        self.gym_robotics = params.exp_cfg.gym_robotics

        self.task_hor = get_required_argument(params.sim_cfg, "task_hor",
                                              "Must provide task horizon.")
        # Stochastic agents add N(0, noise_std^2 I) noise to their actions.
        if params.sim_cfg.get("stochastic", False):
            self.agent = Agent(
                DotMap(
                    env=self.env,
                    noisy_actions=True,
                    noise_stddev=get_required_argument(
                        params.sim_cfg, "noise_std",
                        "Must provide noise standard deviation in the case of a stochastic environment."
                    )))
        else:
            self.agent = Agent(DotMap(env=self.env, noisy_actions=False))

        self.ntrain_iters = get_required_argument(
            params.exp_cfg, "ntrain_iters",
            "Must provide number of training iterations.")
        self.nrollouts_per_iter = params.exp_cfg.get("nrollouts_per_iter", 1)
        self.policy = get_required_argument(params.exp_cfg, "policy",
                                            "Must provide a policy.")
        self.value = get_required_argument(params.exp_cfg, "value",
                                           "Must provide a value function.")
        # Fixed copy-pasted error message: this key is the *target* network.
        self.target = get_required_argument(
            params.exp_cfg, "value_target",
            "Must provide a value target function.")
        self.value.target = self.target

        self.logdir = os.path.join(
            get_required_argument(params.log_cfg, "logdir",
                                  "Must provide log parent directory."),
            # Underscores instead of colons so the path is valid on Windows.
            strftime("%Y-%m-%d--%H_%M_%S", localtime()))
        self.nrecord = params.log_cfg.get("nrecord", 0)
        self.neval = params.log_cfg.get("neval", 1)
        self.load_samples = params.exp_cfg.get("load_samples", True)
        self.demo_load_path = params.exp_cfg.get("demo_load_path", None)
        self.use_value = params.exp_cfg.get("use_value", True)
        self.teacher = params.exp_cfg.get("teacher")
        self.stabilizable_observations = []
        self.tvalue_schedule = LinearSchedule(3, 3, 500)
        # 1-NN density model over states the controller could stabilize.
        self.stabilized_model = knn(n_neighbors=1)
        self.target_update_freq = 1
コード例 #13
0
    def __init__(self, params):
        """Creates an MPC (model-predictive control) controller.

        Arguments:
            params (DotMap): Controller configuration, containing
                .dO / .dU: observation and action dimensionalities.
                .opt_cfg: optimizer config — mode, plan_hor, constrains
                    (action bounds), obs_cost_fn, target, ac_cost_fn, and
                    an optional nested optimizer "cfg".
                .prop_cfg: propagation config — model_init_cfg (with a
                    model_constructor factory), mode, npart, and optional
                    pre/post-processing hooks.
                .log_cfg: logging flags (save_all_models, log_traj_preds,
                    log_particles).

        Raises:
            ValueError: If the propagation mode is invalid, if the number
                of particles is not a multiple of the ensemble size for
                TS modes, or if mode "E" is given more than one particle.
            NotImplementedError: If the model is not a TensorFlow model.
        """
        super().__init__(params)
        self.dO, self.dU = params.dO, params.dU
        # NOTE(review): "constrains" (sic) is the config key used across the
        # project; kept unchanged for compatibility with existing configs.
        constrains = get_required_argument(
            params.opt_cfg, "constrains",
            "Must provide the optimisation constrains.")
        # constrains[0] holds the (lower, upper) action bounds.
        self.ac_lb = constrains[0][0]
        self.ac_ub = constrains[0][1]
        print("lb", self.ac_lb)
        print("ub", self.ac_ub)
        self.update_fns = params.get("update_fns", [])
        self.per = params.get("per", 1)

        # Dynamics model, built by the user-supplied factory from its config.
        self.model = get_required_argument(
            params.prop_cfg.model_init_cfg, "model_constructor",
            "Must provide a model constructor.")(
                params.prop_cfg.model_init_cfg)
        self.model_train_cfg = params.prop_cfg.get("model_train_cfg", {})
        self.prop_mode = get_required_argument(
            params.prop_cfg, "mode", "Must provide propagation method.")
        self.npart = get_required_argument(
            params.prop_cfg, "npart", "Must provide number of particles.")
        # Deterministic propagation ("E") always ignores model variance.
        self.ign_var = params.prop_cfg.get("ign_var",
                                           False) or self.prop_mode == "E"

        self.obs_preproc = params.prop_cfg.get(
            "obs_preproc", lambda obs: obs)  # Defaults to do nothing
        self.obs_postproc = params.prop_cfg.get(
            "obs_postproc",
            lambda obs, model_out: model_out)  # Defaults to do nothing
        self.obs_postproc2 = params.prop_cfg.get(
            "obs_postproc2",
            lambda next_obs: next_obs)  # Defaults to do nothing
        self.targ_proc = params.prop_cfg.get(
            "targ_proc",
            lambda obs, next_obs: next_obs)  # Defaults to do nothing

        self.opt_mode = get_required_argument(
            params.opt_cfg, "mode", "Must provide optimization method.")
        self.plan_hor = get_required_argument(
            params.opt_cfg, "plan_hor", "Must provide planning horizon.")
        self.og_plan_hor = self.plan_hor
        self.obs_cost_fn = get_required_argument(
            params.opt_cfg, "obs_cost_fn",
            "Must provide cost on observations.")
        # Fixed error message: it previously duplicated the obs_cost_fn
        # message ("Must provide cost on observations."), which misled
        # users debugging a missing "target" entry.
        self.target = get_required_argument(
            params.opt_cfg, "target", "Must provide target.")
        self.ac_cost_fn = get_required_argument(
            params.opt_cfg, "ac_cost_fn", "Must provide cost on actions.")

        self.save_all_models = params.log_cfg.get("save_all_models", False)
        self.log_traj_preds = params.log_cfg.get("log_traj_preds", False)
        self.log_particles = params.log_cfg.get("log_particles", False)

        self.useConstantAction = params.get("useConstantAction", False)
        if self.useConstantAction:
            self.constantActionToUse = get_required_argument(
                params, "constantActionToUse", "Must provide constant action")

        # Perform argument checks
        if self.prop_mode not in ["E", "DS", "MM", "TS1", "TSinf"]:
            raise ValueError("Invalid propagation method.")
        # Trajectory-sampling modes assign particles evenly to ensemble nets.
        if self.prop_mode in ["TS1", "TSinf"
                              ] and self.npart % self.model.num_nets != 0:
            raise ValueError(
                "Number of particles must be a multiple of the ensemble size.")
        if self.prop_mode == "E" and self.npart != 1:
            raise ValueError(
                "Deterministic propagation methods only need one particle.")

        # Create action sequence optimizer
        opt_cfg = params.opt_cfg.get("cfg", {})
        self.optimizer = MPC.optimizers[params.opt_cfg.mode](
            sol_dim=self.plan_hor * self.dU,
            constrains=constrains,
            max_resamples=params.opt_cfg.get("max_resamples", 10),
            tf_session=None if not self.model.is_tf_model else self.model.sess,
            **opt_cfg)

        self.has_been_trained = params.prop_cfg.get("model_pretrained", False)

        # Init parameters for the optimizer
        # --------------------------------------------------------------------
        # Buffer of actions if not planning at each step
        self.ac_buf = np.array([]).reshape(0, self.dU)  # (sol_dim,)
        # Previous solution is mean between min and max action
        self.prev_sol = np.tile((self.ac_lb + self.ac_ub) / 2,
                                [self.plan_hor])  # (sol_dim,)
        # sigma = (max - min) / 4 for Gaussian, so var = sigma^2
        #       = (max - min)^2 / 16
        self.init_var = np.tile(
            np.square(self.ac_ub - self.ac_lb) / 16,
            [self.plan_hor])  # (sol_dim,)
        # print("Init var", self.init_var)

        # Init parameters for model training
        # ---------------------------------------------------------------------
        # shape going in is (~, n actions + n input observations)
        self.train_in = np.array([]).reshape(
            0, self.dU + self.obs_preproc(np.zeros([1, self.dO])).shape[-1])
        # shape is (~, n targets)
        self.train_targs = np.array([]).reshape(
            0,
            self.targ_proc(np.zeros([1, self.dO]),
                           np.zeros([1, self.dO])).shape[-1])
        if self.model.is_tf_model:
            self.sy_cur_obs = tf.Variable(np.zeros(self.dO),
                                          dtype=tf.float32)  # (dO,)
            self.ac_seq = tf.placeholder(shape=[1, self.plan_hor * self.dU],
                                         dtype=tf.float32)  # (1, H*dU)
            self.pred_cost, self.pred_traj = self._compile_cost(
                self.ac_seq, get_pred_trajs=True)
            self.optimizer.setup(self._compile_cost, True)
            self.model.sess.run(tf.variables_initializer([self.sy_cur_obs]))
        else:
            raise NotImplementedError()

        print("Created an MPC controller, prop mode %s, %d particles. " %
              (self.prop_mode, self.npart) +
              ("Ignoring variance." if self.ign_var else ""))

        if self.save_all_models:
            # Fixed unbalanced parenthesis in the message below.
            print(
                "Controller will save all models. (Note: This may be memory-intensive.)"
            )
        if self.log_particles:
            print(
                "Controller is logging particle predictions (Note: This may be memory-intensive)."
            )
            self.pred_particles = []
        elif self.log_traj_preds:
            print(
                "Controller is logging trajectory prediction statistics (mean+var)."
            )
            self.pred_means, self.pred_vars = [], []
        else:
            print("Trajectory prediction logging is disabled.")