Example #1
    def __init__(self,
                 name,
                 env,
                 dynamics_model,
                 reward_model=None,
                 discount=1,
                 use_cem=False,
                 n_candidates=1024,
                 horizon=10,
                 num_cem_iters=8,
                 percent_elites=0.05,
                 use_reward_model=False):
        self.dynamics_model = dynamics_model
        self.reward_model = reward_model
        self.discount = discount
        self.n_candidates = n_candidates
        self.horizon = horizon
        self.use_cem = use_cem
        self.num_cem_iters = num_cem_iters
        self.percent_elites = percent_elites
        self.env = env
        self.use_reward_model = use_reward_model
        self._hidden_state = None

        self.unwrapped_env = env
        while hasattr(self.unwrapped_env, 'wrapped_env'):
            self.unwrapped_env = self.unwrapped_env.wrapped_env

        # make sure that env has reward function
        if not self.use_reward_model:
            assert hasattr(self.unwrapped_env,
                           'reward'), "env must have a reward function"

        Serializable.quick_init(self, locals())
        super(RNNMPCController, self).__init__(env=env)
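
The loop above keeps unwrapping wrapped_env attributes because only the innermost environment carries the analytic reward function the planner uses to score imagined rollouts. A minimal usage sketch (a hypothetical setup, assuming the standard get_action policy interface; env and dynamics_model are built elsewhere):

controller = RNNMPCController(
    name='mpc_controller',
    env=env,                        # innermost env must define a reward function
    dynamics_model=dynamics_model,  # learned RNN dynamics model
    use_cem=True,                   # CEM optimizer instead of random shooting
    n_candidates=1024,              # candidate action sequences per step
    horizon=10,                     # planning horizon
    num_cem_iters=8,
    percent_elites=0.05)            # top 5% refit the sampling distribution
action = controller.get_action(observation)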
Example #2
    def __init__(self,
                 task='force',
                 reset_every_episode=False,
                 fixed_goal=False):
        Serializable.quick_init(self, locals())

        self.reset_every_episode = reset_every_episode
        self.first = True
        self.fixed_goal = fixed_goal
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "arm_7dof.xml"))
        task = None if task == 'None' else task

        self.cripple_mask = np.ones(self.action_space.shape)

        self._init_geom_rgba = self.model.geom_rgba.copy()
        self._init_geom_contype = self.model.geom_contype.copy()
        self._init_geom_size = self.model.geom_size.copy()
        self._init_body_pos = self.model.body_pos.copy()
        self._init_body_masses = self.model.body_mass.copy()
        self._init_geom_pos = self.model.geom_pos.copy()
        self.dt = self.model.opt.timestep

        assert task in [None, 'cripple', 'damping', 'mass', 'force']

        self.task = task
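
cripple_mask starts as all ones; the 'cripple' task presumably zeroes selected entries so the corresponding actuators receive no torque. A hedged sketch of how the mask would be applied (step is not shown in the source; forward_dynamics and get_current_obs follow the rllab MujocoEnv convention):

    def step(self, action):
        action = self.cripple_mask * action  # zeroed entries disable those joints
        self.forward_dynamics(action)
        next_obs = self.get_current_obs()
        reward = self.reward(None, action, next_obs)
        return next_obs, reward, False, {}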
Example #3
    def __init__(self,
                 task='hfield',
                 reset_every_episode=False,
                 reward=True,
                 max_timesteps=1000,
                 *args,
                 **kwargs):
        Serializable.quick_init(self, locals())

        self.cripple_mask = None
        self.reset_every_episode = reset_every_episode
        self.first = True
        self.timesteps = 0
        self.max_timesteps = max_timesteps
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "half_cheetah_hfield.xml"))

        task = None if task == 'None' else task

        self._init_geom_rgba = self.model.geom_rgba.copy()
        self._init_geom_contype = self.model.geom_contype.copy()
        self._init_geom_size = self.model.geom_size.copy()
        self._init_geom_pos = self.model.geom_pos.copy()
        self.dt = self.model.opt.timestep

        assert task in [None, 'hfield', 'hill', 'basin', 'steep', 'gentle']

        self.task = task
        self.x_walls = np.array([250, 260, 261, 270, 280, 285])
        self.height_walls = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.2])
        self.height = 0.8
        self.width = 15
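
x_walls and height_walls describe bumps to stamp into the height field. A sketch of how they could be written into the model (an assumption, not from the source: a mujoco-py style model exposing hfield_nrow, hfield_ncol, and a flat, [0, 1]-normalized hfield_data array):

    def _put_walls_on_hfield(self):
        nrow = int(self.model.hfield_nrow[0])
        ncol = int(self.model.hfield_ncol[0])
        field = self.model.hfield_data.reshape(nrow, ncol).copy()
        for x, h in zip(self.x_walls, self.height_walls):
            field[:, x] += h  # raise one column per wall index (extent assumed)
        self.model.hfield_data[:] = np.clip(field, 0.0, 1.0).ravel()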
Example #4
    def __init__(self,
                 task='damping',
                 reset_every_episode=False,
                 max_timesteps=1000):
        Serializable.quick_init(self, locals())

        self.reset_every_episode = reset_every_episode
        self.first = True
        self.timesteps = 0
        self.max_timesteps = max_timesteps
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "half_cheetah_blocks.xml"))
        task = None if task == 'None' else task

        self.cripple_mask = np.ones(self.action_space.shape)

        self._init_geom_rgba = self.model.geom_rgba.copy()
        self._init_geom_contype = self.model.geom_contype.copy()
        self._init_geom_size = self.model.geom_size.copy()
        self._init_geom_pos = self.model.geom_pos.copy()
        self.dt = self.model.opt.timestep

        assert task in [None, 'damping']

        self.task = task
Example #5
    def __init__(self,
                 name,
                 input_dim,
                 output_dim,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 input_var=None,
                 params=None,
                 **kwargs):

        Serializable.quick_init(self, locals())

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.name = name
        self.input_var = input_var

        self.hidden_sizes = hidden_sizes
        self.hidden_nonlinearity = hidden_nonlinearity
        self.output_nonlinearity = output_nonlinearity

        self.batch_normalization = kwargs.get('batch_normalization', False)

        self._params = params
        self._assign_ops = None
        self._assign_phs = None
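
_assign_ops and _assign_phs are created lazily. A common TF1 pattern for the matching set_params is sketched below (an assumption about the implementation; get_params is taken to return an OrderedDict of variables):

    def set_params(self, param_values):
        if self._assign_ops is None:
            # build feedable assign ops once, reuse them on every call
            self._assign_ops, self._assign_phs = [], []
            for var in self.get_params().values():
                ph = tf.placeholder(dtype=var.dtype.base_dtype, shape=var.shape)
                self._assign_ops.append(tf.assign(var, ph))
                self._assign_phs.append(ph)
        feed_dict = dict(zip(self._assign_phs, param_values.values()))
        tf.get_default_session().run(self._assign_ops, feed_dict=feed_dict)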
Example #6
    def __init__(self, observation_space, action_space):
        """
        :type observation_space: Space
        :type action_space: Space
        """
        Serializable.quick_init(self, locals())
        self._observation_space = observation_space
        self._action_space = action_space
Example #7
    def __init__(self, *args, **kwargs):
        # store the init args for serialization and call the super constructors
        Serializable.quick_init(self, locals())
        Layer.__init__(self, *args, **kwargs)
        self._cell_type = kwargs.get('cell_type', 'gru')
        self.state_var = kwargs.get('state_var', None)

        self.build_graph()
Example #8
    def __init__(self, task='rotorless', reset_every_episode=False, port=19999):
        Serializable.quick_init(self, locals())
        self.reset_every_episode = reset_every_episode
        self.first = True
        VREPQuad.__init__(self, ip='127.0.0.1', port=port)

        task = None if task == 'None' else task
        
        # Allow to disable a rotor
        self.task_mask = np.ones(self.action_space.shape)

        assert task in [None, 'rotorless']

        self.task = task
        self.actiondim = self.action_space.shape[0]
Example #9
    def __init__(self,
                 task='hfield',
                 max_episode_steps=200,
                 reset_every_episode=False,
                 reward=True,
                 frame_skip=1,
                 *args,
                 **kwargs):
        Serializable.quick_init(self, locals())

        self.cripple_mask = None
        self.reset_every_episode = reset_every_episode
        self.first = True
        print("frame_skip :", frame_skip)
        MujocoEnv.__init__(self,
                           os.path.join(
                               os.path.abspath(os.path.dirname(__file__)),
                               "assets", "half_cheetah_hfield.xml"),
                           frame_skip=frame_skip)

        task = None if task == 'None' else task

        # rgba when material is omitted (ngeom x 4)
        self._init_geom_rgba = self.model.geom_rgba.copy()
        # geom_contype : geom contact type (ngeom x 1)
        self._init_geom_contype = self.model.geom_contype.copy()
        # geom-specific size parameters (ngeom x 3)
        self._init_geom_size = self.model.geom_size.copy()
        # geom position offset rel. to body (ngeom x 3)
        self._init_geom_pos = self.model.geom_pos.copy()
        # self.dt is left to MujocoEnv here: with frame_skip > 1 the effective
        # dt is timestep * frame_skip, not model.opt.timestep alone
        # self.dt = self.model.opt.timestep

        assert task in [None, 'hfield', 'hill', 'basin', 'steep', 'gentle']

        self.task = task
        self.x_walls = np.array([250, 260, 261, 270, 280, 285])
        self.height_walls = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.2])
        self.height = 0.8
        self.width = 15

        self._max_episode_steps = max_episode_steps
Example #10
    def __init__(self,
                 env,
                 scale_reward=1.,
                 normalize_obs=False,
                 normalize_reward=False,
                 obs_alpha=0.001,
                 reward_alpha=0.001,
                 normalization_scale=1.,
                 ):
        Serializable.quick_init(self, locals())

        self._scale_reward = scale_reward
        self._wrapped_env = env

        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        self._obs_alpha = obs_alpha
        self._obs_mean = np.zeros(self.observation_space.shape)
        self._obs_var = np.ones(self.observation_space.shape)
        self._reward_alpha = reward_alpha
        self._reward_mean = 0.
        self._reward_var = 1.
        self._normalization_scale = normalization_scale
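
obs_alpha and reward_alpha drive exponential running estimates of mean and variance. A sketch of the per-observation update this wrapper typically applies (method names are assumptions):

    def _update_obs_estimate(self, obs):
        a = self._obs_alpha
        self._obs_mean = (1 - a) * self._obs_mean + a * obs
        self._obs_var = (1 - a) * self._obs_var + a * np.square(obs - self._obs_mean)

    def _apply_normalize_obs(self, obs):
        self._update_obs_estimate(obs)
        return (obs - self._obs_mean) / (np.sqrt(self._obs_var) + 1e-8)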
Example #11
    def __init__(self, task='cripple', reset_every_episode=False):
        Serializable.quick_init(self, locals())
        self.cripple_mask = None
        self.reset_every_episode = reset_every_episode
        self.first = True

        task = None if task == 'None' else task
        self.task = task
        self.crippled_leg = 0
        self.prev_torso = None
        self.prev_qpos = None
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "half_cheetah_disabled.xml"))
        self._init_geom_rgba = self.model.geom_rgba.copy()
        self._init_geom_contype = self.model.geom_contype.copy()
        self._init_geom_size = self.model.geom_size.copy()
        self._init_geom_pos = self.model.geom_pos.copy()
        self.dt = self.model.opt.timestep

        self.cripple_mask = np.ones(self.action_space.shape)
        self.reward_range = (-np.inf, np.inf)
        utils.EzPickle.__init__(self)
Example #12
    def __init__(self, *args, **kwargs):
        # store the init args for serialization and call the super constructors
        Serializable.quick_init(self, locals())
        Layer.__init__(self, *args, **kwargs)

        self.build_graph()
Example #13
    def __init__(self,
                 name,
                 env,
                 hidden_sizes=(512, 512),
                 meta_batch_size=10,
                 hidden_nonlinearity=tf.nn.relu,
                 output_nonlinearity=None,
                 batch_size=500,
                 learning_rate=0.001,
                 inner_learning_rate=0.1,
                 normalize_input=True,
                 optimizer=tf.train.AdamOptimizer,
                 valid_split_ratio=0.2,
                 rolling_average_persitency=0.99,
                 ):

        Serializable.quick_init(self, locals())

        self.normalization = None
        self.normalize_input = normalize_input
        self.next_batch = None
        self.meta_batch_size = meta_batch_size

        self.valid_split_ratio = valid_split_ratio
        self.rolling_average_persitency = rolling_average_persitency

        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.inner_learning_rate = inner_learning_rate
        self.name = name
        self._dataset_train = None
        self._dataset_test = None
        self._prev_params = None
        self._adapted_param_values = None

        # determine dimensionality of state and action space
        self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
        self.action_space_dims = action_space_dims = env.action_space.shape[0]

        hidden_nonlinearity = self._activations[hidden_nonlinearity]
        output_nonlinearity = self._activations[output_nonlinearity]

        """ ------------------ Pre-Update Graph + Adaptation ----------------------- """
        with tf.variable_scope(name):
            # Placeholders
            self.obs_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))
            self.act_ph = tf.placeholder(tf.float32, shape=(None, action_space_dims))
            self.delta_ph = tf.placeholder(tf.float32, shape=(None, obs_space_dims))

            # Concatenate action and observation --> NN input
            self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)

            # Create MLP
            mlp = MLP(name,
                      output_dim=obs_space_dims,
                      hidden_sizes=hidden_sizes,
                      hidden_nonlinearity=hidden_nonlinearity,
                      output_nonlinearity=output_nonlinearity,
                      input_var=self.nn_input,
                      input_dim=obs_space_dims+action_space_dims)

            self.delta_pred = mlp.output_var  # shape: (batch_size, obs_space_dims)

            self.loss = tf.reduce_mean(tf.square(self.delta_ph - self.delta_pred))
            self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
            self.adaptation_sym = tf.train.GradientDescentOptimizer(self.inner_learning_rate).minimize(self.loss)

            # Tensor_utils
            self.f_delta_pred = tensor_utils.compile_function([self.obs_ph, self.act_ph], self.delta_pred)

        """ --------------------------- Meta-training Graph ---------------------------------- """
        nn_input_per_task = tf.split(self.nn_input, self.meta_batch_size, axis=0)
        delta_per_task = tf.split(self.delta_ph, self.meta_batch_size, axis=0)

        pre_input_per_task, post_input_per_task = zip(*[tf.split(nn_input, 2, axis=0) for nn_input in nn_input_per_task])
        pre_delta_per_task, post_delta_per_task = zip(*[tf.split(delta, 2, axis=0) for delta in delta_per_task])

        pre_losses = []
        post_losses = []
        self._adapted_params = []

        for idx in range(self.meta_batch_size):
            with tf.variable_scope(name + '/pre_model_%d' % idx, reuse=tf.AUTO_REUSE):
                pre_mlp = MLP(name,
                              output_dim=obs_space_dims,
                              hidden_sizes=hidden_sizes,
                              hidden_nonlinearity=hidden_nonlinearity,
                              output_nonlinearity=output_nonlinearity,
                              input_var=pre_input_per_task[idx],
                              input_dim=obs_space_dims + action_space_dims,
                              params=mlp.get_params())

                pre_delta_pred = pre_mlp.output_var
                pre_loss = tf.reduce_mean(tf.square(pre_delta_per_task[idx] - pre_delta_pred))
                adapted_params = self._adapt_sym(pre_loss, pre_mlp.get_params())
                self._adapted_params.append(adapted_params)

            with tf.variable_scope(name + '/post_model_%d' % idx, reuse=tf.AUTO_REUSE):
                post_mlp = MLP(name,
                               output_dim=obs_space_dims,
                               hidden_sizes=hidden_sizes,
                               hidden_nonlinearity=hidden_nonlinearity,
                               output_nonlinearity=output_nonlinearity,
                               input_var=post_input_per_task[idx],
                               params=adapted_params,
                               input_dim=obs_space_dims + action_space_dims)
                post_delta_pred = post_mlp.output_var

                post_loss = tf.reduce_mean(tf.square(post_delta_per_task[idx] - post_delta_pred))

                pre_losses.append(pre_loss)
                post_losses.append(post_loss)

        # meta-objective: average the per-task losses, built outside the task loop
        self.pre_loss = tf.reduce_mean(pre_losses)
        self.post_loss = tf.reduce_mean(post_losses)
        self.train_op = optimizer(self.learning_rate).minimize(self.post_loss)

        """ --------------------------- Post-update Inference Graph --------------------------- """
        with tf.variable_scope(name + '_ph_graph'):
            self.post_update_delta = []
            self.network_phs_meta_batch = []

            nn_input_per_task = tf.split(self.nn_input, self.meta_batch_size, axis=0)
            for idx in range(meta_batch_size):
                with tf.variable_scope('task_%i' % idx):
                    network_phs = self._create_placeholders_for_vars(mlp.get_params())
                    self.network_phs_meta_batch.append(network_phs)

                    mlp_meta_batch = MLP(name,
                                         output_dim=obs_space_dims,
                                         hidden_sizes=hidden_sizes,
                                         hidden_nonlinearity=hidden_nonlinearity,
                                         output_nonlinearity=output_nonlinearity,
                                         params=network_phs,
                                         input_var=nn_input_per_task[idx],
                                         input_dim=obs_space_dims + action_space_dims,
                                         )

                    self.post_update_delta.append(mlp_meta_batch.output_var)

        self._networks = [mlp]
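
_adapt_sym is called above but not shown. The standard MAML inner step it implements is a single symbolic gradient update on the pre-update loss, kept in the graph so the meta-objective can backpropagate through the adaptation (a sketch; params_var is assumed to be a collections.OrderedDict of parameter tensors):

    def _adapt_sym(self, loss, params_var):
        keys = list(params_var.keys())
        grads = tf.gradients(loss, [params_var[k] for k in keys])
        adapted = [params_var[k] - self.inner_learning_rate * g
                   for k, g in zip(keys, grads)]
        return OrderedDict(zip(keys, adapted))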
Example #14
    def __init__(
        self,
        name,
        env,
        hidden_sizes=(512, ),
        cell_type='lstm',
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
        batch_size=500,
        learning_rate=0.001,
        normalize_input=True,
        optimizer=tf.train.AdamOptimizer,
        valid_split_ratio=0.2,
        rolling_average_persitency=0.99,
        backprop_steps=50,
    ):

        Serializable.quick_init(self, locals())
        self.recurrent = True

        self.normalization = None
        self.normalize_input = normalize_input
        self.next_batch = None

        self.valid_split_ratio = valid_split_ratio
        self.rolling_average_persitency = rolling_average_persitency
        self.backprop_steps = backprop_steps

        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.name = name
        self._dataset_train = None
        self._dataset_test = None

        # Determine dimensionality of state and action space
        self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
        self.action_space_dims = action_space_dims = env.action_space.shape[0]
        """ computation graph for training and simple inference """
        with tf.variable_scope(name):
            # Placeholders
            self.obs_ph = tf.placeholder(tf.float32,
                                         shape=(None, None, obs_space_dims),
                                         name='obs_ph')
            self.act_ph = tf.placeholder(tf.float32,
                                         shape=(None, None, action_space_dims),
                                         name='act_ph')
            self.delta_ph = tf.placeholder(tf.float32,
                                           shape=(None, None, obs_space_dims),
                                           name='delta_ph')

            # Concatenate action and observation --> NN input
            self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=2)

            # Create RNN
            rnns = []
            delta_preds = []
            self.obs_next_pred = []
            self.hidden_state_ph = []
            self.next_hidden_state_var = []
            self.cell = []
            with tf.variable_scope('rnn_model'):
                rnn = RNN(
                    name,
                    output_dim=self.obs_space_dims,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=output_nonlinearity,
                    input_var=self.nn_input,
                    input_dim=self.obs_space_dims + self.action_space_dims,
                    cell_type=cell_type,
                )

            self.delta_pred = rnn.output_var
            self.hidden_state_ph = rnn.state_var
            self.next_hidden_state_var = rnn.next_state_var
            self.cell = rnn.cell
            self._zero_state = self.cell.zero_state(1, tf.float32)

            self.loss = tf.reduce_mean(
                tf.square(self.delta_pred - self.delta_ph))
            params = list(rnn.get_params().values())
            self._gradients_ph = [
                tf.placeholder(shape=param.shape, dtype=tf.float32)
                for param in params
            ]
            self._gradients_vars = tf.gradients(self.loss, params)
            applied_gradients = zip(self._gradients_ph, params)
            self.train_op = optimizer(
                self.learning_rate).apply_gradients(applied_gradients)

            # Tensor_utils
            self.f_delta_pred = tensor_utils.compile_function(
                [self.obs_ph, self.act_ph, self.hidden_state_ph],
                [self.delta_pred, self.next_hidden_state_var])

        self._networks = [rnn]
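
The gradient placeholders enable truncated backpropagation through time: gradients are computed per backprop_steps-long segment, accumulated in numpy, and applied in a single train_op call. A sketch of that loop (a hypothetical helper; sess and the (batch, time, dim) arrays are assumptions, and batch size 1 matches self._zero_state):

    def fit_tbptt(self, sess, obs, act, delta):
        grads = [np.zeros(ph.shape.as_list(), dtype=np.float32)
                 for ph in self._gradients_ph]
        hidden = sess.run(self._zero_state)
        for t in range(0, obs.shape[1], self.backprop_steps):
            feed = {self.obs_ph: obs[:, t:t + self.backprop_steps],
                    self.act_ph: act[:, t:t + self.backprop_steps],
                    self.delta_ph: delta[:, t:t + self.backprop_steps],
                    self.hidden_state_ph: hidden}
            seg_grads, hidden = sess.run(
                [self._gradients_vars, self.next_hidden_state_var], feed)
            grads = [g + sg for g, sg in zip(grads, seg_grads)]
        sess.run(self.train_op,
                 feed_dict=dict(zip(self._gradients_ph, grads)))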
Example #15
    def __init__(
        self,
        name,
        env,
        hidden_sizes=(512, 512),
        hidden_nonlinearity=tf.nn.relu,
        output_nonlinearity=None,
        batch_size=500,
        learning_rate=0.001,
        normalize_input=True,
        optimizer=tf.train.AdamOptimizer,
        valid_split_ratio=0.2,
        rolling_average_persitency=0.99,
    ):

        Serializable.quick_init(self, locals())

        self.normalization = None
        self.normalize_input = normalize_input
        self.next_batch = None

        self.valid_split_ratio = valid_split_ratio
        self.rolling_average_persitency = rolling_average_persitency

        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.name = name
        self._dataset_train = None
        self._dataset_test = None

        # determine dimensionality of state and action space
        self.obs_space_dims = obs_space_dims = env.observation_space.shape[0]
        self.action_space_dims = action_space_dims = env.action_space.shape[0]

        hidden_nonlinearity = self._activations[hidden_nonlinearity]
        output_nonlinearity = self._activations[output_nonlinearity]

        with tf.variable_scope(name):
            # placeholders
            self.obs_ph = tf.placeholder(tf.float32,
                                         shape=(None, obs_space_dims))
            self.act_ph = tf.placeholder(tf.float32,
                                         shape=(None, action_space_dims))
            self.delta_ph = tf.placeholder(tf.float32,
                                           shape=(None, obs_space_dims))

            # concatenate action and observation --> NN input
            self.nn_input = tf.concat([self.obs_ph, self.act_ph], axis=1)

            # create MLP
            with tf.variable_scope('ff_model'):
                mlp = MLP(name,
                          output_dim=obs_space_dims,
                          hidden_sizes=hidden_sizes,
                          hidden_nonlinearity=hidden_nonlinearity,
                          output_nonlinearity=output_nonlinearity,
                          input_var=self.nn_input,
                          input_dim=obs_space_dims + action_space_dims)

            self.delta_pred = mlp.output_var  # shape: (batch_size, obs_space_dims)

            self.loss = tf.reduce_mean(
                tf.square(self.delta_ph - self.delta_pred))
            self.optimizer = optimizer(self.learning_rate)
            self.train_op = self.optimizer.minimize(self.loss)

            # tensor_utils
            self.f_delta_pred = tensor_utils.compile_function(
                [self.obs_ph, self.act_ph], self.delta_pred)

        self._networks = [mlp]
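
The model is trained on state deltas, so targets are next_obs - obs and predictions are added back onto the current state. A usage sketch (the session and array names are assumptions):

deltas = next_observations - observations
sess.run(model.train_op, feed_dict={model.obs_ph: observations,
                                    model.act_ph: actions,
                                    model.delta_ph: deltas})
# one-step prediction via the compiled function
pred_next_obs = observations + model.f_delta_pred(observations, actions)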
Example #16
    def __init__(self, env):
        Serializable.quick_init(self, locals())
        self.env = env
        while hasattr(self.env, 'wrapped_env'):
            self.env = self.env.wrapped_env
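
The same unwrapping idiom as Example #1, reduced to a standalone helper (a sketch, not from the source): peel wrapper layers, such as the normalizing wrapper from Example #10, until the innermost environment is reached.

def unwrap_env(env):
    while hasattr(env, 'wrapped_env'):
        env = env.wrapped_env
    return env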