Beispiel #1
0
    def get_action(self, model, inputs, goals):
        """
        Run one planning step in the session and return the fetched outputs.

        :param model: not referenced in this method
        :param inputs (AttrDict): current observations, looked up by the
            names of the observation placeholders
        :param goals (AttrDict): goal values, looked up by the names of the
            goal placeholders
        :return: AttrDict holding the evaluated get-action outputs
        """
        assert self._session is not None

        # observations: scalars get a leading axis before being fed
        feeds = {}
        for obs_name, obs_ph in self._obs_placeholders.get_leaf_items():
            obs_value = np.array(inputs.get_recursive(obs_name))
            feeds[obs_ph] = (obs_value[np.newaxis]
                             if obs_value.shape == tuple() else obs_value)
        # goals are converted to arrays and fed unchanged
        feeds.update(
            (goal_ph, np.array(goals.get_recursive(goal_name)))
            for goal_name, goal_ph in self._goal_placeholders.get_leaf_items())
        # feed the mean stored by the previous call
        feeds[self._mppi_mean_placeholder] = self._mppi_mean_np

        fetches = dict(self._get_action_outputs.get_leaf_items())
        fetched = self._session.run(fetches, feed_dict=feeds)

        result = AttrDict()
        for out_name, out_value in fetched.items():
            result.add_recursive(out_name, out_value)
        # keep the updated mean so the next call can feed it back in
        self._mppi_mean_np = result.mppi_mean

        return result
Beispiel #2
0
    def call(self, inputs, obs_lowd=None, training=False):
        """
        Run every sub-model on the same inputs and merge their leaf outputs.

        :param inputs: passed unchanged to each sub-model
        :param obs_lowd (tensor): optional; when given it is split along the
            last axis according to self._obs_lowd_dims, and the pieces are
            paired with the sub-models in order
        :param training (bool): forwarded to each sub-model
        :return: AttrDict collecting the leaf outputs of all sub-models
        """
        if obs_lowd is not None:
            obs_lowd_pieces = tf.split(obs_lowd, self._obs_lowd_dims, axis=-1)
        else:
            # nothing precomputed: every sub-model receives None
            obs_lowd_pieces = [None] * len(self._models)

        merged = AttrDict()
        for sub_model, piece in zip(self._models, obs_lowd_pieces):
            sub_outputs = sub_model(inputs, obs_lowd=piece, training=training)
            for name, tensor in sub_outputs.get_leaf_items():
                merged.add_recursive(name, tensor)

        return merged
Beispiel #3
0
    def _split_action(self, action):
        """
        Slice a concatenated action along its last axis into named parts.

        :param action: array/tensor indexed as action[..., start:stop]; the
            last axis is consumed in the order of self._env_spec.action_names
        :return: AttrDict mapping each action name to its slice
        """
        split = AttrDict()

        start = 0
        for action_name in self._env_spec.action_names:
            shape = self._env_spec.names_to_shapes.get_recursive(action_name)
            # width of this action = sum of its shape entries
            stop = start + np.sum(shape)
            split.add_recursive(action_name, action[..., start:stop])
            start = stop

        return split
Beispiel #4
0
    def get_batch(self, batch_size, horizon):
        """
        Return the iterator's next element, split into inputs and outputs.

        The result of self._iterator.get_next() is created on the first call
        and cached on self._static_inputs_and_outputs; later calls reuse it.

        :param batch_size: not used by this method
        :param horizon: not used by this method
        :return: inputs (AttrDict), outputs (AttrDict)
        """
        if self._static_inputs_and_outputs is None:
            self._static_inputs_and_outputs = self._iterator.get_next()

        inputs_prefix = 'inputs/'
        outputs_prefix = 'outputs/'

        inputs = AttrDict()
        outputs = AttrDict()
        for key, value in self._static_inputs_and_outputs.items():
            # Strip only the leading prefix: str.replace would also delete
            # any later occurrence of the same text inside a nested key.
            if key.startswith(inputs_prefix):
                inputs.add_recursive(key[len(inputs_prefix):], value)
            elif key.startswith(outputs_prefix):
                outputs.add_recursive(key[len(outputs_prefix):], value)
            else:
                # keys without a known prefix are still treated as outputs
                outputs.add_recursive(key, value)

        return inputs, outputs
Beispiel #5
0
    def denormalize(self, inputs):
        """
        Map normalized values back onto their (lower, upper) limits.

        Each leaf is multiplied by half the width of its limits and shifted
        by their midpoint.

        :param inputs (AttrDict): normalized values
        :return: AttrDict with the same keys, denormalized
        """
        result = AttrDict()
        for name, normalized in inputs.get_leaf_items():
            limits = self.names_to_limits.get_recursive(name)
            low, high = (np.array(bound) for bound in limits)
            midpoint = 0.5 * (low + high)
            half_width = 0.5 * (high - low)
            result.add_recursive(name, normalized * half_width + midpoint)

        return result
Beispiel #6
0
    def _get_position_outputs(self, preprocess_outputs, inputs):
        """
        Rotate locally predicted positions into the global frame.

        :param preprocess_outputs (tensor): last dimension must be 3
        :param inputs (AttrDict): provides imu.compass_bearing when the
            outputs are GPS, otherwise jackal.position and jackal.yaw
        :return: AttrDict with the rotated positions under 'jackal/position'
        """
        assert preprocess_outputs.shape.as_list()[-1] == 3

        if not self._is_output_gps:
            origin = inputs.jackal.position
            heading = inputs.jackal.yaw[:, 0]
        else:
            # GPS outputs: use a zero origin with as many rows as
            # preprocess_outputs has along its first axis
            num_rows = tf.shape(preprocess_outputs)[0]
            origin = tf.zeros([num_rows, 3])
            # so that east is 0 degrees
            bearing = inputs.imu.compass_bearing[:, 0]
            heading = -bearing + 0.5 * np.pi

        global_positions = rotate_to_global(curr_position=origin,
                                            curr_yaw=heading,
                                            local_position=preprocess_outputs)

        result = AttrDict()
        result.add_recursive('jackal/position', global_positions)

        return result
Beispiel #7
0
    def _preprocess_action_inputs(self, inputs):
        """
        Pick the action entries out of inputs, cast and normalize them.

        :param inputs (AttrDict):
        :return: AttrDict of normalized actions, each of rank 3
        :raises ValueError: if a normalized action is not of rank 2 or 3
        """
        action_names = self._env_spec.action_names

        # keep only the entries that are actions
        selected = inputs.filter_recursive(
            lambda key, value: key in action_names)

        # cast to float32, then normalize with the env spec
        selected.modify_recursive(lambda arr: tf.cast(arr, tf.float32))
        normalized = self._env_spec.normalize(selected)

        actions = AttrDict()
        for name in action_names:
            action = normalized.get_recursive(name)
            rank = len(action.shape)
            if rank == 2:
                # add a trailing axis so every action ends up rank 3
                action = action[..., tf.newaxis]
            elif rank != 3:
                raise ValueError
            actions.add_recursive(name, action)

        return actions
Beispiel #8
0
    def _get_outputs(self, preprocess_outputs, inputs, denormalize=True):
        """
        Slice the stacked predictions into named outputs and post-process.

        The last axis is cut up in the order of self._output_observations,
        the slices are optionally denormalized, and outputs flagged as
        relative get the matching entry of inputs added to them.

        :param preprocess_outputs (tensor): [batch_size, horizon, dim]
        :param inputs (AttrDict): values added to the relative outputs
        :param denormalize (bool): denormalize with the env spec if True
        :return: AttrDict
        """
        names_to_shapes = self._env_spec.names_to_shapes

        ### split along the last axis
        slices = AttrDict()
        offset = 0
        for observation in self._output_observations:
            shape = names_to_shapes.get_recursive(observation.name)
            assert len(shape) == 1, 'Can only predict vector quantities'

            width = shape[0]
            slices.add_recursive(
                observation.name,
                preprocess_outputs[..., offset:offset + width])
            offset += width

        ### denormalize (optional)
        if denormalize:
            outputs = self._env_spec.denormalize(slices)
        else:
            outputs = slices

        ### make relative
        for observation in self._output_observations:
            if not observation.is_relative:
                continue
            name = observation.name
            value = outputs.get_recursive(name)
            # insert an axis at position 1 so the input lines up with the
            # horizon axis of the prediction
            value += inputs.get_recursive(name)[:, tf.newaxis, :]
            outputs.add_recursive(name, value)

        return outputs
Beispiel #9
0
    def _process_inputs(self, inputs):
        """
        Cast and normalize the inputs, then sort them into image
        observations, vector observations and actions.

        :param inputs (AttrDict): modify_recursive is called on it directly
        :return: obs_ims (AttrDict), obs_vecs (AttrDict), actions (AttrDict)
        :raises ValueError: for an observation whose rank is not 1, 2 or 4,
            or an action whose rank is not 2 or 3
        """
        env_spec = self._env_spec

        # normalization
        inputs.modify_recursive(lambda arr: tf.cast(arr, tf.float32))
        normalized = env_spec.normalize(inputs)

        ### observations: rank 4 goes to obs_ims, rank 1 or 2 to obs_vecs
        obs_ims = AttrDict()
        obs_vecs = AttrDict()
        for name in env_spec.observation_names:
            obs = normalized.get_recursive(name)
            rank = len(obs.shape)
            if rank == 4:
                obs_ims.add_recursive(name, obs)
            elif rank == 2:
                obs_vecs.add_recursive(name, obs)
            elif rank == 1:
                # rank-1 observations get a trailing axis
                obs_vecs.add_recursive(name, obs[:, tf.newaxis])
            else:
                raise ValueError

        ### actions: rank 2 gets a trailing axis, rank 3 is kept
        actions = AttrDict()
        for name in env_spec.action_names:
            act = normalized.get_recursive(name)
            rank = len(act.shape)
            if rank == 2:
                act = act[..., tf.newaxis]
            elif rank != 3:
                raise ValueError
            actions.add_recursive(name, act)

        return obs_ims, obs_vecs, actions
Beispiel #10
0
    def _preprocess_observation_inputs(self, inputs):
        """
        Pick the observation entries out of inputs, cast and normalize them,
        and sort them into image and vector observations by rank.

        :param inputs (AttrDict):
        :return: obs_ims (AttrDict) holding the rank-4 observations,
            obs_vecs (AttrDict) holding the rank-1 and rank-2 observations
            (rank-1 entries get a trailing axis)
        :raises ValueError: if a normalized observation has any other rank
        """
        ### separate out observations
        obs_inputs = inputs.filter_recursive(
            lambda key, value: key in self._env_spec.observation_names)

        # normalization
        obs_inputs.modify_recursive(lambda arr: tf.cast(arr, tf.float32))
        normalized_inputs = self._env_spec.normalize(obs_inputs)

        obs_ims = AttrDict()
        obs_vecs = AttrDict()
        # get_leaf_items already yields the leaf value, so no second lookup
        # by key is needed
        for key, value in normalized_inputs.get_leaf_items():
            rank = len(value.shape)
            if rank == 1:
                obs_vecs.add_recursive(key, value[:, tf.newaxis])
            elif rank == 2:
                obs_vecs.add_recursive(key, value)
            elif rank == 4:
                obs_ims.add_recursive(key, value)
            else:
                raise ValueError(
                    'Observation "{0}" has unsupported rank {1}; '
                    'expected 1, 2 or 4'.format(key, rank))

        return obs_ims, obs_vecs
Beispiel #11
0
    def _setup_mppi_graph(self, model, goals):
        """
        Build the TensorFlow graph that samples action sequences around the
        current mean, scores them with the model and cost function, and
        computes the updated mean.

        :param model: called on the tiled inputs; also provides horizon and
            get_obs_lowd
        :param goals (AttrDict): only the dtype and shape of each leaf are
            used, to create the matching goal placeholders
        :return: obs_placeholders (AttrDict), goal_placeholders (AttrDict),
            mppi_mean_placeholder (tensor), get_action_outputs (AttrDict)
        """
        ### create placeholders
        # one placeholder per observation, with shape and dtype taken from
        # the env spec
        obs_placeholders = AttrDict()
        for name in self._env_spec.observation_names:
            shape = list(self._env_spec.names_to_shapes.get_recursive(name))
            dtype = tf.as_dtype(
                self._env_spec.names_to_dtypes.get_recursive(name))
            ph = tf.placeholder(dtype, shape=shape, name=name)
            obs_placeholders.add_recursive(name, ph)

        # one placeholder per goal leaf, mirroring its dtype and shape
        goal_placeholders = AttrDict()
        for name, value in goals.get_leaf_items():
            goal_placeholders.add_recursive(
                name,
                tf.placeholder(tf.as_dtype(value.dtype),
                               shape=value.shape,
                               name=name))

        # current mean action sequence: [horizon, action_dim]
        mppi_mean_placeholder = tf.placeholder(
            tf.float32,
            name='mppi_mean',
            shape=[model.horizon, self._action_dim])

        ### get obs lowd
        # add a leading axis of size 1 to every observation, then compute
        # obs_lowd once on these un-tiled inputs
        inputs = obs_placeholders.apply_recursive(
            lambda value: value[tf.newaxis])
        obs_lowd = model.get_obs_lowd(inputs)

        # first row of the mean, and the mean shifted forward by one step
        # with its last row repeated to keep the length
        past_mean = mppi_mean_placeholder[0]
        shifted_mean = tf.concat(
            [mppi_mean_placeholder[1:], mppi_mean_placeholder[-1:]], axis=0)

        # sample through time
        # zero-mean normal noise; stddev is sigma times half the range
        # between the action selection limits
        delta_limits = 0.5 * (self._action_selection_upper_limits -
                              self._action_selection_lower_limits)
        eps = tf.random_normal(mean=0,
                               stddev=self._sigma * delta_limits,
                               shape=(self._N, model.horizon,
                                      self._action_dim))
        # each step blends (shifted mean + noise) with the previous step's
        # action, weighted by beta and (1 - beta); step 0 blends with
        # past_mean instead
        actions = []
        for h in range(model.horizon):
            if h == 0:
                action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + (
                    1. - self._beta) * past_mean
            else:
                action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + (
                    1. - self._beta) * actions[-1]
            actions.append(action_h)
        # stack the per-step actions on axis 1: [N, horizon, action_dim],
        # then clip to the action selection limits
        actions = tf.stack(actions, axis=1)
        actions = tf.clip_by_value(
            actions, self._action_selection_lower_limits[np.newaxis,
                                                         np.newaxis],
            self._action_selection_upper_limits[np.newaxis, np.newaxis])

        # forward simulate
        # tile every input N times along its first axis and add the sampled
        # actions under their own names
        actions_split = self._split_action(actions)
        inputs_tiled = inputs.apply_recursive(
            lambda v: tf.tile(v, [self._N] + [1] * (len(v.shape) - 1)))
        for k, v in actions_split.get_leaf_items():
            inputs_tiled.add_recursive(k, v)
        # NOTE(review): multiples (N, 1) assume obs_lowd is rank 2 - confirm
        obs_lowd_tiled = tf.tile(obs_lowd, (self._N, 1))

        ### call model and evaluate cost
        model_outputs = model(inputs_tiled, obs_lowd=obs_lowd_tiled)
        # drop outputs whose key starts with an underscore
        model_outputs = model_outputs.filter_recursive(
            lambda key, value: key[0] != '_')
        costs_per_timestep = self._cost_fn(inputs_tiled, model_outputs,
                                           goal_placeholders, actions_split)
        # one cost per sample: mean of the total cost over axis 1
        costs = tf.reduce_mean(costs_per_timestep.total, axis=1)

        # MPPI update
        # exponentiated, max-shifted scores normalized to weights; the
        # 1e-10 keeps the denominator away from zero
        scores = -costs
        probs = tf.exp(self._gamma * (scores - tf.reduce_max(scores)))
        probs /= tf.reduce_sum(probs) + 1e-10
        # new mean = weighted sum of the sampled action sequences
        new_mppi_mean = tf.reduce_sum(actions *
                                      probs[:, tf.newaxis, tf.newaxis],
                                      axis=0)

        # best_idx selects the lowest-cost sample for the reported cost and
        # model outputs; the reported action comes from the new weighted
        # mean, not from that sample
        best_idx = tf.argmin(costs)
        best_actions = self._split_action(new_mppi_mean)

        get_action_outputs = AttrDict(
            cost=costs[best_idx],
            cost_per_timestep=costs_per_timestep.apply_recursive(
                lambda v: v[best_idx]),
            action=best_actions.apply_recursive(lambda v: v[0]),
            action_sequence=best_actions,
            model_outputs=model_outputs.apply_recursive(lambda v: v[best_idx]),
            all_costs=costs,
            all_costs_per_timestep=costs_per_timestep,
            all_actions=actions_split,
            all_model_outputs=model_outputs,
            mppi_mean=new_mppi_mean,
        )

        # re-insert every leaf wrapped in tf.identity so that it is named
        # 'get_action_outputs/<key>'
        for key, value in get_action_outputs.get_leaf_items():
            get_action_outputs.add_recursive(
                key, tf.identity(value, 'get_action_outputs/' + key))

        return obs_placeholders, goal_placeholders, mppi_mean_placeholder, get_action_outputs
Beispiel #12
0
class EnvSpec(object):
    """
    Describes the named quantities of an environment.

    Every name has a shape, a (lower, upper) limit pair and a dtype, kept in
    three AttrDicts keyed by name. Subclasses must provide
    observation_names and action_names.
    """

    def __init__(self, names_shapes_limits_dtypes):
        """
        :param names_shapes_limits_dtypes: iterable of
            (name, shape, limit, dtype) tuples; a 'done' entry with shape
            (1,), limits (0, 1) and a boolean dtype is appended
        """
        names_shapes_limits_dtypes = list(names_shapes_limits_dtypes)
        # np.bool was only an alias of the builtin bool and was removed in
        # NumPy 1.24, so the builtin is used directly.
        names_shapes_limits_dtypes += [('done', (1, ), (0, 1), bool)]

        self._names_to_shapes = AttrDict()
        self._names_to_limits = AttrDict()
        self._names_to_dtypes = AttrDict()
        for name, shape, limit, dtype in names_shapes_limits_dtypes:
            self._names_to_shapes.add_recursive(name, shape)
            self._names_to_limits.add_recursive(name, limit)
            self._names_to_dtypes.add_recursive(name, dtype)

    @property
    def observation_names(self):
        """Names of the observations; must be provided by subclasses."""
        raise NotImplementedError

    @property
    def output_observation_names(self):
        """Defaults to observation_names."""
        return self.observation_names

    @property
    def action_names(self):
        """Names of the actions; must be provided by subclasses."""
        raise NotImplementedError

    @property
    def names(self):
        """Observation names followed by action names."""
        return self.observation_names + self.action_names

    @property
    def names_to_shapes(self):
        return self._names_to_shapes

    @property
    def names_to_limits(self):
        return self._names_to_limits

    @property
    def names_to_dtypes(self):
        return self._names_to_dtypes

    def dims(self, names):
        """
        :param names: iterable of names
        :return: np.array with, for each name, the sum of its shape entries
        """
        # NOTE(review): for shapes with more than one entry this is the sum
        # of the entries, not the element count - confirm this is intended.
        return np.array([
            np.sum(self.names_to_shapes.get_recursive(name)) for name in names
        ])

    def dim(self, names):
        """
        :param names: iterable of names
        :return: sum of dims(names)
        """
        return np.sum(self.dims(names))

    def _mean_and_std(self, key):
        """Return the midpoint and half-width of the limits stored for key."""
        lower, upper = self.names_to_limits.get_recursive(key)
        lower, upper = np.array(lower), np.array(upper)
        return 0.5 * (lower + upper), 0.5 * (upper - lower)

    def normalize(self, inputs):
        """
        Shift each leaf by the midpoint of its limits and divide by their
        half-width.

        :param inputs (AttrDict):
        :return: AttrDict
        """
        inputs_normalized = AttrDict()
        for key, value in inputs.get_leaf_items():
            mean, std = self._mean_and_std(key)
            inputs_normalized.add_recursive(key, (value - mean) / std)

        return inputs_normalized

    def denormalize(self, inputs):
        """
        Inverse of normalize: multiply each leaf by the half-width of its
        limits and add their midpoint.

        :param inputs (AttrDict):
        :return: AttrDict
        """
        inputs_denormalized = AttrDict()
        for key, value in inputs.get_leaf_items():
            mean, std = self._mean_and_std(key)
            inputs_denormalized.add_recursive(key, value * std + mean)

        return inputs_denormalized

    def process_image(self, name, image):
        """
        Default behavior: resize the image

        An input with 4 dimensions is processed entry by entry along its
        first axis and the results are collected into one array.

        :param name: name whose stored shape is passed to imresize
        :param image: array with a shape attribute
        """
        if len(image.shape) == 4:
            return np.array([self.process_image(name, im_i) for im_i in image])

        return imresize(image, self.names_to_shapes.get_recursive(name))