Ejemplo n.º 1
0
    def predict(self, obs, act):
        """
        Sample a batch of next observations from the model's predictive distribution.

        The model supplies a mean (f_delta_pred) and a variance (f_var_pred) for the
        observation delta. A delta is drawn from the matching normal distribution and
        added to the observations that were passed in.

        :param obs: observations - numpy array of shape (n_samples, ndim_obs)
        :param act: actions - numpy array of shape (n_samples, ndim_act)
        :return: sampled next observations - numpy array of shape (n_samples, ndim_obs)
        """
        assert obs.shape[0] == act.shape[0]
        assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
        assert act.ndim == 2 and act.shape[1] == self.action_space_dims

        # keep the untouched observations; the delta is added to these at the end
        raw_obs = obs
        use_normalization = self.normalize_input

        if use_normalization:
            obs, act = self._normalize_data(obs, act)

        mean_delta = np.array(self.f_delta_pred(obs, act))
        variance = np.array(self.f_var_pred(obs, act))
        # np.random.normal expects a standard deviation, hence the square root
        delta = np.random.normal(mean_delta, np.sqrt(variance))

        if use_normalization:
            # the sample was drawn from normalized inputs; map it back with the delta statistics
            delta_stats = self.normalization['delta']
            delta = denormalize(delta, delta_stats[0], delta_stats[1])

        assert delta.shape == obs.shape

        return raw_obs + delta
Ejemplo n.º 2
0
    def predict(self, obs, act, hidden_state):
        """
        Predict the next observations for one step and return the updated hidden state.

        :param obs: observations - numpy array of shape (n_samples, ndim_obs)
        :param act: actions - numpy array of shape (n_samples, ndim_act)
        :param hidden_state: hidden_state - numpy array of shape (n_samples, hidden_sizes)
        :return: tuple (pred_obs_next, next_hidden_state) where pred_obs_next is the
                 predicted batch of next observations (n_samples, ndim_obs) and
                 next_hidden_state is the second value returned by f_delta_pred
        """
        assert obs.shape[0] == act.shape[0]
        assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
        assert act.ndim == 2 and act.shape[1] == self.action_space_dims

        raw_obs = obs

        # insert a length-1 axis at position 1 before calling the model
        obs = np.expand_dims(obs, 1)
        act = np.expand_dims(act, 1)

        use_normalization = self.normalize_input
        if use_normalization:
            obs, act = self._normalize_data(obs, act)

        delta, next_hidden_state = self.f_delta_pred(obs, act, hidden_state)

        if use_normalization:
            delta_stats = self.normalization['delta']
            delta = denormalize(delta, delta_stats[0], delta_stats[1])

        # drop the length-1 axis that was added above
        step_delta = delta[:, 0, :]

        return raw_obs + step_delta, next_hidden_state
Ejemplo n.º 3
0
 def _denormalize_data(self, delta):
     """
     Denormalize each model's delta with that model's own 'delta' statistics.

     Slice i along the last axis of delta is passed to denormalize together with
     self.normalization[i]['delta'][0] and self.normalization[i]['delta'][1].

     :param delta: numpy array whose last axis has length num_models
     :return: numpy array of the denormalized slices, stacked along the last axis
     """
     assert delta.shape[-1] == self.num_models
     per_model = [
         denormalize(
             delta[..., model_idx],
             self.normalization[model_idx]['delta'][0],
             self.normalization[model_idx]['delta'][1],
         )
         for model_idx in range(self.num_models)
     ]
     return np.stack(per_model, axis=-1)
Ejemplo n.º 4
0
    def predict_batches(self, obs_batches, act_batches):
        """
            Predict the batch of next observations for each model given the batch of current observations and actions for each model
            :param obs_batches: observation batches for each model concatenated along axis 0 - numpy array of shape (batch_size_per_model * num_models, ndim_obs)
            :param act_batches: action batches for each model concatenated along axis 0 - numpy array of shape (batch_size_per_model * num_models, ndim_act)
            :return: pred_obs_next_batch: predicted batch of next observations -
                                    shape:  (batch_size_per_model * num_models, ndim_obs)
        """
        assert obs_batches.shape[0] == act_batches.shape[0]
        assert obs_batches.shape[0] % self.num_models == 0
        assert obs_batches.ndim == 2 and obs_batches.shape[1] == self.obs_space_dims
        assert act_batches.ndim == 2 and act_batches.shape[1] == self.action_space_dims

        # keep the unnormalized observations; the predicted delta is added to these
        obs_batches_original = obs_batches

        normalize = self.normalize_input
        if normalize:
            obs_batches, act_batches = self._normalize_data(
                obs_batches, act_batches)

        # Both paths must use the per-model batched predictor. The non-normalized path
        # previously called self.f_delta_pred, which disagreed with the normalized path
        # and with the 2-D output this method asserts below.
        delta_batches = np.array(
            self.f_delta_pred_model_batches(obs_batches, act_batches))

        if normalize:
            delta_batches = denormalize(delta_batches,
                                        self.normalization['delta'][0],
                                        self.normalization['delta'][1])

        assert delta_batches.ndim == 2

        pred_obs_batches = obs_batches_original + delta_batches
        assert pred_obs_batches.shape == obs_batches.shape
        return pred_obs_batches
Ejemplo n.º 5
0
    def predict(self, obs, act, hidden_state, pred_type='rand'):
        """
        Predict the batch of next observations given the batch of current observations and actions
        :param obs: observations - numpy array of shape (n_samples, ndim_obs)
        :param act: actions - numpy array of shape (n_samples, ndim_act)
        :param hidden_state: iterable of hidden-state arrays; it is unpacked into f_delta_pred
                             as extra positional arguments
        :param pred_type:  prediction type
                   - rand: choose one of the models randomly
                   - mean: mean prediction of all models
                   - all: returns the prediction of all the models
        :return: tuple (pred_obs_next, next_hidden_state)
                 pred_obs_next: predicted batch of next observations -
                                shape:  (n_samples, ndim_obs) - in case of 'rand' and 'mean' mode
                                        (n_samples, ndim_obs, n_models) - in case of 'all' mode
                 next_hidden_state: list of the remaining values returned by f_delta_pred
        :raises NotImplementedError: if pred_type is not one of 'rand', 'mean', 'all'
        """
        assert obs.shape[0] == act.shape[0]
        assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
        assert act.ndim == 2 and act.shape[1] == self.action_space_dims

        # Reject an unknown mode before running the model. The exception used to be
        # constructed but never raised, so bad modes silently behaved like 'all'.
        if pred_type not in ('rand', 'mean', 'all'):
            raise NotImplementedError('pred_type must be one of [rand, mean, all]')

        obs_original = obs

        # insert a length-1 axis at position 1 before calling the model
        obs, act = np.expand_dims(obs, 1), np.expand_dims(act, 1)

        if self.normalize_input:
            obs, act = self._normalize_data(obs, act)

        delta, *next_hidden_state = self.f_delta_pred(obs, act, *hidden_state)

        if self.normalize_input:
            delta = denormalize(delta, self.normalization['delta'][0],
                                self.normalization['delta'][1])

        # drop the length-1 axis again; the last axis indexes the models
        delta = delta[:, 0, :, :]
        assert delta.ndim == 3

        # broadcast the observations over the trailing model axis
        pred_obs = obs_original[:, :, None] + delta

        if pred_type == 'rand':
            # randomly selecting the prediction of one model in each row
            batch_size = delta.shape[0]
            idx = np.random.randint(0, self.num_models, size=batch_size)
            # advanced indexing picks pred_obs[row, :, idx[row]] for every row and,
            # unlike stacking a Python list, also works for an empty batch
            pred_obs = pred_obs[np.arange(batch_size), :, idx]
        elif pred_type == 'mean':
            pred_obs = np.mean(pred_obs, axis=-1)
        # 'all': keep the per-model predictions as they are

        return pred_obs, next_hidden_state
Ejemplo n.º 6
0
    def predict(self, obs):
        """
        Evaluate the value predictor (f_value_pred) on a batch of observations.

        :param obs: observations - numpy array of shape (n_samples, ndim_obs)
        :return: output of f_value_pred as a numpy array; when normalize_input is set,
                 the observations are normalized first and the output is denormalized
                 with the 'ret' statistics
        """
        assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims

        use_normalization = self.normalize_input
        if use_normalization:
            obs = self._normalize_data(obs)

        values = np.array(self.f_value_pred(obs))

        if not use_normalization:
            return values

        ret_stats = self.normalization['ret']
        return denormalize(values, ret_stats[0], ret_stats[1])