def predict(self, obs, act):
    """
    Sample a batch of next observations from the model's predictive
    distribution.

    The model outputs a mean and a variance for the observation delta; a
    delta is drawn from the corresponding Gaussian and added to the
    (un-normalized) input observations.

    :param obs: observations - numpy array of shape (n_samples, ndim_obs)
    :param act: actions - numpy array of shape (n_samples, ndim_act)
    :return: sampled next observations - numpy array of shape (n_samples, ndim_obs)
    """
    assert obs.shape[0] == act.shape[0]
    assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
    assert act.ndim == 2 and act.shape[1] == self.action_space_dims

    # Keep the raw observations: the delta is added to these, not to the
    # normalized copy that is fed to the model.
    raw_obs = obs
    use_normalization = self.normalize_input

    if use_normalization:
        obs, act = self._normalize_data(obs, act)

    mean_delta = np.array(self.f_delta_pred(obs, act))
    variance = np.array(self.f_var_pred(obs, act))
    # np.random.normal expects a standard deviation, hence the sqrt.
    delta = np.random.normal(mean_delta, np.sqrt(variance))

    if use_normalization:
        delta_stats = self.normalization['delta']
        delta = denormalize(delta, delta_stats[0], delta_stats[1])

    assert delta.shape == obs.shape
    return raw_obs + delta
def predict(self, obs, act, hidden_state):
    """
    Predict the batch of next observations for a single step, carrying the
    model's hidden state forward.

    :param obs: observations - numpy array of shape (n_samples, ndim_obs)
    :param act: actions - numpy array of shape (n_samples, ndim_act)
    :param hidden_state: hidden_state - numpy array of shape (n_samples, hidden_sizes)
    :return: tuple (pred_obs_next, next_hidden_state) where pred_obs_next is the
             predicted batch of next observations (n_samples, ndim_obs) and
             next_hidden_state is whatever state f_delta_pred returns
    """
    assert obs.shape[0] == act.shape[0]
    assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
    assert act.ndim == 2 and act.shape[1] == self.action_space_dims

    raw_obs = obs
    # Insert a length-1 axis at position 1 before calling the model.
    obs = np.expand_dims(obs, 1)
    act = np.expand_dims(act, 1)

    use_normalization = self.normalize_input
    if use_normalization:
        obs, act = self._normalize_data(obs, act)

    delta, next_hidden_state = self.f_delta_pred(obs, act, hidden_state)

    if use_normalization:
        delta_stats = self.normalization['delta']
        delta = denormalize(delta, delta_stats[0], delta_stats[1])

    # Drop the length-1 axis again so the delta lines up with raw_obs.
    step_delta = delta[:, 0, :]
    return raw_obs + step_delta, next_hidden_state
def _denormalize_data(self, delta):
    """
    Denormalize the predicted deltas of every model with that model's own
    'delta' normalization statistics.

    :param delta: array whose last axis indexes the models
                  (last dimension must equal self.num_models)
    :return: array of denormalized deltas, models stacked along the last axis
    """
    assert delta.shape[-1] == self.num_models
    per_model = [
        denormalize(
            delta[..., model_idx],
            self.normalization[model_idx]['delta'][0],
            self.normalization[model_idx]['delta'][1],
        )
        for model_idx in range(self.num_models)
    ]
    return np.stack(per_model, axis=-1)
def predict_batches(self, obs_batches, act_batches):
    """
    Predict the batch of next observations for each model given the batch of
    current observations and actions for each model

    :param obs_batches: observation batches for each model concatenated along axis 0
                        - numpy array of shape (batch_size_per_model * num_models, ndim_obs)
    :param act_batches: action batches for each model concatenated along axis 0
                        - numpy array of shape (batch_size_per_model * num_models, ndim_act)
    :return: pred_obs_next_batch: predicted batch of next observations
             - shape: (batch_size_per_model * num_models, ndim_obs)
    """
    assert (obs_batches.shape[0] == act_batches.shape[0]
            and obs_batches.shape[0] % self.num_models == 0)
    assert obs_batches.ndim == 2 and obs_batches.shape[1] == self.obs_space_dims
    assert act_batches.ndim == 2 and act_batches.shape[1] == self.action_space_dims

    # The predicted delta is added to the raw (un-normalized) observations.
    obs_batches_original = obs_batches

    if self.normalize_input:
        obs_batches, act_batches = self._normalize_data(obs_batches, act_batches)
        delta_batches = np.array(
            self.f_delta_pred_model_batches(obs_batches, act_batches))
        delta_batches = denormalize(delta_batches,
                                    self.normalization['delta'][0],
                                    self.normalization['delta'][1])
    else:
        # Use the same per-model batch predictor as the normalized path.
        # The previous code called self.f_delta_pred here, which is not the
        # batch-wise function this method documents and asserts on below.
        delta_batches = np.array(
            self.f_delta_pred_model_batches(obs_batches, act_batches))

    assert delta_batches.ndim == 2
    pred_obs_batches = obs_batches_original + delta_batches
    assert pred_obs_batches.shape == obs_batches.shape
    return pred_obs_batches
def predict(self, obs, act, hidden_state, pred_type='rand'):
    """
    Predict the batch of next observations given the batch of current
    observations and actions

    :param obs: observations - numpy array of shape (n_samples, ndim_obs)
    :param act: actions - numpy array of shape (n_samples, ndim_act)
    :param hidden_state: iterable of hidden-state values; it is unpacked and
                         passed as extra positional arguments to f_delta_pred
    :param pred_type: prediction type
                      - rand: choose one of the models randomly
                      - mean: mean prediction of all models
                      - all: returns the prediction of all the models
    :return: tuple (pred_obs_next, next_hidden_state)
             pred_obs_next: predicted batch of next observations
             - shape: (n_samples, ndim_obs) - in case of 'rand' and 'mean' mode
                      (n_samples, ndim_obs, n_models) - in case of 'all' mode
             next_hidden_state: list of the remaining outputs of f_delta_pred
    :raises NotImplementedError: if pred_type is not one of rand, mean, all
    """
    # Fail fast on an unsupported mode. Previously the exception object was
    # created but never raised, so invalid modes silently behaved like 'all'.
    if pred_type not in ('rand', 'mean', 'all'):
        raise NotImplementedError('pred_type must be one of [rand, mean, all]')

    assert obs.shape[0] == act.shape[0]
    assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
    assert act.ndim == 2 and act.shape[1] == self.action_space_dims

    # The predicted delta is added to the raw (un-normalized) observations.
    obs_original = obs
    # Insert a length-1 axis at position 1 before calling the model.
    obs, act = np.expand_dims(obs, 1), np.expand_dims(act, 1)

    if self.normalize_input:
        obs, act = self._normalize_data(obs, act)
        delta, *next_hidden_state = self.f_delta_pred(obs, act, *hidden_state)
        delta = denormalize(delta,
                            self.normalization['delta'][0],
                            self.normalization['delta'][1])
    else:
        delta, *next_hidden_state = self.f_delta_pred(obs, act, *hidden_state)

    # Drop the length-1 axis; what remains is (n_samples, ndim_obs, n_models).
    delta = delta[:, 0, :, :]
    assert delta.ndim == 3

    pred_obs = obs_original[:, :, None] + delta

    if pred_type == 'rand':
        # Randomly select the prediction of one model for each row.
        batch_size = delta.shape[0]
        idx = np.random.randint(0, self.num_models, size=batch_size)
        # Fancy indexing pairs row i with model idx[i]; result is
        # (n_samples, ndim_obs) and also works for an empty batch.
        pred_obs = pred_obs[np.arange(batch_size), :, idx]
    elif pred_type == 'mean':
        pred_obs = np.mean(pred_obs, axis=-1)
    # pred_type == 'all': return the per-model predictions unchanged.

    return pred_obs, next_hidden_state
def predict(self, obs):
    """
    Predict a batch of values for the given batch of observations.

    When input normalization is enabled, the observations are normalized
    before the forward pass and the outputs are denormalized with the 'ret'
    normalization statistics.

    :param obs: observations - numpy array of shape (n_samples, ndim_obs)
    :return: values: numpy array with the output of f_value_pred
             (denormalized if normalize_input is set)
    """
    assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims

    if not self.normalize_input:
        return np.array(self.f_value_pred(obs))

    normalized_obs = self._normalize_data(obs)
    raw_values = np.array(self.f_value_pred(normalized_obs))
    ret_stats = self.normalization['ret']
    return denormalize(raw_values, ret_stats[0], ret_stats[1])