Example #1
def _compute_returns(self, batch: Batch, buffer: ReplayBuffer,
                     indice: np.ndarray) -> Batch:
    v_s, v_s_ = [], []
    with torch.no_grad():
        for b in batch.split(self._batch, shuffle=False, merge_last=True):
            v_s.append(self.critic(b.obs))
            v_s_.append(self.critic(b.obs_next))
    batch.v_s = torch.cat(v_s, dim=0).flatten()  # old value
    v_s = batch.v_s.cpu().numpy()
    v_s_ = torch.cat(v_s_, dim=0).flatten().cpu().numpy()
    # When normalizing values, we do not subtract self.ret_rms.mean, so as to
    # stay numerically consistent with OpenAI Baselines' value-normalization
    # pipeline. An empirical study also shows that subtracting the mean hurts
    # performance slightly, for unknown reasons (on MuJoCo envs; not conclusive).
    if self._rew_norm:  # unnormalize v_s & v_s_
        v_s = v_s * np.sqrt(self.ret_rms.var + self._eps)
        v_s_ = v_s_ * np.sqrt(self.ret_rms.var + self._eps)
    unnormalized_returns, advantages = self.compute_episodic_return(
        batch,
        buffer,
        indice,
        v_s_,
        v_s,
        gamma=self._gamma,
        gae_lambda=self._lambda)
    if self._rew_norm:
        batch.returns = unnormalized_returns / \
            np.sqrt(self.ret_rms.var + self._eps)
        self.ret_rms.update(unnormalized_returns)
    else:
        batch.returns = unnormalized_returns
    batch.returns = to_torch_as(batch.returns, batch.v_s)
    batch.adv = to_torch_as(advantages, batch.v_s)
    return batch
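
The call to compute_episodic_return does the actual return/advantage computation. As a rough sketch of what it is assumed to compute (standard Generalized Advantage Estimation; the helper name gae_sketch and the flat-array interface are illustrative, not Tianshou's actual vectorized, buffer-aware implementation):

import numpy as np

def gae_sketch(rew, done, v_s, v_s_, gamma=0.99, gae_lambda=0.95):
    # Backward GAE recursion over one flat trajectory:
    #   delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
    #   A_t     = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
    adv = np.zeros_like(rew, dtype=np.float64)
    gae = 0.0
    for t in reversed(range(len(rew))):
        delta = rew[t] + gamma * v_s_[t] * (1.0 - done[t]) - v_s[t]
        gae = delta + gamma * gae_lambda * (1.0 - done[t]) * gae
        adv[t] = gae
    returns = adv + v_s  # "unnormalized_returns": advantage plus value baseline
    return returns, adv

Note that ret_rms.update is fed the unnormalized returns, so the running statistics always track the raw return scale, while the stored batch.returns are divided by the running standard deviation.
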
Example #2
def process_fn(
    self, batch: Batch, buffer: ReplayBuffer, indice: np.ndarray
) -> Batch:
    v_s, v_s_, old_log_prob = [], [], []
    with torch.no_grad():
        for b in batch.split(self._batch, shuffle=False, merge_last=True):
            v_s.append(self.critic(b.obs))
            v_s_.append(self.critic(b.obs_next))
            old_log_prob.append(self(b).dist.log_prob(to_torch_as(b.act, v_s[0])))
    batch.v_s = torch.cat(v_s, dim=0).flatten()  # old value
    v_s = to_numpy(batch.v_s)
    v_s_ = to_numpy(torch.cat(v_s_, dim=0).flatten())
    if self._rew_norm:  # unnormalize v_s & v_s_
        v_s = v_s * np.sqrt(self.ret_rms.var + self._eps) + self.ret_rms.mean
        v_s_ = v_s_ * np.sqrt(self.ret_rms.var + self._eps) + self.ret_rms.mean
    unnormalized_returns, advantages = self.compute_episodic_return(
        batch, buffer, indice, v_s_, v_s,
        gamma=self._gamma, gae_lambda=self._lambda)
    if self._rew_norm:
        batch.returns = (unnormalized_returns - self.ret_rms.mean) / \
            np.sqrt(self.ret_rms.var + self._eps)
        self.ret_rms.update(unnormalized_returns)
        mean, std = np.mean(advantages), np.std(advantages)
        # Per-batch norm; the eps term guards against a zero std on
        # degenerate batches with constant advantages.
        advantages = (advantages - mean) / (std + self._eps)
    else:
        batch.returns = unnormalized_returns
    batch.act = to_torch_as(batch.act, batch.v_s)
    batch.logp_old = torch.cat(old_log_prob, dim=0)
    batch.returns = to_torch_as(batch.returns, batch.v_s)
    batch.adv = to_torch_as(advantages, batch.v_s)
    return batch
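
Both snippets rely on self.ret_rms for return normalization. A minimal sketch of such a tracker, assuming it behaves like OpenAI Baselines' RunningMeanStd (this class is an illustrative stand-in; the snippets above only show that it exposes .mean, .var, and .update):

import numpy as np

class RunningMeanStd:
    # Parallel (batched) mean/variance update, as in OpenAI Baselines.
    def __init__(self, eps: float = 1e-4) -> None:
        self.mean, self.var, self.count = 0.0, 1.0, eps

    def update(self, x: np.ndarray) -> None:
        batch_mean, batch_var, batch_count = x.mean(), x.var(), x.size
        delta = batch_mean - self.mean
        total = self.count + batch_count
        new_mean = self.mean + delta * batch_count / total
        # Combine the two second moments (Chan et al.'s parallel algorithm).
        m2 = (self.var * self.count + batch_var * batch_count
              + delta ** 2 * self.count * batch_count / total)
        self.mean, self.var, self.count = new_mean, m2 / total, total

With such a tracker, Example #1 normalizes returns by the running std only (returns / np.sqrt(rms.var + eps)), while Example #2 standardizes fully ((returns - rms.mean) / np.sqrt(rms.var + eps)); the comment in Example #1 explains why the mean term is dropped there.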