Ejemplo n.º 1
0
 def ClipFraction(dist_inputs, actions, old_log_probs):
     """Fraction of probability ratios that fall outside the PPO clip range.

     Args:
       dist_inputs: forwarded unchanged to `rl_layers.ProbsRatio`.
       actions: forwarded unchanged to `rl_layers.ProbsRatio`.
       old_log_probs: forwarded unchanged to `rl_layers.ProbsRatio`.

     Returns:
       The mean of the boolean mask `|ratio - 1| > self._epsilon`, i.e. the
       share of entries whose ratio deviates from 1 by more than the
       enclosing object's `_epsilon`.
     """
     ratio = rl_layers.ProbsRatio(
         dist_inputs, actions, old_log_probs,
         log_prob_fun=self._policy_dist.log_prob)
     # Averaging a boolean mask gives the fraction of True entries.
     outside_clip_range = jnp.abs(ratio - 1) > self._epsilon
     return jnp.mean(outside_clip_range)
Ejemplo n.º 2
0
  def forward(self, inputs, weights):
    """Scales inputs by their root mean square, then applies gamma and beta.

    Args:
      inputs: array to normalize. The mean of squares is taken over every
        axis except the first and the last (e.g. W and H of a (B, W, H, C)
        input).
      weights: a `(gamma, beta, epsilon_l)` triple. `epsilon_l` is either
        `base.EMPTY_WEIGHTS` or an indexable whose first element's absolute
        value is added to the base epsilon.

    Returns:
      `gamma * inputs / sqrt(mean(inputs**2) + epsilon) + beta`.
    """
    gamma, beta, epsilon_l = weights

    epsilon = self._init_epsilon
    if epsilon_l is not base.EMPTY_WEIGHTS:
      # Deliberately not `+=`: if `_init_epsilon` is a mutable ndarray, an
      # in-place add would modify the layer's stored base epsilon and the
      # learnt offset would accumulate on every forward call.
      epsilon = epsilon + np.abs(epsilon_l[0])

    # Reduce over all axes but the first (batch) and last (channel) ones.
    axis = tuple(range(1, len(np.shape(inputs)) - 1))
    # keepdims leaves size-1 reduced axes, e.g. (B, 1, 1, C), so the result
    # broadcasts against `inputs`.
    nu2 = np.mean(inputs**2, axis=axis, keepdims=True)
    # Same shape as `inputs`, e.g. (B, W, H, C).
    xhat = inputs / np.sqrt(nu2 + epsilon)

    return gamma * xhat + beta
Ejemplo n.º 3
0
def SaturationCost(x, limit=0.9):
  """Returns the elementwise penalty for magnitudes that exceed `limit`.

  The cost is `max(0, |x| - limit)`: zero while `|x| <= limit`, then growing
  linearly with the excess. The previous `minimum` form was never positive
  and was exactly zero for every saturated value, so it could not penalize
  saturation.

  Args:
    x: scalar or array of values to check.
    limit: magnitude up to which no cost is incurred.

  Returns:
    Non-negative cost with the broadcast shape of `x` and `limit`.
  """
  return np.maximum(0, np.abs(x) - limit)