Example #1
    def _calculate_scores(self, query, key):
        """Calculates attention scores as a query-key dot product.

        Args:
          query: Query tensor of shape `[batch_size, Tq, dim]`.
          key: Key tensor of shape `[batch_size, Tv, dim]`.

        Returns:
          Tensor of shape `[batch_size, Tq, Tv]`.
        """
        if self.score_mode == "dot":
            scores = tf.matmul(query, key, transpose_b=True)
            if self.scale is not None:
                scores *= self.scale
        elif self.score_mode == "concat":
            # Reshape tensors to enable broadcasting.
            # Reshape into [batch_size, Tq, 1, dim].
            q_reshaped = tf.expand_dims(query, axis=-2)
            # Reshape into [batch_size, 1, Tv, dim].
            k_reshaped = tf.expand_dims(key, axis=-3)
            if self.scale is not None:
                scores = self.concat_score_weight * tf.reduce_sum(
                    tf.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1)
            else:
                scores = self.concat_score_weight * tf.reduce_sum(
                    tf.tanh(q_reshaped + k_reshaped), axis=-1)

        return scores
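
A minimal usage sketch for the layer that owns this method: in recent TF/Keras versions, tf.keras.layers.Attention exposes a score_mode argument, and passing score_mode="concat" exercises the tanh branch above. The shapes below are purely illustrative.

import tensorflow as tf

attention = tf.keras.layers.Attention(use_scale=True, score_mode="concat")
query = tf.random.normal([2, 3, 4])   # [batch_size, Tq, dim]
value = tf.random.normal([2, 5, 4])   # [batch_size, Tv, dim]; also used as key
output = attention([query, value])    # internal scores have shape [2, 3, 5]
print(output.shape)                   # (2, 3, 4)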
Example #2
def laplace_attention(q, k, v, scale, normalise):
    """Computes laplace exponential attention.

  Args:
    q: queries. Tensor of shape [batch_size, m, d_k].
    k: keys. Tensor of shape [batch_size, n, d_k].
    v: values. Tensor of shape [batch_size, n, d_v].
    scale: float that scales the L1 distance.
    normalise: Boolean that determines whether weights sum to 1.

  Returns:
    Tensor of shape [batch_size, m, d_v].
  """
    k = tf.expand_dims(k, axis=1)  # [batch_size, 1, n, d_k]
    q = tf.expand_dims(q, axis=2)  # [batch_size, m, 1, d_k]
    unnorm_weights = -tf.abs((k - q) / scale)  # [batch_size, m, n, d_k]
    unnorm_weights = tf.reduce_sum(unnorm_weights,
                                   axis=-1)  # [batch_size, m, n]
    if normalise:
        weight_fn = tf.nn.softmax
    else:
        weight_fn = lambda x: 1 + tf.tanh(x)
    weights = weight_fn(unnorm_weights)  # [batch_size, m, n]
    rep = tf.einsum('bik,bkj->bij', weights, v)  # [batch_size, m, d_v]
    return rep
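
A quick shape check, assuming the function above is in scope; the tensor sizes are arbitrary.

import tensorflow as tf

q = tf.random.normal([2, 3, 4])   # [batch_size, m, d_k]
k = tf.random.normal([2, 5, 4])   # [batch_size, n, d_k]
v = tf.random.normal([2, 5, 6])   # [batch_size, n, d_v]
rep = laplace_attention(q, k, v, scale=1.0, normalise=True)
print(rep.shape)                  # (2, 3, 6) == [batch_size, m, d_v]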
Example #3
  def step(cell_inputs, cell_states):
    """Step function that will be used by Keras RNN backend."""
    h_tm1 = cell_states[0]  # previous memory state
    c_tm1 = cell_states[1]  # previous carry state

    z = backend.dot(cell_inputs, kernel)
    z += backend.dot(h_tm1, recurrent_kernel)
    z = backend.bias_add(z, bias)

    z0, z1, z2, z3 = tf.split(z, 4, axis=1)

    i = tf.sigmoid(z0)
    f = tf.sigmoid(z1)
    c = f * c_tm1 + i * tf.tanh(z2)
    o = tf.sigmoid(z3)

    h = o * tf.tanh(c)
    return h, [h, c]
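
The step function closes over kernel, recurrent_kernel and bias from the enclosing LSTM layer, which is not shown. A hedged sketch of how it could be exercised, assuming the body above is pasted at module level so those names resolve as globals; with input dimension D and U units the expected shapes are kernel [D, 4U], recurrent_kernel [U, 4U] and bias [4U], matching the four-way split along axis 1:

import tensorflow as tf
from tensorflow.keras import backend

B, D, U = 2, 3, 5                         # batch, input dim, units (illustrative)
kernel = tf.random.normal([D, 4 * U])
recurrent_kernel = tf.random.normal([U, 4 * U])
bias = tf.zeros([4 * U])

x = tf.random.normal([B, D])
h0, c0 = tf.zeros([B, U]), tf.zeros([B, U])
h, [h_new, c_new] = step(x, [h0, c0])     # assumes `step` resolves the names above
print(h.shape, c_new.shape)               # (2, 5) (2, 5)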
Example #4
    def __call__(self, x, carry):
        update_t = tf.sigmoid(x @ self.W_update_x + carry @ self.W_update_c +
                              self.b_update)
        reset_t = tf.sigmoid(x @ self.W_reset_x + carry @ self.W_reset_c +
                             self.b_reset)
        new_carry = update_t * carry + (1. - update_t) * tf.tanh(
            self.next_x_net(x) + self.next_c_net(reset_t * carry) +
            self.b_next)
        return new_carry
Example #5
def gelu(x):
    """Gaussian Error Linear Unit.

    This is a smoother version of the ReLU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
      x: float Tensor to perform activation.

    Returns:
      `x` with the GELU activation applied.
    """
    cdf = 0.5 * (1.0 + tf.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
    return x * cdf
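
Sanity check: since TF 2.4 the same tanh approximation is available as tf.nn.gelu(..., approximate=True), so the hand-rolled version above should agree to within float rounding.

import numpy as np
import tensorflow as tf

x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0])
print(gelu(x).numpy())                          # approx. [-0.0454, -0.1588, 0., 0.8412, 1.9546]
print(tf.nn.gelu(x, approximate=True).numpy())  # should match closely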
Example #6
def tanh(x):
    """Hyperbolic tangent activation function.

    For example:

    >>> a = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32)
    >>> b = tf.keras.activations.tanh(a)
    >>> b.numpy()
    array([-0.9950547, -0.7615942,  0.,  0.7615942,  0.9950547], dtype=float32)

    Args:
        x: Input tensor.

    Returns:
        Tensor of the same shape and dtype as the input `x`, with tanh activation:
        `tanh(x) = sinh(x)/cosh(x) = ((exp(x) - exp(-x))/(exp(x) + exp(-x)))`.
    """
    return tf.tanh(x)
Example #7
    def _calculate_scores(self, query, key):
        """Calculates attention scores as a nonlinear sum of query and key.

        Args:
          query: Query tensor of shape `[batch_size, Tq, dim]`.
          key: Key tensor of shape `[batch_size, Tv, dim]`.

        Returns:
          Tensor of shape `[batch_size, Tq, Tv]`.
        """
        # Reshape tensors to enable broadcasting.
        # Reshape into [batch_size, Tq, 1, dim].
        q_reshaped = tf.expand_dims(query, axis=-2)
        # Reshape into [batch_size, 1, Tv, dim].
        k_reshaped = tf.expand_dims(key, axis=-3)
        if self.use_scale:
            scale = self.scale
        else:
            scale = 1.
        return tf.reduce_sum(scale * tf.tanh(q_reshaped + k_reshaped), axis=-1)
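
The same additive (Bahdanau-style) scoring can be checked standalone; here use_scale is assumed False, so scale is 1 and only the broadcasted tanh sum remains.

import tensorflow as tf

query = tf.random.normal([2, 3, 4])   # [batch_size, Tq, dim]
key = tf.random.normal([2, 5, 4])     # [batch_size, Tv, dim]
scores = tf.reduce_sum(
    tf.tanh(tf.expand_dims(query, -2) + tf.expand_dims(key, -3)), axis=-1)
print(scores.shape)                   # (2, 3, 5) == [batch_size, Tq, Tv]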
Example #8
    def step(cell_inputs, cell_states):
        """Step function that will be used by Keras RNN backend."""
        h_tm1 = cell_states[0]

        # inputs projected by all gate matrices at once
        matrix_x = backend.dot(cell_inputs, kernel)
        matrix_x = backend.bias_add(matrix_x, input_bias)

        x_z, x_r, x_h = tf.split(matrix_x, 3, axis=1)

        # hidden state projected by all gate matrices at once
        matrix_inner = backend.dot(h_tm1, recurrent_kernel)
        matrix_inner = backend.bias_add(matrix_inner, recurrent_bias)

        recurrent_z, recurrent_r, recurrent_h = tf.split(matrix_inner,
                                                         3,
                                                         axis=1)
        z = tf.sigmoid(x_z + recurrent_z)
        r = tf.sigmoid(x_r + recurrent_r)
        hh = tf.tanh(x_h + r * recurrent_h)

        # previous and candidate state mixed by update gate
        h = z * h_tm1 + (1 - z) * hh
        return h, [h]
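
As with the LSTM step above, the weights live in the enclosing layer. A hedged sketch of the assumed shapes for input dimension D and U units: kernel [D, 3U], recurrent_kernel [U, 3U], and two biases of shape [3U] (this matches the reset_after GRU formulation, where the reset gate scales the already-projected recurrent term). Again, the step body is assumed to be at module level so these names resolve as globals.

import tensorflow as tf
from tensorflow.keras import backend

B, D, U = 2, 3, 5                               # batch, input dim, units (illustrative)
kernel = tf.random.normal([D, 3 * U])
recurrent_kernel = tf.random.normal([U, 3 * U])
input_bias = tf.zeros([3 * U])
recurrent_bias = tf.zeros([3 * U])

x = tf.random.normal([B, D])
h, [h_new] = step(x, [tf.zeros([B, U])])        # assumes `step` resolves these names
print(h.shape)                                  # (2, 5)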
Example #9
def get_ac_loss(learner_agent_output, env_output, actor_agent_output,
                actor_action, reward_clipping, discounting, baseline_cost,
                entropy_cost, num_steps):
  """Computes actor-critic loss.

  Args:
    learner_agent_output: A nested structure of type `AgentOutput`. The tensors
      are expected to have shape [num_timesteps, batch, ...].
    env_output: A nested structure of type `EnvOutput`. The tensors are expected
      to have shape [num_timesteps, batch, ...].
    actor_agent_output: A nested structure of type `AgentOutput`. The tensors
      are expected to have shape [num_timesteps, batch, ...].
    actor_action: An instance of `ActorAction` containing indices of the actions
      chosen by the actor. The total number of actions available to the actor at
      any point is equal to actor_agent_output.policy_logits.shape()[-1].
    reward_clipping: A string denoting the clipping strategy to be applied to
      rewards. An empty string means no clipping is applied.
    discounting: The discount factor.
    baseline_cost: A multiplier for baseline loss.
    entropy_cost: A multiplier for entropy.
    num_steps: An int to be used as step arg for summaries.

  Returns:
    A tensor of shape [num_timesteps - 1, batch_size] which contains the
    computed actor-critic loss per timestep per element.
  """
  # Use last baseline value (from the value function) to bootstrap.
  bootstrap_value = learner_agent_output.baseline[-1]

  # At this point, the environment outputs at time step `t` are the inputs
  # that lead to the learner_outputs at time step `t`. After the following
  # shifting, the actions in actor_agent_output and learner_outputs at time step
  # `t` are what lead to the environment outputs at time step `t`.
  actor_agent_output = tf.nest.map_structure(lambda t: t[1:],
                                             actor_agent_output)
  rewards, done, _, _ = tf.nest.map_structure(lambda t: t[1:], env_output)
  actor_action_idx = actor_action.chosen_action_idx[1:]
  learner_agent_output = tf.nest.map_structure(lambda t: t[:-1],
                                               learner_agent_output)

  clipped_rewards = rewards
  if reward_clipping == 'abs_one':
    clipped_rewards = tf.clip_by_value(rewards, -1, 1)
  elif reward_clipping == 'soft_asymmetric':
    squeezed = tf.tanh(rewards / 5.0)
    # Negative rewards are given less weight than positive rewards.
    clipped_rewards = tf.where(rewards < 0, .3 * squeezed, squeezed) * 5.

  discounts = tf.cast(~done, tf.float32) * discounting

  # Compute V-trace returns and weights.
  vtrace_returns = vtrace.from_logits(
      behaviour_policy_logits=actor_agent_output.policy_logits,
      target_policy_logits=learner_agent_output.policy_logits,
      actions=actor_action_idx,
      discounts=discounts,
      rewards=clipped_rewards,
      values=learner_agent_output.baseline,
      bootstrap_value=bootstrap_value)

  pg_advantages = vtrace_returns.pg_advantages
  v_advantages = vtrace_returns.vs - learner_agent_output.baseline
  tf.summary.histogram('pg_advantages', pg_advantages, step=num_steps)
  tf.summary.histogram('v_advantages', v_advantages, step=num_steps)

  # Compute loss as a weighted sum of the baseline loss, the policy gradient
  # loss and an entropy regularization term.
  pg_loss = _compute_policy_gradient_loss(
      learner_agent_output.policy_logits,
      actor_action_idx,
      pg_advantages,
      step=num_steps)
  baseline_loss = _compute_baseline_loss(v_advantages, step=num_steps)
  entropy = _compute_entropy_loss(
      learner_agent_output.policy_logits, step=num_steps)

  total_loss = pg_loss + baseline_cost * baseline_loss + entropy_cost * entropy
  tf.summary.scalar('loss/ac_loss', tf.reduce_mean(total_loss), step=num_steps)
  return total_loss
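
The tanh usage here is the 'soft_asymmetric' reward clipping. In isolation it squashes rewards into roughly (-5, 5) via tanh(r / 5) * 5 and scales negative rewards down by 0.3:

import tensorflow as tf

rewards = tf.constant([-10.0, -1.0, 0.0, 1.0, 10.0])
squeezed = tf.tanh(rewards / 5.0)
clipped = tf.where(rewards < 0, .3 * squeezed, squeezed) * 5.
print(clipped.numpy())   # approx. [-1.45, -0.30, 0., 0.99, 4.82]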
Example #10
def tanh(x):
    return tf.tanh(tf.minimum(tf.maximum(x, -MAX_TANH_ARG), MAX_TANH_ARG))
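
MAX_TANH_ARG is a module-level constant not shown in the snippet; the value below is a hypothetical stand-in. Clamping the argument before tf.tanh simply bounds it, presumably to keep the op in a numerically well-behaved range.

import tensorflow as tf

MAX_TANH_ARG = 15.0   # hypothetical value; the real constant is not shown above
print(tanh(tf.constant([-100.0, 0.0, 100.0])).numpy())   # approx. [-1., 0., 1.]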
Example #11
def tanh_and_scale_to_spec(inputs, spec):
  """Maps inputs with arbitrary range to range defined by spec using `tanh`."""
  means = (spec.maximum + spec.minimum) / 2.0
  magnitudes = (spec.maximum - spec.minimum) / 2.0

  return means + magnitudes * tf.tanh(inputs)
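
Usage sketch: `spec` only needs `minimum` and `maximum` attributes (for example a TF-Agents BoundedTensorSpec); a simple namedtuple stand-in is enough to see the squashing.

import collections
import tensorflow as tf

BoundsSpec = collections.namedtuple('BoundsSpec', ['minimum', 'maximum'])  # hypothetical stand-in
spec = BoundsSpec(minimum=-2.0, maximum=2.0)
out = tanh_and_scale_to_spec(tf.constant([-100.0, 0.0, 100.0]), spec)
print(out.numpy())   # approx. [-2., 0., 2.]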