def _calculate_scores(self, query, key): """Calculates attention scores as a query-key dot product. Args: query: Query tensor of shape `[batch_size, Tq, dim]`. key: Key tensor of shape `[batch_size, Tv, dim]`. Returns: Tensor of shape `[batch_size, Tq, Tv]`. """ if self.score_mode == "dot": scores = tf.matmul(query, key, transpose_b=True) if self.scale is not None: scores *= self.scale elif self.score_mode == "concat": # Reshape tensors to enable broadcasting. # Reshape into [batch_size, Tq, 1, dim]. q_reshaped = tf.expand_dims(query, axis=-2) # Reshape into [batch_size, 1, Tv, dim]. k_reshaped = tf.expand_dims(key, axis=-3) if self.scale is not None: scores = self.concat_score_weight * tf.reduce_sum( tf.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1) else: scores = self.concat_score_weight * tf.reduce_sum( tf.tanh(q_reshaped + k_reshaped), axis=-1) return scores
def laplace_attention(q, k, v, scale, normalise):
  """Computes Laplace exponential attention.

  Args:
    q: queries. Tensor of shape [batch_size, m, d_k].
    k: keys. Tensor of shape [batch_size, n, d_k].
    v: values. Tensor of shape [batch_size, n, d_v].
    scale: float that scales the L1 distance.
    normalise: Boolean that determines whether weights sum to 1.

  Returns:
    Tensor of shape [batch_size, m, d_v].
  """
  k = tf.expand_dims(k, axis=1)  # [batch_size, 1, n, d_k]
  q = tf.expand_dims(q, axis=2)  # [batch_size, m, 1, d_k]
  unnorm_weights = -tf.abs((k - q) / scale)  # [batch_size, m, n, d_k]
  unnorm_weights = tf.reduce_sum(unnorm_weights, axis=-1)  # [batch_size, m, n]
  if normalise:
    weight_fn = tf.nn.softmax
  else:
    weight_fn = lambda x: 1 + tf.tanh(x)
  weights = weight_fn(unnorm_weights)  # [batch_size, m, n]
  rep = tf.einsum('bik,bkj->bij', weights, v)  # [batch_size, m, d_v]
  return rep
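# Hypothetical usage sketch for `laplace_attention`; shapes are illustrative.
import tensorflow as tf

q = tf.random.normal([1, 5, 16])   # [batch_size, m, d_k]
k = tf.random.normal([1, 10, 16])  # [batch_size, n, d_k]
v = tf.random.normal([1, 10, 32])  # [batch_size, n, d_v]
rep = laplace_attention(q, k, v, scale=1.0, normalise=True)
print(rep.shape)  # (1, 5, 32)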
def step(cell_inputs, cell_states): """Step function that will be used by Keras RNN backend.""" h_tm1 = cell_states[0] # previous memory state c_tm1 = cell_states[1] # previous carry state z = backend.dot(cell_inputs, kernel) z += backend.dot(h_tm1, recurrent_kernel) z = backend.bias_add(z, bias) z0, z1, z2, z3 = tf.split(z, 4, axis=1) i = tf.sigmoid(z0) f = tf.sigmoid(z1) c = f * c_tm1 + i * tf.tanh(z2) o = tf.sigmoid(z3) h = o * tf.tanh(c) return h, [h, c]
def __call__(self, x, carry):
  # Update and reset gates, each conditioned on the input and current carry.
  update_t = tf.sigmoid(
      x @ self.W_update_x + carry @ self.W_update_c + self.b_update)
  reset_t = tf.sigmoid(
      x @ self.W_reset_x + carry @ self.W_reset_c + self.b_reset)
  # Mix the old carry with the tanh candidate state, gated by the update gate.
  new_carry = update_t * carry + (1. - update_t) * tf.tanh(
      self.next_x_net(x) + self.next_c_net(reset_t * carry) + self.b_next)
  return new_carry
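# Hypothetical wiring for the GRU-style cell above. `SimpleNamespace` stands in
# for the host object; the attribute names match what `__call__` expects, with
# random weights and Dense layers as the candidate networks.
import tensorflow as tf
from types import SimpleNamespace

dim = 4
cell = SimpleNamespace(
    W_update_x=tf.random.normal([dim, dim]),
    W_update_c=tf.random.normal([dim, dim]),
    W_reset_x=tf.random.normal([dim, dim]),
    W_reset_c=tf.random.normal([dim, dim]),
    b_update=tf.zeros([dim]),
    b_reset=tf.zeros([dim]),
    b_next=tf.zeros([dim]),
    next_x_net=tf.keras.layers.Dense(dim, use_bias=False),
    next_c_net=tf.keras.layers.Dense(dim, use_bias=False),
)
x = tf.random.normal([2, dim])
carry = tf.zeros([2, dim])
new_carry = __call__(cell, x, carry)  # calling the unbound function; [2, dim]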
def gelu(x):
  """Gaussian Error Linear Unit.

  This is a smoother version of the ReLU.
  Original paper: https://arxiv.org/abs/1606.08415

  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the GELU activation applied.
  """
  cdf = 0.5 * (1.0 + tf.tanh(
      (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
  return x * cdf
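# Illustrative sanity check: the tanh formulation above should closely match
# TensorFlow's built-in approximate GELU.
import numpy as np
import tensorflow as tf

x = tf.linspace(-3.0, 3.0, 7)
print(gelu(x).numpy())
print(tf.nn.gelu(x, approximate=True).numpy())  # agrees to float32 precision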
def tanh(x): """Hyperbolic tangent activation function. For example: >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) >>> b = tf.keras.activations.tanh(a) >>> b.numpy() array([-0.9950547, -0.7615942, 0., 0.7615942, 0.9950547], dtype=float32) Args: x: Input tensor. Returns: Tensor of same shape and dtype of input `x`, with tanh activation: `tanh(x) = sinh(x)/cosh(x) = ((exp(x) - exp(-x))/(exp(x) + exp(-x)))`. """ return tf.tanh(x)
def _calculate_scores(self, query, key): """Calculates attention scores as a nonlinear sum of query and key. Args: query: Query tensor of shape `[batch_size, Tq, dim]`. key: Key tensor of shape `[batch_size, Tv, dim]`. Returns: Tensor of shape `[batch_size, Tq, Tv]`. """ # Reshape tensors to enable broadcasting. # Reshape into [batch_size, Tq, 1, dim]. q_reshaped = tf.expand_dims(query, axis=-2) # Reshape into [batch_size, 1, Tv, dim]. k_reshaped = tf.expand_dims(key, axis=-3) if self.use_scale: scale = self.scale else: scale = 1. return tf.reduce_sum(scale * tf.tanh(q_reshaped + k_reshaped), axis=-1)
def step(cell_inputs, cell_states): """Step function that will be used by Keras RNN backend.""" h_tm1 = cell_states[0] # inputs projected by all gate matrices at once matrix_x = backend.dot(cell_inputs, kernel) matrix_x = backend.bias_add(matrix_x, input_bias) x_z, x_r, x_h = tf.split(matrix_x, 3, axis=1) # hidden state projected by all gate matrices at once matrix_inner = backend.dot(h_tm1, recurrent_kernel) matrix_inner = backend.bias_add(matrix_inner, recurrent_bias) recurrent_z, recurrent_r, recurrent_h = tf.split(matrix_inner, 3, axis=1) z = tf.sigmoid(x_z + recurrent_z) r = tf.sigmoid(x_r + recurrent_r) hh = tf.tanh(x_h + r * recurrent_h) # previous and candidate state mixed by update gate h = z * h_tm1 + (1 - z) * hh return h, [h]
def get_ac_loss(learner_agent_output, env_output, actor_agent_output,
                actor_action, reward_clipping, discounting, baseline_cost,
                entropy_cost, num_steps):
  """Computes actor-critic loss.

  Args:
    learner_agent_output: A nested structure of type `AgentOutput`. The tensors
      are expected to have shape [num_timesteps, batch, ...].
    env_output: A nested structure of type `EnvOutput`. The tensors are
      expected to have shape [num_timesteps, batch, ...].
    actor_agent_output: A nested structure of type `AgentOutput`. The tensors
      are expected to have shape [num_timesteps, batch, ...].
    actor_action: An instance of `ActorAction` containing indices of the
      actions chosen by the actor. The total number of actions available to the
      actor at any point is equal to
      `actor_agent_output.policy_logits.shape[-1]`.
    reward_clipping: A string denoting the clipping strategy to be applied to
      rewards. An empty string means no clipping is applied.
    discounting: The discount factor.
    baseline_cost: A multiplier for the baseline loss.
    entropy_cost: A multiplier for the entropy.
    num_steps: An int to be used as the step arg for summaries.

  Returns:
    A tensor of shape [num_timesteps - 1, batch_size] which contains the
    computed actor-critic loss per timestep per element.
  """
  # Use last baseline value (from the value function) to bootstrap.
  bootstrap_value = learner_agent_output.baseline[-1]

  # At this point, the environment outputs at time step `t` are the inputs that
  # lead to the learner_outputs at time step `t`. After the following shifting,
  # the actions in actor_agent_output and learner_outputs at time step `t` are
  # what lead to the environment outputs at time step `t`.
  actor_agent_output = tf.nest.map_structure(lambda t: t[1:],
                                             actor_agent_output)
  rewards, done, _, _ = tf.nest.map_structure(lambda t: t[1:], env_output)
  actor_action_idx = actor_action.chosen_action_idx[1:]
  learner_agent_output = tf.nest.map_structure(lambda t: t[:-1],
                                               learner_agent_output)

  clipped_rewards = rewards
  if reward_clipping == 'abs_one':
    clipped_rewards = tf.clip_by_value(rewards, -1, 1)
  elif reward_clipping == 'soft_asymmetric':
    squeezed = tf.tanh(rewards / 5.0)
    # Negative rewards are given less weight than positive rewards.
    clipped_rewards = tf.where(rewards < 0, .3 * squeezed, squeezed) * 5.

  discounts = tf.cast(~done, tf.float32) * discounting

  # Compute V-trace returns and weights.
  vtrace_returns = vtrace.from_logits(
      behaviour_policy_logits=actor_agent_output.policy_logits,
      target_policy_logits=learner_agent_output.policy_logits,
      actions=actor_action_idx,
      discounts=discounts,
      rewards=clipped_rewards,
      values=learner_agent_output.baseline,
      bootstrap_value=bootstrap_value)

  pg_advantages = vtrace_returns.pg_advantages
  v_advantages = vtrace_returns.vs - learner_agent_output.baseline
  tf.summary.histogram('pg_advantages', pg_advantages, step=num_steps)
  tf.summary.histogram('v_advantages', v_advantages, step=num_steps)

  # Compute loss as a weighted sum of the baseline loss, the policy gradient
  # loss and an entropy regularization term.
  pg_loss = _compute_policy_gradient_loss(
      learner_agent_output.policy_logits,
      actor_action_idx,
      pg_advantages,
      step=num_steps)
  baseline_loss = _compute_baseline_loss(v_advantages, step=num_steps)
  entropy = _compute_entropy_loss(
      learner_agent_output.policy_logits, step=num_steps)
  total_loss = pg_loss + baseline_cost * baseline_loss + entropy_cost * entropy

  tf.summary.scalar('loss/ac_loss', tf.reduce_mean(total_loss), step=num_steps)
  return total_loss
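# Standalone illustration of the 'soft_asymmetric' branch above: rewards are
# squashed through 5 * tanh(r / 5), and negative rewards are additionally
# down-weighted by a factor of 0.3.
import tensorflow as tf

rewards = tf.constant([-10.0, -1.0, 0.0, 1.0, 10.0])
squeezed = tf.tanh(rewards / 5.0)
clipped = tf.where(rewards < 0, .3 * squeezed, squeezed) * 5.
print(clipped.numpy())  # approx. [-1.45, -0.3, 0.0, 0.99, 4.82]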
def tanh(x):
  """Numerically safer tanh: clamps the argument to [-MAX_TANH_ARG, MAX_TANH_ARG]."""
  return tf.tanh(tf.minimum(tf.maximum(x, -MAX_TANH_ARG), MAX_TANH_ARG))
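# Usage sketch. `MAX_TANH_ARG` is a module-level constant in the original
# source whose value is not shown here; 15.0 is an assumed stand-in. Clamping
# keeps extreme arguments out of tf.tanh without changing the result, since
# tanh is already saturated at +/-1 well before |x| = 15.
import tensorflow as tf

MAX_TANH_ARG = 15.0  # assumed value for illustration only

x = tf.constant([-1e6, 0.0, 1e6])
print(tanh(x).numpy())  # [-1.  0.  1.]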
def tanh_and_scale_to_spec(inputs, spec):
  """Maps inputs with arbitrary range to range defined by spec using `tanh`."""
  means = (spec.maximum + spec.minimum) / 2.0
  magnitudes = (spec.maximum - spec.minimum) / 2.0
  return means + magnitudes * tf.tanh(inputs)
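# Illustrative call, assuming a simple spec object with scalar `minimum` /
# `maximum` attributes (a stand-in for e.g. a dm_env BoundedArraySpec).
import tensorflow as tf
from types import SimpleNamespace

spec = SimpleNamespace(minimum=-2.0, maximum=6.0)
inputs = tf.constant([-100.0, 0.0, 100.0])
print(tanh_and_scale_to_spec(inputs, spec).numpy())  # [-2.  2.  6.]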