def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(scope, default_name="gru_cell",
                           values=[inputs, state]):
        if not isinstance(inputs, (list, tuple)):
            inputs = [inputs]

        all_inputs = list(inputs) + [state]
        # Reset and update gates
        r = tf.nn.sigmoid(linear(all_inputs, self._num_units, False, False,
                                 scope="reset_gate"))
        u = tf.nn.sigmoid(linear(all_inputs, self._num_units, False, False,
                                 scope="update_gate"))
        # Candidate state, computed from the inputs and the reset-gated state
        all_inputs = list(inputs) + [r * state]
        c = linear(all_inputs, self._num_units, True, False,
                   scope="candidate")

        new_state = (1.0 - u) * state + u * tf.tanh(c)

        return new_state, new_state
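
# Usage sketch (illustration only, not part of the original module). It assumes
# the __call__ above belongs to a cell class, called GRUCell here for the sake
# of the example, whose constructor stores num_units in self._num_units, and
# that the linear() projection helper is defined elsewhere in this package.
import tensorflow as tf

def _gru_usage_example():
    batch_size, input_size, num_units = 32, 620, 512
    cell = GRUCell(num_units)  # hypothetical class wrapping the __call__ above
    inputs = tf.placeholder(tf.float32, [batch_size, input_size])
    state = tf.zeros([batch_size, num_units])
    # One recurrence step; for this GRU the output equals the new state.
    output, new_state = cell(inputs, state)
    return output, new_state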
def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(scope or "lstm_cell"):
        c, h = state
        # Compute input, candidate, forget and output gates in one projection
        concat = linear([inputs, h], 4 * self._num_units, True, scope="gates")
        i, j, f, o = tf.split(concat, 4, 1)
        j = self._activation(j)

        new_c = c * tf.nn.sigmoid(f) + tf.nn.sigmoid(i) * j

        if not self._output_activation:
            new_h = new_c * tf.nn.sigmoid(o)
        else:
            new_h = self._output_activation(new_c) * tf.nn.sigmoid(o)

        new_state = (new_c, new_h)

        return new_h, new_state
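
# Usage sketch (illustration only, not part of the original module). It assumes
# the __call__ above belongs to a cell class, called LSTMCell here for the sake
# of the example, and that its state is the (c, h) tuple unpacked above.
import tensorflow as tf

def _lstm_usage_example():
    batch_size, input_size, num_units = 32, 620, 512
    cell = LSTMCell(num_units)  # hypothetical class wrapping the __call__ above
    inputs = tf.placeholder(tf.float32, [batch_size, input_size])
    # The state is a (cell_state, hidden_state) tuple.
    state = (tf.zeros([batch_size, num_units]),
             tf.zeros([batch_size, num_units]))
    # One recurrence step; returns the new hidden state and the state tuple.
    new_h, new_state = cell(inputs, state)
    return new_h, new_state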
def attention(query, memories, bias, hidden_size, cache=None, reuse=None,
              dtype=None, scope=None):
    """ Standard attention layer

    Args:
        query: A tensor with shape [batch, key_size]
        memories: A tensor with shape [batch, memory_size, key_size]
        bias: A tensor with shape [batch, memory_size]
        hidden_size: An integer
        cache: A dictionary of precomputed values
        reuse: A boolean value, whether to reuse the scope
        dtype: An optional instance of tf.DType
        scope: An optional string, the scope of this layer

    Returns:
        A dictionary with key "value" (a tensor with shape [batch, key_size])
        and key "weight" (a tensor with shape [batch, memory_size]). If query
        is None and cache is None, a dictionary containing only the projected
        memories under key "key" is returned instead.
    """
    with tf.variable_scope(scope or "attention", reuse=reuse,
                           values=[query, memories, bias], dtype=dtype):
        mem_shape = tf.shape(memories)
        key_size = memories.get_shape().as_list()[-1]

        if cache is None:
            # Project the memories to the attention space
            k = tf.reshape(memories, [-1, key_size])
            k = linear(k, hidden_size, False, False, scope="k_transform")

            if query is None:
                return {"key": k}
        else:
            k = cache["key"]

        # Additive attention scores over the memory positions
        q = linear(query, hidden_size, False, False, scope="q_transform")
        k = tf.reshape(k, [mem_shape[0], mem_shape[1], hidden_size])
        hidden = tf.tanh(q[:, None, :] + k)
        hidden = tf.reshape(hidden, [-1, hidden_size])

        # Shape: [batch, mem_size, 1]
        logits = linear(hidden, 1, False, False, scope="logits")
        logits = tf.reshape(logits, [-1, mem_shape[1]])

        if bias is not None:
            logits = logits + bias

        alpha = tf.nn.softmax(logits)

        outputs = {
            "value": tf.reduce_sum(alpha[:, :, None] * memories, axis=1),
            "weight": alpha
        }

    return outputs
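
# Usage sketch (illustration only, not part of the original module). It shows
# the two-phase pattern supported above: project the memories once by calling
# with query=None to obtain a "key" cache, then score individual queries
# against that cache inside a decoding loop. tf.AUTO_REUSE lets the per-query
# projections be created on the first call and reused on later calls.
import tensorflow as tf

def _attention_usage_example():
    batch, mem_size, key_size, hidden_size = 32, 40, 1024, 512
    memories = tf.placeholder(tf.float32, [batch, mem_size, key_size])
    bias = None  # or a [batch, mem_size] tensor masking padded positions

    # Phase 1: precompute the projected keys once.
    cache = attention(None, memories, bias, hidden_size)

    # Phase 2: per decoding step, attend with the current query.
    query = tf.placeholder(tf.float32, [batch, key_size])
    outputs = attention(query, memories, bias, hidden_size, cache=cache,
                        reuse=tf.AUTO_REUSE)
    context = outputs["value"]   # [batch, key_size]
    weights = outputs["weight"]  # [batch, mem_size]
    return context, weights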