Example #1
  def __call__(self, input_tensor, concat_output=False):
    """ Invoke the bidirectional LSTM layer.

    Args:
      See the arguments of the LSTM class.

    Returns:
      Output contains three parts:
      1) Output tensor of shape [batch, time, depth] or [batch, depth] * time if
        not concatenated.
      2) Final state of the forward LSTM.
      3) Final state of the backward LSTM.
    """
    fwd_outputs, fwd_state = self.fwd_lstm(input_tensor)
    # Reverse the time dimension of the input for the backward LSTM.
    input_bwd = input_tensor
    if isinstance(input_bwd, list):
      input_bwd.reverse()
    else:
      input_bwd = array_ops.unstack(input_bwd, 1)
      input_bwd.reverse()
    bwd_outputs, bwd_state = self.bwd_lstm(input_bwd)
    outputs = []
    for i in range(len(fwd_outputs)):
      outputs.append(
          array_ops.concat([fwd_outputs[i], bwd_outputs[i]], 1,
                           name=self.name + "concat"))
    if concat_output:
      return self._concat_output_steps(outputs), fwd_state, bwd_state
    return outputs, fwd_state, bwd_state
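The loop above pairs each forward timestep with the corresponding step of the time-reversed backward pass and concatenates them along the depth axis. A minimal NumPy sketch of that data movement (the function and step-function names are hypothetical, not part of the SMAUG API):

import numpy as np

# Minimal NumPy sketch of the data movement (not the LSTM math) in the
# bidirectional call above; input_steps is assumed to be a list of
# [batch, depth] arrays, one per timestep.
def bidirectional_outputs(input_steps, fwd_step_fn, bwd_step_fn):
  fwd_outputs = [fwd_step_fn(x) for x in input_steps]
  # The backward pass consumes the timesteps in reverse order.
  bwd_outputs = [bwd_step_fn(x) for x in reversed(input_steps)]
  # Concatenate each timestep's forward and backward outputs on depth.
  return [np.concatenate([f, b], axis=1)
          for f, b in zip(fwd_outputs, bwd_outputs)]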
Example #2
  def build_test_residual_graph(self, backend):
    """Create a residual model.

    The graph contains a residual connection, where the outputs of conv0 and
    conv2 are added at the end."""

    np_dtype = test_backend_dtypes[backend]
    self.expected_dtype = datatypes.np_to_smaug_type[np_dtype]
    with Graph(name="test_residual_graph", backend=backend) as graph:
      input_tensor = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(1, 1, 28, 28).astype(np_dtype))
      filter_tensor0 = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(64, 1, 3, 3).astype(np_dtype))
      filter_tensor1 = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(64, 1, 3, 3).astype(np_dtype))
      filter_tensor2 = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(64, 64, 3, 3).astype(np_dtype))
      bn_mean_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_var_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_gamma_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_beta_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))

      act = data_op.input_data(input_tensor, "input")
      x = nn_ops.convolution(
          act, filter_tensor0, stride=[1, 1], padding="same", name="conv0")
      out = nn_ops.convolution(
          act, filter_tensor1, stride=[1, 1], padding="same", name="conv1")
      out = nn_ops.batch_norm(
          out, bn_mean_tensor, bn_var_tensor, bn_gamma_tensor, bn_beta_tensor,
          name="bn")
      out = activation_ops.relu(out, "relu")
      out = nn_ops.convolution(
          out, filter_tensor2, stride=[1, 1], padding="same", name="conv2")
      out = math_ops.add(x, out, "add")
      out = math_ops.mul(x, out, "mul")
      # Concatenate the channel dimension of x and out.
      axis = 1 if out.shape.layout == types_pb2.NCHW else 3
      out = array_ops.concat([x, out], axis, "concat")
      # Evenly split the tensor into 4 over the channel dimension.
      out0, out1, out2, out3 = array_ops.split(out, 4, axis, "split")
      out = math_ops.mul(
          math_ops.add(out0, out1, "add1"), math_ops.add(out2, out3, "add2"),
          "mul1")

    self.test_graph, _ = graph.to_proto()
    self.backend = backend
    self.alignment = global_vars.backend_alignment[
        self.test_graph.backend]
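A hedged usage sketch for the builder above, assuming it lives in a unittest.TestCase fixture and that test_backend_dtypes maps backend names to NumPy dtypes as the first lines imply; the test name and assertion are illustrative, not taken from the original suite.

  def test_residual_graph_builds(self):
    # Hypothetical test: build the residual graph once per backend listed in
    # test_backend_dtypes and check that the serialized proto records the
    # requested backend.
    for backend in test_backend_dtypes:
      self.build_test_residual_graph(backend)
      self.assertEqual(self.test_graph.backend, backend)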
Example #3
  def __call__(self, query):
    """ Invoke the attention layer to compute the attention vector."""

    # Compute alignments shaped [batch, time].
    alignment = self._compute_alignment(query)

    # Compute context vector (aka attention). Context is the inner product of
    # alignments and keys along the time dimension. The shape of context is
    # [batch, depth].
    # alignment_batches is shaped [1, time] * batch.
    alignment_batches = array_ops.split(
        alignment, self.batch_size, axis=0, name=self.name + "split")
    # [batch, time, depth] -> [batch, depth, time] -> [depth, time] * batch.
    values = array_ops.unstack(
        array_ops.reorder(self.memory, types_pb2.NCT), 0,
        name=self.name + "unstack")
    context = []
    for i in range(self.batch_size):
      # Every mat_mul produces a tensor shaped [1, depth].
      context.append(
          nn_ops.mat_mul(
              alignment_batches[i], values[i], name=self.name + "mm"))
    # context shaped [batch, depth].
    context = array_ops.concat(context, 0, name=self.name + "concat")

    return context
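Per batch element, the split/reorder/mat_mul loop above reduces the memory along the time dimension, weighted by the alignment. A minimal NumPy sketch of that reduction (function and argument names are assumptions):

import numpy as np

# Minimal NumPy sketch of the context computation above, assuming
# alignment has shape [batch, time] and memory has shape [batch, time, depth].
def context_reference(alignment, memory):
  batch = alignment.shape[0]
  # Each row is alignment[b] (shape [time]) times memory[b]
  # (shape [time, depth]), giving a [depth]-sized context vector.
  return np.stack([alignment[b] @ memory[b] for b in range(batch)])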
Example #4
    def test_bahdanau_attention(self):
        # Build and run a Bahdanau attention layer in TF.
        batch = 2
        units = 32
        timestep = 8
        # Use the Bahdanau attention mechanism.
        memory = tf.random.normal([batch, timestep, units], dtype=self.dtype)
        attention_mechanism = tfa.seq2seq.BahdanauAttention(units=units,
                                                            memory=memory,
                                                            dtype=self.dtype)
        # Compute attention using the query and state.
        tf_cell = tf.keras.layers.LSTMCell(units,
                                           use_bias=False,
                                           unit_forget_bias=False,
                                           dtype=self.dtype)
        attention_wrapper = tfa.seq2seq.AttentionWrapper(tf_cell,
                                                         attention_mechanism,
                                                         output_attention=True,
                                                         dtype=self.dtype)
        query = tf.random.normal([batch, units], dtype=self.dtype)
        tf_initial_state = attention_wrapper.get_initial_state(
            batch_size=batch, dtype=self.dtype)
        # Perform a step of attention-wrapped RNN.
        tf_attention, _ = attention_wrapper(query, tf_initial_state)

        # Build the attention model in SMAUG using the tensors from the TF model.
        query = Tensor(data_layout=types_pb2.NC, tensor_data=query.numpy())
        w, u = recurrent_test.createSmaugWeights(tf_cell)
        memory = Tensor(data_layout=types_pb2.NTC, tensor_data=memory.numpy())
        weights = attention_mechanism.get_weights()
        w_alignment = Tensor(data_layout=types_pb2.NC,
                             tensor_data=np.expand_dims(weights[0], 0))
        w_decoder = Tensor(data_layout=types_pb2.NC,
                           tensor_data=np.transpose(weights[1]))
        w_encoder = Tensor(data_layout=types_pb2.NC,
                           tensor_data=np.transpose(weights[2]))
        with Graph(name=self.graph_name, backend=self.backend) as graph:
            # Create an LSTM and an attention, and perform one step.
            sg_cell = LSTM([w, u])
            sg_attention = BahdanauAttention(memory, w_encoder, w_decoder,
                                             w_alignment)
            sg_initial_attention = Tensor(data_layout=types_pb2.NC,
                                          tensor_data=np.zeros(
                                              (batch, units),
                                              dtype=self.dtype))
            cell_out, _ = sg_cell.step(concat([query, sg_initial_attention],
                                              axis=1),
                                       timestep=0)
            sg_attention(cell_out)
        self.runAndValidate(graph, tf_attention, decimal=2)
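For reference, the additive (Bahdanau) score this test exercises can be sketched in NumPy as below. The mapping of weights[0..2] to w_alignment, w_decoder and w_encoder follows the Tensor setup above; the exact tfa internals (e.g. weight normalization) are not assumed to match, so this is only an illustrative sketch.

import numpy as np

# Minimal NumPy sketch of additive (Bahdanau) alignment, assuming
# query: [batch, units], memory: [batch, time, units],
# w_encoder/w_decoder: [units, units], w_alignment: [1, units].
def bahdanau_alignment(query, memory, w_encoder, w_decoder, w_alignment):
  keys = memory @ w_encoder.T                # [batch, time, units]
  processed_query = query @ w_decoder.T      # [batch, units]
  scores = np.tanh(keys + processed_query[:, None, :]) @ w_alignment[0]
  # Softmax over time yields the alignment weights over the memory.
  e = np.exp(scores - scores.max(axis=1, keepdims=True))
  return e / e.sum(axis=1, keepdims=True)    # [batch, time]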