Example #1
  def __call__(self, input_tensor, concat_output=False):
    """Invoke this cell repeatedly until all input timesteps are consumed.

    Args:
      input_tensor: Input tensor of shape [batch, time, depth] (aka NTC layout)
        or a series of tensors shaped [batch, depth] (aka NC layout)
        representing timesteps.
      concat_output: If true, the output for each timestep will be concatenated
        into a single tensor, otherwise a list of output tensors will be
        returned.

    Returns:
      Output contains two parts:
      1) Output tensor of shape [batch, time, depth] or
        [batch, depth] * time if not concatenated.
      2) The final state of the LSTM.
    """
    num_steps = 0
    if not isinstance(input_tensor, list):
      input_steps = array_ops.unstack(
          input_tensor, 1, name=self.name + "unstack")
      num_steps = input_tensor.shape.dims[1]
    else:
      input_steps = input_tensor
      num_steps = len(input_steps)
    state = self.c
    output_steps = []
    # Unroll the timesteps.
    for i in range(num_steps):
      output, state = self.step(input_steps[i], i)
      output_steps.append(output)
    if concat_output:
      return self._concat_output_steps(output_steps), state
    return output_steps, state
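
The unrolling above can be traced with a minimal NumPy sketch. This is only an illustration of the shape handling, not the SMAUG API; the batch/time/depth sizes and the scaling stand-in for self.step() are assumptions.

    import numpy as np

    # Assume an NTC input: batch=2, time=4, depth=8.
    inputs = np.random.rand(2, 4, 8).astype(np.float32)

    # Split along the time axis into a list of [batch, depth] steps,
    # mirroring array_ops.unstack(input_tensor, 1).
    input_steps = [np.squeeze(s, axis=1)
                   for s in np.split(inputs, inputs.shape[1], axis=1)]

    # Unroll: feed one step at a time and collect each step's output.
    output_steps = [step * 0.5 for step in input_steps]  # stand-in for self.step()

    # concat_output=True corresponds to stacking back into [batch, time, depth].
    concatenated = np.stack(output_steps, axis=1)
    assert concatenated.shape == (2, 4, 8)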
Example #2
  def __call__(self, query):
    """Invoke the attention layer to compute the attention vector."""

    # Compute alignments shaped [batch, time].
    alignment = self._compute_alignment(query)

    # Compute context vector (aka attention). Context is the inner product of
    # alignments and keys along the time dimension. The shape of context is
    # [batch, depth].
    # alignment_batches is shaped [1, time] * batch.
    alignment_batches = array_ops.split(
        alignment, self.batch_size, axis=0, name=self.name + "split")
    # [batch, time, depth] -> [batch, depth, time] -> [depth, time] * batch.
    values = array_ops.unstack(
        array_ops.reorder(self.memory, types_pb2.NCT), 0,
        name=self.name + "unstack")
    context = []
    for i in range(self.batch_size):
      # Every mat_mul produces a tensor shaped [1, depth].
      context.append(
          nn_ops.mat_mul(
              alignment_batches[i], values[i], name=self.name + "mm"))
    # context shaped [batch, depth].
    context = array_ops.concat(context, 0, name=self.name + "concat")

    return context
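
The per-batch matrix multiplies above amount to an alignment-weighted sum of the memory over the time dimension (the NCT reorder suggests mat_mul treats its second operand as [output, input]). The NumPy sketch below shows that contraction under assumed shapes; it is an illustration, not the layer itself.

    import numpy as np

    batch, time, depth = 2, 5, 8
    alignment = np.random.rand(batch, time).astype(np.float32)      # [batch, time]
    memory = np.random.rand(batch, time, depth).astype(np.float32)  # NTC memory

    # Per-batch loop, as in the layer: [1, time] x [time, depth] -> [1, depth].
    context_rows = [alignment[b:b + 1] @ memory[b] for b in range(batch)]
    context = np.concatenate(context_rows, axis=0)  # [batch, depth]

    # The same result as a single batched inner product over time.
    assert np.allclose(context, np.einsum("bt,btd->bd", alignment, memory),
                       atol=1e-5)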
Example #3
  def __call__(self, input_tensor, concat_output=False):
    """Invoke the bidirectional LSTM layer.

    Args:
      Same as in the LSTM class.

    Returns:
      Output contains three parts:
      1) Output tensor of shape [batch, time, depth] or [batch, depth] * time if
        not concatenated.
      2) Final state of the forward LSTM.
      3) Final state of the backward LSTM.
    """
    fwd_outputs, fwd_state = self.fwd_lstm(input_tensor)
    # Reverse the time dimension of the input for the backward LSTM.
    if isinstance(input_tensor, list):
      # Copy the list so the caller's input is not reversed in place.
      input_bwd = list(input_tensor)
    else:
      input_bwd = array_ops.unstack(input_tensor, 1)
    input_bwd.reverse()
    bwd_outputs, bwd_state = self.bwd_lstm(input_bwd)
    outputs = []
    for i in range(len(fwd_outputs)):
      outputs.append(
          array_ops.concat([fwd_outputs[i], bwd_outputs[i]], 1,
                           name=self.name + "concat"))
    if concat_output:
      return self._concat_output_steps(outputs), fwd_state, bwd_state
    return outputs, fwd_state, bwd_state
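
The bidirectional combination reduces to reversing the timesteps for the backward pass and concatenating the two per-step outputs along depth. The NumPy sketch below illustrates that pattern with assumed shapes and trivial stand-ins for the two LSTM passes.

    import numpy as np

    # Assumed NTC input: batch=2, time=3, depth=4.
    inputs = np.random.rand(2, 3, 4).astype(np.float32)
    input_steps = [np.squeeze(s, axis=1) for s in np.split(inputs, 3, axis=1)]

    # The backward pass consumes the timesteps in reverse order
    # (a copy, so the caller's list is untouched).
    input_bwd = list(reversed(input_steps))

    # Stand-ins for the forward and backward unidirectional passes.
    fwd_outputs = [s * 0.5 for s in input_steps]
    bwd_outputs = [s * 0.5 for s in input_bwd]

    # Each timestep's outputs are concatenated along depth: [batch, 2 * depth].
    outputs = [np.concatenate([f, b], axis=1)
               for f, b in zip(fwd_outputs, bwd_outputs)]
    assert outputs[0].shape == (2, 8)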
Example #4
  def build_test_sequential_graph(self, backend):
    """Create a sequential model."""
    np_dtype = test_backend_dtypes[backend]
    self.expected_dtype = datatypes.np_to_smaug_type[np_dtype]
    with Graph(name="test_sequential_graph", backend=backend) as graph:
      input_tensor = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(1, 3, 28, 28).astype(np_dtype))
      filter_tensor0 = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(64, 3, 3, 3).astype(np_dtype))
      filter_tensor1 = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(64, 64, 3, 3).astype(np_dtype))
      weight_tensor0 = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(254, 12544).astype(np_dtype))
      weight_tensor1 = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(10, 254).astype(np_dtype))
      bn_mean_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_var_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_gamma_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_beta_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))

      out = data_op.input_data(input_tensor, "input")
      out = nn_ops.convolution(
          out, filter_tensor0, stride=[1, 1], padding="same", name="conv0")
      out = activation_ops.relu(out, "conv0_relu")
      out = nn_ops.batch_norm(
          out, bn_mean_tensor, bn_var_tensor, bn_gamma_tensor, bn_beta_tensor,
          name="bn")
      out = nn_ops.convolution(
          out, filter_tensor1, stride=[1, 1], padding="same", name="conv1")
      out = activation_ops.relu(out, "conv1_relu")
      out = nn_ops.max_pool(out, pool_size=[2, 2], stride=[2, 2], name="pool")
      out = array_ops.flatten(out, "flatten")
      out = nn_ops.mat_mul(out, weight_tensor0, name="fc0")
      out = activation_ops.relu(out, "fc0_relu")
      out = nn_ops.mat_mul(out, weight_tensor1, name="fc1")
      out = array_ops.expand_dims(out, 1, "expand_dims")
      out = array_ops.squeeze(out, 1, "squeeze")
      out = array_ops.reshape(out, [2, 5], types_pb2.NC, "reshape")
      out = array_ops.repeat(out, [4, 2], "repeat")
      out = array_ops.stack(out, 4, 1, "stack")
      out0, out1, out2, out3 = array_ops.unstack(out, 1, "unstack")
      out0 = array_ops.reshape(out0, [1, 1, 8, 10], types_pb2.NCHW, "reshape")
      out0 = array_ops.padding(out0, [0, 0, 0, 0, 1, 1, 1, 1], "padding")

    self.test_graph, _ = graph.to_proto()
    self.backend = backend
    self.alignment = global_vars.backend_alignment[backend]
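
The tail of this test graph exercises the shape-manipulating ops. The NumPy sketch below traces the expected shape flow from the [1, 10] fc1 output; it assumes repeat multiplies each dimension by the corresponding factor and that the padding spec lists before/after amounts per NCHW dimension, so treat it as an illustration rather than the ops' definitive semantics.

    import numpy as np

    out = np.random.rand(1, 10).astype(np.float32)         # fc1 output, NC
    out = np.expand_dims(out, 1)                           # expand_dims -> (1, 1, 10)
    out = np.squeeze(out, 1)                               # squeeze     -> (1, 10)
    out = out.reshape(2, 5)                                # reshape     -> (2, 5)
    out = np.tile(out, (4, 2))                             # repeat      -> (8, 10)
    out = np.stack([out] * 4, axis=1)                      # stack       -> (8, 4, 10)
    out0, out1, out2, out3 = [np.squeeze(s, 1)             # unstack     -> 4 x (8, 10)
                              for s in np.split(out, 4, axis=1)]
    out0 = out0.reshape(1, 1, 8, 10)                       # NCHW reshape
    out0 = np.pad(out0, ((0, 0), (0, 0), (1, 1), (1, 1)))  # pad H, W -> (1, 1, 10, 12)
    assert out0.shape == (1, 1, 10, 12)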