Exemple #1
0
  def build_test_sequential_graph(self, backend):
    """Create a sequential model."""
    np_dtype = test_backend_dtypes[backend]
    self.expected_dtype = datatypes.np_to_smaug_type[np_dtype]
    with Graph(name="test_sequential_graph", backend=backend) as graph:
      input_tensor = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(1, 3, 28, 28).astype(np_dtype))
      filter_tensor0 = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(64, 3, 3, 3).astype(np_dtype))
      filter_tensor1 = Tensor(
          data_layout=types_pb2.NCHW,
          tensor_data=np.random.rand(64, 64, 3, 3).astype(np_dtype))
      weight_tensor0 = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(254, 12544).astype(np_dtype))
      weight_tensor1 = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(10, 254).astype(np_dtype))
      bn_mean_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_var_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_gamma_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))
      bn_beta_tensor = Tensor(
          data_layout=types_pb2.NC,
          tensor_data=np.random.rand(1, 64).astype(np_dtype))

      out = data_op.input_data(input_tensor, "input")
      out = nn_ops.convolution(
          out, filter_tensor0, stride=[1, 1], padding="same", name="conv0")
      out = activation_ops.relu(out, "conv0_relu")
      out = nn_ops.batch_norm(
          out, bn_mean_tensor, bn_var_tensor, bn_gamma_tensor, bn_beta_tensor,
          name="bn")
      out = nn_ops.convolution(
          out, filter_tensor1, stride=[1, 1], padding="same", name="conv1")
      out = activation_ops.relu(out, "conv1_relu")
      out = nn_ops.max_pool(out, pool_size=[2, 2], stride=[2, 2], name="pool")
      out = array_ops.flatten(out, "flatten")
      out = nn_ops.mat_mul(out, weight_tensor0, name="fc0")
      out = activation_ops.relu(out, "fc0_relu")
      out = nn_ops.mat_mul(out, weight_tensor1, name="fc1")
      out = array_ops.expand_dims(out, 1, "expand_dims")
      out = array_ops.squeeze(out, 1, "squeeze")
      out = array_ops.reshape(out, [2, 5], types_pb2.NC, "reshape")
      out = array_ops.repeat(out, [4, 2], "repeat")
      out = array_ops.stack(out, 4, 1, "stack")
      out0, out1, out2, out3 = array_ops.unstack(out, 1, "unstack")
      out0 = array_ops.reshape(out0, [1, 1, 8, 10], types_pb2.NCHW, "reshape")
      out0 = array_ops.padding(out0, [0, 0, 0, 0, 1, 1, 1, 1], "padding")

    self.test_graph, _ = graph.to_proto()
    self.backend = backend
    self.alignment = global_vars.backend_alignment[backend]
  def test_mat_mul(self):
    batch = 4
    channels = 32
    units = 128
    tf_a = tf.Variable(initializer(shape=[batch, channels], dtype=self.dtype))
    tf_b = tf.Variable(initializer(shape=[units, channels], dtype=self.dtype))
    tf_result = tf.linalg.matmul(tf_a, tf_b, transpose_b=True)

    a = Tensor(data_layout=types_pb2.NC, tensor_data=tf_a.numpy())
    b = Tensor(data_layout=types_pb2.NC, tensor_data=tf_b.numpy())
    with Graph(name=self.graph_name, backend=self.backend) as graph:
      nn_ops.mat_mul(a, b)
    self.runAndValidate(graph, tf_result, decimal=3)
Exemple #3
0
  def __call__(self, query):
    """ Invoke the attention layer to compute the attention vector."""

    # Compute alignments shaped [batch, time].
    alignment = self._compute_alignment(query)

    # Compute context vector (aka attention). Context is the inner product of
    # alignments and keys along the time dimension. The shape of context is
    # [batch, depth].
    # alignment_batches is shaped [1, time] * batch.
    alignment_batches = array_ops.split(
        alignment, self.batch_size, axis=0, name=self.name + "split")
    # [batch, time, depth] -> [batch, depth, time] -> [depth, time] * batch.
    values = array_ops.unstack(
        array_ops.reorder(self.memory, types_pb2.NCT), 0,
        name=self.name + "unstack")
    context = []
    for i in range(self.batch_size):
      # Every mat_mul produces a tensor shaped [1, depth].
      context.append(
          nn_ops.mat_mul(
              alignment_batches[i], values[i], name=self.name + "mm"))
    # context shaped [batch, depth].
    context = array_ops.concat(context, 0, name=self.name + "concat")

    return context
Exemple #4
0
 def _memory_layer(self, memory):
   name_pfx = self.name + ":mem_layer:"
   # Reshape memory from [batch, time, depth] to [batch * time, depth].
   memory = array_ops.reshape(
       memory, [self.batch_size * self.timesteps, self.depth], types_pb2.NC,
       name=name_pfx + "reshape0")
   # scores shaped [batch * time, depth].
   scores = nn_ops.mat_mul(memory, self.w_encoder, name=name_pfx + "mm")
   # [batch * time, depth] -> [batch, time, depth]
   return array_ops.reshape(
       scores, [self.batch_size, self.timesteps, self.depth], types_pb2.NTC,
       name=name_pfx + "reshape1")
Exemple #5
0
  def step(self, input_tensor, timestep):
    """Invoke this cell for a single timestep.

    Args:
      input_tensor: An input tensor of shape [batch, depth].
      timestep: The start timestep. This is used for naming the output tensors.

    Returns:
      Output contains two parts:
      1) An output tensor of shape [Batch, Depth].
      2) The final state of the LSTM.
    """
    x = input_tensor
    name_pfx = self.name + "step%d:" % timestep

    z = nn_ops.mat_mul(x, self.kernel, name=name_pfx + "mm_f")
    z = math_ops.add(
        z, nn_ops.mat_mul(self.h, self.recurrent_kernel, name="mm_u"),
        name=name_pfx + "add_z")
    # i = input_gate, c = new_input, f = forget_gate, o = output_gate
    zi, zf, zc, zo = array_ops.split(z, num_or_size_splits=4, axis=1)
    i = activation_ops.sigmoid(zi, name=name_pfx + "sigmoid_i")
    f = activation_ops.sigmoid(zf, name=name_pfx + "sigmoid_f")
    c = math_ops.add(
        math_ops.mul(f, self.c, name=name_pfx + "mul_f"),
        math_ops.mul(
            i,
            self.activation(
                zc, **self.activation_params, name=name_pfx + "act0"),
            name=name_pfx + "mul_i"), name=name_pfx + "add_c")
    o = activation_ops.sigmoid(zo, name=name_pfx + "sigmoid_o")
    h = math_ops.mul(
        o, self.activation(c, **self.activation_params, name=name_pfx + "act1"),
        name=name_pfx + "mul_h")
    self.c = c
    self.h = h
    return self.h, self.c
Exemple #6
0
  def compute_score(self, query):
    # The score is computed as tanh(query + keys) * w_alignments.

    name_pfx = self.name + ":score:"
    # Reshape from [batch, depth] to [batch, 1, depth] for broadcasting.
    query = array_ops.expand_dims(query, 1, name=name_pfx + "expand")
    # [batch, time, depth].
    activations = activation_ops.tanh(
        math_ops.add(self.keys, query, name=name_pfx + "add"),
        name=name_pfx + "stack")
    # [batch * time, depth]
    activations = array_ops.reshape(
        activations, [self.batch_size * self.timesteps, self.depth],
        types_pb2.NC, name=name_pfx + "reshape0")
    # [batch * time, 1]
    scores = nn_ops.mat_mul(activations, self.w_alignment, name=name_pfx + "mm")
    # [batch, time]
    scores = array_ops.reshape(
        scores, [self.batch_size, self.timesteps], types_pb2.NC,
        name=name_pfx + "reshape1")
    return scores
Exemple #7
0
 def _query_layer(self, query):
   # Return a tensor shaped [batch, depth].
   return nn_ops.mat_mul(query, self.w_decoder, name=self.name + "query_layer")
 def test_fusing_activation_functions(self):
     """Test activation function when they are fused with other operators."""
     with Graph("test_graph", "SMV") as test_graph:
         input_tensor = Tensor(data_layout=types_pb2.NHWC,
                               tensor_data=np.random.rand(
                                   *self.tensor_shape).astype(np.float16))
         filter_tensor = Tensor(data_layout=types_pb2.NHWC,
                                tensor_data=np.random.rand(
                                    32, 3, 3, 32).astype(np.float16))
         weight_tensor = Tensor(data_layout=types_pb2.NC,
                                tensor_data=np.random.rand(
                                    10, 32768).astype(np.float16))
         bn_mean_tensor = Tensor(data_layout=types_pb2.NC,
                                 tensor_data=np.random.rand(1, 64).astype(
                                     np.float16))
         bn_var_tensor = Tensor(data_layout=types_pb2.NC,
                                tensor_data=np.random.rand(1, 64).astype(
                                    np.float16))
         bn_gamma_tensor = Tensor(data_layout=types_pb2.NC,
                                  tensor_data=np.random.rand(1, 64).astype(
                                      np.float16))
         bn_beta_tensor = Tensor(data_layout=types_pb2.NC,
                                 tensor_data=np.random.rand(1, 64).astype(
                                     np.float16))
         act = data_op.input_data(input_tensor, "input")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation=None,
                                  name="conv_none")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="relu",
                                  name="conv_relu")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="lrelu",
                                  name="conv_lrelu")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="elu",
                                  name="conv_elu")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="selu",
                                  name="conv_selu")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="tanh",
                                  name="conv_tanh")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="hard_tanh",
                                  name="conv_hard_tanh")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="sigmoid",
                                  name="conv_sigmoid")
         act = nn_ops.convolution(act,
                                  filter_tensor,
                                  stride=[1, 1],
                                  padding="same",
                                  activation="softmax",
                                  name="conv_softmax")
         act = nn_ops.batch_norm(act,
                                 bn_mean_tensor,
                                 bn_var_tensor,
                                 bn_gamma_tensor,
                                 bn_beta_tensor,
                                 activation="relu",
                                 name="bn_relu")
         out = nn_ops.mat_mul(act,
                              weight_tensor,
                              activation="relu",
                              name="fc_relu")
         out = nn_ops.mat_mul(act,
                              weight_tensor,
                              activation="lrelu",
                              activation_params={"slope": 0.1},
                              name="fc_lrelu")
         out = nn_ops.mat_mul(act,
                              weight_tensor,
                              activation="elu",
                              activation_params={"alpha": 0.3},
                              name="fc_elu")
         out = nn_ops.mat_mul(act,
                              weight_tensor,
                              activation="selu",
                              activation_params={
                                  "alpha": 1.0,
                                  "lambda_param": 1.0
                              },
                              name="fc_selu")
         out = nn_ops.mat_mul(act,
                              weight_tensor,
                              activation="hard_tanh",
                              activation_params={
                                  "min": -2.0,
                                  "max": 2.0
                              },
                              name="fc_hard_tanh")
     graph_proto, _ = test_graph.to_proto()
     # None
     node = get_node_proto(graph_proto, "conv_none")
     self.assertEqual(node.params.act_params.activation,
                      types_pb2.UnknownOp)
     # ReLU
     node = get_node_proto(graph_proto, "conv_relu")
     self.assertEqual(node.params.act_params.activation, types_pb2.ReLU)
     # LReLU (default slope = 0.2)
     node = get_node_proto(graph_proto, "conv_lrelu")
     self.assertEqual(node.params.act_params.activation, types_pb2.LReLU)
     self.assertAlmostEqual(node.params.act_params.lrelu_params.slope, 0.2)
     # ELU (default alpha = 0.1)
     node = get_node_proto(graph_proto, "conv_elu")
     self.assertEqual(node.params.act_params.activation, types_pb2.ELU)
     self.assertAlmostEqual(node.params.act_params.elu_params.alpha, 0.1)
     # SELU (default alpha = 1.6733, lambda = 1.0507)
     node = get_node_proto(graph_proto, "conv_selu")
     self.assertEqual(node.params.act_params.activation, types_pb2.SELU)
     self.assertAlmostEqual(node.params.act_params.elu_params.alpha, 1.6733,
                            5)
     self.assertAlmostEqual(node.params.act_params.elu_params.lambda_param,
                            1.0507, 5)
     # Tanh
     node = get_node_proto(graph_proto, "conv_tanh")
     self.assertEqual(node.params.act_params.activation, types_pb2.Tanh)
     # HardTanh (default min = -1, max = 1)
     node = get_node_proto(graph_proto, "conv_hard_tanh")
     self.assertEqual(node.params.act_params.activation, types_pb2.HardTanh)
     self.assertAlmostEqual(node.params.act_params.hard_tanh_params.min, -1)
     self.assertAlmostEqual(node.params.act_params.hard_tanh_params.max, 1)
     # Sigmoid
     node = get_node_proto(graph_proto, "conv_sigmoid")
     self.assertEqual(node.params.act_params.activation, types_pb2.Sigmoid)
     # Softmax
     node = get_node_proto(graph_proto, "conv_softmax")
     self.assertEqual(node.params.act_params.activation, types_pb2.Softmax)
     # Fusion with inner products and batch norms.
     node = get_node_proto(graph_proto, "bn_relu")
     self.assertEqual(node.params.act_params.activation, types_pb2.ReLU)
     node = get_node_proto(graph_proto, "fc_relu")
     self.assertEqual(node.params.act_params.activation, types_pb2.ReLU)
     node = get_node_proto(graph_proto, "fc_lrelu")
     self.assertEqual(node.params.act_params.activation, types_pb2.LReLU)
     self.assertAlmostEqual(node.params.act_params.lrelu_params.slope, 0.1)
     node = get_node_proto(graph_proto, "fc_elu")
     self.assertEqual(node.params.act_params.activation, types_pb2.ELU)
     self.assertAlmostEqual(node.params.act_params.elu_params.alpha, 0.3)
     node = get_node_proto(graph_proto, "fc_selu")
     self.assertEqual(node.params.act_params.activation, types_pb2.SELU)
     self.assertAlmostEqual(node.params.act_params.elu_params.alpha, 1.0)
     self.assertAlmostEqual(node.params.act_params.elu_params.lambda_param,
                            1.0)
     node = get_node_proto(graph_proto, "fc_hard_tanh")
     self.assertEqual(node.params.act_params.activation, types_pb2.HardTanh)
     self.assertAlmostEqual(node.params.act_params.hard_tanh_params.min,
                            -2.0)
     self.assertAlmostEqual(node.params.act_params.hard_tanh_params.max,
                            2.0)