Example #1
    def call(self, inputs, training=False):
        """Computes a forward pass.

    Args:
      inputs: An input tensor.
      training: A boolean indicating whether the call is for training or not.

    Returns:
      An output tensor and a list of output activations from all the layers.
    """
        activations = {}
        x = inputs

        for block in self.blocks:
            x = block(x, training=training)
            activations[block.name] = x

        output = activations['embedder']
        if len(self.embedder_output_shape) > 1:
            output = data_utils.recursively_expand_dims(
                output, axes=[-1] * (len(self.embedder_output_shape) - 1))
            output = data_utils.reshape_by_last_dims(
                output, last_dim_shape=self.embedder_output_shape)
            activations['embedder'] = output

        return output, activations
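The interesting part of `call` is the tail: when `embedder_output_shape` has more than one dimension, the flat embedder output is expanded and reshaped so that its last axis becomes a multi-dimensional shape while all leading batch dimensions are preserved. As a point of reference, the same effect can be sketched with plain `tf.reshape` instead of the repo's `data_utils` helpers; the shapes below are made up for illustration:

import tensorflow as tf

# Hypothetical example: flat 12-D embeddings restructured into
# embedder_output_shape = [4, 3], keeping leading batch dims intact.
flat = tf.random.uniform([2, 5, 12])      # Shape = [2, 5, 12].
batch_dims = tf.shape(flat)[:-1]          # Dynamic leading dims, here [2, 5].
target = tf.concat([batch_dims, [4, 3]], axis=0)
structured = tf.reshape(flat, target)     # Shape = [2, 5, 4, 3].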
Example #2
def multi_head_logits(input_features, output_sizes, name, **kwargs):
    """Builds a multi-head logit layer with potential bottleneck layer.

  Args:
    input_features: A tensor for input features. Shape =
      [..., sequence_length, feature_dim].
    output_sizes: A dictionary for output sizes in the format {output_name:
      output_size}, where `output_size` can be an integer or a list.
    name: A string for the name scope.
    **kwargs: A dictionary for additional arguments. Supported arguments include
      `num_hidden_nodes`, `weight_initializer`, `bias_initializer`,
      `weight_max_norm`, `use_batch_norm`, `dropout_rate`, `num_fcs_per_block`,
      and `num_fc_blocks`.

  Returns:
    outputs: A dictionary for the output logits.
  """
    outputs = {}
    for output_name, output_size in output_sizes.items():
        if isinstance(output_size, int):
            output_size = [output_size]
        outputs[output_name] = linear(
            input_features,
            output_size=np.prod(output_size),
            weight_max_norm=kwargs.get('weight_max_norm', 0.0),
            weight_initializer=kwargs.get('weight_initializer',
                                          tf.initializers.he_normal()),
            bias_initializer=kwargs.get('bias_initializer',
                                        tf.initializers.he_normal()),
            name=name + '/OutputLogits/' + output_name)
        if len(output_size) > 1:
            outputs[output_name] = data_utils.recursively_expand_dims(
                outputs[output_name], axes=[-1] * (len(output_size) - 1))
            outputs[output_name] = data_utils.reshape_by_last_dims(
                outputs[output_name], last_dim_shape=output_size)
    return outputs
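A hedged usage sketch (hypothetical head names and shapes; it assumes `np`, `tf`, and this module's `linear` and `data_utils` helpers are in scope): a scalar `output_size` yields a flat head, while a list such as `[3, 3]` first produces `np.prod([3, 3]) == 9` logits and then reshapes them back to `[..., 3, 3]`.

# Hypothetical usage of the function above.
features = tf.random.uniform([8, 16, 128])  # [batch, sequence_length, feature_dim].
logits = multi_head_logits(
    features,
    output_sizes={'point': 3, 'rotation': [3, 3]},
    name='Heads')
# logits['point'].shape    == [8, 16, 3]
# logits['rotation'].shape == [8, 16, 3, 3]  (9 logits reshaped to 3 x 3).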
Example #3
    def test_reshape_by_last_dims(self):
        # Shape = [2, 4, 1].
        x = tf.constant([[[1], [2], [3], [4]], [[5], [6], [7], [8]]])
        # Shape = [2, 2, 2].
        reshaped_x = data_utils.reshape_by_last_dims(x, last_dim_shape=[2, 2])
        self.assertAllEqual(reshaped_x, [[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
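The test pins down the contract: the trailing dimensions are reshaped into `last_dim_shape` while the leading dimensions are kept. A minimal reimplementation consistent with this behavior (a sketch, not the library's actual code) could look like:

import tensorflow as tf

def reshape_by_last_dims_sketch(x, last_dim_shape):
    # Keep all leading dims and replace the last len(last_dim_shape) dims
    # with `last_dim_shape`; the total element count must match.
    kept_dims = tf.shape(x)[:-len(last_dim_shape)]
    return tf.reshape(x, tf.concat([kept_dims, last_dim_shape], axis=0))

# Matches the test above: [2, 4, 1] -> [2, 2, 2].
x = tf.constant([[[1], [2], [3], [4]], [[5], [6], [7], [8]]])
assert reshape_by_last_dims_sketch(x, [2, 2]).shape.as_list() == [2, 2, 2]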
Example #4
def simple_model(input_features,
                 output_sizes,
                 is_training,
                 name='SimpleModel',
                 num_bottleneck_nodes=0,
                 **kwargs):
    """Implements `simple base` model with outputs.

  Note that the code differs from the original architecture by disabling dropout
  and maximum weight norms by default.

  Args:
    input_features: A tensor for input features. Shape = [..., feature_dim].
    output_sizes: A dictionary for output sizes in the format {output_name:
      output_size}, where `output_size` can be an integer or a list.
    is_training: A boolean for whether it is in training mode.
    name: A string for the name scope.
    num_bottleneck_nodes: An integer for size of the bottleneck layer to be
      added before the output layer(s). No bottleneck layer will be added if
      non-positive.
    **kwargs: A dictionary of additional arguments passed to `simple_base`.

  Returns:
    outputs: A dictionary for output tensors in the format {output_name:
      output_tensors}. Output tensor shape = [..., output_size].
    activations: A dictionary of addition activation tensors for pre-output
      model activations. Keys include 'base_activations' and optionally
      'bottleneck_activations'.
  """
    net = simple_base(input_features,
                      is_training=is_training,
                      name=name,
                      **kwargs)
    activations = {'base_activations': net}

    if num_bottleneck_nodes > 0:
        net = linear(net,
                     output_size=num_bottleneck_nodes,
                     weight_max_norm=kwargs.get('weight_max_norm', 0.0),
                     weight_initializer=kwargs.get(
                         'weight_initializer', tf.initializers.he_normal()),
                     bias_initializer=kwargs.get('bias_initializer',
                                                 tf.initializers.he_normal()),
                     name=name + '/BottleneckLogits')
        activations['bottleneck_activations'] = net

    outputs = {}
    for output_name, output_size in output_sizes.items():
        if isinstance(output_size, int):
            output_size = [output_size]
        outputs[output_name] = linear(
            net,
            output_size=np.prod(output_size),
            weight_max_norm=kwargs.get('weight_max_norm', 0.0),
            weight_initializer=kwargs.get('weight_initializer',
                                          tf.initializers.he_normal()),
            bias_initializer=kwargs.get('bias_initializer',
                                        tf.initializers.he_normal()),
            name=name + '/OutputLogits/' + output_name)
        if len(output_size) > 1:
            outputs[output_name] = data_utils.recursively_expand_dims(
                outputs[output_name], axes=[-1] * (len(output_size) - 1))
            outputs[output_name] = data_utils.reshape_by_last_dims(
                outputs[output_name], last_dim_shape=output_size)
    return outputs, activations
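For completeness, a hypothetical call (head name and sizes are made up; it assumes this module's `simple_base`, `linear`, and `data_utils` are importable). Per the docstring, each output keeps the leading batch dimensions and appends its `output_size`:

# Hypothetical usage of the function above.
features = tf.random.uniform([4, 64])  # [batch, feature_dim].
outputs, activations = simple_model(
    features,
    output_sizes={'embedding': 32},
    is_training=True,
    num_bottleneck_nodes=16)
# outputs['embedding'].shape == [4, 32]
# activations has 'base_activations' and, since num_bottleneck_nodes > 0,
# 'bottleneck_activations'.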