Code example #1
def transformer_ffn_layer(x, hparams):
    """Feed-forward layer in the transformer.

  Args:
    x: a Tensor of shape [batch_size, length, hparams.hidden_size]
    hparams: hyperparmeters for model

  Returns:
    a Tensor of shape [batch_size, length, hparams.hidden_size]
  """
    if hparams.ffn_layer == "conv_hidden_relu":
        return common_layers.conv_hidden_relu(x,
                                              hparams.filter_size,
                                              hparams.hidden_size,
                                              dropout=hparams.relu_dropout)
    elif hparams.ffn_layer == "parameter_attention":
        return common_attention.parameter_attention(
            x, hparams.parameter_attention_key_channels or hparams.hidden_size,
            hparams.parameter_attention_value_channels or hparams.hidden_size,
            hparams.hidden_size, hparams.filter_size, hparams.num_heads,
            hparams.attention_dropout)
    elif hparams.ffn_layer == "conv_hidden_relu_with_sepconv":
        return common_layers.conv_hidden_relu(x,
                                              hparams.filter_size,
                                              hparams.hidden_size,
                                              kernel_size=(3, 1),
                                              second_kernel_size=(31, 1),
                                              padding="LEFT",
                                              dropout=hparams.relu_dropout)
    else:
        assert hparams.ffn_layer == "none"
        return x
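For reference, the "conv_hidden_relu" branch above is, as commonly implemented, the Transformer's position-wise feed-forward block: two projections with a ReLU in between, applied independently at every position. The NumPy sketch below illustrates that computation under this assumption; the names (ffn_sketch, w1, w2) are illustrative, not from the library, and dropout is omitted.

import numpy as np

def ffn_sketch(x, w1, b1, w2, b2):
    """x: [batch, length, hidden]; w1: [hidden, filter]; w2: [filter, hidden]."""
    inner = np.maximum(x @ w1 + b1, 0.0)  # expand to filter_size, apply ReLU
    return inner @ w2 + b2                # project back down to hidden_size

batch, length, hidden_size, filter_size = 2, 6, 8, 32
rng = np.random.default_rng(0)
x = rng.standard_normal((batch, length, hidden_size))
w1 = rng.standard_normal((hidden_size, filter_size)) * 0.02
b1 = np.zeros(filter_size)
w2 = rng.standard_normal((filter_size, hidden_size)) * 0.02
b2 = np.zeros(hidden_size)
assert ffn_sketch(x, w1, b1, w2, b2).shape == (batch, length, hidden_size)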
Code example #2
def transformer_ffn_layer(x, hparams):
  """Feed-forward layer in the transformer.

  Args:
    x: a Tensor of shape [batch_size, length, hparams.hidden_size]
    hparams: hyperparameters for model

  Returns:
    a Tensor of shape [batch_size, length, hparams.hidden_size]
  """
  if hparams.ffn_layer == "conv_hidden_relu":
    return common_layers.conv_hidden_relu(
        x,
        hparams.filter_size,
        hparams.hidden_size,
        dropout=hparams.relu_dropout)
  elif hparams.ffn_layer == "parameter_attention":
    return common_attention.parameter_attention(
        x,
        hparams.parameter_attention_key_channels or hparams.hidden_size,
        hparams.parameter_attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.filter_size,
        hparams.num_heads,
        hparams.attention_dropout)
  else:
    assert hparams.ffn_layer == "none"
    return x
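A minimal usage sketch for the version above, assuming transformer_ffn_layer as defined here is in scope: the function only reads a handful of hparams fields, so a plain SimpleNamespace can stand in for the real hyperparameter object. Only the "none" branch is exercised, because it needs no common_layers / common_attention calls and simply returns its input; the field values are illustrative.

from types import SimpleNamespace
import numpy as np

# Stand-in for the real hparams object; only the fields read above are set.
hparams = SimpleNamespace(
    ffn_layer="none",   # dispatch key; other values require the library modules
    hidden_size=512,    # model width (FFN input/output size)
    filter_size=2048,   # inner width used by the conv_hidden_relu branch
    relu_dropout=0.1)   # dropout applied inside the FFN

x = np.zeros((2, 10, hparams.hidden_size))  # [batch_size, length, hidden_size]
y = transformer_ffn_layer(x, hparams)       # "none" branch returns x unchanged
assert y is x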
Code example #3
def attention_lm_decoder(decoder_input,
                         residual_fn,
                         decoder_self_attention_bias,
                         hparams,
                         name="decoder"):
    """A stack of attention_lm layers.

  Args:
    decoder_input: a Tensor
    residual_fn: a function from (layer_input, layer_output) -> combined_output
    decoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensors
  """
    x = decoder_input
    # Summaries don't work in multi-problem setting yet.
    summaries = "problems" not in hparams.values() or len(
        hparams.problems) == 1
    with tf.variable_scope(name):
        for layer in xrange(hparams.num_hidden_layers):
            with tf.variable_scope("layer_%d" % layer):
                x = residual_fn(
                    x,
                    common_attention.multihead_attention(
                        x,
                        None,
                        decoder_self_attention_bias,
                        hparams.attention_key_channels or hparams.hidden_size,
                        hparams.attention_value_channels or hparams.hidden_size,
                        hparams.hidden_size,
                        hparams.num_heads,
                        hparams.attention_dropout,
                        summaries=summaries,
                        name="decoder_self_attention"))
                x = residual_fn(
                    x,
                    common_layers.conv_hidden_relu(
                        x,
                        hparams.filter_size,
                        hparams.hidden_size,
                        dropout=hparams.relu_dropout))
    return x
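The docstring above only fixes the signature of residual_fn: (layer_input, layer_output) -> combined_output. A common choice in this family of models is a residual add followed by layer normalization (typically with dropout on the sublayer output). The NumPy sketch below is an illustrative stand-in under that assumption, not the library's actual implementation.

import numpy as np

def layer_norm_sketch(x, epsilon=1e-6):
    # Normalize over the last (hidden) dimension; scale/bias omitted for brevity.
    mean = x.mean(axis=-1, keepdims=True)
    variance = x.var(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(variance + epsilon)

def residual_fn_sketch(layer_input, layer_output):
    # Combine sublayer input and output: residual add, then layer norm.
    return layer_norm_sketch(layer_input + layer_output)

x = np.random.randn(2, 5, 8)            # [batch, length, hidden_size]
sublayer_out = np.random.randn(2, 5, 8)
assert residual_fn_sketch(x, sublayer_out).shape == x.shape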