Example #1
def transformer_ffn_layer(x, hparams):
    """Feed-forward layer in the transformer.

  Args:
    x: a Tensor of shape [batch_size, length, hparams.hidden_size]
    hparams: hyperparmeters for model

  Returns:
    a Tensor of shape [batch_size, length, hparams.hidden_size]
  """
    if hparams.ffn_layer == "conv_hidden_relu":
        return common_layers.conv_hidden_relu(x,
                                              hparams.filter_size,
                                              hparams.hidden_size,
                                              dropout=hparams.relu_dropout)
    elif hparams.ffn_layer == "parameter_attention":
        return common_attention.parameter_attention(
            x, hparams.parameter_attention_key_channels or hparams.hidden_size,
            hparams.parameter_attention_value_channels or hparams.hidden_size,
            hparams.hidden_size, hparams.filter_size, hparams.num_heads,
            hparams.attention_dropout)
    elif hparams.ffn_layer == "conv_hidden_relu_with_sepconv":
        return common_layers.conv_hidden_relu(x,
                                              hparams.filter_size,
                                              hparams.hidden_size,
                                              kernel_size=(3, 1),
                                              second_kernel_size=(31, 1),
                                              padding="LEFT",
                                              dropout=hparams.relu_dropout)
    else:
        assert hparams.ffn_layer == "none"
        return x
Example #2
def transformer_ffn_layer(x, hparams):
  """Feed-forward layer in the transformer.

  Args:
    x: a Tensor of shape [batch_size, length, hparams.hidden_size]
    hparams: hyperparameters for model

  Returns:
    a Tensor of shape [batch_size, length, hparams.hidden_size]
  """
  if hparams.ffn_layer == "conv_hidden_relu":
    return common_layers.conv_hidden_relu(
        x,
        hparams.filter_size,
        hparams.hidden_size,
        dropout=hparams.relu_dropout)
  elif hparams.ffn_layer == "parameter_attention":
    return common_attention.parameter_attention(
        x,
        hparams.parameter_attention_key_channels or hparams.hidden_size,
        hparams.parameter_attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.filter_size,
        hparams.num_heads,
        hparams.attention_dropout)
  else:
    assert hparams.ffn_layer == "none"
    return x
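
Both examples dispatch on hparams.ffn_layer: "conv_hidden_relu" applies the position-wise feed-forward block, "parameter_attention" attends over a learned parameter bank, and "none" returns the input unchanged (Example #1 additionally handles "conv_hidden_relu_with_sepconv"). Below is a minimal usage sketch, assuming a TF1-era TensorFlow with tf.contrib and the tensor2tensor modules common_layers / common_attention already imported as the listings require; the hparams field values are illustrative, not the library's defaults.

# Minimal usage sketch (assumption: TF1-era TensorFlow with tf.contrib, and the
# tensor2tensor modules common_layers / common_attention already imported, as
# the listings above require). Field values below are illustrative only.
import tensorflow as tf

hparams = tf.contrib.training.HParams(
    ffn_layer="conv_hidden_relu",   # also: "parameter_attention", "none"
    hidden_size=512,
    filter_size=2048,
    num_heads=8,
    relu_dropout=0.1,
    attention_dropout=0.1,
    # 0 makes the `or hparams.hidden_size` fallbacks in the code kick in.
    parameter_attention_key_channels=0,
    parameter_attention_value_channels=0,
)

# Input and output are both [batch_size, length, hparams.hidden_size].
x = tf.zeros([8, 20, hparams.hidden_size])
y = transformer_ffn_layer(x, hparams)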