Example #1
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 attention_dropout_rate=0.0,
                 residual_dropout_rate=0.1):
        super(TransformerDecoderLayer, self).__init__()
        # Decoder self-attention.
        self.self_attn = MultiheadAttention(d_model,
                                            nhead,
                                            dropout=attention_dropout_rate)
        # Decoder-encoder (cross) attention over the encoder output.
        self.multihead_attn = MultiheadAttention(
            d_model, nhead, dropout=attention_dropout_rate)

        # Position-wise feed-forward network built from two 1x1 convolutions.
        self.conv1 = Conv2D(in_channels=d_model,
                            out_channels=dim_feedforward,
                            kernel_size=(1, 1))
        self.conv2 = Conv2D(in_channels=dim_feedforward,
                            out_channels=d_model,
                            kernel_size=(1, 1))

        # One LayerNorm and one residual dropout per sublayer
        # (self-attention, cross-attention, feed-forward).
        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.norm3 = LayerNorm(d_model)
        self.dropout1 = Dropout(residual_dropout_rate)
        self.dropout2 = Dropout(residual_dropout_rate)
        self.dropout3 = Dropout(residual_dropout_rate)
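
Only the constructor is shown above. The three norm/dropout pairs suggest the usual post-norm wiring of a self-attention, a cross-attention, and a feed-forward sublayer; the sketch below illustrates that composition under those assumptions, substituting paddle.nn.MultiHeadAttention for the project's custom MultiheadAttention and a Linear feed-forward for the 1x1 convolutions. It is not the original forward method.

import paddle
import paddle.nn as nn

class DecoderBlockSketch(nn.Layer):
    """Illustrative post-norm decoder block; wiring and names are assumed."""

    def __init__(self, d_model=512, nhead=8, dim_feedforward=2048):
        super().__init__()
        self.self_attn = nn.MultiHeadAttention(d_model, nhead)
        self.cross_attn = nn.MultiHeadAttention(d_model, nhead)
        self.ffn = nn.Sequential(nn.Linear(d_model, dim_feedforward),
                                 nn.ReLU(),
                                 nn.Linear(dim_feedforward, d_model))
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)

    def forward(self, tgt, memory):
        tgt = self.norm1(tgt + self.self_attn(tgt, tgt, tgt))         # self-attention
        tgt = self.norm2(tgt + self.cross_attn(tgt, memory, memory))  # cross-attention
        return self.norm3(tgt + self.ffn(tgt))                        # feed-forward

tgt = paddle.randn([2, 10, 512])     # [batch, target length, d_model]
memory = paddle.randn([2, 20, 512])  # [batch, source length, d_model]
print(DecoderBlockSketch()(tgt, memory).shape)  # [2, 10, 512]
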
Example #2
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward,
                 dropout=0.1,
                 activation="relu",
                 attn_dropout=None,
                 act_dropout=None,
                 normalize_before=False,
                 weight_attr=None,
                 bias_attr=None,
                 attention_type="bigbird",
                 block_size=1,
                 window_size=3,
                 num_global_blocks=1,
                 num_rand_blocks=1,
                 seed=None):
        # Record the constructor arguments so that identical copies of this
        # layer can be rebuilt later (see the TransformerEncoder in Example #3).
        self._config = locals()
        self._config.pop("self")
        self._config.pop("__class__", None)  # py3: locals() may also capture __class__

        super(TransformerEncoderLayer, self).__init__()
        # Unspecified attention/activation dropout rates fall back to the shared rate.
        attn_dropout = dropout if attn_dropout is None else attn_dropout
        act_dropout = dropout if act_dropout is None else act_dropout
        self.normalize_before = normalize_before

        weight_attrs = _convert_param_attr_to_list(weight_attr, 2)
        bias_attrs = _convert_param_attr_to_list(bias_attr, 2)

        # Self-attention sublayer; with attention_type="bigbird", the block,
        # window, global and random block settings define the sparse pattern.
        self.self_attn = MultiHeadAttention(
            d_model,
            nhead,
            dropout=attn_dropout,
            weight_attr=weight_attrs[0],
            bias_attr=bias_attrs[0],
            attention_type=attention_type,
            block_size=block_size,
            window_size=window_size,
            num_global_blocks=num_global_blocks,
            num_rand_blocks=num_rand_blocks,
            seed=seed)
        self.linear1 = Linear(d_model,
                              dim_feedforward,
                              weight_attrs[1],
                              bias_attr=bias_attrs[1])
        self.dropout = Dropout(act_dropout, mode="upscale_in_train")
        self.linear2 = Linear(dim_feedforward,
                              d_model,
                              weight_attrs[1],
                              bias_attr=bias_attrs[1])
        self.norm1 = LayerNorm(d_model, epsilon=1e-12)
        self.norm2 = LayerNorm(d_model, epsilon=1e-12)
        self.dropout1 = Dropout(dropout, mode="upscale_in_train")
        self.dropout2 = Dropout(dropout, mode="upscale_in_train")
        self.activation = getattr(F, activation)
        self.d_model = d_model
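
Two details in this constructor are easy to miss: attn_dropout and act_dropout fall back to the shared dropout rate when left as None, and normalize_before selects pre-norm or post-norm residual wiring. The snippet below is a self-contained illustration of that pre-norm/post-norm distinction only; the function and variable names are invented for the sketch and the sublayer is stubbed with a plain Linear.

import paddle
import paddle.nn as nn

def residual_block(x, sublayer, norm, normalize_before):
    if normalize_before:
        return x + sublayer(norm(x))  # pre-norm: normalize, run sublayer, add residual
    return norm(x + sublayer(x))      # post-norm: run sublayer, add residual, normalize

x = paddle.randn([2, 8, 16])
sublayer, norm = nn.Linear(16, 16), nn.LayerNorm(16)
print(residual_block(x, sublayer, norm, normalize_before=True).shape)   # [2, 8, 16]
print(residual_block(x, sublayer, norm, normalize_before=False).shape)  # [2, 8, 16]
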
Example #3
    def __init__(self, encoder_layer, num_layers):
        super(TransformerEncoder, self).__init__()
        # Keep the given layer as the first block and rebuild identical copies
        # from its recorded _config (see Example #2) for the remaining blocks.
        self.layers = LayerList([(encoder_layer if i == 0 else
                                  type(encoder_layer)(**encoder_layer._config))
                                 for i in range(num_layers)])
        self.num_layers = num_layers
        self.norm = LayerNorm(self.layers[0].d_model, epsilon=1e-12)
        self.normalize_before = self.layers[0].normalize_before
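
This container depends on the _config dictionary captured with locals() in Example #2: the layer passed in becomes the first block, and each remaining block is a fresh instance rebuilt from the same arguments. A stripped-down, hypothetical version of that capture-and-clone pattern:

class ConfigurableLayer:
    def __init__(self, d_model, nhead, dropout=0.1):
        self._config = locals()           # snapshot of the constructor arguments
        self._config.pop("self")          # drop the instance reference
        self._config.pop("__class__", None)
        self.d_model = d_model
        self.nhead = nhead
        self.dropout = dropout

layer = ConfigurableLayer(128, 4)
clone = type(layer)(**layer._config)      # new layer built with identical arguments
assert clone is not layer and clone.d_model == layer.d_model
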
Example #4
    def __init__(self):
        super(ModelCase4, self).__init__()
        self.bn1 = BatchNorm2D(3)
        self.ln1 = LayerNorm([3 * 16 * 16])
        self.relu1 = ReLU()
        self.fc1 = paddle.nn.Linear(3 * 16 * 16, 3 * 16 * 16)
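
Again only the constructor is shown. The layer shapes suggest a 3-channel 16x16 input that is batch-normalized, flattened, layer-normalized, activated and passed through the fully connected layer; the forward below is an assumed composition for illustration, not the original model's code.

import paddle
import paddle.nn as nn

class ModelCase4Sketch(nn.Layer):
    """Illustrative composition of the sublayers above; the forward is assumed."""

    def __init__(self):
        super().__init__()
        self.bn1 = nn.BatchNorm2D(3)
        self.ln1 = nn.LayerNorm([3 * 16 * 16])
        self.relu1 = nn.ReLU()
        self.fc1 = nn.Linear(3 * 16 * 16, 3 * 16 * 16)

    def forward(self, x):                    # x: [N, 3, 16, 16]
        x = self.bn1(x)                      # normalize over the 3 channels
        x = paddle.flatten(x, start_axis=1)  # [N, 3 * 16 * 16]
        return self.fc1(self.relu1(self.ln1(x)))

print(ModelCase4Sketch()(paddle.randn([2, 3, 16, 16])).shape)  # [2, 768]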