    def __init__(self, params, name="cached_decoder"):
        super().__init__(name=name)

        self.normalization = params.normalization
        self.enable_cache = params.enable_decoder_cache
        self.enable_relative_positional_embedding = params.enable_relative_positional_embedding
        self.query_method = params.tgt_query_method
        self.dropout = params.residual_dropout

        with utils.scope(name):
            self.cache = Cache(params, name="decoder_cache")
            if self.query_method == "single_linear":
                # project decoder states down to the cache key dimension before
                # querying the cache (cache_dk is assumed to come from params)
                self.cache_dk = params.cache_dk
                self.query_transform = nn.Sequential(nn.Linear(params.hidden_size, self.cache_dk),
                                                     nn.Tanh())
            self.layers = nn.ModuleList([CachedTransformerDecoderLayer(params, name="layer_%d" % i)
                                         for i in range(params.num_decoder_layers)])
            if params.enable_relative_positional_embedding:
                self.pos_emb = PositionalEmbedding(params.hidden_size)
                # per-head bias vectors used together with the relative positional
                # embedding (analogous to the u/v terms in Transformer-XL attention)
                self.pos_bias_u = nn.Parameter(torch.Tensor(params.num_heads, params.hidden_size // params.num_heads))
                self.pos_bias_v = nn.Parameter(torch.Tensor(params.num_heads, params.hidden_size // params.num_heads))
                self.add_name(self.pos_bias_u, "pos_bias_u")
                self.add_name(self.pos_bias_v, "pos_bias_v")
            else:
                self.pos_bias_u, self.pos_bias_v = None, None

            if self.normalization == "before":
                self.layer_norm = modules.LayerNorm(params.hidden_size)
            else:
                self.layer_norm = None

        self.reset_parameters()
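
For reference, a minimal sketch of the hyperparameter object this constructor reads, assembled from the attribute accesses above. The concrete values and the extra cache_dk field are illustrative assumptions, not the project's defaults, and instantiating the decoder still requires the surrounding module.

from types import SimpleNamespace

# Illustrative values only; cache_dk is an assumed field for the cache key dimension.
params = SimpleNamespace(
    hidden_size=512,
    filter_size=2048,
    num_heads=8,
    num_encoder_layers=6,
    num_decoder_layers=6,
    normalization="before",      # "before" = pre-norm, anything else = post-norm
    residual_dropout=0.1,
    attention_dropout=0.1,
    relu_dropout=0.1,
    enable_decoder_cache=True,
    enable_relative_positional_embedding=True,
    enable_sentence_embedding=False,
    enable_residual_gate=False,
    tgt_query_method="single_linear",
    cache_dk=64,
)
# decoder = CachedTransformerDecoder(params)  # needs the surrounding module's classes
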
Example #2
    def __init__(self, params, name="attention"):
        super(AttentionSubLayer, self).__init__(name=name)

        self.dropout = params.residual_dropout
        self.normalization = params.normalization

        with utils.scope(name):
            self.attention = modules.MultiHeadAttention(
                params.hidden_size, params.num_heads, params.attention_dropout)
            self.layer_norm = modules.LayerNorm(params.hidden_size)
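
The forward pass of this sublayer is not shown; the sketch below illustrates the usual pre-norm / post-norm residual wiring that the normalization and residual_dropout fields suggest. It is a hedged reconstruction in plain torch, with residual_sublayer as a hypothetical helper, not the repository's code.

import torch
import torch.nn as nn
import torch.nn.functional as F

def residual_sublayer(x, sublayer, layer_norm, dropout, normalization="before", training=True):
    # Assumed wiring: pre-norm applies LayerNorm before the sublayer and adds a
    # residual; post-norm adds the residual first and normalizes the sum.
    if normalization == "before":
        y = sublayer(layer_norm(x))
        y = F.dropout(y, p=dropout, training=training)
        return x + y
    else:
        y = sublayer(x)
        y = F.dropout(y, p=dropout, training=training)
        return layer_norm(x + y)

# Toy usage with a stand-in sublayer:
norm = nn.LayerNorm(512)
out = residual_sublayer(torch.randn(2, 7, 512), nn.Linear(512, 512), norm, dropout=0.1)
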
Example #3
    def __init__(self, params, dtype=None, name="ffn_layer"):
        super(FFNSubLayer, self).__init__(name=name)

        self.dropout = params.residual_dropout
        self.normalization = params.normalization

        with utils.scope(name):
            self.ffn_layer = modules.FeedForward(params.hidden_size,
                                                 params.filter_size,
                                                 dropout=params.relu_dropout)
            self.layer_norm = modules.LayerNorm(params.hidden_size)
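
modules.FeedForward is only configured here (hidden_size, filter_size, relu_dropout); a plausible stand-in for what it computes is the standard position-wise network below. The PositionWiseFeedForward class is an assumed equivalent, not the module's actual definition.

import torch
import torch.nn as nn

class PositionWiseFeedForward(nn.Module):
    # Assumed equivalent of modules.FeedForward: Linear -> ReLU -> dropout -> Linear.
    def __init__(self, hidden_size, filter_size, dropout=0.0):
        super().__init__()
        self.inner = nn.Linear(hidden_size, filter_size)
        self.outer = nn.Linear(filter_size, hidden_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.outer(self.dropout(torch.relu(self.inner(x))))

ffn = PositionWiseFeedForward(512, 2048, dropout=0.1)
y = ffn(torch.randn(2, 7, 512))  # shape preserved: (2, 7, 512)
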
Example #4
    def __init__(self, params, name="encoder"):
        super(TransformerEncoder, self).__init__(name=name)

        self.normalization = params.normalization

        with utils.scope(name):
            self.layers = nn.ModuleList([
                TransformerEncoderLayer(params, name="layer_%d" % i)
                for i in range(params.num_encoder_layers)])
            if self.normalization == "before":
                self.layer_norm = modules.LayerNorm(params.hidden_size)
            else:
                self.layer_norm = None
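
The constructor above only builds the layer stack and an optional final LayerNorm; the sketch below shows the forward loop this setup implies, applying the final norm only when it exists (the pre-norm case). It is an assumption about the forward method, written with stand-in layers.

import torch
import torch.nn as nn

def encode(x, layers, layer_norm=None):
    # Assumed forward pass: run every encoder layer in order, then apply the
    # final LayerNorm only if it was created (normalization == "before").
    for layer in layers:
        x = layer(x)
    if layer_norm is not None:
        x = layer_norm(x)
    return x

# Toy usage with stand-in layers:
stack = [nn.Linear(512, 512) for _ in range(6)]
h = encode(torch.randn(2, 7, 512), stack, nn.LayerNorm(512))
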
    def __init__(self, params, name="learnableselfattention"):
        super().__init__(name=name)

        self.dropout = params.residual_dropout
        self.normalization = params.normalization
        self.gated = params.enable_residual_gate
        if self.gated:
            # hidden-size projections used by the optional gated residual connection
            hidden_size = params.hidden_size
            self.W_x = Affine(hidden_size, hidden_size, name="W_x")
            self.W_y = Affine(hidden_size, hidden_size, name="W_y")

        with utils.scope(name):
            self.attention = modules.LearnableMultiHeadSelfAttention(
                params.hidden_size,
                params.num_heads,
                params.attention_dropout,
                params.enable_relative_positional_embedding,
                params.enable_sentence_embedding)
            self.layer_norm = modules.LayerNorm(params.hidden_size)
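
W_x and W_y point to a gated residual connection, but the gate itself is computed in forward code that is not shown. The GatedResidual class below is one common sigmoid-gate formulation, offered as a guess; the repository's actual combination may differ.

import torch
import torch.nn as nn

class GatedResidual(nn.Module):
    # Assumed gate: g = sigmoid(W_x(x) + W_y(y)), output = g * x + (1 - g) * y,
    # where x is the sublayer input and y the sublayer output.
    def __init__(self, hidden_size):
        super().__init__()
        self.W_x = nn.Linear(hidden_size, hidden_size)
        self.W_y = nn.Linear(hidden_size, hidden_size)

    def forward(self, x, y):
        gate = torch.sigmoid(self.W_x(x) + self.W_y(y))
        return gate * x + (1.0 - gate) * y

gate = GatedResidual(512)
out = gate(torch.randn(2, 7, 512), torch.randn(2, 7, 512))
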