Example #1
    def __init__(self, params, name="cached_decoder"):
        super().__init__(name=name)

        self.normalization = params.normalization
        self.enable_cache = params.enable_decoder_cache
        self.enable_relative_positional_embedding = params.enable_relative_positional_embedding
        self.query_method = params.tgt_query_method
        self.dropout = params.residual_dropout

        with utils.scope(name):
            self.cache = Cache(params, name="decoder_cache")
            if self.query_method == "single_linear":
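                # NOTE: cache_dk is assumed to be defined elsewhere (e.g., derived
                # from the cache configuration); it is not set in this snippet.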
                self.query_transform = nn.Sequential(nn.Linear(params.hidden_size, self.cache_dk),
                                                     nn.Tanh())
            self.layers = nn.ModuleList([CachedTransformerDecoderLayer(params, name="layer_%d" % i)
                                         for i in range(params.num_decoder_layers)])
            if params.enable_relative_positional_embedding:
                self.pos_emb = PositionalEmbedding(params.hidden_size)
                self.pos_bias_u = nn.Parameter(torch.Tensor(params.num_heads, params.hidden_size // params.num_heads))
                self.pos_bias_v = nn.Parameter(torch.Tensor(params.num_heads, params.hidden_size // params.num_heads))
                self.add_name(self.pos_bias_u, "pos_bias_u")
                self.add_name(self.pos_bias_v, "pos_bias_v")
            else:
                self.pos_bias_u, self.pos_bias_v = None, None

            if self.normalization == "before":
                self.layer_norm = modules.LayerNorm(params.hidden_size)
            else:
                self.layer_norm = None

        self.reset_parameters()
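
The pos_bias_u / pos_bias_v parameters are the per-head content and position biases used in Transformer-XL-style relative attention. Below is a minimal, self-contained sketch of how such biases typically enter the attention scores in plain PyTorch; it is not the project's CachedTransformerDecoderLayer, and the relative-shift trick is omitted (r is treated as a simple per-offset table).

import torch

# Shapes: batch B, heads H, length T, head dimension D.
B, H, T, D = 2, 4, 5, 16
q = torch.randn(B, H, T, D)           # queries
k = torch.randn(B, H, T, D)           # keys
r = torch.randn(H, T, D)              # relative-position embeddings (per head, per offset)
pos_bias_u = torch.randn(H, D)        # content bias, added to q for the content term
pos_bias_v = torch.randn(H, D)        # position bias, added to q for the position term

# Content term: (q + u) . k^T  -> (B, H, T, T)
content = torch.einsum("bhqd,bhkd->bhqk", q + pos_bias_u[None, :, None, :], k)
# Position term: (q + v) . r^T -> (B, H, T, T)
position = torch.einsum("bhqd,hkd->bhqk", q + pos_bias_v[None, :, None, :], r)

weights = torch.softmax((content + position) / D ** 0.5, dim=-1)
print(weights.shape)  # torch.Size([2, 4, 5, 5])
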
Example #2
    def __init__(self, hidden_size, num_heads, dropout=0.0, enable_rel_emb=True, enable_sent_emb=False, gated=False,
                 name="learnable_multihead_selfattention"):
        super().__init__(name=name)

        self.num_heads = num_heads
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.enable_rel_emb = enable_rel_emb
        self.enable_sent_emb = enable_sent_emb
        self.gated = gated

        if enable_sent_emb:
            self.sent_emb = PositionalEmbedding(hidden_size)

        if gated:
            self.W_c = Affine(hidden_size, hidden_size, name="W_c")
            self.W_i = Affine(hidden_size, hidden_size, name="W_i")

        with utils.scope(name):
            self.q_transform = Affine(hidden_size, hidden_size,
                                      name="q_transform")
            self.k_transform = Affine(hidden_size, hidden_size,
                                      name="k_transform")
            self.v_transform = Affine(hidden_size, hidden_size,
                                      name="v_transform")
            self.o_transform = Affine(hidden_size, hidden_size,
                                      name="o_transform")
            if self.enable_rel_emb:
                self.r_transform = Affine(hidden_size, hidden_size,
                                          name="r_transform")

        self.reset_parameters()
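
Leaving the relative-embedding and gating options aside, the q/k/v/o transforms implement ordinary multi-head scaled dot-product self-attention. A compact sketch of that core path, with plain nn.Linear layers standing in for Affine (masking and attention dropout omitted):

import torch
import torch.nn as nn

hidden, heads = 8, 2
head_dim = hidden // heads
q_t, k_t, v_t, o_t = (nn.Linear(hidden, hidden) for _ in range(4))

x = torch.randn(2, 5, hidden)                     # (batch, length, hidden)

def split_heads(t):                               # (B, T, H*D) -> (B, H, T, D)
    return t.view(t.size(0), t.size(1), heads, head_dim).transpose(1, 2)

q, k, v = split_heads(q_t(x)), split_heads(k_t(x)), split_heads(v_t(x))
scores = q @ k.transpose(-2, -1) / head_dim ** 0.5    # (B, H, T, T)
weights = torch.softmax(scores, dim=-1)
context = (weights @ v).transpose(1, 2).reshape(2, 5, hidden)
print(o_t(context).shape)                         # torch.Size([2, 5, 8])
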
Example #3
    def __init__(self, params, name="layer"):
        super(TransformerDecoderLayer, self).__init__(name=name)

        with utils.scope(name):
            self.self_attention = AttentionSubLayer(params,
                                                    name="self_attention")
            self.encdec_attention = AttentionSubLayer(params,
                                                      name="encdec_attention")
            self.feed_forward = FFNSubLayer(params)
Example #4
    def __init__(self, params, name="layer"):
        super().__init__(name=name)

        with utils.scope(name):
            self.self_attention = LearnableSelfAttentionSubLayer(params,
                                                                 name="self_attention")
            self.encdec_attention = AttentionSubLayer(params,
                                                      name="encdec_attention")
            self.feed_forward = FFNSubLayer(params)
Example #5
    def __init__(self, params, name="attention"):
        super(AttentionSubLayer, self).__init__(name=name)

        self.dropout = params.residual_dropout
        self.normalization = params.normalization

        with utils.scope(name):
            self.attention = modules.MultiHeadAttention(
                params.hidden_size, params.num_heads, params.attention_dropout)
            self.layer_norm = modules.LayerNorm(params.hidden_size)
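
The sub-layer keeps the normalization mode ("before" or "after") and a residual dropout rate; a typical forward wires them as a pre-norm or post-norm residual block. A small sketch of the two wirings, using a generic sublayer callable rather than the project's actual forward:

import torch
import torch.nn as nn
import torch.nn.functional as F

def residual_block(x, sublayer, layer_norm, normalization="before", p=0.1, training=False):
    y = sublayer(layer_norm(x)) if normalization == "before" else sublayer(x)
    y = F.dropout(y, p=p, training=training)     # residual dropout
    # Pre-norm adds the residual after the (already normalized) transform;
    # post-norm adds it first and normalizes the sum.
    return x + y if normalization == "before" else layer_norm(x + y)

x = torch.randn(2, 5, 8)
print(residual_block(x, nn.Linear(8, 8), nn.LayerNorm(8), "before").shape)  # torch.Size([2, 5, 8])
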
Example #6
    def __init__(self, params, dtype=None, name="ffn_layer"):
        super(FFNSubLayer, self).__init__(name=name)

        self.dropout = params.residual_dropout
        self.normalization = params.normalization

        with utils.scope(name):
            self.ffn_layer = modules.FeedForward(params.hidden_size,
                                                 params.filter_size,
                                                 dropout=params.relu_dropout)
            self.layer_norm = modules.LayerNorm(params.hidden_size)
Example #7
    def __init__(self, params, name="encoder"):
        super(TransformerEncoder, self).__init__(name=name)

        self.normalization = params.normalization

        with utils.scope(name):
            self.layers = nn.ModuleList([
                TransformerEncoderLayer(params, name="layer_%d" % i)
                for i in range(params.num_encoder_layers)])
            if self.normalization == "before":
                self.layer_norm = modules.LayerNorm(params.hidden_size)
            else:
                self.layer_norm = None
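
The final LayerNorm is only kept for pre-norm ("before") models, where each layer normalizes its input and the stack's output would otherwise leave unnormalized activations. A sketch of how such a stack is usually applied, with plain linear layers standing in for TransformerEncoderLayer:

import torch
import torch.nn as nn

class TinyEncoder(nn.Module):
    def __init__(self, hidden_size=8, num_layers=2, normalization="before"):
        super().__init__()
        # Stand-in layers; the real model uses TransformerEncoderLayer(params, ...).
        self.layers = nn.ModuleList([nn.Linear(hidden_size, hidden_size)
                                     for _ in range(num_layers)])
        # A final norm is only needed when each layer normalizes its input (pre-norm).
        self.layer_norm = nn.LayerNorm(hidden_size) if normalization == "before" else None

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        if self.layer_norm is not None:
            x = self.layer_norm(x)
        return x

print(TinyEncoder()(torch.randn(2, 5, 8)).shape)  # torch.Size([2, 5, 8])
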
Example #8
    def __init__(self, q_size, k_size, hidden_size, name="attention"):
        super(Attention, self).__init__(name)

        self._q_size = q_size
        self._k_size = k_size
        self._hidden_size = hidden_size

        with utils.scope(name):
            self.q_transform = Affine(q_size, hidden_size, name="q_transform")
            self.k_transform = Affine(k_size, hidden_size, name="k_transform")
            self.v_transform = Affine(hidden_size, 1,
                                      name="v_transform")

        self.reset_parameters()
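
With q_transform, k_transform and a hidden-to-1 v_transform, this is additive (Bahdanau-style) attention: score(q, k_i) = v^T tanh(W_q q + W_k k_i). A self-contained sketch of that scoring rule using nn.Linear in place of Affine (sizes are illustrative):

import torch
import torch.nn as nn

q_size, k_size, hidden = 8, 8, 16
q_transform = nn.Linear(q_size, hidden)
k_transform = nn.Linear(k_size, hidden)
v_transform = nn.Linear(hidden, 1)

query = torch.randn(2, 1, q_size)     # one query per batch element
keys = torch.randn(2, 6, k_size)      # six memory positions

# score(q, k_i) = v^T tanh(W_q q + W_k k_i), then softmax over positions.
hidden_sum = torch.tanh(q_transform(query) + k_transform(keys))    # (2, 6, hidden)
scores = v_transform(hidden_sum).squeeze(-1)                       # (2, 6)
weights = torch.softmax(scores, dim=-1)
context = torch.bmm(weights.unsqueeze(1), keys).squeeze(1)         # (2, k_size)
print(context.shape)  # torch.Size([2, 8])
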
Example #9
    def __init__(self, in_features, out_features, bias=True, name="affine"):
        super(Affine, self).__init__(name=name)
        self.in_features = in_features
        self.out_features = out_features

        with utils.scope(name):
            self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
            self.add_name(self.weight, "weight")
            if bias:
                self.bias = nn.Parameter(torch.Tensor(out_features))
                self.add_name(self.bias, "bias")
            else:
                self.register_parameter('bias', None)

        self.reset_parameters()
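
Affine is a thin linear layer whose weight and bias are registered as named parameters; functionally it computes y = x W^T + b. A minimal equivalent in plain PyTorch (the initialization below is a common choice, not necessarily the project's reset_parameters):

import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleAffine(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        self.bias = nn.Parameter(torch.empty(out_features)) if bias else None
        self.reset_parameters()

    def reset_parameters(self):
        # One common scheme; the original project may initialize differently.
        nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x):
        return F.linear(x, self.weight, self.bias)

print(SimpleAffine(8, 4)(torch.randn(2, 8)).shape)  # torch.Size([2, 4])
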
Example #10
    def __init__(self, input_size, hidden_size, output_size=None, dropout=0.0,
                 name="feed_forward"):
        super(FeedForward, self).__init__(name=name)

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size or input_size
        self.dropout = dropout

        with utils.scope(name):
            self.input_transform = Affine(input_size, hidden_size,
                                          name="input_transform")
            self.output_transform = Affine(hidden_size, self.output_size,
                                           name="output_transform")

        self.reset_parameters()
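
The two Affine transforms form the position-wise feed-forward block: expand to filter_size, apply a non-linearity and dropout, then project back. A sketch of that forward pass with nn.Linear; ReLU is assumed here (consistent with the relu_dropout name), though the project's activation may differ:

import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyFeedForward(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=None, dropout=0.0):
        super().__init__()
        self.input_transform = nn.Linear(input_size, hidden_size)
        self.output_transform = nn.Linear(hidden_size, output_size or input_size)
        self.dropout = dropout

    def forward(self, x):
        h = F.relu(self.input_transform(x))                       # expand + non-linearity
        h = F.dropout(h, p=self.dropout, training=self.training)  # inner dropout
        return self.output_transform(h)                           # project back

print(TinyFeedForward(8, 32)(torch.randn(2, 5, 8)).shape)  # torch.Size([2, 5, 8])
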
Example #11
    def __init__(self, params, name="transformer"):
        super(Transformer, self).__init__(name=name)
        self.params = params

        with utils.scope(name):
            self.build_embedding(params)
            self.encoding = modules.PositionalEmbedding()
            self.encoder = TransformerEncoder(params)
            self.decoder = TransformerDecoder(params)

        self.criterion = modules.SmoothedCrossEntropyLoss(
            params.label_smoothing)
        self.dropout = params.residual_dropout
        self.hidden_size = params.hidden_size
        self.num_encoder_layers = params.num_encoder_layers
        self.num_decoder_layers = params.num_decoder_layers
        self.reset_parameters()
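
At the top level the model embeds the source and target, runs the encoder and decoder, and scores the decoder states with smoothed cross-entropy. A rough sketch of that data flow using standard torch.nn pieces in place of the project's modules (requires a recent PyTorch for batch_first and label_smoothing; positional encoding, target shifting and padding masks are omitted for brevity):

import torch
import torch.nn as nn

vocab, hidden = 100, 8
embedding = nn.Embedding(vocab, hidden)
encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(hidden, nhead=2, batch_first=True), 2)
decoder = nn.TransformerDecoder(nn.TransformerDecoderLayer(hidden, nhead=2, batch_first=True), 2)
output_proj = nn.Linear(hidden, vocab)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)    # stand-in for SmoothedCrossEntropyLoss

src = torch.randint(0, vocab, (2, 7))
tgt = torch.randint(0, vocab, (2, 6))

memory = encoder(embedding(src))                        # encode the source
causal = torch.triu(torch.full((6, 6), float("-inf")), diagonal=1)   # causal decoder mask
states = decoder(embedding(tgt), memory, tgt_mask=causal)
loss = criterion(output_proj(states).reshape(-1, vocab), tgt.reshape(-1))
print(loss.item())
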
Example #12
    def __init__(self, params, name="learnableselfattention"):
        super().__init__(name=name)

        self.dropout = params.residual_dropout
        self.normalization = params.normalization
        self.gated = params.enable_residual_gate
        if self.gated:
            hidden_size = params.hidden_size
            self.W_x = Affine(hidden_size, hidden_size, name="W_x")
            self.W_y = Affine(hidden_size, hidden_size, name="W_y")

        with utils.scope(name):
            self.attention = modules.LearnableMultiHeadSelfAttention(params.hidden_size, 
                                                                     params.num_heads, 
                                                                     params.attention_dropout,
                                                                     params.enable_relative_positional_embedding,
                                                                     params.enable_sentence_embedding)
            self.layer_norm = modules.LayerNorm(params.hidden_size)
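
When enable_residual_gate is set, W_x and W_y suggest a gated residual connection between the sub-layer input x and its output y. One common formulation of such a gate is sketched below; this is an assumption for illustration, not something the snippet confirms:

import torch
import torch.nn as nn

hidden = 8
W_x = nn.Linear(hidden, hidden)
W_y = nn.Linear(hidden, hidden)

x = torch.randn(2, 5, hidden)          # sub-layer input (residual branch)
y = torch.randn(2, 5, hidden)          # sub-layer output (e.g., attention result)

# Assumed gated-residual form: interpolate between input and output with a learned gate.
gate = torch.sigmoid(W_x(x) + W_y(y))
out = gate * x + (1.0 - gate) * y
print(out.shape)  # torch.Size([2, 5, 8])
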
Example #13
    def __init__(self, q_size, k_size, hidden_size, num_heads, dropout=0.0,
                 name="multihead_attention"):
        super(MultiHeadAdditiveAttention, self).__init__(name=name)

        self.num_heads = num_heads
        self.hidden_size = hidden_size
        self.dropout = dropout

        with utils.scope(name):
            self.q_transform = Affine(q_size, hidden_size,
                                      name="q_transform")
            self.k_transform = Affine(k_size, hidden_size,
                                      name="k_transform")
            self.v_transform = Affine(hidden_size, num_heads,
                                      name="v_transform")
            self.o_transform = Affine(k_size, k_size,
                                      name="o_transform")

        self.reset_parameters()
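
Here v_transform maps the hidden representation to num_heads values, i.e. one additive score per head: score_h(q, k_i) = v_h^T tanh(W_q q + W_k k_i). A sketch of the per-head scoring with nn.Linear; how the per-head contexts are recombined by o_transform is not shown and would follow the project's own forward:

import torch
import torch.nn as nn

q_size, k_size, hidden, heads = 8, 8, 16, 4
q_transform = nn.Linear(q_size, hidden)
k_transform = nn.Linear(k_size, hidden)
v_transform = nn.Linear(hidden, heads)    # one additive score per head

query = torch.randn(2, 1, q_size)         # a single query per batch element
keys = torch.randn(2, 6, k_size)          # six memory positions

scores = v_transform(torch.tanh(q_transform(query) + k_transform(keys)))  # (2, 6, heads)
weights = torch.softmax(scores, dim=1)                                    # softmax over positions
contexts = torch.einsum("bth,btk->bhk", weights, keys)                    # per-head summaries
print(contexts.shape)  # torch.Size([2, 4, 8])
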
Example #14
    def __init__(self,
                 normalized_shape,
                 eps=1e-5,
                 elementwise_affine=True,
                 name="layer_norm"):
        super(LayerNorm, self).__init__(name=name)
        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape, )
        self.normalized_shape = tuple(normalized_shape)
        self.eps = eps
        self.elementwise_affine = elementwise_affine

        with utils.scope(name):
            if self.elementwise_affine:
                self.weight = nn.Parameter(torch.Tensor(*normalized_shape))
                self.bias = nn.Parameter(torch.Tensor(*normalized_shape))
                self.add_name(self.weight, "weight")
                self.add_name(self.bias, "bias")
            else:
                self.register_parameter('weight', None)
                self.register_parameter('bias', None)
        self.reset_parameters()
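
With elementwise_affine enabled, the module keeps a learnable weight and bias over normalized_shape, conventionally initialized to ones and zeros, and the forward matches torch.nn.functional.layer_norm. A compact sketch of both pieces (assuming the project's reset_parameters follows the usual ones/zeros convention):

import torch
import torch.nn.functional as F

hidden = 8
weight = torch.ones(hidden)    # usual reset_parameters choice: gain starts at 1
bias = torch.zeros(hidden)     # offset starts at 0

x = torch.randn(2, 5, hidden)
y = F.layer_norm(x, (hidden,), weight, bias, eps=1e-5)

# The same statistics by hand: normalize over the last dimension, then scale and shift.
mean = x.mean(-1, keepdim=True)
var = x.var(-1, unbiased=False, keepdim=True)
y_manual = (x - mean) / torch.sqrt(var + 1e-5) * weight + bias
print(torch.allclose(y, y_manual, atol=1e-5))  # True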