Example #1
    def __init__(self, args):
        super(TransformerLayer, self).__init__()

        self.layernorm_positioning = args.layernorm_positioning

        if hasattr(args, "attention_head_size"):
            attention_head_size = args.attention_head_size
        else:
            attention_head_size = args.hidden_size // args.heads_num

        # Biases are kept unless remove_transformer_bias is set.
        has_bias = bool(1 - args.remove_transformer_bias)

        # Multi-headed self-attention.
        self.self_attn = MultiHeadedAttention(args.hidden_size,
                                              args.heads_num,
                                              attention_head_size,
                                              args.dropout,
                                              has_bias=has_bias)
        self.dropout_1 = nn.Dropout(args.dropout)
        self.layer_norm_1 = LayerNorm(args.hidden_size, has_bias=has_bias)
        # Feed forward layer.
        if args.feed_forward == "gated":
            self.feed_forward = GatedFeedForward(args.hidden_size,
                                                 args.feedforward_size,
                                                 args.hidden_act, has_bias)
        else:
            self.feed_forward = PositionwiseFeedForward(
                args.hidden_size, args.feedforward_size, args.hidden_act,
                has_bias)
        self.dropout_2 = nn.Dropout(args.dropout)
        self.layer_norm_2 = LayerNorm(args.hidden_size, has_bias=has_bias)
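To show how the submodules created above are usually combined, here is a minimal, self-contained sketch built on standard torch.nn modules (nn.MultiheadAttention, nn.LayerNorm) rather than the project-specific MultiHeadedAttention, LayerNorm, and feed-forward classes. SimpleTransformerLayer, the GELU activation, and the exact pre-/post-LayerNorm wiring are assumptions for illustration, not the project's own forward implementation.

# Illustrative only: a standalone analogue of the layer above, using torch.nn
# building blocks. Names and the "pre"/"post" wiring are assumptions inferred
# from the fields stored in __init__, not project code.
import torch
import torch.nn as nn


class SimpleTransformerLayer(nn.Module):
    def __init__(self, hidden_size, heads_num, feedforward_size, dropout,
                 layernorm_positioning="post"):
        super().__init__()
        self.layernorm_positioning = layernorm_positioning
        self.self_attn = nn.MultiheadAttention(hidden_size, heads_num,
                                               dropout=dropout, batch_first=True)
        self.dropout_1 = nn.Dropout(dropout)
        self.layer_norm_1 = nn.LayerNorm(hidden_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(hidden_size, feedforward_size),
            nn.GELU(),
            nn.Linear(feedforward_size, hidden_size),
        )
        self.dropout_2 = nn.Dropout(dropout)
        self.layer_norm_2 = nn.LayerNorm(hidden_size)

    def forward(self, hidden):
        if self.layernorm_positioning == "post":
            # Post-LN: residual add first, then normalize.
            inter = self.dropout_1(self.self_attn(hidden, hidden, hidden)[0])
            inter = self.layer_norm_1(inter + hidden)
            output = self.dropout_2(self.feed_forward(inter))
            return self.layer_norm_2(output + inter)
        # Pre-LN: normalize first, then attention / feed-forward, then residual.
        inter = self.layer_norm_1(hidden)
        inter = self.dropout_1(self.self_attn(inter, inter, inter)[0])
        hidden = hidden + inter
        output = self.layer_norm_2(hidden)
        return self.dropout_2(self.feed_forward(output)) + hidden


x = torch.randn(2, 16, 64)  # (batch, seq_len, hidden_size)
layer = SimpleTransformerLayer(hidden_size=64, heads_num=4,
                               feedforward_size=256, dropout=0.1)
print(layer(x).shape)  # torch.Size([2, 16, 64])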
Example #2
    def __init__(self, args):
        super(GptBlock, self).__init__()

        # Multi-headed self-attention.
        self.self_attn = MultiHeadedAttention(args.hidden_size, args.heads_num,
                                              args.dropout)
        self.layer_norm_1 = LayerNorm(args.hidden_size)
        # Feed forward layer.
        self.feed_forward = PositionwiseFeedForward(args.hidden_size,
                                                    args.feedforward_size,
                                                    args.hidden_act)
        self.layer_norm_2 = LayerNorm(args.hidden_size)
Example #3
    def __init__(self, args):
        super(TransformerDecoderLayer, self).__init__()

        # Multi-headed self-attention over the decoder input.
        self.self_attn = MultiHeadedAttention(args.hidden_size, args.heads_num,
                                              args.dropout)
        # Multi-headed context-attention over the encoder output.
        self.context_attn = MultiHeadedAttention(args.hidden_size,
                                                 args.heads_num, args.dropout)
        self.layer_norm_1 = LayerNorm(args.hidden_size)
        self.layer_norm_2 = LayerNorm(args.hidden_size)
        self.layer_norm_3 = LayerNorm(args.hidden_size)
        # Feed forward layer.
        self.feed_forward = PositionwiseFeedForward(args.hidden_size,
                                                    args.feedforward_size,
                                                    args.hidden_act)
Example #4
    def __init__(self, args):
        super(ISynthesizer, self).__init__()

        # Attention module; provided by concrete synthesizer subclasses.
        self.att = None
        self.dropout_1 = nn.Dropout(args.dropout)
        self.layer_norm_1 = LayerNorm(args.hidden_size)
        # Feed forward layer.
        self.feed_forward = PositionwiseFeedForward(args.hidden_size,
                                                    args.feedforward_size)
        self.dropout_2 = nn.Dropout(args.dropout)
        self.layer_norm_2 = LayerNorm(args.hidden_size)

        # Emulate an abstract base class: forbid direct instantiation.
        if self.__class__.__name__ == 'ISynthesizer':
            raise Exception("ISynthesizer cannot be instantiated.")
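As a design note, the name check above keeps the base class from being constructed directly. The same constraint is more commonly expressed with Python's abc module; here is a minimal sketch, where ISynthesizerBase and compute_attention are illustrative names rather than project code:

from abc import ABC, abstractmethod


class ISynthesizerBase(ABC):
    @abstractmethod
    def compute_attention(self, hidden):
        """Concrete synthesizers must implement their own attention map."""


# Instantiating the base class now fails automatically:
# ISynthesizerBase() -> TypeError: Can't instantiate abstract class ...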
Example #5
    def __init__(self, args):
        super(RelationAwareTransformerLayer, self).__init__()

        # Multi-headed self-attention.
        self.self_attn = RelationAwareMultiHeadedAttention(
            args.hidden_size, args.heads_num, args.dropout
        )
        self.dropout_1 = nn.Dropout(args.dropout)
        self.layer_norm_1 = LayerNorm(args.hidden_size)
        # Feed forward layer.
        self.feed_forward = PositionwiseFeedForward(
            args.hidden_size, args.feedforward_size
        )
        self.dropout_2 = nn.Dropout(args.dropout)
        self.layer_norm_2 = LayerNorm(args.hidden_size)
Example #6
    def __init__(self, args):
        super(TransformerDecoderLayer, self).__init__()

        self.layernorm_positioning = args.layernorm_positioning

        # Multi-headed self-attention.
        self.self_attn = MultiHeadedAttention(args.hidden_size, args.heads_num,
                                              args.dropout)
        self.dropout_1 = nn.Dropout(args.dropout)
        self.layer_norm_1 = LayerNorm(args.hidden_size)

        # Multi-headed context-attention.
        self.context_attn = MultiHeadedAttention(args.hidden_size,
                                                 args.heads_num, args.dropout)
        self.dropout_2 = nn.Dropout(args.dropout)
        self.layer_norm_2 = LayerNorm(args.hidden_size)

        # Feed forward layer.
        self.feed_forward = PositionwiseFeedForward(args.hidden_size,
                                                    args.feedforward_size,
                                                    args.hidden_act)
        self.dropout_3 = nn.Dropout(args.dropout)
        self.layer_norm_3 = LayerNorm(args.hidden_size)
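To make the ordering of the three sublayers above concrete, here is a minimal, self-contained sketch of a decoder block using standard torch.nn modules in place of the project-specific classes. SimpleDecoderLayer, the GELU activation, and the post-LayerNorm wiring are assumptions for illustration, not the project's own forward implementation.

# Illustrative only: a standalone analogue of the decoder block above, showing
# the usual order of self-attention over the target, cross-attention over the
# encoder output, and the feed-forward sublayer (post-LayerNorm wiring assumed).
import torch
import torch.nn as nn


class SimpleDecoderLayer(nn.Module):
    def __init__(self, hidden_size, heads_num, feedforward_size, dropout):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(hidden_size, heads_num,
                                               dropout=dropout, batch_first=True)
        self.dropout_1 = nn.Dropout(dropout)
        self.layer_norm_1 = nn.LayerNorm(hidden_size)
        self.context_attn = nn.MultiheadAttention(hidden_size, heads_num,
                                                  dropout=dropout, batch_first=True)
        self.dropout_2 = nn.Dropout(dropout)
        self.layer_norm_2 = nn.LayerNorm(hidden_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(hidden_size, feedforward_size),
            nn.GELU(),
            nn.Linear(feedforward_size, hidden_size),
        )
        self.dropout_3 = nn.Dropout(dropout)
        self.layer_norm_3 = nn.LayerNorm(hidden_size)

    def forward(self, hidden, encoder_output):
        # Self-attention over the decoder input, with residual + LayerNorm.
        query = self.layer_norm_1(hidden + self.dropout_1(
            self.self_attn(hidden, hidden, hidden)[0]))
        # Cross-attention: queries from the decoder, keys/values from the encoder.
        mid = self.layer_norm_2(query + self.dropout_2(
            self.context_attn(query, encoder_output, encoder_output)[0]))
        # Position-wise feed-forward, again with residual + LayerNorm.
        return self.layer_norm_3(mid + self.dropout_3(self.feed_forward(mid)))


memory = torch.randn(2, 20, 64)   # encoder output: (batch, src_len, hidden)
target = torch.randn(2, 16, 64)   # decoder input:  (batch, tgt_len, hidden)
layer = SimpleDecoderLayer(hidden_size=64, heads_num=4,
                           feedforward_size=256, dropout=0.1)
print(layer(target, memory).shape)  # torch.Size([2, 16, 64])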