Code example #1
    def __init__(self, d_model, heads, d_ff, dropout):
        super().__init__()

        # Self-attention and position-wise feed-forward sublayers, with a shared
        # layer norm and dropout.
        self.self_attn = MultiHeadedAttention(heads, d_model, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.dropout = nn.Dropout(dropout)
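
A minimal instantiation sketch for the layer above; the class name (here assumed to be TransformerDecoderLayer), the hyperparameter values, and the OpenNMT-style MultiHeadedAttention / PositionwiseFeedForward dependencies are assumptions for illustration, not taken from the snippet.

# Illustrative only: assumes the __init__ above belongs to a class named
# TransformerDecoderLayer and that its dependencies are importable.
layer = TransformerDecoderLayer(d_model=512, heads=8, d_ff=2048, dropout=0.1)
print(sum(p.numel() for p in layer.parameters()))  # number of trainable weights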
Code example #2
File: decoder.py    Project: martin6336/DSGSum
    def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)
        # Attention over the graph context, with its own feed-forward, dropout and layer norm.
        self.context_attn_graph = MultiHeadedAttention(
            heads, d_model, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.drop_3 = nn.Dropout(dropout)
        self.layer_norm_3 = nn.LayerNorm(d_model, eps=1e-6)
        # Build TransformerDecoder.
        self.transformer_layers = nn.ModuleList(
            [TransformerDecoderLayer(d_model, heads, d_ff, dropout)
             for _ in range(num_layers)])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.att_weight_c = nn.Linear(self.embeddings.embedding_dim, 1)
        self.att_weight_q = nn.Linear(self.embeddings.embedding_dim, 1)
        self.att_weight_cq = nn.Linear(self.embeddings.embedding_dim, 1)
        self.graph_act = gelu
        self.graph_aware = nn.Linear(self.embeddings.embedding_dim*3, self.embeddings.embedding_dim)
        self.graph_drop = nn.Dropout(dropout)

        self.linear_filter = nn.Linear(d_model*2, 1)
        # Learnable position-decay weights: a linearly decreasing ramp over 512
        # positions, broadcast to shape (8, 512, 512); registered as a module
        # parameter by attribute assignment.
        fix_top = (torch.arange(512, 0, -1).float() / 512)\
            .unsqueeze(0).unsqueeze(0).expand(8, 512, -1).clone()
        self.fix_top = nn.Parameter(fix_top.to(self.get_device()), requires_grad=True)
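
A hedged construction sketch for this decoder. The nn.Embedding stand-in (only its embedding_dim attribute is read by the __init__ above), the vocabulary size, and the hyperparameters are assumptions; the module's own get_device() helper is assumed to resolve to an available device.

import torch.nn as nn

# Stand-in embeddings: the constructor above only reads .embedding_dim from this object.
embeddings = nn.Embedding(num_embeddings=30000, embedding_dim=512)
decoder = TransformerDecoder(num_layers=6, d_model=512, heads=8, d_ff=2048,
                             dropout=0.1, embeddings=embeddings)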
Code example #3
    def __init__(self,
                 d_model,
                 heads,
                 d_ff,
                 dropout,
                 topic=False,
                 topic_dim=300,
                 split_noise=False):
        super(TransformerDecoderLayer, self).__init__()

        self.self_attn = MultiHeadedAttention(heads, d_model, dropout=dropout)

        self.context_attn = MultiHeadedAttention(heads,
                                                 d_model,
                                                 dropout=dropout,
                                                 topic=topic,
                                                 topic_dim=topic_dim,
                                                 split_noise=split_noise)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.layer_norm_1 = nn.LayerNorm(d_model, eps=1e-6)
        self.layer_norm_2 = nn.LayerNorm(d_model, eps=1e-6)
        self.drop = nn.Dropout(dropout)
        mask = self._get_attn_subsequent_mask(MAX_SIZE)
        # Register self.mask as a buffer in TransformerDecoderLayer, so
        # it gets TransformerDecoderLayer's cuda behavior automatically.
        self.register_buffer('mask', mask)
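
An illustrative instantiation of this topic-aware variant; topic_dim=300 matches the default in the signature above, while the remaining hyperparameters and the topic-enabled MultiHeadedAttention implementation are assumed.

# Topic-aware decoder layer (illustrative hyperparameter values).
layer = TransformerDecoderLayer(d_model=512, heads=8, d_ff=2048, dropout=0.1,
                                topic=True, topic_dim=300, split_noise=False)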
Code example #4
    def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings,
                 use_universal_transformer):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout,
                                          self.embeddings.embedding_dim)

        # Build TransformerDecoder.
        self.dim_mismatch = d_model != 768

        if self.dim_mismatch:
            self.linear_custom = nn.Linear(768, d_model)
            self.linear_custom_reverse = nn.Linear(d_model, 768)
            print(
                "TransformerDecoder: input dimension is 768 but d_model is {}; "
                "adding upsampling and downsampling projection layers.".format(d_model))
        self.common_ff = None
        if use_universal_transformer:
            print("Using Universal Transformer in Decoder")
            self.common_ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.transformer_layers = nn.ModuleList([
            TransformerDecoderLayer(d_model, heads, d_ff, dropout,
                                    self.common_ff) for _ in range(num_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
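
A construction sketch highlighting the two switches in this variant: a d_model different from 768 triggers the extra up/down-projection layers, and use_universal_transformer=True shares a single PositionwiseFeedForward across all decoder layers. The embeddings stand-in and hyperparameters are assumptions.

import torch.nn as nn

embeddings = nn.Embedding(num_embeddings=30000, embedding_dim=512)  # stand-in embeddings
decoder = TransformerDecoder(num_layers=6, d_model=512, heads=8, d_ff=2048,
                             dropout=0.1, embeddings=embeddings,
                             use_universal_transformer=True)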
Code example #5
File: decoder.py    Project: x0rzkov/BERT-summarizer
    def __init__(self, d_model, heads, d_ff, dropout):
        super(TransformerDecoderLayer, self).__init__()

        self.self_attn = MultiHeadedAttention(heads, d_model, dropout=dropout)
        self.context_attn = MultiHeadedAttention(heads,
                                                 d_model,
                                                 dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.layer_norm_1 = nn.LayerNorm(d_model, eps=1e-6)
        self.layer_norm_2 = nn.LayerNorm(d_model, eps=1e-6)
        self.drop = nn.Dropout(dropout)

        mask = self._get_attn_subsequent_mask(5000)
        self.register_buffer('mask', mask)
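
For reference, a minimal sketch of what _get_attn_subsequent_mask conventionally builds in OpenNMT-style decoders: an upper-triangular mask that blocks attention to future positions. Treat the exact dtype and shape used by the projects above as an assumption.

import numpy as np
import torch

def attn_subsequent_mask(size):
    # Entries strictly above the diagonal are 1: positions a query may not attend to.
    mask = np.triu(np.ones((1, size, size), dtype=np.uint8), k=1)
    return torch.from_numpy(mask)

print(attn_subsequent_mask(4)[0])  # 4x4 causal mask, zeros on and below the diagonal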