Example no. 1
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = config
        self.use_elmo = config.use_elmo
        self.use_bert = config.use_bert

        # feature layer
        input_size = 0
        if config.use_elmo:
            input_size += (3 * 1024)
        if config.use_bert:
            input_size += 768
        self.query_linear = nn.Linear(input_size, config.encoding_size)
        self.query_lstm = nn.LSTM(input_size, config.encoding_size // 2, 2, bidirectional=True, batch_first=True)
        self.nodes_linear = nn.Linear(input_size, config.encoding_size)
        # gcn layer
        self.nodes_dropout = nn.Dropout(self.config.dropout)
        # four independent hidden layers ([nn.Linear(512, 512)] * 4 would repeat one module and share its weights)
        self.hidden_linears = nn.ModuleList([nn.Linear(512, 512) for _ in range(4)])
        self.combined_linear = nn.Linear(1024, 512)
        # bi_attention layer
        self.attention_linear = nn.Linear(512 * 3, 1, bias=False)
        self.mha = attention.MultiHeadAttention(config.encoding_size, 8)
        # output layer
        self.out_att1 = nn.Linear(2048, 128)
        self.out_att2 = nn.Linear(128, 1)
Example no. 2
 def __init__(self, model_part, seq_len, d_model, d_inner, n_head, d_k, d_v, layer_sizes, learned, embedding_dim, activation, output_activation, transfer, dropout=0.1):
     super(Transformer, self).__init__()
     
     self.learned = learned
     if learned:
         self.embed = nn.Embedding(AMINO_ACID, LEARNED_DIM, padding_idx=0)
     
     self.posembed = PositionEmbedding(seq_len, d_model, "Sinusoid")
     
     self.d_model = d_model
     self.slf_attn = attention.MultiHeadAttention(
         n_head, d_model, d_k, d_v, dropout=dropout)
     self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)
     
     self.model_part = model_part
     self.attn, output_size = aggregate_feature(model_part, d_model, seq_len)
     
     self.transfer = transfer
     if transfer:
         output_size = output_size * 2
         
     self.layer_num = len(layer_sizes)
     if self.layer_num > 0:
         self.denses = nn.ModuleList()
         self.dactive = nn.ModuleList()
         for i, layer_size in enumerate(layer_sizes):
             self.denses.append(nn.Linear(output_size, layer_size))
             output_size = layer_size
             self.dactive.append(get_activation(activation))
     
     self.output_layer = nn.Linear(output_size, embedding_dim)
     self.output_active = get_activation(output_activation)
Example no. 3
 def __init__(self,
              d_w,
              d_e,
              num_classes,
              hidden_dim,
              word_emb_weight,
              num_layers=4,
              num_heads=8,
              dropout=0.1,
              max_sen_len=100):
     super(Transformer, self).__init__()
     self.max_sen_len = max_sen_len
     self.w2v = nn.Embedding.from_pretrained(word_emb_weight, freeze=False)
     self.pos_embedding1 = nn.Embedding(2 * self.max_sen_len, d_e)
     self.pos_embedding2 = nn.Embedding(2 * self.max_sen_len, d_e)
     c = copy.deepcopy
     d_model = d_w + 2 * d_e
     self_attn = attention.MultiHeadAttention(h=num_heads,
                                              d_model=d_model,
                                              dropout=dropout)
     ff = layers.PositionwiseFeedForward(d_model=d_model,
                                         d_ff=hidden_dim,
                                         dropout=dropout)
     word_attn = attention.WordAttention(
         d_model)  # (batch, sen, d_model) => (batch, d_model)
     self.model = nn.Sequential(
         layers.Encoder(
             layers.EncoderLayer(d_model, c(self_attn), c(ff), dropout),
             num_layers), word_attn, nn.Linear(d_model, d_model // 2),
         nn.ReLU(), nn.Linear(d_model // 2, num_classes))
     for p in self.model.parameters():
          if p.dim() > 1:  # dim(): number of dimensions; initialize weight matrices only
             nn.init.xavier_uniform_(p)
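The constructor above sets d_model = d_w + 2 * d_e, which implies the word embedding is concatenated with the two position embeddings before entering the encoder. A minimal sketch of the matching forward pass, assuming index tensors tokens, pos1 and pos2 of shape (batch, sen_len); the argument names and the single-tensor call into self.model are assumptions, since only __init__ is shown:

 def forward(self, tokens, pos1, pos2):
      # Concatenate the word embedding with both position embeddings so the
      # feature size matches d_model = d_w + 2 * d_e expected by the encoder.
      x = torch.cat([self.w2v(tokens),
                     self.pos_embedding1(pos1),
                     self.pos_embedding2(pos2)], dim=-1)  # (batch, sen, d_model)
      # Encoder -> WordAttention -> linear head, as wired in nn.Sequential above.
      return self.model(x)  # (batch, num_classes)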
Example no. 4
 def _self_attention(self, x, attention_bias=None):
     with tf.variable_scope('self-attention'):
         multi_head_attention = attention.MultiHeadAttention(
             num_heads=self.num_heads,
             linear_key_dim=self.linear_key_dim,
             linear_value_dim=self.linear_value_dim,
             hidden_size=self.hidden_size,
             dropout=self.dropout,
             attention_bias=attention_bias)
         return multi_head_attention.build(x, x, x)
Example no. 5
 def __init__(self,
              d_w,
              d_e,
              num_heads,
              num_layers,
              hidden_dim,
              window_sizes,
              num_filter,
              dropout_p,
              is_gpu,
              num_classes=2):
     super(CharAttnModelHelper, self).__init__()
     self.w2v = nn.Embedding(97, d_w)
     self.pos_embedding = nn.Embedding(842, d_e)
     self.is_gpu = is_gpu
     c = copy.deepcopy
     d_model = d_w + d_e
     self.cnn_layer1 = nn.Sequential(
         nn.Conv2d(in_channels=1,
                   out_channels=d_model,
                   kernel_size=(3, d_model),
                   stride=(1, 1),
                   padding=(1, 0))  # (batch, d_model, max_sen_len, 1)
     )
     self.cnn_layer1.apply(self.weights_init)
     self_attn = attention.MultiHeadAttention(h=num_heads,
                                              d_model=d_model,
                                              dropout=dropout_p)
     ff = layers.PositionwiseFeedForward(d_model=d_model,
                                         d_ff=hidden_dim,
                                         dropout=dropout_p)
     self.self_attn_layer = nn.Sequential(
         layers.Encoder(
             layers.EncoderLayer(d_model, c(self_attn), c(ff), dropout_p),
             num_layers))  # (batch, max_sen_len, d_w + d_e)
     for p in self.self_attn_layer.parameters():
          if p.dim() > 1:  # dim(): number of dimensions; initialize weight matrices only
             nn.init.xavier_uniform_(p)
     self.cnn_layer2 = CNNLayers(d_model, num_filter, window_sizes,
                                 dropout_p, is_gpu)
     # (batch, len(window_sizes), num_filter) => (batch, num_filter)
     self.word_attn = attention.WordAttention(num_filter)
     for p in self.word_attn.parameters():
          if p.dim() > 1:  # dim(): number of dimensions; initialize weight matrices only
             nn.init.xavier_uniform_(p)
     self.linear_layer = nn.Sequential(
         nn.Linear(num_filter, num_filter // 2), nn.Dropout(dropout_p),
         nn.Tanh(), nn.Linear(num_filter // 2, num_classes))
     self.linear_layer.apply(self.weights_init)
Example no. 6
 def __init__(self,
              model_dim,
              n_head,
              key_dim,
              value_dim,
              hidden_dim,
              dropout=0.1):
     super(DecoderLayer, self).__init__()
     self.self_attention = attention.MultiHeadAttention(model_dim=model_dim,
                                                        n_head=n_head,
                                                        key_dim=key_dim,
                                                        value_dim=value_dim,
                                                        dropout=dropout)
     self.layer_norm_1 = nn.LayerNorm(normalized_shape=model_dim, eps=1e-12)
     self.encoder_attention = attention.MultiHeadAttention(
         model_dim=model_dim,
         n_head=n_head,
         key_dim=key_dim,
         value_dim=value_dim,
         dropout=dropout)
     self.layer_norm_2 = nn.LayerNorm(normalized_shape=model_dim, eps=1e-12)
     self.ffn = feed_forward.PositionwiseFeedForward(model_dim=model_dim,
                                                     hidden_dim=hidden_dim)
     self.layer_norm_3 = nn.LayerNorm(normalized_shape=model_dim, eps=1e-12)
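Example no. 6 wires up the standard post-norm decoder layer: masked self-attention, encoder-decoder (cross) attention, and a position-wise feed-forward block, each followed by a residual connection and LayerNorm. Its forward pass is not shown; as a point of reference, here is a self-contained sketch of the same wiring built on PyTorch's nn.MultiheadAttention and a plain two-layer FFN rather than the repo's attention and feed_forward modules, whose call signatures are not visible here:

import torch
import torch.nn as nn

class DecoderLayerSketch(nn.Module):
    """Post-norm decoder layer: self-attn, cross-attn, FFN, each with residual + LayerNorm."""

    def __init__(self, model_dim, n_head, hidden_dim, dropout=0.1):
        super().__init__()
        self.self_attention = nn.MultiheadAttention(model_dim, n_head, dropout=dropout, batch_first=True)
        self.layer_norm_1 = nn.LayerNorm(model_dim, eps=1e-12)
        self.encoder_attention = nn.MultiheadAttention(model_dim, n_head, dropout=dropout, batch_first=True)
        self.layer_norm_2 = nn.LayerNorm(model_dim, eps=1e-12)
        self.ffn = nn.Sequential(nn.Linear(model_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, model_dim))
        self.layer_norm_3 = nn.LayerNorm(model_dim, eps=1e-12)

    def forward(self, x, memory, tgt_mask=None):
        # Masked self-attention over the decoder input.
        attn, _ = self.self_attention(x, x, x, attn_mask=tgt_mask)
        x = self.layer_norm_1(x + attn)
        # Cross-attention: queries from the decoder, keys/values from the encoder output.
        attn, _ = self.encoder_attention(x, memory, memory)
        x = self.layer_norm_2(x + attn)
        # Position-wise feed-forward block with residual + post-norm.
        return self.layer_norm_3(x + self.ffn(x))

# Usage: decoder input of length 10 attending over encoder output of length 12.
layer = DecoderLayerSketch(model_dim=512, n_head=8, hidden_dim=2048)
out = layer(torch.randn(2, 10, 512), torch.randn(2, 12, 512))  # (2, 10, 512)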
Example no. 7
    def __init__(self, size, n_heads, dropout):
        super().__init__()

        self.attention = attention.MultiHeadAttention(size, n_heads)
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(size)
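As in the previous examples, only __init__ is shown here. The three members suggest the usual residual sublayer, x + dropout(attention(x)) followed by LayerNorm; a hedged sketch of that forward, where the (query, key, value, mask) call convention of the repo's attention.MultiHeadAttention is an assumption:

    def forward(self, x, mask=None):
        # Assumed call convention: (query, key, value, mask) returning the attended tensor.
        attn_out = self.attention(x, x, x, mask)
        # Residual connection, dropout on the sublayer output, then post-norm.
        return self.layer_norm(x + self.dropout(attn_out))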