def __init__(self, embedding_matrix, opt, memory_weighter='no'):
    super(LCRS, self).__init__()
    self.opt = opt
    self.memory_weighter = memory_weighter
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.blstm_l = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                               batch_first=True, rnn_type='LSTM')
    self.blstm_c = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                               batch_first=True, rnn_type='LSTM')
    self.blstm_r = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                               batch_first=True, rnn_type='LSTM')
    self.dense = nn.Linear(opt.hidden_dim * 4, opt.polarities_dim)
    # target-to-context attention
    self.t2c_l_attention = Attention(opt.hidden_dim, score_function='bi_linear')
    self.t2c_r_attention = Attention(opt.hidden_dim, score_function='bi_linear')
    # context-to-target attention
    self.c2t_l_attention = Attention(opt.hidden_dim, score_function='bi_linear')
    self.c2t_r_attention = Attention(opt.hidden_dim, score_function='bi_linear')
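# A hedged, self-contained sketch of the bilinear scoring that
# score_function='bi_linear' selects above. The repo's Attention class is not
# shown here, so shapes, scaling, and masking are assumptions; this only
# illustrates score(k, q) = k @ W @ q^T followed by a softmax over keys.
import torch
import torch.nn.functional as F

def bilinear_attention(k, q, weight):
    # k: (batch, k_len, hidden), q: (batch, q_len, hidden),
    # weight: (hidden, hidden) learnable matrix
    score = torch.bmm(k @ weight, q.transpose(1, 2))   # (batch, k_len, q_len)
    attn = F.softmax(score, dim=1)                     # normalize over key positions
    return torch.bmm(attn.transpose(1, 2), k)          # (batch, q_len, hidden)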
def __init__(self, bert, opt):
    """
    Attentional Encoder Network for Targeted Sentiment Classification.
    :param bert: pretrained BERT encoder
    :param opt: model options
    """
    super(AEN_BERT, self).__init__()
    self.opt = opt
    self.bert = bert
    self.squeeze_embedding = SqueezeEmbedding()
    self.dropout = nn.Dropout(opt.dropout)
    # initialize attn_k and attn_q
    self.attn_k = Attention(opt.bert_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    self.attn_q = Attention(opt.bert_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    # initialize ffn_c and ffn_t, the PCT layers
    self.ffn_c = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.ffn_t = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    # target-specific attention layer
    self.attn_s1 = Attention(opt.hidden_dim, n_head=8, score_function='mlp',
                             dropout=opt.dropout)
    # final output layer
    self.dense = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
def __init__(self, embedding_matrix, opt):
    super(IAN, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float), freeze=True)
    self.lstm_entity1 = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                    batch_first=True, bidirectional=False)
    self.lstm_entity2 = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                    batch_first=True, bidirectional=False)
    self.lstm_context = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                    batch_first=True, bidirectional=False)
    self.attention_entity1 = Attention(opt.hidden_dim, score_function='bi_linear')
    self.attention_entity2 = Attention(opt.hidden_dim, score_function='bi_linear')
    self.attention_context1 = Attention(opt.hidden_dim, score_function='bi_linear')
    self.attention_context2 = Attention(opt.hidden_dim, score_function='bi_linear')
    self.dense = nn.Linear(opt.hidden_dim * 2, opt.polarities_dim)
def __init__(self, embedding_matrix, opt):
    super(RAM2m, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float), freeze=False)
    self.bi_lstm_context = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                       num_layers=1, batch_first=True,
                                       bidirectional=True)
    self.bi_lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                      num_layers=1, batch_first=True,
                                      bidirectional=True)
    self.bi_lstm_img = DynamicLSTM(opt.embed_dim_img, opt.hidden_dim,
                                   num_layers=1, batch_first=True,
                                   bidirectional=True)
    self.attention_text = Attention(opt.hidden_dim * 2, score_function='mlp')
    self.attention_img = Attention(opt.hidden_dim * 2, score_function='mlp')
    self.gru_cell_text = nn.GRUCell(opt.hidden_dim * 2, opt.hidden_dim * 2)
    self.gru_cell_img = nn.GRUCell(opt.hidden_dim * 2, opt.hidden_dim * 2)
    self.bn = nn.BatchNorm1d(opt.hidden_dim * 2, affine=False)
    self.fc = nn.Linear(opt.hidden_dim * 4, opt.polarities_dim)
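# Hedged sketch of the recurrent-attention episode update that the two
# nn.GRUCell modules above imply: each hop attends over the BiLSTM memory and
# feeds the attended vector into a GRU cell to refine the episode state. The
# hop count, dot-product scoring, and all shapes are illustrative assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

hidden = 2 * 300                       # stands in for opt.hidden_dim * 2
gru_cell = nn.GRUCell(hidden, hidden)
memory = torch.randn(8, 20, hidden)    # (batch, seq_len, hidden) BiLSTM output
episode = memory.mean(dim=1)           # initial episode state

for _ in range(3):                     # a few attention hops
    score = torch.bmm(memory, episode.unsqueeze(2)).squeeze(2)   # (batch, seq_len)
    attended = torch.bmm(F.softmax(score, dim=1).unsqueeze(1), memory).squeeze(1)
    episode = gru_cell(attended, episode)                        # refine state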
def __init__(self, embedding_matrix, opt):
    super(GC_IAN1, self).__init__()
    self.opt = opt
    kernel_sizes = [1, 3, 5, 7]
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.cnn_context = nn.ModuleList([
        self.cnn_layer(opt.embed_dim, opt.in_channels, k) for k in kernel_sizes
    ])
    self.cnn_context_ = nn.ModuleList([
        self.cnn_layer(opt.embed_dim, opt.in_channels, k) for k in kernel_sizes
    ])
    self.cnn_aspect = nn.ModuleList([
        self.cnn_layer(opt.embed_dim, opt.in_channels, k) for k in kernel_sizes
    ])
    self.attention_1 = Attention(len(kernel_sizes) * opt.in_channels,
                                 score_function='bi_linear')
    self.attention_2 = Attention(len(kernel_sizes) * opt.in_channels,
                                 score_function='bi_linear')
    self.dropout = nn.Dropout(opt.dropout)
    self.dense = nn.Linear(2 * len(kernel_sizes) * opt.in_channels,
                           opt.polarities_dim)
def __init__(self, embedding_matrix, opt):
    super(AEN, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.squeeze_embedding = SqueezeEmbedding()
    self.attn_k = Attention(opt.embed_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    self.attn_q = Attention(opt.embed_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    self.ffn_c = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.ffn_t = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.attn_s1 = Attention(opt.hidden_dim, n_head=8, score_function='mlp',
                             dropout=opt.dropout)
    self.dense = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
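# Self-contained sketch of the additive ('mlp') scoring that the multi-head
# Attention modules above use, reduced to a single head; the n_head=8 version
# additionally splits and re-merges per-head projections. The exact scoring
# formula and all shapes are assumptions for illustration.
import torch
import torch.nn as nn
import torch.nn.functional as F

class MlpScoreAttention(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.w = nn.Parameter(torch.randn(2 * dim) * 0.01)  # scoring vector

    def forward(self, k, q):
        # k: (batch, k_len, dim), q: (batch, q_len, dim)
        kx = k.unsqueeze(1).expand(-1, q.size(1), -1, -1)   # (b, q_len, k_len, dim)
        qx = q.unsqueeze(2).expand(-1, -1, k.size(1), -1)   # (b, q_len, k_len, dim)
        score = torch.tanh(torch.cat((kx, qx), dim=-1) @ self.w)  # (b, q_len, k_len)
        attn = F.softmax(score, dim=-1)
        return torch.matmul(attn, k)                        # (b, q_len, dim)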
def __init__(self, bert, opt):
    super(AEN_BERT, self).__init__()
    self.opt = opt
    self.bert = bert
    self.squeeze_embedding = SqueezeEmbedding()
    self.dropout = nn.Dropout(opt.dropout)
    self.attn_k = Attention(opt.bert_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    self.attn_q = Attention(opt.bert_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    self.ffn_c = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.ffn_t = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.attn_s1 = Attention(opt.hidden_dim, n_head=8, score_function='mlp',
                             dropout=opt.dropout)
    self.hat = False
    # one classification head per task
    self.last = torch.nn.ModuleList()
    for _ in range(self.opt.taskcla):
        self.last.append(nn.Linear(opt.hidden_dim * 3, opt.polarities_dim))
def __init__(self, bert, opt):
    super(AEN_BERT, self).__init__()
    self.opt = opt
    self.bert = bert
    self.squeeze_embedding = SqueezeEmbedding()
    self.dropout = nn.Dropout(opt.dropout)
    self.attn_k = Attention(opt.bert_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    self.attn_q = Attention(opt.bert_dim, out_dim=opt.hidden_dim, n_head=8,
                            score_function='mlp', dropout=opt.dropout)
    self.ffn_c = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.ffn_t = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.attn_s1 = Attention(opt.hidden_dim, n_head=8, score_function='mlp',
                             dropout=opt.dropout)
    self.dense = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
def __init__(self, config, opt):
    super(BERT_IAN, self).__init__()
    self.opt = opt
    self.bert = BertModel(config)
    self.attention_aspect = Attention(opt.hidden_dim, score_function='bi_linear')
    self.attention_context = Attention(opt.hidden_dim, score_function='bi_linear')
    self.dense = nn.Linear(opt.hidden_dim * 2, opt.output_dim)
def __init__(self, config, opt):
    super(IAN, self).__init__()
    self.opt = opt
    self.bert = BertModel(config)
    self.lstm_context = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                    num_layers=1, batch_first=True)
    self.lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                   num_layers=1, batch_first=True)
    self.attention_aspect = Attention(opt.hidden_dim, score_function='bi_linear')
    self.attention_context = Attention(opt.hidden_dim, score_function='bi_linear')
    self.dense = nn.Linear(opt.hidden_dim * 2, opt.output_dim)
def __init__(self, embedding_matrix, opt):
    super(IAN, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.lstm_context = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                    num_layers=1, batch_first=True)
    self.lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                   num_layers=1, batch_first=True)
    self.attention_aspect = Attention(opt.hidden_dim, score_function='bi_linear',
                                      n_head=2)
    self.attention_context = Attention(opt.hidden_dim, score_function='bi_linear',
                                       n_head=2)
    self.dense = nn.Linear(opt.hidden_dim * 2, opt.polarities_dim)
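# Hedged usage sketch for the IAN constructor directly above. The option names
# mirror what the constructor reads; SimpleNamespace and the random embedding
# matrix are illustrative stand-ins, and a real run still needs the repo's
# DynamicLSTM and Attention classes importable.
import numpy as np
from types import SimpleNamespace

opt = SimpleNamespace(embed_dim=300, hidden_dim=300, polarities_dim=3)
embedding_matrix = np.random.uniform(-0.25, 0.25, (5000, opt.embed_dim))
model = IAN(embedding_matrix, opt)   # IAN as defined above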
def _encoder_decoder_attention(self, q, k, v):
    with tf.variable_scope("encoder-decoder-attention"):
        attention = Attention(num_heads=self.num_heads,
                              masked=False,
                              linear_key_dim=self.linear_key_dim,
                              linear_value_dim=self.linear_value_dim,
                              model_dim=self.model_dim,
                              dropout=self.dropout)
        return attention.multi_head(q, k, v)
def _masked_self_attention(self, q, k, v):
    with tf.variable_scope("masked-self-attention"):
        attention = Attention(num_heads=self.num_heads,
                              masked=True,  # Not implemented yet
                              linear_key_dim=self.linear_key_dim,
                              linear_value_dim=self.linear_value_dim,
                              model_dim=self.model_dim,
                              dropout=self.dropout)
        return attention.multi_head(q, k, v)
def __init__(self, embedding_matrix, opt):
    super(InterAttNet, self).__init__()
    self.model_name = "IAN"
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_matrix))
    self.lstm_context = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                    batch_first=True, return_use_tuple=True)
    self.lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                   batch_first=True, return_use_tuple=True)
    self.attention_aspect = Attention(opt.hidden_dim, score_function="bi_linear",
                                      dropout=0.5)
    self.attention_context = Attention(opt.hidden_dim, score_function="bi_linear",
                                       dropout=0.5)
    self.dense = nn.Linear(opt.hidden_dim * 2, opt.polarity_dim)
def __init__(self, embedding_matrix, opt):
    super(AEGCN, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.squeeze_embedding = SqueezeEmbedding()
    self.text_lstm = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                 batch_first=True, bidirectional=True)
    self.aspect_lstm = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                   batch_first=True, bidirectional=True)
    self.attn_k = Attention(opt.embed_dim * 2, out_dim=opt.hidden_dim,
                            n_head=opt.head, score_function='mlp',
                            dropout=opt.dropout)
    # self.attn_a = Attention(opt.embed_dim * 2, out_dim=opt.hidden_dim,
    #                         n_head=opt.head, score_function='mlp',
    #                         dropout=opt.dropout)
    # self.attn_s1 = Attention(opt.embed_dim * 2, out_dim=opt.hidden_dim,
    #                          n_head=3, score_function='mlp', dropout=0.5)
    self.attn_q = Attention(opt.embed_dim * 2, out_dim=opt.hidden_dim,
                            n_head=opt.head, score_function='mlp',
                            dropout=opt.dropout)
    self.gc1 = GraphConvolution(2 * opt.hidden_dim, 2 * opt.hidden_dim, opt)
    self.gc2 = GraphConvolution(2 * opt.hidden_dim, 2 * opt.hidden_dim, opt)
    self.attn_k_q = Attention(opt.hidden_dim, n_head=opt.head,
                              score_function='mlp', dropout=opt.dropout)
    # self.attn_k_a = Attention(opt.hidden_dim, n_head=opt.head,
    #                           score_function='mlp', dropout=opt.dropout)
    # self.fc = nn.Linear(2 * opt.hidden_dim, opt.polarities_dim)
    self.text_embed_dropout = nn.Dropout(opt.dropout)
    self.aspect_embed_dropout = nn.Dropout(opt.dropout)
    self.dense = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
def _self_attention(self, q, k, v, key_masks):
    with tf.variable_scope("self-attention"):
        attention = Attention(num_heads=self.num_heads,
                              masked=False,
                              linear_key_dim=self.linear_key_dim,
                              linear_value_dim=self.linear_value_dim,
                              model_dim=self.model_dim,
                              max_seq_len=self.max_seq_len,
                              dropout=self.dropout)
        # self.att = attention
        return attention.multi_head(q, k, v, key_masks)
def __init__(self, bert, opt):
    super(SDGCN_NEW, self).__init__()
    self.opt = opt
    self.bert = bert
    self.squeeze_embedding = SqueezeEmbedding()
    self.dropout = nn.Dropout(opt.dropout)
    # 768 is the BERT-base hidden size
    self.lstm_context = sLSTM(opt.bert_dim, 768, window_size=2)
    self.lstm_aspect = sLSTM(opt.bert_dim, 768, window_size=2)
    # self.lstm_context = DynamicLSTM(opt.bert_dim, opt.hidden_dim, num_layers=1,
    #                                 batch_first=True, bidirectional=True)
    # self.lstm_aspect = DynamicLSTM(opt.bert_dim, opt.hidden_dim, num_layers=1,
    #                                batch_first=True, bidirectional=True)
    self.attention_aspect = Attention(768, score_function='bi_linear')
    self.attention_context = Attention(768, score_function='bi_linear')
    self.dense = nn.Linear(768 * 2, opt.polarities_dim)
def __init__(self, word_embedding, char_embedding, opt):
    super(LSTM_CNN, self).__init__()
    self.opt = opt
    self.word_char_embed = WordCharEmbedding(word_embedding, char_embedding, opt)
    self.text_lstm = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                 batch_first=True, bidirectional=True)
    self.bi_lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                      batch_first=True, bidirectional=True)
    D = opt.embed_dim        # embedding dim
    C = opt.polarities_dim   # number of polarity classes
    # A = opt.aspect_num
    Co = 100                 # convolution output channels
    # Ks = [i for i in range(3, 10)]
    Ks = [3, 4]              # kernel sizes
    self.convs1 = nn.ModuleList([nn.Conv1d(D, Co, K) for K in Ks])
    self.convs2 = nn.ModuleList([nn.Conv1d(D, Co, K) for K in Ks])
    self.convs3 = nn.ModuleList([nn.Conv1d(D, Co, K, padding=K - 2)
                                 for K in [3]])
    self.fc_aspect = nn.Linear(100, Co)
    self.attention = Attention(opt.hidden_dim * 2, score_function='mlp')
    self.l1 = nn.Linear(opt.hidden_dim * 2, Co * len(Ks))
    self.dropout = nn.Dropout(opt.dropout)
    # self.dense = nn.Linear(self.kernel_num, opt.polarities_dim)
    self.gru_cell = nn.GRUCell(opt.hidden_dim * 2, opt.hidden_dim * 2)
    self.gru = nn.GRUCell(Co * len(Ks), Co * len(Ks))
    self.dense = nn.Linear(Co * len(Ks) * 2, opt.polarities_dim)
    self.w1 = nn.Linear(Co * len(Ks), Co * len(Ks))
    self.w2 = nn.Linear(Co * len(Ks), Co * len(Ks))
def __init__(self):
    super(ATAE_LSTM, self).__init__()
    self.uniform_rate = config.uniform_rate
    # word embedding concatenated with the aspect embedding -> 2 * embed_size
    self.lstm = nn.LSTM(2 * config.embed_size, config.hidden_size,
                        batch_first=True)
    self.text_embed = nn.Embedding.from_pretrained(
        config.text_vocab.vectors, freeze=not config.if_embed_trainable)
    self.aspect_embed = nn.Embedding.from_pretrained(
        config.aspect_vocab.vectors, freeze=not config.if_embed_trainable)
    self.aspect_mean = AspectMean(config.max_sen_len)
    self.attention = Attention()
    self.proj1 = nn.Linear(config.hidden_size, config.hidden_size, bias=False)
    self.proj2 = nn.Linear(config.hidden_size, config.hidden_size, bias=False)
    self.fc = nn.Linear(config.hidden_size, config.target_size)
    self.dropout = nn.Dropout(config.dropout_rate)
    self.tanh = nn.Tanh()
    self.softmax = nn.Softmax(dim=1)
    # reset parameters
    self.reset_param()
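# Why the nn.LSTM above takes 2 * config.embed_size as input: ATAE-LSTM
# appends the mean-pooled aspect embedding to every word embedding before the
# LSTM. A minimal sketch with illustrative shapes (batch=4, seq_len=10,
# embed_size=300):
import torch

words = torch.randn(4, 10, 300)                       # word embeddings
aspect = torch.randn(4, 300)                          # mean aspect embedding
aspect_rep = aspect.unsqueeze(1).expand(-1, 10, -1)   # repeat along seq_len
lstm_input = torch.cat((words, aspect_rep), dim=-1)   # (4, 10, 600)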
def __init__(self, embedding_matrix, opt, type='cabasc'):
    super(Cabasc, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.squeeze_embedding = SqueezeEmbedding(batch_first=True)
    self.attention = Attention(opt.embed_dim, score_function='mlp',
                               dropout=opt.dropout)  # content attention
    self.m_linear = nn.Linear(opt.embed_dim, opt.embed_dim, bias=False)
    self.mlp = nn.Linear(opt.embed_dim, opt.embed_dim)         # W4
    self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)  # W5
    # context attention layer
    self.rnn_l = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                             batch_first=True, rnn_type='GRU')
    self.rnn_r = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                             batch_first=True, rnn_type='GRU')
    self.mlp_l = nn.Linear(opt.hidden_dim, 1)
    self.mlp_r = nn.Linear(opt.hidden_dim, 1)
def __init__(self, embedding_matrix, opt):
    super(C_LSTM, self).__init__()
    self.opt = opt
    self.kernel_num = 100
    self.kernel_sizes = [3, 4, 5]
    self.embed = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_matrix))
    self.context_lstm = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                    batch_first=True, bidirectional=True)
    self.bi_lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1,
                                      batch_first=True, bidirectional=True)
    self.convs1 = nn.ModuleList([
        nn.Conv1d(opt.embed_dim, self.kernel_num, K) for K in self.kernel_sizes
    ])
    self.attention = Attention(opt.hidden_dim * 2, score_function='mlp')
    self.dropout = nn.Dropout(opt.dropout)
    self.gru_cell = nn.GRUCell(opt.hidden_dim * 2, opt.hidden_dim * 2)
    # self.dense = nn.Linear(self.kernel_num, opt.polarities_dim)
    self.dense = nn.Linear(opt.hidden_dim * 2, opt.polarities_dim)
def __init__(self, input_shape):
    self.model = Sequential()
    for layer in Encoder(input_shape).model.layers:
        self.model.add(layer)
    for layer in Attention(self.model.layers[-1].output_shape).model.layers:
        self.model.add(layer)
    for layer in State(self.model.layers[-1].output_shape).model.layers:
        self.model.add(layer)
    for layer in Decoder(self.model.layers[-1].output_shape).model.layers:
        self.model.add(layer)
    self.model.add(Activation('softmax'))
def __init__(self, embedding_matrix, opt):
    super(MMRAM, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.bi_lstm_context = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                       num_layers=1, batch_first=True,
                                       bidirectional=True,
                                       dropout=opt.dropout_rate)
    self.bi_lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                      num_layers=1, batch_first=True,
                                      bidirectional=True,
                                      dropout=opt.dropout_rate)
    self.attention = Attention(opt.hidden_dim * 2, score_function='mlp',
                               dropout=opt.dropout_rate)
    self.gru_cell = nn.GRUCell(opt.hidden_dim * 2, opt.hidden_dim * 2)
    if self.opt.tfn:
        self.vis2text = nn.Linear(2048, opt.hidden_dim * 2)
        self.dense = nn.Linear(opt.hidden_dim * opt.hidden_dim * 4,
                               opt.polarities_dim)
    else:
        self.vis2text = nn.Linear(2048, opt.hidden_dim)
        self.dense = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
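# Hedged sketch of why the tfn branch above sizes its classifier input at
# opt.hidden_dim * opt.hidden_dim * 4: a tensor-fusion style outer product of
# two (2 * hidden_dim)-dimensional vectors flattens to 4 * hidden_dim ** 2.
# The pairing of text and visual vectors here is an illustrative assumption.
import torch

hidden_dim = 100
text_vec = torch.randn(8, 2 * hidden_dim)    # (batch, 2H) attended text
img_vec = torch.randn(8, 2 * hidden_dim)     # (batch, 2H) projected image
fused = torch.bmm(text_vec.unsqueeze(2), img_vec.unsqueeze(1))  # (batch, 2H, 2H)
fused = fused.view(8, -1)                     # (batch, 4 * hidden_dim ** 2)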
def build(self):
    seq1 = Input(name='seq1', shape=[self.config['seq1_maxlen']])
    seq2 = Input(name='seq2', shape=[self.config['seq2_maxlen']])
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.config['embed_trainable'])
    seq1_embed = embedding(seq1)
    seq1_embed = Dropout(0.5)(seq1_embed)
    seq2_embed = embedding(seq2)
    seq2_embed = Dropout(0.5)(seq2_embed)
    lstm = Bidirectional(
        LSTM(self.config['hidden_size'], return_sequences=True,
             dropout=self.config['dropout_rate']))
    seq1_rep_rnn = lstm(seq1_embed)
    seq2_rep_rnn = lstm(seq2_embed)
    att = Attention(8, 64)
    final_rep = att([seq1_rep_rnn, seq2_rep_rnn, seq2_rep_rnn])
    final_rep = GlobalAveragePooling1D()(final_rep)
    final_rep = Dropout(0.5)(final_rep)
    output = Dense(2, activation="softmax")(final_rep)
    model = Model(inputs=[seq1, seq2], outputs=output)
    return model
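# Illustrative config for the build() above: every key below is one the
# method actually reads; the values are placeholders, and 'embed' must have
# shape (vocab_size, embed_size).
import numpy as np

config = {
    'seq1_maxlen': 30,
    'seq2_maxlen': 30,
    'vocab_size': 5000,
    'embed_size': 300,
    'embed': np.random.uniform(-0.25, 0.25, (5000, 300)),
    'embed_trainable': False,
    'hidden_size': 128,
    'dropout_rate': 0.3,
}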
def __init__(self, embedding_matrix, opt):
    super(Test, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.squeeze_embedding = SqueezeEmbedding()
    # d_model, d_inner, n_head, d_k, d_v and n_layers are assumed to be
    # module-level constants
    self.layer_stack = nn.ModuleList([
        EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=opt.dropout)
        for _ in range(n_layers)
    ])
    # self.ffn_c = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    # self.ffn_t = PositionwiseFeedForward(opt.hidden_dim, dropout=opt.dropout)
    self.attn_s1 = Attention(opt.hidden_dim, n_head=8, score_function='mlp',
                             dropout=opt.dropout)
    self.dense = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
def __init__(self, embedding_matrix, opt):
    super(MemNet, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.squeeze_embedding = SqueezeEmbedding(batch_first=True)
    self.attention = Attention(opt.embed_dim, score_function='mlp')
    self.x_linear = nn.Linear(opt.embed_dim, opt.embed_dim)
    self.dense = nn.Linear(opt.embed_dim, opt.polarities_dim)
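# Hedged sketch of the multi-hop pattern that MemNet's attention plus x_linear
# suggest: each hop re-attends over the word memory and adds a linear
# transform of the previous query. The hop count and the dot-product scoring
# (standing in for the model's 'mlp' score) are assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F

embed_dim = 300
x_linear = nn.Linear(embed_dim, embed_dim)
memory = torch.randn(8, 20, embed_dim)   # context word embeddings
x = memory.mean(dim=1)                   # initial query, e.g. the aspect vector

for _ in range(3):                       # opt.hops in the real model
    score = torch.bmm(memory, x.unsqueeze(2)).squeeze(2)              # (batch, 20)
    out_at = torch.bmm(F.softmax(score, dim=1).unsqueeze(1), memory).squeeze(1)
    x = out_at + x_linear(x)             # hop update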
def __init__(self, config, opt):
    super(MemNet, self).__init__()
    self.opt = opt
    self.bert = BertModel(config)
    self.squeeze_embedding = SqueezeEmbedding(batch_first=True)
    self.attention = Attention(opt.embed_dim, score_function='mlp')
    self.x_linear = nn.Linear(opt.embed_dim, opt.embed_dim)
    self.dense = nn.Linear(opt.embed_dim, opt.output_dim)
def build(self):
    H = self.config['hidden_size']
    v = SharedWeight(size=(H, 1), name='v')
    WQ_u = SharedWeight(size=(2 * H, H), name='WQ_u')
    WP_u = SharedWeight(size=(2 * H, H), name='WP_u')
    WP_v = SharedWeight(size=(H, H), name='WP_v')
    W_g1 = SharedWeight(size=(4 * H, 4 * H), name='W_g1')
    W_g2 = SharedWeight(size=(2 * H, 2 * H), name='W_g2')
    WP_h = SharedWeight(size=(2 * H, H), name='WP_h')
    Wa_h = SharedWeight(size=(2 * H, H), name='Wa_h')
    WQ_v = SharedWeight(size=(2 * H, H), name='WQ_v')
    WPP_v = SharedWeight(size=(H, H), name='WPP_v')
    VQ_r = SharedWeight(size=(H, H), name='VQ_r')
    shared_weights = [v, WQ_u, WP_u, WP_v, W_g1, W_g2,
                      WP_h, Wa_h, WQ_v, WPP_v, VQ_r]

    seq1 = Input(name='seq1', shape=[self.config['seq1_maxlen']])
    seq2 = Input(name='seq2', shape=[self.config['seq2_maxlen']])
    embedding = Embedding(self.config['vocab_size'], self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.config['embed_trainable'])
    seq1_embed = embedding(seq1)
    seq1_embed = Dropout(0.5)(seq1_embed)
    seq2_embed = embedding(seq2)
    seq2_embed = Dropout(0.5)(seq2_embed)

    lstm = Bidirectional(
        GRU(self.config['hidden_size'], return_sequences=True,
            dropout=self.config['dropout_rate']))
    seq1_rep_rnn = lstm(seq1_embed)
    seq2_rep_rnn = lstm(seq2_embed)

    vP = QuestionAttnGRU(units=H, return_sequences=True, unroll=False)(
        [seq2_rep_rnn, seq1_rep_rnn, WQ_u, WP_v, WP_u, v, W_g1])
    hP = Bidirectional(
        SelfAttnGRU(units=H, return_sequences=True,
                    unroll=False))([vP, vP, WP_v, WPP_v, v, W_g2])
    gP = Bidirectional(GRU(units=H, return_sequences=True, unroll=False))(hP)

    rQ = QuestionPooling()([seq1_rep_rnn, WQ_u, WQ_v, v, VQ_r])
    rQ = Dropout(rate=self.config['dropout_rate'], name='rQ')(rQ)

    # note: shared_weights, vP, hP, gP and rQ are built here but are not
    # wired into the output head below
    att = Attention(8, 64)
    final_rep = att([seq1_rep_rnn, seq2_rep_rnn, seq2_rep_rnn])
    final_rep = GlobalAveragePooling1D()(final_rep)
    final_rep = Dropout(0.5)(final_rep)
    output = Dense(2, activation="softmax")(final_rep)
    model = Model(inputs=[seq1, seq2], outputs=output)
    return model
def __init__(self, embedding_matrix, opt):
    super(MMFUSION, self).__init__()
    self.opt = opt
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.lstm_aspect = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                                   num_layers=1, batch_first=True)
    self.lstm_l = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                              num_layers=1, batch_first=True)
    self.lstm_r = DynamicLSTM(opt.embed_dim, opt.hidden_dim,
                              num_layers=1, batch_first=True)
    self.attention_l = Attention(opt.hidden_dim, score_function='bi_linear',
                                 dropout=opt.dropout_rate)
    self.attention_r = Attention(opt.hidden_dim, score_function='bi_linear',
                                 dropout=opt.dropout_rate)
    self.visaspect_att_l = MMAttention(opt.hidden_dim, score_function='bi_linear',
                                       dropout=opt.dropout_rate)
    self.visaspect_att_r = MMAttention(opt.hidden_dim, score_function='bi_linear',
                                       dropout=opt.dropout_rate)
    # self.viscontext_att_aspect = MMAttention(opt.hidden_dim,
    #     score_function='mlp', dropout=opt.dropout_rate)
    # self.visaspect_att_context = MMAttention(opt.hidden_dim,
    #     score_function='mlp', dropout=opt.dropout_rate)
    self.aspect2text = nn.Linear(opt.hidden_dim, opt.hidden_dim)
    self.vismap2text = nn.Linear(2048, opt.hidden_dim)
    self.vis2text = nn.Linear(2048, opt.hidden_dim)
    self.gate = nn.Linear(2048 + 3 * opt.hidden_dim, opt.hidden_dim)
    self.modality_attention = nn.Linear(opt.hidden_dim, 1)
    # bilinear interaction between text vectors and image vectors
    self.text2hidden = nn.Linear(opt.hidden_dim * 3, opt.hidden_dim)
    self.vis2hidden = nn.Linear(opt.hidden_dim, opt.hidden_dim)
    self.hidden2final = nn.Linear(opt.hidden_dim, opt.hidden_dim)
    self.dense_2 = nn.Linear(opt.hidden_dim * 2, opt.polarities_dim)
    self.dense_3 = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
    self.dense_4 = nn.Linear(opt.hidden_dim * 4, opt.polarities_dim)
    self.dense = nn.Linear(opt.hidden_dim, opt.polarities_dim)
def __init__(self, bert, opt):
    super(BERT_SPC, self).__init__()
    # self.squeeze_embedding = SqueezeEmbedding()
    self.bert = bert
    self.dropout = nn.Dropout(opt.dropout)
    self.dense = nn.Linear(opt.bert_dim, opt.polarities_dim)
    self.att = Attention(opt.bert_dim, score_function='mlp')
    self.pool = BertPooler()
    self.conv = nn.MaxPool1d(12)