def forward(self, x): # print("fffff",x) embed = self.embed(x) # CNN cnn_x = embed cnn_x = torch.transpose(cnn_x, 0, 1) cnn_x = cnn_x.unsqueeze(1) cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1] # [(N,Co,W), ...]*len(Ks) cnn_x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in cnn_x] # [(N,Co), ...]*len(Ks) cnn_x = torch.cat(cnn_x, 1) cnn_x = self.dropout(cnn_x) # LSTM lstm_x = embed.view(len(x), embed.size(1), -1) lstm_out, self.hidden = self.lstm(lstm_x, self.hidden) lstm_out = torch.transpose(lstm_out, 0, 1) lstm_out = torch.transpose(lstm_out, 1, 2) # lstm_out = F.tanh(lstm_out) lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2) # CNN and LSTM cat cnn_x = torch.transpose(cnn_x, 0, 1) lstm_out = torch.transpose(lstm_out, 0, 1) cnn_lstm_out = torch.cat((cnn_x, lstm_out), 0) cnn_lstm_out = torch.transpose(cnn_lstm_out, 0, 1) # linear cnn_lstm_out = self.hidden2label1(F.tanh(cnn_lstm_out)) cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out)) # output logit = cnn_lstm_out return logit
def forward(self, x): # print("aaaaa") x_no_static = self.embed_no_static(x) # x_no_static = self.dropout(x_no_static) x_static = self.embed_static(x) # fix the embedding # x_static = Variable(x_static.data) # x_static = self.dropout(x_static) x = torch.stack([x_static, x_no_static], 1) # x = x.unsqueeze(1) # (N,Ci,W,D) x = self.dropout(x) if self.args.batch_normalizations is True: x = [F.relu(self.convs1_bn(conv(x))).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks) x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] #[(N,Co), ...]*len(Ks) else: x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks) x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] # [(N,Co), ...]*len(Ks) x = torch.cat(x, 1) ''' x1 = self.conv_and_pool(x,self.conv13) #(N,Co) x2 = self.conv_and_pool(x,self.conv14) #(N,Co) x3 = self.conv_and_pool(x,self.conv15) #(N,Co) x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co) ''' x = self.dropout(x) # (N,len(Ks)*Co) if self.args.batch_normalizations is True: x = self.fc1(x) logit = self.fc2(F.relu(x)) else: x = self.fc1(x) logit = self.fc2(F.relu(x)) return logit
def forward(self, sample):
    statement = Variable(sample.statement).unsqueeze(0)
    subject = Variable(sample.subject).unsqueeze(0)
    speaker = Variable(sample.speaker).unsqueeze(0)
    speaker_pos = Variable(sample.speaker_pos).unsqueeze(0)
    state = Variable(sample.state).unsqueeze(0)
    party = Variable(sample.party).unsqueeze(0)
    context = Variable(sample.context).unsqueeze(0)
    batch = 1  # currently supports one sample at a time
    # TODO: Increase batch number
    # Statement
    statement_ = self.statement_embedding(statement).unsqueeze(0)  # 1*W*D -> 1*1*W*D
    statement_ = [F.relu(conv(statement_)).squeeze(3) for conv in self.statement_convs]  # 1*1*W*D -> 1*Co*W x len(convs)
    statement_ = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in statement_]  # 1*Co*1 -> 1*Co x len(convs)
    statement_ = torch.cat(statement_, 1)  # 1*(Co*len(convs))
    # Subject
    subject_ = self.subject_embedding(subject)  # 1*W*D
    _, (subject_, _) = self.subject_lstm(subject_)  # 1*(layer x dir)*hidden
    subject_ = F.max_pool1d(subject_, self.subject_hidden_dim).view(1, -1)  # 1*(layer x dir)*1 -> 1*(layer x dir)
    # Speaker
    speaker_ = self.speaker_embedding(speaker).squeeze(0)  # 1*1*D -> 1*D
    # Speaker position
    speaker_pos_ = self.speaker_pos_embedding(speaker_pos)
    _, (speaker_pos_, _) = self.speaker_pos_lstm(speaker_pos_)
    speaker_pos_ = F.max_pool1d(speaker_pos_, self.speaker_pos_hidden_dim).view(1, -1)
    # State
    state_ = self.state_embedding(state).squeeze(0)
    # Party
    party_ = self.party_embedding(party).squeeze(0)
    # Context
    context_ = self.context_embedding(context)
    _, (context_, _) = self.context_lstm(context_)
    context_ = F.max_pool1d(context_, self.context_hidden_dim).view(1, -1)
    # Concatenate all feature vectors
    features = torch.cat((statement_, subject_, speaker_, speaker_pos_, state_, party_, context_), 1)
    features = self.dropout(features)
    features = self.fc(features)
    return features
def forward(self, sentence):
    x = self.word_embeddings(sentence)  # (seq_len, batch, embed_dim)
    x = torch.transpose(x, 0, 1)        # (batch, seq_len, embed_dim)
    x = x.unsqueeze(1)                  # (batch, 1, seq_len, embed_dim)
    x = [F.relu(conv(x)).squeeze(3) for conv in self.convsl]  # [(batch, Co, W'), ...] * len(Ks)
    x = torch.cat(x, 2)                 # concatenate along the length dimension
    x = torch.transpose(x, 1, 2)
    embeds = torch.transpose(x, 0, 1)   # (total_len, batch, Co)
    lstm_out, self.hidden = self.lstm(embeds, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2))
    lstm_out = lstm_out.squeeze(2)
    lstm_out = self.dropout(lstm_out)
    y = self.hidden2label(lstm_out)
    # log_probs = F.log_softmax(y)
    log_probs = y
    return log_probs
def forward(self, x):
    x_no_static = self.embed_no_static(x)
    x_static = self.embed_static(x)
    x_static = Variable(x_static.data)  # detach so the static embedding stays fixed
    x = torch.stack([x_static, x_no_static], 1)
    one_layer = x  # (N, Ci=2, W, D), e.g. torch.Size([64, 2, 43, 300])
    # first convolutional layer
    one_layer = [torch.transpose(F.relu(conv(one_layer)).squeeze(3), 1, 2).unsqueeze(1)
                 for conv in self.convs1]  # e.g. torch.Size([64, 1, 36, 100]) each
    # second convolutional layer
    two_layer = [F.relu(conv(one_layer)).squeeze(3)
                 for (conv, one_layer) in zip(self.convs2, one_layer)]
    # pooling
    output = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in two_layer]  # e.g. torch.Size([64, 100]) each
    output = torch.cat(output, 1)  # e.g. torch.Size([64, 300])
    # dropout
    output = self.dropout(output)
    # linear
    output = self.fc1(output)
    logit = self.fc2(F.relu(output))
    return logit
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)  # (N, Co)
    # an average-pooling variant would be F.avg_pool1d(x, x.size(2)).squeeze(2);
    # the original ended with torch.cat(x, 1) on a single tensor, which errors
    return x
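# A minimal usage sketch for the conv_and_pool pattern above. The shapes are
# illustrative assumptions (batch 32, sentence length 50, embedding dim 300,
# 100 filters of height 3), not taken from any snippet here.
import torch
import torch.nn as nn
import torch.nn.functional as F

conv = nn.Conv2d(in_channels=1, out_channels=100, kernel_size=(3, 300))
x = torch.randn(32, 1, 50, 300)                      # (N, Ci, W, D)
feat = F.relu(conv(x)).squeeze(3)                    # (32, 100, 48)
feat = F.max_pool1d(feat, feat.size(2)).squeeze(2)   # (32, 100)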
def forward(self, x):
    x = self.embed(x)
    x = self.dropout(x)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2))
    bilstm_out = bilstm_out.squeeze(2)
    hidden2label = self.hidden2label1(F.tanh(bilstm_out))
    gate_layer = F.sigmoid(self.gate_layer(bilstm_out))
    # highway layer: gate * H(x) + (1 - gate) * x
    gate_hidden_layer = torch.mul(hidden2label, gate_layer)
    # note: using (1 - gate) * hidden2label also runs, but does not match the
    # Highway Networks formula, which carries the raw input through the gate
    gate_input = torch.mul((1 - gate_layer), bilstm_out)
    highway_output = torch.add(gate_hidden_layer, gate_input)
    logit = self.logit_layer(highway_output)
    return logit
def forward(self, sentence):
    x = self.word_embeddings(sentence)  # e.g. torch.Size([42, 64, 100])
    x = self.dropout_embed(x)
    lstm_out, self.hidden = self.bnlstm(x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2))
    lstm_out = lstm_out.squeeze(2)
    y = self.hidden2label(F.tanh(lstm_out))
    # log_probs = F.log_softmax(y)
    log_probs = y
    return log_probs
def _conv_and_pool(x):
    x = x.unsqueeze(1)
    # note: building the Conv2d inside the function means freshly initialized
    # (untrained) weights on every call; in a real model the layer belongs in __init__
    conv = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(3, 4), padding=(1, 0))
    x = conv(x)
    x = x.squeeze(3)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)
    return x
def forward(self, XD_input, XT_input):
    # embed XD_input
    XD_input = self.embedding_XD(XD_input)
    # three ReLU convolution layers over the drug (SMILES) branch
    smiles = functional.relu(self.convolution1_XD(torch.transpose(XD_input, 2, 1)))
    smiles = functional.relu(self.convolution2_XD(smiles))
    smiles = functional.relu(self.convolution3_XD(smiles))
    # global max pooling over the temporal dimension
    smiles = functional.max_pool1d(smiles, kernel_size=smiles.size()[2:])
    # flatten the result
    smiles = smiles.view(smiles.shape[0], smiles.shape[1])
    # embed XT_input
    XT_input = self.embedding_XT(XT_input)
    # three ReLU convolution layers over the protein branch
    protein = functional.relu(self.convolution1_XT(torch.transpose(XT_input, 2, 1)))
    protein = functional.relu(self.convolution2_XT(protein))
    protein = functional.relu(self.convolution3_XT(protein))
    # global max pooling over the temporal dimension
    protein = functional.max_pool1d(protein, kernel_size=protein.size()[2:])
    # flatten the result
    protein = protein.view(protein.shape[0], protein.shape[1])
    # concatenate the two branches
    interaction = torch.cat((smiles, protein), 1)
    # fully connected layers with ReLU and dropout
    f_relu = functional.relu(self.fully_connected1(interaction))
    f_relu = self.dropout1(f_relu)
    f_relu = functional.relu(self.fully_connected2(f_relu))
    f_relu = self.dropout2(f_relu)
    f_relu = functional.relu(self.fully_connected3(f_relu))
    # final fully connected layer (no activation)
    f_relu = self.fully_connected4(f_relu)
    return f_relu
def max_pool1d(self, x, seq_lens):
    # x: (N, L, O_in)
    out = []
    for index, t in enumerate(x):
        t = t[:seq_lens[index], :]
        t = torch.t(t).unsqueeze(0)
        out.append(F.max_pool1d(t, t.size(2)))
    out = torch.cat(out).squeeze(2)
    return out
def forward(self, x):
    emb = self.emb(x).unsqueeze(1)  # batch_size * 1 * seq_len * emb_dim
    convs = [F.relu(conv(emb)).squeeze(3) for conv in self.convs]  # [batch_size * num_filter * length]
    pools = [F.max_pool1d(conv, conv.size(2)).squeeze(2) for conv in convs]  # [batch_size * num_filter]
    pred = torch.cat(pools, 1)  # batch_size * num_filters_sum
    highway = self.highway(pred)
    # highway connection: t * g(x) + (1 - t) * x, with transform gate t = sigmoid(highway)
    pred = F.sigmoid(highway) * F.relu(highway) + (1. - F.sigmoid(highway)) * pred
    pred = self.softmax(self.lin(self.dropout(pred)))
    return pred
def forward(self, x):
    x = self.embed(x)  # (N, W, D)
    x = self.dropout_embed(x)
    x = x.unsqueeze(1)  # (N, Ci, W, D)
    if self.args.batch_normalizations is True:
        x = [self.convs1_bn(F.tanh(conv(x))).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...] * len(Ks)
    else:
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...] * len(Ks)
    x = torch.cat(x, 1)
    x = self.dropout(x)  # (N, len(Ks) * Co)
    if self.args.batch_normalizations is True:
        x = self.fc1_bn(self.fc1(x))
        logit = self.fc2_bn(self.fc2(F.tanh(x)))
    else:
        logit = self.fc(x)
    return logit
def forward(self, inputs):
    inputs = self.embedding(inputs)  # (B, T, D)
    inputs = inputs.view(-1, 1, self.embedding_dim * inputs.size(1))  # (B, 1, T*D)
    inputs = [F.relu(conv(inputs)) for conv in self.convs]  # [(B, kernel_dim, L_out), ...] * len(Ks)
    inputs = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in inputs]  # [(B, kernel_dim), ...] * len(Ks)
    concated = torch.cat(inputs, 1)  # (B, kernel_dim * len(Ks))
    concated = self.dropout(concated)
    out = self.fc(concated)
    return F.sigmoid(out)
def forward(self, x):
    x = self.embed(x)  # (N, W, D), e.g. torch.Size([64, 49, 300])
    x_static = Variable(x.data)  # detached copy used as a static channel
    x = x.unsqueeze(1)  # (N, Ci, W, D): add a channel dimension at index 1
    x_static = x_static.unsqueeze(1)
    x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
    x2 = [F.relu(conv(x_static)).squeeze(3) for conv in self.convs1]
    x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...] * len(Ks)
    x2 = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x2]
    x = torch.cat(x, 1)
    x2 = torch.cat(x2, 1)
    x = torch.cat([x, x2], 1)  # concatenate non-static and static features
    x = self.dropout(x)  # (N, 2 * len(Ks) * Co)
    logit = self.fc1(x)  # (N, C)
    return logit
def forward(self, x): # print("source x {} ".format(x.size())) x = self.embed(x) # (N,W,D) x = self.dropout(x) x = x.unsqueeze(1) # (N,Ci,W,D) if self.args.batch_normalizations is True: x = [self.convs1_bn(F.tanh(conv(x))).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks) x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] #[(N,Co), ...]*len(Ks) else: # x = [self.dropout(F.relu(conv(x)).squeeze(3)) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks) x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks) # x = [F.tanh(conv(x)).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks) # x = [conv(x).squeeze(3) for conv in self.convs1] #[(N,Co,W), ...]*len(Ks) x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] #[(N,Co), ...]*len(Ks) x = torch.cat(x, 1) # x = self.dropout(x) # (N,len(Ks)*Co) if self.args.batch_normalizations is True: x = self.fc1_bn(self.fc1(x)) fc = self.fc2_bn(self.fc2(F.tanh(x))) else: fc = self.fc1(x) # fc = self.fc2(F.relu(x)) # print("xxx {} ".format(x.size())) gate_layer = F.sigmoid(self.gate_layer(x)) # calculate highway layer values # print(" fc_size {} gate_layer_size {}".format(fc.size(), gate_layer.size())) gate_fc_layer = torch.mul(fc, gate_layer) # print("gate_layer {} ".format(gate_layer)) # print("1 - gate_layer size {} ".format((1 - gate_layer).size())) # if write like follow ,can run,but not equal the HighWay NetWorks formula # gate_input = torch.mul((1 - gate_layer), fc) gate_input = torch.mul((1 - gate_layer), x) highway_output = torch.add(gate_fc_layer, gate_input) logit = self.logit_layer(highway_output) return logit
def forward(self, x):
    embed = self.embed(x)
    x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.tanh(bilstm_out)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    y = self.hidden2label1(bilstm_out)
    y = self.hidden2label2(y)
    logit = y
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = torch.transpose(cnn_x, 0, 1)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [conv(cnn_x).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
    cnn_x = [F.tanh(F.max_pool1d(i, i.size(2)).squeeze(2)) for i in cnn_x]  # [(N, Co), ...] * len(Ks)
    cnn_x = torch.cat(cnn_x, 1)
    cnn_x = self.dropout(cnn_x)
    # BiLSTM
    bilstm_x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(bilstm_x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.tanh(bilstm_out)
    # concatenate the CNN and BiLSTM features
    cnn_x = torch.transpose(cnn_x, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    cnn_bilstm_out = torch.cat((cnn_x, bilstm_out), 0)
    cnn_bilstm_out = torch.transpose(cnn_bilstm_out, 0, 1)
    # linear
    cnn_bilstm_out = self.hidden2label1(F.tanh(cnn_bilstm_out))
    cnn_bilstm_out = self.hidden2label2(F.tanh(cnn_bilstm_out))
    # output
    logit = cnn_bilstm_out
    return logit
def max_pool1d(x, seq_lens): """ :param x: (B, L, D) :param seq_lens: (B) :return: (B, D) """ out = [] for index, t in enumerate(x): # t: (L, D) t = t[:seq_lens[index], :] t = torch.t(t).unsqueeze(0) # (L, D) -> (D, L) -> (1, D, L) out.append(F.max_pool1d(t, t.size(2))) # [(1, D, 1)] out = torch.cat(out).squeeze(2) # B * (1, D, 1) -> (B, D, 1) -> (B, D) return out
def forward(self, input):
    embed = self.embed(input)
    input = embed.view(len(input), embed.size(1), -1)
    # GRU
    lstm_out, hidden = self.gru(input, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    # pooling
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    lstm_out = F.tanh(lstm_out)
    # linear
    y = self.hidden2label(lstm_out)
    logit = y
    return logit
def forward(self, x):
    embed = self.embed(x)
    embed = self.dropout_embed(embed)
    x = embed.view(len(x), embed.size(1), -1)
    # LSTM
    lstm_out, self.hidden = self.lstm(x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    # pooling
    lstm_out = F.tanh(lstm_out)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    lstm_out = F.tanh(lstm_out)
    # linear
    logit = self.hidden2label(lstm_out)
    return logit
def forward(self, sentence):
    # CNN branch
    x = self.word_embeddings(sentence)  # (seq_len, batch, embed_dim)
    x = torch.transpose(x, 0, 1)
    x = x.unsqueeze(1)
    x = [F.relu(conv(x)).squeeze(3) for conv in self.convsl]
    x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
    x = torch.cat(x, 1)  # e.g. 64 x 500
    x = self.dropout(x)
    # LSTM branch
    embeds = self.word_embeddings(sentence)
    lstm_out, self.hidden = self.lstm(embeds, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2))
    lstm_out = lstm_out.squeeze(2)  # e.g. 64 x 50
    # concatenate both branches and classify
    y = torch.cat((lstm_out, x), 1)
    y = self.fc2class(y)
    # log_probs = F.log_softmax(y)
    log_probs = y
    return log_probs
def _get_lstm_features(self, names, lengths):
    self.hidden = self.init_hidden(names.size(-1))
    embeds = self.char_embeds(names)  # Figure 4
    packed_input = pack_padded_sequence(embeds, lengths)  # Figure 5
    packed_output, (ht, ct) = self.lstm(packed_input, self.hidden)  # Figure 6
    lstm_out, _ = pad_packed_sequence(packed_output)  # Figure 7
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.tanh(lstm_out)  # Figure 8
    lstm_out, indices = F.max_pool1d(lstm_out, lstm_out.size(2), return_indices=True)  # Figure 9
    lstm_out = lstm_out.squeeze(2)  # adjust the dimensions to match the expected input format
    lstm_out = F.tanh(lstm_out)
    lstm_feats = self.fully_connected_layer(lstm_out)
    output = self.softmax(lstm_feats)  # Figure 10
    return output
def get_last_hiddens(self, input, seq_lengths):
    """
    input:
        input: Variable(batch_size, word_length)
        seq_lengths: numpy array (batch_size, 1)
    output:
        Variable(batch_size, char_hidden_dim)
    Note: it only accepts variables ordered by length; the lengths are recorded in seq_lengths.
    """
    batch_size = input.size(0)
    char_embeds = self.char_drop(self.char_embeddings(input))
    char_embeds = char_embeds.transpose(2, 1).contiguous()
    char_cnn_out = self.char_cnn(char_embeds)
    char_cnn_out = F.max_pool1d(char_cnn_out, char_cnn_out.size(2)).view(batch_size, -1)
    return char_cnn_out
def forward(self, input):
    embed = self.embed(input)
    embed = self.dropout(embed)  # note: adding this dropout reduced accuracy
    input = embed.view(len(input), embed.size(1), -1)
    # GRU
    gru_out, hidden = self.bigru(input, self.hidden)
    gru_out = torch.transpose(gru_out, 0, 1)
    gru_out = torch.transpose(gru_out, 1, 2)
    # pooling
    gru_out = F.max_pool1d(gru_out, gru_out.size(2)).squeeze(2)
    gru_out = F.tanh(gru_out)
    # linear
    y = self.hidden2label(gru_out)
    logit = y
    return logit
def forward(self, x):
    # x: e.g. LongTensor of size 64 x 35
    x = self.embed(x)
    x = x.unsqueeze(1)  # (N, Ci, W, D): add a channel dimension at index 1
    # apply each convolution, then batch-norm over the transposed feature map
    a = []
    for conv in self.convs1:
        xx = conv(x)  # e.g. FloatTensor of size 16 x 200 x 35 x 1
        xx = Variable(torch.transpose(xx.data, 2, 3))
        xx = Variable(torch.transpose(xx.data, 1, 2))
        xx = self.bn(xx)
        xx = F.relu(xx)
        xx = xx.squeeze(1)
        a.append(xx)
    x = a  # e.g. [FloatTensor of size 64 x 100 x 31], 32, 33, 34
    x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...] * len(Ks)
    x = torch.cat(x, 1)  # e.g. FloatTensor of size 64 x 400
    x = self.dropout(x)  # (N, len(Ks) * Co)
    logit = self.fc1(x)  # (N, C)
    return logit
def forward(self, x, doc_lens):
    sent_lens = torch.sum(torch.sign(x), dim=1).data
    H = self.args.hidden_size
    x = self.embed(x)  # (N, L, D)
    # word-level CNN features
    x = [conv(x.permute(0, 2, 1)) for conv in self.convs]
    x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
    x = torch.cat(x, 1)
    # make sentence features (pad with zeros)
    x = self.pad_doc(x, doc_lens)
    # sentence-level GRU
    sent_out = self.sent_RNN(x)[0]  # (B, max_doc_len, 2*H)
    docs = self.max_pool1d(sent_out, doc_lens)  # (B, 2*H)
    docs = self.fc(docs)
    probs = []
    for index, doc_len in enumerate(doc_lens):
        valid_hidden = sent_out[index, :doc_len, :]  # (doc_len, 2*H)
        doc = docs[index].unsqueeze(0)
        s = Variable(torch.zeros(1, 2 * H))
        if self.args.device is not None:
            s = s.cuda()
        for position, h in enumerate(valid_hidden):
            h = h.view(1, -1)  # (1, 2*H)
            # get position embeddings
            abs_index = Variable(torch.LongTensor([[position]]))
            if self.args.device is not None:
                abs_index = abs_index.cuda()
            abs_features = self.abs_pos_embed(abs_index).squeeze(0)
            rel_index = int(round((position + 1) * 9.0 / doc_len))
            rel_index = Variable(torch.LongTensor([[rel_index]]))
            if self.args.device is not None:
                rel_index = rel_index.cuda()
            rel_features = self.rel_pos_embed(rel_index).squeeze(0)
            # classification layer
            content = self.content(h)
            salience = self.salience(h, doc)
            novelty = -1 * self.novelty(h, torch.tanh(s))
            abs_p = self.abs_pos(abs_features)
            rel_p = self.rel_pos(rel_features)
            prob = F.sigmoid(content + salience + novelty + abs_p + rel_p + self.bias)
            s = s + torch.mm(prob, h)
            probs.append(prob)
    return torch.cat(probs).squeeze()
def forward(self, x):
    x = self.conv1(x)
    x = self.elu1(self.bn1(x))
    x = self.elu2(self.bn2(self.conv2(x)))
    x = F.max_pool1d(x, 160)
    x = x.unsqueeze(1)
    x = self.elu3(self.bn3(self.conv3(x)))
    x = F.max_pool2d(x, (3, 3))
    x = self.elu4(self.bn4(self.conv4(x)))
    x = F.max_pool2d(x, (1, 3))
    x = x.view(-1, 50 * 14 * 11)
    x = F.elu(self.fc5(x))
    # note: training=True keeps dropout active even at evaluation time
    x = F.dropout(x, training=True)
    x = F.elu(self.fc6(x))
    x = F.dropout(x, training=True)
    return self.fc7(x)
def forward(self, x):
    x = self.embed(x)
    x = self.dropout_embed(x)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)
    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    bilstm_out = F.tanh(bilstm_out)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.tanh(bilstm_out)
    logit = self.hidden2label(bilstm_out)
    return logit
def forward(self, x):
    embed = self.embed(x)
    # CNN
    cnn_x = embed
    cnn_x = self.dropout(cnn_x)
    cnn_x = cnn_x.unsqueeze(1)
    cnn_x = [F.relu(conv(cnn_x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
    cnn_x = torch.cat(cnn_x, 0)
    cnn_x = torch.transpose(cnn_x, 1, 2)
    # GRU
    lstm_out, self.hidden = self.gru(cnn_x, self.hidden)
    lstm_out = torch.transpose(lstm_out, 0, 1)
    lstm_out = torch.transpose(lstm_out, 1, 2)
    lstm_out = F.max_pool1d(lstm_out, lstm_out.size(2)).squeeze(2)
    # linear
    cnn_lstm_out = self.hidden2label1(F.tanh(lstm_out))
    cnn_lstm_out = self.hidden2label2(F.tanh(cnn_lstm_out))
    # output
    logit = cnn_lstm_out
    return logit
def active_and_maxpooling(self, conv, x):
    out = F.relu(conv(x)).squeeze(3)
    out = F.max_pool1d(out, out.size(2)).squeeze(2)
    return out
def apply_multiple(x):
    # input: batch_size * seq_len * (2 * hidden_size)
    p1 = F.avg_pool1d(x.transpose(1, 2), x.size(1)).squeeze(-1)
    p2 = F.max_pool1d(x.transpose(1, 2), x.size(1)).squeeze(-1)
    # output: batch_size * (4 * hidden_size)
    return torch.cat([p1, p2], 1)
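# Usage sketch for apply_multiple above (sizes are illustrative assumptions):
# average- and max-pooling over the sequence dimension are concatenated into
# a single fixed-size representation.
import torch
import torch.nn.functional as F

x = torch.randn(8, 20, 2 * 64)   # batch_size * seq_len * (2 * hidden_size)
rep = apply_multiple(x)          # (8, 4 * 64)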
def graph_maxpool(self, node_vec, node_mask=None):
    # global max pooling over nodes; node_mask is currently unused
    # node_vec shape: (batch_size, hidden_size, num_nodes)
    graph_embedding = F.max_pool1d(node_vec, kernel_size=node_vec.size(-1)).squeeze(-1)
    return graph_embedding
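# Usage sketch for the graph_maxpool pattern above (sizes are illustrative
# assumptions): node vectors arranged as (batch, hidden, num_nodes) reduce to
# one graph embedding per example.
import torch
import torch.nn.functional as F

node_vec = torch.randn(4, 64, 10)  # (batch_size, hidden_size, num_nodes)
g = F.max_pool1d(node_vec, kernel_size=node_vec.size(-1)).squeeze(-1)  # (4, 64)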
# m1 = torch.nn.MaxPool2d(kernel_size=3, stride=2)
# m2 = torch.nn.MaxPool1d(kernel_size=3, stride=2)
# inputm = torch.randn(2, 4, 5)
# print(m1(inputm).shape)
# print(m2(inputm).shape)
#
# m1 = torch.nn.AvgPool2d(kernel_size=3, stride=2)
# m11 = torch.nn.AvgPool2d(kernel_size=3)
# m2 = torch.nn.AvgPool1d(kernel_size=3, stride=2)
# inputm = torch.randn(2, 4, 5)
# print(m1(inputm).shape)
# print(m2(inputm).shape)
# print(m11(inputm).shape)

a = torch.randn(2, 4, 5)
# F.max_pool1d requires an explicit kernel_size; pooling over the whole
# last dimension makes this a global max pool
print(F.max_pool1d(a, kernel_size=a.size(2)))

# manual binary cross-entropy; targets and outputs must lie in (0, 1),
# otherwise the logs produce NaNs
target = torch.rand(12, 5)
out = torch.sigmoid(torch.randn(12, 5))
loss = -target * torch.log(out) - (1 - target) * torch.log(1 - out)
print(loss)
loss2 = loss.sum(-1).mean()
print(loss2)
# criterion = torch.nn.BCELoss()
# print(criterion(out, target))  # matches loss.mean() up to reduction

m = nn.Conv1d(in_channels=16, out_channels=33, kernel_size=3)
# input: (N, in_channels, L_in)
# by default, Conv1d outputs (N, out_channels, L_in - kernel_size + 1)
# formula: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html?highlight=conv1d#torch.nn.Conv1d
input = torch.randn(20, 16, 50)
def _conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(dim=3)  # (B, 100, L - i + 1)
    x = F.max_pool1d(x, x.size(2)).squeeze(dim=2)  # (B, 100)
    return x
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x))  # input e.g. [32, 1, 100, 300]
    x = x.squeeze(3)
    x = F.max_pool1d(x, x.size(2))  # x.size(2) is the kernel_size, i.e. the pooling window
    x = x.squeeze(2)
    return x
def conv_global_max_pool(x, conv):
    out = conv(x).transpose(1, 2)
    # the original call was missing the required kernel_size argument;
    # pooling over the full final dimension makes this a global max pool
    return F.max_pool1d(out, out.size(2))
def conv_and_pool(self, x, conv):
    # a max-pooling block
    x = F.relu(conv(x)).squeeze(3)  # (N, Co, W): filter the input
    x = F.max_pool1d(x, x.size(2)).squeeze(2)  # pool the max value of the filtered data
    return x
def forward_sent(self, ctx_words, mask):
    hid = self.sent_gru(ctx_words, mask)
    hid = F.max_pool1d(hid.permute(0, 2, 1), self.cfg.memory_len)
    hid = hid.permute(0, 2, 1).squeeze(1)
    return hid
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)  # (batch_size, output_channel, feature_map_dim)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)
    return x
def forward(self, ques, rela_list):
    batch_size = ques.size()[0]
    ques = self.embed(ques)  # (batch_size, sent_len, embed_dim)
    rela_list = [self.embed(rela) for rela in rela_list]  # (num_classes, batch_size, sent_len, embed_dim)
    rela_output = list()
    if self.config.relation_detection_mode.upper() == "LSTM":
        # h0 / c0: (layer * direction, batch_size, hidden_dim)
        if self.config.cuda:
            h0 = Variable(torch.zeros(self.config.num_layer * 2, batch_size, self.config.hidden_size).cuda())
            c0 = Variable(torch.zeros(self.config.num_layer * 2, batch_size, self.config.hidden_size).cuda())
        else:
            h0 = Variable(torch.zeros(self.config.num_layer * 2, batch_size, self.config.hidden_size))
            c0 = Variable(torch.zeros(self.config.num_layer * 2, batch_size, self.config.hidden_size))
        # output: (sent_len, batch_size, hidden_size * num_direction)
        # ht / ct: (layer * direction, batch, hidden_dim)
        outputs1, (ht1, ct1) = self.lstm(ques, (h0, c0))
        tags = self.hidden2tag(ht1[-2:].transpose(0, 1).contiguous().view(batch_size, -1))
        scores = F.log_softmax(tags, dim=1)
        return scores
    elif self.config.relation_detection_mode.upper() == "CNN":
        ques = ques.contiguous().unsqueeze(1)
        ques = [F.relu(self.conv1(ques)).squeeze(3),
                F.relu(self.conv2(ques)).squeeze(3),
                F.relu(self.conv3(ques)).squeeze(3)]
        ques = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in ques]  # max-over-time pooling
        ques = torch.cat(ques, 1)  # (batch, channel_output * Ks)
        ques = self.dropout(ques)
        ques = ques.unsqueeze(1)  # (batch, 1, channel_output * Ks)
        for rela in rela_list:
            rela = rela.contiguous().unsqueeze(1)
            rela = [F.relu(self.conv1(rela)).squeeze(3),
                    F.relu(self.conv2(rela)).squeeze(3),
                    F.relu(self.conv3(rela)).squeeze(3)]
            rela = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in rela]
            rela = torch.cat(rela, 1)
            rela = self.dropout(rela)
            rela = rela.unsqueeze(1)
            rela_output.append(rela)
        rela = torch.cat(rela_output, 1).transpose(0, 1).contiguous()
        # cosine similarity between the question and each relation
        dot = torch.sum(torch.mul(ques, rela), 2)
        sqrt_ques = torch.sqrt(torch.sum(torch.pow(ques, 2), 2))
        sqrt_rela = torch.sqrt(torch.sum(torch.pow(rela, 2), 2))
        epsilon = 1e-6
        scores = dot / (sqrt_ques * sqrt_rela + epsilon)
        return scores
    else:
        print("Unknown Mode")
        exit(1)
def conv_and_pool(self, x, conv):
    x_conv = conv(x)
    x_act = F.relu(x_conv).squeeze(3)  # (N, Co, W)
    x_pool = F.max_pool1d(x_act, x_act.size(2)).squeeze(2)
    return x_pool
def max_pooling(self, x, conv):
    # note: `conv` was referenced but never defined in the original; it is
    # taken here as a parameter, matching the sibling conv_and_pool helpers
    x = F.relu(conv(x)).squeeze(3)  # (N, C, L), e.g. (50, 100, 62)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)  # squeeze: (50, 100, 1) -> (50, 100)
    return x
def forward(self, x):
    return F.max_pool1d(x, kernel_size=x.shape[2])
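# Usage sketch for the global-max-pool forward above (channel/length sizes
# are assumptions): pooling with kernel_size = x.shape[2] collapses the whole
# temporal axis to length 1.
import torch
import torch.nn.functional as F

x = torch.randn(4, 16, 32)                   # (N, C, L)
y = F.max_pool1d(x, kernel_size=x.shape[2])  # (4, 16, 1)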
def forward(self, x_train_char, file_idx=None):
    """Performs a forward pass.

    Args:
        x_train_char: batch of character inputs (sparse per-sentence tensors
            or index tensors, depending on hparams).
        file_idx: per-sentence file indices, required when sep_char_proj is set.
    Returns:
        char_emb: character-level embeddings.
    """
    if self.hparams.char_ngram_n > 0 or self.hparams.bpe_ngram:
        for idx, x_char_sent in enumerate(x_train_char):
            emb = Variable(x_char_sent.to_dense(), requires_grad=False)
            if self.hparams.cuda:
                emb = emb.cuda()
            x_char_sent = torch.tanh(self.char_emb_proj(emb))
            if self.hparams.residue:
                x_char_sent_in = x_char_sent
            if self.hparams.sep_char_proj and not self.trg:
                assert file_idx is not None
                x_char_sent = torch.tanh(self.sep_proj_list[file_idx[idx]](x_char_sent))
            elif self.trg and self.hparams.d_char_vec:
                x_char_sent = torch.tanh(self.trg_proj(x_char_sent))
            if self.hparams.residue:
                x_char_sent = x_char_sent + x_char_sent_in
            if self.hparams.layer_norm:
                x_char_sent = self.layer_norm(x_char_sent)
            x_train_char[idx] = x_char_sent
        if not self.hparams.semb == 'mlp':
            char_emb = torch.stack(x_train_char, dim=0)
        else:
            char_emb = x_train_char
    elif self.hparams.char_input == 'sum':
        # (batch_size, max_len, char_len, d_word_vec)
        char_emb = self.char_emb(x_train_char)
        char_emb = char_emb.sum(dim=2)
    elif self.hparams.char_input == 'bi-lstm':
        char_emb = self.char_emb(x_train_char)
        batch_size, max_len, char_len, d_word_vec = char_emb.size()
        char_emb = char_emb.view(-1, char_len, d_word_vec)
        enc, (ht, ct) = self.lstm_layer(char_emb)
        char_emb = torch.cat([ct[0], ct[1]], 1).view(batch_size, max_len, -1)
        if self.hparams.sep_char_proj and not self.trg:
            char_emb = torch.split(char_emb, batch_size, dim=0)
            proj_list = []
            for idx, c_emb in enumerate(char_emb):
                proj_list.append(torch.tanh(self.sep_proj_list[file_idx[idx]](c_emb)))
            char_emb = torch.cat(proj_list, dim=0)
    elif self.hparams.char_input == 'cnn':
        # (batch_size, max_len, char_len, d_char_vec)
        char_emb = self.char_emb(x_train_char)
        batch_size, max_len, char_len, d_word_vec = char_emb.size()
        # (batch_size * max_len, d_char_vec, char_len)
        char_emb = char_emb.view(-1, char_len, d_word_vec).permute(0, 2, 1)
        conv_out = []
        for conv in self.conv_list:
            # (batch_size * max_len, out_channel, char_len_out)
            c = conv(char_emb)
            c = F.max_pool1d(c, kernel_size=c.size(2)).squeeze(2)
            conv_out.append(c)
        # (batch_size, max_len, d_word_vec)
        char_emb = torch.cat(conv_out, dim=-1).view(batch_size, max_len, -1)
        if self.hparams.highway:
            g = torch.sigmoid(self.highway_g(char_emb))
            char_emb = g * torch.tanh(self.highway_h(char_emb)) + (1 - g) * char_emb
        else:
            char_emb = torch.tanh(char_emb)
        if self.hparams.sep_char_proj and not self.trg:
            char_emb = torch.split(char_emb, batch_size, dim=0)
            proj_list = []
            for idx, c_emb in enumerate(char_emb):
                proj_list.append(torch.tanh(self.sep_proj_list[file_idx[idx]](c_emb)))
            char_emb = torch.cat(proj_list, dim=0)
    return char_emb
def forward(self, input_sentence, batch_size=None):
    """
    Parameters
    ----------
    input_sentence: input of shape = (batch_size, num_sequences)
    batch_size: default = None. Used only for prediction on a single sentence
        after training (batch_size = 1)

    Returns
    -------
    Output of the linear layer containing logits for the positive & negative
    classes, which receives its input as the final_hidden_state of the LSTM.
    final_output.shape = (batch_size, output_size)
    """
    """
    The idea of the paper "Recurrent Convolutional Neural Networks for Text
    Classification" is that we pass the embedding vectors of the text sequences
    through a bidirectional LSTM, and then, for each sequence, the final
    embedding vector is the concatenation of its own GloVe embedding and the
    left and right contextual embeddings, which in a bidirectional LSTM are the
    corresponding hidden states. This final embedding is passed through a linear
    layer that maps the long concatenated encoding vector back to a hidden_size
    vector. After this step, we max-pool across all sequences of the text. This
    converts any variable-length text into a fixed-dimension tensor of size
    (batch_size, hidden_size), which is finally mapped to the output layer.
    """
    input = self.word_embeddings(input_sentence)  # (batch_size, num_sequences, embedding_length)
    input = input.permute(1, 0, 2)  # (num_sequences, batch_size, embedding_length)
    size = self.batch_size if batch_size is None else batch_size
    # initial hidden and cell states of the LSTM
    h_0 = Variable(torch.zeros(2, size, self.hidden_size))
    c_0 = Variable(torch.zeros(2, size, self.hidden_size))
    if torch.cuda.is_available():
        h_0 = h_0.cuda(env_settings.CUDA_DEVICE)
        c_0 = c_0.cuda(env_settings.CUDA_DEVICE)
    output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))
    final_encoding = torch.cat((output, input), 2).permute(1, 0, 2)
    y = self.W2(final_encoding)  # (batch_size, num_sequences, hidden_size)
    y = y.permute(0, 2, 1)       # (batch_size, hidden_size, num_sequences)
    y = F.max_pool1d(y, y.size()[2])  # (batch_size, hidden_size, 1)
    y = y.squeeze(2)
    logits = self.label(y)
    return logits
def max_pool1d(input, kernel):
    orig_shape = get_shape(input)
    # flatten to 3-D, pool, then restore the leading dimensions
    tmp_res = F.max_pool1d(input.view([-1] + orig_shape[-2:]), kernel)
    real_res = tmp_res.view(orig_shape[:-1] + [-1])
    return real_res
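# Usage sketch for the rank-agnostic max_pool1d wrapper above (assumes
# get_shape(t) returns list(t.shape)): a 4-D input is flattened to 3-D,
# pooled along its last axis, then reshaped back.
import torch

x = torch.randn(2, 3, 4, 8)
y = max_pool1d(x, kernel=2)  # -> shape (2, 3, 4, 4)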
def forward(self, model_input):
    """
    Forward pass for the predictor model.

    Args:
        model_input: a tuple (source, source_mask, target, target_mask)
    """
    source, source_mask, target, target_mask = model_input
    # create matrix of source positions for position embeddings
    source_positions = Variable(
        torch.arange(1, self.max_positions)[:source.size(0)]
        .view(-1, 1).expand(-1, source.size(1)).long()).cuda()
    source_positions = source_positions * source_mask
    if self.debug:
        print("source_positions", type(source_positions))
    # create matrix of target positions for position embeddings
    target_positions = Variable(
        torch.arange(1, self.max_positions)[:target.size(0)]
        .view(-1, 1).expand(-1, target.size(1)).long()).cuda()
    target_positions = target_positions * target_mask
    if self.debug:
        print("target_positions", target_positions.size())
    # compute lengths of source sentences
    source_lengths = list(source_mask.data.sum(0))  # dim = B
    if self.debug:
        print("lengths:", source_lengths)
    # source embedding
    source_embedded = self.source_embedding(source) + self.source_position_embedding(source_positions)  # TxB => TxBxD
    if self.debug:
        print("source embedded:", source_embedded.size())
    # target embedding
    target_embedded = self.target_embedding(target) + self.target_position_embedding(target_positions)  # TxB => TxBxD
    if self.debug:
        print("target embedded:", target_embedded.size())
    # source convolutions
    source_proj = self.source_conv_proj(source_embedded)
    if self.debug:
        print("source_proj:", source_proj.size())
    conv_input = source_proj.permute(1, 2, 0)  # B x H x T
    if self.debug:
        print("conv_input:", conv_input.size())
    for source_conv in self.source_convs:
        conv_output = source_conv(conv_input)  # B x H x T => B x 2H x T
        conv_output = F.glu(conv_output, dim=1)  # B x 2H x T => B x H x T
        # residual connection
        conv_output = (conv_output + conv_input) * math.sqrt(0.5)
    # apply masking on outputs
    encoder_states = conv_output.permute(2, 0, 1)
    source_mask_expanded = source_mask.unsqueeze(-1).expand(-1, -1, self.hidden_size)
    encoder_states.data.masked_fill_(source_mask_expanded.data.eq(0), 0)
    if self.debug:
        print("encoder_states:", encoder_states.size())
    # attention: compute attention score vectors by projecting encoder states (e)
    encoder_states_in = self.encoder_state_embed_proj(encoder_states)  # T'xBxH' => T'xBxE'
    encoder_states_in = GradMultiply.apply(encoder_states_in, 1.0 / (2.0 * self.num_target_layers))
    # compute vectors on which attention weights are applied (e + s)
    encoder_vectors = (encoder_states_in + source_embedded) * math.sqrt(0.5)  # T'xBxE' + T'xBxE' = T'xBxE'
    # find the max length of the batch
    max_target_length = target.size(0)
    # run a forward-looking CNN and a backward-looking CNN over the target
    # decoder forward convolutions
    conv_input = self.forward_target_conv_proj(target_embedded)
    conv_input = conv_input.permute(1, 2, 0)  # B x H x T
    for target_conv, target_attn in zip(self.forward_target_convs, self.forward_target_attns):
        # MISSING: dropout
        conv_output = target_conv(conv_input)  # B x H x T => B x 2H x T+(k-1)
        # remove the future paddings (k-1)
        conv_output = conv_output[:, :, :-(self.target_kernel_width - 1)]
        # apply the non-linearity
        conv_output = F.glu(conv_output, dim=1)  # B x 2H x T => B x H x T
        # attention
        context, attn_weights = target_attn(conv_output.permute(2, 0, 1), target_embedded,
                                            encoder_states_in, encoder_vectors, source_mask)
        # add the context vector and residual
        conv_output = (((conv_output + context.permute(1, 2, 0)) * math.sqrt(0.5)) + conv_input) * math.sqrt(0.5)
    # get back to the original dimensions
    decoder_forward_states = conv_output.permute(2, 0, 1)  # B x H x T => T x B x H
    if self.debug:
        print("decoder_forward_states:", decoder_forward_states.size())
    # decoder backward convolutions
    conv_input = self.reverse_target_conv_proj(target_embedded)
    conv_input = conv_input.permute(1, 2, 0)  # B x H x T
    for target_conv, target_attn in zip(self.reverse_target_convs, self.reverse_target_attns):
        conv_output = target_conv(conv_input)  # B x H x T => B x 2H x T+(k-1)
        # remove the first paddings (k-1)
        conv_output = conv_output[:, :, self.target_kernel_width - 1:]
        # apply the non-linearity
        conv_output = F.glu(conv_output, dim=1)  # B x 2H x T => B x H x T
        # attention
        context, attn_weights = target_attn(conv_output.permute(2, 0, 1), target_embedded,
                                            encoder_states_in, encoder_vectors, source_mask)
        # add the context vector and residual
        conv_output = (((conv_output + context.permute(1, 2, 0)) * math.sqrt(0.5)) + conv_input) * math.sqrt(0.5)
    # project back to the original dimensions
    decoder_reverse_states = conv_output.permute(2, 0, 1)  # B x H x T => T x B x H
    if self.debug:
        print("decoder_reverse_states:", decoder_reverse_states.size())
    decoder_states = torch.cat([decoder_forward_states, decoder_reverse_states], dim=-1)
    if self.debug:
        print("decoder_states:", decoder_states.size())
    # list of vocab outputs
    vocab_outputs = []
    preqvs = []
    # loop through 1 to max_target_length - 1
    for ti in range(1, max_target_length - 1):
        # extract the forward decoder state (previous word)
        decoder_left_state = torch.split(decoder_states[ti - 1], self.hidden_size, dim=-1)[0]  # BxH: forward half only
        if self.debug:
            print("decoder_left_state:", decoder_left_state.size())
        # extract the reverse decoder state (next word)
        decoder_right_state = torch.split(decoder_states[ti + 1], self.hidden_size, dim=-1)[1]  # BxH: reverse half only
        if self.debug:
            print("decoder_right_state:", decoder_right_state.size())
        # concatenate the left and right decoder states into a single vector
        decoder_state = torch.cat((decoder_left_state, decoder_right_state), dim=-1).unsqueeze(0)  # Bx2H
        if self.debug:
            print("decoder_state:", decoder_state.size())
        # target word projections of the previous and next words
        prev_word_proj = target_embedded[ti - 1]  # BxD
        next_word_proj = target_embedded[ti + 1]  # BxD
        # combine the two projections into a single vector
        near_words_proj = torch.cat((prev_word_proj, next_word_proj), dim=-1).unsqueeze(0)  # 1xBx2D
        if self.debug:
            print("near_words_proj:", near_words_proj.size())
        act_input = self.decoder_states_proj(decoder_state) + self.target_words_proj(near_words_proj)  # B x 2M, M = maxout input
        # non-linearity using maxout
        act_output = F.max_pool1d(act_input, kernel_size=2, stride=2)  # B x M
        if self.debug:
            print("act output:", act_output.size())
        # projection to output embedding space
        output_embedded = self.final_proj(act_output)  # 1xBx2H -> 1xBxO
        output_embedded = output_embedded.squeeze(0)  # 1 x B x O => B x O
        if self.debug:
            print("sqz output:", output_embedded.size())
        # project to the final output vocabulary
        vocab_output = self.out_vocab_proj(output_embedded)  # B x D => B x V
        if self.debug:
            print("vocab output:", vocab_output.size())
        vocab_outputs.append(vocab_output)
        preqvs.append(self.out_vocab_proj.weight[target[ti]].unsqueeze(0) * output_embedded)
    # concatenate the final output layers of all time steps
    final_output = torch.cat(vocab_outputs, dim=0)
    if self.debug:
        print("final_output:", final_output.size())
    preqv_output = torch.cat(preqvs, dim=0)
    postqv_output = decoder_states[1:max_target_length - 1]
    return final_output, preqv_output, postqv_output
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)  # x: (batch_size, num_filters, H * hidden_size)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)  # x: (batch_size, num_filters)
    return x
def conv_block(self, input, conv_layer):
    conv_out = conv_layer(input)
    activation = F.relu(conv_out.squeeze(3))
    max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2)
    return max_out
def forward(self, x, bfs_tensor, children_batch_list):
    '''
    :param x: words_id_tensor
    :param bfs_tensor: tensor
    :param children_batch_list: tensor
    :return:
    '''
    x = self.embeddings(x)
    x = self.embed_dropout(x)
    if self.debug:
        print()
        print('x.size():', x.size())  # torch.Size([4, 19, 100])
        print('bfs_tensor:', bfs_tensor)
        print('bfs_tensor.size():', bfs_tensor.size())  # torch.Size([4, 19])
        print('children_batch_list:', children_batch_list)
        print('children_batch_list.size():', children_batch_list.size())  # torch.Size([4, 19, 19])
    batch_size = x.size(0)
    sent_len = x.size()[1]
    all_C = Variable(torch.zeros((batch_size, sent_len, self.hidden_size)))
    all_H = Variable(torch.zeros((batch_size, sent_len, self.hidden_size)))
    if self.use_cuda:
        all_C = all_C.cuda()
        all_H = all_H.cuda()
    if self.debug:
        print('all_C.size():', all_C.size())  # torch.Size([4, 19, 100])
    h = None
    for index in range(sent_len):
        # gather the embeddings of the nodes visited at this BFS step
        mask = torch.zeros(x.size())
        one = torch.ones((1, x.size(2)))
        batch = 0
        for i in torch.transpose(bfs_tensor, 0, 1).data.tolist()[index]:
            mask[batch][i] = one
            batch += 1
        mask = Variable(torch.ByteTensor(mask.data.tolist()))
        if self.use_cuda:
            mask = mask.cuda()
        cur_embeds = torch.masked_select(x, mask)
        cur_embeds = cur_embeds.view(cur_embeds.size(-1) // self.embed_dim, self.embed_dim)
        if self.debug:
            print('cur_embeds:', cur_embeds)
        # select the current index from the BFS order
        mask = [0 for _ in range(sent_len)]
        mask[index] = 1
        mask = Variable(torch.ByteTensor(mask))
        if self.use_cuda:
            mask = mask.cuda()
        cur_nodes_list = torch.masked_select(bfs_tensor, mask).data.tolist()
        if self.debug:
            print('cur_nodes_list:', cur_nodes_list)
        # select the current node's children from children_batch_list
        mask = torch.zeros(batch_size, sent_len, sent_len)
        for i, rel in enumerate(cur_nodes_list):
            mask[i][rel] = torch.ones(1, sent_len)
        mask = Variable(torch.ByteTensor(mask.data.tolist()))
        if self.use_cuda:
            mask = mask.cuda()
        rels = torch.masked_select(children_batch_list, mask).view(batch_size, sent_len)
        if self.debug:
            print('rels:', rels)
            print('rels.size():', rels.size())  # torch.Size([4, 19])
        rels_sum = torch.sum(rels, 1)
        rels_max = torch.max(rels_sum)
        if self.debug:
            print('rels_sum:', rels_sum)
            print('rels_max:', rels_max)
            print('rels_max.size():', rels_max.size())
        rel_batch_max = torch.max(rels_max, 0)[0]
        c, h = None, None
        if rel_batch_max.data.tolist() == 0:
            c = Variable(torch.zeros((batch_size, 1, self.hidden_size)))
            h = Variable(torch.zeros((batch_size, 1, self.hidden_size)))
        else:
            pad_c = Variable(torch.zeros(batch_size, rel_batch_max, self.hidden_size))
            pad_h = Variable(torch.zeros(batch_size, rel_batch_max, self.hidden_size))
            rels_broadcast = rels.unsqueeze(1).expand(rels.size(0), self.hidden_size, rels.size(1))
            rels_broadcast = Variable(torch.ByteTensor(rels_broadcast.data.tolist()))
            if self.use_cuda:
                rels_broadcast = rels_broadcast.cuda()
                pad_c = pad_c.cuda()
                pad_h = pad_h.cuda()
            selected_c = torch.masked_select(torch.transpose(all_C, 1, 2), rels_broadcast)
            selected_h = torch.masked_select(torch.transpose(all_H, 1, 2), rels_broadcast)
            selected_c = selected_c.view(selected_c.size(0) // self.hidden_size, self.hidden_size)
            selected_h = selected_h.view(selected_h.size(0) // self.hidden_size, self.hidden_size)
            idx = 0
            for i, batch in enumerate(pad_c):
                for j in range(rels_sum.data.tolist()[i]):
                    batch[j] = selected_c[idx]
                    idx += 1
            idx = 0
            for i, batch in enumerate(pad_h):
                for j in range(rels_sum.data.tolist()[i]):
                    batch[j] = selected_h[idx]
                    idx += 1
            c = pad_c
            h = pad_h
        # LSTM cell
        c, h = self.node_forward(cur_embeds, c, h)
        h = self.hidden_dropout(h)
        # write c and h back into all_C and all_H
        batch = 0
        for i in cur_nodes_list:
            all_C[batch][i] = c[batch]
            all_H[batch][i] = h[batch]
            batch += 1
    out = torch.transpose(all_H, 1, 2)
    out = torch.tanh(out)
    out = F.max_pool1d(out, out.size(2))
    out = out.squeeze(2)
    out = self.out(out)
    return out
def forward(self, x1, x2):
    # x1: batch size, sent len, emb dim
    x1 = x1.unsqueeze(1)
    x2 = x2.unsqueeze(1)
    # embedded = [batch size, 1, sent len, emb dim]
    conved1 = [F.relu(conv(x1)).squeeze(3) for conv in self.convs1]
    conved2 = [F.relu(conv(x2)).squeeze(3) for conv in self.convs2]
    # cross-gating between the two branches
    convedTo1 = [self.sigmoid(conv) for conv in conved2]
    convedTo2 = [self.sigmoid(conv) for conv in conved1]
    conved1 = [conved1[i] + convedTo1[i] for i in range(len(conved1))]
    conved2 = [conved2[i] + convedTo2[i] for i in range(len(conved2))]
    cosine_value1 = [self.cosine_2(conved1[i], conved2[i]).mean(dim=1, keepdim=True) for i in range(len(conved1))]
    self.cosine_value1 = torch.cat(cosine_value1, dim=1).mean(dim=1)
    # conv_n = [batch size, n_filters, sent len - filter_sizes[n]]
    pooled1 = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved1]  # pooled_n = [batch size, n_filters]
    pooled2 = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved2]
    convedTo1 = [self.sigmoid(conv) for conv in pooled2]
    convedTo2 = [self.sigmoid(conv) for conv in pooled1]
    pooled1 = [pooled1[i] + convedTo1[i] for i in range(len(pooled1))]
    pooled2 = [pooled2[i] + convedTo2[i] for i in range(len(pooled2))]
    out1 = torch.cat(pooled1, dim=1)
    out2 = torch.cat(pooled2, dim=1)
    self.cosine_value2 = self.cosine_1(out1, out2)
    out1 = self.dropout(out1)
    out2 = self.dropout(out2)
    # mutual sigmoid attention between the two representations
    att1 = self.sigmoid(out1)
    att2 = self.sigmoid(out2)
    out1 = out1 * att2
    out2 = out2 * att1
    self.cosine_value3 = self.cosine_1(out1, out2)
    out_sent = self.fcSent(out1)
    out_PDN = self.fcPDN(out2)
    out_PDN_pose = self.sigmoid(out_PDN)
    return out_sent, out_PDN, out_PDN_pose
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)
    return x
def forward(self, x1, x2):
    def att_sum(x):
        # mean-query attention over the sequence
        query = torch.mean(x, dim=1, keepdim=True)
        att_scores = torch.bmm(query, x.transpose(1, 2).contiguous())  # batch_size x 1 x query_len
        att_weights = F.softmax(att_scores, dim=1)
        output = torch.bmm(att_weights, x)  # batch_size x 1 x embedding_dim
        # note: the original returned an undefined name (`mix`); the attended
        # output is what this computation produces
        return output

    def inner_conv(x):
        x = x.permute(0, 2, 1)  # batch_size x embedding_dim x seq_len
        conv = self.conv_inner(x)  # batch_size x embedding_dim_out x (seq_len - filter + 1)
        conv = F.relu(conv)
        pooled = F.max_pool1d(conv, conv.shape[2])  # batch_size x embedding_dim_out x 1
        pooled = pooled.permute(0, 2, 1)
        return pooled

    def mul(x):
        x = x.permute(0, 2, 1)
        out = x.matmul(self.W1)
        out = out.permute(0, 2, 1)
        return out

    x1 = x1.unsqueeze(1)
    x2 = x2.unsqueeze(1)
    # embedded = [batch size, 1, sent len, emb dim]
    conved1 = [F.relu(conv(x1)).squeeze(3) for conv in self.convs1]
    conved2 = [F.relu(conv(x2)).squeeze(3) for conv in self.convs2]
    # softmax-weighted cross-gating between the two branches
    convedTo1 = [F.softmax(conv, dim=1) * conv for conv in conved2]
    convedTo2 = [F.softmax(conv, dim=1) * conv for conv in conved1]
    conved1 = [conved1[i] + convedTo1[i] for i in range(len(conved1))]
    conved2 = [conved2[i] + convedTo2[i] for i in range(len(conved2))]
    cosine_value1 = [self.cosine_2(conved1[i], conved2[i]).mean(dim=1, keepdim=True) for i in range(len(conved1))]
    self.cosine_value1 = torch.cat(cosine_value1, dim=1).mean(dim=1)
    # conv_n = [batch size, n_filters, sent len - filter_sizes[n]]
    pooled1 = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved1]  # pooled_n = [batch size, n_filters]
    pooled2 = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved2]
    convedTo1 = [F.softmax(conv, dim=1) * conv for conv in pooled2]
    convedTo2 = [F.softmax(conv, dim=1) * conv for conv in pooled1]
    pooled1 = [pooled1[i] + convedTo1[i] for i in range(len(pooled1))]
    pooled2 = [pooled2[i] + convedTo2[i] for i in range(len(pooled2))]
    out1 = torch.cat(pooled1, dim=1)
    out2 = torch.cat(pooled2, dim=1)
    self.cosine_value2 = self.cosine_1(out1, out2)
    out1 = self.dropout(out1)
    out2 = self.dropout(out2)
    # mutual sigmoid attention between the two representations
    att1 = self.sigmoid(out1)
    att2 = self.sigmoid(out2)
    out1 = out1 * att2
    out2 = out2 * att1
    self.cosine_value3 = self.cosine_1(out1, out2)
    out_sent = self.fcSent(out1)
    out_PDN = self.fcPDN(out2)
    out_PDN_pose = self.sigmoid(out_PDN)
    return out_sent, out_PDN, out_PDN_pose
def forward(self, x):
    x = self.embed(x)  # (N, W, D)
    x = x.unsqueeze(1)  # (N, Ci, W, D)
    if self.args.batch_normalizations is True:
        x = [self.convs1_bn(F.tanh(conv(x))).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...] * len(Ks)
    else:
        x = [self.dropout(F.relu(conv(x)).squeeze(3)) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...] * len(Ks)
    x = torch.cat(x, 1)
    x = self.dropout(x)  # (N, len(Ks) * Co)
    if self.args.batch_normalizations is True:
        x = self.fc1_bn(self.fc1(x))
        logit = self.fc2_bn(self.fc2(F.tanh(x)))
    else:
        x = self.fc1(x)
        logit = self.fc2(F.relu(x))
    return logit
def forward(self, src_seq, adj, src_pos, return_attns=False):
    batch_size = src_seq.size(0)
    enc_input = self.src_word_emb(src_seq)
    if self.onehot:
        enc_input = F.relu(self.dropout(self.conv1(enc_input.transpose(1, 2))))[:, :, 0:-1]
        enc_input = F.max_pool1d(enc_input, 2, 2)
        enc_input = F.relu(self.conv2(enc_input).transpose(1, 2))[:, 0:-1, :]
        enc_input += self.position_enc(src_pos[:, 0:enc_input.size(1)])
        src_seq = src_seq[:, 0:enc_input.size(1)]
    elif hasattr(self, 'position_enc'):
        enc_input += self.position_enc(src_pos)
    enc_outputs = []
    if return_attns:
        enc_slf_attns = []
    enc_output = enc_input
    enc_slf_attn_mask = utils.get_attn_padding_mask(src_seq, src_seq)
    if adj:
        enc_slf_attn_mask = enc_slf_attn_mask.type(torch.float32)
        for idx in range(len(adj)):
            enc_slf_attn_mask[idx][0:adj[idx].size(0), 0:adj[idx].size(0)] = utils.swap_0_1(adj[idx], 1, 0)
        enc_slf_attn_mask = enc_slf_attn_mask.type(torch.uint8)
    for enc_layer in self.layer_stack:
        enc_output, enc_slf_attn = enc_layer(enc_output, slf_attn_mask=enc_slf_attn_mask)
        if return_attns:
            enc_slf_attns += [enc_slf_attn]
    if self.enc_transform != '':
        if self.enc_transform == 'max':
            # note: the original pooled with `x.size(1)`, but no `x` is in scope
            # here; the sequence length of enc_output is the intended kernel size
            enc_output = F.max_pool1d(enc_output.transpose(1, 2), enc_output.size(1)).squeeze()
        elif self.enc_transform == 'sum':
            enc_output = enc_output.sum(1)
        elif self.enc_transform == 'mean':
            enc_output = enc_output.sum(1) / ((src_seq > 0).sum(dim=1).float().view(-1, 1))
        elif self.enc_transform == 'flatten':
            enc_output = enc_output.view(batch_size, -1).float()
        enc_output = enc_output.view(batch_size, 1, -1)
    if return_attns:
        return enc_output, enc_slf_attns
    else:
        return enc_output, None
def forward(self, x):
    x = F.relu(self.conv(x))
    # global max pool over all dims except batch and channel
    x = F.max_pool1d(x, x.size()[2:])
    return x
def forward(self, inputs, lengths=None, hidden_state=None, inputs_pos=None):
    '''
    params:
        inputs: [seq_len, batch_size] LongTensor
        hidden_state: [num_layers * bidirectional, batch_size, hidden_size]
    :return outputs: [batch_size, n_classes]
    '''
    # embedding
    if not self.from_other:
        embedded = self.embedding(inputs)
        embedded = self.dropout(embedded)  # [max_len, batch_size, embedding_size]
    else:
        embedded = inputs
    if lengths is not None:
        embedded = nn.utils.rnn.pack_padded_sequence(embedded, lengths)
    if hidden_state is not None:
        outputs, hidden_state = self.rnn(embedded, hidden_state)
    else:
        outputs, hidden_state = self.rnn(embedded)
    if lengths is not None:
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
    attns = None
    if self.model_type.find('attention') != -1:
        outputs = outputs.permute(1, 0, 2)  # [batch_size, max_len, hidden_size]
        hidden_state = self.reduce_state(hidden_state)
        if self.rnn_type == 'LSTM':
            hidden_state = hidden_state[0]
        final_state = hidden_state[-1]
        outputs, attns = self.attention_net(outputs, final_state)
    elif self.model_type == 'rnn_cnn':
        outputs = outputs.permute(1, 0, 2)  # [batch_size, max_len, hidden_size]
        outputs = self.cnn(outputs)
    elif self.model_type == 'rnn_avg':
        outputs = outputs.permute(1, 2, 0)  # [batch_size, hidden_size, max_len]
        outputs = F.avg_pool1d(outputs, kernel_size=outputs.size(2)).squeeze(2)
    elif self.model_type == 'rnn_avg_hidden':
        outputs = outputs.permute(1, 0, 2)  # [batch_size, max_len, hidden_size]
        outputs = F.avg_pool1d(outputs, kernel_size=outputs.size(2)).squeeze(2)
    elif self.model_type == 'rnn_max':
        outputs = outputs.permute(1, 2, 0)  # [batch_size, hidden_size, max_len]
        outputs = F.max_pool1d(outputs, kernel_size=outputs.size(2)).squeeze(2)
    elif self.model_type == 'rnn_max_hidden':
        outputs = outputs.permute(1, 0, 2)  # [batch_size, max_len, hidden_size]
        outputs = F.max_pool1d(outputs, kernel_size=outputs.size(2)).squeeze(2)
    elif self.model_type == 'rnn_bert':
        outputs = outputs.permute(1, 0, 2)  # [batch_size, max_len, hidden_size]
        outputs, attns = self.bert(outputs, None)
        outputs = outputs.mean(dim=1)
    else:
        hidden_state = self.reduce_state(hidden_state)
        if self.rnn_type == 'LSTM':
            hidden_state = hidden_state[0]
        outputs = hidden_state[-1]
    if not self.from_other:
        if self.problem == 'classification':
            # last-step output: [batch_size, hidden_size]
            outputs = self.linear_final(outputs)
        else:
            outputs = self.linear_regression_final(outputs)
    return outputs, attns
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)  # (batch, word_kernel_dim_size, length after convolution)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)  # (batch, word_kernel_dim_size)
    return x
def conv_and_pool(self, x, conv):
    x = F.relu(conv(x)).squeeze(3)  # (sample number, hidden_dim, length)
    # x = F.avg_pool1d(x, x.size(2)).squeeze(2)
    x = F.max_pool1d(x, x.size(2)).squeeze(2)
    return x