def forward(self, text, text_lengths):
    """Word-level encoder of a hierarchical attention network (HAN).

    Embeds the tokens, runs an RNN over the packed sequence, applies word
    attention and returns the attention-weighted combination.

    Args:
        text: token-id tensor, [batch size, sent len].
        text_lengths: true (pre-padding) length of each sequence.

    Returns:
        (output, hidden): attention-pooled output and the final RNN hidden
        state. NOTE(review): `hidden` is returned in the length-sorted order,
        not reindexed by `desorted_indices` — confirm callers expect that.
    """
    # Sort the batch by descending length, as pack_padded_sequence requires;
    # desorted_indices restores the original order afterwards.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        text, text_lengths)
    # text = [batch size, sent len]
    embedded = self.dropout(self.embedding(text)).to(torch.float32)
    # embedded = [batch size, sent len, emb dim]
    # pack sequence so padded steps are skipped by the RNN
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        embedded, sorted_seq_lengths, batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # unpack sequence
    # seq_output = [batch size, sent len, hidden dim * num_direction]
    seq_output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    seq_output = seq_output[desorted_indices]
    # Word attention: project outputs, score against the learned context
    # vector, softmax to weights, then combine the RNN outputs.
    output = matrix_mul(seq_output, self.word_weight, self.word_bias)
    output = matrix_mul(output, self.context_weight)
    output = F.softmax(output, dim=-1)
    output = element_wise_mul(seq_output, output)
    return output, hidden
def forward(self, sentence_tensor, text_lengths):
    """Sentence-level encoder of a HAN: RNN over sentence vectors,
    sentence attention, then the final classification layer.

    Args:
        sentence_tensor: sentence representations,
            [batch size, sent len, emb dim].
        text_lengths: number of valid sentences per document.

    Returns:
        Logits from ``self.fc``.
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    sentence_tensor, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        sentence_tensor, text_lengths)
    # pack sequence
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        sentence_tensor, sorted_seq_lengths, batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # unpack sequence
    # seq_output = [batch size, sent len, hidden dim * num_direction]
    seq_output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    seq_output = seq_output[desorted_indices]
    # Sentence attention: project, score against the context vector,
    # softmax, weighted-sum over sentences, then classify.
    output = matrix_mul(seq_output, self.sent_weight, self.sent_bias)
    output = matrix_mul(output, self.context_weight).permute(1, 0)
    output = F.softmax(output, dim=-1)
    output = element_wise_mul(seq_output, output.permute(1, 0)).squeeze(0)
    output = self.fc(output)
    return output
def forward(self, text, bert_masks, seq_lens):
    """BERT encoder + RNN + max-pool-over-time classifier.

    Args:
        text: token ids, [batch size, sent len].
        bert_masks: attention mask, same size as ``text``; 1 for real
            tokens, 0 for padding, e.g. [1, 1, 1, 1, 0, 0].
        seq_lens: true sequence lengths.

    Returns:
        Logits from ``self.fc``.
    """
    # bert_sentence: per-token hidden states; bert_cls: pooled [CLS] vector.
    bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
    sentence_len = bert_sentence.shape[1]
    # Broadcast the pooled [CLS] vector to every position and add it to
    # the per-token hidden states.
    bert_cls = bert_cls.unsqueeze(dim=1).repeat(1, sentence_len, 1)
    bert_sentence = bert_sentence + bert_cls
    # Sort the batch by descending length, as pack_padded_sequence requires.
    bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        bert_sentence, seq_lens)
    # pack sequence
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        bert_sentence, sorted_seq_lengths, batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    batch_size, max_seq_len, hidden_dim = output.shape
    # ReLU, then max-pool over the time dimension to one vector per sample.
    out = torch.transpose(output.relu(), 1, 2)
    out = F.max_pool1d(out, max_seq_len).squeeze()
    out = self.fc(out)
    return out
def forward(self, text, _, text_lengths):
    """RCNN-style classifier: RNN context concatenated with the word
    embedding, tanh projection, then max-pool over time.

    Args:
        text: token ids, [batch size, sent len].
        _: unused placeholder (keeps a uniform three-argument signature
            across the models in this file).
        text_lengths: true sequence lengths.

    Returns:
        Logits from ``self.fc``.
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        text, text_lengths)
    # text = [batch size, sent len]
    embedded = self.dropout(self.embedding(text)).to(torch.float32)
    # embedded = [batch size, sent len, emb dim]
    # pack sequence
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        embedded, sorted_seq_lengths, batch_first=self.batch_first)
    # hidden = [n_layers * bi_direction, batch_size, hidden_dim]
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # unpack sequence
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # output = [batch_size, seq_len, hidden_dim * num_directions]
    batch_size, max_seq_len, hidden_dim = output.shape
    # Concatenate left/right context (RNN output) with the embedding,
    # then project and squash — the RCNN "recurrent convolution".
    output = torch.tanh(self.fc_cat(torch.cat((output, embedded), dim=2)))
    output = torch.transpose(output, 1, 2)
    # Max-pool over time, collapse to [batch_size, emb_dim], classify.
    output = F.max_pool1d(output, max_seq_len).squeeze().contiguous()
    output = self.fc(output)
    return output
def forward(self, input_ids, attention_masks, text_lengths):
    """Transformer encoder + RNN classifier.

    Sums the RNN outputs over time, averages the hidden states, fuses the
    two by addition and classifies.

    Args:
        input_ids: token ids, [batch size, sent len].
        attention_masks: mask, 1 for real tokens and 0 for padding,
            e.g. [1, 1, 1, 1, 0, 0].
        text_lengths: true sequence lengths.

    Returns:
        (out, fc_input): logits and the pre-classifier features.
    """
    sentence_out = self.transformer_model(input_ids,
                                          attention_mask=attention_masks)
    # Sort the batch by descending length, as pack_padded_sequence requires.
    bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        sentence_out.last_hidden_state, text_lengths)
    # pack sequence (lengths must live on CPU for pack_padded_sequence)
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        bert_sentence, sorted_seq_lengths.cpu(), batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # output = [batch size, sent len, hidden_dim * bidirectional]
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    batch_size, max_seq_len, hidden_dim = output.shape
    # Average hidden over the layer/direction axis; sum outputs over time.
    hidden = torch.mean(torch.reshape(hidden, [batch_size, -1, hidden_dim]),
                        dim=1)
    output = torch.sum(output, dim=1)
    fc_input = self.dropout(output + hidden)
    out = self.fc_rnn(fc_input)
    # Fixed: removed a leftover debug print of the logits that spammed
    # stdout on every forward pass.
    return out, fc_input
def forward(self, text, _, text_lengths):
    """RNN classifier: mean-pooled outputs plus mean hidden state -> fc.

    Args:
        text: token ids, [batch_size, seq_len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        Logits, [batch_size, output_dim].
    """
    # Reorder the batch by descending length for packing; restore_idx
    # undoes the sort afterwards.
    ordered_text, ordered_lengths, restore_idx = prepare_pack_padded_sequence(
        text, text_lengths)
    # [batch_size, seq_len] -> [batch_size, seq_len, emb_dim]
    emb = self.dropout(self.embedding(ordered_text)).float()
    packed = nn.utils.rnn.pack_padded_sequence(
        emb, ordered_lengths, batch_first=self.batch_first)
    if self.rnn_type not in ['rnn', 'gru']:
        # LSTM: also yields a cell state we do not use.
        packed_out, (hidden, _cell) = self.rnn(packed)
    else:
        packed_out, hidden = self.rnn(packed)
    # hidden: [n_layers * num_direction, batch_size, hidden_dim]
    rnn_out, _lens = nn.utils.rnn.pad_packed_sequence(
        packed_out, batch_first=self.batch_first)
    # rnn_out: [batch_size, seq_len, hidden_dim * num_direction];
    # undo the length sort.
    rnn_out = rnn_out[restore_idx]
    # Collapse the layer/direction axis of hidden by averaging.
    hidden = torch.mean(
        torch.reshape(hidden, [rnn_out.shape[0], -1, rnn_out.shape[2]]),
        dim=1)
    # Average the per-timestep outputs into one vector per sample.
    pooled = torch.mean(rnn_out, dim=1)
    # Fuse by addition, regularise, classify.
    fc_input = self.dropout(pooled + hidden)
    return self.fc(fc_input)
def forward(self, input_ids, attention_masks, text_lengths):
    """Transformer encoder + pooled-[CLS] fusion + RNN + max-pool classifier.

    Args:
        input_ids: token ids, [batch size, sent len].
        attention_masks: mask, 1 for real tokens and 0 for padding,
            e.g. [1, 1, 1, 1, 0, 0].
        text_lengths: true sequence lengths.

    Returns:
        (out, out_embedding): logits and the pooled pre-classifier features.
    """
    sentence_out = self.transformer_model(input_ids,
                                          attention_mask=attention_masks)
    # Fixed: the HF model-output attribute is `pooler_output`, not
    # `pooler_out` — the original raised AttributeError at runtime.
    sentence_out, sentence_len, cls = (sentence_out.last_hidden_state,
                                       sentence_out.last_hidden_state.shape[1],
                                       sentence_out.pooler_output)
    # Broadcast the pooled [CLS] vector to every position and add it to
    # the per-token hidden states.
    cls = cls.unsqueeze(dim=1).repeat(1, sentence_len, 1)
    sentence_out = sentence_out + cls
    # Sort the batch by descending length, as pack_padded_sequence requires.
    bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        sentence_out, text_lengths)
    # pack sequence
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        bert_sentence, sorted_seq_lengths, batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    batch_size, max_seq_len, hidden_dim = output.shape
    # ReLU, then max-pool over the time dimension.
    out = torch.transpose(output.relu(), 1, 2)
    out_embedding = F.max_pool1d(out, int(max_seq_len)).squeeze()
    out = self.fc(out_embedding)
    # Fixed: removed a leftover debug print of the logits.
    return out, out_embedding
def forward(self, text, _, text_lengths):
    """RCNN classifier: RNN context concatenated with the embedding,
    tanh projection, then max-pool over time.

    Args:
        text: token ids, [batch_size, seq_len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        Logits, [batch_size, output_dim].
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
    # text [batch_size, seq_len]
    embedded = self.dropout(self.embedding(text)).to(torch.float32)
    # embedded [batch_size, seq_len, emb_dim]
    packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths,
                                                        batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # hidden [n_layers * num_direction, batch_size, hidden_dim]
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                              batch_first=self.batch_first)
    # output [batch_size, seq_len, hidden_dim * num_direction]
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    _, max_seq_len, _ = output.shape
    # Concatenate left/right context (RNN output) with the embedding.
    output = torch.cat((output, embedded), dim=2)
    # output [batch_size, seq_len, hidden_dim * num_direction + emb_dim]
    output = self.tanh(self.fc_cat(output))
    # output [batch_size, seq_len, emb_dim]
    output = torch.transpose(output, 1, 2)
    # output [batch_size, emb_dim, seq_len]
    # Max-pool over time, collapse to one vector per sample.
    output = F.max_pool1d(output, max_seq_len).squeeze().contiguous()
    # output [batch_size, emb_dim]
    output = self.fc(output)
    # output [batch_size, output_dim]
    return output
def forward(self, text, bert_masks, seq_lens):
    """BERT encoder + RNN classifier (summed outputs + mean hidden fusion).

    Args:
        text: token ids, [batch size, sent len].
        bert_masks: attention mask, same size as ``text``; 1 for real
            tokens, 0 for padding, e.g. [1, 1, 1, 1, 0, 0].
        seq_lens: true sequence lengths.

    Returns:
        Logits from ``self.fc_rnn``.
    """
    # bert_sentence: per-token hidden states; bert_cls is unused here.
    bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
    # Sort the batch by descending length, as pack_padded_sequence requires.
    bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        bert_sentence, seq_lens)
    # pack sequence
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        bert_sentence, sorted_seq_lengths, batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # output = [batch size, sent len, hidden_dim * bidirectional]
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    batch_size, max_seq_len, hidden_dim = output.shape
    # Average hidden over the layer/direction axis; sum outputs over time.
    hidden = torch.mean(torch.reshape(hidden, [batch_size, -1, hidden_dim]),
                        dim=1)
    output = torch.sum(output, dim=1)
    # Fuse by addition, regularise, classify.
    fc_input = self.dropout(output + hidden)
    out = self.fc_rnn(fc_input)
    return out
def forward(self, text, _, text_lengths):
    """RNN classifier: mean of per-step outputs plus mean hidden -> fc.

    Args:
        text: token ids, [batch size, sent len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        Logits from ``self.fc``.
    """
    # Reorder the batch by descending length for packing; restore_idx
    # undoes the sort afterwards.
    ordered_text, ordered_lengths, restore_idx = prepare_pack_padded_sequence(
        text, text_lengths)
    # [batch, seq_len] -> [batch, seq_len, emb_dim]
    emb = self.dropout(self.embedding(ordered_text))
    packed = nn.utils.rnn.pack_padded_sequence(
        emb, ordered_lengths, batch_first=self.batch_first)
    is_lstm = self.rnn_type not in ['rnn', 'gru']
    if is_lstm:
        # LSTM: output (seq, batch, dirs * hidden),
        # hidden (layers * dirs, batch, hidden); cell state unused.
        packed_out, (hidden, _cell) = self.rnn(packed)
    else:
        packed_out, hidden = self.rnn(packed)
    out, _lens = nn.utils.rnn.pad_packed_sequence(
        packed_out, batch_first=self.batch_first)
    # Undo the length sort.
    out = out[restore_idx]
    # out = [batch_size, seq_len, hidden_dim * num_directions]
    n_batch, _max_len, n_hidden = out.shape
    # Average hidden over its layer/direction axis, outputs over time.
    hidden = torch.reshape(hidden, [n_batch, -1, n_hidden]).mean(dim=1)
    fc_input = self.dropout(out.mean(dim=1) + hidden)
    return self.fc(fc_input)
def forward(self, text, _, text_lengths):
    """RNN classifier: max-pool the raw RNN outputs over time, dropout,
    then classify.

    Args:
        text: token ids, [batch size, sent len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        (logits, pooled_features).
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        text, text_lengths)
    # text = [batch size, sent len]
    embedded = self.dropout(self.embedding(text)).float()
    # embedded = [batch size, sent len, emb_dim]
    # pack sequence (lengths must live on CPU for pack_padded_sequence)
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        embedded, sorted_seq_lengths.cpu(), batch_first=self.batch_first)
    # Compact RNN weights for cuDNN (no-op on CPU).
    self.rnn.flatten_parameters()
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # unpack sequence
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # output = [batch_size, seq_len, hidden_dim * num_directions]
    batch_size, max_seq_len, hidden_dim = output.shape
    # Fixed: the original built `input_features = torch.cat([output,
    # embedded], dim=2)` for an RCNN-style projection, but the consuming
    # line was commented out:
    #     output = torch.tanh(self.fc_cat(input_features))
    # leaving the concatenation as dead computation (wasted time/memory).
    # It is removed here; re-enable both lines together to restore RCNN.
    output = output.permute(0, 2, 1)  # reshape for max-pooling over time
    output = F.max_pool1d(output, int(output.shape[2])).squeeze(2)
    # pooled features [batch_size, hidden_dim * num_directions]
    output = self.dropout(output)
    return self.fc(output), output
def forward(self, text, _, text_lengths):
    """RNN + soft attention classifier: fuses summed outputs, summed
    attention-weighted outputs, and the mean hidden state.

    Args:
        text: token ids, [batch_size, seq_len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        Logits, [batch_size, output_dim].
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
    # text [batch_size, seq_len]
    embedded = self.dropout(self.embedding(text)).to(torch.float32)
    # embedded [batch_size, seq_len, emb_dim]
    packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths,
                                                        batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # hidden [n_layers * num_direction, batch_size, hidden_dim]
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                              batch_first=self.batch_first)
    # output [batch_size, seq_len, hidden_dim * num_direction]
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # attention: score each timestep against the learned vector self.w
    m = self.tanh(output)
    # m [batch_size, seq_len, hidden_dim * num_direction]
    # self.w is 1-D, matching m's last dim, so matmul contracts it away.
    score = torch.matmul(m, self.w)
    # score [batch_size, seq_len]
    # NOTE(review): softmax over dim=0 normalises across the batch, not
    # across timesteps (dim=1); this matches the other attention blocks in
    # this file but looks suspect — confirm intent before changing.
    alpha = F.softmax(score, dim=0).unsqueeze(-1)
    # alpha [batch_size, seq_len, 1]
    output_attention = output * alpha
    # output_attention [batch_size, seq_len, hidden_dim * num_direction]
    # Average hidden over the layer/direction axis.
    hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
    hidden = torch.mean(hidden, dim=1)
    # hidden [batch_size, hidden_dim * num_direction]
    # Sum weighted and raw outputs over time.
    output_attention = torch.sum(output_attention, dim=1)
    # output_attention [batch_size, hidden_dim * num_direction]
    output = torch.sum(output, dim=1)
    # output [batch_size, hidden_dim * num_direction]
    # Fuse all three by addition, regularise, classify.
    fc_input = self.dropout(output + output_attention + hidden)
    # fc_input [batch_size, hidden_dim * num_direction]
    out = self.fc(fc_input)
    # out [batch_size, output_dim]
    return out
def forward(self, text, bert_masks, seq_lens):
    """BERT encoder + pooled-[CLS] fusion + RNN + max-pool classifier.

    Args:
        text: token ids, [batch_size, seq_len].
        bert_masks: attention mask, 1 for real tokens and 0 for padding.
        seq_lens: true sequence lengths.

    Returns:
        Logits, [batch_size, output_dim].
    """
    # bert_sentence [batch_size, sen_len, H=768]
    # bert_cls [batch_size, H=768] (pooled [CLS] vector)
    bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
    # Broadcast the pooled [CLS] vector along the time axis:
    # [batch_size, H] -> [batch_size, 1, H] -> [batch_size, sen_len, H],
    # then add it to the per-token hidden states.
    bert_cls = bert_cls.unsqueeze(dim=1)
    bert_cls = bert_cls.repeat(1, bert_sentence.shape[1], 1)
    bert_sentence = bert_sentence + bert_cls
    # bert_sentence [batch_size, sen_len, H=768]
    # Sort the batch by descending length, as pack_padded_sequence requires.
    bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(bert_sentence,
                                                                                       seq_lens)
    packed_embedded = nn.utils.rnn.pack_padded_sequence(bert_sentence, sorted_seq_lengths,
                                                        batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # hidden [n_layers * num_direction, batch_size, hidden_dim]
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                              batch_first=self.batch_first)
    # output = [batch_size, seq_len, hidden_dim * bidirectional]
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    output = output.relu()
    # output = [batch_size, seq_len, hidden_dim * bidirectional]
    _, max_seq_len, _ = output.shape
    # Max-pool over the time dimension to one vector per sample.
    out = torch.transpose(output, 1, 2)
    # out = [batch_size, hidden_dim * bidirectional, seq_len]
    out = F.max_pool1d(out, max_seq_len).squeeze()
    # out = [batch_size, hidden_dim * bidirectional]
    out = self.fc(out)
    # out = [batch_size, output_dim]
    return out
def forward(self, text, bert_masks, seq_lens):
    """BERT encoder + RNN + soft attention classifier: fuses summed
    outputs, summed attention-weighted outputs, and the mean hidden state.

    Args:
        text: token ids, [batch_size, seq_len].
        bert_masks: attention mask, 1 for real tokens and 0 for padding.
        seq_lens: true sequence lengths.

    Returns:
        Logits, [batch_size, num_classes].
    """
    # bert_sentence [batch_size, sen_len, H=768]
    # bert_cls [batch_size, H=768] (unused here)
    bert_sentence, bert_cls = self.bert(text, attention_mask=bert_masks)
    # Sort the batch by descending length, as pack_padded_sequence requires.
    bert_sentence, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(bert_sentence,
                                                                                       seq_lens)
    packed_embedded = nn.utils.rnn.pack_padded_sequence(bert_sentence, sorted_seq_lengths,
                                                        batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # hidden [n_layers * num_direction, batch_size, hidden_dim]
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                              batch_first=self.batch_first)
    # output = [batch_size, seq_len, hidden_dim * bidirectional]
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # attention: score each timestep against the learned vector self.w
    m = self.tanh(output)
    # m [batch_size, seq_len, hidden_dim * bidirectional]
    score = torch.matmul(m, self.w)
    # score [batch_size, seq_len]
    # NOTE(review): softmax over dim=0 normalises across the batch, not
    # across timesteps; consistent with the sibling attention blocks here,
    # but confirm intent before changing.
    alpha = F.softmax(score, dim=0).unsqueeze(-1)
    # alpha [batch_size, seq_len, 1]
    output_attention = output * alpha
    # output_attention [batch_size, seq_len, hidden_dim * bidirectional]
    output_attention = torch.sum(output_attention, dim=1)
    # output_attention [batch_size, hidden_dim * bidirectional]
    # Average hidden over the layer/direction axis.
    hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
    hidden = torch.mean(hidden, dim=1)
    # hidden [batch_size, hidden_dim * bidirectional]
    # Sum raw outputs over time.
    output = torch.sum(output, dim=1)
    # output [batch_size, hidden_dim * bidirectional]
    # Fuse all three by addition, regularise, classify.
    fc_input = self.dropout(output + output_attention + hidden)
    # fc_input [batch_size, hidden_dim * bidirectional]
    out = self.fc(fc_input)
    # out [batch_size, num_classes]
    return out
def forward(self, text, _, text_lengths):
    """RNN + soft attention classifier returning (logits, features).

    Args:
        text: token ids, [batch size, sent len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        (out, fc_input): logits and the fused pre-classifier features.
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        text, text_lengths)
    # text = [batch size, sent len]
    embedded = self.dropout(self.embedding(text)).to(torch.float32)
    # embedded = [batch size, sent len, emb dim]
    # pack sequence
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        embedded, sorted_seq_lengths, batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # unpack sequence
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # Fixed: hidden is [n_layers * num_direction, batch, hidden_dim]; the
    # batch axis is dim 1. The original `hidden[desorted_indices]` indexed
    # dim 0, silently permuting layers/directions (or raising IndexError
    # when batch_size > n_layers * num_direction).
    hidden = hidden[:, desorted_indices]
    # attention: score each timestep against the learned vector self.w.
    # NOTE(review): softmax over dim=0 normalises across the batch, not
    # across timesteps; consistent with the sibling attention blocks here.
    alpha = F.softmax(torch.matmul(self.tanh1(output), self.w),
                      dim=0).unsqueeze(-1)
    output_attention = output * alpha
    batch_size, max_seq_len, hidden_dim = output.shape
    # Average hidden over the layer/direction axis.
    hidden = torch.mean(torch.reshape(hidden, [batch_size, -1, hidden_dim]),
                        dim=1)
    # Sum weighted and raw outputs over time.
    output_attention = torch.sum(output_attention, dim=1)
    output = torch.sum(output, dim=1)
    # Fuse all three by addition, regularise, classify.
    fc_input = self.dropout(output + output_attention + hidden)
    out = self.fc(fc_input)
    return out, fc_input
def forward(self, text, _, text_lengths):
    """RNN classifier: mean-pooled outputs plus mean hidden -> fc.

    Args:
        text: token ids, [batch size, sent len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        (out, fc_input): logits and the pre-classifier features.
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        text, text_lengths)
    # text = [batch size, sent len]
    embedded = self.dropout(self.embedding(text)).float()
    # embedded = [batch size, sent len, emb dim]
    # pack sequence (lengths must live on CPU for pack_padded_sequence)
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        embedded, sorted_seq_lengths.cpu(), batch_first=self.batch_first)
    # Compact RNN weights for cuDNN (no-op on CPU).
    self.rnn.flatten_parameters()
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # output (seq_len, batch, num_directions * hidden_size)
        # hidden (num_layers * num_directions, batch, hidden_size)
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # unpack sequence
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # Fixed: removed a leftover debug `print(output.shape)` (and the
    # surrounding commented-out prints) that spammed stdout every step.
    # output = [batch_size, seq_len, hidden_dim * num_directions]
    batch_size, max_seq_len, hidden_dim = output.shape
    # Average hidden over the layer/direction axis, outputs over time.
    hidden = torch.mean(torch.reshape(hidden, [batch_size, -1, hidden_dim]),
                        dim=1)
    output = torch.mean(output, dim=1)
    # Fuse by addition, regularise, classify.
    fc_input = self.dropout(output + hidden)
    out = self.fc(fc_input)
    return out, fc_input
def forward(self, text, _, text_lengths):
    """RNN classifier: mean-pooled outputs plus mean hidden state -> fc.

    Args:
        text: token ids in normal order, [batch_size, seq_len].
            (Historically batch_first lived in the config and was False;
            in this version it defaults to True on the class.)
        _: unused placeholder (uniform signature across models; the CNN
            variant ignores it too).
        text_lengths: true (pre-padding) sequence lengths.

    Returns:
        Logits, [batch_size, output_dim].
    """
    # Sort the batch by descending length (required by
    # pack_padded_sequence); desorted_indices restores the original order.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
    # text [batch_size, seq_len]
    # Embed, then dropout.
    embedded = self.dropout(self.embedding(text)).float()
    # embedded [batch_size, seq_len, emb_dim]
    # Pack the padded batch so the RNN skips padding timesteps: sequences
    # in a batch are padded to max_length, and without packing the RNN
    # would keep consuming pad tokens after a short sequence has ended,
    # contaminating its states. Packing tells the RNN each true length so
    # finished sequences drop out of later timesteps.
    # With batch_first=True the input is [B, T, *] instead of [T, B, *].
    packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths,
                                                        batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        # RNN/GRU return (output, h_n):
        #   output (seq_len, batch, num_directions * hidden_size) — last
        #     layer's hidden state at every timestep (packed in, packed
        #     out); for bidirectional nets, forward/backward halves of the
        #     same timestep are concatenated.
        #   h_n (num_layers * num_directions, batch, hidden_size) — every
        #     layer's hidden state at the final timestep.
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM returns (output, (h_n, c_n)); c_n mirrors h_n for the cell
        # state. For a single layer, h_n equals output's final timestep.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # output (last layer, every timestep); hidden (every layer, last step)
    # output [seq_len, batch_size, hidden_dim * num_direction]
    # hidden [n_layers * num_direction, batch_size, hidden_dim]
    # Unpack back to a padded tensor; batch_first puts batch on dim 0.
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                              batch_first=self.batch_first)
    # output [batch_size, seq_len, hidden_dim * num_direction]
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # Reshape hidden using output's dims; -1 absorbs the remainder.
    hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
    # hidden [batch_size, -1, hidden_dim * num_direction]
    # Average over the layer/direction axis.
    hidden = torch.mean(hidden, dim=1)
    # hidden [batch_size, hidden_dim * num_direction]
    # Average the per-timestep outputs over time.
    output = torch.mean(output, dim=1)
    # output [batch_size, hidden_dim * num_direction]
    # Fuse by addition (not concatenation), then dropout.
    fc_input = self.dropout(output + hidden)
    # Linear map from hidden_dim * num_direction to output_dim.
    out = self.fc(fc_input)
    # out [batch_size, output_dim]
    return out
def forward(self, text, _, text_lengths):
    """RNN + soft attention classifier.

    Uses the attention formulation from "Attention-Based Bidirectional
    LSTM Networks for Relation Classification" (per the original author's
    note); fuses summed outputs, summed attention-weighted outputs and the
    mean hidden state before the classifier.

    Args:
        text: token ids, [batch_size, seq_len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        Logits, [batch_size, output_dim].
    """
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(text, text_lengths)
    embedded = self.dropout(self.embedding(text)).to(torch.float32)
    # embedded [batch_size, seq_len, emb_dim]
    packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, sorted_seq_lengths,
                                                        batch_first=self.batch_first)
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # LSTM additionally returns a cell state, which is unused here.
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    # output [seq_len, batch_size, hidden_dim * num_direction]
    # hidden [n_layers * num_direction, batch_size, hidden_dim]
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                              batch_first=self.batch_first)
    # output [batch_size, seq_len, hidden_dim * num_direction]
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # --- attention ---
    # m = tanh(H)
    m = self.tanh(output)
    # m [batch_size, seq_len, hidden_dim * num_direction]
    # self.w is 1-D and matches m's last dim, so matmul contracts it away.
    score = torch.matmul(m, self.w)
    # score [batch_size, seq_len]
    # NOTE(review): the original comment claims dim=0 softmaxes "over each
    # word", but with batch_first score is [batch, seq], so dim=0
    # normalises across the batch; kept as-is for consistency with the
    # sibling blocks — confirm intent before changing.
    # unsqueeze(-1) appends a trailing singleton dim for broadcasting.
    alpha = F.softmax(score, dim=0).unsqueeze(-1)
    # alpha [batch_size, seq_len, 1]
    # r = H * alpha (elementwise weighting of each timestep)
    output_attention = output * alpha
    # output_attention [batch_size, seq_len, hidden_dim * num_direction]
    # Average hidden over the layer/direction axis.
    hidden = torch.reshape(hidden, [output.shape[0], -1, output.shape[2]])
    hidden = torch.mean(hidden, dim=1)
    # hidden [batch_size, hidden_dim * num_direction]
    # Sum the weighted outputs over time.
    output_attention = torch.sum(output_attention, dim=1)
    # output_attention [batch_size, hidden_dim * num_direction]
    # Sum the raw outputs over time.
    output = torch.sum(output, dim=1)
    # output [batch_size, hidden_dim * num_direction]
    # Fuse all three by addition, then dropout.
    fc_input = self.dropout(output + output_attention + hidden)
    # fc_input [batch_size, hidden_dim * num_direction]
    out = self.fc(fc_input)
    # out [batch_size, output_dim]
    return out
def forward(self, text, _, text_lengths):
    """CNN + BiLSTM hybrid classifier.

    Runs a multi-kernel CNN branch and an RNN branch over the same text,
    concatenates their pooled features, and classifies.

    Args:
        text: token ids, [batch size, sent len].
        _: unused placeholder (uniform signature across models).
        text_lengths: true sequence lengths.

    Returns:
        (logit, cnn_lstm_out): both names refer to the same logits tensor
        (kept for interface compatibility with callers expecting a pair).
    """
    # ----- CNN branch -----
    # text = [batch size, sent len]
    embedded = self.embedding(text)
    # Add a channel dim so Conv2d can slide over (sent_len, emb_dim).
    embedded = embedded.unsqueeze(1).float()
    # embedded = [batch size, 1, sent_len, emb_dim]
    conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
    # conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
    pooled = [
        F.max_pool1d(conv, int(conv.shape[2])).squeeze(2) for conv in conved
    ]
    # pooled_n = [batch size, n_filters]
    cnn_out = self.dropout(torch.cat(pooled, dim=1))
    # cnn_out = [batch size, n_filters * len(filter_sizes)]

    # ----- BiLSTM branch -----
    # Sort the batch by descending length, as pack_padded_sequence requires.
    text, sorted_seq_lengths, desorted_indices = prepare_pack_padded_sequence(
        text, text_lengths)
    embedded = self.dropout(self.embedding(text)).float()
    # embedded = [batch size, sent len, emb dim]
    packed_embedded = nn.utils.rnn.pack_padded_sequence(
        embedded, sorted_seq_lengths.cpu(), batch_first=self.batch_first)
    # Compact RNN weights for cuDNN (no-op on CPU).
    self.rnn.flatten_parameters()
    if self.rnn_type in ['rnn', 'gru']:
        packed_output, hidden = self.rnn(packed_embedded)
    else:
        # output (seq_len, batch, num_directions * hidden_size)
        # hidden (num_layers * num_directions, batch, hidden_size)
        packed_output, (hidden, cell) = self.rnn(packed_embedded)
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        packed_output, batch_first=self.batch_first)
    # Restore the original (pre-sort) batch order.
    output = output[desorted_indices]
    # output = [batch_size, seq_len, hidden_dim * num_directions]
    batch_size, _, hidden_dim = output.shape
    # Average hidden over the layer/direction axis, outputs over time,
    # then fuse by addition.
    hidden = torch.mean(torch.reshape(hidden, [batch_size, -1, hidden_dim]),
                        dim=1)
    output = torch.mean(output, dim=1)
    bilstm_out = output + hidden

    # ----- fuse branches and classify -----
    cnn_lstm_out = torch.cat((cnn_out, bilstm_out), 1)
    # Fixed: F.tanh has been deprecated since PyTorch 1.2 — use torch.tanh.
    cnn_lstm_out = self.fc(torch.tanh(cnn_lstm_out))
    logit = cnn_lstm_out
    return logit, cnn_lstm_out