def sequence_one_hot(seq: PackedSequence, size: int) -> PackedSequence:
    data = torch.zeros(seq.data.size(0), size, dtype=seq.data.dtype)
    data.scatter_(1, seq.data[:, None], 1)
    return PackedSequence(
        data,
        seq.batch_sizes,
        seq.sorted_indices,
        seq.unsorted_indices,
    )

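# Hypothetical usage sketch for sequence_one_hot above (not part of the original
# snippet): one-hot encode the token ids of a small packed batch. The vocabulary
# size of 10 and the toy id tensor are assumptions made for illustration only.
import torch
from torch.nn.utils.rnn import pack_padded_sequence

padded_ids = torch.tensor([[1, 4, 2], [3, 0, 0]])                  # (batch, max_len) token ids
packed_ids = pack_padded_sequence(padded_ids, lengths=[3, 1], batch_first=True)
one_hot = sequence_one_hot(packed_ids, size=10)                    # PackedSequence of one-hot rows
print(one_hot.data.shape)                                          # torch.Size([4, 10])
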
def pack_wrapper(module, att_feats, att_masks):
    if att_masks is not None:
        packed, inv_ix = sort_pack_padded_sequence(
            att_feats, att_masks.data.long().sum(1))
        return pad_unsort_packed_sequence(
            PackedSequence(module(packed[0]), packed[1]), inv_ix)
    else:
        return module(att_feats)

def wrap_align(b, l):  # batch, len_tgt, len_src
    if b is None:
        return b
    b = torch.stack(b, 0).transpose(0, 1).contiguous().float()
    if self.cuda:
        b = b.cuda()
    packed = pack(b, list(l))
    return PackedSequence(
        Variable(packed[0], volatile=self.volatile, requires_grad=False),
        packed[1])

def forward(self, sequences, mask=None):
    # packed with shape (pack_len w) or p w
    x = self.embedding(sequences.data)  # p w u
    x = self.conv1d(x.permute(0, 2, 1)).permute(0, 2, 1)  # conv doesn't change shape; still p w u
    # x is still p w u; take the max across each word.
    # The mask is added so that padded positions can never win the max.
    x, _ = torch.max(x + mask, 1)  # max is across each word
    _, *args = sequences
    return PackedSequence(x, *args)

def forward(self, sentence):
    lstm_out, self.hidden = self.lstm(sentence, self.hidden)
    if isinstance(lstm_out, PackedSequence):
        target = PackedSequence(self.hidden2target(lstm_out.data), lstm_out.batch_sizes)
    else:
        target = self.hidden2target(lstm_out)
    return target

def forward(self, features, titles, lengths):
    title_embeddings = self.embedding(titles)
    embeddings = torch.cat((features.unsqueeze(0), title_embeddings), 0)
    embeddings = self.dropout(embeddings)
    packed = pack_padded_sequence(embeddings, lengths, enforce_sorted=False)
    hiddens, _ = self.lstm(packed)
    outputs = self.output(hiddens[0])
    return outputs, PackedSequence(outputs, packed.batch_sizes,
                                   packed.sorted_indices, packed.unsorted_indices)

def forward(self, x):  # noqa: F811
    orig_input = x
    self.flatten_parameters()
    # xxx: isinstance check needs to be in conditional for TorchScript to compile
    if isinstance(orig_input, PackedSequence):
        x, batch_sizes, sorted_indices, unsorted_indices = x
        max_batch_size = int(batch_sizes[0])
    else:
        batch_sizes = None
        # read the batch dimension before any transpose
        max_batch_size = x.size(0) if self.batch_first else x.size(1)
        sorted_indices = None
        unsorted_indices = None
        if not self.batch_first:
            x = x.transpose(1, 0)

    if (self.hidden_state is None or self.cell_state is None
            or max_batch_size != int_shape(self.hidden_state)[1]):
        self.initial_state(x)
    elif not self.stateful:
        self.clear_state()

    self.hidden_state, self.cell_state = self.permute_hidden(
        (self.hidden_state, self.cell_state), sorted_indices)
    self.check_forward_args(x, (self.hidden_state, self.cell_state), batch_sizes)

    if batch_sizes is None:
        result = _VF.lstm(x, (self.hidden_state, self.cell_state),
                          self._flat_weights, self.use_bias, self.num_layers,
                          self.dropout_rate, self.training, self.bidirectional,
                          self.batch_first)
    else:
        result = _VF.lstm(x, batch_sizes, (self.hidden_state, self.cell_state),
                          self._flat_weights, self.use_bias, self.num_layers,
                          self.dropout_rate, self.training, self.bidirectional)

    output = result[0]
    if batch_sizes is None and not self.batch_first:
        output = output.permute(1, 0, 2)
    self.hidden_state = result[1].detach()
    self.cell_state = result[2].detach()

    # xxx: isinstance check needs to be in conditional for TorchScript to compile
    if isinstance(orig_input, PackedSequence):
        output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
        return output_packed, self.permute_hidden(
            (self.hidden_state, self.cell_state), unsorted_indices)
    else:
        return output, self.permute_hidden(
            (self.hidden_state, self.cell_state), unsorted_indices)

def forward(self, sentences, words_per_sentence):
    # get word embeddings, apply dropout
    sentences = self.dropout(self.embeddings(sentences))  # (n_sentences, word_pad_len, emb_size)

    # re-arrange as words by removing word-pads (SENTENCES -> WORDS)
    packed_words = pack_padded_sequence(
        sentences,
        lengths=words_per_sentence.tolist(),
        batch_first=True,
        enforce_sorted=False
    )  # a PackedSequence object, where 'data' is the flattened words (n_words, word_emb)

    # apply the word-level RNN over the word embeddings (PyTorch automatically applies it on the PackedSequence)
    packed_words, _ = self.word_rnn(packed_words)  # a PackedSequence object, where 'data' is the output of the RNN (n_words, 2 * word_rnn_size)

    # find attention vectors by applying the attention linear layer on the output of the RNN
    att_w = self.word_attention(packed_words.data)  # (n_words, att_size)
    att_w = torch.tanh(att_w)  # (n_words, att_size)

    # take the dot-product of the attention vectors with the context vector (i.e. parameter of linear layer)
    att_w = self.word_context_vector(att_w).squeeze(1)  # (n_words)

    # compute softmax over the dot-products manually,
    # because it has to be computed only over words in the same sentence

    # first, take the exponent
    max_value = att_w.max()  # scalar, for numerical stability during exponent calculation
    att_w = torch.exp(att_w - max_value)  # (n_words)

    # re-arrange as sentences by re-padding with 0s (WORDS -> SENTENCES)
    att_w, _ = pad_packed_sequence(
        PackedSequence(data=att_w,
                       batch_sizes=packed_words.batch_sizes,
                       sorted_indices=packed_words.sorted_indices,
                       unsorted_indices=packed_words.unsorted_indices),
        batch_first=True)  # (n_sentences, max(words_per_sentence))

    # calculate softmax values as now words are arranged in their respective sentences
    word_alphas = att_w / torch.sum(att_w, dim=1, keepdim=True)  # (n_sentences, max(words_per_sentence))

    # similarly re-arrange word-level RNN outputs as sentences by re-padding with 0s (WORDS -> SENTENCES)
    sentences, _ = pad_packed_sequence(packed_words, batch_first=True)  # (n_sentences, max(words_per_sentence), 2 * word_rnn_size)

    # find sentence embeddings
    sentences = sentences * word_alphas.unsqueeze(2)  # (n_sentences, max(words_per_sentence), 2 * word_rnn_size)
    sentences = sentences.sum(dim=1)  # (n_sentences, 2 * word_rnn_size)

    return sentences, word_alphas

def setUp(self):
    # Japanese part-of-speech tags (名詞 noun, 助詞 particle, 動詞 verb, 副詞 adverb, 形容詞 adjective)
    self.y_to_index = {
        '<PAD>': 0, '<UNK>': 1, '名詞': 2, '助詞': 3, '動詞': 4, '副詞': 5, '形容詞': 6
    }
    # Japanese word vocabulary (recipe tokens)
    self.x_to_index = [{
        '<PAD>': 0, '<UNK>': 1, '人参': 2, 'を': 3, '切る': 4,
        'ざっくり': 5, '葱': 6, 'は': 7, '細く': 8, '刻む': 9
    }]
    self.model = BiLSTM([2], [], 4, [len(self.x_to_index[0])],
                        len(self.y_to_index), batch_size=3, use_lstm=True)
    self.embedding_weight = Parameter(
        torch.tensor(
            [
                [0, 0],    # for <PAD>
                [1, 2],    # for <UNK>
                [3, 4],
                [5, 6],
                [7, 8],
                [9, 10],
                [11, 12],
                [13, 14],
                [15, 16],
                [17, 18]
            ],
            dtype=torch.float))
    self.X1 = ([2, 3, 4], [5, 4], [6, 7, 8, 9])
    self.X2 = [[6, 7, 8, 9], [2, 3, 4], [5, 4]]
    self.X3 = [[6, 7, 8, 9], [2, 3, 4, 0], [5, 4, 0, 0]]
    self.X4 = torch.tensor([[[11, 12], [13, 14], [15, 16], [17, 18]],
                            [[3, 4], [5, 6], [7, 8], [0, 0]],
                            [[9, 10], [7, 8], [0, 0], [0, 0]]],
                           dtype=torch.float)
    self.X5 = PackedSequence(
        torch.tensor([[11, 12], [3, 4], [9, 10],
                      [13, 14], [5, 6], [7, 8],
                      [15, 16], [7, 8],
                      [17, 18]], dtype=torch.float),
        torch.tensor([3, 3, 2, 1]))
    self.Y = ([2, 3, 4], [5, 4], [2, 3, 6, 4])
    self.lengths = [len(x) for x in self.X2]

def _to_cuda(self):
    if self.config.use_cuda:
        self.model = self.model.cuda()
        self.training_data = [(PackedSequence(sentences.data.cuda(), sentences.batch_sizes),
                               PackedSequence(gazetteers.data.cuda(), gazetteers.batch_sizes),
                               PackedSequence(batch_tags.data.cuda(), batch_tags.batch_sizes))
                              for sentences, gazetteers, batch_tags in self.training_data]
        self.valid_data = [(PackedSequence(sentences.data.cuda(), sentences.batch_sizes),
                            PackedSequence(gazetteers.data.cuda(), gazetteers.batch_sizes),
                            PackedSequence(batch_tags.data.cuda(), batch_tags.batch_sizes))
                           for sentences, gazetteers, batch_tags in self.valid_data]
        self.eval_data = [(PackedSequence(sentences.data.cuda(), sentences.batch_sizes),
                           PackedSequence(gazetteers.data.cuda(), gazetteers.batch_sizes),
                           PackedSequence(batch_tags.data.cuda(), batch_tags.batch_sizes))
                          for sentences, gazetteers, batch_tags in self.eval_data]

def exec_word_embedding(self, inputs: PackedSequence) -> PackedSequence:
    """Word embedding."""
    output_data = torch.nn.functional.embedding(
        inputs.data,
        self.word_embedding_weight,
        padding_idx=0,
    )
    return PackedSequence(output_data, inputs.batch_sizes)

def to_one_hot(self, x):
    packed = isinstance(x, PackedSequence)
    if packed:
        one_hot = x.data.new(x.data.size(0), self.nin).float().zero_()
        one_hot.scatter_(1, x.data.unsqueeze(1), 1)
        one_hot = PackedSequence(one_hot, x.batch_sizes)
    else:
        one_hot = x.new(x.size(0), x.size(1), self.nin).float().zero_()
        one_hot.scatter_(2, x.unsqueeze(2), 1)
    return one_hot

def forward(self, x: PackedSequence, h):
    x = PackedSequence(self.embedding.forward(x.data),
                       batch_sizes=x.batch_sizes,
                       sorted_indices=x.sorted_indices,
                       unsorted_indices=x.unsorted_indices)
    out, h = self.lstm.forward(x, h)
    out = self.fc1.forward(out.data)
    out = self.fc2.forward(out)
    out = torch.matmul(out, self.embedding.weight.t())
    out = torch.softmax(out + 1e-16, dim=1)
    out = PackedSequence(out,
                         batch_sizes=x.batch_sizes,
                         sorted_indices=x.sorted_indices,
                         unsorted_indices=x.unsorted_indices)
    return out, h

def embed_seq(self, packed_seq, scatter_idx):
    tok_embed = self.tok_embed(packed_seq.data)
    packed_input = PackedSequence(data=tok_embed,
                                  batch_sizes=packed_seq.batch_sizes,
                                  sorted_indices=packed_seq.sorted_indices,
                                  unsorted_indices=packed_seq.unsorted_indices)
    _, (h, c) = self.lstm(packed_input)
    return self.agg_func(h[-1], scatter_idx)

def forward(self, image, questions):
    image_embed = self.img_channel(image)  # returns tensor
    emb_qns = self.word_embeddings(questions.data)
    embeds = PackedSequence(emb_qns, questions.batch_sizes)
    cache = self.qns_channel.init_cache(batch=questions.batch_sizes[0])
    questions_embed, _ = self.qns_channel(embeds, cache)
    questions_embed = questions_embed[-1]
    added = torch.cat((image_embed, questions_embed), 1)  # concat the img and qns layers
    output = self.resolve_fc(added)
    return output

def forward(self, input: PackedSequence,
            hidden: List[Tensor]) -> Tuple[PackedSequence, List[Tensor]]:
    assert isinstance(input, PackedSequence)
    x, batch_sizes, sorted_indices, unsorted_indices = input
    f_gates_input = torch.mm(x, self.f_weight_i)
    f_gates_input = self.f_bn(f_gates_input)  # deepspeech sequence-wise normalization of the input part
    b_gates_input = torch.mm(x, self.b_weight_i)
    b_gates_input = self.b_bn(b_gates_input)
    f_input = PackedSequence(f_gates_input, batch_sizes, sorted_indices, unsorted_indices)
    b_input = PackedSequence(b_gates_input, batch_sizes, sorted_indices, unsorted_indices)
    return self.rnn([f_input, b_input], hidden)

def forward(self, inp, hx=None):
    self.weight_hh_l0 = self.dropout_layer(self.old_weight_hh_l0)
    if self.training and self.dropout_inp != 0:
        input, batch_size = inp
        between_layer_mask = self.generate_mask(self.dropout_inp, input)
        droppedinput = input * between_layer_mask
        inp = PackedSequence(droppedinput, batch_size)
    return super(WeightdropLSTM, self).forward(inp, hx=hx)

def forward_packed(self, input: PackedSequence,
                   hx: Optional[Tuple[Tensor, Tensor]] = None
                   ) -> Tuple[PackedSequence, Tuple[Tensor, Tensor]]:
    input, batch_sizes, sorted_indices, unsorted_indices = input
    max_batch_size = int(batch_sizes[0])
    output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
    output = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
    return output, self.permute_hidden(hidden, unsorted_indices)

def pack_wrapper(self, module, att_feats, att_masks):
    """Apply embedding to padded inputs."""
    if att_masks is not None:
        packed, inv_ix = self.sort_pack_padded_sequence(
            att_feats, att_masks.data.long().sum(1))
        return self.pad_unsort_packed_sequence(
            PackedSequence(module(packed[0]), packed[1]), inv_ix)
    else:
        return module(att_feats)

def pack_wrapper(module, att_feats, att_masks):
    if att_masks is not None:
        packed = pack_padded_sequence(att_feats,
                                      list(att_masks.data.long().sum(1)),
                                      batch_first=True)
        return pad_packed_sequence(PackedSequence(module(packed[0]), packed[1]),
                                   batch_first=True)[0]
    else:
        return module(att_feats)

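# Hypothetical usage sketch for the pack_wrapper above (toy shapes and a Linear
# projection are assumptions, not part of the original snippet): the module is
# applied only to the unpadded attention features, and the result is re-padded
# with zeros.
import torch
from torch import nn

att_feats = torch.randn(2, 5, 16)                                  # (batch, max_att_len, feat)
att_masks = torch.tensor([[1., 1., 1., 1., 1.],
                          [1., 1., 1., 0., 0.]])                   # lengths 5 and 3
proj = nn.Linear(16, 8)
out = pack_wrapper(proj, att_feats, att_masks)                     # (2, 5, 8); padded rows are zero
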
def decode(self, theta):
    """Shortcut for doing inference."""
    data, batch_sizes = theta
    with torch.enable_grad():
        data.requires_grad_()
        nll = self.forward(theta)
        v = torch.sum(nll)
        v_grad, = torch.autograd.grad(v, (data,), create_graph=True)
    return PackedSequence(v_grad, batch_sizes)

def forward(self, list_progs, context_embeds, ll=None, target_list=None,
            gen_method='sample', sizes=None, has_stopped=None):
    n_prog = len(list_progs)
    prog_int_seqs = [torch.LongTensor([self.vocab[c] for c in expr] + [self.tok_stop]).to(context_embeds.device)
                     for expr in list_progs]
    lengths = [v.size(0) for v in prog_int_seqs]
    padded_int_seqs = pad_sequence(prog_int_seqs, batch_first=False, padding_value=self.tok_pad)
    packed_seq = pack_padded_sequence(padded_int_seqs, lengths=lengths,
                                      batch_first=False, enforce_sorted=False)
    tok_embed = self.tok_embed(packed_seq.data)
    packed_input = PackedSequence(data=tok_embed,
                                  batch_sizes=packed_seq.batch_sizes,
                                  sorted_indices=packed_seq.sorted_indices,
                                  unsorted_indices=packed_seq.unsorted_indices)
    h = self.ctx2h(context_embeds).view(n_prog, 2 * self.rnn_layers, -1).transpose(0, 1)
    c = self.ctx2c(context_embeds).view(n_prog, 2 * self.rnn_layers, -1).transpose(0, 1)
    packed_out, _ = self.lstm(packed_input, (h, c))
    unpacked_out, _ = pad_packed_sequence(packed_out)

    # positions to mod/del
    expr_poses = (padded_int_seqs == self.tok_constexpr) | (padded_int_seqs == self.tok_subexpr)
    embed_expr = unpacked_out[expr_poses]
    if embed_expr.shape[0]:
        mod_scores = self.modify_score(embed_expr)
        del_scores = self.del_score(embed_expr)
    else:
        mod_scores = del_scores = None

    # positions to insert
    ins_poses = padded_int_seqs == self.tok_start
    insert_scores = self.insert_score(unpacked_out[ins_poses])

    # positions to stop
    stop_poses = padded_int_seqs == self.tok_stop
    stop_scores = self.stop_score(unpacked_out[stop_poses])

    logits = loc_score(mod_scores, del_scores, insert_scores, stop_scores,
                       expr_poses, ins_poses, stop_poses, has_stopped)
    log_prob = F.log_softmax(logits, dim=0).t().contiguous()

    ll_target = None
    predecessors = None
    if target_list is None:
        if gen_method == 'sample':
            target = torch.multinomial(torch.exp(log_prob), 1)
        elif gen_method == 'argmax':
            target = torch.argmax(log_prob, dim=1)
        elif gen_method.startswith('beam'):
            beam_size = int(gen_method.split('-')[-1])
            raw_scores = log_prob + ll if ll is not None else log_prob
            predecessors, target, ll_target, sizes = beam_step(raw_scores, sizes, beam_size)
            update_embed = unpacked_out[target, predecessors]
        else:
            raise NotImplementedError
    else:
        target = torch.LongTensor(target_list).to(log_prob.device)

    target = target.view(-1)
    if predecessors is None:
        ll_step = log_prob[range(n_prog), target]
        ll_target = ll_step.view(ll.shape) + ll if ll is not None else ll_step
        update_embed = unpacked_out[target, range(n_prog)]
    return ll_target.view(-1, 1), target, update_embed, predecessors, sizes

def forward(self, input, hx=None):
    is_packed = isinstance(input, PackedSequence)
    is_lstm = (self.mode == "LSTM")
    if is_packed:
        input, batch_sizes = input
        max_batch_size = int(batch_sizes[0])
    else:
        batch_sizes = None
        max_batch_size = input.size(0) if self.batch_first else input.size(1)

    if hx is None:
        hx = input.new_zeros(self.num_layers * self.num_directions,
                             max_batch_size, self.hidden_size,
                             requires_grad=False)
        if is_lstm:
            hx = (hx, hx)

    if self.batch_first:
        input = input.transpose(0, 1)
    batch_size = input.shape[1]

    mask_x = input.new_ones((batch_size, self.input_size))
    mask_out = input.new_ones((batch_size, self.hidden_size * self.num_directions))
    mask_h_ones = input.new_ones((batch_size, self.hidden_size))
    nn.functional.dropout(mask_x, p=self.input_dropout, training=self.training, inplace=True)
    nn.functional.dropout(mask_out, p=self.hidden_dropout, training=self.training, inplace=True)

    hidden_list = []
    for layer in range(self.num_layers):
        output_list = []
        mask_h = nn.functional.dropout(mask_h_ones, p=self.hidden_dropout,
                                       training=self.training, inplace=False)
        for direction in range(self.num_directions):
            input_x = input if direction == 0 else flip(input, [0])
            idx = self.num_directions * layer + direction
            cell = self._all_cells[idx]
            hi = (hx[0][idx], hx[1][idx]) if is_lstm else hx[idx]
            mask_xi = mask_x if layer == 0 else mask_out
            output_x, hidden_x = cell(input_x, hi, mask_xi, mask_h)
            output_list.append(output_x if direction == 0 else flip(output_x, [0]))
            hidden_list.append(hidden_x)
        input = torch.cat(output_list, dim=-1)

    output = input.transpose(0, 1) if self.batch_first else input

    if is_lstm:
        h_list, c_list = zip(*hidden_list)
        hn = torch.stack(h_list, dim=0)
        cn = torch.stack(c_list, dim=0)
        hidden = (hn, cn)
    else:
        hidden = torch.stack(hidden_list, dim=0)

    if is_packed:
        output = PackedSequence(output, batch_sizes)
    return output, hidden

def forward(self, chars, words, sequence_lengths):
    assert chars.shape[0] == words.shape[0]
    assert chars.shape[1] == words.shape[1]

    # flatten the batch and sequence dims into words (batch * sequence, word_len, 1)
    sequence_lengths, sorted_indices = torch.sort(sequence_lengths, descending=True)
    chars = chars.index_select(0, sorted_indices)
    words = words.index_select(0, sorted_indices)
    word_chars = pack_padded_sequence(chars, sequence_lengths, batch_first=True)

    # run the char_cnn on it and then reshape back to [batch, sequence, ...]
    char_pools = self.char_cnn(word_chars.data).squeeze(-1)

    # Look up the word embeddings
    words = pack_padded_sequence(words, sequence_lengths, batch_first=True)
    embeddings = self.word_embeddings(words.data)

    # Create word representations from the concatenation of the char-cnn derived representation
    # and the word embedding representation
    word_reps = torch.cat((char_pools, embeddings), -1)

    # batch normalization, batch-normalize all words
    word_reps = self.batch_norm(word_reps)

    # Run LSTM on the sequences of word representations to create contextual word representations
    word_reps = PackedSequence(word_reps,
                               batch_sizes=word_chars.batch_sizes,
                               sorted_indices=sorted_indices,
                               unsorted_indices=None)
    contextual_word_reps, _ = self.lstm(word_reps)

    # Project to the "begin of sentence" space for each word
    contextual_word_reps = self.dropout(contextual_word_reps.data)
    bos = self.hidden2bos(contextual_word_reps).squeeze(-1)
    bos, _ = pad_packed_sequence(PackedSequence(bos,
                                                batch_sizes=word_chars.batch_sizes,
                                                sorted_indices=sorted_indices,
                                                unsorted_indices=None),
                                 batch_first=True)
    return bos

def forward(self, sent_embeddings, doc_perm_idx, doc_valid_bsz, word_att_weights):
    """
    :param sent_embeddings: LongTensor (batch_size * padded_doc_length, sentence recurrent dim)
    :param doc_perm_idx: LongTensor (batch_size)
    :param doc_valid_bsz: LongTensor (max_doc_len)
    :param word_att_weights: LongTensor (batch_size * padded_doc_length, max_sent_len)
    :return: document embeddings, word attention weights, sentence attention weights
    """
    sent_embeddings = self.dropout(sent_embeddings)

    # Sentence-level LSTM over sentence embeddings
    packed_sentences, _ = self.encoder(PackedSequence(sent_embeddings, doc_valid_bsz))

    u_i = torch.tanh(self.sentence_weight(packed_sentences.data))
    u_w = self.sentence_context_weight(u_i).squeeze(1)
    val = u_w.max()
    att = torch.exp(u_w - val)

    # Restore as sentences by re-padding
    att, _ = pad_packed_sequence(PackedSequence(att, doc_valid_bsz), batch_first=True)
    sent_att_weights = att / torch.sum(att, dim=1, keepdim=True)

    # Restore as documents by re-padding
    docs, _ = pad_packed_sequence(packed_sentences, batch_first=True)

    # Compute document vectors
    docs = docs * sent_att_weights.unsqueeze(2)
    docs = docs.sum(dim=1)

    # Restore word attention weights as documents by re-padding
    word_att_weights, _ = pad_packed_sequence(PackedSequence(word_att_weights, doc_valid_bsz),
                                              batch_first=True)

    # Restore the original order of documents (undo the first sorting)
    _, doc_unperm_idx = doc_perm_idx.sort(dim=0, descending=False)
    docs = docs[doc_unperm_idx]
    word_att_weights = word_att_weights[doc_unperm_idx]
    sent_att_weights = sent_att_weights[doc_unperm_idx]

    return docs, word_att_weights, sent_att_weights

def forward(self, x):
    # x's are already flanked by the start/stop token as: [stop, x, stop]
    z_fwd, z_rvs = self.embed_and_split(x, pad=False)
    h_fwd, h_rvs = self.transform(z_fwd, z_rvs, last_only=True)

    packed = isinstance(z_fwd, PackedSequence)
    if packed:
        h_flat = h_fwd.data
        logp_fwd = self.linear(h_flat)
        logp_fwd = PackedSequence(logp_fwd, h_fwd.batch_sizes)

        h_flat = h_rvs.data
        logp_rvs = self.linear(h_flat)
        logp_rvs = PackedSequence(logp_rvs, h_rvs.batch_sizes)

        logp_fwd, batch_s = pad_packed_sequence(logp_fwd, batch_first=True)
        logp_rvs, batch_s = pad_packed_sequence(logp_rvs, batch_first=True)
    else:
        b = h_fwd.size(0)
        n = h_fwd.size(1)
        h_flat = h_fwd.contiguous().view(-1, h_fwd.size(2))
        logp_fwd = self.linear(h_flat)
        logp_fwd = logp_fwd.view(b, n, -1)

        h_flat = h_rvs.contiguous().view(-1, h_rvs.size(2))
        logp_rvs = self.linear(h_flat)
        logp_rvs = logp_rvs.view(b, n, -1)

    # prepend forward logp with zero
    # append reverse logp with zero
    b = h_fwd.size(0)
    zero = h_fwd.data.new(b, 1, logp_fwd.size(2)).zero_()
    logp_fwd = torch.cat([zero, logp_fwd], 1)
    logp_rvs = torch.cat([logp_rvs, zero], 1)

    logp = F.log_softmax(logp_fwd + logp_rvs, dim=2)

    if packed:
        batch_s = [s + 1 for s in batch_s]
        logp = pack_padded_sequence(logp, batch_s, batch_first=True)

    return logp

def forward(self, batch):
    """
    Args:
        batch: Dictionary with lists of tensors

    Returns:
        List of mask tensors
    """
    self.normalize_batch(batch['Y_abs'], batch['X_abs'], batch['X_clean'])
    h = pt.ops.pack_sequence(batch['Y_abs'])
    h_data = pt.ops.sequence.log1p(h.data)

    if self.use_pd:
        cos_pd = pt.ops.pack_sequence(batch['cos_inter_phase_difference'])
        sin_pd = pt.ops.pack_sequence(batch['sin_inter_phase_difference'])
        input_data = torch.cat((h_data, cos_pd.data, sin_pd.data), dim=-1)
    else:
        input_data = h_data
    # rebuild the packed input so the compressed (log1p) features are used in both paths
    h = PackedSequence(input_data, h.batch_sizes)

    _, F = h.data.size()
    assert F == self.F, f'self.F = {self.F} != F = {F}'

    h_data = self.dropout_input(h.data)
    h = PackedSequence(h_data, h.batch_sizes)

    # Returns tensor with shape (t, b, num_directions * hidden_size)
    h, _ = self.blstm(h)

    h_data = self.dropout_linear(h.data)
    h_data = self.linear1(h_data)
    h_data = self.output_activation(h_data)
    h_data = self.linear2(h_data)
    h_data = self.output_activation(h_data)
    h = PackedSequence(h_data, h.batch_sizes)

    mask = PackedSequence(
        einops.rearrange(h.data, 'tb (k f) -> tb k f', k=self.K),
        h.batch_sizes,
    )
    return pt.ops.unpack_sequence(mask)

def forward(self, W):
    X = PackedSequence(self.embedding_dropout(self.char_embedding(W.data)), W.batch_sizes)
    H, h = self.gru(X)
    # Concat the per-layer and per-direction h vectors
    v = h.permute(1, 0, 2).contiguous().view(h.shape[1], -1)
    v = self.dropout(v)
    v = self.projection(v)
    return v

def forward(self, document, sentence_per_document, svo_length_per_sentence):
    batch_size, max_sentence_length, max_word_length = document.size()
    # |document| = (batch_size, max_sentence_length, max_word_length)
    # |sentence_per_document| = (batch_size)
    # |word_per_sentence| = (batch_size, max_sentence_length)
    # |svo_length_per_sentence| = (batch_size, max_sentence_length, 3)

    # Remove sentence-padding in document by using "pack_padded_sequence.data"
    packed_sentences = pack(document,
                            lengths=sentence_per_document.tolist(),
                            batch_first=True,
                            enforce_sorted=False)
    # |packed_sentences.data| = (sum(sentence_length), max_word_length)

    # Remove sentence-padding in svo_length_per_sentence by using "pack_padded_sequence.data"
    packed_svo_length_per_sentence = pack(svo_length_per_sentence,
                                          lengths=sentence_per_document.tolist(),
                                          batch_first=True,
                                          enforce_sorted=False)
    # |packed_svo_length_per_sentence.data| = (sum(sentence_length), 3)

    sentence_vecs = self.ntn(packed_sentences.data, packed_svo_length_per_sentence.data)
    # |sentence_vecs| = (sum(sentence_length), tensor_dim)

    # "packed_sentences" has the information needed to rebuild a PackedSequence for the sentences
    packed_sentence_vecs = PackedSequence(data=sentence_vecs,
                                          batch_sizes=packed_sentences.batch_sizes,
                                          sorted_indices=packed_sentences.sorted_indices,
                                          unsorted_indices=packed_sentences.unsorted_indices)

    # Based on the length information, generate a mask so that shorter samples get no wasted attention
    mask = self.generate_mask(sentence_per_document)
    # |mask| = (batch_size, max(sentence_per_document))

    # Get document vectors by using the GRU
    last_hiddens, _ = self.rnn(packed_sentence_vecs)

    # Unpack the output of the RNN model
    last_hiddens, _ = unpack(last_hiddens, batch_first=True)
    # |last_hiddens| = (batch_size, max(sentence_per_document), hidden_size)

    # Get attention weights and context vectors
    context_vectors, context_weights = self.attn(last_hiddens, mask)
    # |context_vectors| = (batch_size, hidden_size)
    # |context_weights| = (batch_size, max(sentence_per_document))

    y = self.softmax(self.output(context_vectors))
    return y, context_weights

def summarize_example_wise(packed_data, batch_sizes):
    """
    Summarize a packed sequence example-wise.

    :param packed_data: flattened data of the packed sequence to be aligned example-wise
    :param batch_sizes: batch sizes of the packed sequence; the caption lengths recovered
        from it are used to average the restored padded matrix per example (e.g. for
        computing example-wise accuracy)
    """
    x = PackedSequence(packed_data, batch_sizes)
    x, lengths = pad_packed_sequence(x, batch_first=True)
    x = x.sum(dim=1) / lengths.to(x.device)
    return x

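# Hypothetical usage sketch for summarize_example_wise above (the toy per-token
# scores are assumptions, not from the original source): average packed per-token
# scores over each caption's true length.
import torch
from torch.nn.utils.rnn import pack_padded_sequence

token_scores = torch.tensor([[1., 0., 1.],
                             [1., 0., 0.]])                        # (batch, max_caption_len)
packed_scores = pack_padded_sequence(token_scores, lengths=[3, 1], batch_first=True)
per_example = summarize_example_wise(packed_scores.data, packed_scores.batch_sizes)
print(per_example)                                                 # tensor([0.6667, 1.0000])
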