def prepare_for_gpu(self, batch, embedding_layer):
    ## Output format of prepare_for_gpu: batch_size, gold_output, *input
    batch_size = int(len(batch['utterance_list']) / batch['max_num_utterances'])

    elmo_embeddings, input_mask = embedding_layer.get_embeddings(batch['utterance_list'])
    if self.args.use_cuda:
        elmo_embeddings = elmo_embeddings.cuda()
    input_mask_variable = variable(input_mask).float()

    ## Sort conversations by decreasing length for packing; keep indices to unsort later
    batch_lengths = batch['conversation_lengths']
    length_sort = np.argsort(batch_lengths)[::-1].copy()
    unsort = variable(LongTensor(np.argsort(length_sort)))
    conversation_mask_sorted = variable(FloatTensor(batch['conversation_mask'])[length_sort])
    conversation_mask = LongTensor(batch['conversation_mask'])
    lengths_sorted = np.array(batch_lengths)[length_sort]
    sort = length_sort

    options_tensor = LongTensor(batch['utterance_options_list'])
    goldids_variable = LongTensor(batch['next_utterance_gold'])

    ## Used to reshape conversations
    max_num_utterances_batch = batch['max_num_utterances']

    return batch_size, goldids_variable, conversation_mask, elmo_embeddings, input_mask_variable, \
        sort, unsort, conversation_mask_sorted, lengths_sorted, max_num_utterances_batch, \
        options_tensor, goldids_variable
def prepare_for_gpu(self, batch, embedding_layer):
    batch_size = int(len(batch['utterance_list']) / batch['max_num_utterances'])
    max_num_utterances_batch = batch['max_num_utterances']
    max_utterance_length = batch['max_utterance_length']

    ### Prepare embeddings
    if self.args.embedding == "elmo":
        ### The embedding layer does not reside on the GPU when using ELMo
        utterance_embeddings, input_mask = embedding_layer.get_embeddings(batch['utterance_list'])
        batch_ids = None  ## no word-id tensor is built in the ELMo path
    elif self.args.embedding == "glove":
        ## Convert the batch into a LongTensor of word ids
        batch_ids = LongTensor(batch["utterance_word_ids"])
        utterance_embeddings = embedding_layer.lookup(batch_ids)
        input_mask = FloatTensor(batch['input_mask'])
    elif self.args.embedding == "avg_elmo":
        batch_ids = LongTensor(batch["utterance_word_ids"])
        conversation_ids = batch["conversation_ids"]
        utterance_embeddings = embedding_layer.lookup(conversation_ids, max_num_utterances_batch)
        input_mask = FloatTensor(batch['input_mask'])

    if self.args.use_cuda:
        utterance_embeddings = utterance_embeddings.cuda()
    input_mask_variable = variable(input_mask)

    ### Prepare encoder layer: sort conversations by decreasing length for packing
    batch_lengths = batch['conversation_lengths']
    length_sort = np.argsort(batch_lengths)[::-1].copy()
    unsort = variable(LongTensor(np.argsort(length_sort)))
    conversation_mask_sorted = variable(FloatTensor(batch['conversation_mask'])[length_sort])
    conversation_mask = LongTensor(batch['conversation_mask'])
    lengths_sorted = np.array(batch_lengths)[length_sort]
    sort = length_sort

    ### Prepare output layer
    options_tensor = LongTensor(batch['utterance_options_list'])
    goldids_next_variable = LongTensor(batch['next_utterance_gold'])
    goldids_prev_variable = LongTensor(batch['prev_utterance_gold'])
    utterance_labels = LongTensor(batch['label'])

    return batch_size, tuple([batch_ids, utterance_labels]), input_mask, utterance_embeddings, input_mask_variable, \
        variable(conversation_mask.float()), max_num_utterances_batch, \
        max_utterance_length, batch_ids, utterance_labels
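## Sketch of the fields the two prepare_for_gpu() variants above read from `batch`.
## The key names come directly from the code; the shape notes are assumptions
## inferred from how each field is used, not a specification.
##
##   batch['utterance_list']          # flat list of batch_size * max_num_utterances utterances (ELMo path)
##   batch['utterance_word_ids']      # padded word ids, used by the glove / avg_elmo lookups
##   batch['input_mask']              # float mask over word positions
##   batch['conversation_ids']        # conversation-level ids for the avg_elmo lookup
##   batch['conversation_lengths']    # number of utterances per conversation (drives sort/pack)
##   batch['conversation_mask']       # mask over utterance slots in each conversation
##   batch['utterance_options_list']  # candidate utterance ids for the output layer
##   batch['next_utterance_gold'], batch['prev_utterance_gold'], batch['label']  # supervision targets
##   batch['max_num_utterances'], batch['max_utterance_length']                  # padding dimensions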
def forward(self, x, x_mask):
    # Compute sequence lengths from the mask and sort by decreasing length
    lengths = x_mask.data.eq(1).long().sum(1)
    _, idx_sort = torch.sort(lengths, dim=0, descending=True)
    _, idx_unsort = torch.sort(idx_sort, dim=0)

    lengths = list(lengths[idx_sort])
    idx_sort = variable(idx_sort)
    idx_unsort = variable(idx_unsort)

    # Sort x by decreasing length
    x = x.index_select(0, idx_sort)

    rnn_input = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
    self.lstm.flatten_parameters()
    outputs, (hidden, cell) = self.lstm(rnn_input)
    outputs_unpacked, _ = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)

    # Restore the original batch order
    outputs_unpacked = outputs_unpacked[idx_unsort]
    ## Note: hidden and cell are still in sorted order
    return outputs_unpacked, hidden
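## Minimal standalone sketch (not part of the original module) of the
## sort -> pack -> encode -> unpack -> unsort pattern used in forward() above and
## in _forward_padded() below, assuming a plain nn.LSTM; all names and sizes here
## are illustrative only.
##
##   import torch
##   import torch.nn as nn
##
##   x = torch.randn(3, 5, 8)                     # (batch, seq_len, emb_dim)
##   x_mask = torch.tensor([[1, 1, 1, 0, 0],
##                          [1, 1, 1, 1, 1],
##                          [1, 1, 0, 0, 0]])
##   lengths = x_mask.eq(1).long().sum(1)
##   _, idx_sort = torch.sort(lengths, dim=0, descending=True)
##   _, idx_unsort = torch.sort(idx_sort, dim=0)
##
##   lstm = nn.LSTM(8, 16, batch_first=True)
##   packed = nn.utils.rnn.pack_padded_sequence(x[idx_sort],
##                                              lengths[idx_sort].tolist(),
##                                              batch_first=True)
##   out_packed, (h, c) = lstm(packed)
##   out, _ = nn.utils.rnn.pad_packed_sequence(out_packed, batch_first=True)
##   out = out[idx_unsort]                        # back to the original batch order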
def _forward_padded(self, x, x_mask):
    """Slower (significantly), but more precise, encoding that handles padding."""
    # Compute sorted sequence lengths
    lengths = x_mask.data.eq(1).long().sum(1)
    _, idx_sort = torch.sort(lengths, dim=0, descending=True)
    _, idx_unsort = torch.sort(idx_sort, dim=0)

    lengths = list(lengths[idx_sort])
    idx_sort = variable(idx_sort)
    idx_unsort = variable(idx_unsort)

    # Sort x
    x = x.index_select(0, idx_sort)

    # Transpose batch and sequence dims
    x = x.transpose(0, 1)

    # Pack it up
    rnn_input = nn.utils.rnn.pack_padded_sequence(x, lengths)

    # Encode all layers
    outputs = [rnn_input]
    for i in range(self.num_layers):
        rnn_input = outputs[-1]

        # Apply dropout to input
        if self.dropout_rate > 0:
            dropout_input = F.dropout(rnn_input.data,
                                      p=self.dropout_rate,
                                      training=self.training)
            rnn_input = nn.utils.rnn.PackedSequence(dropout_input,
                                                    rnn_input.batch_sizes)
        outputs.append(self.rnns[i](rnn_input)[0])

    # Unpack everything
    for i, o in enumerate(outputs[1:], 1):
        outputs[i] = nn.utils.rnn.pad_packed_sequence(o)[0]

    # Concat hidden layers or take final
    if self.concat_layers:
        output = torch.cat(outputs[1:], 2)
    else:
        output = outputs[-1]

    # Transpose and unsort
    output = output.transpose(0, 1)
    output = output.index_select(0, idx_unsort)

    # Pad up to original batch sequence length
    if output.size(1) != x_mask.size(1):
        padding = torch.zeros(output.size(0),
                              x_mask.size(1) - output.size(1),
                              output.size(2)).type(output.data.type())
        output = torch.cat([output, variable(padding)], 1)

    # Dropout on output layer
    if self.dropout_output and self.dropout_rate > 0:
        output = F.dropout(output, p=self.dropout_rate, training=self.training)

    # The hidden representation is not exposed
    return output, None