    def prepare_for_gpu(self, batch, embedding_layer):
        ## Output format of prepare_for_gpu:
        ## batch_size, gold_output, *input
        batch_size = len(batch['utterance_list']) // batch['max_num_utterances']

        elmo_embeddings, input_mask = embedding_layer.get_embeddings(
            batch['utterance_list'])
        if self.args.use_cuda:
            elmo_embeddings = elmo_embeddings.cuda()
        input_mask_variable = variable(input_mask).float()
        batch_lengths = batch['conversation_lengths']

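        ## Sort conversations by descending length so the encoder can run
        ## pack_padded_sequence; `unsort` is the inverse permutation that
        ## restores the original batch order after encoding.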
        length_sort = np.argsort(batch_lengths)[::-1].copy()
        unsort = variable(LongTensor(np.argsort(length_sort)))
        conversation_mask_sorted = variable(
            FloatTensor(batch['conversation_mask'])[length_sort])
        conversation_mask = LongTensor(batch['conversation_mask'])
        lengths_sorted = np.array(batch_lengths)[length_sort]
        sort = length_sort

        options_tensor = LongTensor(batch['utterance_options_list'])
        goldids_variable = LongTensor(batch['next_utterance_gold'])
        ## max_num_utterances is needed to reshape flat utterances back into conversations
        max_num_utterances_batch = batch['max_num_utterances']
        return batch_size, goldids_variable, conversation_mask, elmo_embeddings, input_mask_variable, \
            sort, unsort, conversation_mask_sorted, lengths_sorted, max_num_utterances_batch, \
            options_tensor, goldids_variable
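The sort/unsort pair above is the standard preparation for pack_padded_sequence, which requires lengths in descending order. A minimal standalone sketch (with made-up lengths) shows why np.argsort(length_sort) recovers the original order:

import numpy as np

lengths = np.array([3, 7, 5])                    # hypothetical conversation lengths
sort = np.argsort(lengths)[::-1].copy()          # indices that sort descending
unsort = np.argsort(sort)                        # inverse permutation
assert (lengths[sort][unsort] == lengths).all()  # original order restored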
Example #2
    def prepare_for_gpu(self, batch, embedding_layer):
        batch_size = len(batch['utterance_list']) // batch['max_num_utterances']
        max_num_utterances_batch = batch['max_num_utterances']
        max_utterance_length = batch['max_utterance_length']
        ### Prepare embeddings
        if self.args.embedding == "elmo":
            ### The embedding layer does not reside on CUDA when using ELMo
            utterance_embeddings, input_mask = embedding_layer.get_embeddings(
                batch['utterance_list'])

        elif self.args.embedding == "glove":
            ## Convert the batch of word ids into a LongTensor for embedding lookup
            batch_ids = LongTensor(batch["utterance_word_ids"])
            utterance_embeddings = embedding_layer.lookup(batch_ids)
            input_mask = FloatTensor(batch['input_mask'])

        elif self.args.embedding == "avg_elmo":
            batch_ids = LongTensor(batch["utterance_word_ids"])
            conversation_ids = batch["conversation_ids"]
            utterance_embeddings = embedding_layer.lookup(
                conversation_ids, max_num_utterances_batch)
            input_mask = FloatTensor(batch['input_mask'])

        if self.args.use_cuda:
            utterance_embeddings = utterance_embeddings.cuda()
        input_mask_variable = variable(input_mask)

        ### Prepare Encoder layer
        batch_lengths = batch['conversation_lengths']
        length_sort = np.argsort(batch_lengths)[::-1].copy()
        unsort = variable(LongTensor(np.argsort(length_sort)))
        conversation_mask_sorted = variable(
            FloatTensor(batch['conversation_mask'])[length_sort])
        conversation_mask = LongTensor(batch['conversation_mask'])
        lengths_sorted = np.array(batch_lengths)[length_sort]
        sort = length_sort

        ### Prepare output layer
        options_tensor = LongTensor(batch['utterance_options_list'])
        goldids_next_variable = LongTensor(batch['next_utterance_gold'])
        goldids_prev_variable = LongTensor(batch['prev_utterance_gold'])
        utterance_labels = LongTensor(batch['label'])

        ## Note: batch_ids is only bound in the "glove" and "avg_elmo" branches above
        return batch_size, (batch_ids, utterance_labels), input_mask, utterance_embeddings, \
            input_mask_variable, variable(conversation_mask.float()), max_num_utterances_batch, \
            max_utterance_length, batch_ids, utterance_labels
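To make the "glove" branch concrete, here is a hypothetical minimal embedding layer with the lookup(batch_ids) interface the snippet assumes (the class name and constructor are illustrative, not from the source):

import torch
import torch.nn as nn

class GloveEmbedder(nn.Module):
    def __init__(self, vocab_size, dim, pretrained=None):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, dim, padding_idx=0)
        if pretrained is not None:  # (vocab_size, dim) tensor of GloVe vectors
            self.embedding.weight.data.copy_(pretrained)

    def lookup(self, batch_ids):
        # batch_ids: LongTensor of word ids, (num_utterances, max_utterance_length)
        return self.embedding(batch_ids)

Example #3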
	def forward(self, x, x_mask):
		lengths = x_mask.data.eq(1).long().sum(1)
		_, idx_sort = torch.sort(lengths, dim=0, descending=True)
		_, idx_unsort = torch.sort(idx_sort, dim=0)
		lengths = list(lengths[idx_sort])
		idx_sort = variable(idx_sort)
		idx_unsort = variable(idx_unsort)

		# Sort x
		x = x.index_select(0, idx_sort)

		rnn_input = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
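		# flatten_parameters() compacts the LSTM weights into one contiguous
		# block so cuDNN does not have to copy them on every forward pass.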
		self.lstm.flatten_parameters()
		outputs, (hidden, cell) = self.lstm(rnn_input)
		outputs_unpacked, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)
		outputs_unpacked = outputs_unpacked[idx_unsort]
		## hidden and cell are still in sorted order; only outputs are unsorted here
		return outputs_unpacked, hidden
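All of these snippets call a variable(...) helper that is not shown in the excerpt. A plausible minimal definition, assuming PyTorch >= 0.4 where Tensor and Variable are merged (on older versions it would wrap torch.autograd.Variable instead):

import torch

def variable(tensor, use_cuda=torch.cuda.is_available()):
    # Hypothetical helper: move a tensor to the GPU when CUDA is enabled.
    return tensor.cuda() if use_cuda else tensor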
Example #4
    def _forward_padded(self, x, x_mask):
        """Slower (significantly), but more precise, encoding that handles
        padding.
        """
        # Compute sorted sequence lengths
        lengths = x_mask.data.eq(1).long().sum(1)
        _, idx_sort = torch.sort(lengths, dim=0, descending=True)
        _, idx_unsort = torch.sort(idx_sort, dim=0)

        lengths = list(lengths[idx_sort])
        idx_sort = variable(idx_sort)
        idx_unsort = variable(idx_unsort)

        # Sort x
        x = x.index_select(0, idx_sort)

        # Transpose batch and sequence dims
        x = x.transpose(0, 1)

        # Pack it up
        rnn_input = nn.utils.rnn.pack_padded_sequence(x, lengths)

        # Encode all layers
        outputs = [rnn_input]
        for i in range(self.num_layers):
            rnn_input = outputs[-1]

            # Apply dropout to input
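            # F.dropout cannot consume a PackedSequence directly, so apply it
            # to the flat .data tensor and rebuild the PackedSequence by hand.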
            if self.dropout_rate > 0:
                dropout_input = F.dropout(rnn_input.data,
                                          p=self.dropout_rate,
                                          training=self.training)
                rnn_input = nn.utils.rnn.PackedSequence(
                    dropout_input, rnn_input.batch_sizes)
            outputs.append(self.rnns[i](rnn_input)[0])

        # Unpack everything
        for i, o in enumerate(outputs[1:], 1):
            outputs[i] = nn.utils.rnn.pad_packed_sequence(o)[0]

        # Concat hidden layers or take final
        if self.concat_layers:
            output = torch.cat(outputs[1:], 2)
        else:
            output = outputs[-1]

        # Transpose and unsort
        output = output.transpose(0, 1)
        output = output.index_select(0, idx_unsort)

        # Pad up to original batch sequence length
        if output.size(1) != x_mask.size(1):
            padding = torch.zeros(output.size(0),
                                  x_mask.size(1) - output.size(1),
                                  output.size(2)).type(output.data.type())
            output = torch.cat([output, variable(padding)], 1)

        # Dropout on output layer
        if self.dropout_output and self.dropout_rate > 0:
            output = F.dropout(output,
                               p=self.dropout_rate,
                               training=self.training)
        # hidden representation is not exposed
        return output, None
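As a standalone illustration of the pack/encode/unpack pattern above (all shapes are hypothetical), note that pad_packed_sequence only pads up to the longest length actually present in the batch, which is exactly why _forward_padded re-pads the output to x_mask.size(1) at the end:

import torch
import torch.nn as nn

x = torch.randn(4, 2, 8)   # (seq_len, batch, feat), time-major as above
lengths = [4, 2]           # must already be sorted in descending order

packed = nn.utils.rnn.pack_padded_sequence(x, lengths)
lstm = nn.LSTM(input_size=8, hidden_size=16)
packed_out, _ = lstm(packed)
out, out_lengths = nn.utils.rnn.pad_packed_sequence(packed_out)

print(out.shape)       # torch.Size([4, 2, 16]) -- padded to max(lengths)
print(out_lengths)     # tensor([4, 2])

If the longest sequence were shorter than x.size(0) (say lengths = [3, 2]), out.size(0) would shrink to 3, and the zero re-padding step in _forward_padded restores the original width.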