Example #1
	def vectorize(self, batch, mode="train"):
		## TODO: Get single example, abstract out batchification
		batch_size = int(len(batch['utterance_list']) / batch['max_num_utterances'])
		max_num_utterances_batch = batch['max_num_utterances']
		max_utterance_length = batch['max_utterance_length']

		## Prepare Token Embeddings
		token_embeddings, token_mask = self.token_encoder.lookup(batch)
		if self.args.use_cuda:
			token_embeddings = token_embeddings.cuda()
		input_mask_variable = variable(token_mask)

		## Prepare Utterance Encoder

		## Prepare Conversation Encoder
		## TODO: Abstraction similar to token embeddings
		conversation_lengths = batch['conversation_lengths']
		conversation_mask = variable(FloatTensor(batch['conversation_mask']))

		## Prepare Output (if it exists)
		gold_next_bow_vectors = LongTensor(batch['next_bow_list'])
		gold_prev_bow_vectors = LongTensor(batch['prev_bow_list'])
		gold_next_bow_mask = LongTensor(batch['next_bow_mask'])
		gold_prev_bow_mask = LongTensor(batch['prev_bow_mask'])
		utterance_labels = LongTensor(batch['label'])

		if mode == "train":
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
				gold_next_bow_mask, gold_prev_bow_mask, gold_next_bow_vectors, gold_prev_bow_vectors, utterance_labels
		else:
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch
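
The batch above is flat: every conversation is padded to max_num_utterances utterances, so batch_size is recovered by dividing the utterance count. A minimal, self-contained sketch of that layout on synthetic tensors (all sizes below are illustrative, not from the project):

import torch

batch_size, max_num_utterances, hidden = 2, 3, 8
# One row per utterance, conversations concatenated back to back
flat_utterances = torch.randn(batch_size * max_num_utterances, hidden)
# Recover the per-conversation view used by the downstream encoders
per_conversation = flat_utterances.view(batch_size, max_num_utterances, hidden)
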
Example #2
    def vectorize(self, batch, mode="train"):

        batch_size = int(
            len(batch['utterance_list']) / batch['max_num_utterances'])
        max_num_utterances_batch = batch['max_num_utterances']
        max_utterance_length = batch['max_utterance_length']

        ## Prepare Token Embeddings
        # TODO: Batch has dummy utterances that need to be handled specifically in case of average ELMo
        token_embeddings, token_mask = self.token_encoder.lookup(batch)
        if self.args.use_cuda:
            token_embeddings = token_embeddings.cuda()
        input_mask_variable = variable(token_mask)

        conversation_lengths = batch['conversation_lengths']
        conversation_mask = variable(FloatTensor(batch['conversation_mask']))

        ## For decoder prepare initial state
        conversation_ids = batch['utterance_word_ids']
        start_state = variable(LongTensor([self.vocabulary.sos] * batch_size))
        input = {}
        input["start_token_ids"] = start_state
        start_encoding = self.token_encoder.lookup_by_name(
            input, "start_token_ids")

        # Max utterance length will be the same for next and previous utterance lists as well
        # Needs access to the token encoder itself
        if mode == "train":
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, \
                max_num_utterances_batch, max_utterance_length, \
                start_encoding, conversation_ids
        else:
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, \
                max_num_utterances_batch, max_utterance_length, \
                start_encoding
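
The decoder start state above comes from looking up a batch of start-of-sequence ids through the project's token encoder. A rough, self-contained sketch of the same idea with a plain nn.Embedding (vocab_size, emb_dim and sos_id below are hypothetical placeholders):

import torch
import torch.nn as nn

vocab_size, emb_dim, batch_size, sos_id = 100, 32, 4, 1   # hypothetical values
embedding = nn.Embedding(vocab_size, emb_dim)
# One start-of-sequence id per conversation in the batch
start_ids = torch.full((batch_size,), sos_id, dtype=torch.long)
start_encoding = embedding(start_ids)   # (batch_size, emb_dim)
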
Example #3
    def vectorize(self, batch, mode="train"):
        ## TODO: Get single example, abstract out batchification
        batch_size = int(
            len(batch['utterance_list']) / batch['max_num_utterances'])
        max_num_utterances_batch = batch['max_num_utterances']

        ## Prepare Token Embeddings
        token_embeddings, token_mask = self.token_encoder.lookup(batch)
        if self.args.use_cuda:
            token_embeddings = token_embeddings.cuda()
        input_mask_variable = variable(token_mask)

        ## Prepare Utterance Encoder

        ## Prepare Conversation Encoder
        ## TODO: Abstraction similar to token embeddings
        conversation_lengths = batch['conversation_lengths']
        conversation_mask = variable(FloatTensor(batch['conversation_mask']))

        ## Prepare Output (if it exists)
        ## TODO: Eliminate options tensor to make faster
        options_tensor = LongTensor(batch['utterance_options_list'])
        goldids_next_variable = LongTensor(batch['next_utterance_gold'])
        goldids_prev_variable = LongTensor(batch['prev_utterance_gold'])

        if mode == "train":
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
                options_tensor, goldids_next_variable, goldids_prev_variable
        else:
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
                options_tensor
Example #4
    def vectorize(self, batch, mode="train"):
        batch_size = int(
            len(batch['utterance_list']) / batch['max_num_utterances'])
        max_num_utterances_batch = batch['max_num_utterances']

        # TODO: Batch has dummy utterances that need to be handled specifically in case of average ELMo
        token_embeddings, token_mask = self.token_encoder.lookup(batch)

        if self.args.use_cuda:
            token_embeddings = token_embeddings.cuda()
        input_mask_variable = variable(token_mask)

        conversation_lengths = batch['conversation_lengths']
        conversation_mask = variable(FloatTensor(batch['conversation_mask']))

        if mode == "train":
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch
        else:
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch
Example #5
	def forward(self, x, x_mask):
		lengths = x_mask.data.eq(1).long().sum(1)
		_, idx_sort = torch.sort(lengths, dim=0, descending=True)
		_, idx_unsort = torch.sort(idx_sort, dim=0)
		lengths = list(lengths[idx_sort])
		idx_sort = variable(idx_sort)
		idx_unsort = variable(idx_unsort)

		# Sort x
		x = x.index_select(0, idx_sort)

		rnn_input = pack_padded_sequence(x, lengths, batch_first=True)
		self.lstm.flatten_parameters()
		outputs, (hidden, cell) = self.lstm(rnn_input)
		outputs_unpacked, _ = pad_packed_sequence(outputs, batch_first=True)
		outputs_unpacked = outputs_unpacked[idx_unsort]
		outputs_unpacked_directions = outputs_unpacked.view(outputs_unpacked.shape[0], outputs_unpacked.shape[1],
															2, self.hidden_size)
		## hidden and cell are still in the sorted order
		if outputs_unpacked.shape[1] != x.shape[1]:
			print("Warning: unpacked output length does not match the padded input length")
		return outputs_unpacked_directions, hidden
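
For reference, the sort → pack → encode → unpack → unsort pattern in this forward pass, reduced to a self-contained sketch on synthetic tensors (sizes, lengths and the LSTM configuration below are illustrative only):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

batch, seq_len, in_dim, hidden = 4, 7, 16, 32
true_lengths = torch.tensor([7, 5, 3, 2])
x = torch.randn(batch, seq_len, in_dim)
x_mask = (torch.arange(seq_len).unsqueeze(0) < true_lengths.unsqueeze(1)).float()

lengths = x_mask.long().sum(1)
_, idx_sort = torch.sort(lengths, dim=0, descending=True)
_, idx_unsort = torch.sort(idx_sort, dim=0)

lstm = nn.LSTM(in_dim, hidden, batch_first=True, bidirectional=True)
packed = pack_padded_sequence(x[idx_sort], lengths[idx_sort].tolist(), batch_first=True)
outputs, (h, c) = lstm(packed)
unpacked, _ = pad_packed_sequence(outputs, batch_first=True)
unpacked = unpacked[idx_unsort]                       # restore the original batch order
per_direction = unpacked.view(batch, -1, 2, hidden)   # split the forward/backward directions
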
Example #6
	def vectorize(self, batch, mode="train"):
		batch_size = int(len(batch['utterance_list']) / batch['max_num_utterances'])
		max_num_utterances_batch = batch['max_num_utterances']
		max_utterance_length = batch['max_utterance_length']

		## Prepare Token Embeddings
		token_embeddings, token_mask = self.token_encoder.lookup(batch)
		if self.args.use_cuda:
			token_embeddings = token_embeddings.cuda()
		input_mask_variable = variable(token_mask)

		conversation_lengths = batch['conversation_lengths']
		conversation_mask = variable(FloatTensor(batch['conversation_mask']))

		## Prepare Output (if it exists)
		bow_list = LongTensor(batch['utterance_bow_list'])
		bow_mask = LongTensor(batch['utterance_bow_mask'])

		if mode == "train":
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
				bow_list, bow_mask
		else:
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch
Example #7
    def forward(self, *input):
        [
            token_embeddings, input_mask_variable, conversation_mask,
            max_num_utterances_batch
        ] = input
        conversation_batch_size = int(token_embeddings.shape[0] /
                                      max_num_utterances_batch)

        if self.args.fixed_utterance_encoder:
            utterance_encodings = token_embeddings
        else:
            utterance_encodings = self.dialogue_embedder.utterance_encoder(
                token_embeddings, input_mask_variable)
        utterance_encodings = utterance_encodings.view(
            conversation_batch_size, max_num_utterances_batch,
            utterance_encodings.shape[1])
        utterance_encodings_next = utterance_encodings[:, 1:, :].contiguous()
        utterance_encodings_prev = utterance_encodings[:, 0:-1, :].contiguous()

        conversation_encoded = self.dialogue_embedder([
            token_embeddings, input_mask_variable, conversation_mask,
            max_num_utterances_batch
        ])

        conversation_encoded_forward = conversation_encoded[:, 0, :]
        conversation_encoded_backward = conversation_encoded[:, 1, :]
        #conversation_encoded_forward = conversation_encoded.view(conversation_encoded.shape[0], 1, -1).squeeze(1)
        #conversation_encoded_backward = conversation_encoded.view(conversation_encoded.shape[0], 1, -1).squeeze(1)

        conversation_encoded_forward_reassembled = conversation_encoded_forward.view(
            conversation_batch_size, max_num_utterances_batch,
            conversation_encoded_forward.shape[1])
        conversation_encoded_backward_reassembled = conversation_encoded_backward.view(
            conversation_batch_size, max_num_utterances_batch,
            conversation_encoded_backward.shape[1])

        # Shift to prepare next and previous utterance encodings
        conversation_encoded_current1 = conversation_encoded_forward_reassembled[:, 0:-1, :].contiguous()
        conversation_encoded_next = conversation_encoded_forward_reassembled[:, 1:, :].contiguous()
        conversation_mask_next = conversation_mask[:, 1:].contiguous()

        conversation_encoded_current2 = conversation_encoded_backward_reassembled[:, 1:, :].contiguous()
        conversation_encoded_previous = conversation_encoded_backward_reassembled[:, 0:-1, :].contiguous()
        # conversation_mask_previous = conversation_mask[:, 0:-1].contiguous()

        # Gold Labels
        gold_indices = variable(
            LongTensor(range(conversation_encoded_current1.shape[1]))).view(
                -1, 1).repeat(conversation_batch_size, 1)

        # Linear transformation of both utterance representations
        transformed_current1 = self.current_dl_trasnformer1(
            conversation_encoded_current1)
        transformed_current2 = self.current_dl_trasnformer2(
            conversation_encoded_current2)

        transformed_next = self.next_dl_trasnformer(conversation_encoded_next)
        transformed_prev = self.prev_dl_trasnformer(
            conversation_encoded_previous)
        # transformed_next = self.next_dl_trasnformer(utterance_encodings_next)
        # transformed_prev = self.prev_dl_trasnformer(utterance_encodings_prev)

        # Output layer: Generate Scores for next and prev utterances
        next_logits = torch.bmm(transformed_current1,
                                transformed_next.transpose(2, 1))
        prev_logits = torch.bmm(transformed_current2,
                                transformed_prev.transpose(2, 1))

        # Computing custom masked cross entropy
        next_log_probs = F.log_softmax(next_logits, dim=2)
        prev_log_probs = F.log_softmax(prev_logits, dim=2)

        losses_next = -torch.gather(
            next_log_probs.view(next_log_probs.shape[0] * next_log_probs.shape[1], -1),
            dim=1, index=gold_indices)
        losses_prev = -torch.gather(
            prev_log_probs.view(prev_log_probs.shape[0] * prev_log_probs.shape[1], -1),
            dim=1, index=gold_indices)

        flat_mask_next = conversation_mask_next.view(
            conversation_mask_next.shape[0] * conversation_mask_next.shape[1])
        losses_masked = (losses_next.squeeze(1) * flat_mask_next) \
            + (losses_prev.squeeze(1) * flat_mask_next)

        loss = losses_masked.sum() / (2 * conversation_mask_next.float().sum())

        return loss
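
The loss at the end is a hand-rolled masked cross-entropy: log-softmax over the candidate dimension, gather the gold log-probability, zero out padded positions, then average. A minimal sketch of just that step (shapes and the mask are illustrative):

import torch
import torch.nn.functional as F

batch, steps, num_candidates = 3, 5, 5
logits = torch.randn(batch, steps, num_candidates)
gold = torch.arange(steps).view(-1, 1).repeat(batch, 1)   # (batch * steps, 1)
mask = torch.ones(batch, steps)
mask[2, 3:] = 0                                           # pretend the last positions are padding

log_probs = F.log_softmax(logits, dim=2)
losses = -torch.gather(log_probs.view(batch * steps, -1), dim=1, index=gold)
loss = (losses.squeeze(1) * mask.view(batch * steps)).sum() / mask.sum()
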
Example #8
    def _forward_padded(self, x, x_mask):
        """Slower (significantly), but more precise, encoding that handles
        padding.
        """
        # Compute sorted sequence lengths
        lengths = x_mask.data.eq(1).long().sum(1)
        _, idx_sort = torch.sort(lengths, dim=0, descending=True)
        _, idx_unsort = torch.sort(idx_sort, dim=0)

        lengths = list(lengths[idx_sort])
        idx_sort = variable(idx_sort)
        idx_unsort = variable(idx_unsort)

        # Sort x
        x = x.index_select(0, idx_sort)

        # Transpose batch and sequence dims
        x = x.transpose(0, 1)

        # Pack it up
        rnn_input = nn.utils.rnn.pack_padded_sequence(x, lengths)

        # Encode all layers
        outputs = [rnn_input]
        for i in range(self.num_layers):
            rnn_input = outputs[-1]

            # Apply dropout to input
            if self.dropout_rate > 0:
                dropout_input = F.dropout(rnn_input.data,
                                          p=self.dropout_rate,
                                          training=self.training)
                rnn_input = nn.utils.rnn.PackedSequence(
                    dropout_input, rnn_input.batch_sizes)
            outputs.append(self.rnns[i](rnn_input)[0])

        # Unpack everything
        for i, o in enumerate(outputs[1:], 1):
            outputs_unpacked = nn.utils.rnn.pad_packed_sequence(o)[0]
            outputs[i] = outputs_unpacked.view(outputs_unpacked.shape[0],
                                               outputs_unpacked.shape[1], 2,
                                               self.hidden_size)

        # Concat hidden layers or take final
        if self.concat_layers:
            output = torch.cat(outputs[1:], 3)
        else:
            output = outputs[-1]

        # Transpose and unsort
        output = output.transpose(0, 1)
        output = output.index_select(0, idx_unsort)

        # Pad up to original batch sequence length
        if output.size(1) != x_mask.size(1):
            padding = torch.zeros(output.size(0),
                                  x_mask.size(1) - output.size(1),
                                  output.size(2)).type(output.data.type())
            output = torch.cat([output, variable(padding)], 1)

        # Dropout on output layer
        if self.dropout_output and self.dropout_rate > 0:
            output = F.dropout(output,
                               p=self.dropout_rate,
                               training=self.training)
        # hidden representation is not exposed
        return output, None
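
The per-layer dropout above is applied to the flat .data tensor of the PackedSequence, which is then re-wrapped before being fed to the next RNN layer. A small sketch of that step in isolation (tensor sizes are illustrative):

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(6, 4, 8)            # (seq_len, batch, features), sequence-first as above
lengths = [6, 5, 3, 2]              # already sorted in decreasing order
packed = nn.utils.rnn.pack_padded_sequence(x, lengths)
dropped = F.dropout(packed.data, p=0.3, training=True)
packed = nn.utils.rnn.PackedSequence(dropped, packed.batch_sizes)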