def lookup(self, input):
	"""Gather pre-computed utterance embeddings for each conversation in the
	batch and pad every conversation to the batch-wide max utterance count.

	Args:
		input: batch dict; reads 'conversation_ids', 'input_mask' and
			'max_num_utterances'.

	Returns:
		Tuple of (FloatTensor of stacked per-utterance embeddings,
		FloatTensor input mask). Padded rows contain random vectors; the
		mask presumably distinguishes real from padded utterances —
		TODO confirm against callers.
	"""
	input_mask = FloatTensor(input["input_mask"])
	max_num_utterances_batch = input['max_num_utterances']

	batch_embeddings = []
	for conversation_id in input["conversation_ids"]:
		# self.embeddings[conversation_id] is a list of per-utterance vectors.
		embeddings = self.embeddings[conversation_id]
		batch_embeddings += embeddings
		# Pad with random vectors so every conversation contributes exactly
		# max_num_utterances_batch rows.
		num_padding = max_num_utterances_batch - len(embeddings)
		batch_embeddings += [np.random.rand(self.args.embed_size).tolist()
							 for _ in range(num_padding)]
	return FloatTensor(batch_embeddings), input_mask
	def lookup_by_name(self, input, name_embed, name_mask):
		"""Like lookup(), but the keys for the conversation-id list and the
		mask are supplied by the caller instead of being hard-coded.

		Args:
			input: batch dict.
			name_embed: key under which the conversation-id list is stored.
			name_mask: key under which the float mask is stored.

		Returns:
			Tuple of (FloatTensor of stacked per-utterance embeddings,
			FloatTensor input mask).
		"""
		conversation_id_list = input[name_embed]
		input_mask = FloatTensor(input[name_mask])
		# Generally remains the same
		max_num_utterances_batch = input['max_num_utterances']

		batch_embeddings = []
		for conversation_id in conversation_id_list:
			embeddings = self.embeddings[conversation_id]
			batch_embeddings += embeddings
			# Random padding up to the batch-wide max number of utterances.
			num_padding = max_num_utterances_batch - len(embeddings)
			batch_embeddings += [np.random.rand(self.args.embed_size).tolist()
								 for _ in range(num_padding)]
		return FloatTensor(batch_embeddings), input_mask
# --- Example #3 (scraped-snippet separator) ---
	def vectorize(self, batch, mode = "train"):
		"""Turn a raw batch dict into model-ready tensors.

		Args:
			batch: batch dict; reads 'utterance_list', 'max_num_utterances',
				'conversation_mask' and the gold bag-of-words / label fields.
			mode: "train" additionally returns the gold outputs.

		Returns:
			Tuple of batch size, token embeddings, token mask variable,
			conversation mask and max utterances per conversation; in train
			mode also the gold next/prev bag-of-words masks and vectors and
			the utterance labels.
		"""
		## TODO: Get single example, abstract out batchification
		batch_size = int(len(batch['utterance_list']) / batch['max_num_utterances'])
		max_num_utterances_batch = batch['max_num_utterances']

		## Prepare Token Embeddings
		token_embeddings, token_mask = self.token_encoder.lookup(batch)
		if self.args.use_cuda:
			token_embeddings = token_embeddings.cuda()
		input_mask_variable = variable(token_mask)

		## Prepare Conversation Encoder
		## TODO: Abstraction similar to token embeddings
		conversation_mask = variable(FloatTensor(batch['conversation_mask']))

		## Prepare Output (if it exists)
		gold_next_bow_vectors = LongTensor(batch['next_bow_list'])
		gold_prev_bow_vectors = LongTensor(batch['prev_bow_list'])
		gold_next_bow_mask = LongTensor(batch['next_bow_mask'])
		gold_prev_bow_mask = LongTensor(batch['prev_bow_mask'])
		utterance_labels = LongTensor(batch['label'])

		if mode == "train":
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
				gold_next_bow_mask, gold_prev_bow_mask, gold_next_bow_vectors, gold_prev_bow_vectors, utterance_labels
		else:
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch
    def vectorize(self, batch, mode="train"):
        """Turn a raw batch dict into model-ready tensors, including the
        decoder's initial SOS-token encoding.

        Args:
            batch: batch dict; reads 'utterance_list', 'max_num_utterances',
                'max_utterance_length', 'conversation_mask' and
                'utterance_word_ids'.
            mode: "train" additionally returns the conversation token ids.

        Returns:
            Tuple of batch size, token embeddings, token mask variable,
            conversation mask, max utterances, max utterance length and the
            SOS start encoding; in train mode also the conversation word ids.
        """
        batch_size = int(
            len(batch['utterance_list']) / batch['max_num_utterances'])
        max_num_utterances_batch = batch['max_num_utterances']
        max_utterance_length = batch['max_utterance_length']

        ## Prepare Token Embeddings
        # TODO: Batch has dummy utterances that need to be specifically handled in case of average elmo
        token_embeddings, token_mask = self.token_encoder.lookup(batch)
        if self.args.use_cuda:
            token_embeddings = token_embeddings.cuda()
        input_mask_variable = variable(token_mask)

        conversation_mask = variable(FloatTensor(batch['conversation_mask']))

        ## For decoder prepare initial state
        conversation_ids = batch['utterance_word_ids']
        start_state = variable(LongTensor([self.vocabulary.sos] * batch_size))
        # Renamed from `input` to avoid shadowing the builtin.
        # NOTE(review): other lookup_by_name implementations return an
        # (embeddings, mask) tuple — confirm callers expect start_encoding
        # to be that tuple rather than a bare tensor.
        decoder_input = {"start_token_ids": start_state}
        start_encoding = self.token_encoder.lookup_by_name(
            decoder_input, "start_token_ids")

        # Max utterance length will be the same for next and previous utterance lists as well
        # Needs access to the token encoder itself
        if mode == "train":
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, \
                max_num_utterances_batch, max_utterance_length, \
                start_encoding, conversation_ids
        else:
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, \
                max_num_utterances_batch, max_utterance_length, \
                start_encoding
# --- Example #5 (scraped-snippet separator) ---
    def vectorize(self, batch, mode="train"):
        """Turn a raw batch dict into model-ready tensors for the
        next/previous-utterance ranking task.

        Args:
            batch: batch dict; reads 'utterance_list', 'max_num_utterances',
                'conversation_mask', 'utterance_options_list' and the gold
                next/prev utterance ids.
            mode: "train" additionally returns the gold next/prev ids.

        Returns:
            Tuple of batch size, token embeddings, token mask variable,
            conversation mask, max utterances and the options tensor; in
            train mode also the gold next and previous utterance ids.
        """
        ## TODO: Get single example, abstract out batchification
        batch_size = int(
            len(batch['utterance_list']) / batch['max_num_utterances'])
        max_num_utterances_batch = batch['max_num_utterances']

        ## Prepare Token Embeddings
        token_embeddings, token_mask = self.token_encoder.lookup(batch)
        if self.args.use_cuda:
            token_embeddings = token_embeddings.cuda()
        input_mask_variable = variable(token_mask)

        ## Prepare Conversation Encoder
        ## TODO: Abstraction similar to token embeddings
        conversation_mask = variable(FloatTensor(batch['conversation_mask']))

        ## Prepare Output (if it exists)
        ## TODO: Eliminate options tensor to make faster
        options_tensor = LongTensor(batch['utterance_options_list'])
        goldids_next_variable = LongTensor(batch['next_utterance_gold'])
        goldids_prev_variable = LongTensor(batch['prev_utterance_gold'])

        if mode == "train":
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
             options_tensor, goldids_next_variable, goldids_prev_variable
        else:
            return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
                options_tensor
	def lookup_by_name(self, input, name_embed, name_mask = None):
		"""Embed the token ids stored under `name_embed` in `input`.

		Args:
			input: dict of batch fields.
			name_embed: key holding the token-id matrix to embed.
			name_mask: optional key holding the float mask; when omitted or
				missing from `input`, the returned mask is None.

		Returns:
			Tuple of (embedded utterances, FloatTensor mask or None).
		"""
		input_token_ids = LongTensor(input[name_embed])
		utterance_embeddings = self.embed_layer(input_token_ids)
		# Explicit None-guard instead of relying on `None in input`
		# evaluating to False.
		if name_mask is not None and name_mask in input:
			input_mask = FloatTensor(input[name_mask])
		else:
			input_mask = None
		return utterance_embeddings, input_mask
# --- Example #7 (scraped-snippet separator) ---
    def vectorize(self, batch, mode="train"):
        """Turn a raw batch dict into model-ready tensors.

        Args:
            batch: batch dict; reads 'utterance_list', 'max_num_utterances'
                and 'conversation_mask'.
            mode: accepted for interface compatibility; the original code
                returned the identical tuple for both branches, so it has
                no effect on the result.

        Returns:
            Tuple of batch size, token embeddings, token mask variable,
            conversation mask and max utterances per conversation.
        """
        batch_size = int(
            len(batch['utterance_list']) / batch['max_num_utterances'])
        max_num_utterances_batch = batch['max_num_utterances']

        # TODO: Batch has dummy utternances that need to be specifically handled incase of average elmo
        token_embeddings, token_mask = self.token_encoder.lookup(batch)

        if self.args.use_cuda:
            token_embeddings = token_embeddings.cuda()
        input_mask_variable = variable(token_mask)

        conversation_mask = variable(FloatTensor(batch['conversation_mask']))

        # Train and eval modes return the same tuple.
        return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch
	def vectorize(self, batch, mode = "train"):
		"""Turn a raw batch dict into model-ready tensors for the
		bag-of-words prediction task.

		Args:
			batch: batch dict; reads 'utterance_list', 'max_num_utterances',
				'conversation_mask', 'utterance_bow_list' and
				'utterance_bow_mask'.
			mode: "train" additionally returns the bag-of-words targets.

		Returns:
			Tuple of batch size, token embeddings, token mask variable,
			conversation mask and max utterances; in train mode also the
			bag-of-words list and mask tensors.
		"""
		batch_size = int(len(batch['utterance_list']) / batch['max_num_utterances'])
		max_num_utterances_batch = batch['max_num_utterances']

		## Prepare Token Embeddings
		token_embeddings, token_mask = self.token_encoder.lookup(batch)
		if self.args.use_cuda:
			token_embeddings = token_embeddings.cuda()
		input_mask_variable = variable(token_mask)

		conversation_mask = variable(FloatTensor(batch['conversation_mask']))

		## Prepare Output (if it exists)
		bow_list = LongTensor(batch['utterance_bow_list'])
		bow_mask = LongTensor(batch['utterance_bow_mask'])

		if mode == "train":
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch, \
				bow_list, bow_mask
		else:
			return batch_size, token_embeddings, input_mask_variable, conversation_mask, max_num_utterances_batch
	def lookup(self, input):
		"""Embed the batch's utterance token ids and return them together
		with the corresponding float input mask."""
		token_ids = LongTensor(input["utterance_word_ids"])
		mask = FloatTensor(input['input_mask'])
		return self.embed_layer(token_ids), mask