import os
import logging

import numpy as np
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset

# VQATokenizer, GloveEmbeddings, get_img_builder and VQADataset are
# project-local helpers; their module paths are not shown in this snippet
# and are assumed to be importable from this repository.

logger = logging.getLogger(__name__)


class DataLoader(object):
    def __init__(self, config, data_dir, batch_size=1):
        super(DataLoader, self).__init__()
        self.batch_size = batch_size
        self.use_glove = config['model']['glove']

        # Load dictionary
        logger.info('Loading dictionary..')
        self.tokenizer = VQATokenizer(os.path.join(data_dir, config["dico_name"]))

        # Load glove
        self.glove = None
        if self.use_glove:
            logger.info('Loading glove..')
            self.glove = GloveEmbeddings(os.path.join(data_dir, config["glove_name"]))

    def process_data(self, image, question):
        '''
        Arguments:
            image    : image in (batch, height, width, channels) format; batch=1 for a single image
            question : list of questions to encode

        Returns:
            image     : torch.autograd.Variable in (batch, channels, height, width) format
            tokens    : question tokens
            glove_emb : glove embeddings of the question (None when glove is disabled)
        '''
        # convert the image into a torch.autograd.Variable
        image = torch.autograd.Variable(torch.Tensor(image).cuda())
        # reshape the image to (batch, channels, height, width) format
        image = image.permute(0, 3, 1, 2).contiguous()

        # tokenize the questions and convert them into a torch.autograd.Variable
        tokens = [self.tokenizer.encode_question(x)[0] for x in question]
        words = [self.tokenizer.encode_question(x)[1] for x in question]
        max_len = max([len(x) for x in tokens])  # max length of the question

        # pad the additional length with the unknown token '<unk>'
        for x in tokens:
            for i in range(max_len - len(x)):
                x.append(self.tokenizer.word2i['<unk>'])
        for x in words:
            for i in range(max_len - len(x)):
                x.append('<unk>')
        tokens = Variable(torch.LongTensor(tokens).cuda())

        # if required, get the glove embeddings of the question words
        glove_emb = None
        if self.use_glove:
            glove_emb = [self.glove.get_embeddings(x) for x in words]
            glove_emb = Variable(torch.Tensor(glove_emb).cuda())

        return image, tokens, glove_emb
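# --- Usage sketch (illustrative, not part of the original source) ---
# A minimal example of driving process_data with a single dummy image and one
# question; `config` and `data_dir` are assumed to exist, and a CUDA device is
# required since the tensors are moved to the GPU.
#
#   loader = DataLoader(config, data_dir, batch_size=1)
#   image = np.random.rand(1, 224, 224, 3).astype(np.float32)  # dummy HWC image
#   image, tokens, glove_emb = loader.process_data(image, ["what color is the cat?"])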
class DataLoader(object):
    def __init__(self, config, data_dir, img_dir, year, test_set, batch_size):
        super(DataLoader, self).__init__()
        self.batch_size = batch_size
        self.total_len = None  # total size of the dataset being used
        self.use_glove = config['model']['glove']

        # Load images
        logger.info('Loading images..')
        self.image_builder = get_img_builder(config['model']['image'], img_dir)
        self.use_resnet = self.image_builder.is_raw_image()
        self.require_multiprocess = self.image_builder.require_multiprocess()

        # Load dictionary
        logger.info('Loading dictionary..')
        self.tokenizer = VQATokenizer(os.path.join(data_dir, config["dico_name"]))

        # Load data
        logger.info('Loading data..')
        self.trainset = VQADataset(data_dir, year=year, which_set="train",
                                   image_builder=self.image_builder,
                                   preprocess_answers=self.tokenizer.preprocess_answers)
        self.validset = VQADataset(data_dir, year=year, which_set="val",
                                   image_builder=self.image_builder,
                                   preprocess_answers=self.tokenizer.preprocess_answers)
        self.testset = VQADataset(data_dir, year=year, which_set=test_set,
                                  image_builder=self.image_builder)

        # Load glove
        self.glove = None
        if self.use_glove:
            logger.info('Loading glove..')
            self.glove = GloveEmbeddings(os.path.join(data_dir, config["glove_name"]))

    def get_mini_batch(self, ind, data_type='train'):
        '''
        Arguments:
            ind       : current iteration, converted to the indices to be loaded
            data_type : selects the train ('train'), validation ('val') or test ('test') partition

        Returns:
            image     : torch.autograd.Variable in (batch, channels, height, width) format
            tokens    : question tokens
            glove_emb : glove embeddings of the question (None when glove is disabled)
            answer    : ground truth tokens
        '''
        if data_type == 'train':
            dataset = self.trainset.games
        elif data_type == 'val':
            dataset = self.validset.games
        elif data_type == 'test':
            dataset = self.testset.games
        else:
            raise ValueError("data_type must be 'train', 'val' or 'test'")
        self.total_len = len(dataset)  # total elements in the dataset

        # specify the start and end indices of the minibatch;
        # if the indices run past the last element, wrap around the dataset
        start_ind = (ind * self.batch_size) % self.total_len
        end_ind = ((ind + 1) * self.batch_size) % self.total_len
        if start_ind < end_ind:
            data = dataset[start_ind:end_ind]
        else:
            data = dataset[start_ind:self.total_len]
            data.extend(dataset[0:end_ind])

        # get the images from the dataset and convert them into a torch.autograd.Variable
        image = np.array([x.image.get_image() for x in data])
        image = torch.autograd.Variable(torch.Tensor(image).cuda())
        # reshape the image to (batch, channels, height, width) format
        image = image.permute(0, 3, 1, 2).contiguous()

        # get the questions from the dataset, tokenize them and convert them
        # into a torch.autograd.Variable
        que = [x.question for x in data]
        tokens = [self.tokenizer.encode_question(x)[0] for x in que]
        words = [self.tokenizer.encode_question(x)[1] for x in que]
        max_len = max([len(x) for x in tokens])  # max length of the question

        # pad the additional length with the unknown token '<unk>'
        for x in tokens:
            for i in range(max_len - len(x)):
                x.append(self.tokenizer.word2i['<unk>'])
        for x in words:
            for i in range(max_len - len(x)):
                x.append('<unk>')
        tokens = Variable(torch.LongTensor(tokens).cuda())

        # get the ground truth answers, tokenize them and convert them into
        # a torch.autograd.Variable
        ans = [x.majority_answer for x in data]
        answer = [self.tokenizer.encode_answer(x) for x in ans]
        answer = Variable(torch.LongTensor(answer).cuda())

        # if required, get the glove embeddings of the question words
        glove_emb = None
        if self.use_glove:
            glove_emb = [self.glove.get_embeddings(x) for x in words]
            glove_emb = Variable(torch.Tensor(glove_emb).cuda())

        return image, tokens, glove_emb, answer
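# --- Usage sketch (illustrative, not part of the original source) ---
# A minimal training-loop skeleton showing how get_mini_batch is meant to be
# driven: `ind` is the iteration counter, and out-of-range indices wrap around
# the dataset. `config` and the path arguments are assumed to exist.
#
#   loader = DataLoader(config, data_dir, img_dir, year, test_set, batch_size=32)
#   n_batches = len(loader.trainset.games) // loader.batch_size
#   for ind in range(n_batches):
#       image, tokens, glove_emb, answer = loader.get_mini_batch(ind, data_type='train')
#       # forward pass, loss computation and backward pass would go here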
class DataEngine(Dataset):
    def __init__(self, config, data_dir, img_dir, year, test_set, data_type):
        self.total_len = None  # total size of the dataset being used
        self.use_glove = config['model']['glove']
        self.data_type = data_type

        # Load images
        logger.info('Loading images..')
        self.image_builder = get_img_builder(config['model']['image'], img_dir)
        self.use_resnet = self.image_builder.is_raw_image()
        self.require_multiprocess = self.image_builder.require_multiprocess()

        # Load dictionary
        logger.info('Loading dictionary..')
        self.tokenizer = VQATokenizer(os.path.join(data_dir, config["dico_name"]))

        # Load data
        logger.info('Loading data..')
        if data_type == 'train':
            self.dataset = VQADataset(data_dir, year=year, which_set="train",
                                      image_builder=self.image_builder,
                                      preprocess_answers=self.tokenizer.preprocess_answers)
        elif data_type == 'val':
            self.dataset = VQADataset(data_dir, year=year, which_set="val",
                                      image_builder=self.image_builder,
                                      preprocess_answers=self.tokenizer.preprocess_answers)
        else:
            self.dataset = VQADataset(data_dir, year=year, which_set=test_set,
                                      image_builder=self.image_builder)
        self.preprocess()

        # Load glove
        self.glove = None
        if self.use_glove:
            logger.info('Loading glove..')
            self.glove = GloveEmbeddings(os.path.join(data_dir, config["glove_name"]))

    def __len__(self):
        return len(self.dataset.games)

    def preprocess(self):
        # compute the maximum question length once, so every item can be padded to it
        que = [x.question for x in self.dataset.games]
        tokens = [self.tokenizer.encode_question(x)[0] for x in que]
        self.max_len = max([len(x) for x in tokens])  # max length of the question

    def __getitem__(self, ind):
        '''
        Arguments:
            ind : index of the sample to load

        Returns:
            image     : torch.Tensor in (channels, height, width) format
            tokens    : question tokens, padded to max_len
            glove_emb : glove embeddings of the question
            answer    : ground truth tokens
        '''
        data = self.dataset.games[ind]

        # get the image from the dataset
        image = torch.Tensor(data.image.get_image())
        # reshape the image to (channels, height, width) format
        image = image.permute(2, 0, 1).contiguous()

        # tokenize the question (encode once; take the token ids and the words)
        tokens, words = self.tokenizer.encode_question(data.question)[:2]

        # pad the additional length with the unknown token '<unk>'
        for i in range(self.max_len - len(tokens)):
            tokens.append(self.tokenizer.word2i['<unk>'])
        for i in range(self.max_len - len(words)):
            words.append('<unk>')
        tokens = torch.LongTensor(tokens)

        # tokenize the ground truth answer
        answer = self.tokenizer.encode_answer(data.majority_answer)
        answer = torch.LongTensor([answer])

        # get the glove embeddings of the question words
        # (assumes glove is enabled in the config, so self.glove is not None)
        glove_emb = self.glove.get_embeddings(words)
        glove_emb = torch.Tensor(glove_emb)

        return image, tokens, glove_emb, answer
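# --- Usage sketch (illustrative, not part of the original source) ---
# Because DataEngine subclasses torch.utils.data.Dataset and every item is
# padded to the same max_len, it can be wrapped directly in PyTorch's built-in
# DataLoader, which handles batching and shuffling with the default collate
# function. `config` and the path arguments are assumed to exist.
#
#   engine = DataEngine(config, data_dir, img_dir, year, test_set, data_type='train')
#   batches = torch.utils.data.DataLoader(engine, batch_size=32, shuffle=True, num_workers=4)
#   for image, tokens, glove_emb, answer in batches:
#       image, tokens = image.cuda(), tokens.cuda()  # move to the GPU as needed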