コード例 #1
0
    def __getitem__(self, idx):
        """Return one sample built from pre-extracted image features.

        Args:
            idx: Index into ``self.questions``.

        Returns:
            A tuple ``(image, question, question_length, answer,
            question_type)``. ``image`` is the entry of
            ``self.img_features_file`` for the question's image with the
            positional encoding concatenated along axis 0. ``question`` and
            ``answer`` are encoded with ``self.dictionaries[0]`` and
            ``self.dictionaries[1]``; ``answer`` is shifted down by one to be
            zero based.

        Raises:
            ValueError: If the text after the last underscore of the image
                file name (extension removed) is not an integer.
        """
        current_question = self.questions[idx]

        # Only the image *name* is needed; the image file itself is never
        # opened here. Parse the id from the bare file stem so that neither an
        # underscore in a directory component nor the length of the extension
        # can corrupt it. ``rpartition`` keeps a malformed name failing with
        # ValueError (from ``int``) rather than IndexError, which the legacy
        # sequence-iteration protocol would interpret as "end of data".
        file_stem, _ = os.path.splitext(
            os.path.basename(current_question['image_filename']))
        image_id = int(file_stem.rpartition('_')[2])

        image = self.img_features_file[image_id]

        # NOTE(review): the last two axes are presumably (height, width) of
        # the feature map -- confirm against the feature extractor.
        _, height, width = image.shape
        position_encoding = get_positional_encoding(height, width)
        image = np.concatenate((image, position_encoding), axis=0)

        question = utils.to_dictionary_indexes(self.dictionaries[0],
                                               current_question['question'])
        answer = utils.to_dictionary_indexes(self.dictionaries[1],
                                             current_question['answer'])

        # The question type is derived from the last step of the program.
        question_type = get_ques_type(
            current_question['program'][-1]['function'])

        answer = answer - 1  # convert to zero based indexing

        return image, question, len(question), answer, question_type
コード例 #2
0
    def __getitem__(self, idx):
        """Return the sample at ``idx`` using the scene's stored objects.

        Args:
            idx: Index into ``self.questions``.

        Returns:
            A dict with keys ``'image'``, ``'question'`` and ``'answer'``.
            ``'image'`` holds the entry of ``self.objects`` selected by the
            question's ``'image_index'`` (no image file is read); the other
            two are the question and answer encoded with
            ``self.dictionaries[0]`` and ``self.dictionaries[1]``.
        """
        current_question = self.questions[idx]
        scene_objects = self.objects[current_question['image_index']]

        question = utils.to_dictionary_indexes(
            self.dictionaries[0], current_question['question'])
        answer = utils.to_dictionary_indexes(
            self.dictionaries[1], current_question['answer'])

        return {
            'image': scene_objects,
            'question': question,
            'answer': answer,
        }
コード例 #3
0
    def __getitem__(self, idx):
        """Return the sample at ``idx`` with its image loaded from disk.

        Args:
            idx: Index into ``self.questions``.

        Returns:
            A dict with keys ``'image'``, ``'question'`` and ``'answer'``.
            ``'image'`` is the RGB image read from ``self.img_dir``, passed
            through ``self.transform`` when one is set; the other two are the
            question and answer encoded with ``self.dictionaries[0]`` and
            ``self.dictionaries[1]``.
        """
        current_question = self.questions[idx]
        img_filename = os.path.join(self.img_dir,
                                    current_question['image_filename'])

        # Open the image as a context manager so it is closed as soon as the
        # pixels are decoded; ``convert`` returns a separate, fully loaded
        # image that remains usable after the source is closed.
        with Image.open(img_filename) as raw_image:
            image = raw_image.convert('RGB')

        question = utils.to_dictionary_indexes(
            self.dictionaries[0], current_question['question'])
        answer = utils.to_dictionary_indexes(
            self.dictionaries[1], current_question['answer'])

        sample = {'image': image, 'question': question, 'answer': answer}

        if self.transform:
            sample['image'] = self.transform(sample['image'])

        return sample
コード例 #4
0
    def __getitem__(self, idx):
        """Return the sample at ``idx`` with its scene objects shuffled.

        Args:
            idx: Index into ``self.questions``.

        Returns:
            A dict with keys ``'image'``, ``'question'`` and ``'answer'``.
            ``'image'`` holds the entry of ``self.objects`` selected by the
            question's ``'image_index'``, reordered along its first dimension
            by a fresh random permutation on every call; the other two are the
            question and answer encoded with ``self.dictionaries[0]`` and
            ``self.dictionaries[1]``.
        """
        current_question = self.questions[idx]
        scene_objects = self.objects[current_question['image_index']]

        # Random permutation of the objects, needed for Janossy pooling only;
        # all the other methods don't depend on the order.
        shuffled_order = torch.randperm(scene_objects.size(0))
        scene_objects = scene_objects[shuffled_order]

        question = utils.to_dictionary_indexes(
            self.dictionaries[0], current_question['question'])
        answer = utils.to_dictionary_indexes(
            self.dictionaries[1], current_question['answer'])

        return {
            'image': scene_objects,
            'question': question,
            'answer': answer,
        }