def __getitem__(self, idx):
    """Return image features, encoded question/answer and question type.

    Args:
        idx: Index into ``self.questions``.

    Returns:
        A 5-tuple ``(image, question, question_length, answer,
        question_type)`` where ``image`` is the stored feature array with
        the positional encoding concatenated along axis 0,
        ``question_length`` is ``len(question)`` and ``answer`` has been
        shifted to zero-based indexing.

    Raises:
        ValueError: If the text after the last underscore of the image
            file name (extension removed) is not an integer.
        IndexError: If the image path contains no underscore.
    """
    current_question = self.questions[idx]
    img_filename = os.path.join(self.img_dir,
                                current_question['image_filename'])

    # The image id is the number following the last underscore of the file
    # name. Strip the extension with splitext instead of a fixed
    # four-character slice so that extensions of other lengths (or a
    # missing extension) cannot silently corrupt the parsed id.
    stem, _extension = os.path.splitext(img_filename)
    image_id = int(stem.rsplit('_', 1)[1])

    # Pre-extracted features are looked up by image id; the unpacking below
    # requires a rank-3 array (presumably channels, H, W -- TODO confirm).
    image = self.img_features_file[image_id]
    _, H, W = image.shape
    position_encoding = get_positional_encoding(H, W)
    image = np.concatenate((image, position_encoding), axis=0)

    question = utils.to_dictionary_indexes(self.dictionaries[0],
                                           current_question['question'])
    answer = utils.to_dictionary_indexes(self.dictionaries[1],
                                         current_question['answer'])

    # The question type is derived from the last step of the program.
    question_type = get_ques_type(
        current_question['program'][-1]['function'])

    answer = answer - 1  # convert to zero based indexing
    return image, question, len(question), answer, question_type
def __getitem__(self, idx):
    """Return the scene objects and encoded question/answer for one sample.

    Args:
        idx: Index into ``self.questions``.

    Returns:
        A dict with keys ``'image'`` (the entry of ``self.objects`` selected
        by the question's ``'image_index'``), ``'question'`` and
        ``'answer'`` (both produced by ``utils.to_dictionary_indexes``).
    """
    current_question = self.questions[idx]

    # Each question refers to its scene through 'image_index'.
    scene_idx = current_question['image_index']
    obj = self.objects[scene_idx]

    question = utils.to_dictionary_indexes(self.dictionaries[0],
                                           current_question['question'])
    answer = utils.to_dictionary_indexes(self.dictionaries[1],
                                         current_question['answer'])

    # The object representation is stored under 'image', the same key the
    # other entries of this dict sit alongside.
    return {'image': obj, 'question': question, 'answer': answer}
def __getitem__(self, idx):
    """Load the RGB image and encoded question/answer for one sample.

    Args:
        idx: Index into ``self.questions``.

    Returns:
        A dict with keys ``'image'``, ``'question'`` and ``'answer'``.
        ``'image'`` is the RGB-converted image, passed through
        ``self.transform`` when that attribute is truthy.
    """
    current_question = self.questions[idx]
    img_filename = os.path.join(self.img_dir,
                                current_question['image_filename'])

    # Open the file in a context manager so its handle is released as soon
    # as the pixels have been converted; the converted image is used after
    # the block exits.
    # NOTE(review): assumes ``Image`` is ``PIL.Image`` -- confirm against
    # the file's imports.
    with Image.open(img_filename) as raw_image:
        image = raw_image.convert('RGB')

    question = utils.to_dictionary_indexes(self.dictionaries[0],
                                           current_question['question'])
    answer = utils.to_dictionary_indexes(self.dictionaries[1],
                                         current_question['answer'])

    sample = {'image': image, 'question': question, 'answer': answer}

    # The optional transform is applied to the image only.
    if self.transform:
        sample['image'] = self.transform(sample['image'])

    return sample
def __getitem__(self, idx):
    """Return randomly permuted scene objects and encoded question/answer.

    Args:
        idx: Index into ``self.questions``.

    Returns:
        A dict with keys ``'image'`` (the entry of ``self.objects`` selected
        by the question's ``'image_index'``, reordered along its first
        dimension by a fresh random permutation), ``'question'`` and
        ``'answer'`` (both produced by ``utils.to_dictionary_indexes``).
    """
    current_question = self.questions[idx]

    # Each question refers to its scene through 'image_index'.
    scene_idx = current_question['image_index']
    obj = self.objects[scene_idx]

    # Random permutation of objects for Janossy pooling only;
    # all the other methods don't depend on the order.
    permutation = torch.randperm(obj.size(0))
    obj = obj[permutation]

    question = utils.to_dictionary_indexes(self.dictionaries[0],
                                           current_question['question'])
    answer = utils.to_dictionary_indexes(self.dictionaries[1],
                                         current_question['answer'])

    return {'image': obj, 'question': question, 'answer': answer}