def encode_question(self, question, max_length=config.max_question_len):
    """Tokenize a question into a fixed-length LongTensor.

    The token sequence is truncated to ``max_length``; shorter sequences
    are LEFT-padded (padding goes in front of the sentence) with the
    dictionary's padding index.
    """
    token_ids = self.dictionary.tokenize(question, False)[:max_length]
    pad_count = max_length - len(token_ids)
    if pad_count > 0:
        # Note here we pad in front of the sentence.
        token_ids = [self.dictionary.padding_idx] * pad_count + token_ids
    utils.assert_eq(len(token_ids), max_length)
    return torch.LongTensor(token_ids)
def encode_segment(self, segment, max_length=config.max_segment_len):
    """Tokenize a segment into a fixed-length LongTensor plus its length.

    The token sequence is truncated to ``max_length``; shorter sequences
    are RIGHT-padded (at the end of the sentence, unlike questions) with
    the dictionary's padding index. Returns ``(tensor, true_len)`` where
    ``true_len`` is the pre-padding token count.
    """
    token_ids = self.dictionary.tokenize(segment, False)[:max_length]
    true_len = len(token_ids)  # already <= max_length after truncation
    if true_len < max_length:
        # Note here we pad at the END of the sentence.
        token_ids = token_ids + [self.dictionary.padding_idx] * (max_length - true_len)
    utils.assert_eq(len(token_ids), max_length)
    return torch.LongTensor(token_ids), true_len
def _load_entries(self):
    """Load entries

    img_id2idx: dict {img_id -> idx} val can be used to
        retrieve image or features
    dataroot: root path of dataset
    split: 'train', 'val', 'trainval', 'test'
    """
    def _create_entry(img_id, question, answer):
        # One dataset record: ids, feature index, and encoded Q/A.
        return {
            'question_id': question['question_id'],
            'image_id': img_id,
            'img_idx': self.img_id2idx[img_id],
            'question': self.encode_question(question['question']),
            'answer': self.encode_answer(answer)
        }

    questions = sorted(
        utils.get_file(self.split, question=True),
        key=lambda q: q['question_id'])

    if self.split == 'test':
        # No ground-truth targets for the test split; encode a dummy 0.
        entries = [_create_entry(q['image_id'], q, 0) for q in questions]
    else:
        answer_path = os.path.join(self.dataroot, '%s_target.json' % self.split)
        with open(answer_path, 'r') as fd:
            answers = json.load(fd)
        utils.assert_eq(len(questions), len(answers))
        answers.sort(key=lambda a: a['question_id'])

        entries = []
        for question, answer in zip(questions, answers):
            # Pop the ids out of the answer dict and sanity-check that
            # questions and answers are aligned after sorting.
            img_id = answer.pop('image_id')
            ques_id = answer.pop('question_id')
            utils.assert_eq(question['question_id'], ques_id)
            utils.assert_eq(question['image_id'], img_id)
            entries.append(_create_entry(img_id, question, answer))

    print(len(entries))
    return entries
def create_vqa_exp_dictionary():
    """Build a word Dictionary over VQA questions and answers, plus a
    question-id -> explanation map.

    Iterates the relevant splits (train/test for the 'cp' setting,
    train/val otherwise), tokenizing every question and its
    multiple-choice answer into the dictionary as it goes.

    Returns:
        (dictionary, qid2exp): the populated ``Dictionary`` and a dict
        mapping ``int(question_id)`` to
        ``{'question': str, 'answer': str}``.
    """
    dictionary = Dictionary()
    qid2exp = {}
    # VQA-CP pairs train with test; standard VQA pairs train with val.
    if config.type == 'cp':
        splits = ['train', 'test']
    else:
        splits = ['train', 'val']
    # NOTE: loop variable renamed from `type`, which shadowed the builtin.
    for split in splits:
        questions = sorted(utils.get_file(split, question=True),
                           key=lambda x: x['question_id'])
        answers = sorted(utils.get_file(split, answer=True),
                         key=lambda x: x['question_id'])
        for q, a in zip(questions, answers):
            ques_id = a.pop('question_id')
            utils.assert_eq(q['question_id'], ques_id)
            # Grow the vocabulary from both the question and answer text.
            dictionary.tokenize(q['question'], True)
            dictionary.tokenize(a['multiple_choice_answer'], True)
            qid2exp[int(ques_id)] = {
                'question': q['question'],
                'answer': a['multiple_choice_answer']
            }
            print(ques_id, a['image_id'])
    return dictionary, qid2exp
def _load_entries(self):
    """Load entries

    img_id2idx: dict {img_id -> idx} val can be used to
        retrieve image or features
    dataroot: root path of dataset
    split: 'train', 'val', 'trainval', 'test'
    """
    def _create_entry(img_id, question, answer):
        # Build one dataset record. `bias` is the per-question-type answer
        # prior (from question_type_to_prob_array) for training splits; val
        # and test get a 0 placeholder instead.
        entry = {
            'question_id': question['question_id'],
            'image_id': img_id,
            'img_idx': self.img_id2idx[img_id],
            'question': self.encode_question(question['question']),
            'question_type': answer['question_type'],
            'answer': self.encode_answer(answer),
            'bias': self.question_type_to_prob_array[answer['question_type']]
            if self.split not in ['val', 'test'] else 0,
            # Hint placeholders: 36 boolean slots (presumably one per image
            # region / bottom-up feature — TODO confirm), to be filled later.
            'has_hint': torch.tensor(False),
            'hint': torch.zeros(36).bool(),
            # 'hint': torch.zeros(36),
        }
        return entry

    questions = utils.get_file(self.split, question=True)
    questions = sorted(questions, key=lambda x: x['question_id'])
    if self.split != 'test':
        answer_path = os.path.join(self.dataroot, '%s_target.json' % self.split)
        with open(answer_path, 'r') as fd:
            answers = json.load(fd)
        utils.assert_eq(len(questions), len(answers))
        answers = sorted(answers, key=lambda x: x['question_id'])
        # compute the bias for train dataset
        # (must run BEFORE the pops below, which mutate the answer dicts)
        if self.split not in ['val', 'test']:
            self.compute_bias_with_qty(answers)
        entries = []
        for question, answer in zip(questions, answers):
            # Pop ids out of the answer dict and sanity-check that the
            # sorted questions and answers are aligned.
            img_id = answer.pop('image_id')
            ques_id = answer.pop('question_id')
            utils.assert_eq(question['question_id'], ques_id)
            utils.assert_eq(question['image_id'], img_id)
            entries.append(_create_entry(img_id, question, answer))
    else:
        # Test split has no targets; use a dummy answer with a null type.
        answer = {'question_type': 'Null'}
        entries = [
            _create_entry(question['image_id'], question, answer)
            for question in questions
        ]
    print(len(entries))
    return entries