def test_evalai_answer_processor(self):
    evalai_answer_processor = EvalAIAnswerProcessor()

    # Test number
    processed = evalai_answer_processor("two")
    expected = "2"
    self.assertEqual(processed, expected)

    # Test article
    processed = evalai_answer_processor("a building")
    expected = "building"
    self.assertEqual(processed, expected)

    # Test tokenize
    processed = evalai_answer_processor("snow, mountain")
    expected = "snow mountain"
    self.assertEqual(processed, expected)

    # Test contractions
    processed = evalai_answer_processor("isnt")
    expected = "isn't"
    self.assertEqual(processed, expected)

    # Test processor
    processed = evalai_answer_processor("the two mountain's \t \n ")
    expected = "2 mountain 's"
    self.assertEqual(processed, expected)
def filter_answers(answers_dset, min_occurence):
    """Preprocess every ground-truth answer with the EvalAI processor and keep
    only answers that occur in at least `min_occurence` distinct questions."""
    occurence = {}
    answer_list = []
    evalai_answer_processor = EvalAIAnswerProcessor()

    for ans_entry in answers_dset:
        if "multiple_choice_answer" in ans_entry:
            gtruth = ans_entry["multiple_choice_answer"]
            gtruth = evalai_answer_processor(gtruth)
            if gtruth not in occurence:
                occurence[gtruth] = set()
            occurence[gtruth].add(ans_entry["question_id"])
        else:
            for ans in ans_entry["answers"]:
                gtruth = ans["answer"]
                # gtruth = ans["raw_answer"]
                gtruth = evalai_answer_processor(gtruth)
                if gtruth not in occurence:
                    occurence[gtruth] = set()
                occurence[gtruth].add(ans_entry["question_id"])

    for answer in occurence.keys():
        if len(occurence[answer]) >= min_occurence:
            answer_list.append(answer)

    print(
        "Num of answers that appear >= %d times: %d"
        % (min_occurence, len(answer_list))
    )
    return answer_list
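# A minimal usage sketch (the data below is hypothetical, not from the original
# script): filter_answers expects VQA-style annotation entries carrying either a
# "multiple_choice_answer" string or a list of per-annotator "answers" dicts.
sample_answers_dset = [
    {"question_id": 1, "multiple_choice_answer": "two"},
    {
        "question_id": 2,
        "answers": [{"answer": "snow, mountain"}, {"answer": "a building"}],
    },
]
# With min_occurence=1 every preprocessed answer is kept, e.g.
# ['2', 'snow mountain', 'building'] (insertion order).
answer_vocab = filter_answers(sample_answers_dset, min_occurence=1)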
def __init__(self):
    # Metric constructor: registers the name "vqa_evalai_accuracy" with the base
    # metric class, sets up EvalAI-style answer preprocessing, and declares the
    # sample fields this metric needs.
    super().__init__("vqa_evalai_accuracy")
    self.evalai_answer_processor = EvalAIAnswerProcessor()
    self.required_params = ["scores", "answers", "context_tokens"]
import json
import os

from tqdm import tqdm

from mmf.utils.configuration import get_mmf_cache_dir
from mmf.utils.file_io import PathManager
from mmf.datasets.processors.processors import EvalAIAnswerProcessor


root_dir = os.path.join(
    get_mmf_cache_dir(), "data", "datasets", "okvqa", "defaults", "annotations"
)
out_dir = os.path.join(
    get_mmf_cache_dir(), "data", "datasets", "okvqa", "defaults", "extras", "vocabs"
)
train_path = os.path.join(root_dir, "mscoco_train2014_annotations.json")
val_path = os.path.join(root_dir, "mscoco_val2014_annotations.json")
out_path = os.path.join(out_dir, "gt2raw_answers.json")

evalai_answer_processor = EvalAIAnswerProcessor()

with PathManager.open(train_path, "r") as f:
    annotations = json.load(f)["annotations"]
with PathManager.open(val_path, "r") as f:
    annotations += json.load(f)["annotations"]

# Map each preprocessed ground-truth answer to the set of preprocessed raw
# answers that annotators actually typed for it.
gt2raw = {}
for ann in tqdm(annotations):
    for ans in ann["answers"]:
        raw_ans = evalai_answer_processor(ans["raw_answer"])
        gt_ans = evalai_answer_processor(ans["answer"])
        if gt_ans in gt2raw:
            gt2raw[gt_ans].add(raw_ans)
        else:
            gt2raw[gt_ans] = {raw_ans}
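# The excerpt stops before out_path is used. A plausible final step (a sketch,
# not confirmed against the original script) is to dump the mapping as JSON,
# converting the sets to lists so they are serializable.
os.makedirs(out_dir, exist_ok=True)
with PathManager.open(out_path, "w") as f:
    json.dump({gt: sorted(raws) for gt, raws in gt2raw.items()}, f, indent=2)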