def prepare_indices(config):
    """Fill the question, module and answer indices from the train2014 files.

    Three passes are made, each registering entries through the ``.index``
    method of a module-level index object (presumably this assigns an id the
    first time a key is seen -- confirm against the index class):

    * question words seen at least ``MIN_COUNT`` times -> ``QUESTION_INDEX``
    * parse predicates seen at least ``10 * MIN_COUNT`` times ->
      ``MODULE_INDEX``
    * the ``config.answers`` most frequent answers -> ``ANSWER_INDEX``

    Args:
        config: object whose ``answers`` attribute caps how many answers
            are kept.
    """
    split = "train2014"

    # --- question vocabulary -------------------------------------------
    token_freq = defaultdict(int)
    with open(QUESTION_FILE % split) as handle:
        question_records = json.load(handle)["questions"]
    for record in question_records:
        for token in proc_question(record["question"]):
            token_freq[token] += 1
    for token, freq in token_freq.items():
        if freq < MIN_COUNT:
            continue
        QUESTION_INDEX.index(token)

    # --- parse predicates ----------------------------------------------
    pred_freq = defaultdict(int)
    with open(MULTI_PARSE_FILE % split) as handle:
        for raw_line in handle:
            # Drop parentheses and treat ";" as a separator so that only
            # whitespace-delimited predicate names remain.
            flattened = raw_line.strip()
            for old, new in (("(", ""), (")", ""), (";", " ")):
                flattened = flattened.replace(old, new)
            for pred in flattened.split():
                pred_freq[pred] += 1
    for pred, freq in pred_freq.items():
        if freq < 10 * MIN_COUNT:
            continue
        MODULE_INDEX.index(pred)

    # --- answers -------------------------------------------------------
    answer_freq = defaultdict(int)
    with open(ANN_FILE % split) as handle:
        annotations = json.load(handle)["annotations"]
    for annotation in annotations:
        for candidate in annotation["answers"]:
            # Only answers marked with confidence "yes" are counted.
            if candidate["answer_confidence"] == "yes":
                text = candidate["answer"]
                # Skip answers containing anything other than word
                # characters and whitespace.
                if re.search(r"[^\w\s]", text) is None:
                    answer_freq[text] += 1

    # Highest count first; equal counts fall back to descending answer text
    # because whole (count, answer) tuples are compared.
    ranked = sorted(
        [(freq, text) for text, freq in answer_freq.items()], reverse=True
    )
    for _, text in ranked[:config.answers]:
        ANSWER_INDEX.index(text)
def parse_to_layout_helper(parse, world, config, modules):
    """Recursively turn a parse into a ``(modules, labels)`` pair.

    Args:
        parse: either a string leaf, or a sequence whose first element is
            the head and whose remaining elements are child parses.
        world: object with an ``entities`` mapping, consulted for leaves.
        config: not read here; only forwarded to the recursive calls.
        modules: mapping providing the "lookup", "find", "and", "exists"
            and "relate" entries.

    Returns:
        For a string leaf: ``(modules["lookup"], world.entities[parse])``
        when the leaf is a known entity, otherwise
        ``(modules["find"], MODULE_INDEX.index(parse))``.
        For a nested parse: a tuple of modules and a tuple of labels, each
        starting with the head's entry followed by one entry per child.
    """
    if not isinstance(parse, str):
        head = parse[0]
        # Children are processed before the head is indexed, so any ids
        # handed out by MODULE_INDEX are assigned to children first.
        child_results = [
            parse_to_layout_helper(child, world, config, modules)
            for child in parse[1:]
        ]
        child_modules, child_labels = zip(*child_results)

        kind = (
            "and" if head == "and"
            else "exists" if head == "exists"
            else "relate"
        )
        head_module = modules[kind]
        head_label = MODULE_INDEX.index(head)
        return (
            (head_module,) + tuple(child_modules),
            (head_label,) + tuple(child_labels),
        )

    # String leaf: known entities are looked up, anything else is "found".
    if parse in world.entities:
        return modules["lookup"], world.entities[parse]
    return modules["find"], MODULE_INDEX.index(parse)
def prepare_indices(config=None):
    """Fill the question, module and answer indices from the train2014 files.

    Entries are registered through the ``.index`` method of the module-level
    ``QUESTION_INDEX``, ``MODULE_INDEX`` and ``ANSWER_INDEX`` objects
    (presumably this assigns an id the first time a key is seen -- confirm
    against the index class).

    Args:
        config: optional object whose ``answers`` attribute caps how many of
            the most frequent answers are kept. When omitted, the previous
            fixed cap of 1000 is used, so zero-argument callers behave as
            before.
    """
    default_answer_limit = 1000
    set_name = "train2014"

    # Question words that occur at least MIN_COUNT times.
    word_counts = defaultdict(int)
    with open(QUESTION_FILE % set_name) as question_f:
        questions = json.load(question_f)["questions"]
    for question in questions:
        for word in proc_question(question["question"]):
            word_counts[word] += 1
    for word, count in word_counts.items():
        if count >= MIN_COUNT:
            QUESTION_INDEX.index(word)

    # Parse predicates that occur at least 10 * MIN_COUNT times.
    pred_counts = defaultdict(int)
    with open(MULTI_PARSE_FILE % set_name) as parse_f:
        for line in parse_f:
            # Strip parentheses and split on ";" / whitespace so only the
            # bare predicate names are counted.
            cleaned = (
                line.strip().replace("(", "").replace(")", "").replace(";", " ")
            )
            for part in cleaned.split():
                pred_counts[part] += 1
    for pred, count in pred_counts.items():
        if count >= 10 * MIN_COUNT:
            MODULE_INDEX.index(pred)

    # Answers: only those marked with confidence "yes" and made up solely of
    # word characters and whitespace are counted.
    answer_counts = defaultdict(int)
    with open(ANN_FILE % set_name) as ann_f:
        annotations = json.load(ann_f)["annotations"]
    for ann in annotations:
        for answer in ann["answers"]:
            if answer["answer_confidence"] != "yes":
                continue
            word = answer["answer"]
            if re.search(r"[^\w\s]", word):
                continue
            answer_counts[word] += 1

    # Most frequent first; ties are ordered by descending answer text because
    # whole (count, answer) tuples are compared.
    ranked = sorted(((c, a) for a, c in answer_counts.items()), reverse=True)
    limit = default_answer_limit if config is None else config.answers
    for _, answer in ranked[:limit]:
        ANSWER_INDEX.index(answer)
def parse_to_layout_helper(parse, world, config, modules):
    """Build parallel module/label structures for a parse.

    A string ``parse`` is a leaf. If it is a key of ``world.entities`` the
    result is ``(modules["lookup"], world.entities[parse])``; otherwise it is
    ``(modules["find"], MODULE_INDEX.index(parse))``.

    Any other ``parse`` is treated as ``[head, child, child, ...]``. Each
    child is converted recursively, the head is mapped to ``modules["and"]``,
    ``modules["exists"]`` or (for every other head) ``modules["relate"]``,
    and the head is labelled with ``MODULE_INDEX.index(head)``. The result is
    ``(modules_tuple, labels_tuple)`` with the head's entry first and the
    children's entries after it.

    ``config`` is not used directly; it is only passed down the recursion.
    """
    if isinstance(parse, str):
        is_entity = parse in world.entities
        if is_entity:
            return modules["lookup"], world.entities[parse]
        return modules["find"], MODULE_INDEX.index(parse)

    head = parse[0]

    # Convert every child before touching the head, keeping the order in
    # which MODULE_INDEX.index is called.
    converted = []
    for child in parse[1:]:
        converted.append(
            parse_to_layout_helper(child, world, config, modules)
        )
    sub_modules, sub_labels = zip(*converted)

    if head == "and":
        module_key = "and"
    elif head == "exists":
        module_key = "exists"
    else:
        module_key = "relate"
    top_module = modules[module_key]
    top_label = MODULE_INDEX.index(head)

    all_modules = (top_module,) + tuple(sub_modules)
    all_labels = (top_label,) + tuple(sub_labels)
    return all_modules, all_labels