def parse_to_layout(parse):
    """Build a two-module ``Layout`` from a flat ``(query, target)`` parse.

    Only ``parse[0]`` and ``parse[1]`` are read. The first module answers the
    query: ``DenseAnswerModule`` for the queries "is", "is_there" and
    "count", ``AttAnswerModule`` for everything else. The second module is
    always ``DetectModule``. Each module is paired with the ``LAYOUT_INDEX``
    id of the corresponding parse element.

    Returns:
        ``Layout(modules, indices)`` where both arguments are 2-tuples.
    """
    query = parse[0]

    if query in ("is", "is_there", "count"):
        answer_module = DenseAnswerModule
    else:
        answer_module = AttAnswerModule

    # The query label is indexed before the target label, so the order in
    # which LAYOUT_INDEX sees new labels stays the same.
    query_index = LAYOUT_INDEX.index(query)
    target_index = LAYOUT_INDEX.index(parse[1])

    modules = (answer_module, DetectModule)
    indices = (query_index, target_index)
    return Layout(modules, indices)
def parse_to_layout_helper(parse, internal):
    """Recursively map a parse tree to module classes and label indices.

    Args:
        parse: Either a ``str`` leaf label, or a sequence whose first element
            is the head label and whose remaining elements are child parses.
        internal: ``False`` for the root of the tree (the head becomes an
            answer module), ``True`` for any node below the root.

    Returns:
        For a leaf: the pair ``(DetectModule, index_of_label)``.
        For a subtree: a pair ``(modules, indices)`` of tuples. Each starts
        with the entry for the head, followed by one entry per child. A child
        that is itself a subtree contributes nested tuples.
    """
    if isinstance(parse, str):
        return (DetectModule, LAYOUT_INDEX.index(parse))

    head = parse[0]
    # Index the head *label*, not the whole subtree. Indexing ``parse`` would
    # hand every distinct subtree its own id instead of sharing one id per
    # head label, unlike the leaf case above.
    head_idx = LAYOUT_INDEX.index(head)

    if internal:
        mod_head = ConjModule if head == "and" else RedetectModule
    else:
        mod_head = DenseAnswerModule if head == "how many" else AttAnswerModule

    below = [parse_to_layout_helper(child, internal=True) for child in parse[1:]]
    if not below:
        # A head without children: ``zip(*[])`` yields nothing to unpack, so
        # return the head on its own instead of raising ValueError.
        return (mod_head,), (head_idx,)

    mods_below, indices_below = zip(*below)
    return (mod_head,) + tuple(mods_below), (head_idx,) + tuple(indices_below)
def __init__(self, config, set_name):
    """Read one split of the data set from disk and group it for lookup.

    Args:
        config: Stored on ``self.config``; not otherwise read here.
        set_name: Split name, interpolated into the ``STRING_FILE``,
            ``PARSE_FILE``, ``ANN_FILE`` and ``IMAGE_ID_FILE`` templates.
            ``"val"`` short-circuits to an empty data set without opening
            any file or logging anything.

    Attributes set:
        data: set of ``CocoQADatum`` objects.
        by_layout_type: datums grouped by ``datum.layout.modules``.
        by_string_length: datums grouped by ``len(datum.string)``.
        by_layout_and_length: datums grouped by the pair of both keys.
    """
    self.config = config

    data = set()
    data_by_layout_type = defaultdict(list)
    data_by_string_length = defaultdict(list)
    data_by_layout_and_length = defaultdict(list)

    # The containers are filled in place below, so they can be published once
    # here instead of separately for the early return and the normal exit.
    self.data = data
    self.by_layout_type = data_by_layout_type
    self.by_string_length = data_by_string_length
    self.by_layout_and_length = data_by_layout_and_length

    if set_name == "val":
        return

    if set_name == "train":
        # TODO better index
        # First pass over the parses: every predicate that occurs more than
        # once is handed to LAYOUT_INDEX before any question is processed.
        pred_counter = defaultdict(int)
        with open(PARSE_FILE % set_name) as parse_f:
            for parse_str in parse_f:
                parse_preds = parse_str.strip() \
                    .replace("'", "") \
                    .replace("(", "") \
                    .replace(")", "") \
                    .split()
                for pred in parse_preds:
                    pred_counter[pred] += 1
        for pred, count in pred_counter.items():
            if count > 1:
                LAYOUT_INDEX.index(pred)

    # Loop-invariant: any split other than "train" uses the "val" name.
    coco_set_name = "train" if set_name == "train" else "val"
    skipped = 0

    with open(STRING_FILE % set_name) as question_f, \
            open(PARSE_FILE % set_name) as parse_f, \
            open(ANN_FILE % set_name) as ann_f, \
            open(IMAGE_ID_FILE % set_name) as image_id_f:
        # The four files are read in lockstep, one example per line.
        for question, parse_str, answer, image_id in \
                zip(question_f, parse_f, ann_f, image_id_f):
            question = question.strip()
            parse_str = parse_str.strip().replace("'", "")
            answer = answer.strip()
            image_id = int(image_id.strip())
            words = ["<s>"] + question.split() + ["</s>"]

            parse = parse_tree(parse_str)

            answer = ANSWER_INDEX.index(answer)
            words = [STRING_INDEX.index(w) for w in words]
            if len(parse) == 1:
                # A bare query gets a generic target so that
                # parse_to_layout always finds two elements.
                parse = parse + ("object",)
            layout = parse_to_layout(parse)

            try:
                datum = CocoQADatum(words, layout, image_id, answer,
                                    coco_set_name)
                datum.raw_query = parse_str
                data.add(datum)
                data_by_layout_type[datum.layout.modules].append(datum)
                data_by_string_length[len(datum.string)].append(datum)
                data_by_layout_and_length[
                    (datum.layout.modules, len(datum.string))].append(datum)
            except IOError as e:
                # Still best-effort (the example is dropped), but no longer
                # silent: record why and count it for the summary below.
                skipped += 1
                logging.debug("skipping example for image %s: %s",
                              image_id, e)

    logging.info("%s:", set_name.upper())
    logging.info("%s items", len(self.data))
    if skipped:
        logging.warning("%s items skipped because of IOError", skipped)
    logging.info("%s words", len(STRING_INDEX))
    logging.info("%s functions", len(LAYOUT_INDEX))
    logging.info("%s answers", len(ANSWER_INDEX))
    logging.info("%s layouts", len(self.by_layout_type.keys()))
    logging.info("")
def __init__(self, config, set_name):
    """Load the examples of one split and bucket them by layout and length.

    Args:
        config: Kept on ``self.config``; this method does not read it.
        set_name: Split name substituted into the ``STRING_FILE``,
            ``PARSE_FILE``, ``ANN_FILE`` and ``IMAGE_ID_FILE`` path
            templates. For ``"val"`` the method returns immediately with
            empty containers, reading no files and emitting no log lines.

    Attributes set:
        data: set holding every ``CocoQADatum`` that could be constructed.
        by_layout_type: list of datums per ``datum.layout.modules``.
        by_string_length: list of datums per ``len(datum.string)``.
        by_layout_and_length: list of datums per (modules, length) pair.
    """
    self.config = config

    # Bind the containers to the instance up front; the loop below mutates
    # these same objects, so no second round of assignments is needed.
    data = self.data = set()
    by_layout = self.by_layout_type = defaultdict(list)
    by_length = self.by_string_length = defaultdict(list)
    by_layout_and_length = self.by_layout_and_length = defaultdict(list)

    if set_name == "val":
        return

    if set_name == "train":
        # TODO better index
        # Count predicate occurrences over the whole parse file, then pass
        # each predicate seen more than once to LAYOUT_INDEX.
        pred_counter = defaultdict(int)
        with open(PARSE_FILE % set_name) as parse_f:
            for parse_line in parse_f:
                stripped = parse_line.strip()
                for char in ("'", "(", ")"):
                    stripped = stripped.replace(char, "")
                for pred in stripped.split():
                    pred_counter[pred] += 1
        for pred, count in pred_counter.items():
            if count <= 1:
                continue
            LAYOUT_INDEX.index(pred)

    # Does not change per example, so compute it once outside the loop.
    coco_set_name = "train" if set_name == "train" else "val"
    num_skipped = 0

    with open(STRING_FILE % set_name) as question_f, \
            open(PARSE_FILE % set_name) as parse_f, \
            open(ANN_FILE % set_name) as ann_f, \
            open(IMAGE_ID_FILE % set_name) as image_id_f:
        # Line k of each file describes example k.
        rows = zip(question_f, parse_f, ann_f, image_id_f)
        for question, parse_str, answer, image_id in rows:
            question = question.strip()
            parse_str = parse_str.strip().replace("'", "")
            answer = answer.strip()
            image_id = int(image_id.strip())
            words = ["<s>"] + question.split() + ["</s>"]

            parse = parse_tree(parse_str)

            answer = ANSWER_INDEX.index(answer)
            words = [STRING_INDEX.index(w) for w in words]
            if len(parse) == 1:
                # parse_to_layout reads two elements; pad a lone query with
                # a generic target.
                parse = parse + ("object", )
            layout = parse_to_layout(parse)

            try:
                datum = CocoQADatum(words, layout, image_id, answer,
                                    coco_set_name)
                datum.raw_query = parse_str
                data.add(datum)
                modules = datum.layout.modules
                length = len(datum.string)
                by_layout[modules].append(datum)
                by_length[length].append(datum)
                by_layout_and_length[(modules, length)].append(datum)
            except IOError as err:
                # The example is still dropped (best effort), but the drop
                # is now visible in the log rather than swallowed.
                num_skipped += 1
                logging.debug("skipping example for image %s: %s",
                              image_id, err)

    logging.info("%s:", set_name.upper())
    logging.info("%s items", len(self.data))
    if num_skipped:
        logging.warning("%s items skipped because of IOError", num_skipped)
    logging.info("%s words", len(STRING_INDEX))
    logging.info("%s functions", len(LAYOUT_INDEX))
    logging.info("%s answers", len(ANSWER_INDEX))
    logging.info("%s layouts", len(self.by_layout_type.keys()))
    logging.info("")