Ejemplo n.º 1
0
def parse_to_layout(parse):
    """Turn a flat ``(query, argument)`` parse into a two-module ``Layout``.

    The first slot holds an answer module picked from the query word
    ``parse[0]``: ``DenseAnswerModule`` for "is", "is_there" and "count",
    ``AttAnswerModule`` for every other query. The second slot is always a
    ``DetectModule`` for ``parse[1]``. Each slot is paired with whatever
    ``LAYOUT_INDEX.index`` returns for its label.

    Only ``parse[0]`` and ``parse[1]`` are read and no validation is done,
    so a parse with fewer than two elements raises ``IndexError``.
    """
    query = parse[0]
    answer_module = (
        DenseAnswerModule
        if query in ("is", "is_there", "count")
        else AttAnswerModule
    )
    query_index = LAYOUT_INDEX.index(query)

    # The argument is looked up only after the query has been indexed.
    argument_index = LAYOUT_INDEX.index(parse[1])

    return Layout(
        (answer_module, DetectModule),
        (query_index, argument_index),
    )
Ejemplo n.º 2
0
def parse_to_layout(parse):
    """Build the ``Layout`` for a two-element ``(query, argument)`` parse.

    Slot 0 is the answer module for the query ``parse[0]``; slot 1 is a
    ``DetectModule`` for the argument ``parse[1]``. Queries "is",
    "is_there" and "count" are answered with ``DenseAnswerModule``; all
    others use ``AttAnswerModule``. The indices stored next to the modules
    are the values ``LAYOUT_INDEX.index`` returns for the two labels.

    No validation is performed; a parse shorter than two elements raises
    ``IndexError``.
    """
    dense_queries = ("is", "is_there", "count")

    head = parse[0]
    head_module = AttAnswerModule
    if head in dense_queries:
        head_module = DenseAnswerModule
    head_index = LAYOUT_INDEX.index(head)

    # Read the argument only after the query label has been indexed.
    arg_index = LAYOUT_INDEX.index(parse[1])

    modules = (head_module, DetectModule)
    indices = (head_index, arg_index)
    return Layout(modules, indices)
Ejemplo n.º 3
0
def parse_to_layout_helper(parse, internal):
    """Recursively convert a parse tree into parallel module/index structures.

    Args:
        parse: either a ``str`` leaf label or a sequence whose first element
            is the head label and whose remaining elements are child parses.
        internal: ``False`` for the root call (the head becomes an answer
            module), ``True`` for nested nodes (the head becomes
            ``ConjModule`` for "and", otherwise ``RedetectModule``).

    Returns:
        For a leaf, the pair ``(DetectModule, index)``. For a non-leaf, a
        pair ``(modules, indices)`` of tuples whose first entries describe
        the head and whose remaining entries are the children's results.

    Labels missing from ``LAYOUT_INDEX`` fall back to ``LAYOUT_INDEX[UNK]``
    via ``get_or_else``. A non-leaf with no children fails at the ``zip``
    unpacking below.
    """
    unk_index = LAYOUT_INDEX[UNK]

    # Leaf: a bare label is always handled by a detect module.
    if isinstance(parse, str):
        return (DetectModule, LAYOUT_INDEX.get_or_else(parse, unk_index))

    head = parse[0]
    head_idx = LAYOUT_INDEX.get_or_else(head, unk_index)

    if internal:
        mod_head = ConjModule if head == "and" else RedetectModule
    else:
        mod_head = DenseAnswerModule if head == "count" else AttAnswerModule

    # Every child is an internal node, whatever this node was.
    children = []
    for child in parse[1:]:
        children.append(parse_to_layout_helper(child, internal=True))

    mods_below, indices_below = zip(*children)
    modules = (mod_head, ) + tuple(mods_below)
    indices = (head_idx, ) + tuple(indices_below)
    return modules, indices
Ejemplo n.º 4
0
def parse_to_layout_helper(parse, internal):
    """Recursively convert a parse tree into parallel module/index structures.

    Args:
        parse: either a ``str`` leaf label or a sequence whose first element
            is the head label and whose remaining elements are child parses.
        internal: ``False`` for the root call (the head becomes an answer
            module: ``DenseAnswerModule`` for "how many", otherwise
            ``AttAnswerModule``), ``True`` for nested nodes (``ConjModule``
            for "and", otherwise ``RedetectModule``).

    Returns:
        For a leaf, the pair ``(DetectModule, index)``. For a non-leaf, a
        pair ``(modules, indices)`` of tuples whose first entries describe
        the head and whose remaining entries are the children's results.

    Raises:
        ValueError: if a non-leaf node has no children (the ``zip``
            unpacking below has nothing to unpack).
    """
    if isinstance(parse, str):
        return (DetectModule, LAYOUT_INDEX.index(parse))

    head = parse[0]
    # Index the head label itself rather than the whole subtree, so that
    # non-leaf nodes are keyed by label exactly like leaves are.
    head_idx = LAYOUT_INDEX.index(head)

    if internal:
        mod_head = ConjModule if head == "and" else RedetectModule
    else:
        mod_head = DenseAnswerModule if head == "how many" else AttAnswerModule

    # Children are always internal nodes, whatever this node was.
    below = [parse_to_layout_helper(child, internal=True) for child in parse[1:]]
    mods_below, indices_below = zip(*below)
    return (mod_head,) + tuple(mods_below), (head_idx,) + tuple(indices_below)
Ejemplo n.º 5
0
    def __init__(self, config, set_name):
        """Load one split of the question / parse / answer / image-id files.

        Populates ``self.data`` (a set of ``CocoQADatum``) and three
        groupings of the same datums: ``self.by_layout_type`` (keyed by
        ``datum.layout.modules``), ``self.by_string_length`` (keyed by
        ``len(datum.string)``) and ``self.by_layout_and_length`` (keyed by
        the pair of both).

        Args:
            config: stored on ``self.config``; not otherwise used here.
            set_name: split name substituted into the ``*_FILE`` path
                templates. ``"val"`` returns immediately with every
                container empty. ``"train"`` additionally pre-registers in
                ``LAYOUT_INDEX`` each predicate that occurs more than once
                in the parse file.

        Datums whose construction raises ``IOError`` are skipped; the number
        skipped is logged with the summary.
        """
        self.config = config

        data = set()
        data_by_layout_type = defaultdict(list)
        data_by_string_length = defaultdict(list)
        data_by_layout_and_length = defaultdict(list)

        # The containers are filled in place below, so they can be published
        # on the instance up front.
        self.data = data
        self.by_layout_type = data_by_layout_type
        self.by_string_length = data_by_string_length
        self.by_layout_and_length = data_by_layout_and_length

        if set_name == "val":
            # NOTE(review): the "val" split is left empty without reading any
            # file -- confirm this is intentional.
            return

        if set_name == "train":
            # TODO better index
            # Count every predicate token in the parses (quotes and
            # parentheses stripped) and register only the repeated ones.
            pred_counter = defaultdict(int)
            with open(PARSE_FILE % set_name) as parse_f:
                for parse_str in parse_f:
                    parse_preds = parse_str.strip() \
                                           .replace("'", "") \
                                           .replace("(", "") \
                                           .replace(")", "") \
                                           .split()
                    for pred in parse_preds:
                        pred_counter[pred] += 1
            for pred, count in pred_counter.items():
                if count > 1:
                    LAYOUT_INDEX.index(pred)

        # Loop-invariant: every split other than "train" is labelled "val".
        coco_set_name = "train" if set_name == "train" else "val"
        skipped = 0

        with open(STRING_FILE % set_name) as question_f, \
             open(PARSE_FILE % set_name) as parse_f, \
             open(ANN_FILE % set_name) as ann_f, \
             open(IMAGE_ID_FILE % set_name) as image_id_f:

            # The four files are read in lockstep, one line per example.
            for question, parse_str, answer, image_id in \
                    zip(question_f, parse_f, ann_f, image_id_f):

                question = question.strip()
                parse_str = parse_str.strip().replace("'", "")
                answer = answer.strip()
                image_id = int(image_id.strip())
                words = ["<s>"] + question.split() + ["</s>"]

                parse = parse_tree(parse_str)

                answer = ANSWER_INDEX.index(answer)
                words = [STRING_INDEX.index(w) for w in words]
                if len(parse) == 1:
                    # A bare query gets a placeholder "object" argument.
                    parse = parse + ("object",)
                layout = parse_to_layout(parse)

                try:
                    datum = CocoQADatum(words, layout, image_id, answer, coco_set_name)
                    datum.raw_query = parse_str
                    data.add(datum)
                    data_by_layout_type[datum.layout.modules].append(datum)
                    data_by_string_length[len(datum.string)].append(datum)
                    data_by_layout_and_length[(datum.layout.modules, len(datum.string))].append(datum)
                except IOError:
                    # Best effort: keep loading, but remember how many
                    # examples were dropped so it shows up in the summary.
                    skipped += 1

        logging.info("%s:", set_name.upper())
        logging.info("%s items", len(self.data))
        if skipped:
            logging.info("%s items skipped (IOError)", skipped)
        logging.info("%s words", len(STRING_INDEX))
        logging.info("%s functions", len(LAYOUT_INDEX))
        logging.info("%s answers", len(ANSWER_INDEX))
        logging.info("%s layouts", len(self.by_layout_type))
        logging.info("")
Ejemplo n.º 6
0
    def __init__(self, config, set_name):
        """Load one split of the question / parse / answer / image-id files.

        Populates ``self.data`` (a set of ``CocoQADatum``) and three
        groupings of the same datums: ``self.by_layout_type`` (keyed by
        ``datum.layout.modules``), ``self.by_string_length`` (keyed by
        ``len(datum.string)``) and ``self.by_layout_and_length`` (keyed by
        the pair of both).

        Args:
            config: stored on ``self.config``; not otherwise used here.
            set_name: split name substituted into the ``*_FILE`` path
                templates. ``"val"`` returns immediately with every
                container empty. ``"train"`` additionally pre-registers in
                ``LAYOUT_INDEX`` each predicate that occurs more than once
                in the parse file.

        Datums whose construction raises ``IOError`` are skipped; the number
        skipped is logged with the summary.
        """
        self.config = config

        data = set()
        data_by_layout_type = defaultdict(list)
        data_by_string_length = defaultdict(list)
        data_by_layout_and_length = defaultdict(list)

        # The containers are filled in place below, so they can be published
        # on the instance up front.
        self.data = data
        self.by_layout_type = data_by_layout_type
        self.by_string_length = data_by_string_length
        self.by_layout_and_length = data_by_layout_and_length

        if set_name == "val":
            # NOTE(review): the "val" split is left empty without reading any
            # file -- confirm this is intentional.
            return

        if set_name == "train":
            # TODO better index
            # Count every predicate token in the parses (quotes and
            # parentheses stripped) and register only the repeated ones.
            pred_counter = defaultdict(int)
            with open(PARSE_FILE % set_name) as parse_f:
                for parse_str in parse_f:
                    parse_preds = parse_str.strip() \
                                           .replace("'", "") \
                                           .replace("(", "") \
                                           .replace(")", "") \
                                           .split()
                    for pred in parse_preds:
                        pred_counter[pred] += 1
            for pred, count in pred_counter.items():
                if count > 1:
                    LAYOUT_INDEX.index(pred)

        # Loop-invariant: every split other than "train" is labelled "val".
        coco_set_name = "train" if set_name == "train" else "val"
        skipped = 0

        with open(STRING_FILE % set_name) as question_f, \
             open(PARSE_FILE % set_name) as parse_f, \
             open(ANN_FILE % set_name) as ann_f, \
             open(IMAGE_ID_FILE % set_name) as image_id_f:

            # The four files are read in lockstep, one line per example.
            for question, parse_str, answer, image_id in \
                    zip(question_f, parse_f, ann_f, image_id_f):

                question = question.strip()
                parse_str = parse_str.strip().replace("'", "")
                answer = answer.strip()
                image_id = int(image_id.strip())
                words = ["<s>"] + question.split() + ["</s>"]

                parse = parse_tree(parse_str)

                answer = ANSWER_INDEX.index(answer)
                words = [STRING_INDEX.index(w) for w in words]
                if len(parse) == 1:
                    # A bare query gets a placeholder "object" argument.
                    parse = parse + ("object", )
                layout = parse_to_layout(parse)

                try:
                    datum = CocoQADatum(words, layout, image_id, answer,
                                        coco_set_name)
                    datum.raw_query = parse_str
                    data.add(datum)
                    data_by_layout_type[datum.layout.modules].append(datum)
                    data_by_string_length[len(datum.string)].append(datum)
                    data_by_layout_and_length[(
                        datum.layout.modules, len(datum.string))].append(datum)
                except IOError:
                    # Best effort: keep loading, but remember how many
                    # examples were dropped so it shows up in the summary.
                    skipped += 1

        logging.info("%s:", set_name.upper())
        logging.info("%s items", len(self.data))
        if skipped:
            logging.info("%s items skipped (IOError)", skipped)
        logging.info("%s words", len(STRING_INDEX))
        logging.info("%s functions", len(LAYOUT_INDEX))
        logging.info("%s answers", len(ANSWER_INDEX))
        logging.info("%s layouts", len(self.by_layout_type))
        logging.info("")