Esempio n. 1
0
def test_opt_model():
    """Plot precision and recall of the optimization model on the 'test' query.

    Questions, semantic annotations and labels are downloaded for the
    'test' query and syntax parses are built from the questions.  The data
    is split with ratio 0.5; the tag model is trained on everything, the
    semantic model on the first part of the split.  ``evaluate_opt_model``
    is run on the second part with 21 evenly spaced values in [-2, 2], and
    the value pairs of its result are plotted against its keys as
    'precision' and 'recall'.
    """
    query = 'test'
    all_questions = geoserver_interface.download_questions(query)
    all_syntax_parses = questions_to_syntax_parses(all_questions)
    all_annotations = geoserver_interface.download_semantics(query)
    # Downloaded but not consumed below.
    all_labels = geoserver_interface.download_labels(query)

    first_part, second_part = split(
        [all_syntax_parses, all_annotations, all_questions], 0.5)
    tr_s, tr_a, tr_q = first_part
    te_s, te_a, te_q = second_part

    tag_model = train_tag_model(all_syntax_parses, all_annotations)
    semantic_model = train_semantic_model(tag_model, tr_s, tr_a)

    sweep = np.linspace(-2, 2, 21)
    prs = evaluate_opt_model(semantic_model, te_s, te_a, all_questions, sweep)

    precisions, recalls = zip(*prs.values())
    xs = prs.keys()
    plt.plot(xs, precisions, 'o', label='precision')
    plt.plot(xs, recalls, 'o', label='recall')

    legend_options = {
        'bbox_to_anchor': (0., 1.02, 1., .102),
        'loc': 3,
        'ncol': 2,
        'mode': "expand",
        'borderaxespad': 0.,
    }
    plt.legend(**legend_options)
    plt.show()
Esempio n. 2
0
def test_solving():
    """Evaluate LengthOf(BC) for question 973 with the numeric solver.

    The question's diagram is turned into a graph parse and matched
    against the downloaded labels.  Hand-written formulas (AB and AC have
    equal length, E and D are midpoints of AB and AC, LengthOf(AE) == x,
    LengthOf(ED) == 4) are grounded and joined with the confident diagram
    formulas; the grounded query is evaluated by ``NumericSolver`` and the
    result is printed.
    """
    pk = 973
    question = geoserver_interface.download_questions(pk).values()[0]
    label_data = geoserver_interface.download_labels(pk)[pk]

    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    match_parse = parse_match_from_known_labels(graph_parse, label_data)

    # Variables are created in the order the names are listed.
    line_ab, line_ac, line_bc, line_ed, line_ae = [
        v(name, 'line') for name in ('AB', 'AC', 'BC', 'ED', 'AE')
    ]
    point_e, point_d = [v(name, 'point') for name in ('E', 'D')]
    unknown_x = v('x', 'number')

    premises = [
        f('LengthOf', line_ab) == f('LengthOf', line_ac),
        f('IsMidpointOf', point_e, line_ab),
        f('IsMidpointOf', point_d, line_ac),
        f('LengthOf', line_ae) == unknown_x,
        f('LengthOf', line_ed) == 4,
    ]
    query = f('LengthOf', line_bc)

    diagram_atoms = parse_confident_formulas(graph_parse)
    text_atoms = ground_formula_nodes(match_parse, premises)
    grounded_query = ground_formula_nodes(match_parse, [query])[0]

    solver = NumericSolver(diagram_atoms + text_atoms)
    print(solver.evaluate(grounded_query))
Esempio n. 3
0
def test_rule_model():
    """Plot the five result dictionaries of ``evaluate_rule_model``.

    Data for the 'test' query is downloaded and split with ratio 0.5.  The
    tag model is trained on everything, the semantic model on the first
    part of the split, and ``evaluate_rule_model`` is run on the second
    part with 101 evenly spaced values in [0, 1].  Each returned dictionary
    is shown in its own figure (values against keys), starting with the
    core-tree results.
    """
    query = 'test'
    all_questions = geoserver_interface.download_questions(query)
    all_syntax_parses = questions_to_syntax_parses(all_questions)
    all_annotations = geoserver_interface.download_semantics(query)
    # Downloaded but not consumed below.
    all_labels = geoserver_interface.download_labels(query)

    first_part, second_part = split((all_syntax_parses, all_annotations), 0.5)
    tr_s, tr_a = first_part
    te_s, te_a = second_part

    tag_model = train_tag_model(all_syntax_parses, all_annotations)
    semantic_model = train_semantic_model(tag_model, tr_s, tr_a)
    results = evaluate_rule_model(
        semantic_model, te_s, te_a, np.linspace(0, 1, 101))
    unary_prs, core_prs, is_prs, cc_prs, core_tree_prs = results

    # One figure per result set, in the order they are listed here.
    for prs in (core_tree_prs, unary_prs, core_prs, is_prs, cc_prs):
        plt.plot(prs.keys(), prs.values(), 'o')
        plt.show()
Esempio n. 4
0
def test_ground_atoms():
    """Ground a set of hand-written formulas for question 973 and print them.

    The question's diagram is turned into a graph parse and matched
    against the downloaded labels.  Five premises and the query
    LengthOf(BC) are grounded in one call to ``ground_formula_nodes``; each
    grounded atom is printed and the core parse's points are displayed.
    """
    pk = 973
    question = geoserver_interface.download_questions(pk).values()[0]
    label_data = geoserver_interface.download_labels(pk)[pk]

    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    match_parse = parse_match_from_known_labels(graph_parse, label_data)

    # Variables are created in the order the names are listed.
    line_ab, line_ac, line_bc, line_ed, line_ae = [
        v(name, 'line') for name in ('AB', 'AC', 'BC', 'ED', 'AE')
    ]
    point_e, point_d = [v(name, 'point') for name in ('E', 'D')]
    unknown_x = v('x', 'number')

    formulas = [
        f('LengthOf', line_ab) == f('LengthOf', line_ac),
        f('IsMidpointOf', point_e, line_ab),
        f('IsMidpointOf', point_d, line_ac),
        f('LengthOf', line_ae) == unknown_x,
        f('LengthOf', line_ed) == 4,
        f('LengthOf', line_bc),
    ]

    for atom in ground_formula_nodes(match_parse, formulas):
        print(atom)

    graph_parse.core_parse.display_points()
Esempio n. 5
0
def save_parse_primitives(file_path="/Users/minjoon/Desktop/primitives.png"):
    """Write the primitive-parse image of question 1037's diagram to disk.

    Args:
        file_path: Destination passed to ``cv2.imwrite``.  Defaults to the
            path that used to be hard-coded, so calls without arguments
            behave as before.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    diagram = open_image(question.diagram_path)
    primitive_parse = parse_primitives(parse_image_segments(diagram))
    cv2.imwrite(file_path, primitive_parse.get_image_primitives())
Esempio n. 6
0
def test_parse_graph():
    """Parse the diagram(s) downloaded for question 973 and display the graph.

    Each diagram goes through image segmentation, primitive parsing,
    primitive selection, core parsing and graph parsing.  The confident
    atoms are printed, the core points are shown, and then every line,
    circle, arc and angle instance of the graph is displayed one at a time.
    """
    sections = (
        ('line', "Displaying lines..."),
        ('circle', "Displaying circles..."),
        ('arc', "Displaying arcs..."),
        ('angle', "Displaying angles..."),
    )
    for question in geoserver_interface.download_questions(973).values():
        diagram = open_image(question.diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        core_parse = parse_core(select_primitives(primitive_parse))
        graph_parse = parse_graph(core_parse)

        print("Confident information in the diagram:")
        for atom in parse_confident_atoms(graph_parse):
            print(atom)

        core_parse.display_points()

        # Gather every instance group before displaying any of them.
        groups = [(banner, get_all_instances(graph_parse, kind))
                  for kind, banner in sections]
        for banner, instances in groups:
            print(banner)
            for _, instance in instances.iteritems():
                graph_parse.display_instances([instance])
Esempio n. 7
0
def data_stat(query):
    """Print corpus statistics and the tag-model lexicon for ``query``.

    For every syntax parse the annotated semantic trees are built and their
    unary and binary rules collected.  A tag model is trained on the same
    data.  The function prints each question key and its semantic trees as
    it goes, then the number of sentences, words, literals (semantic
    trees), unary rules and binary rules, and finally every lexicon entry
    of the tag model.

    Args:
        query: Passed to ``geoserver_interface.download_questions`` and
            ``download_semantics`` to select the data.
    """
    questions = geoserver_interface.download_questions(query)
    syntax_parses = questions_to_syntax_parses(questions, parser=False)
    annotations = geoserver_interface.download_semantics(query)

    semantic_trees = []
    unary_rules = []
    binary_rules = []
    for pk, local_syntax_parses in syntax_parses.iteritems():
        print(pk)
        for number, syntax_parse in local_syntax_parses.iteritems():
            local_semantic_trees = []
            for annotation in annotations[pk][number].values():
                local_semantic_trees.append(
                    annotation_to_semantic_tree(syntax_parse, annotation))
            semantic_trees.extend(local_semantic_trees)
            print(local_semantic_trees)
            for tree in local_semantic_trees:
                unary_rules.extend(tree.get_unary_rules())
                binary_rules.extend(tree.get_binary_rules())

    tag_model = train_tag_model(syntax_parses, annotations)

    sentence_count = 0
    word_count = 0
    for _, question in questions.iteritems():
        sentence_count += len(question.sentence_words)
        for _, words in question.sentence_words.iteritems():
            word_count += len(words)

    print("sentences: %d" % sentence_count)
    print("words: %d" % word_count)
    print("literals: %d" % len(semantic_trees))
    print("unary rules: %d" % len(unary_rules))
    print("binary rules: %d" % len(binary_rules))

    print("")
    print("LEXICON")
    for key, entries in tag_model.lexicon.iteritems():
        joined_entries = ", ".join(" ".join(entry) for entry in entries)
        print("%s: %s" % ("_".join(key), joined_entries))
Esempio n. 8
0
def test_solving():
    """Evaluate LengthOf(BC) for question 973 with the numeric solver.

    The question's diagram is turned into a graph parse and matched
    against the downloaded labels.  Hand-written formulas (AB and AC have
    equal length, E and D are midpoints of AB and AC, LengthOf(AE) == x,
    LengthOf(ED) == 4) are grounded and joined with the confident diagram
    formulas; the grounded query is evaluated by ``NumericSolver`` and the
    result is printed.
    """
    pk = 973
    question = geoserver_interface.download_questions(pk).values()[0]
    label_data = geoserver_interface.download_labels(pk)[pk]

    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    match_parse = parse_match_from_known_labels(graph_parse, label_data)

    # Variables are created in the order the names are listed.
    line_ab, line_ac, line_bc, line_ed, line_ae = [
        v(name, 'line') for name in ('AB', 'AC', 'BC', 'ED', 'AE')
    ]
    point_e, point_d = [v(name, 'point') for name in ('E', 'D')]
    unknown_x = v('x', 'number')

    premises = [
        f('LengthOf', line_ab) == f('LengthOf', line_ac),
        f('IsMidpointOf', point_e, line_ab),
        f('IsMidpointOf', point_d, line_ac),
        f('LengthOf', line_ae) == unknown_x,
        f('LengthOf', line_ed) == 4,
    ]
    query = f('LengthOf', line_bc)

    diagram_atoms = parse_confident_formulas(graph_parse)
    text_atoms = ground_formula_nodes(match_parse, premises)
    grounded_query = ground_formula_nodes(match_parse, [query])[0]

    solver = NumericSolver(diagram_atoms + text_atoms)
    print(solver.evaluate(grounded_query))
Esempio n. 9
0
def test_ground_atoms():
    """Ground a set of hand-written formulas for question 973 and print them.

    The question's diagram is turned into a graph parse and matched
    against the downloaded labels.  Five premises and the query
    LengthOf(BC) are grounded in one call to ``ground_formula_nodes``; each
    grounded atom is printed and the core parse's points are displayed.
    """
    pk = 973
    question = geoserver_interface.download_questions(pk).values()[0]
    label_data = geoserver_interface.download_labels(pk)[pk]

    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    match_parse = parse_match_from_known_labels(graph_parse, label_data)

    # Variables are created in the order the names are listed.
    line_ab, line_ac, line_bc, line_ed, line_ae = [
        v(name, 'line') for name in ('AB', 'AC', 'BC', 'ED', 'AE')
    ]
    point_e, point_d = [v(name, 'point') for name in ('E', 'D')]
    unknown_x = v('x', 'number')

    formulas = [
        f('LengthOf', line_ab) == f('LengthOf', line_ac),
        f('IsMidpointOf', point_e, line_ab),
        f('IsMidpointOf', point_d, line_ac),
        f('LengthOf', line_ae) == unknown_x,
        f('LengthOf', line_ed) == 4,
        f('LengthOf', line_bc),
    ]

    for atom in ground_formula_nodes(match_parse, formulas):
        print(atom)

    graph_parse.core_parse.display_points()
Esempio n. 10
0
def data_stat(query):
    """Print corpus statistics and the tag-model lexicon for ``query``.

    For every syntax parse the annotated semantic trees are built and their
    unary and binary rules collected.  A tag model is trained on the same
    data.  The function prints each question key and its semantic trees as
    it goes, then the number of sentences, words, literals (semantic
    trees), unary rules and binary rules, and finally every lexicon entry
    of the tag model.

    Args:
        query: Passed to ``geoserver_interface.download_questions`` and
            ``download_semantics`` to select the data.
    """
    questions = geoserver_interface.download_questions(query)
    syntax_parses = questions_to_syntax_parses(questions, parser=False)
    annotations = geoserver_interface.download_semantics(query)

    semantic_trees = []
    unary_rules = []
    binary_rules = []
    for pk, local_syntax_parses in syntax_parses.iteritems():
        print(pk)
        for number, syntax_parse in local_syntax_parses.iteritems():
            local_semantic_trees = []
            for annotation in annotations[pk][number].values():
                local_semantic_trees.append(
                    annotation_to_semantic_tree(syntax_parse, annotation))
            semantic_trees.extend(local_semantic_trees)
            print(local_semantic_trees)
            for tree in local_semantic_trees:
                unary_rules.extend(tree.get_unary_rules())
                binary_rules.extend(tree.get_binary_rules())

    tag_model = train_tag_model(syntax_parses, annotations)

    sentence_count = 0
    word_count = 0
    for _, question in questions.iteritems():
        sentence_count += len(question.sentence_words)
        for _, words in question.sentence_words.iteritems():
            word_count += len(words)

    print("sentences: %d" % sentence_count)
    print("words: %d" % word_count)
    print("literals: %d" % len(semantic_trees))
    print("unary rules: %d" % len(unary_rules))
    print("binary rules: %d" % len(binary_rules))

    print("")
    print("LEXICON")
    for key, entries in tag_model.lexicon.iteritems():
        joined_entries = ", ".join(" ".join(entry) for entry in entries)
        print("%s: %s" % ("_".join(key), joined_entries))
Esempio n. 11
0
def save_parse_image_segments(file_path="/Users/minjoon/Desktop/diagram.png"):
    """Write the segmented diagram image of question 1037 to disk.

    Args:
        file_path: Destination passed to ``cv2.imwrite``.  Defaults to the
            path that used to be hard-coded, so calls without arguments
            behave as before.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    segment_parse = parse_image_segments(open_image(question.diagram_path))
    cv2.imwrite(file_path,
                segment_parse.diagram_image_segment.segmented_image)
Esempio n. 12
0
def test_parse_graph():
    """Parse the diagram(s) downloaded for question 973 and display the graph.

    Each diagram goes through image segmentation, primitive parsing,
    primitive selection, core parsing and graph parsing.  The confident
    atoms are printed, the core points are shown, and then every line,
    circle, arc and angle instance of the graph is displayed one at a time.
    """
    sections = (
        ('line', "Displaying lines..."),
        ('circle', "Displaying circles..."),
        ('arc', "Displaying arcs..."),
        ('angle', "Displaying angles..."),
    )
    for question in geoserver_interface.download_questions(973).values():
        diagram = open_image(question.diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        core_parse = parse_core(select_primitives(primitive_parse))
        graph_parse = parse_graph(core_parse)

        print("Confident information in the diagram:")
        for atom in parse_confident_atoms(graph_parse):
            print(atom)

        core_parse.display_points()

        # Gather every instance group before displaying any of them.
        groups = [(banner, get_all_instances(graph_parse, kind))
                  for kind, banner in sections]
        for banner, instances in groups:
            print(banner)
            for _, instance in instances.iteritems():
                graph_parse.display_instances([instance])
Esempio n. 13
0
def save_parse_primitives(file_path="/Users/minjoon/Desktop/primitives.png"):
    """Write the primitive-parse image of question 1037's diagram to disk.

    Args:
        file_path: Destination passed to ``cv2.imwrite``.  Defaults to the
            path that used to be hard-coded, so calls without arguments
            behave as before.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    diagram = open_image(question.diagram_path)
    primitive_parse = parse_primitives(parse_image_segments(diagram))
    cv2.imwrite(file_path, primitive_parse.get_image_primitives())
Esempio n. 14
0
def test_select_primitives():
    """Display the selected primitives of every 'test' question, in key order.

    Each key is printed before its diagram is segmented, parsed into
    primitives, filtered by ``select_primitives`` and displayed.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict):
        print(key)
        diagram = open_image(question_dict[key].diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        select_primitives(primitive_parse).display_primitives()
Esempio n. 15
0
def test_parse_image_segments():
    """Display the image segments parsed from question 1037's diagram.

    The binarized diagram segment is shown first, followed by each label
    segment in turn.
    """
    question = geoserver_interface.download_questions(1037).values()[0]
    segment_parse = parse_image_segments(open_image(question.diagram_path))

    diagram_segment = segment_parse.diagram_image_segment
    diagram_segment.display_binarized_segmented_image()

    for _, label_segment in segment_parse.label_image_segments.iteritems():
        label_segment.display_segmented_image()
Esempio n. 16
0
def _annotated_unit_test(query):
    """Solve the first question downloaded for ``query`` from its annotations.

    Label-match formulas and confident diagram formulas are combined with
    text formulas built, sentence by sentence, from the downloaded semantic
    annotations and the question's sentence expressions.  The reduced
    formulas are printed with their scores, handed to ``solve``, and the
    answer is compared with ``question.answer``: numerically (tolerance
    0.01) when there are no choice formulas, otherwise by picking the
    choice with the highest ``conf``.

    Args:
        query: Passed to the geoserver download calls to select the data.

    Returns:
        ``SimpleResult(query, False, attempted, correct)``; ``attempted``
        is always True.
    """
    questions = geoserver_interface.download_questions(query)
    all_annotations = geoserver_interface.download_semantics(query)
    pk, question = questions.items()[0]

    choice_formulas = get_choice_formulas(question)
    label_data = geoserver_interface.download_labels(pk)[pk]
    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    core_parse = graph_parse.core_parse
    match_parse = parse_match_from_known_labels(graph_parse, label_data)
    match_formulas = parse_match_formulas(match_parse)
    diagram_formulas = parse_confident_formulas(graph_parse)
    all_formulas = match_formulas + diagram_formulas

    for number, sentence_words in question.sentence_words.iteritems():
        syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words)

        annotation_nodes = []
        for annotation in all_annotations[pk][number].values():
            annotation_nodes.append(
                annotation_to_semantic_tree(syntax_parse, annotation))

        expr_formulas = {}
        expressions = question.sentence_expressions[number]
        for key, expression in expressions.iteritems():
            expr_formulas[key] = prefix_to_formula(
                expression_parser.parse_prefix(expression))

        truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(
            expr_formulas)
        completed_formulas = complete_formulas(
            semantic_trees_to_text_formula_parse(annotation_nodes))

        grounded_formulas = []
        for formula in completed_formulas + truth_expr_formulas:
            grounded_formulas.append(
                ground_formula(match_parse, formula, value_expr_formulas))

        all_formulas.extend(
            filter_formulas(flatten_formulas(grounded_formulas)))

    reduced_formulas = reduce_formulas(all_formulas)
    for reduced_formula in reduced_formulas:
        score = evaluate(reduced_formula, core_parse.variable_assignment)
        child_scores = [evaluate(child, core_parse.variable_assignment)
                        for child in reduced_formula.children]
        print("%s %s %s" % (reduced_formula, score, child_scores))

    ans = solve(reduced_formulas, choice_formulas,
                assignment=core_parse.variable_assignment)
    print("ans: %s" % (ans,))

    # Both answer formats count as an attempt.
    attempted = True
    if choice_formulas is None:
        # Free-response question: compare the numeric answer directly.
        correct = bool(abs(ans - float(question.answer)) < 0.01)
    else:
        # Multiple choice: take the choice whose result has the highest conf.
        best_choice, _ = max(ans.iteritems(), key=lambda pair: pair[1].conf)
        correct = bool(best_choice == int(question.answer))

    return SimpleResult(query, False, attempted, correct)
Esempio n. 17
0
def _annotated_unit_test(query):
    """Solve the first question downloaded for ``query`` from its annotations.

    Label-match formulas and confident diagram formulas are combined with
    text formulas built, sentence by sentence, from the downloaded semantic
    annotations and the question's sentence expressions.  The reduced
    formulas are printed with their scores, handed to ``solve``, and the
    answer is compared with ``question.answer``: numerically (tolerance
    0.01) when there are no choice formulas, otherwise by picking the
    choice with the highest ``conf``.

    Args:
        query: Passed to the geoserver download calls to select the data.

    Returns:
        ``SimpleResult(query, False, attempted, correct)``; ``attempted``
        is always True.
    """
    questions = geoserver_interface.download_questions(query)
    all_annotations = geoserver_interface.download_semantics(query)
    pk, question = questions.items()[0]

    choice_formulas = get_choice_formulas(question)
    label_data = geoserver_interface.download_labels(pk)[pk]
    graph_parse = diagram_to_graph_parse(open_image(question.diagram_path))
    core_parse = graph_parse.core_parse
    match_parse = parse_match_from_known_labels(graph_parse, label_data)
    match_formulas = parse_match_formulas(match_parse)
    diagram_formulas = parse_confident_formulas(graph_parse)
    all_formulas = match_formulas + diagram_formulas

    for number, sentence_words in question.sentence_words.iteritems():
        syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words)

        annotation_nodes = []
        for annotation in all_annotations[pk][number].values():
            annotation_nodes.append(
                annotation_to_semantic_tree(syntax_parse, annotation))

        expr_formulas = {}
        expressions = question.sentence_expressions[number]
        for key, expression in expressions.iteritems():
            expr_formulas[key] = prefix_to_formula(
                expression_parser.parse_prefix(expression))

        truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(
            expr_formulas)
        completed_formulas = complete_formulas(
            semantic_trees_to_text_formula_parse(annotation_nodes))

        grounded_formulas = []
        for formula in completed_formulas + truth_expr_formulas:
            grounded_formulas.append(
                ground_formula(match_parse, formula, value_expr_formulas))

        all_formulas.extend(
            filter_formulas(flatten_formulas(grounded_formulas)))

    reduced_formulas = reduce_formulas(all_formulas)
    for reduced_formula in reduced_formulas:
        score = evaluate(reduced_formula, core_parse.variable_assignment)
        child_scores = [evaluate(child, core_parse.variable_assignment)
                        for child in reduced_formula.children]
        print("%s %s %s" % (reduced_formula, score, child_scores))

    ans = solve(reduced_formulas, choice_formulas,
                assignment=core_parse.variable_assignment)
    print("ans: %s" % (ans,))

    # Both answer formats count as an attempt.
    attempted = True
    if choice_formulas is None:
        # Free-response question: compare the numeric answer directly.
        correct = bool(abs(ans - float(question.answer)) < 0.01)
    else:
        # Multiple choice: take the choice whose result has the highest conf.
        best_choice, _ = max(ans.iteritems(), key=lambda pair: pair[1].conf)
        correct = bool(best_choice == int(question.answer))

    return SimpleResult(query, False, attempted, correct)
Esempio n. 18
0
def test_parse_match_from_known_labels():
    """Print the label matches for question 977 and show its core points.

    For each downloaded question the labels are fetched, the diagram is
    turned into a graph parse, and every key/value pair of the resulting
    ``match_dict`` is printed on its own line.
    """
    questions = geoserver_interface.download_questions(977)
    for pk, question in questions.iteritems():
        label_data = geoserver_interface.download_labels(pk)[pk]
        graph_parse = diagram_to_graph_parse(
            open_image(question.diagram_path))
        match_parse = parse_match_from_known_labels(graph_parse, label_data)
        for label, matched in match_parse.match_dict.iteritems():
            print("%s %s" % (label, matched))
        graph_parse.core_parse.display_points()
Esempio n. 19
0
def test_select_primitives():
    """Display the selected primitives of every 'test' question, in key order.

    Each key is printed before its diagram is segmented, parsed into
    primitives, filtered by ``select_primitives`` and displayed.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict):
        print(key)
        diagram = open_image(question_dict[key].diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        select_primitives(primitive_parse).display_primitives()
Esempio n. 20
0
def test_parse_match_from_known_labels():
    """Print the label matches for question 977 and show its core points.

    For each downloaded question the labels are fetched, the diagram is
    turned into a graph parse, and every key/value pair of the resulting
    ``match_dict`` is printed on its own line.
    """
    questions = geoserver_interface.download_questions(977)
    for pk, question in questions.iteritems():
        label_data = geoserver_interface.download_labels(pk)[pk]
        graph_parse = diagram_to_graph_parse(
            open_image(question.diagram_path))
        match_parse = parse_match_from_known_labels(graph_parse, label_data)
        for label, matched in match_parse.match_dict.iteritems():
            print("%s %s" % (label, matched))
        graph_parse.core_parse.display_points()
Esempio n. 21
0
def test_parse_match_atoms():
    """Print the match formulas for question 977 and show its core points.

    For each downloaded question the labels are fetched, the diagram is
    turned into a graph parse and matched against the labels, and every
    formula returned by ``parse_match_formulas`` is printed.
    """
    questions = geoserver_interface.download_questions(977)
    for pk, question in questions.iteritems():
        label_data = geoserver_interface.download_labels(pk)[pk]
        graph_parse = diagram_to_graph_parse(
            open_image(question.diagram_path))
        match_parse = parse_match_from_known_labels(graph_parse, label_data)
        for atom in parse_match_formulas(match_parse):
            print(atom)
        graph_parse.core_parse.display_points()
Esempio n. 22
0
def save_select_primitives(folder_path="/Users/minjoon/Desktop/selected/"):
    """Write the selected-primitives image of every 'test' question to disk.

    Questions are processed in key order and each key is printed as it is
    reached.  The image is saved as ``<question.key>.png``.

    Args:
        folder_path: Directory the images are written into.  Defaults to
            the folder that used to be hard-coded, so calls without
            arguments behave as before.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict):
        question = question_dict[key]
        print(key)
        diagram = open_image(question.diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        image = select_primitives(primitive_parse).get_image_primitives()
        file_path = os.path.join(folder_path, "%s.png" % str(question.key))
        cv2.imwrite(file_path, image)
Esempio n. 23
0
def test_parse_match_atoms():
    """Print the match formulas for question 977 and show its core points.

    For each downloaded question the labels are fetched, the diagram is
    turned into a graph parse and matched against the labels, and every
    formula returned by ``parse_match_formulas`` is printed.
    """
    questions = geoserver_interface.download_questions(977)
    for pk, question in questions.iteritems():
        label_data = geoserver_interface.download_labels(pk)[pk]
        graph_parse = diagram_to_graph_parse(
            open_image(question.diagram_path))
        match_parse = parse_match_from_known_labels(graph_parse, label_data)
        for atom in parse_match_formulas(match_parse):
            print(atom)
        graph_parse.core_parse.display_points()
Esempio n. 24
0
def save_select_primitives(folder_path="/Users/minjoon/Desktop/selected/"):
    """Write the selected-primitives image of every 'test' question to disk.

    Questions are processed in key order and each key is printed as it is
    reached.  The image is saved as ``<question.key>.png``.

    Args:
        folder_path: Directory the images are written into.  Defaults to
            the folder that used to be hard-coded, so calls without
            arguments behave as before.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict):
        question = question_dict[key]
        print(key)
        diagram = open_image(question.diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        image = select_primitives(primitive_parse).get_image_primitives()
        file_path = os.path.join(folder_path, "%s.png" % str(question.key))
        cv2.imwrite(file_path, image)
Esempio n. 25
0
def save_parse_core(folder_path="/Users/minjoon/Desktop/core/"):
    """Write the core-parse point image of every 'test' question to disk.

    Questions are processed in key order and each key is printed as it is
    reached.  The image is saved as ``<question.key>.png``; questions
    whose image file already exists are skipped.

    Args:
        folder_path: Directory the images are written into.  Defaults to
            the folder that used to be hard-coded, so calls without
            arguments behave as before.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict):
        print(key)
        question = question_dict[key]
        file_path = os.path.join(folder_path, str(question.key) + ".png")
        if os.path.isfile(file_path):
            # Already written on an earlier run; do not parse it again.
            continue
        diagram = open_image(question.diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        core_parse = parse_core(select_primitives(primitive_parse))
        cv2.imwrite(file_path, core_parse.get_image_points())
Esempio n. 26
0
def save_parse_core(folder_path="/Users/minjoon/Desktop/core/"):
    """Write the core-parse point image of every 'test' question to disk.

    Questions are processed in key order and each key is printed as it is
    reached.  The image is saved as ``<question.key>.png``; questions
    whose image file already exists are skipped.

    Args:
        folder_path: Directory the images are written into.  Defaults to
            the folder that used to be hard-coded, so calls without
            arguments behave as before.
    """
    question_dict = geoserver_interface.download_questions('test')
    for key in sorted(question_dict):
        print(key)
        question = question_dict[key]
        file_path = os.path.join(folder_path, str(question.key) + ".png")
        if os.path.isfile(file_path):
            # Already written on an earlier run; do not parse it again.
            continue
        diagram = open_image(question.diagram_path)
        primitive_parse = parse_primitives(parse_image_segments(diagram))
        core_parse = parse_core(select_primitives(primitive_parse))
        cv2.imwrite(file_path, core_parse.get_image_points())
Esempio n. 27
0
def save_questions(query):
    """Export the questions of ``query`` as numbered JSON/PNG pairs.

    The files are written to ``../../temp/data/<query>``, which is created
    (including missing parent directories) when it does not exist.  Each
    question gets a JSON file with its key, text, choices and answer, and a
    copy of its diagram; both are named after the question's position in
    the download, formatted by ``get_number_string(index, 3)``.  Every
    question key is printed as it is processed.

    Args:
        query: Passed to ``geoserver_interface.download_questions`` and
            used as the name of the output sub-directory.
    """
    questions = geoserver_interface.download_questions(query)
    base_path = os.path.join("../../temp/data/", query)
    if not os.path.exists(base_path):
        # makedirs also creates missing parents, where mkdir would raise.
        os.makedirs(base_path)

    for index, (key, question) in enumerate(questions.iteritems()):
        print(key)
        folder_name = get_number_string(index, 3)
        json_path = os.path.join(base_path, folder_name + ".json")
        diagram_path = os.path.join(base_path, folder_name + ".png")
        record = {
            'key': question.key,
            'text': question.text,
            'choices': question.choices,
            'answer': str(int(question.answer)),
        }
        # The context manager closes (and flushes) the file for every
        # question.  Text mode is what json.dump expects; its default
        # output has no raw newlines, so the bytes on disk are unchanged.
        with open(json_path, 'w') as json_file:
            json.dump(record, json_file)
        shutil.copyfile(question.diagram_path, diagram_path)
Esempio n. 28
0
def save_questions(query):
    """Export the questions of ``query`` as numbered JSON/PNG pairs.

    The files are written to ``../../temp/data/<query>``, which is created
    (including missing parent directories) when it does not exist.  Each
    question gets a JSON file with its key, text, choices and answer, and a
    copy of its diagram; both are named after the question's position in
    the download, formatted by ``get_number_string(index, 3)``.  Every
    question key is printed as it is processed.

    Args:
        query: Passed to ``geoserver_interface.download_questions`` and
            used as the name of the output sub-directory.
    """
    questions = geoserver_interface.download_questions(query)
    base_path = os.path.join("../../temp/data/", query)
    if not os.path.exists(base_path):
        # makedirs also creates missing parents, where mkdir would raise.
        os.makedirs(base_path)

    for index, (key, question) in enumerate(questions.iteritems()):
        print(key)
        folder_name = get_number_string(index, 3)
        json_path = os.path.join(base_path, folder_name + ".json")
        diagram_path = os.path.join(base_path, folder_name + ".png")
        record = {
            'key': question.key,
            'text': question.text,
            'choices': question.choices,
            'answer': str(int(question.answer)),
        }
        # The context manager closes (and flushes) the file for every
        # question.  Text mode is what json.dump expects; its default
        # output has no raw newlines, so the bytes on disk are unchanged.
        with open(json_path, 'w') as json_file:
            json.dump(record, json_file)
        shutil.copyfile(question.diagram_path, diagram_path)
Esempio n. 29
0
def test_opt_model():
    """Plot precision and recall of the optimization model on the 'test' query.

    Questions, semantic annotations and labels are downloaded for the
    'test' query and syntax parses are built from the questions.  The data
    is split with ratio 0.5; the tag model is trained on everything, the
    semantic model on the first part of the split.  ``evaluate_opt_model``
    is run on the second part with 21 evenly spaced values in [-2, 2], and
    the value pairs of its result are plotted against its keys as
    'precision' and 'recall'.
    """
    query = 'test'
    all_questions = geoserver_interface.download_questions(query)
    all_syntax_parses = questions_to_syntax_parses(all_questions)
    all_annotations = geoserver_interface.download_semantics(query)
    # Downloaded but not consumed below.
    all_labels = geoserver_interface.download_labels(query)

    first_part, second_part = split(
        [all_syntax_parses, all_annotations, all_questions], 0.5)
    tr_s, tr_a, tr_q = first_part
    te_s, te_a, te_q = second_part

    tag_model = train_tag_model(all_syntax_parses, all_annotations)
    semantic_model = train_semantic_model(tag_model, tr_s, tr_a)

    sweep = np.linspace(-2, 2, 21)
    prs = evaluate_opt_model(semantic_model, te_s, te_a, all_questions, sweep)

    precisions, recalls = zip(*prs.values())
    xs = prs.keys()
    plt.plot(xs, precisions, 'o', label='precision')
    plt.plot(xs, recalls, 'o', label='recall')

    legend_options = {
        'bbox_to_anchor': (0., 1.02, 1., .102),
        'loc': 3,
        'ncol': 2,
        'mode': "expand",
        'borderaxespad': 0.,
    }
    plt.legend(**legend_options)
    plt.show()
Esempio n. 30
0
def test_rule_model():
    """Plot the five result dictionaries of ``evaluate_rule_model``.

    Data for the 'test' query is downloaded and split with ratio 0.5.  The
    tag model is trained on everything, the semantic model on the first
    part of the split, and ``evaluate_rule_model`` is run on the second
    part with 101 evenly spaced values in [0, 1].  Each returned dictionary
    is shown in its own figure (values against keys), starting with the
    core-tree results.
    """
    query = 'test'
    all_questions = geoserver_interface.download_questions(query)
    all_syntax_parses = questions_to_syntax_parses(all_questions)
    all_annotations = geoserver_interface.download_semantics(query)
    # Downloaded but not consumed below.
    all_labels = geoserver_interface.download_labels(query)

    first_part, second_part = split((all_syntax_parses, all_annotations), 0.5)
    tr_s, tr_a = first_part
    te_s, te_a = second_part

    tag_model = train_tag_model(all_syntax_parses, all_annotations)
    semantic_model = train_semantic_model(tag_model, tr_s, tr_a)
    results = evaluate_rule_model(
        semantic_model, te_s, te_a, np.linspace(0, 1, 101))
    unary_prs, core_prs, is_prs, cc_prs, core_tree_prs = results

    # One figure per result set, in the order they are listed here.
    for prs in (core_tree_prs, unary_prs, core_prs, is_prs, cc_prs):
        plt.plot(prs.keys(), prs.values(), 'o')
        plt.show()
Esempio n. 31
0
def test_zip_diagrams():
    """Download the 'development' questions and pass them to ``zip_diagrams``.

    The archive path handed to ``zip_diagrams`` is a fixed file on the
    original author's desktop.
    """
    archive_path = '/Users/minjoon/Desktop/development.zip'
    development_questions = geoserver_interface.download_questions(
        ['development'])
    zip_diagrams(development_questions, archive_path)
Esempio n. 32
0
def full_test(load=True):
    """Run ``full_unit_test`` on a fixed list of questions and print a summary.

    The 'aaai' questions are used for training and the 'emnlp' questions
    supply the test syntax parses; the questions actually solved are the
    four keys in ``te_keys``.  After every question a running tally is
    printed, followed at the end by the duration and totals.  The solved
    keys are finally written, as strings, to ``dirs.json`` under
    ``demo_path``.

    Args:
        load: When true (the default, matching the flag that used to be
            hard-coded) the syntax parses and the semantic model are read
            from 'syntax_parses.p' and 'cm.p' in the working directory.
            When false they are recomputed / retrained and written to
            those files.
    """
    start = time.time()

    tr_questions = geoserver_interface.download_questions('aaai')
    te_questions = geoserver_interface.download_questions('emnlp')
    te_keys = [968, 971, 973, 1018]
    # Test questions win on duplicate keys, as in a concatenated item list.
    all_questions = dict(tr_questions)
    all_questions.update(te_questions)
    tr_ids = tr_questions.keys()
    te_ids = te_questions.keys()

    # NOTE: pickle can execute arbitrary code while loading; these cache
    # files must only ever come from a trusted local run.
    if load:
        with open('syntax_parses.p', 'rb') as parses_file:
            all_syntax_parses = pickle.load(parses_file)
    else:
        all_syntax_parses = questions_to_syntax_parses(all_questions)
        with open('syntax_parses.p', 'wb') as parses_file:
            pickle.dump(all_syntax_parses, parses_file)
    all_annotations = geoserver_interface.download_semantics()
    all_labels = geoserver_interface.download_labels()

    correct = 0
    penalized = 0
    error = 0
    total = len(te_keys)

    tr_s = {id_: all_syntax_parses[id_] for id_ in tr_ids}
    tr_a = {id_: all_annotations[id_] for id_ in tr_ids}
    te_s = {id_: all_syntax_parses[id_] for id_ in te_ids}

    if load:
        with open('cm.p', 'rb') as model_file:
            cm = pickle.load(model_file)
    else:
        tm = train_tag_model(all_syntax_parses, all_annotations)
        cm = train_semantic_model(tm, tr_s, tr_a)
        with open('cm.p', 'wb') as model_file:
            pickle.dump(cm, model_file)

    print("test ids: %s" % ", ".join(str(k) for k in te_s.keys()))
    for idx, id_ in enumerate(te_keys):
        question = all_questions[id_]
        label = all_labels[id_]
        print("-" * 80)
        print("id: %s" % id_)
        result = full_unit_test(cm, question, label)
        print(result.message)
        print(result)
        if result.error:
            error += 1
        if result.penalized:
            penalized += 1
        if result.correct:
            correct += 1
        print("-" * 80)
        print("%d/%d complete, %d correct, %d penalized, %d error"
              % (idx + 1, total, correct, penalized, error))
    end = time.time()
    print("-" * 80)
    print("duration:\t%.1f" % (end - start))

    out = "total:\t\t%d\npenalized:\t%d\ncorrect:\t%d\nerror:\t\t%d" % (
        total, penalized, correct, error)
    print(out)

    dirs_path = os.path.join(demo_path, 'dirs.json')
    # Text mode is what json.dump expects; the context manager guarantees
    # the list is flushed and the handle closed.
    with open(dirs_path, 'w') as dirs_file:
        json.dump([str(x) for x in te_keys], dirs_file)
Esempio n. 33
0
def test_parse_primitives():
    """Parse primitives from one downloaded question's diagram and display them.

    Takes the first question returned by ``download_questions(1037)``, opens
    its diagram image, segments it, parses primitives from the segments and
    calls ``display_primitives()`` on the result.
    """
    downloaded = geoserver_interface.download_questions(1037)
    first_question = downloaded.values()[0]
    diagram = open_image(first_question.diagram_path)
    segments = parse_image_segments(diagram)
    parse_primitives(segments).display_primitives()
Esempio n. 34
0
def test_zip_diagrams():
    """Download the ['development'] questions and pass them to zip_diagrams.

    The archive path handed to ``zip_diagrams`` is hard-coded.
    """
    archive_path = '/Users/minjoon/Desktop/development.zip'
    development_questions = geoserver_interface.download_questions(
        ['development'])
    zip_diagrams(development_questions, archive_path)
Esempio n. 35
0
def test_geoserver_interface():
    """Download the ["annotated"] questions and the semantics, then print both."""
    annotated_questions = geoserver_interface.download_questions(["annotated"])
    semantics = geoserver_interface.download_semantics()
    # The semantics are printed first, then the questions.
    for payload in (semantics, annotated_questions):
        print(payload)
Esempio n. 36
0
def test_geoserver_interface():
    """Print what the geoserver interface returns for questions and semantics.

    Questions are downloaded for the ["annotated"] query before the semantics
    are downloaded; the semantics are printed before the questions.
    """
    questions = geoserver_interface.download_questions(["annotated"])
    annotations = geoserver_interface.download_semantics()
    print(annotations)
    print(questions)
Esempio n. 37
0
def test_parse_image_segments():
    """Segment one downloaded question's diagram and display every segment.

    Takes the first question returned by ``download_questions(1037)``, shows
    the binarized segmented image of the diagram segment, then shows the
    segmented image of each label segment.
    """
    first_question = geoserver_interface.download_questions(1037).values()[0]
    diagram = open_image(first_question.diagram_path)
    segment_parse = parse_image_segments(diagram)

    segment_parse.diagram_image_segment.display_binarized_segmented_image()

    # Only the segments themselves are used; their keys are ignored.
    for _key, label_segment in segment_parse.label_image_segments.iteritems():
        label_segment.display_segmented_image()
Esempio n. 38
0
def save_parse_image_segments():
    """Write the segmented diagram image of one downloaded question to disk.

    Takes the first question returned by ``download_questions(1037)``,
    segments its diagram and passes the diagram segment's ``segmented_image``
    to ``cv2.imwrite`` with a hard-coded output path.
    """
    file_path = "/Users/minjoon/Desktop/diagram.png"
    first_question = geoserver_interface.download_questions(1037).values()[0]
    diagram = open_image(first_question.diagram_path)
    diagram_segment = parse_image_segments(diagram).diagram_image_segment
    cv2.imwrite(file_path, diagram_segment.segmented_image)
Esempio n. 39
0
def full_test(load=False):
    """Train the semantic model and run ``full_unit_test`` on every test question.

    Questions downloaded for the 'aaai' query are used for training and
    questions downloaded for the 'official' query are used for testing.
    For each test question the result of ``full_unit_test`` is printed and
    tallied as correct / penalized / error; a summary and the elapsed time
    are printed at the end.

    Args:
        load: when true, read the syntax parses and the trained semantic
            model from 'syntax_parses.p' and 'cm.p' (relative to the current
            working directory) instead of recomputing them. When false (the
            default) they are recomputed and those two files are
            (re)written.

    Side effects:
        Prints progress to stdout, reads or writes the two pickle files
        named above, and writes the test ids (as strings) to 'dirs.json'
        under ``demo_path``.
    """
    start = time.time()

    tr_questions = geoserver_interface.download_questions('aaai')
    te_questions = geoserver_interface.download_questions('official')

    # Merge both sets; on an id collision the test question wins, exactly as
    # when concatenating the two items() lists and building a dict from them.
    all_questions = dict(tr_questions)
    all_questions.update(te_questions)
    tr_ids = tr_questions.keys()
    te_keys = te_questions.keys()

    # NOTE(review): these pickles are local caches produced by this function;
    # pickle.load must not be pointed at files from an untrusted source.
    if load:
        with open('syntax_parses.p', 'rb') as parses_file:
            all_syntax_parses = pickle.load(parses_file)
    else:
        all_syntax_parses = questions_to_syntax_parses(all_questions)
        with open('syntax_parses.p', 'wb') as parses_file:
            pickle.dump(all_syntax_parses, parses_file)
    all_annotations = geoserver_interface.download_semantics()
    all_labels = geoserver_interface.download_labels()

    tr_s = {id_: all_syntax_parses[id_] for id_ in tr_ids}
    tr_a = {id_: all_annotations[id_] for id_ in tr_ids}
    te_s = {id_: all_syntax_parses[id_] for id_ in te_keys}

    if load:
        with open('cm.p', 'rb') as model_file:
            cm = pickle.load(model_file)
    else:
        # NOTE(review): the tag model is fitted on the syntax parses of both
        # the training and the test questions - confirm this is intended.
        tm = train_tag_model(all_syntax_parses, all_annotations)
        cm = train_semantic_model(tm, tr_s, tr_a)
        with open('cm.p', 'wb') as model_file:
            pickle.dump(cm, model_file)

    total = len(te_keys)
    correct = 0
    penalized = 0
    error = 0

    # Every print below takes a single expression so the output is the same
    # whether print is a statement (Python 2) or a function (Python 3).
    print("test ids: %s" % ", ".join(str(k) for k in te_s.keys()))
    for idx, id_ in enumerate(te_keys):
        question = all_questions[id_]
        label = all_labels[id_]
        print("-" * 80)
        print("id: %s" % str(id_))
        result = full_unit_test(cm, question, label)
        print(result.message)
        print(result)
        # The three flags are tallied independently of one another.
        if result.error:
            error += 1
        if result.penalized:
            penalized += 1
        if result.correct:
            correct += 1
        print("-" * 80)
        print("%d/%d complete, %d correct, %d penalized, %d error" % (
            idx + 1, total, correct, penalized, error))
    end = time.time()
    print("-" * 80)
    print("duration:\t%.1f" % (end - start))

    out = "total:\t\t%d\npenalized:\t%d\ncorrect:\t%d\nerror:\t\t%d" % (
        total, penalized, correct, error)
    print(out)

    dirs_path = os.path.join(demo_path, 'dirs.json')
    # Text mode: json.dump writes str, and the output contains no newline
    # characters, so the bytes on disk match what binary mode produced.
    with open(dirs_path, 'w') as dirs_file:
        json.dump([str(x) for x in te_keys], dirs_file)
Esempio n. 40
0
def test_parse_primitives():
    """Show the primitives parsed from the diagram of a downloaded question.

    Uses the first question returned by ``download_questions(1037)``.
    """
    questions = geoserver_interface.download_questions(1037)
    question = questions.values()[0]
    diagram = open_image(question.diagram_path)
    image_segment_parse = parse_image_segments(diagram)
    primitive_parse = parse_primitives(image_segment_parse)
    primitive_parse.display_primitives()