예제 #1
0
def _full_unit_test(combined_model, question, label_data):
    assert isinstance(combined_model, CombinedModel)

    base_path = os.path.join(demo_path, str(question.key))
    if not os.path.exists(base_path):
        os.mkdir(base_path)
    question_path = os.path.join(base_path, 'question.json')
    text_parse_path = os.path.join(base_path, 'text_parse.json')
    diagram_parse_path = os.path.join(base_path, 'diagram_parse.json')
    optimized_path = os.path.join(base_path, 'optimized.json')
    entity_list_path = os.path.join(base_path, 'entity_map.json')
    diagram_path = os.path.join(base_path, 'diagram.png')
    solution_path = os.path.join(base_path, 'solution.json')
    shutil.copy(question.diagram_path, diagram_path)
    text_parse_list = []
    diagram_parse_list = []
    optimized_list = []
    entity_list = []
    solution = ""
    json.dump(question._asdict(), open(question_path, 'wb'))

    choice_formulas = get_choice_formulas(question)
    match_parse = question_to_match_parse(question, label_data)
    match_formulas = parse_match_formulas(match_parse)
    graph_parse = match_parse.graph_parse
    core_parse = graph_parse.core_parse
    # core_parse.display_points()
    # core_parse.primitive_parse.display_primitives()

    # opt_model = TextGreedyOptModel(combined_model)

    diagram_formulas = parse_confident_formulas(match_parse.graph_parse)
    all_formulas = set(match_formulas + diagram_formulas)

    opt_model = FullGreedyOptModel(combined_model, match_parse)
    for number, sentence_words in question.sentence_words.iteritems():
        syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words)

        expr_formulas = {
            key: prefix_to_formula(expression_parser.parse_prefix(expression))
            for key, expression in
            question.sentence_expressions[number].iteritems()
        }
        truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(
            expr_formulas)

        semantic_forest = opt_model.combined_model.get_semantic_forest(
            syntax_parse)
        truth_semantic_trees = semantic_forest.get_semantic_trees_by_type(
            "truth")
        is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is")
        cc_trees = set(
            t for t in semantic_forest.get_semantic_trees_by_type('cc')
            if opt_model.combined_model.get_tree_score(t) > 0.01)
        for cc_tree in cc_trees:
            print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score(
                cc_tree)

        bool_semantic_trees = opt_model.optimize(
            truth_semantic_trees.union(is_semantic_trees), 0, cc_trees)
        # semantic_trees = bool_semantic_trees.union(cc_trees)

        for t in truth_semantic_trees.union(is_semantic_trees).union(cc_trees):
            text_parse_list.append({
                'simple':
                t.simple_repr(),
                'tree':
                t.serialized(),
                'sentence_number':
                number,
                'score':
                opt_model.combined_model.get_tree_score(t)
            })
            diagram_score = opt_model.get_diagram_score(
                t.to_formula(), cc_trees)
            if diagram_score is not None:
                diagram_parse_list.append({
                    'simple': t.simple_repr(),
                    'tree': t.serialized(),
                    'sentence_number': number,
                    'score': diagram_score
                })

            local_entities = semantic_tree_to_serialized_entities(
                match_parse, t, number, value_expr_formulas)
            entity_list.extend(local_entities)

        for t in bool_semantic_trees:
            optimized_list.append({
                'simple':
                t.simple_repr(),
                'tree':
                t.serialized(),
                'sentence_number':
                number,
                'score':
                opt_model.get_magic_score(t, cc_trees)
            })

        for key, f in expr_formulas.iteritems():
            if key.startswith("v"):
                pass
            index = (i for i, word in sentence_words.iteritems()
                     if word == key).next()
            tree = formula_to_semantic_tree(f, syntax_parse,
                                            (index, index + 1))
            print "f and t:", f, tree
            text_parse_list.append({
                'simple': f.simple_repr(),
                'tree': tree.serialized(),
                'sentence_number': number,
                'score': 1.0
            })
            optimized_list.append({
                'simple': f.simple_repr(),
                'tree': tree.serialized(),
                'sentence_number': number,
                'score': 1.0
            })

            local_entities = formula_to_serialized_entities(
                match_parse, f, tree, number)
            print "local entities:", local_entities
            entity_list.extend(local_entities)

        core_formulas = set(t.to_formula() for t in bool_semantic_trees)
        cc_formulas = set(t.to_formula() for t in cc_trees)
        augmented_formulas = augment_formulas(core_formulas)
        completed_formulas = complete_formulas(augmented_formulas, cc_formulas)

        print "completed formulas:"
        for f in completed_formulas:
            print f
        print ""

        grounded_formulas = ground_formulas(
            match_parse, completed_formulas + truth_expr_formulas,
            value_expr_formulas)
        text_formulas = filter_formulas(flatten_formulas(grounded_formulas))
        all_formulas = all_formulas.union(text_formulas)

    reduced_formulas = all_formulas  # reduce_formulas(all_formulas)
    for reduced_formula in reduced_formulas:
        if reduced_formula.is_grounded(core_parse.variable_assignment.keys()):
            score = evaluate(reduced_formula, core_parse.variable_assignment)
            scores = [
                evaluate(child, core_parse.variable_assignment)
                for child in reduced_formula.children
            ]
        else:
            score = None
            scores = None
        solution += repr(reduced_formula) + '\n'
        print reduced_formula, score, scores
    solution = solution.rstrip()
    # core_parse.display_points()

    json.dump(diagram_parse_list, open(diagram_parse_path, 'wb'))
    json.dump(optimized_list, open(optimized_path, 'wb'))
    json.dump(text_parse_list, open(text_parse_path, 'wb'))
    json.dump(entity_list, open(entity_list_path, 'wb'))
    json.dump(solution, open(solution_path, 'wb'))

    # return SimpleResult(question.key, False, False, True) # Early termination

    print "Solving..."
    ans = solve(reduced_formulas, choice_formulas,
                assignment=None)  #core_parse.variable_assignment)
    print "ans:", ans

    if choice_formulas is None:
        penalized = False
        if Equals(ans, float(question.answer)).conf > 0.98:
            correct = True
        else:
            correct = False
    else:
        idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf)
        if tv.conf > 0.98:
            if idx == int(float(question.answer)):
                correct = True
                penalized = False
            else:
                correct = False
                penalized = True
        else:
            penalized = False
            correct = False

    result = SimpleResult(question.key, False, penalized, correct)
    return result
예제 #2
0
파일: run.py 프로젝트: Darriall/geosolver
def _full_unit_test(combined_model, question, label_data):
    assert isinstance(combined_model, CombinedModel)

    base_path = os.path.join(demo_path, str(question.key))
    if not os.path.exists(base_path):
        os.mkdir(base_path)
    question_path = os.path.join(base_path, 'question.json')
    text_parse_path = os.path.join(base_path, 'text_parse.json')
    diagram_parse_path = os.path.join(base_path, 'diagram_parse.json')
    optimized_path = os.path.join(base_path, 'optimized.json')
    entity_list_path = os.path.join(base_path, 'entity_map.json')
    diagram_path = os.path.join(base_path, 'diagram.png')
    solution_path = os.path.join(base_path, 'solution.json')
    shutil.copy(question.diagram_path, diagram_path)
    text_parse_list = []
    diagram_parse_list = []
    optimized_list = []
    entity_list = []
    solution = ""
    json.dump(question._asdict(), open(question_path, 'wb'))

    choice_formulas = get_choice_formulas(question)
    match_parse = question_to_match_parse(question, label_data)
    match_formulas = parse_match_formulas(match_parse)
    graph_parse = match_parse.graph_parse
    core_parse = graph_parse.core_parse
    # core_parse.display_points()
    # core_parse.primitive_parse.display_primitives()

    # opt_model = TextGreedyOptModel(combined_model)

    diagram_formulas = parse_confident_formulas(match_parse.graph_parse)
    all_formulas = set(match_formulas + diagram_formulas)

    opt_model = FullGreedyOptModel(combined_model, match_parse)
    for number, sentence_words in question.sentence_words.iteritems():
        syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words)

        expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression))
                         for key, expression in question.sentence_expressions[number].iteritems()}
        truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas)

        semantic_forest = opt_model.combined_model.get_semantic_forest(syntax_parse)
        truth_semantic_trees = semantic_forest.get_semantic_trees_by_type("truth")
        is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is")
        cc_trees = set(t for t in semantic_forest.get_semantic_trees_by_type('cc')
                       if opt_model.combined_model.get_tree_score(t) > 0.01)
        for cc_tree in cc_trees:
            print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score(cc_tree)

        bool_semantic_trees = opt_model.optimize(truth_semantic_trees.union(is_semantic_trees), 0, cc_trees)
        # semantic_trees = bool_semantic_trees.union(cc_trees)

        for t in truth_semantic_trees.union(is_semantic_trees).union(cc_trees):
            text_parse_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number,
                                    'score': opt_model.combined_model.get_tree_score(t)})
            diagram_score = opt_model.get_diagram_score(t.to_formula(), cc_trees)
            if diagram_score is not None:
                diagram_parse_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number,
                                           'score': diagram_score})

            local_entities = semantic_tree_to_serialized_entities(match_parse, t, number, value_expr_formulas)
            entity_list.extend(local_entities)

        for t in bool_semantic_trees:
            optimized_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number,
                                    'score': opt_model.get_magic_score(t, cc_trees)})

        for key, f in expr_formulas.iteritems():
            if key.startswith("v"):
                pass
            index = (i for i, word in sentence_words.iteritems() if word == key).next()
            tree = formula_to_semantic_tree(f, syntax_parse, (index, index+1))
            print "f and t:", f, tree
            text_parse_list.append({'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0})
            optimized_list.append({'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0})

            local_entities = formula_to_serialized_entities(match_parse, f, tree, number)
            print "local entities:", local_entities
            entity_list.extend(local_entities)



        core_formulas = set(t.to_formula() for t in bool_semantic_trees)
        cc_formulas = set(t.to_formula() for t in cc_trees)
        augmented_formulas = augment_formulas(core_formulas)
        completed_formulas = complete_formulas(augmented_formulas, cc_formulas)

        print "completed formulas:"
        for f in completed_formulas: print f
        print ""

        grounded_formulas = ground_formulas(match_parse, completed_formulas+truth_expr_formulas, value_expr_formulas)
        text_formulas = filter_formulas(flatten_formulas(grounded_formulas))
        all_formulas = all_formulas.union(text_formulas)

    reduced_formulas = all_formulas # reduce_formulas(all_formulas)
    for reduced_formula in reduced_formulas:
        if reduced_formula.is_grounded(core_parse.variable_assignment.keys()):
            score = evaluate(reduced_formula, core_parse.variable_assignment)
            scores = [evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children]
        else:
            score = None
            scores = None
        solution += repr(reduced_formula) + '\n'
        print reduced_formula, score, scores
    solution = solution.rstrip()
    # core_parse.display_points()

    json.dump(diagram_parse_list, open(diagram_parse_path, 'wb'))
    json.dump(optimized_list, open(optimized_path, 'wb'))
    json.dump(text_parse_list, open(text_parse_path, 'wb'))
    json.dump(entity_list, open(entity_list_path, 'wb'))
    json.dump(solution, open(solution_path, 'wb'))

    return SimpleResult(question.key, False, False, True) # Early termination

    print "Solving..."
    ans = solve(reduced_formulas, choice_formulas, assignment=None)#core_parse.variable_assignment)
    print "ans:", ans


    if choice_formulas is None:
        penalized = False
        if Equals(ans, float(question.answer)).conf > 0.98:
            correct = True
        else:
            correct = False
    else:
        idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf)
        if tv.conf > 0.98:
            if idx == int(question.answer):
                correct = True
                penalized = False
            else:
                correct = False
                penalized = True
        else:
            penalized = False
            correct = False

    result = SimpleResult(question.key, False, penalized, correct)
    return result
예제 #3
0
파일: run.py 프로젝트: codeviking/geosolver
def _full_unit_test(combined_model, question, label_data):
    assert isinstance(combined_model, CombinedModel)

    choice_formulas = get_choice_formulas(question)
    match_parse = question_to_match_parse(question, label_data)
    match_formulas = parse_match_formulas(match_parse)
    graph_parse = match_parse.graph_parse
    core_parse = graph_parse.core_parse
    # core_parse.display_points()
    # core_parse.primitive_parse.display_primitives()

    # opt_model = TextGreedyOptModel(combined_model)

    diagram_formulas = parse_confident_formulas(match_parse.graph_parse)
    all_formulas = match_formulas + diagram_formulas

    opt_model = FullGreedyOptModel(combined_model, match_parse)
    for number, sentence_words in question.sentence_words.iteritems():
        syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words)

        expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression))
                         for key, expression in question.sentence_expressions[number].iteritems()}
        truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas)

        semantic_forest = opt_model.combined_model.get_semantic_forest(syntax_parse)
        truth_semantic_trees = semantic_forest.get_semantic_trees_by_type("truth")
        is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is")
        cc_trees = set(t for t in semantic_forest.get_semantic_trees_by_type('cc')
                       if opt_model.combined_model.get_tree_score(t) > 0.01)
        for cc_tree in cc_trees:
            print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score(cc_tree)

        bool_semantic_trees = opt_model.optimize(truth_semantic_trees.union(is_semantic_trees), 0)
        # semantic_trees = bool_semantic_trees.union(cc_trees)

        core_formulas = set(t.to_formula() for t in bool_semantic_trees)
        cc_formulas = set(t.to_formula() for t in cc_trees)
        augmented_formulas = augment_formulas(core_formulas)
        completed_formulas = complete_formulas(augmented_formulas, cc_formulas)

        print "completed formulas:"
        for f in completed_formulas: print f
        print ""

        grounded_formulas = ground_formulas(match_parse, completed_formulas+truth_expr_formulas, value_expr_formulas)
        text_formulas = filter_formulas(flatten_formulas(grounded_formulas))
        all_formulas.extend(text_formulas)

    reduced_formulas = all_formulas # reduce_formulas(all_formulas)
    for reduced_formula in reduced_formulas:
        if reduced_formula.is_grounded(core_parse.variable_assignment.keys()):
            score = evaluate(reduced_formula, core_parse.variable_assignment)
            scores = [evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children]
        else:
            score = None
            scores = None
        print reduced_formula, score, scores
    # core_parse.display_points()

    ans = solve(reduced_formulas, choice_formulas, assignment=None)#core_parse.variable_assignment)
    print "ans:", ans

    if choice_formulas is None:
        penalized = False
        if Equals(ans, float(question.answer)).conf > 0.98:
            correct = True
        else:
            correct = False
    else:
        idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf)
        if tv.conf > 0.98:
            if idx == int(question.answer):
                correct = True
                penalized = False
            else:
                correct = False
                penalized = True
        else:
            penalized = False
            correct = False

    result = SimpleResult(question.key, False, penalized, correct)
    return result