def _full_unit_test(combined_model, question, label_data): assert isinstance(combined_model, CombinedModel) base_path = os.path.join(demo_path, str(question.key)) if not os.path.exists(base_path): os.mkdir(base_path) question_path = os.path.join(base_path, 'question.json') text_parse_path = os.path.join(base_path, 'text_parse.json') diagram_parse_path = os.path.join(base_path, 'diagram_parse.json') optimized_path = os.path.join(base_path, 'optimized.json') entity_list_path = os.path.join(base_path, 'entity_map.json') diagram_path = os.path.join(base_path, 'diagram.png') solution_path = os.path.join(base_path, 'solution.json') shutil.copy(question.diagram_path, diagram_path) text_parse_list = [] diagram_parse_list = [] optimized_list = [] entity_list = [] solution = "" json.dump(question._asdict(), open(question_path, 'wb')) choice_formulas = get_choice_formulas(question) match_parse = question_to_match_parse(question, label_data) match_formulas = parse_match_formulas(match_parse) graph_parse = match_parse.graph_parse core_parse = graph_parse.core_parse # core_parse.display_points() # core_parse.primitive_parse.display_primitives() # opt_model = TextGreedyOptModel(combined_model) diagram_formulas = parse_confident_formulas(match_parse.graph_parse) all_formulas = set(match_formulas + diagram_formulas) opt_model = FullGreedyOptModel(combined_model, match_parse) for number, sentence_words in question.sentence_words.iteritems(): syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words) expr_formulas = { key: prefix_to_formula(expression_parser.parse_prefix(expression)) for key, expression in question.sentence_expressions[number].iteritems() } truth_expr_formulas, value_expr_formulas = _separate_expr_formulas( expr_formulas) semantic_forest = opt_model.combined_model.get_semantic_forest( syntax_parse) truth_semantic_trees = semantic_forest.get_semantic_trees_by_type( "truth") is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is") cc_trees = set( t for t in semantic_forest.get_semantic_trees_by_type('cc') if opt_model.combined_model.get_tree_score(t) > 0.01) for cc_tree in cc_trees: print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score( cc_tree) bool_semantic_trees = opt_model.optimize( truth_semantic_trees.union(is_semantic_trees), 0, cc_trees) # semantic_trees = bool_semantic_trees.union(cc_trees) for t in truth_semantic_trees.union(is_semantic_trees).union(cc_trees): text_parse_list.append({ 'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.combined_model.get_tree_score(t) }) diagram_score = opt_model.get_diagram_score( t.to_formula(), cc_trees) if diagram_score is not None: diagram_parse_list.append({ 'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': diagram_score }) local_entities = semantic_tree_to_serialized_entities( match_parse, t, number, value_expr_formulas) entity_list.extend(local_entities) for t in bool_semantic_trees: optimized_list.append({ 'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.get_magic_score(t, cc_trees) }) for key, f in expr_formulas.iteritems(): if key.startswith("v"): pass index = (i for i, word in sentence_words.iteritems() if word == key).next() tree = formula_to_semantic_tree(f, syntax_parse, (index, index + 1)) print "f and t:", f, tree text_parse_list.append({ 'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0 }) optimized_list.append({ 'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0 }) local_entities = formula_to_serialized_entities( match_parse, f, tree, number) print "local entities:", local_entities entity_list.extend(local_entities) core_formulas = set(t.to_formula() for t in bool_semantic_trees) cc_formulas = set(t.to_formula() for t in cc_trees) augmented_formulas = augment_formulas(core_formulas) completed_formulas = complete_formulas(augmented_formulas, cc_formulas) print "completed formulas:" for f in completed_formulas: print f print "" grounded_formulas = ground_formulas( match_parse, completed_formulas + truth_expr_formulas, value_expr_formulas) text_formulas = filter_formulas(flatten_formulas(grounded_formulas)) all_formulas = all_formulas.union(text_formulas) reduced_formulas = all_formulas # reduce_formulas(all_formulas) for reduced_formula in reduced_formulas: if reduced_formula.is_grounded(core_parse.variable_assignment.keys()): score = evaluate(reduced_formula, core_parse.variable_assignment) scores = [ evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children ] else: score = None scores = None solution += repr(reduced_formula) + '\n' print reduced_formula, score, scores solution = solution.rstrip() # core_parse.display_points() json.dump(diagram_parse_list, open(diagram_parse_path, 'wb')) json.dump(optimized_list, open(optimized_path, 'wb')) json.dump(text_parse_list, open(text_parse_path, 'wb')) json.dump(entity_list, open(entity_list_path, 'wb')) json.dump(solution, open(solution_path, 'wb')) # return SimpleResult(question.key, False, False, True) # Early termination print "Solving..." ans = solve(reduced_formulas, choice_formulas, assignment=None) #core_parse.variable_assignment) print "ans:", ans if choice_formulas is None: penalized = False if Equals(ans, float(question.answer)).conf > 0.98: correct = True else: correct = False else: idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf) if tv.conf > 0.98: if idx == int(float(question.answer)): correct = True penalized = False else: correct = False penalized = True else: penalized = False correct = False result = SimpleResult(question.key, False, penalized, correct) return result
def _full_unit_test(combined_model, question, label_data): assert isinstance(combined_model, CombinedModel) base_path = os.path.join(demo_path, str(question.key)) if not os.path.exists(base_path): os.mkdir(base_path) question_path = os.path.join(base_path, 'question.json') text_parse_path = os.path.join(base_path, 'text_parse.json') diagram_parse_path = os.path.join(base_path, 'diagram_parse.json') optimized_path = os.path.join(base_path, 'optimized.json') entity_list_path = os.path.join(base_path, 'entity_map.json') diagram_path = os.path.join(base_path, 'diagram.png') solution_path = os.path.join(base_path, 'solution.json') shutil.copy(question.diagram_path, diagram_path) text_parse_list = [] diagram_parse_list = [] optimized_list = [] entity_list = [] solution = "" json.dump(question._asdict(), open(question_path, 'wb')) choice_formulas = get_choice_formulas(question) match_parse = question_to_match_parse(question, label_data) match_formulas = parse_match_formulas(match_parse) graph_parse = match_parse.graph_parse core_parse = graph_parse.core_parse # core_parse.display_points() # core_parse.primitive_parse.display_primitives() # opt_model = TextGreedyOptModel(combined_model) diagram_formulas = parse_confident_formulas(match_parse.graph_parse) all_formulas = set(match_formulas + diagram_formulas) opt_model = FullGreedyOptModel(combined_model, match_parse) for number, sentence_words in question.sentence_words.iteritems(): syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words) expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression)) for key, expression in question.sentence_expressions[number].iteritems()} truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas) semantic_forest = opt_model.combined_model.get_semantic_forest(syntax_parse) truth_semantic_trees = semantic_forest.get_semantic_trees_by_type("truth") is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is") cc_trees = set(t for t in semantic_forest.get_semantic_trees_by_type('cc') if opt_model.combined_model.get_tree_score(t) > 0.01) for cc_tree in cc_trees: print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score(cc_tree) bool_semantic_trees = opt_model.optimize(truth_semantic_trees.union(is_semantic_trees), 0, cc_trees) # semantic_trees = bool_semantic_trees.union(cc_trees) for t in truth_semantic_trees.union(is_semantic_trees).union(cc_trees): text_parse_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.combined_model.get_tree_score(t)}) diagram_score = opt_model.get_diagram_score(t.to_formula(), cc_trees) if diagram_score is not None: diagram_parse_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': diagram_score}) local_entities = semantic_tree_to_serialized_entities(match_parse, t, number, value_expr_formulas) entity_list.extend(local_entities) for t in bool_semantic_trees: optimized_list.append({'simple': t.simple_repr(), 'tree': t.serialized(), 'sentence_number': number, 'score': opt_model.get_magic_score(t, cc_trees)}) for key, f in expr_formulas.iteritems(): if key.startswith("v"): pass index = (i for i, word in sentence_words.iteritems() if word == key).next() tree = formula_to_semantic_tree(f, syntax_parse, (index, index+1)) print "f and t:", f, tree text_parse_list.append({'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0}) optimized_list.append({'simple': f.simple_repr(), 'tree': tree.serialized(), 'sentence_number': number, 'score': 1.0}) local_entities = formula_to_serialized_entities(match_parse, f, tree, number) print "local entities:", local_entities entity_list.extend(local_entities) core_formulas = set(t.to_formula() for t in bool_semantic_trees) cc_formulas = set(t.to_formula() for t in cc_trees) augmented_formulas = augment_formulas(core_formulas) completed_formulas = complete_formulas(augmented_formulas, cc_formulas) print "completed formulas:" for f in completed_formulas: print f print "" grounded_formulas = ground_formulas(match_parse, completed_formulas+truth_expr_formulas, value_expr_formulas) text_formulas = filter_formulas(flatten_formulas(grounded_formulas)) all_formulas = all_formulas.union(text_formulas) reduced_formulas = all_formulas # reduce_formulas(all_formulas) for reduced_formula in reduced_formulas: if reduced_formula.is_grounded(core_parse.variable_assignment.keys()): score = evaluate(reduced_formula, core_parse.variable_assignment) scores = [evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children] else: score = None scores = None solution += repr(reduced_formula) + '\n' print reduced_formula, score, scores solution = solution.rstrip() # core_parse.display_points() json.dump(diagram_parse_list, open(diagram_parse_path, 'wb')) json.dump(optimized_list, open(optimized_path, 'wb')) json.dump(text_parse_list, open(text_parse_path, 'wb')) json.dump(entity_list, open(entity_list_path, 'wb')) json.dump(solution, open(solution_path, 'wb')) return SimpleResult(question.key, False, False, True) # Early termination print "Solving..." ans = solve(reduced_formulas, choice_formulas, assignment=None)#core_parse.variable_assignment) print "ans:", ans if choice_formulas is None: penalized = False if Equals(ans, float(question.answer)).conf > 0.98: correct = True else: correct = False else: idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf) if tv.conf > 0.98: if idx == int(question.answer): correct = True penalized = False else: correct = False penalized = True else: penalized = False correct = False result = SimpleResult(question.key, False, penalized, correct) return result
def _full_unit_test(combined_model, question, label_data): assert isinstance(combined_model, CombinedModel) choice_formulas = get_choice_formulas(question) match_parse = question_to_match_parse(question, label_data) match_formulas = parse_match_formulas(match_parse) graph_parse = match_parse.graph_parse core_parse = graph_parse.core_parse # core_parse.display_points() # core_parse.primitive_parse.display_primitives() # opt_model = TextGreedyOptModel(combined_model) diagram_formulas = parse_confident_formulas(match_parse.graph_parse) all_formulas = match_formulas + diagram_formulas opt_model = FullGreedyOptModel(combined_model, match_parse) for number, sentence_words in question.sentence_words.iteritems(): syntax_parse = stanford_parser.get_best_syntax_parse(sentence_words) expr_formulas = {key: prefix_to_formula(expression_parser.parse_prefix(expression)) for key, expression in question.sentence_expressions[number].iteritems()} truth_expr_formulas, value_expr_formulas = _separate_expr_formulas(expr_formulas) semantic_forest = opt_model.combined_model.get_semantic_forest(syntax_parse) truth_semantic_trees = semantic_forest.get_semantic_trees_by_type("truth") is_semantic_trees = semantic_forest.get_semantic_trees_by_type("is") cc_trees = set(t for t in semantic_forest.get_semantic_trees_by_type('cc') if opt_model.combined_model.get_tree_score(t) > 0.01) for cc_tree in cc_trees: print "cc tree:", cc_tree, opt_model.combined_model.get_tree_score(cc_tree) bool_semantic_trees = opt_model.optimize(truth_semantic_trees.union(is_semantic_trees), 0) # semantic_trees = bool_semantic_trees.union(cc_trees) core_formulas = set(t.to_formula() for t in bool_semantic_trees) cc_formulas = set(t.to_formula() for t in cc_trees) augmented_formulas = augment_formulas(core_formulas) completed_formulas = complete_formulas(augmented_formulas, cc_formulas) print "completed formulas:" for f in completed_formulas: print f print "" grounded_formulas = ground_formulas(match_parse, completed_formulas+truth_expr_formulas, value_expr_formulas) text_formulas = filter_formulas(flatten_formulas(grounded_formulas)) all_formulas.extend(text_formulas) reduced_formulas = all_formulas # reduce_formulas(all_formulas) for reduced_formula in reduced_formulas: if reduced_formula.is_grounded(core_parse.variable_assignment.keys()): score = evaluate(reduced_formula, core_parse.variable_assignment) scores = [evaluate(child, core_parse.variable_assignment) for child in reduced_formula.children] else: score = None scores = None print reduced_formula, score, scores # core_parse.display_points() ans = solve(reduced_formulas, choice_formulas, assignment=None)#core_parse.variable_assignment) print "ans:", ans if choice_formulas is None: penalized = False if Equals(ans, float(question.answer)).conf > 0.98: correct = True else: correct = False else: idx, tv = max(ans.iteritems(), key=lambda pair: pair[1].conf) if tv.conf > 0.98: if idx == int(question.answer): correct = True penalized = False else: correct = False penalized = True else: penalized = False correct = False result = SimpleResult(question.key, False, penalized, correct) return result