def plot_normal_decomposition(decomp: str):
    """Print a decomposition and its normal form, then draw both graphs.

    Args:
        decomp: Textual decomposition, parsed with ``Decomposition.from_str``.
    """
    scorer = NormalizedGraphMatchScorer()
    original = Decomposition.from_str(decomp)
    print("decomposition:", original.to_string())

    # Normalization runs in verbose mode, so it may emit its own output
    # between the two summary lines.
    normal_form = scorer.normalized_decomposition(original, verbose=True)
    print("normal form:", normal_form.to_string())
    print("=========================================================")

    # Draw the original first, then the normalized version.
    for subject, caption in ((original, "decomposition"),
                             (normal_form, "normal form")):
        draw_decomposition_graph(subject.to_graph(), title=caption)
def _decompose(self, question, verbose):
    """Run the predictor on one question and wrap the result.

    Args:
        question: Question text, sent to the predictor under the "source" key.
        verbose: Accepted for interface compatibility; not used in this body.

    Returns:
        A ``Decomposition`` built from the predicted tokens.
    """
    prediction = self.predictor.predict_json({"source": question})
    # In beam mode only the first entry of "predicted_tokens" is used.
    tokens = (prediction["predicted_tokens"][0]
              if self.beam else prediction["predicted_tokens"])
    return Decomposition(get_decomposition_from_tokens(tokens))
def main(args):
    """Load questions, obtain decompositions and optionally evaluate them.

    Questions come either from a tab-separated input file
    (``question[\\tallowed_tokens]\\tgold``) or from the single
    ``args.question``. Gold decompositions are split on ``@@SEP@@``.

    Args:
        args: Parsed command-line namespace. Attributes read here are
            input_file, random_n, model, question, evaluate, gold, preds_file,
            print_non_decomposed, verbose, metadata_file, output_file_base
            and num_processes.
    """
    # load data
    if not args.input_file:
        questions = [args.question]
        if args.evaluate:
            golds = [[step.strip() for step in args.gold.split('@@SEP@@')]]
        allowed_tokens = None
    else:
        with open(args.input_file, 'r', encoding='utf-8') as fd:
            rows = fd.readlines()
        # Optionally down-sample to ``random_n`` randomly chosen lines.
        if args.random_n and len(rows) > args.random_n:
            rows = random.sample(rows, args.random_n)

        records = [row.strip('\n').split('\t') for row in rows]
        questions = [record[0] for record in records]

        # The "dynamic" model carries an extra allowed-tokens column, which
        # shifts the gold column one position to the right.
        is_dynamic = args.model == "dynamic"
        allowed_tokens = [record[1] for record in records] if is_dynamic else None
        golds_index = 2 if is_dynamic else 1

        golds = [
            [step.strip() for step in record[golds_index].split('@@SEP@@')]
            for record in records
        ]

    # initialize a model
    model = init_model(args)

    # Pre-computed predictions take precedence over running the model.
    if args.preds_file:
        decompositions = model.load_decompositions_from_file(args.preds_file)
    else:
        decompositions = model.predict(questions,
                                       args.print_non_decomposed,
                                       args.verbose,
                                       extra_args=allowed_tokens)

    # evaluation
    if not args.evaluate:
        return

    golds = [Decomposition(g) for g in golds]
    metadata = pd.read_csv(args.metadata_file) if args.metadata_file else None
    model.evaluate(questions,
                   decompositions,
                   golds,
                   metadata,
                   output_path_base=args.output_file_base,
                   num_processes=args.num_processes)
def predictions_to_norm(df, qdmr_col, norm_col):
    """Fill ``df[norm_col]`` with the normalized form of ``df[qdmr_col]``.

    The frame is modified in place. A row whose decomposition cannot be parsed
    or normalized is reported on stdout (with a traceback) and gets the
    literal value "ERROR" instead of aborting the whole run.

    Args:
        df: DataFrame to update.
        qdmr_col: Name of the column holding decomposition strings.
        norm_col: Name of the column that receives the normalized strings.
    """
    # Start from a copy of the raw column; rows are then overwritten one by one.
    df[norm_col] = df[qdmr_col]

    for index, record in df.iterrows():
        dec = record[norm_col]
        try:
            normalize = norm_g.normalized_decomposition
            normalized = normalize(Decomposition.from_str(dec))
            df.loc[index, norm_col] = normalized.to_string()
        except Exception as ex:
            # Best-effort: log the failure and mark the row, then keep going.
            print(f"error in index {index}:{str(ex)}\n{dec}", flush=True)
            traceback.print_exc()
            df.loc[index, norm_col] = "ERROR"
def format_qdmr(input: str):
    """Convert a raw ';'-separated QDMR string into a ``Decomposition``.

    Steps applied, in order:
      1. collapse every run of whitespace into a single space;
      2. split into steps on ';';
      3. delete every occurrence of the substring 'return' from each step;
      4. rewrite step references such as ``#2`` as ``@@2@@``.

    Args:
        input: Raw QDMR text (name kept for caller compatibility even though
            it shadows the builtin).

    Returns:
        A ``Decomposition`` built from the list of formatted steps.
    """
    # replace multiple whitespaces with a single whitespace.
    input = ' '.join(input.split())

    steps = []
    for part in input.split(';'):
        # NOTE: this removes 'return' wherever it occurs in the step, not only
        # as a leading keyword; whitespace left behind is not re-stripped.
        step = re.sub(r'return', '', part.strip())
        # Raw replacement template: '\g<1>' is a regex group reference, and
        # '\g' is not a valid escape in a normal string literal.
        step = re.sub(r'#(\d+)', r'@@\g<1>@@', step)
        steps.append(step)

    return Decomposition(steps)
def predict(self, questions, print_non_decomposed, verbose, extra_args=None):
    """Decompose each question, report the outcome and collect the results.

    A result of length 1 counts as "not decomposed".

    Args:
        questions: Iterable of questions passed one by one to ``_decompose``.
        print_non_decomposed: When true, questions that were not decomposed
            are printed as well.
        verbose: Forwarded to ``_decompose``.
        extra_args: Accepted for interface compatibility; not used here.

    Returns:
        A list with one ``Decomposition`` per question.
    """
    collected = []
    decomposed_count = 0
    untouched_count = 0

    for question in questions:
        steps, trace = self._decompose(question, verbose)

        if len(steps) != 1:
            decomposed_count += 1
            print("question: {}\ndecomposition: {}\ntrace: {}\n".format(
                question, steps, trace))
        else:
            untouched_count += 1
            if print_non_decomposed:
                print("question: {}\ndecomposition: -\n".format(question))

        collected.append(steps)

    print(
        "\n{} decomposed questions, {} not-decomposed questions.\n".format(
            decomposed_count, untouched_count))

    return [Decomposition(steps) for steps in collected]
def normalized_decomposition(self, decomposition: Decomposition, verbose: bool = False) -> Decomposition:
    """Return the normal form of ``decomposition``.

    The decomposition is converted to a graph, normalized with
    ``self.normalize_graph`` and converted back.

    Args:
        decomposition: Decomposition to normalize.
        verbose: Forwarded to ``normalize_graph``.

    Returns:
        A new ``Decomposition`` rebuilt from the normalized graph.
    """
    normalized_graph = self.normalize_graph(
        graph=decomposition.to_graph(),
        verbose=verbose,
    )
    return Decomposition.from_graph(graph=normalized_graph)
from evaluation.decomposition import Decomposition, draw_decomposition_graph from evaluation.graph_matcher import AStarSearcher examples = [ # 0 (Decomposition([ "representatives from New York state or Indiana state", "the life spans of @@1@@" ]), Decomposition([ "representatives from new york state", "representatives from indiana state", "@@1@@ or @@2@@", "life spans of @@3@@" ])), # 1 (Decomposition( ["the team owned by Jerry Jones", "the 1996 coach of @@1@@"]), Decomposition( ["the team owned by Jerry Jones", "the 1996 coach of @@1@@"])), # 2 (Decomposition([ "the team with Baltimore Fight Song", "year did @@1@@ win the Superbowl" ]), Decomposition([ "the team with Baltimore Fight Song", "what year did @@1@@ win the Superbowl" ])), # 3 (Decomposition([ "a us air flight", "@@1@@ from toronto to san diego",
def str_test_create_graphs(inp: str, output: [str]):
    """Build the graphs for a single-step input and a multi-step output.

    Args:
        inp: One step; wrapped into a one-element decomposition.
        output: List of steps forming the second decomposition.

    Returns:
        A ``(input_graph, output_graph)`` tuple.
    """
    # The input graph is built first, then the output graph.
    return tuple(
        Decomposition(steps).to_graph() for steps in ([inp], output)
    )
def update_sucessors(graph, n_id, doc: [Token]):
    """Re-point the outgoing edges of ``n_id`` at the references in ``doc``.

    All current edges from ``n_id`` to its successors are removed, and one
    edge is added for each reference id found in the space-joined token text.

    Args:
        graph: Graph exposing ``successors``, ``remove_edges_from`` and
            ``add_edges_from`` (presumably a networkx DiGraph; verify against
            callers).
        n_id: Id of the node whose outgoing edges are replaced.
        doc: Tokens of the node's step; only each token's ``.text`` is read.
    """
    step_text = " ".join(token.text for token in doc)
    referenced_ids = Decomposition._get_references_ids(step_text)

    # Materialize the edge list before removing, so the successor view is not
    # consumed while the graph is being mutated.
    outgoing = [(n_id, successor) for successor in graph.successors(n_id)]
    graph.remove_edges_from(outgoing)

    graph.add_edges_from([(n_id, ref_id) for ref_id in referenced_ids])
def evaluate(ids, questions, decompositions, golds, metadata, output_path_base, metrics=None):
    """Score predicted decompositions against gold ones and report the results.

    Computes string-level, graph-level and normalized-graph metrics, prints
    per-example and aggregate scores, optionally writes them to disk and
    optionally prints per-dataset / per-length breakdowns based on
    ``metadata``.

    Args:
        ids: Example ids; stored under the "id" key and matched against
            ``metadata['question_id']``.
        questions: Question texts, parallel to ``decompositions``.
        decompositions: Predicted decompositions (objects exposing
            ``to_string`` and ``to_graph``).
        golds: Gold decompositions, same interface as ``decompositions``.
        metadata: Optional DataFrame with at least ``question_id`` and
            ``decomposition`` columns, or None to skip the breakdown.
        output_path_base: When truthy, forwarded to
            ``write_evaluation_output``.
        metrics: Optional container of metric names to compute. None means
            all metrics. Names checked here: 'exact_match', 'sari', 'match',
            'structural_match', 'ged_scores', 'normalized_exact_match',
            'normalized_sari', 'normalized_match',
            'normalized_structural_match'.

    Returns:
        Whatever ``print_score_stats`` returns for the collected scores.
    """

    def is_selected(metric_name):
        # ``metrics=None`` means "compute everything".
        return (metrics is None) or metric_name in metrics

    def try_invoke(func, graph, default=None):
        # Best-effort: a graph that fails to normalize/serialize yields
        # ``default`` instead of aborting the whole evaluation.
        try:
            return func(graph)
        except Exception:
            return default

    decompositions_str = [d.to_string() for d in decompositions]
    golds_str = [g.to_string() for g in golds]

    # calculating exact match scores
    exact_match = get_exact_match(decompositions_str, golds_str) \
        if is_selected('exact_match') else None

    # evaluate using SARI
    sari = get_sari_score(decompositions_str, golds_str, questions) \
        if is_selected('sari') else None

    # evaluate using sequence matcher
    match_ratio = get_match_ratio(decompositions_str, golds_str) \
        if is_selected('match') else None
    structural_match_ratio = get_structural_match_ratio(decompositions_str, golds_str) \
        if is_selected('structural_match') else None

    # evaluate using graph distances
    graph_scorer = GraphMatchScorer()
    decomposition_graphs = [d.to_graph() for d in decompositions]
    gold_graphs = [g.to_graph() for g in golds]

    # The selector name is 'ged_scores' although the output key below is "ged".
    ged_scores = graph_scorer.get_edit_distance_match_scores(decomposition_graphs, gold_graphs) \
        if is_selected('ged_scores') else None
    if ged_scores:
        # Every falsy score (None, but also 0) is replaced by 1.
        # NOTE(review): confirm that mapping 0 to 1 is intended.
        ged_scores = [s if s else 1 for s in ged_scores]

    # calculate normalized match scores
    normalize_scorer = NormalizedGraphMatchScorer()

    # When normalization fails the un-normalized graph is used as fallback;
    # when serialization fails the string is None (handled by ``skip_none``).
    decomposition_norm_graphs = [
        try_invoke(normalize_scorer.normalize_graph, g, default=g)
        for g in decomposition_graphs
    ]
    decomposition_norm_str = [
        try_invoke(lambda x: Decomposition.from_graph(x).to_string(), g)
        for g in decomposition_norm_graphs
    ]
    gold_norm_graphs = [
        try_invoke(normalize_scorer.normalize_graph, g, default=g)
        for g in gold_graphs
    ]
    gold_norm_str = [
        try_invoke(lambda x: Decomposition.from_graph(x).to_string(), g)
        for g in gold_norm_graphs
    ]

    normalized_exact_match = skip_none(get_exact_match, decomposition_norm_str, gold_norm_str) \
        if is_selected('normalized_exact_match') else None
    normalized_sari = skip_none(get_sari_score, decomposition_norm_str, gold_norm_str, questions) \
        if is_selected('normalized_sari') else None
    normalized_match_ratio = skip_none(get_match_ratio, decomposition_norm_str, gold_norm_str) \
        if is_selected('normalized_match') else None
    normalized_structural_match_ratio = skip_none(get_structural_match_ratio, decomposition_norm_str, gold_norm_str) \
        if is_selected('normalized_structural_match') else None

    evaluation_dict = {
        "id": ids,
        "question": questions,
        "gold": golds_str,
        "prediction": decompositions_str,
        "exact_match": exact_match,
        "match": match_ratio,
        "structural_match": structural_match_ratio,
        "sari": sari,
        "ged": ged_scores,
        "normalized_exact_match": normalized_exact_match,
        "normalized_match": normalized_match_ratio,
        "normalized_structural_match": normalized_structural_match_ratio,
        "normalized_sari": normalized_sari,
    }
    # Drop everything that was not computed (or not supplied).
    evaluation_dict = {
        k: v for k, v in evaluation_dict.items() if v is not None
    }

    num_examples = len(questions)
    print_first_example_scores(evaluation_dict, min(5, num_examples))
    mean_scores = print_score_stats(evaluation_dict)

    if output_path_base:
        write_evaluation_output(output_path_base, num_examples, **evaluation_dict)

    if metadata is not None:
        # Explicit copy: columns are added below, and assigning onto a
        # filtered slice triggers pandas' SettingWithCopyWarning.
        metadata = metadata[metadata['question_id'].isin(
            evaluation_dict['id'])].copy()
        metadata["dataset"] = metadata["question_id"].apply(
            lambda x: x.split("_")[0])
        metadata["num_steps"] = metadata["decomposition"].apply(
            lambda x: len(x.split(";")))

        score_keys = [
            key for key in evaluation_dict
            if key not in ["id", "question", "gold", "prediction"]
        ]
        # Scores are attached positionally.
        # NOTE(review): this assumes the filtered metadata rows match the
        # evaluated examples in count and order - confirm against callers.
        for key in score_keys:
            metadata[key] = evaluation_dict[key]

        for agg_field in ["dataset", "num_steps"]:
            df = metadata[[agg_field] + score_keys].groupby(agg_field).agg("mean")
            print(df.round(decimals=3))

    return mean_scores