def eval_trees(silver_tree, gold_tree, rst_span_perf, rst_nuc_perf,
               rst_rel_perf, use_parseval=False):
    # RST-Parseval by default; original Parseval when use_parseval=True
    met = Metrics(levels=['span', 'nuclearity', 'relation'],
                  use_parseval=use_parseval)
    met.eval(gold_tree, silver_tree)
    assert met.span_perf.hit_num / met.span_num == rst_span_perf
    assert met.nuc_perf.hit_num / met.span_num == rst_nuc_perf
    assert met.rela_perf.hit_num / met.span_num == rst_rel_perf
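
# Usage sketch for the helper above, assuming silver and gold RST trees have
# already been built. The file names, the `parser.sr_parse(doc)` call, and the
# expected scores are illustrative assumptions, not part of this code base:
#
#   gold_rst = RstTree('wsj_0602.out.dis', 'wsj_0602.out.merge')
#   gold_rst.build()
#   silver_rst = parser.sr_parse(doc)  # assumed parser call
#   # Each expected value is hit_num / span_num, i.e. accuracy at that level.
#   eval_trees(silver_rst, gold_rst, 0.85, 0.72, 0.60, use_parseval=True)
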
def eval_trees(silver_tree, gold_tree, rst_span_perf, rst_nuc_perf,
               span_perf, nuc_perf):
    # RST-Parseval
    met = Metrics(use_parseval=False, levels=['span', 'nuclearity'])
    met.eval(gold_tree, silver_tree)
    assert met.span_perf.hit_num / met.span_num == rst_span_perf
    assert met.nuc_perf.hit_num / met.span_num == rst_nuc_perf

    # Original Parseval
    met = Metrics(use_parseval=True, levels=['span', 'nuclearity'])
    met.eval(gold_tree, silver_tree)
    assert met.span_perf.hit_num / met.span_num == span_perf
    assert met.nuc_perf.hit_num / met.span_num == nuc_perf
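
# Note on the two metric variants exercised above: under RST-Parseval the
# scored constituents include the EDU leaves, which are given to the parser
# and thus trivially correct, so span accuracy is typically higher than under
# the original Parseval, which scores internal spans only (see Morey et al.,
# 2017, "How much progress have we made on RST discourse parsing?"). The same
# tree pair is therefore expected to yield rst_span_perf >= span_perf.
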
def eval_parser(self, dev_data=None, path='./examples', use_parseval=False):
    """ Test the parsing performance"""
    # Evaluation
    met = Metrics(use_parseval=use_parseval, levels=['span', 'nuclearity'])
    # ----------------------------------------
    # Read all files from the given path
    if dev_data is None:
        eval_list = [os.path.join(path, fname) for fname in os.listdir(path)
                     if fname.endswith('.merge')]
    else:
        eval_list = dev_data
    total_cost = 0
    for eval_instance in eval_list:
        # ----------------------------------------
        # Read *.merge file
        doc = Doc()
        if dev_data is not None:
            gold_rst = eval_instance
            doc = eval_instance.doc
        else:
            doc.read_from_fmerge(eval_instance)
            fdis = eval_instance.replace('.merge', '.dis')
            gold_rst = RstTree(fdis, eval_instance)
            gold_rst.build()
        _, gold_action_seq = gold_rst.generate_action_samples()
        gold_action_seq = list(
            map(lambda x: self.data_helper.action_map[x], gold_action_seq))
        pred_rst, cost = self.parser.sr_parse(
            [doc], [torch.cuda.LongTensor(gold_action_seq)], None,
            is_train=False)
        total_cost += cost
        pred_rst = pred_rst[0]
        # ----------------------------------------
        # Evaluate with gold RST tree
        met.eval(gold_rst, pred_rst)
    print("Total cost: ", total_cost)
    met.report()
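
# The sr_parse call above hard-codes torch.cuda.LongTensor and therefore
# requires a GPU. A device-agnostic alternative (a sketch, not the original
# authors' code) would be:
#
#   device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#   gold_actions = torch.tensor(gold_action_seq, dtype=torch.long,
#                               device=device)
#   pred_rst, cost = self.parser.sr_parse([doc], [gold_actions], None,
#                                         is_train=False)
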
def eval_parser(self, dev_data=None, path='./examples', save_preds=True,
                use_parseval=False):
    """ Test the parsing performance"""
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'],
                  use_parseval=use_parseval)
    # ----------------------------------------
    # Read all files from the given path
    if dev_data is None:
        dev_data = [os.path.join(path, fname) for fname in os.listdir(path)
                    if fname.endswith('.dis')]
    total_cost = 0
    for eval_instance in dev_data:
        # ----------------------------------------
        fmerge = eval_instance.replace('.dis', '.merge')
        doc = Doc()
        doc.read_from_fmerge(fmerge)
        gold_rst = RstTree(eval_instance, fmerge)
        gold_rst.build()
        # tok_edus = [nltk.word_tokenize(edu) for edu in doc.doc_edus]
        tok_edus = [edu.split(" ") for edu in doc.doc_edus]
        tokens = flatten(tok_edus)
        coref_document = Document(raw_text=None,
                                  tokens=tokens,
                                  sents=tok_edus,
                                  corefs=[],
                                  speakers=["0"] * len(tokens),
                                  genre="nw",
                                  filename=None)
        coref_document.token_dict = doc.token_dict
        coref_document.edu_dict = doc.edu_dict
        doc = coref_document
        gold_action_seq, gold_rel_seq = gold_rst.decode_rst_tree()
        gold_action_seq = [action_map[x] for x in gold_action_seq]
        gold_relation_seq = [relation_map[x.lower()] for x in gold_rel_seq
                             if x is not None]
        pred_rst, cost = self.parser.sr_parse(
            doc, torch.cuda.LongTensor(gold_action_seq),
            torch.cuda.LongTensor(gold_relation_seq))
        total_cost += cost
        if save_preds:
            if not os.path.isdir('../data/predicted_trees'):
                os.mkdir('../data/predicted_trees')
            filename = eval_instance.split(os.sep)[-1]
            filepath = f'../data/predicted_trees/{self.config[MODEL_NAME]}_{filename}'
            pred_brackets = pred_rst.bracketing()
            # Write brackets into file
            Evaluator.writebrackets(filepath, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        met.eval(gold_rst, pred_rst)
    print("Total cost: ", total_cost)
    if use_parseval:
        print("Reporting original Parseval metric.")
    else:
        print("Reporting RST Parseval metric.")
    met.report()
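
# `flatten` is presumably defined or imported elsewhere in the original code
# base; a minimal sketch compatible with its use above (one level of nesting,
# list of token lists -> flat token list):
def flatten(nested):
    """Flatten one level of nesting, e.g. [['a', 'b'], ['c']] -> ['a', 'b', 'c']."""
    return [item for sublist in nested for item in sublist]
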
def eval_parser(self, path='./examples', report=False, bcvocab=None,
                draw=True):
    """ Test the parsing performance"""
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'])
    # ----------------------------------------
    # Read all files from the given path
    doclist = [os.path.join(path, fname) for fname in os.listdir(path)
               if fname.endswith('.merge')]
    pred_forms = []
    gold_forms = []
    depth_per_relation = {}
    for fmerge in doclist:
        # ----------------------------------------
        # Read *.merge file
        doc = Doc()
        doc.read_from_fmerge(fmerge)
        # ----------------------------------------
        # Parsing
        pred_rst = self.parser.sr_parse(doc, bcvocab)
        if draw:
            pred_rst.draw_rst(fmerge.replace(".merge", ".ps"))
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing()
        fbrackets = fmerge.replace('.merge', '.brackets')
        # Write brackets into file
        Evaluator.writebrackets(fbrackets, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            fdis = fmerge.replace('.merge', '.dis')
            gold_rst = RstTree(fdis, fmerge)
            gold_rst.build()
            met.eval(gold_rst, pred_rst)
            for node in pred_rst.postorder_DFT(pred_rst.tree, []):
                pred_forms.append(node.form)
            for node in gold_rst.postorder_DFT(gold_rst.tree, []):
                gold_forms.append(node.form)
            nodes = gold_rst.postorder_DFT(gold_rst.tree, [])
            inner_nodes = [node for node in nodes
                           if node.lnode is not None and node.rnode is not None]
            for idx, node in enumerate(inner_nodes):
                relation = (node.rnode.relation if node.form == 'NS'
                            else node.lnode.relation)
                rela_class = RstTree.extract_relation(relation)
                if rela_class in depth_per_relation:
                    depth_per_relation[rela_class].append(node.depth)
                else:
                    depth_per_relation[rela_class] = [node.depth]
                lnode_text = ' '.join([gold_rst.doc.token_dict[tid].word
                                       for tid in node.lnode.text])
                lnode_lemmas = ' '.join([gold_rst.doc.token_dict[tid].lemma
                                         for tid in node.lnode.text])
                rnode_text = ' '.join([gold_rst.doc.token_dict[tid].word
                                       for tid in node.rnode.text])
                rnode_lemmas = ' '.join([gold_rst.doc.token_dict[tid].lemma
                                         for tid in node.rnode.text])
                # if rela_class == 'Topic-Change':
                #     print(fmerge)
                #     print(relation)
                #     print(lnode_text)
                #     print(rnode_text)
                #     print()
    if report:
        met.report()
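
# The loop above fills `depth_per_relation` but never reports it; a minimal
# reporting helper one could call after met.report() (an assumed addition,
# not part of the original code):
def report_depth_per_relation(depth_per_relation):
    """Print the mean tree depth at which each relation class occurs."""
    for rela_class, depths in sorted(depth_per_relation.items()):
        print(f"{rela_class}: mean depth {sum(depths) / len(depths):.2f} "
              f"({len(depths)} instances)")
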
def eval_parser(self, data_dir, output_dir='./examples', report=False,
                bcvocab=None, draw=True, isFlat=False):
    """ Test the parsing performance"""
    # Evaluation
    met = Metrics(levels=['span', 'nuclearity', 'relation'])
    # ----------------------------------------
    # Read all files from the given path
    with open(os.path.join(output_dir, "Treebank/TEST", "processed_data.p"),
              'rb') as file:
        doclist = pickle.load(file)
    fnames = [fn for fn in os.listdir(data_dir) if fn.endswith(".out")]
    pred_forms = []
    gold_forms = []
    depth_per_relation = {}
    for lines, fname in zip(doclist, fnames):
        # ----------------------------------------
        # Read *.merge file
        doc = Doc()
        doc.read_from_fmerge(lines)
        fout = os.path.join(data_dir, fname)
        print(fout)
        # ----------------------------------------
        # Parsing
        print("************************ predict rst ************************")
        pred_rst = self.parser.sr_parse(doc, isFlat, bcvocab)
        if draw:
            pred_rst.draw_rst(fout + '.ps')
        # Get brackets from parsing results
        pred_brackets = pred_rst.bracketing(isFlat)
        fbrackets = fout + '.brackets'
        # Write brackets into file
        Evaluator.writebrackets(fbrackets, pred_brackets)
        # ----------------------------------------
        # Evaluate with gold RST tree
        if report:
            print("************************ gold rst ************************")
            fdis = fout + '.dis'
            gold_rst = RstTree(fdis, lines, isFlat)
            gold_rst.build()
            met.eval(gold_rst, pred_rst, isFlat)
            if isFlat:
                for node in pred_rst.postorder_flat_DFT(pred_rst.tree, []):
                    pred_forms.append(node.form)
                for node in gold_rst.postorder_flat_DFT(gold_rst.tree, []):
                    gold_forms.append(node.form)
                nodes = gold_rst.postorder_flat_DFT(gold_rst.tree, [])
            else:
                for node in pred_rst.postorder_DFT(pred_rst.tree, []):
                    pred_forms.append(node.form)
                for node in gold_rst.postorder_DFT(gold_rst.tree, []):
                    gold_forms.append(node.form)
                nodes = gold_rst.postorder_DFT(gold_rst.tree, [])
            inner_nodes = [node for node in nodes
                           if node.lnode is not None and node.rnode is not None]
            for idx, node in enumerate(inner_nodes):
                relation = (node.rnode.relation if node.form == 'NS'
                            else node.lnode.relation)
                rela_class = RstTree.extract_relation(relation)
                if rela_class in depth_per_relation:
                    depth_per_relation[rela_class].append(node.depth)
                else:
                    depth_per_relation[rela_class] = [node.depth]
    if report:
        met.report()
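
# `pred_forms` / `gold_forms` are collected above but never summarized; a
# minimal sketch (an assumed addition, not part of the original code) that
# compares the nuclearity-form distributions of predicted and gold trees:
from collections import Counter

def compare_form_distributions(pred_forms, gold_forms):
    """Print how often each nuclearity form (NN / NS / SN) is predicted
    versus how often it occurs in the gold trees."""
    pred_counts, gold_counts = Counter(pred_forms), Counter(gold_forms)
    for form in sorted(set(pred_counts) | set(gold_counts)):
        print(f"{form}: pred {pred_counts[form]}, gold {gold_counts[form]}")
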