def parse_and_eval(dataset, model):
    """Parse the rooted gold paragraphs of ``dataset`` and evaluate the result.

    Args:
        dataset: An iterable of iterables of gold paragraphs; it is flattened
            with ``chain(*dataset)``.
        model: The model handed to ``ShiftReduceParser``.

    Returns:
        A tuple ``(num_instances, scores)`` where ``num_instances`` is the
        number of gold paragraphs kept after filtering and ``scores`` is the
        value returned by ``parse_eval(parses, golds)``.
    """
    parser = ShiftReduceParser(model)

    # Only paragraphs whose root_relation() is truthy take part in evaluation.
    golds = [paragraph for paragraph in chain(*dataset)
             if paragraph.root_relation()]
    num_instances = len(golds)

    def strip_structure(paragraph):
        # Build a fresh paragraph holding only flat EDU copies (text, words,
        # tags) so the parser does not receive the gold tree.
        bare_edus = []
        for gold_edu in paragraph.edus():
            bare = EDU([TEXT(gold_edu.text)])
            bare.words = gold_edu.words
            bare.tags = gold_edu.tags
            bare_edus.append(bare)
        return Paragraph(bare_edus)

    # All inputs are prepared first, then parsed, as two separate passes.
    strips = [strip_structure(paragraph) for paragraph in golds]
    parses = [parser.parse(strip) for strip in strips]

    return num_instances, parse_eval(parses, golds)
def main():
    """Run the parsing pipeline over the CDTB test split and log the scores.

    Loads the tree-builder model from disk, reads the CDTB corpus, feeds the
    concatenated EDU text of every rooted test paragraph to the pipeline
    built by ``parse.build_pipeline()`` and logs macro/micro CDTB scores,
    RST-styled scores, nuclear scores and coarse/fine relation scores.
    """
    logging.basicConfig(level=logging.INFO)

    with open("data/models/treebuilder.partptr.model", "rb") as model_fd:
        model = torch.load(model_fd, map_location="cpu")
        model.eval()
        model.use_gpu = False
    # NOTE(review): this parser is constructed but never used below; parsing
    # is done by the pipeline instead.
    parser = PartitionPtrParser(model)

    cdtb = CDTB("data/CDTB", "TRAIN", "VALIDATE", "TEST",
                ctb_dir="data/CTB", preprocess=True, cache_dir="data/cache")
    # Only paragraphs whose root_relation() is truthy are evaluated.
    golds = [paragraph for paragraph in chain(*cdtb.test)
             if paragraph.root_relation()]

    import parse
    pipeline = parse.build_pipeline()

    # The pipeline receives raw text: the EDU texts of a paragraph joined
    # with no separator.
    strips = ["".join(edu.text for edu in paragraph.edus())
              for paragraph in golds]

    parse_sessions = []
    parses = [pipeline(text) for text in tqdm(strips)]

    def log_report(heading, render, score_args):
        # Log a heading line followed by the rendered report.
        logging.info(heading)
        logging.info(render(*score_args))

    # macro cdtb scores
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    log_report("CDTB macro (strict) scores:", eval.gen_parse_report,
               cdtb_macro_scores)

    # micro cdtb scores
    cdtb_micro_scores = eval.parse_eval(parses, golds, average="micro")
    log_report("CDTB micro (strict) scores:", eval.gen_parse_report,
               cdtb_micro_scores)

    # micro rst scores
    rst_scores = eval.rst_parse_eval(parses, golds)
    log_report("RST styled scores:", eval.gen_parse_report, rst_scores)

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    log_report("nuclear scores:", eval.gen_category_report, (nuclear_scores,))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    log_report("coarse relation scores:", eval.gen_category_report,
               (ctype_scores,))
    log_report("fine relation scores:", eval.gen_category_report,
               (ftype_scores,))

    # draw gold and parse tree along with decision hotmap
    # NOTE(review): nothing in this function appends to parse_sessions, so
    # zip() is exhausted immediately and this loop body never runs.
    for gold, tree, session in zip(golds, parses, parse_sessions):
        gold.draw()
        session.draw_decision_hotmap()
        tree.draw()
def evaluate(args):
    """Evaluate the discourse parsing pipeline on the CDTB test split.

    Args:
        args: Options object. The attributes read here are ``schema``,
            ``segmenter_name``, ``use_gpu``, ``data``, ``ctb_dir``,
            ``cache_dir`` and ``use_gold_edu``. When ``use_gold_edu`` is
            truthy the gold EDU boundaries are parsed; otherwise the
            pipeline's ``cut_edu`` re-segments each paragraph first.

    The function returns nothing; every score report is written through
    ``logger``.
    """
    pipeline = build_pipeline(schema=args.schema,
                              segmenter_name=args.segmenter_name,
                              use_gpu=args.use_gpu)
    cdtb = CDTB(args.data, "TRAIN", "VALIDATE", "TEST",
                ctb_dir=args.ctb_dir, preprocess=True,
                cache_dir=args.cache_dir)
    # Only paragraphs whose root_relation() is truthy are evaluated.
    golds = [paragraph for paragraph in chain(*cdtb.test)
             if paragraph.root_relation()]

    def bare_edus(paragraph):
        # Flat EDU copies carrying only text, words and tags.
        copies = []
        for source_edu in paragraph.edus():
            edu_copy = EDU([TEXT(source_edu.text)])
            edu_copy.words = source_edu.words
            edu_copy.tags = source_edu.tags
            copies.append(edu_copy)
        return copies

    def unsegmented(paragraph):
        # Rebuild the paragraph from sentence copies without EDU nodes, so
        # the segmenter starts from plain sentences.
        sentences = []
        for gold_sentence in paragraph.sentences():
            # Sentences that contain no EDU node are skipped.
            if not list(gold_sentence.iterfind(node_type_filter(EDU))):
                continue
            # NOTE(review): TEXT is given a one-element list here, while the
            # EDU copies pass the bare string -- confirm this is intended.
            copy_sentence = Sentence([TEXT([gold_sentence.text])])
            if hasattr(gold_sentence, "words"):
                copy_sentence.words = gold_sentence.words
            if hasattr(gold_sentence, "tags"):
                copy_sentence.tags = gold_sentence.tags
            copy_sentence.parse = cdtb.ctb[gold_sentence.sid]
            sentences.append(copy_sentence)
        return Paragraph(sentences)

    def log_report(heading, render, score_args):
        # Log a heading line followed by the rendered report.
        logger.info(heading)
        logger.info(render(*score_args))

    if args.use_gold_edu:
        logger.info("evaluation with gold edu segmentation")
    else:
        logger.info("evaluation with auto edu segmentation")

    parses = []
    for para in tqdm(golds, desc="parsing", unit=" para"):
        if args.use_gold_edu:
            segmented = para
        else:
            segmented = pipeline.cut_edu(unsegmented(para))
        parses.append(pipeline.parse(Paragraph(bare_edus(segmented))))

    # edu score
    scores = edu_eval(golds, parses)
    log_report("EDU segmentation scores:", gen_edu_report, (scores,))

    # parser score
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    log_report("CDTB macro (strict) scores:", eval.gen_parse_report,
               cdtb_macro_scores)

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    log_report("nuclear scores:", eval.gen_category_report, (nuclear_scores,))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    log_report("coarse relation scores:", eval.gen_category_report,
               (ctype_scores,))
    log_report("fine relation scores:", eval.gen_category_report,
               (ftype_scores,))

    # height eval
    height_scores = eval.height_eval(parses, golds)
    log_report("structure precision by node height:", eval.gen_height_report,
               (height_scores,))
def evaluate(args):
    """Evaluate the SVM segmenter and PartPtr parser on the CDTB test split.

    Args:
        args: Options object. The attributes read here are ``data``,
            ``ctb_dir``, ``cache_dir`` and ``use_gold_edu``. When
            ``use_gold_edu`` is truthy the gold EDUs are parsed; otherwise
            each sentence is re-segmented with the SVM segmenter first.

    The function returns nothing; every score report is written through
    ``logger``.
    """
    # NOTE: pickle.load can execute code embedded in the file; only point
    # this at model files from a trusted source.
    with open("pub/models/segmenter.svm.model", "rb") as segmenter_fd:
        segmenter_model = pickle.load(segmenter_fd)
    with open("pub/models/treebuilder.partptr.model", "rb") as parser_fd:
        parser_model = torch.load(parser_fd, map_location="cpu")
        parser_model.use_gpu = False
        parser_model.eval()

    segmenter = SVMSegmenter(segmenter_model)
    parser = PartPtrParser(parser_model)

    cdtb = CDTB(args.data, "TRAIN", "VALIDATE", "TEST",
                ctb_dir=args.ctb_dir, preprocess=True,
                cache_dir=args.cache_dir)
    # Only paragraphs whose root_relation() is truthy are evaluated.
    golds = [paragraph for paragraph in chain(*cdtb.test)
             if paragraph.root_relation()]

    def gold_edus(paragraph):
        # Flat copies of the gold EDUs carrying only text, words and tags.
        copies = []
        for gold_edu in paragraph.edus():
            edu_copy = EDU([TEXT(gold_edu.text)])
            edu_copy.words = gold_edu.words
            edu_copy.tags = gold_edu.tags
            copies.append(edu_copy)
        return copies

    def auto_edus(paragraph):
        # EDUs produced by the segmenter, sentence by sentence.
        produced = []
        for sentence in paragraph.sentences():
            # Sentences that contain no EDU node are skipped.
            if not list(sentence.iterfind(node_type_filter(EDU))):
                continue
            # This sets ``parse`` on the gold sentence object itself, not on
            # a copy, before handing it to the segmenter.
            sentence.parse = cdtb.ctb[sentence.sid]
            produced.extend(segmenter.cut_edu(sentence))
        return produced

    def log_report(heading, render, score_args):
        # Log a heading line followed by the rendered report.
        logger.info(heading)
        logger.info(render(*score_args))

    if args.use_gold_edu:
        logger.info("evaluation with gold edu segmentation")
    else:
        logger.info("evaluation with auto edu segmentation")

    parses = []
    for para in tqdm(golds, desc="parsing", unit=" para"):
        edus = gold_edus(para) if args.use_gold_edu else auto_edus(para)
        parses.append(parser.parse(Paragraph(edus)))

    # edu score
    scores = edu_eval(golds, parses)
    log_report("EDU segmentation scores:", gen_edu_report, (scores,))

    # parser score
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    log_report("CDTB macro (strict) scores:", eval.gen_parse_report,
               cdtb_macro_scores)

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    log_report("nuclear scores:", eval.gen_category_report, (nuclear_scores,))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    log_report("coarse relation scores:", eval.gen_category_report,
               (ctype_scores,))
    log_report("fine relation scores:", eval.gen_category_report,
               (ftype_scores,))