def main():
    logging.basicConfig(level=logging.INFO)
    # load the pre-trained partition pointer tree builder on CPU
    with open("data/models/treebuilder.partptr.model", "rb") as model_fd:
        model = torch.load(model_fd, map_location="cpu")
    model.eval()
    model.use_gpu = False
    # parser is only needed by the alternative, session-producing path below
    parser = PartitionPtrParser(model)

    cdtb = CDTB("data/CDTB", "TRAIN", "VALIDATE", "TEST",
                ctb_dir="data/CTB", preprocess=True, cache_dir="data/cache")
    golds = list(filter(lambda d: d.root_relation(), chain(*cdtb.test)))

    import parse
    pipeline = parse.build_pipeline()

    # rebuild raw paragraph strings from the gold EDU segmentation
    strips = []
    for paragraph in golds:
        edus = []
        for edu in paragraph.edus():
            # edu_copy = EDU([TEXT(edu.text)])
            # setattr(edu_copy, "words", edu.words)
            # setattr(edu_copy, "tags", edu.tags)
            edus.append(edu.text)
        strips.append("".join(edus))
        # print(strips[-1])

    parses = []
    parse_sessions = []
    for text in tqdm(strips):
        # tree, session = parser.parse(text, ret_session=True)
        tree = pipeline(text)
        parses.append(tree)
        # parse_sessions.append(session)

    # macro cdtb scores
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    logging.info("CDTB macro (strict) scores:")
    logging.info(eval.gen_parse_report(*cdtb_macro_scores))

    # micro cdtb scores
    cdtb_micro_scores = eval.parse_eval(parses, golds, average="micro")
    logging.info("CDTB micro (strict) scores:")
    logging.info(eval.gen_parse_report(*cdtb_micro_scores))

    # micro rst scores
    rst_scores = eval.rst_parse_eval(parses, golds)
    logging.info("RST styled scores:")
    logging.info(eval.gen_parse_report(*rst_scores))

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    logging.info("nuclear scores:")
    logging.info(eval.gen_category_report(nuclear_scores))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    logging.info("coarse relation scores:")
    logging.info(eval.gen_category_report(ctype_scores))
    logging.info("fine relation scores:")
    logging.info(eval.gen_category_report(ftype_scores))

    # draw gold and parse tree along with decision hotmap; this loop only runs
    # when parse_sessions was populated via the commented-out parser path above
    for gold, tree, session in zip(golds, parses, parse_sessions):
        gold.draw()
        session.draw_decision_hotmap()
        tree.draw()
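# The drawing loop in main() does nothing unless parse_sessions is populated. A
# minimal sketch of that alternative path, assuming PartitionPtrParser.parse
# supports the ret_session=True flag hinted at by the commented-out call above:
def parse_with_sessions(parser, strips):
    # hypothetical helper: returns parse trees plus the per-parse decision
    # sessions needed for session.draw_decision_hotmap()
    parses, sessions = [], []
    for text in tqdm(strips):
        tree, session = parser.parse(text, ret_session=True)  # assumed signature
        parses.append(tree)
        sessions.append(session)
    return parses, sessions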
def evaluate(args):
    pipeline = build_pipeline(schema=args.schema, segmenter_name=args.segmenter_name,
                              use_gpu=args.use_gpu)
    cdtb = CDTB(args.data, "TRAIN", "VALIDATE", "TEST",
                ctb_dir=args.ctb_dir, preprocess=True, cache_dir=args.cache_dir)
    golds = list(filter(lambda d: d.root_relation(), chain(*cdtb.test)))
    parses = []

    if args.use_gold_edu:
        logger.info("evaluation with gold edu segmentation")
    else:
        logger.info("evaluation with auto edu segmentation")

    for para in tqdm(golds, desc="parsing", unit=" para"):
        if args.use_gold_edu:
            # copy gold EDUs so the parser cannot see the gold tree structure
            edus = []
            for edu in para.edus():
                edu_copy = EDU([TEXT(edu.text)])
                setattr(edu_copy, "words", edu.words)
                setattr(edu_copy, "tags", edu.tags)
                edus.append(edu_copy)
        else:
            # rebuild bare sentences and let the pipeline segment EDUs itself
            sentences = []
            for sentence in para.sentences():
                if list(sentence.iterfind(node_type_filter(EDU))):
                    copy_sentence = Sentence([TEXT([sentence.text])])
                    if hasattr(sentence, "words"):
                        setattr(copy_sentence, "words", sentence.words)
                    if hasattr(sentence, "tags"):
                        setattr(copy_sentence, "tags", sentence.tags)
                    setattr(copy_sentence, "parse", cdtb.ctb[sentence.sid])
                    sentences.append(copy_sentence)
            para = pipeline.cut_edu(Paragraph(sentences))
            edus = []
            for edu in para.edus():
                edu_copy = EDU([TEXT(edu.text)])
                setattr(edu_copy, "words", edu.words)
                setattr(edu_copy, "tags", edu.tags)
                edus.append(edu_copy)
        parse = pipeline.parse(Paragraph(edus))
        parses.append(parse)

    # edu score
    scores = edu_eval(golds, parses)
    logger.info("EDU segmentation scores:")
    logger.info(gen_edu_report(scores))

    # parser score
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    logger.info("CDTB macro (strict) scores:")
    logger.info(eval.gen_parse_report(*cdtb_macro_scores))

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    logger.info("nuclear scores:")
    logger.info(eval.gen_category_report(nuclear_scores))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    logger.info("coarse relation scores:")
    logger.info(eval.gen_category_report(ctype_scores))
    logger.info("fine relation scores:")
    logger.info(eval.gen_category_report(ftype_scores))

    # height eval
    height_scores = eval.height_eval(parses, golds)
    logger.info("structure precision by node height:")
    logger.info(eval.gen_height_report(height_scores))
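# node_type_filter is used above as a predicate factory for searching schema
# trees. The real implementation lives elsewhere in the repo; a minimal sketch
# of its assumed behavior (match nodes by type):
def node_type_filter_sketch(node_types):
    # hypothetical stand-in for node_type_filter
    if not isinstance(node_types, (list, tuple, set)):
        node_types = (node_types,)
    else:
        node_types = tuple(node_types)

    def _filter(node):
        return isinstance(node, node_types)
    return _filter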
def main(args):
    # set seed for reproducibility
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    # also seed CUDA when a GPU is used (mirrors the shift-reduce trainer below)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # load dataset
    cdtb = CDTB(args.data, "TRAIN", "VALIDATE", "TEST",
                ctb_dir=args.ctb_dir, preprocess=True, cache_dir=args.cache_dir)
    # build vocabulary
    word_vocab, pos_vocab, nuc_label, rel_label = build_vocab(cdtb.train)
    trainset = numericalize(cdtb.train, word_vocab, pos_vocab, nuc_label, rel_label)
    logging.info("num of instances trainset: %d" % len(trainset))
    logging.info("args: %s" % str(args))

    # build model
    model = PartitionPtr(hidden_size=args.hidden_size, dropout=args.dropout,
                         word_vocab=word_vocab, pos_vocab=pos_vocab,
                         nuc_label=nuc_label, rel_label=rel_label,
                         pretrained=args.pretrained, w2v_size=args.w2v_size,
                         w2v_freeze=args.w2v_freeze, pos_size=args.pos_size,
                         split_mlp_size=args.split_mlp_size,
                         nuc_mlp_size=args.nuc_mlp_size,
                         rel_mlp_size=args.rel_mlp_size,
                         use_gpu=args.use_gpu)
    if args.use_gpu:
        model.cuda()
    logging.info("model:\n%s" % str(model))

    # train and evaluate
    niter = 0
    log_splits_loss = 0.
    log_nucs_loss = 0.
    log_rels_loss = 0.
    log_loss = 0.
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
    writer = SummaryWriter(args.log_dir)
    logging.info("hint: run 'tensorboard --logdir %s' to observe training status" % args.log_dir)
    best_model = None
    best_model_score = 0.
    for nepoch in range(1, args.epoch + 1):
        batch_iter = gen_batch_iter(trainset, args.batch_size, args.use_gpu)
        for nbatch, (e_inputs, d_inputs, grounds) in enumerate(batch_iter, start=1):
            niter += 1
            model.train()
            optimizer.zero_grad()
            splits_loss, nucs_loss, rels_loss = model.loss(e_inputs, d_inputs, grounds)
            loss = (args.a_split_loss * splits_loss
                    + args.a_nuclear_loss * nucs_loss
                    + args.a_relation_loss * rels_loss)
            loss.backward()
            optimizer.step()
            log_splits_loss += splits_loss.item()
            log_nucs_loss += nucs_loss.item()
            log_rels_loss += rels_loss.item()
            log_loss += loss.item()
            if niter % args.log_every == 0:
                # report the average loss over the last log_every iterations,
                # not the raw sum, so numbers are comparable across settings
                log_splits_loss /= args.log_every
                log_nucs_loss /= args.log_every
                log_rels_loss /= args.log_every
                log_loss /= args.log_every
                logging.info(
                    "[iter %-6d]epoch: %-3d, batch %-5d, "
                    "train splits loss: %.5f, nuclear loss %.5f, relation loss %.5f, loss %.5f" %
                    (niter, nepoch, nbatch, log_splits_loss, log_nucs_loss, log_rels_loss, log_loss))
                writer.add_scalar("train/split_loss", log_splits_loss, niter)
                writer.add_scalar("train/nuclear_loss", log_nucs_loss, niter)
                writer.add_scalar("train/relation_loss", log_rels_loss, niter)
                writer.add_scalar("train/loss", log_loss, niter)
                log_splits_loss = 0.
                log_nucs_loss = 0.
                log_rels_loss = 0.
                log_loss = 0.
            if niter % args.validate_every == 0:
                num_instances, validate_scores = parse_and_eval(cdtb.validate, model)
                logging.info("validation on %d instances" % num_instances)
                logging.info(gen_parse_report(*validate_scores))
                writer.add_scalar("validate/span_f1", validate_scores[0][2], niter)
                writer.add_scalar("validate/nuclear_f1", validate_scores[1][2], niter)
                writer.add_scalar("validate/coarse_relation_f1", validate_scores[2][2], niter)
                writer.add_scalar("validate/fine_relation_f1", validate_scores[3][2], niter)
                new_model_score = model_score(validate_scores)
                if new_model_score > best_model_score:
                    # test on testset with new best model
                    best_model_score = new_model_score
                    best_model = copy.deepcopy(model)
                    logging.info("test on new best model")
                    num_instances, test_scores = parse_and_eval(cdtb.test, best_model)
                    logging.info("test on %d instances" % num_instances)
                    logging.info(gen_parse_report(*test_scores))
                    writer.add_scalar("test/span_f1", test_scores[0][2], niter)
                    writer.add_scalar("test/nuclear_f1", test_scores[1][2], niter)
                    writer.add_scalar("test/coarse_relation_f1", test_scores[2][2], niter)
                    writer.add_scalar("test/fine_relation_f1", test_scores[3][2], niter)
    if best_model:
        # evaluate and save the best model
        logging.info("final test result")
        num_instances, test_scores = parse_and_eval(cdtb.test, best_model)
        logging.info("test on %d instances" % num_instances)
        logging.info(gen_parse_report(*test_scores))
        logging.info("save best model to %s" % args.model_save)
        with open(args.model_save, "wb") as model_fd:
            torch.save(best_model, model_fd)
    writer.close()
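# model_score() reduces the validation scores to a single number for checkpoint
# selection. Its exact weighting is not shown here; a plausible sketch that
# averages the four F1 values (span, nuclear, coarse relation, fine relation)
# used in the add_scalar calls above:
def model_score_sketch(validate_scores):
    # hypothetical: each entry is assumed to be a (precision, recall, f1) triple
    f1s = [scores[2] for scores in validate_scores]
    return sum(f1s) / len(f1s)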
def main(args):
    # set seed for reproducibility
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # load dataset
    cdtb = CDTB(args.data, "TRAIN", "VALIDATE", "TEST",
                ctb_dir=args.ctb_dir, preprocess=True, cache_dir=args.cache_dir)
    trainset = gen_instances(chain(*cdtb.train))
    logging.info("generated %d instances from trainset" % len(trainset))
    word_vocab, pos_vocab, trans_label = build_vocab(trainset)
    trainset = numericalize(trainset, word_vocab, pos_vocab, trans_label)

    model = ShiftReduceModel(hidden_size=args.hidden_size, dropout=args.dropout,
                             cnn_filters=args.cnn_filters,
                             word_vocab=word_vocab, pos_vocab=pos_vocab,
                             trans_label=trans_label,
                             pretrained=args.pretrained, w2v_size=args.w2v_size,
                             w2v_freeze=args.w2v_freeze, pos_size=args.pos_size,
                             mlp_layers=args.mlp_layers, use_gpu=args.use_gpu)
    if args.use_gpu:
        model.cuda()
    logging.info("model:\n" + str(model))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)
    dataset = np.array(trainset)
    niter = 0
    best_model = None
    best_model_score = 0.
    for nepoch in range(1, args.epoch + 1):
        np.random.shuffle(dataset)
        batch_iter = gen_batch(dataset, args.batch_size)
        for nbatch, batch in enumerate(batch_iter, start=1):
            niter += 1
            model.train()
            optimizer.zero_grad()
            # instances have variable length, so losses are accumulated one
            # instance at a time and averaged over the batch
            loss = 0.
            for word_ids, pos_ids, trans_ids in batch:
                loss += model.loss(word_ids, pos_ids, trans_ids)
            loss = loss / len(batch)
            loss.backward()
            optimizer.step()
            if niter % args.log_every == 0:
                logging.info("[iter %-6d]epoch: %-3d, batch %-5d, train loss %.5f" %
                             (niter, nepoch, nbatch, loss.item()))
            if niter % args.validate_every == 0:
                model.eval()
                num_instances, validate_scores = parse_and_eval(cdtb.validate, model)
                logging.info("validation on %d instances" % num_instances)
                logging.info(gen_parse_report(*validate_scores))
                new_model_score = model_score(validate_scores)
                if new_model_score > best_model_score:
                    # test on testset with new best model
                    best_model_score = new_model_score
                    best_model = copy.deepcopy(model)
                    logging.info("test on new best model")
                    num_instances, test_scores = parse_and_eval(cdtb.test, best_model)
                    logging.info("test on %d instances" % num_instances)
                    logging.info(gen_parse_report(*test_scores))
    if best_model:
        # evaluate and save the best model
        logging.info("final test result")
        num_instances, test_scores = parse_and_eval(cdtb.test, best_model)
        logging.info("test on %d instances" % num_instances)
        logging.info(gen_parse_report(*test_scores))
        logging.info("save best model to %s" % args.model_save)
        with open(args.model_save, "wb") as model_fd:
            torch.save(best_model, model_fd)
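# gen_batch() above is assumed to slice the shuffled instance array into
# fixed-size batches. A minimal sketch under that assumption:
def gen_batch_sketch(dataset, batch_size):
    # hypothetical stand-in for gen_batch; yields successive batch_size slices,
    # including a final smaller batch when len(dataset) is not a multiple
    for start in range(0, len(dataset), batch_size):
        yield dataset[start:start + batch_size]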
def evaluate(args):
    # load pre-trained segmenter and tree builder
    with open("pub/models/segmenter.svm.model", "rb") as segmenter_fd:
        segmenter_model = pickle.load(segmenter_fd)
    with open("pub/models/treebuilder.partptr.model", "rb") as parser_fd:
        parser_model = torch.load(parser_fd, map_location="cpu")
    parser_model.use_gpu = False
    parser_model.eval()
    segmenter = SVMSegmenter(segmenter_model)
    parser = PartPtrParser(parser_model)

    cdtb = CDTB(args.data, "TRAIN", "VALIDATE", "TEST",
                ctb_dir=args.ctb_dir, preprocess=True, cache_dir=args.cache_dir)
    golds = list(filter(lambda d: d.root_relation(), chain(*cdtb.test)))
    parses = []

    if args.use_gold_edu:
        logger.info("evaluation with gold edu segmentation")
    else:
        logger.info("evaluation with auto edu segmentation")

    for para in tqdm(golds, desc="parsing", unit=" para"):
        if args.use_gold_edu:
            # copy gold EDUs so the parser cannot see the gold tree structure
            edus = []
            for edu in para.edus():
                edu_copy = EDU([TEXT(edu.text)])
                setattr(edu_copy, "words", edu.words)
                setattr(edu_copy, "tags", edu.tags)
                edus.append(edu_copy)
        else:
            # segment EDUs automatically, sentence by sentence
            edus = []
            for sentence in para.sentences():
                if list(sentence.iterfind(node_type_filter(EDU))):
                    setattr(sentence, "parse", cdtb.ctb[sentence.sid])
                    edus.extend(segmenter.cut_edu(sentence))
        parse = parser.parse(Paragraph(edus))
        parses.append(parse)

    # edu score
    scores = edu_eval(golds, parses)
    logger.info("EDU segmentation scores:")
    logger.info(gen_edu_report(scores))

    # parser score
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    logger.info("CDTB macro (strict) scores:")
    logger.info(eval.gen_parse_report(*cdtb_macro_scores))

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    logger.info("nuclear scores:")
    logger.info(eval.gen_category_report(nuclear_scores))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    logger.info("coarse relation scores:")
    logger.info(eval.gen_category_report(ctype_scores))
    logger.info("fine relation scores:")
    logger.info(eval.gen_category_report(ftype_scores))
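# A minimal command-line entry point for evaluate(), assuming the argument
# names used above (data, ctb_dir, cache_dir, use_gold_edu); the flag spellings
# and defaults here are illustrative, mirroring the paths used elsewhere:
if __name__ == "__main__":
    import argparse
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--data", default="data/CDTB")
    arg_parser.add_argument("--ctb_dir", default="data/CTB")
    arg_parser.add_argument("--cache_dir", default="data/cache")
    arg_parser.add_argument("--use_gold_edu", action="store_true")
    evaluate(arg_parser.parse_args())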