# stdlib / third-party imports assumed by this snippet; project-internal names
# (CDTB, PartitionPtrParser, the eval module, EDU, TEXT, ...) come from the
# repository itself and their import paths are omitted here
import logging
from itertools import chain

import torch
from tqdm import tqdm


def main():
    logging.basicConfig(level=logging.INFO)
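    # load the pre-trained partition-pointer tree builder on CPU:
    # map_location="cpu" lets the checkpoint deserialize without CUDA, and
    # eval() disables dropout so parsing is deterministic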
    with open("data/models/treebuilder.partptr.model", "rb") as model_fd:
        model = torch.load(model_fd, map_location="cpu")
        model.eval()
        model.use_gpu = False
    parser = PartitionPtrParser(model)
    cdtb = CDTB("data/CDTB",
                "TRAIN",
                "VALIDATE",
                "TEST",
                ctb_dir="data/CTB",
                preprocess=True,
                cache_dir="data/cache")
    golds = list(filter(lambda d: d.root_relation(), chain(*cdtb.test)))

    import parse
    pipeline = parse.build_pipeline()
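    # the pipeline is fed raw paragraph text below, so it has to handle EDU
    # segmentation and tree building end to end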

    strips = []
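    # rebuild each gold paragraph's raw text so the pipeline must redo EDU
    # segmentation itself instead of reusing the gold EDUs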
    for paragraph in golds:
        edus = []
        for edu in paragraph.edus():
            # edu_copy = EDU([TEXT(edu.text)])
            # setattr(edu_copy, "words", edu.words)
            # setattr(edu_copy, "tags", edu.tags)
            edus.append(edu.text)
        strips.append("".join(edus))
        # print(strips[-1])
    parses = []
    parse_sessions = []
    for text in tqdm(strips):
        # tree, session = parser.parse(text, ret_session=True)
        tree = pipeline(text)  # avoid rebinding the imported `parse` module
        parses.append(tree)
        # parse_sessions.append(session)

    # macro cdtb scores
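    # (macro averaging scores each paragraph separately and averages the
    # results; micro averaging pools spans across paragraphs, so longer
    # paragraphs weigh more)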
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    logging.info("CDTB macro (strict) scores:")
    logging.info(eval.gen_parse_report(*cdtb_macro_scores))
    # micro cdtb scores
    cdtb_micro_scores = eval.parse_eval(parses, golds, average="micro")
    logging.info("CDTB micro (strict) scores:")
    logging.info(eval.gen_parse_report(*cdtb_micro_scores))

    # micro rst scores
    rst_scores = eval.rst_parse_eval(parses, golds)
    logging.info("RST styled scores:")
    logging.info(eval.gen_parse_report(*rst_scores))

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    logging.info("nuclear scores:")
    logging.info(eval.gen_category_report(nuclear_scores))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    logging.info("coarse relation scores:")
    logging.info(eval.gen_category_report(ctype_scores))
    logging.info("fine relation scores:")
    logging.info(eval.gen_category_report(ftype_scores))

    # draw gold and predicted trees; decision hotmaps require the sessions
    # returned by parser.parse(..., ret_session=True), which are not collected
    # above, so only the trees are drawn
    for gold, tree in zip(golds, parses):
        gold.draw()
        tree.draw()


def evaluate(args):
    pipeline = build_pipeline(schema=args.schema,
                              segmenter_name=args.segmenter_name,
                              use_gpu=args.use_gpu)
    cdtb = CDTB(args.data,
                "TRAIN",
                "VALIDATE",
                "TEST",
                ctb_dir=args.ctb_dir,
                preprocess=True,
                cache_dir=args.cache_dir)
    golds = list(filter(lambda d: d.root_relation(), chain(*cdtb.test)))
    parses = []

    if args.use_gold_edu:
        logger.info("evaluation with gold edu segmentation")
    else:
        logger.info("evaluation with auto edu segmentation")

    for para in tqdm(golds, desc="parsing", unit=" para"):
        if args.use_gold_edu:
            edus = []
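            # rebuild bare EDU nodes that keep only text, words and tags, so
            # the parser sees gold segmentation but none of the gold structure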
            for edu in para.edus():
                edu_copy = EDU([TEXT(edu.text)])
                setattr(edu_copy, "words", edu.words)
                setattr(edu_copy, "tags", edu.tags)
                edus.append(edu_copy)
        else:
            sentences = []
            for sentence in para.sentences():
                if list(sentence.iterfind(node_type_filter(EDU))):
                    copy_sentence = Sentence([TEXT([sentence.text])])
                    if hasattr(sentence, "words"):
                        setattr(copy_sentence, "words", sentence.words)
                    if hasattr(sentence, "tags"):
                        setattr(copy_sentence, "tags", sentence.tags)
                    setattr(copy_sentence, "parse", cdtb.ctb[sentence.sid])
                    sentences.append(copy_sentence)
            para = pipeline.cut_edu(Paragraph(sentences))
            edus = []
            for edu in para.edus():
                edu_copy = EDU([TEXT(edu.text)])
                setattr(edu_copy, "words", edu.words)
                setattr(edu_copy, "tags", edu.tags)
                edus.append(edu_copy)
        parse = pipeline.parse(Paragraph(edus))
        parses.append(parse)

    # edu score
    scores = edu_eval(golds, parses)
    logger.info("EDU segmentation scores:")
    logger.info(gen_edu_report(scores))

    # parser score
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    logger.info("CDTB macro (strict) scores:")
    logger.info(eval.gen_parse_report(*cdtb_macro_scores))

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    logger.info("nuclear scores:")
    logger.info(eval.gen_category_report(nuclear_scores))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    logger.info("coarse relation scores:")
    logger.info(eval.gen_category_report(ctype_scores))
    logger.info("fine relation scores:")
    logger.info(eval.gen_category_report(ftype_scores))

    # height eval
    height_scores = eval.height_eval(parses, golds)
    logger.info("structure precision by node height:")
    logger.info(eval.gen_height_report(height_scores))
Example #3
def main(args):
    # set seed for reproducibility
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
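    # note: unlike Example #4 below, this variant does not also call
    # torch.cuda.manual_seed, so GPU runs are not fully reproducible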

    # load dataset
    cdtb = CDTB(args.data,
                "TRAIN",
                "VALIDATE",
                "TEST",
                ctb_dir=args.ctb_dir,
                preprocess=True,
                cache_dir=args.cache_dir)
    # build vocabulary
    word_vocab, pos_vocab, nuc_label, rel_label = build_vocab(cdtb.train)

    trainset = numericalize(cdtb.train, word_vocab, pos_vocab, nuc_label,
                            rel_label)
    logging.info("num of instances trainset: %d" % len(trainset))
    logging.info("args: %s" % str(args))
    # build model
    model = PartitionPtr(hidden_size=args.hidden_size,
                         dropout=args.dropout,
                         word_vocab=word_vocab,
                         pos_vocab=pos_vocab,
                         nuc_label=nuc_label,
                         rel_label=rel_label,
                         pretrained=args.pretrained,
                         w2v_size=args.w2v_size,
                         w2v_freeze=args.w2v_freeze,
                         pos_size=args.pos_size,
                         split_mlp_size=args.split_mlp_size,
                         nuc_mlp_size=args.nuc_mlp_size,
                         rel_mlp_size=args.rel_mlp_size,
                         use_gpu=args.use_gpu)
    if args.use_gpu:
        model.cuda()
    logging.info("model:\n%s" % str(model))

    # train and evaluate
    niter = 0
    log_splits_loss = 0.
    log_nucs_loss = 0.
    log_rels_loss = 0.
    log_loss = 0.
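    # Adam's weight_decay option applies the L2 penalty configured by args.l2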
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.l2)
    writer = SummaryWriter(args.log_dir)
    logging.info(
        "hint: run 'tensorboard --logdir %s' to observe training status" %
        args.log_dir)
    best_model = None
    best_model_score = 0.
    for nepoch in range(1, args.epoch + 1):
        batch_iter = gen_batch_iter(trainset, args.batch_size, args.use_gpu)
        for nbatch, (e_inputs, d_inputs, grounds) in enumerate(batch_iter,
                                                               start=1):
            niter += 1
            model.train()
            optimizer.zero_grad()
            splits_loss, nucs_loss, rels_loss = model.loss(
                e_inputs, d_inputs, grounds)
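            # combine the three objectives; the a_* coefficients weight split,
            # nuclearity and relation accuracy against one another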
            loss = (args.a_split_loss * splits_loss
                    + args.a_nuclear_loss * nucs_loss
                    + args.a_relation_loss * rels_loss)
            loss.backward()
            optimizer.step()
            log_splits_loss += splits_loss.item()
            log_nucs_loss += nucs_loss.item()
            log_rels_loss += rels_loss.item()
            log_loss += loss.item()
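            # the values logged below are sums accumulated over the last
            # args.log_every iterations, not per-batch averages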
            if niter % args.log_every == 0:
                logging.info(
                    "[iter %-6d] epoch: %-3d, batch %-5d, "
                    "train splits loss: %.5f, nuclear loss %.5f, relation loss %.5f, loss %.5f"
                    % (niter, nepoch, nbatch, log_splits_loss, log_nucs_loss,
                       log_rels_loss, log_loss))
                writer.add_scalar("train/split_loss", log_splits_loss, niter)
                writer.add_scalar("train/nuclear_loss", log_nucs_loss, niter)
                writer.add_scalar("train/relation_loss", log_rels_loss, niter)
                writer.add_scalar("train/loss", log_loss, niter)
                log_splits_loss = 0.
                log_nucs_loss = 0.
                log_rels_loss = 0.
                log_loss = 0.
            if niter % args.validate_every == 0:
                num_instances, validate_scores = parse_and_eval(
                    cdtb.validate, model)
                logging.info("validation on %d instances" % num_instances)
                logging.info(gen_parse_report(*validate_scores))
                writer.add_scalar("validate/span_f1", validate_scores[0][2],
                                  niter)
                writer.add_scalar("validate/nuclear_f1", validate_scores[1][2],
                                  niter)
                writer.add_scalar("validate/coarse_relation_f1",
                                  validate_scores[2][2], niter)
                writer.add_scalar("validate/fine_relation_f1",
                                  validate_scores[3][2], niter)
                new_model_score = model_score(validate_scores)
                if new_model_score > best_model_score:
                    # test on testset with new best model
                    best_model_score = new_model_score
                    best_model = copy.deepcopy(model)
                    logging.info("test on new best model")
                    num_instances, test_scores = parse_and_eval(
                        cdtb.test, best_model)
                    logging.info("test on %d instances" % num_instances)
                    logging.info(gen_parse_report(*test_scores))
                    writer.add_scalar("test/span_f1", test_scores[0][2], niter)
                    writer.add_scalar("test/nuclear_f1", test_scores[1][2],
                                      niter)
                    writer.add_scalar("test/coarse_relation_f1",
                                      test_scores[2][2], niter)
                    writer.add_scalar("test/fine_relation_f1",
                                      test_scores[3][2], niter)
    if best_model:
        # evaluation and save best model
        logging.info("final test result")
        num_instances, test_scores = parse_and_eval(cdtb.test, best_model)
        logging.info("test on %d instances" % num_instances)
        logging.info(gen_parse_report(*test_scores))
        logging.info("save best model to %s" % args.model_save)
        with open(args.model_save, "wb+") as model_fd:
            torch.save(best_model, model_fd)
    writer.close()
Example #4
def main(args):
    # set seed for reproducibility
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
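    # this variant additionally seeds CUDA so GPU runs are reproducible too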
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # load dataset
    cdtb = CDTB(args.data,
                "TRAIN",
                "VALIDATE",
                "TEST",
                ctb_dir=args.ctb_dir,
                preprocess=True,
                cache_dir=args.cache_dir)

    trainset = gen_instances(chain(*cdtb.train))
    logging.info("generate %d instances from trainset" % len(trainset))
    word_vocab, pos_vocab, trans_label = build_vocab(trainset)
    trainset = numericalize(trainset, word_vocab, pos_vocab, trans_label)

    model = ShiftReduceModel(hidden_size=args.hidden_size,
                             dropout=args.dropout,
                             cnn_filters=args.cnn_filters,
                             word_vocab=word_vocab,
                             pos_vocab=pos_vocab,
                             trans_label=trans_label,
                             pretrained=args.pretrained,
                             w2v_size=args.w2v_size,
                             w2v_freeze=args.w2v_freeze,
                             pos_size=args.pos_size,
                             mlp_layers=args.mlp_layers,
                             use_gpu=args.use_gpu)
    if args.use_gpu:
        model.cuda()
    logging.info("model:\n" + str(model))
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.l2)
    dataset = np.array(trainset)
    niter = 0
    best_model = None
    best_model_score = 0.
    for nepoch in range(1, args.epoch + 1):
        np.random.shuffle(dataset)
        batch_iter = gen_batch(dataset, args.batch_size)
        for nbatch, batch in enumerate(batch_iter, start=1):
            niter += 1
            model.train()
            optimizer.zero_grad()
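            # the shift-reduce model scores one instance at a time, so the
            # batch loss is the mean of per-instance losses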
            loss = 0.
            for word_ids, pos_ids, trans_ids in batch:
                batch_loss = model.loss(word_ids, pos_ids, trans_ids)
                loss += batch_loss
            loss = loss / len(batch)
            loss.backward()
            optimizer.step()
            if niter % args.log_every == 0:
                logging.info(
                    "[iter %-6d]epoch: %-3d, batch %-5d, train loss %.5f" %
                    (niter, nepoch, nbatch, loss.item()))

            if niter % args.validate_every == 0:
                model.eval()
                num_instances, validate_scores = parse_and_eval(
                    cdtb.validate, model)
                logging.info("validation on %d instances" % num_instances)
                logging.info(gen_parse_report(*validate_scores))
                new_model_score = model_score(validate_scores)
                if new_model_score > best_model_score:
                    # test on testset with new best model
                    best_model_score = new_model_score
                    best_model = copy.deepcopy(model)
                    logging.info("test on new best model")
                    num_instances, test_scores = parse_and_eval(
                        cdtb.test, best_model)
                    logging.info("test on %d instances" % num_instances)
                    logging.info(gen_parse_report(*test_scores))
    if best_model:
        # evaluation and save best model
        logging.info("final test result")
        num_instances, test_scores = parse_and_eval(cdtb.test, best_model)
        logging.info("test on %d instances" % num_instances)
        logging.info(gen_parse_report(*test_scores))
        logging.info("save best model to %s" % args.model_save)
        with open(args.model_save, "wb+") as model_fd:
            torch.save(best_model, model_fd)
Example #5
def evaluate(args):
    with open("pub/models/segmenter.svm.model", "rb") as segmenter_fd:
        segmenter_model = pickle.load(segmenter_fd)
    with open("pub/models/treebuilder.partptr.model", "rb") as parser_fd:
        parser_model = torch.load(parser_fd, map_location="cpu")
        parser_model.use_gpu = False
        parser_model.eval()
    segmenter = SVMSegmenter(segmenter_model)
    parser = PartPtrParser(parser_model)
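    # the SVM segmenter proposes EDU boundaries and the partition-pointer
    # parser builds discourse trees over them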

    cdtb = CDTB(args.data,
                "TRAIN",
                "VALIDATE",
                "TEST",
                ctb_dir=args.ctb_dir,
                preprocess=True,
                cache_dir=args.cache_dir)
    golds = list(filter(lambda d: d.root_relation(), chain(*cdtb.test)))
    parses = []

    if args.use_gold_edu:
        logger.info("evaluation with gold edu segmentation")
    else:
        logger.info("evaluation with auto edu segmentation")

    for para in tqdm(golds, desc="parsing", unit=" para"):
        if args.use_gold_edu:
            edus = []
            for edu in para.edus():
                edu_copy = EDU([TEXT(edu.text)])
                setattr(edu_copy, "words", edu.words)
                setattr(edu_copy, "tags", edu.tags)
                edus.append(edu_copy)
            parse = parser.parse(Paragraph(edus))
            parses.append(parse)
        else:
            edus = []
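            # attach each sentence's gold CTB constituency parse before
            # cutting EDUs; the segmenter presumably draws syntactic features
            # from it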
            for sentence in para.sentences():
                if list(sentence.iterfind(node_type_filter(EDU))):
                    setattr(sentence, "parse", cdtb.ctb[sentence.sid])
                    edus.extend(segmenter.cut_edu(sentence))
            parse = parser.parse(Paragraph(edus))
            parses.append(parse)

    # edu score
    scores = edu_eval(golds, parses)
    logger.info("EDU segmentation scores:")
    logger.info(gen_edu_report(scores))

    # parser score
    cdtb_macro_scores = eval.parse_eval(parses, golds, average="macro")
    logger.info("CDTB macro (strict) scores:")
    logger.info(eval.gen_parse_report(*cdtb_macro_scores))

    # nuclear scores
    nuclear_scores = eval.nuclear_eval(parses, golds)
    logger.info("nuclear scores:")
    logger.info(eval.gen_category_report(nuclear_scores))

    # relation scores
    ctype_scores, ftype_scores = eval.relation_eval(parses, golds)
    logger.info("coarse relation scores:")
    logger.info(eval.gen_category_report(ctype_scores))
    logger.info("fine relation scores:")
    logger.info(eval.gen_category_report(ftype_scores))
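
# A minimal, hypothetical command-line wrapper for evaluate(); the flag names
# mirror the attributes read from args above, and the defaults reuse the
# repository layout seen earlier (data/CDTB, data/CTB, data/cache); they are
# assumptions, not part of the original code.
if __name__ == "__main__":
    import argparse

    arg_parser = argparse.ArgumentParser(description="evaluate the CDTB parser")
    arg_parser.add_argument("--data", default="data/CDTB")
    arg_parser.add_argument("--ctb_dir", default="data/CTB")
    arg_parser.add_argument("--cache_dir", default="data/cache")
    arg_parser.add_argument("--use_gold_edu", action="store_true")
    evaluate(arg_parser.parse_args())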