Exemple #1
0
def main(args):
    """Train a ranker, rerank the test segmentations and print metrics.

    Reads ``model``, ``train``, ``train_topk``, ``test``, ``test_topk`` and
    ``output`` from ``args``.

    Args:
        args: Options object. ``args.model`` must be one of ``"mse"``,
            ``"mr"``, ``"mse_multi"`` or ``"mr_multi"``. When ``args.output``
            is not ``None``, one tab-separated line per test item is written
            to that path: the top segmentation with all whitespace removed,
            followed by every candidate in ranked order.

    Raises:
        ValueError: If ``args.model`` is not a supported model name.
    """
    epochs, lr1, lr2 = 100, 0.01, 0.05

    # Factories are lazy so only the selected ranker module is touched.
    model_factories = {
        "mse": lambda: mse_ranker.MSERanker(epochs, lr1),
        "mr": lambda: mr_ranker.MRRanker(epochs, lr1),
        "mse_multi": lambda: mse_multi_ranker.MSEMultiRanker(
            epochs, lr1, lr2),
        "mr_multi": lambda: mr_multi_ranker.MRMultiRanker(epochs, lr1, lr2),
    }
    # Fail fast with a clear message instead of hitting
    # ``None.train(...)`` after feature extraction has already run.
    if args.model not in model_factories:
        raise ValueError(
            "Unsupported model %r; expected one of: %s"
            % (args.model, ", ".join(sorted(model_factories))))

    feature_extractor = FeatureExtractor(get_resources(), args.model)
    train_feats, train_labels, _, _ = feature_extractor.get_features(
        args.train, args.train_topk)
    test_feats, _, test_segs, test_gold_truths = feature_extractor.get_features(
        args.test, args.test_topk)

    # Initialize and train model
    model = model_factories[args.model]()
    model.train(train_feats, train_labels)

    # Rerank top-k segmentations
    top_segmentations = []
    # The gold truths are unused here but stay in the zip so the iteration
    # length is the same as before.
    for segs_feats, segs, _gold in zip(test_feats, test_segs,
                                       test_gold_truths):
        if len(segs) == 1:
            # Nothing to rerank. Append the candidate list itself (not its
            # single element) so every entry is a ranked list, matching the
            # rerank() branch and the per-entry indexing done below.
            top_segmentations.append(list(segs))
        else:
            reranked_segs = rerank(segs, segs_feats, model, args.model)
            top_segmentations.append(reranked_segs)

    if args.output is not None:
        # The context manager closes the file even if a write fails.
        with open(args.output, 'w') as fp:
            for segs in top_segmentations:
                target = "".join(segs[0].split())
                fp.write(target + "\t" + "\t".join([seg.strip()
                                                    for seg in segs]) + "\n")

    # Evaluate metrics
    print("MRR:", mean_reciprocal_rank(test_gold_truths, top_segmentations))
    print("Accuracy@1:", accuracy(1, test_gold_truths, top_segmentations))
    print("Accuracy@2:", accuracy(2, test_gold_truths, top_segmentations))
    print("Fscore@1:", fscore(1, test_gold_truths, top_segmentations))
    print("Fscore@2:", fscore(2, test_gold_truths, top_segmentations))