Code Example #1
File: main.py  Project: jonpiffle/amr_discourse
import pickle  # used by the pickle.dump calls at the end of this function

def test():
    print('Loading amr data')
    paragraphs = generate_paragraphs('amr_test.txt', k=5)
    print('%d total cleaned paragraphs' % len(paragraphs))

    print('Testing Subgraph Selection Scorer')
    test_instances, test_labels = gen_subgraph_data(paragraphs, k=1)
    subgraph_scorer = SubgraphSelectionScorer()
    subgraph_scorer.load()
    subgraph_scorer.test(test_instances, test_labels)

    print('Testing Order Scorer')
    test_instances, test_labels, test_weights = gen_order_data(paragraphs)
    order_scorer = OrderScorer()
    order_scorer.load()
    order_scorer.test(test_instances, test_labels)    

    print('Testing Pipeline Scorer')
    pipeline_scorer = PipelineScorer()
    pipeline_scorer.load()
    baseline_graphs = pipeline_scorer.test(paragraphs, subgraph_strategy='baseline', order_strategy='baseline', processes=3)
    greedy_graphs = pipeline_scorer.test(paragraphs, subgraph_strategy='greedy', order_strategy='greedy', processes=3)
    anneal_graphs = pipeline_scorer.test(paragraphs, subgraph_strategy='greedy', order_strategy='anneal', processes=3)

    with open('baseline_graphs.pickle', 'wb') as f:
        pickle.dump(baseline_graphs, f)
    with open('greedy_graphs.pickle', 'wb') as f:
        pickle.dump(greedy_graphs, f)
    with open('anneal_graphs.pickle', 'wb') as f:
        pickle.dump(anneal_graphs, f)
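
test() persists its three strategy outputs with pickle; a minimal sketch for reading them back in a later session (assumes the files were written by a prior run and that the stored objects are list-like):

import pickle

# Reload the graphs persisted by test() above for side-by-side inspection.
with open('baseline_graphs.pickle', 'rb') as f:
    baseline_graphs = pickle.load(f)
with open('greedy_graphs.pickle', 'rb') as f:
    greedy_graphs = pickle.load(f)
with open('anneal_graphs.pickle', 'rb') as f:
    anneal_graphs = pickle.load(f)

# Illustrative only: assumes each object supports len().
print('baseline: %d, greedy: %d, anneal: %d' %
      (len(baseline_graphs), len(greedy_graphs), len(anneal_graphs)))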
Code Example #2
File: main.py  Project: jonpiffle/amr_discourse
def train():
    print('Loading amr data')
    paragraphs = generate_paragraphs('amr.txt', k=5)
    print('%d total cleaned paragraphs' % len(paragraphs))

    print('Training Subgraph Selection Scorer')
    train_instances, train_labels = gen_subgraph_data(paragraphs)
    subgraph_scorer = SubgraphSelectionScorer()
    subgraph_scorer.train(train_instances, train_labels, update_cache=True)

    print('Training Order Scorer')
    train_instances, train_labels, train_weights = gen_order_data(paragraphs)
    order_scorer = OrderScorer()
    order_scorer.train(train_instances, train_labels, train_weights)

    print('Training Pipeline Scorer')
    pipeline_scorer = PipelineScorer()
    subgraph_optimizer = SubgraphOptimizer(subgraph_scorer)
    order_optimizer = OrderOptimizer(order_scorer)
    pipeline_scorer.train(subgraph_optimizer, order_optimizer)
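
train() caches the subgraph model (update_cache=True) and test() in Example #1 reads the scorers back with load(), so the natural call order is train before test; a hypothetical driver block (not part of the original main.py):

if __name__ == '__main__':
    train()  # fit and cache the scorers on amr.txt
    test()   # evaluate the cached scorers on amr_test.txt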
Code Example #3
                count += 1
    return count


if __name__ == "__main__":
    import numpy as np  # np.bincount / np.array are used below
    from scorer import OrderScorer
    from optimizer import OrderAnnealer as Orderer

    train = generate_paragraphs("amr.txt", limit=500, k=5)

    examples, labels = add_negative_examples(train, 20)
    n = len(examples)
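    # Weight each label by (total examples - count of that label), so the rarer class gets the larger weight.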
    weights = n - np.bincount(labels)
    features = np.array([get_features(e.paragraph_graph(), e.sentence_graphs()) for e in examples])

    scorer = OrderScorer()
    # reg = lm.LogisticRegression()
    print("learning")
    scorer.train(features, labels, sample_weight=[weights[i] for i in labels])
    # reg.fit(features, labels)
    print("done")
    test = generate_paragraphs("amr_test.txt", limit=50, k=5)
    good_tests = []
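    # Keep only test paragraphs whose sentence graphs can be built without raising.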
    for t in test:
        try:
            t.sentence_graphs()
            good_tests.append(t)
        except ValueError as e:
            print(e)
            continue
    goodness = []