コード例 #1
0
ファイル: main.py プロジェクト: jonpiffle/amr_discourse
def test():
    """Evaluate the subgraph, order and pipeline scorers on the AMR test data.

    Steps, in order:
      1. Build paragraphs from 'amr_test.txt' via generate_paragraphs.
      2. Call load() and then test() on a SubgraphSelectionScorer
         (load() presumably restores a previously trained model -- confirm
         against the scorer classes).
      3. Do the same for an OrderScorer.
      4. Run PipelineScorer.test() with three strategy combinations
         (baseline/baseline, greedy/greedy, greedy/anneal).
      5. Pickle each pipeline result into the current working directory as
         'baseline_graphs.pickle', 'greedy_graphs.pickle' and
         'anneal_graphs.pickle'.

    Returns:
        None. Results are reported by the scorers and written to disk.
    """
    print('Loading amr data')
    paragraphs = generate_paragraphs('amr_test.txt', k=5)
    print('%d total cleaned paragraphs' % len(paragraphs))

    print('Testing Subgraph Selection Scorer')
    test_instances, test_labels = gen_subgraph_data(paragraphs, k=1)
    subgraph_scorer = SubgraphSelectionScorer()
    subgraph_scorer.load()
    subgraph_scorer.test(test_instances, test_labels)

    print('Testing Order Scorer')
    # gen_order_data returns a third value (weights) that testing does not use.
    test_instances, test_labels, _ = gen_order_data(paragraphs)
    order_scorer = OrderScorer()
    order_scorer.load()
    order_scorer.test(test_instances, test_labels)

    print('Testing Pipeline Scorer')
    pipeline_scorer = PipelineScorer()
    pipeline_scorer.load()
    baseline_graphs = pipeline_scorer.test(paragraphs, subgraph_strategy='baseline', order_strategy='baseline', processes=3)
    greedy_graphs = pipeline_scorer.test(paragraphs, subgraph_strategy='greedy', order_strategy='greedy', processes=3)
    anneal_graphs = pipeline_scorer.test(paragraphs, subgraph_strategy='greedy', order_strategy='anneal', processes=3)

    # Dump only after all three runs have finished (same ordering as before),
    # and use a context manager so every file is flushed and closed even if
    # pickling raises.
    outputs = (
        ('baseline_graphs.pickle', baseline_graphs),
        ('greedy_graphs.pickle', greedy_graphs),
        ('anneal_graphs.pickle', anneal_graphs),
    )
    for filename, graphs in outputs:
        with open(filename, 'wb') as handle:
            pickle.dump(graphs, handle)
コード例 #2
0
    # mean, min, max, std_dev of #of fragments per partition
    features += summary_statistics([len(s) for s in partition.root_partitioning])

    # mean, min, max, std_dev of subgraph similarity for every pair of subgraphs (including a subgraph with itself)
    features += summary_statistics([subgraph_similarity(partition.get_subgraph(s1), partition.get_subgraph(s2)) for s1, s2 in list(itertools.combinations(partition.root_partitioning, 2)) + [(s,s) for s in partition.root_partitioning]])

    # mean, min, max, std_dev of verb overlap for every pair of subgraphs (including a subgraph with itself)
    features += summary_statistics([len(partition.get_subgraph(s1).get_verbs() & partition.get_subgraph(s2).get_verbs()) for s1, s2 in list(itertools.combinations(partition.root_partitioning, 2)) + [(s,s) for s in partition.root_partitioning]])

    return features

if __name__ == '__main__':
    train_instances, train_labels, test_instances, test_labels, test = generate_train_test(use_cache=True)
    scorer = SubgraphSelectionScorer()
    scorer.train(train_instances, train_labels)
    scorer.test(test_instances, test_labels)

    for t in test:
        try:
            optimizer = SubgraphOptimizer(scorer)
            final_state = optimizer.optimize(t)
        except ValueError:
            continue

        print(final_state)

        '''
        final_partition = final_state.partition
        dummy_ordering = list(final_partition.root_partitioning)
        random.shuffle(dummy_ordering)
        actual_ordering = [frozenset(s.get_roots()) for s in t.sentence_graphs()]