def test():
    """Run the subgraph, order, and pipeline scorers on the test AMR data.

    Reads paragraphs from ``amr_test.txt``, calls ``load()`` and ``test()`` on
    a ``SubgraphSelectionScorer`` and an ``OrderScorer``, then runs
    ``PipelineScorer.test`` under three strategy combinations and pickles each
    result to ``baseline_graphs.pickle``, ``greedy_graphs.pickle`` and
    ``anneal_graphs.pickle`` in the current working directory.
    """
    print('Loading amr data')
    paragraphs = generate_paragraphs('amr_test.txt', k=5)
    print('%d total cleaned paragraphs' % len(paragraphs))

    print('Testing Subgraph Selection Scorer')
    test_instances, test_labels = gen_subgraph_data(paragraphs, k=1)
    subgraph_scorer = SubgraphSelectionScorer()
    subgraph_scorer.load()
    subgraph_scorer.test(test_instances, test_labels)

    print('Testing Order Scorer')
    # gen_order_data also returns per-instance weights; they are not needed
    # for evaluation here, so the third value is discarded.
    test_instances, test_labels, _ = gen_order_data(paragraphs)
    order_scorer = OrderScorer()
    order_scorer.load()
    order_scorer.test(test_instances, test_labels)

    print('Testing Pipeline Scorer')
    pipeline_scorer = PipelineScorer()
    pipeline_scorer.load()
    baseline_graphs = pipeline_scorer.test(
        paragraphs,
        subgraph_strategy='baseline',
        order_strategy='baseline',
        processes=3,
    )
    greedy_graphs = pipeline_scorer.test(
        paragraphs,
        subgraph_strategy='greedy',
        order_strategy='greedy',
        processes=3,
    )
    anneal_graphs = pipeline_scorer.test(
        paragraphs,
        subgraph_strategy='greedy',
        order_strategy='anneal',
        processes=3,
    )

    # Write each result through a context manager so the file handle is
    # flushed and closed deterministically instead of being left to the
    # garbage collector.
    outputs = (
        ('baseline_graphs.pickle', baseline_graphs),
        ('greedy_graphs.pickle', greedy_graphs),
        ('anneal_graphs.pickle', anneal_graphs),
    )
    for filename, graphs in outputs:
        with open(filename, 'wb') as handle:
            pickle.dump(graphs, handle)
def train():
    """Train the subgraph, order, and pipeline scorers on ``amr.txt``.

    The three stages run in sequence: the subgraph selection scorer is
    trained first, then the order scorer, and finally the pipeline scorer,
    which receives optimizers wrapping the two scorers trained before it.
    """
    print('Loading amr data')
    amr_paragraphs = generate_paragraphs('amr.txt', k=5)
    paragraph_count = len(amr_paragraphs)
    print('%d total cleaned paragraphs' % paragraph_count)

    # Stage 1: subgraph selection.
    print('Training Subgraph Selection Scorer')
    subgraph_data = gen_subgraph_data(amr_paragraphs)
    subgraph_instances, subgraph_labels = subgraph_data
    subgraph_scorer = SubgraphSelectionScorer()
    subgraph_scorer.train(
        subgraph_instances,
        subgraph_labels,
        update_cache=True,
    )

    # Stage 2: ordering.
    print('Training Order Scorer')
    order_data = gen_order_data(amr_paragraphs)
    order_instances, order_labels, order_weights = order_data
    order_scorer = OrderScorer()
    order_scorer.train(order_instances, order_labels, order_weights)

    # Stage 3: full pipeline, built on the two scorers trained above.
    print('Training Pipeline Scorer')
    pipeline_scorer = PipelineScorer()
    pipeline_scorer.train(
        SubgraphOptimizer(subgraph_scorer),
        OrderOptimizer(order_scorer),
    )