def train():
    """Train the full AMR summarization pipeline end to end.

    Stages, in order:
      1. Load and clean paragraph data from ``amr.txt``.
      2. Fit the subgraph-selection scorer on generated instance/label pairs.
      3. Fit the order scorer on weighted instance/label pairs.
      4. Fit the pipeline scorer over optimizers wrapping the two scorers.

    NOTE(review): relies on module/project-level names defined elsewhere
    (generate_paragraphs, gen_subgraph_data, gen_order_data,
    SubgraphSelectionScorer, OrderScorer, PipelineScorer,
    SubgraphOptimizer, OrderOptimizer); nothing is returned — training
    side effects live on the scorer objects (and the subgraph scorer's
    cache, via update_cache=True).
    """
    print('Loading amr data')
    paragraphs = generate_paragraphs('amr.txt', k=5)
    print(f'{len(paragraphs)} total cleaned paragraphs')

    print('Training Subgraph Selection Scorer')
    train_instances, train_labels = gen_subgraph_data(paragraphs)
    subgraph_scorer = SubgraphSelectionScorer()
    # update_cache=True: persist the fitted model so later runs can reuse it
    # (presumably — TODO confirm against SubgraphSelectionScorer.train).
    subgraph_scorer.train(train_instances, train_labels, update_cache=True)

    print('Training Order Scorer')
    train_instances, train_labels, train_weights = gen_order_data(paragraphs)
    order_scorer = OrderScorer()
    order_scorer.train(train_instances, train_labels, train_weights)

    print('Training Pipeline Scorer')
    pipeline_scorer = PipelineScorer()
    # The pipeline scorer is trained on optimizers built from the two
    # already-fitted scorers above.
    subgraph_optimizer = SubgraphOptimizer(subgraph_scorer)
    order_optimizer = OrderOptimizer(order_scorer)
    pipeline_scorer.train(subgraph_optimizer, order_optimizer)
if __name__ == "__main__": from scorer import OrderScorer from optimizer import OrderAnnealer as Orderer train = generate_paragraphs("amr.txt", limit=500, k=5) examples, labels = add_negative_examples(train, 20) n = len(examples) weights = n - np.bincount(labels) features = np.array([get_features(e.paragraph_graph(), e.sentence_graphs()) for e in examples]) scorer = OrderScorer() # reg = lm.LogisticRegression() print("learning") scorer.train(features, labels, sample_weight=[weights[i] for i in labels]) # reg.fit(features, labels) print("done") test = generate_paragraphs("amr_test.txt", limit=50, k=5) good_tests = [] for t in test: try: t.sentence_graphs() good_tests.append(t) except ValueError as e: print(e) continue goodness = [] for t in good_tests: first_order = np.arange(len(t.sentence_graphs())) np.random.shuffle(first_order)