def train():
    """Run the three training stages end to end.

    Paragraphs are produced by ``generate_paragraphs('amr.txt', k=5)``.
    A ``SubgraphSelectionScorer`` and an ``OrderScorer`` are each trained on
    data generated from those paragraphs, wrapped in ``SubgraphOptimizer``
    and ``OrderOptimizer`` respectively, and the two optimizers are passed
    to ``PipelineScorer.train``. Progress is reported with ``print``;
    nothing is returned.
    """
    print('Loading amr data')
    paragraphs = generate_paragraphs('amr.txt', k=5)
    print('%d total cleaned paragraphs' % len(paragraphs))

    # Stage 1: subgraph selection scorer.
    print('Training Subgraph Selection Scorer')
    instances, labels = gen_subgraph_data(paragraphs)
    subgraph_scorer = SubgraphSelectionScorer()
    subgraph_scorer.train(instances, labels, update_cache=True)

    # Stage 2: order scorer. The temporaries are rebound on purpose so the
    # stage-1 training data is not kept alive longer than it was before.
    print('Training Order Scorer')
    instances, labels, weights = gen_order_data(paragraphs)
    order_scorer = OrderScorer()
    order_scorer.train(instances, labels, weights)

    # Stage 3: pipeline scorer, trained on optimizers built from the two
    # scorers above.
    print('Training Pipeline Scorer')
    pipeline_scorer = PipelineScorer()
    subgraph_opt = SubgraphOptimizer(subgraph_scorer)
    order_opt = OrderOptimizer(order_scorer)
    pipeline_scorer.train(subgraph_opt, order_opt)
# mean, min, max, std_dev of #of fragments per partition features += summary_statistics([len(s) for s in partition.root_partitioning]) # mean, min, max, std_dev of subgraph similarity for every pair of subgraphs (including a subgraph with itself) features += summary_statistics([subgraph_similarity(partition.get_subgraph(s1), partition.get_subgraph(s2)) for s1, s2 in list(itertools.combinations(partition.root_partitioning, 2)) + [(s,s) for s in partition.root_partitioning]]) # mean, min, max, std_dev of verb overlap for every pair of subgraphs (including a subgraph with itself) features += summary_statistics([len(partition.get_subgraph(s1).get_verbs() & partition.get_subgraph(s2).get_verbs()) for s1, s2 in list(itertools.combinations(partition.root_partitioning, 2)) + [(s,s) for s in partition.root_partitioning]]) return features if __name__ == '__main__': train_instances, train_labels, test_instances, test_labels, test = generate_train_test(use_cache=True) scorer = SubgraphSelectionScorer() scorer.train(train_instances, train_labels) scorer.test(test_instances, test_labels) for t in test: try: optimizer = SubgraphOptimizer(scorer) final_state = optimizer.optimize(t) except ValueError: continue print(final_state) ''' final_partition = final_state.partition dummy_ordering = list(final_partition.root_partitioning) random.shuffle(dummy_ordering)