def test_feature_extraction_d2_2():
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extractor = SimpleFeatureExtractor()
    embedder = VanillaWordEmbedding(word_to_ix, TEST_EMBEDDING_DIM)
    combiner = DummyCombiner()

    embeds = embedder(test_sent)
    state = ParserState(test_sent, embeds, combiner)
    state.shift()

    feats = feat_extractor.get_features(state)
    feats_list = make_list(feats)
    true = ([-1.0276086330413818, -0.563052773475647, -0.8922905325889587, -0.05825017765164375],
            [-0.4211951494216919, -0.510699987411499, -1.5726652145385742, -0.12324775755405426],
            [3.586989402770996, -1.8312901258468628, 1.5987002849578857, -1.277006983757019])
    pairs = zip(feats_list, true)
    check_tensor_correctness(pairs)
def build_parser(DROPOUT, LSTM_NUM_LAYERS, word_to_ix, pretrained_embeds):
    # Predefined dimensions
    TEST_EMBEDDING_DIM = 4
    WORD_EMBEDDING_DIM = 64
    STACK_EMBEDDING_DIM = 100
    NUM_FEATURES = 3

    # Build the model
    feat_extractor = SimpleFeatureExtractor()

    # BiLSTM word embeddings will probably work best, but feel free to
    # experiment with the others you developed
    word_embedding_lookup = BiLSTMWordEmbedding(word_to_ix, WORD_EMBEDDING_DIM, STACK_EMBEDDING_DIM,
                                                num_layers=LSTM_NUM_LAYERS, dropout=DROPOUT)
    initialize_with_pretrained(pretrained_embeds, word_embedding_lookup)

    action_chooser = LSTMActionChooser(STACK_EMBEDDING_DIM * NUM_FEATURES, LSTM_NUM_LAYERS, dropout=DROPOUT)
    combiner = LSTMCombiner(STACK_EMBEDDING_DIM, num_layers=LSTM_NUM_LAYERS, dropout=DROPOUT)
    parser = TransitionParser(feat_extractor, word_embedding_lookup, action_chooser, combiner)
    return parser
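
# A minimal usage sketch for build_parser, kept out of the graded tests. It
# assumes the harness used elsewhere in this assignment: train(...),
# parser.predict(...), a word_to_ix vocabulary, pretrained_embeds, and
# training_data as a list of (sentence, gold_actions) pairs. The hyperparameter
# values and epoch count are illustrative placeholders, not prescribed settings.
def example_build_train_predict(training_data, dev_sentence, word_to_ix, pretrained_embeds):
    parser = build_parser(DROPOUT=0.2, LSTM_NUM_LAYERS=1,
                          word_to_ix=word_to_ix,
                          pretrained_embeds=pretrained_embeds)
    optimizer = optim.SGD(parser.parameters(), lr=0.01)
    for _ in range(5):  # a few passes over the data, as in the training test below
        train(training_data, parser, optimizer, verbose=False)
    return parser.predict(dev_sentence)  # predicted dependency graph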
def test_feature_extraction_d2_2():
    """ 0.5 point(s) """
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extractor = SimpleFeatureExtractor()
    embedder = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    combiner = DummyCombiner()

    embeds = embedder(test_sent)
    state = ParserState(test_sent, embeds, combiner)
    state.shift()
    state.shift()

    feats = feat_extractor.get_features(state)
    feats_list = make_list(feats)
    true = ([-1.8661, 1.4146, -1.8781, -0.4674],
            [-0.9596, 0.5489, -0.9901, -0.3826],
            [0.5237, 0.0004, -1.2039, 3.5283])
    pairs = zip(feats_list, true)
    check_tensor_correctness(pairs)
def test_predict_after_train_d3_1():
    """ 1 point(s) """
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = ActionChooserNetwork(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = MLPCombinerNetwork(TEST_EMBEDDING_DIM)
    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)

    # Train on the single test sentence
    for i in range(75):
        train([(test_sent[:-1], gold)], parser, optim.SGD(parser.parameters(), lr=0.01), verbose=False)

    # Predict and compare against the oracle graph
    pred = parser.predict(test_sent[:-1])
    gold_graph = dependency_graph_from_oracle(test_sent[:-1], gold)
    assert pred == gold_graph
def test_parse_logic_d3_1():
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbedding(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = FFActionChooser(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = FFCombiner(TEST_EMBEDDING_DIM)
    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)

    output, dep_graph, actions_done = parser(test_sent[:-1], gold)
    assert len(output) == 16  # Made the right number of decisions

    # Check one of the outputs
    checked_out = output[9].view(-1).data.tolist()
    true_out = [-1.2444578409194946, -1.3128550052642822, -0.8145193457603455]
    check_tensor_correctness([(true_out, checked_out)])

    true_dep_graph = dependency_graph_from_oracle(test_sent, gold)
    assert true_dep_graph == dep_graph
    assert actions_done == [0, 1, 0, 1, 0, 0, 1, 2, 0, 0, 0, 1, 2, 2, 2, 0]
def test_parse_logic_d3_1():
    """ 0.5 point(s) """
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = ActionChooserNetwork(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = MLPCombinerNetwork(TEST_EMBEDDING_DIM)
    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)

    output, dep_graph, actions_done = parser(test_sent[:-1], gold)
    assert len(output) == 15  # Made the right number of decisions

    # Check one of the outputs
    checked_out = output[10].view(-1).data.tolist()
    true_out = [-1.4737, -1.0875, -0.8350]
    check_tensor_correctness([(true_out, checked_out)])

    true_dep_graph = dependency_graph_from_oracle(test_sent, gold)
    assert true_dep_graph == dep_graph
    assert actions_done == [0, 0, 1, 0, 1, 0, 0, 1, 2, 0, 0, 0, 1, 1, 2]
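
# A small evaluation sketch in the same spirit as test_predict_after_train_d3_1
# above: exact-match accuracy of predicted graphs against oracle graphs. It
# assumes dev_data is a list of (sentence, gold_actions) pairs shaped like
# (test_sent[:-1], gold), and uses only helpers already exercised by these
# tests (parser.predict and dependency_graph_from_oracle).
def exact_match_accuracy(parser, dev_data):
    correct = 0
    for sentence, gold_actions in dev_data:
        pred_graph = parser.predict(sentence)
        gold_graph = dependency_graph_from_oracle(sentence, gold_actions)
        if pred_graph == gold_graph:  # graph equality, as in the asserts above
            correct += 1
    return correct / len(dev_data)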