Example #1
def test_feature_extraction_d2_2():
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extractor = SimpleFeatureExtractor()
    embedder = VanillaWordEmbedding(word_to_ix, TEST_EMBEDDING_DIM)
    combiner = DummyCombiner()
    embeds = embedder(test_sent)
    state = ParserState(test_sent, embeds, combiner)

    state.shift()

    feats = feat_extractor.get_features(state)
    feats_list = make_list(feats)
    true = ([
        -1.0276086330413818, -0.563052773475647, -0.8922905325889587,
        -0.05825017765164375
    ], [
        -0.4211951494216919, -0.510699987411499, -1.5726652145385742,
        -0.12324775755405426
    ], [
        3.586989402770996, -1.8312901258468628, 1.5987002849578857,
        -1.277006983757019
    ])
    pairs = zip(feats_list, true)
    check_tensor_correctness(pairs)
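
The tests in these examples call a check_tensor_correctness helper that is not shown here. The sketch below is one plausible implementation, assuming each pair holds two equal-length sequences of floats that should agree element-wise within a small tolerance; the assignment's actual helper may differ.

def check_tensor_correctness(pairs, tol=1e-3):
    # Each pair is (computed, expected): two float sequences of equal length
    # that should match element-wise within the given tolerance.
    for computed, expected in pairs:
        assert len(computed) == len(expected)
        for c, e in zip(computed, expected):
            assert abs(c - e) < tol
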
Example #2
def build_parser(DROPOUT, LSTM_NUM_LAYERS, word_to_ix, pretrained_embeds):
    # Predefined model dimensions

    TEST_EMBEDDING_DIM = 4
    WORD_EMBEDDING_DIM = 64
    STACK_EMBEDDING_DIM = 100
    NUM_FEATURES = 3

    # Build Model
    feat_extractor = SimpleFeatureExtractor()
    # BiLSTM word embeddings will probably work best, but feel free to experiment with the others you developed
    word_embedding_lookup = BiLSTMWordEmbedding(word_to_ix,
                                                WORD_EMBEDDING_DIM,
                                                STACK_EMBEDDING_DIM,
                                                num_layers=LSTM_NUM_LAYERS,
                                                dropout=DROPOUT)
    initialize_with_pretrained(pretrained_embeds, word_embedding_lookup)
    action_chooser = LSTMActionChooser(STACK_EMBEDDING_DIM * NUM_FEATURES,
                                       LSTM_NUM_LAYERS,
                                       dropout=DROPOUT)
    combiner = LSTMCombiner(STACK_EMBEDDING_DIM,
                            num_layers=LSTM_NUM_LAYERS,
                            dropout=DROPOUT)
    parser = TransitionParser(feat_extractor, word_embedding_lookup,
                              action_chooser, combiner)

    return parser
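
A minimal usage sketch for build_parser follows. It assumes word_to_ix maps vocabulary words to indices and pretrained_embeds maps words to pretrained vectors (as expected by initialize_with_pretrained); the dropout, layer count, and optimizer settings are illustrative assumptions, not values from the original code.

import torch.optim as optim

DROPOUT = 0.2           # assumed value for illustration
LSTM_NUM_LAYERS = 1     # assumed value for illustration

parser = build_parser(DROPOUT, LSTM_NUM_LAYERS, word_to_ix, pretrained_embeds)
optimizer = optim.SGD(parser.parameters(), lr=0.01)
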
Example #4
def test_feature_extraction_d2_2():
    """ 0.5 point(s) """

    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extractor = SimpleFeatureExtractor()
    embedder = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    combiner = DummyCombiner()
    embeds = embedder(test_sent)
    state = ParserState(test_sent, embeds, combiner)

    state.shift()
    state.shift()

    feats = feat_extractor.get_features(state)
    feats_list = make_list(feats)
    true = ([-1.8661, 1.4146, -1.8781, -0.4674],
            [-0.9596, 0.5489, -0.9901, -0.3826],
            [0.5237, 0.0004, -1.2039, 3.5283])
    pairs = zip(feats_list, true)
    check_tensor_correctness(pairs)
Example #5
def test_predict_after_train_d3_1():
    """ 1 point(s) """

    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)
    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = ActionChooserNetwork(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = MLPCombinerNetwork(TEST_EMBEDDING_DIM)

    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)

    # Train
    for i in range(75):
        train([(test_sent[:-1], gold)], parser,
              optim.SGD(parser.parameters(), lr=0.01), verbose=False)

    # predict
    pred = parser.predict(test_sent[:-1])
    gold_graph = dependency_graph_from_oracle(test_sent[:-1], gold)
    assert pred == gold_graph
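
Building on the train and predict interfaces exercised in the test above, the sketch below shows how they might be applied to a full dataset instead of a single sentence; the function name, epoch count, and learning rate are assumptions for illustration.

import torch.optim as optim

def train_and_evaluate(parser, training_data, dev_sentences, epochs=5, lr=0.01):
    # training_data: list of (sentence, gold_actions) pairs, as in the test above
    optimizer = optim.SGD(parser.parameters(), lr=lr)
    for _ in range(epochs):
        train(training_data, parser, optimizer, verbose=False)
    # Predict a dependency graph for each held-out sentence
    return [parser.predict(sent) for sent in dev_sentences]
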
Example #6
def test_parse_logic_d3_1():
    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbedding(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = FFActionChooser(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = FFCombiner(TEST_EMBEDDING_DIM)

    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)
    output, dep_graph, actions_done = parser(test_sent[:-1], gold)

    assert len(output) == 16  # Made the right number of decisions

    # check one of the outputs
    checked_out = output[9].view(-1).data.tolist()
    true_out = [-1.2444578409194946, -1.3128550052642822, -0.8145193457603455]
    check_tensor_correctness([(true_out, checked_out)])

    true_dep_graph = dependency_graph_from_oracle(test_sent, gold)
    assert true_dep_graph == dep_graph
    assert actions_done == [0, 1, 0, 1, 0, 0, 1, 2, 0, 0, 0, 1, 2, 2, 2, 0]
Example #7
def test_parse_logic_d3_1():
    """ 0.5 point(s) """

    global test_sent, gold, word_to_ix, vocab
    torch.manual_seed(1)

    feat_extract = SimpleFeatureExtractor()
    word_embed = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM)
    act_chooser = ActionChooserNetwork(TEST_EMBEDDING_DIM * NUM_FEATURES)
    combiner = MLPCombinerNetwork(TEST_EMBEDDING_DIM)

    parser = TransitionParser(feat_extract, word_embed, act_chooser, combiner)
    output, dep_graph, actions_done = parser(test_sent[:-1], gold)

    assert len(output) == 15  # Made the right number of decisions

    # check one of the outputs
    checked_out = output[10].view(-1).data.tolist()
    true_out = [-1.4737, -1.0875, -0.8350]
    check_tensor_correctness([(true_out, checked_out)])

    true_dep_graph = dependency_graph_from_oracle(test_sent, gold)
    assert true_dep_graph == dep_graph
    assert actions_done == [0, 0, 1, 0, 1, 0, 0, 1, 2, 0, 0, 0, 1, 1, 2]
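
The actions_done assertions in Examples #6 and #7 compare raw action indices. The helper below is a sketch for reading them, assuming the usual mapping of a three-action transition parser (shift, left reduce, right reduce) onto indices 0, 1, 2; this mapping is an assumption and should be checked against the assignment's own constants.

ACTION_NAMES = {0: "SHIFT", 1: "REDUCE_L", 2: "REDUCE_R"}  # assumed mapping

def describe_actions(actions_done):
    # e.g. describe_actions([0, 1, 2]) -> ["SHIFT", "REDUCE_L", "REDUCE_R"]
    return [ACTION_NAMES.get(a, "UNKNOWN") for a in actions_done]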