def test_predict():
    """Check that predict() yields the expected first five tags on two sentences."""
    global model, X_tr, Y_tr, word_to_ix, tag_to_ix
    torch.manual_seed(765)

    expected = ['SYM', 'NUM', 'INTJ', 'PART', 'SYM']
    # Both training sentences are expected to open with the same tag prefix.
    for idx in (5, 0):
        seq = bilstm.prepare_sequence(X_tr[idx], word_to_ix)
        eq_(model.predict(seq)[:5], expected)
def test_forward_alg():
    """Spot-check forward-algorithm partition scores on two training sentences."""
    global model, X_tr, word_to_ix
    torch.manual_seed(765)

    for idx, expected in ((0, 96.48747), (1, 59.80174)):
        feats = model.forward(bilstm.prepare_sequence(X_tr[idx], word_to_ix))
        alpha = model.forward_alg(feats)
        assert_almost_equal(alpha.data.numpy()[0], expected, places=4)
def test_neg_log_likelihood():
    """Spot-check the CRF negative log-likelihood on two (sentence, tags) pairs."""
    global model, X_tr, Y_tr, word_to_ix, tag_to_ix
    torch.manual_seed(765)

    for idx, expected in ((5, 45.898315), (0, 107.71689)):
        feats = model.forward(bilstm.prepare_sequence(X_tr[idx], word_to_ix))
        gold = bilstm.prepare_sequence(Y_tr[idx], tag_to_ix)
        loss = model.neg_log_likelihood(feats, gold)
        assert_almost_equal(loss.data.numpy()[0], expected, places=4)
def test_score_sentence():
    """Spot-check gold-sequence scores on two (sentence, tags) pairs."""
    global model, X_tr, Y_tr, word_to_ix, tag_to_ix
    torch.manual_seed(765)

    for idx, expected in ((0, -11.368162), (1, -3.9872737)):
        feats = model.forward(bilstm.prepare_sequence(X_tr[idx], word_to_ix))
        gold = bilstm.prepare_sequence(Y_tr[idx], tag_to_ix)
        score = model.score_sentence(feats, gold)
        assert_almost_equal(score.data.numpy()[0], expected, places=4)
# Beispiel #5
def apply_model(model,
                outfilename,
                word_to_ix,
                all_tags=None,
                trainfile=TRAIN_FILE,
                testfile=DEV_FILE):
    """Run the tagger over *testfile* and write one predicted tag per line.

    Sentences are separated by a blank line in the output. If *all_tags*
    is not supplied, the tag inventory is rebuilt from *trainfile* first
    (slow: one full pass over the training data).

    :param model: trained tagger exposing ``predict(sequence)``
    :param outfilename: path of the output file to (over)write
    :param word_to_ix: vocabulary mapping used by ``bilstm.prepare_sequence``
    :param all_tags: optional pre-computed tag set; rebuilt when ``None``
    :param trainfile: CoNLL file used only to gather tags when needed
    :param testfile: CoNLL file whose sentences are tagged
    """
    if all_tags is None:
        all_tags = set()

        # this is slow — one full pass over the training corpus
        for words, tags in preproc.conll_seq_generator(trainfile):
            all_tags.update(tags)

    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conll_seq_generator(testfile):
            seq_words = bilstm.prepare_sequence(words, word_to_ix)
            pred_tags = model.predict(seq_words)
            # one tag per line; a blank line terminates the sentence
            outfile.writelines(tag + '\n' for tag in pred_tags)
            outfile.write('\n')
# Beispiel #6
def test_dlmodel_forward():
    """Verify the first three LSTM feature values for the first training sentence."""
    global model, X_tr, word_to_ix

    torch.manual_seed(765)
    feats = model(bilstm.prepare_sequence(X_tr[0], word_to_ix))
    row = feats[0].data.numpy()
    for pos, expected in enumerate((-0.12883559, -0.42470014, -0.31790152)):
        assert_almost_equal(row[pos], expected, places=4)
# Beispiel #7
def test_dlmodel_forward():
    """Check known BiLSTM forward-pass feature values for sentence 0."""
    global model, X_tr, word_to_ix

    torch.manual_seed(765)
    output = model(bilstm.prepare_sequence(X_tr[0], word_to_ix))
    first = output[0].data.numpy()
    expected_vals = (-0.12883559, -0.42470014, -0.31790152)
    for i, val in enumerate(expected_vals):
        assert_almost_equal(first[i], val, places=4)
# Beispiel #8
def apply_model(model, outfilename, word_to_ix, all_tags=None,
                trainfile=TRAIN_FILE, testfile=DEV_FILE):
    """Tag every sentence in *testfile* with *model* and write the results.

    Output format: one predicted tag per line, with a blank line between
    sentences. When *all_tags* is ``None`` the tag set is rebuilt from
    *trainfile* first (slow: one full pass over the training data).

    :param model: trained tagger exposing ``predict(sequence)``
    :param outfilename: path of the output file to (over)write
    :param word_to_ix: vocabulary mapping used by ``bilstm.prepare_sequence``
    :param all_tags: optional pre-computed tag set; rebuilt when ``None``
    :param trainfile: CoNLL file used only to gather tags when needed
    :param testfile: CoNLL file whose sentences are tagged
    """
    if all_tags is None:
        all_tags = set()

        # this is slow — one full pass over the training corpus
        for words, tags in preproc.conll_seq_generator(trainfile):
            all_tags.update(tags)

    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conll_seq_generator(testfile):
            seq_words = bilstm.prepare_sequence(words, word_to_ix)
            pred_tags = model.predict(seq_words)
            # one tag per line; a blank line terminates the sentence
            for tag in pred_tags:
                outfile.write(tag + '\n')
            outfile.write('\n')