Esempio n. 1
0
def test_vectorizer6():
    """Check that unseen words do not raise the maximum of the output matrix.

    The vectorizer is fitted on a small corpus and then applied to documents
    that mix a known word with words absent from the training data.  The
    largest value in the matrix produced for the new documents must not
    exceed the largest value produced for the training documents.
    """
    train_docs = ['foo bar', 'baz']

    vectorizer = PooledVectorizer(2, sent_start='<s>')
    vectorizer.fit(train_docs)
    # Only the matrix is inspected; the bounds are discarded.
    _, train_X = vectorizer.transform(train_docs)

    # Each document pairs the known word 'foo' with a word not seen in fit().
    unseen_docs = ['foo alpha', 'foo bravo', 'foo charlie', 'foo delta']
    _, unseen_X = vectorizer.transform(unseen_docs)

    train_max = train_X.max()
    unseen_max = unseen_X.max()
    assert train_max >= unseen_max
Esempio n. 2
0
def test_vectorizer5():
    """Check ``fit`` followed by ``transform`` on a two-document corpus.

    ``bounds`` is expected to hold one ``[start, stop]`` pair per document
    and ``X`` one two-column row per extracted item (presumably a bigram of
    vocabulary indices -- confirm against ``PooledVectorizer``).
    """
    docs = ['foo bar baz foo', 'foo baz']
    expected_bounds = [[0, 3], [3, 4]]
    expected_rows = [
        [2, 3],
        [3, 4],
        [4, 2],
        [2, 4],
    ]

    # fit() is chained in the original, so it must return an object that
    # exposes transform(); keep that contract exercised here.
    fitted = PooledVectorizer(2).fit(docs)
    bounds, X = fitted.transform(docs)

    assert_array_equal(bounds, expected_bounds)
    assert_array_equal(X, expected_rows)
Esempio n. 3
0
def test_vectorizer2():
    """Check ``fit_transform`` output and the learned feature names.

    With the feature names asserted below, indices 2, 3 and 4 stand for
    'foo', 'bar' and 'baz', so the expected rows read as the consecutive
    word pairs (foo, bar), (bar, baz) and (baz, foo) of the single document.
    """
    docs = ['foo bar baz foo']
    expected_bounds = [[0, 3]]
    expected_rows = [
        [2, 3],
        [3, 4],
        [4, 2],
    ]
    # Two padding entries come first, followed by the words in the order
    # they first appear in the document.
    expected_names = [
        u'__padding-magic-1',
        u'__padding-magic-2',
        u'foo',
        u'bar',
        u'baz',
    ]

    vectorizer = PooledVectorizer(2)
    bounds, X = vectorizer.fit_transform(docs)

    assert_array_equal(bounds, expected_bounds)
    assert_array_equal(X, expected_rows)

    assert_equal(vectorizer.get_feature_names(), expected_names)
Esempio n. 4
0
def test_vectorizer():
    """Check ``fit_transform`` when ``min_order=1`` is requested.

    Seven rows are expected for the single four-word document.  The rows
    whose second column is 1 presumably are lower-order (single word) items
    padded with a padding index -- confirm against ``PooledVectorizer``.
    """
    docs = ['foo bar baz foo']
    expected_bounds = [[0, 7]]
    expected_rows = [
        [2, 1],
        [3, 1],
        [2, 3],
        [4, 1],
        [3, 4],
        [2, 1],
        [4, 2],
    ]

    vectorizer = PooledVectorizer(2, min_order=1)
    bounds, X = vectorizer.fit_transform(docs)

    assert_array_equal(bounds, expected_bounds)
    assert_array_equal(X, expected_rows)
Esempio n. 5
0
def test_pooled_net():
    """Smoke-test ``MyPooledNetwork2`` on a two-category 20 newsgroups task.

    The vectorizer is fitted on the training split and reused on the test
    split; a network is trained for a single epoch and its accuracy on the
    test split is printed.  No accuracy threshold is asserted, so the test
    only fails if one of the steps raises.

    NOTE(review): ``fetch_20newsgroups`` presumably downloads the dataset
    when it is not cached locally, so this test may need network access.
    """
    cats = ['alt.atheism', 'sci.space']
    newsgroups_train = fetch_20newsgroups(subset='train', categories=cats)

    newsgroups_test = fetch_20newsgroups(subset='test', categories=cats)

    # NOTE(review): the second positional argument is presumably
    # ``min_order`` (it is passed by keyword in test_vectorizer) -- confirm.
    v = PooledVectorizer(3, 1)
    bounds, X = v.fit_transform(newsgroups_train.data)
    y = newsgroups_train.target

    # Reuse the fitted vectorizer: the test split is transformed, not refitted.
    test_bounds, test_X = v.transform(newsgroups_test.data)
    test_y = newsgroups_test.target

    clsf = MyPooledNetwork2(n_epochs=1, learning_rate=0.1)
    clsf.fit((bounds, X), y)

    pred_y = clsf.predict((test_bounds, test_X))
    # Parenthesised single-argument form: prints the same value under the
    # Python 2 print statement and is valid syntax for the Python 3 print
    # function (the bare ``print expr`` statement is a SyntaxError there).
    print(accuracy_score(test_y, pred_y))
Esempio n. 6
0
def test_vectorizer4():
    """Call ``transform`` on a vectorizer that was never fitted.

    NOTE(review): no assertion is visible in this block, so as shown the
    test only checks that the call does not raise.  The function may
    continue beyond what is visible here -- confirm the intended check.
    """
    text = ['foo bar baz foo', 'foo baz']

    # Unlike the sibling tests, neither fit() nor fit_transform() is called
    # before transform().
    v = PooledVectorizer(2)
    bounds, X = v.transform(text)