Example #1
0
def test_bert_jp():
    """Check the shape and row consistency of Japanese sentence vectors.

    The transformed frame must have one row per input sentence and
    768 + 1 columns (the ``id`` column is dropped before comparing vectors).
    Rows 4 and 5 must then match row 0 to the default precision of
    ``assert_almost_equal``.

    NOTE(review): presumably ``_TEST_SENTENCE_JP`` repeats its first
    sentence at positions 4 and 5 -- confirm against the fixture.
    """
    vectorizer = BertSentenceVectorizer(use_cuda=False, lang='jp')
    frame = pd.DataFrame({'id': [0, 1, 2, 3, 4, 5], 'sentence': _TEST_SENTENCE_JP})

    encoded = vectorizer.fit_transform(frame)

    n_rows, n_cols = encoded.shape
    assert n_rows == 6
    assert n_cols == 768 + 1

    # Only the embedding columns should take part in the comparison.
    encoded.drop(columns='id', inplace=True)
    reference = encoded.iloc[0, :].values
    for row in (4, 5):
        npt.assert_almost_equal(reference, encoded.iloc[row, :].values)
Example #2
0
def test_bert_en_svd_multicol():
    """Check vectorization of a frame with two text columns.

    Both ``sentence`` and ``sentence2`` hold the same fixture, and the
    result must have 6 rows and 2 * 768 + 1 columns. After dropping ``id``,
    rows 4 and 5 must match row 0 to three decimal places.

    NOTE(review): presumably ``_TEST_SENTENCE_EN`` repeats its first
    sentence at positions 4 and 5 -- confirm against the fixture.
    """
    vectorizer = BertSentenceVectorizer(use_cuda=False)

    columns = {
        'id': [0, 1, 2, 3, 4, 5],
        'sentence': _TEST_SENTENCE_EN,
        'sentence2': _TEST_SENTENCE_EN,
    }
    frame = pd.DataFrame(columns)

    encoded = vectorizer.fit_transform(frame)

    n_rows, n_cols = encoded.shape
    assert n_rows == 6
    assert n_cols == 2 * 768 + 1

    # Only the embedding columns should take part in the comparison.
    encoded.drop(columns='id', inplace=True)
    reference = encoded.iloc[0, :].values
    for row in (4, 5):
        npt.assert_almost_equal(reference, encoded.iloc[row, :].values, decimal=3)
Example #3
0
def test_bert_fit_transform():
    """Check that ``fit_transform`` matches ``fit`` followed by ``transform``.

    One vectorizer runs the one-shot ``fit_transform``; a second, fresh
    vectorizer is fitted and then applied in two separate steps. The two
    resulting frames must be equal.
    """
    X = pd.DataFrame({'id': [0, 1, 2, 3, 4, 5], 'sentence': _TEST_SENTENCE_EN})

    # One-shot path.
    bert = BertSentenceVectorizer(use_cuda=False)
    ret = bert.fit_transform(X)

    # Two-step path on a fresh instance. Calling ``fit_transform`` again here
    # would refit and never exercise ``transform`` on a fitted vectorizer.
    bert = BertSentenceVectorizer(use_cuda=False)
    bert.fit(X)
    ret2 = bert.transform(X)

    assert_frame_equal(ret, ret2)