def test_bert_jp():
    """Vectorize the Japanese fixture sentences and check shape and duplicates.

    The output is expected to have one row per input sentence and 768
    embedding columns plus the passed-through ``id`` column. Rows 4 and 5
    must be numerically equal to row 0 (presumably because the fixture
    repeats the first sentence at those positions -- confirm against
    ``_TEST_SENTENCE_JP``).
    """
    n_rows = 6
    embedding_dim = 768

    frame = pd.DataFrame({
        'id': list(range(n_rows)),
        'sentence': _TEST_SENTENCE_JP,
    })
    vectorizer = BertSentenceVectorizer(use_cuda=False, lang='jp')
    ret = vectorizer.fit_transform(frame)

    # One extra column for the 'id' key carried alongside the embeddings.
    assert ret.shape[0] == n_rows
    assert ret.shape[1] == embedding_dim + 1

    # Compare embeddings only; the id column differs by construction.
    vectors = ret.drop('id', axis=1)
    reference = vectors.iloc[0, :].values
    for row in (4, 5):
        npt.assert_almost_equal(reference, vectors.iloc[row, :].values)
def test_bert_en_svd_multicol():
    """Vectorize two text columns at once and check the combined width.

    Both ``sentence`` and ``sentence2`` hold the same English fixture, so
    the result is expected to have 768 columns per text column plus ``id``.
    Rows 4 and 5 must match row 0 to three decimal places.

    NOTE(review): despite "svd" in the name, the vectorizer is built with
    default arguments here and the assertions expect the full 2 * 768
    width -- confirm whether a dimensionality-reduction option was meant
    to be passed.
    """
    n_rows = 6
    embedding_dim = 768
    text_columns = ('sentence', 'sentence2')

    data = {'id': list(range(n_rows))}
    for column in text_columns:
        data[column] = _TEST_SENTENCE_EN
    frame = pd.DataFrame(data)

    vectorizer = BertSentenceVectorizer(use_cuda=False)
    ret = vectorizer.fit_transform(frame)

    assert ret.shape[0] == n_rows
    assert ret.shape[1] == len(text_columns) * embedding_dim + 1

    # Compare embeddings only; the id column differs by construction.
    vectors = ret.drop('id', axis=1)
    reference = vectors.iloc[0, :].values
    for row in (4, 5):
        npt.assert_almost_equal(reference, vectors.iloc[row, :].values, decimal=3)
def test_bert_fit_transform():
    """Check that ``fit`` followed by ``transform`` matches ``fit_transform``.

    Two fresh vectorizers are run over the same frame: one through the
    one-shot ``fit_transform`` path and one through the two-step
    ``fit`` + ``transform`` path. The resulting frames must be equal.

    The second path previously called ``fit_transform`` again after
    ``fit``, which compared ``fit_transform`` with itself and never
    exercised ``transform``.
    """
    X = pd.DataFrame({'id': [0, 1, 2, 3, 4, 5], 'sentence': _TEST_SENTENCE_EN})

    # One-shot path.
    bert = BertSentenceVectorizer(use_cuda=False)
    ret = bert.fit_transform(X)

    # Two-step path on a new instance so no state leaks from the first run.
    # Assumes the vectorizer follows the scikit-learn transformer API and
    # exposes ``transform`` -- confirm against BertSentenceVectorizer.
    bert = BertSentenceVectorizer(use_cuda=False)
    bert.fit(X)
    ret2 = bert.transform(X)

    assert_frame_equal(ret, ret2)