def test_partial_fit(n_samples=70):
    """Check that ``partial_fit`` agrees with a one-batch, one-iteration fit.

    Two ``GapEncoder`` instances, both seeded with ``random_state=42``, are
    trained on the same first ``n_samples`` documents of the 20 newsgroups
    training subset: one through ``fit_transform`` with
    ``batch_size=n_samples`` and ``max_iter=1``, the other through a single
    ``partial_fit`` call followed by ``transform``. The two encodings must
    be almost equal.

    Parameters
    ----------
    n_samples : int, default=70
        Number of leading documents taken from the corpus.
    """
    # NOTE(review): fetch_20newsgroups presumably downloads/caches the
    # corpus on first use -- confirm before running this test offline.
    corpus = fetch_20newsgroups(subset='train')['data']
    X = corpus[:n_samples]

    # Reference encoding: the whole sample as one batch, a single iteration.
    full_batch_encoder = GapEncoder(
        random_state=42, batch_size=n_samples, max_iter=1)
    encoded_full = full_batch_encoder.fit_transform(X)

    # Candidate encoding: one partial_fit call, then transform.
    incremental_encoder = GapEncoder(random_state=42)
    incremental_encoder.partial_fit(X)
    encoded_partial = incremental_encoder.transform(X)

    # Both training paths should yield (almost) identical encoded vectors.
    np.testing.assert_almost_equal(encoded_full, encoded_partial)
def test_missing_values(missing):
    """Exercise ``GapEncoder`` on data with NaNs for one ``handle_missing`` mode.

    Parameters
    ----------
    missing : str
        Value forwarded as ``handle_missing``. ``'error'`` must raise a
        ``ValueError`` about missing values, ``'zero_impute'`` must let both
        ``fit_transform`` and ``partial_fit`` run, and any other value must
        raise a ``ValueError`` about the invalid option. The expected
        message for that last case hard-codes ``'aaa'``.

    NOTE(review): the source of ``missing`` (a parametrization or fixture)
    is not visible in this part of the file -- confirm it covers
    ``'error'``, ``'zero_impute'`` and ``'aaa'``.
    """
    rows = [
        ['alice', 'bob'],
        ['bob', 'alice'],
        ['bob', np.nan],
        ['alice', 'charlie'],
        [np.nan, 'alice'],
    ]
    observations = np.array(rows, dtype=object)
    enc = GapEncoder(handle_missing=missing, n_components=3)

    # The only mode that is expected to succeed: fit, then refine in place.
    if missing == 'zero_impute':
        enc.fit_transform(observations)
        enc.partial_fit(observations)
        return

    # Every other mode must fail; only the expected message differs.
    if missing == 'error':
        expected_message = r'Input data contains missing values.'
    else:
        expected_message = (r"handle_missing should be either "
                            r"'error' or 'zero_impute', got 'aaa'")

    with pytest.raises(ValueError, match=expected_message):
        enc.fit_transform(observations)