예제 #1
0
def test_partial_fit(n_samples=70):
    """Compare a one-batch ``fit_transform`` with ``partial_fit`` + ``transform``.

    The first ``n_samples`` documents of the 20 newsgroups training set are
    encoded twice with identically seeded ``GapEncoder`` instances, and the
    two resulting encodings must be almost equal.

    Parameters
    ----------
    n_samples : int, default=70
        Number of documents taken from the start of the corpus; also used
        as ``batch_size`` for the reference encoder.
    """
    corpus = fetch_20newsgroups(subset='train')['data']
    docs = corpus[:n_samples]

    # Reference: a single iteration over one batch holding every document.
    one_batch_encoder = GapEncoder(
        random_state=42, batch_size=n_samples, max_iter=1)
    one_batch_codes = one_batch_encoder.fit_transform(docs)

    # Candidate: same seed, fitted through one partial_fit call instead.
    partial_encoder = GapEncoder(random_state=42)
    partial_encoder.partial_fit(docs)
    partial_codes = partial_encoder.transform(docs)

    # Both routes must produce the same encoded vectors.
    np.testing.assert_almost_equal(one_batch_codes, partial_codes)
예제 #2
0
def test_missing_values(missing):
    """Exercise ``GapEncoder``'s ``handle_missing`` option on data with NaNs.

    Parameters
    ----------
    missing : str
        Value passed as ``handle_missing``. ``'zero_impute'`` must let
        ``fit_transform`` and ``partial_fit`` run without raising;
        ``'error'`` must raise a ``ValueError`` about missing values; any
        other value must raise a ``ValueError`` about the invalid option.
    """
    rows = [
        ['alice', 'bob'],
        ['bob', 'alice'],
        ['bob', np.nan],
        ['alice', 'charlie'],
        [np.nan, 'alice'],
    ]
    observations = np.array(rows, dtype=object)
    enc = GapEncoder(handle_missing=missing, n_components=3)

    # The only non-raising case: fitting just has to complete.
    if missing == 'zero_impute':
        enc.fit_transform(observations)
        enc.partial_fit(observations)
        return

    if missing == 'error':
        expected_message = r'Input data contains missing values.'
    else:
        # NOTE(review): the pattern hard-codes 'aaa', so the test is
        # presumably parametrized with that value -- confirm against the
        # decorator, which is not visible here.
        expected_message = (r"handle_missing should be either "
                            r"'error' or 'zero_impute', got 'aaa'")

    with pytest.raises(ValueError, match=expected_message):
        enc.fit_transform(observations)