Example 1
def test_polynomial_count_sketch(X, Y, gamma, degree, coef0):
    # test that PolynomialCountSketch approximates polynomial
    # kernel on random data

    # compute exact kernel
    kernel = polynomial_kernel(X, Y, gamma=gamma, degree=degree, coef0=coef0)

    # approximate kernel mapping
    ps_transform = PolynomialCountSketch(n_components=5000,
                                         gamma=gamma,
                                         coef0=coef0,
                                         degree=degree,
                                         random_state=42)
    X_trans = ps_transform.fit_transform(X)
    Y_trans = ps_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) <= 0.05  # close to unbiased
    np.abs(error, out=error)
    assert np.max(error) <= 0.1  # nothing too far off
    assert np.mean(error) <= 0.05  # mean is fairly close
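The test above relies on module-level data and parametrized arguments defined elsewhere in scikit-learn's test suite. The following self-contained sketch illustrates the same check; the data shapes and the gamma, degree and coef0 values are assumptions chosen for the illustration, not the test's own fixtures.

import numpy as np
from sklearn.kernel_approximation import PolynomialCountSketch
from sklearn.metrics.pairwise import polynomial_kernel

# Small random dataset; rows are normalized so kernel values stay bounded
rng = np.random.RandomState(0)
X_demo = rng.random_sample(size=(300, 50))
Y_demo = rng.random_sample(size=(300, 50))
X_demo /= X_demo.sum(axis=1)[:, np.newaxis]
Y_demo /= Y_demo.sum(axis=1)[:, np.newaxis]

# Exact polynomial kernel versus its PolynomialCountSketch approximation
exact = polynomial_kernel(X_demo, Y_demo, gamma=1.0, degree=3, coef0=2.5)
ps = PolynomialCountSketch(n_components=5000, gamma=1.0, degree=3, coef0=2.5,
                           random_state=42)
approx = ps.fit_transform(X_demo) @ ps.transform(Y_demo).T
print(np.abs(exact - approx).mean())  # a small mean absolute error is expected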
Example 2
def test_polynomial_count_sketch_dense_sparse(gamma, degree, coef0):
    """Check that PolynomialCountSketch results are the same for dense and sparse
    input.
    """
    ps_dense = PolynomialCountSketch(n_components=500,
                                     gamma=gamma,
                                     degree=degree,
                                     coef0=coef0,
                                     random_state=42)
    Xt_dense = ps_dense.fit_transform(X)
    Yt_dense = ps_dense.transform(Y)

    ps_sparse = PolynomialCountSketch(n_components=500,
                                      gamma=gamma,
                                      degree=degree,
                                      coef0=coef0,
                                      random_state=42)
    Xt_sparse = ps_sparse.fit_transform(csr_matrix(X))
    Yt_sparse = ps_sparse.transform(csr_matrix(Y))

    assert_allclose(Xt_dense, Xt_sparse)
    assert_allclose(Yt_dense, Yt_sparse)
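As a standalone illustration of the equivalence being tested, here is a minimal sketch on small random data; the dataset and hyperparameter values below are assumptions, not the test's fixtures.

import numpy as np
from numpy.testing import assert_allclose
from scipy.sparse import csr_matrix
from sklearn.kernel_approximation import PolynomialCountSketch

X_demo = np.random.RandomState(0).random_sample((20, 6))
params = dict(n_components=64, gamma=1.0, degree=3, coef0=0, random_state=42)

# With the same random_state, dense and sparse inputs map to identical features
dense_out = PolynomialCountSketch(**params).fit_transform(X_demo)
sparse_out = PolynomialCountSketch(**params).fit_transform(csr_matrix(X_demo))
assert_allclose(dense_out, sparse_out)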
Example 3
def test_polynomial_count_sketch_raises_if_degree_lower_than_one(degree):
    with pytest.raises(ValueError, match=f'degree={degree} should be >=1.'):
        ps_transform = PolynomialCountSketch(degree=degree)
        ps_transform.fit(X, Y)
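The same validation can be seen outside the test harness; a minimal sketch on illustrative data (the error text in the comment follows the pattern the test matches against):

import numpy as np
from sklearn.kernel_approximation import PolynomialCountSketch

X_demo = np.random.RandomState(0).random_sample((10, 4))
try:
    # degree below one is rejected at fit time
    PolynomialCountSketch(degree=0).fit(X_demo)
except ValueError as exc:
    print(exc)  # e.g. "degree=0 should be >=1."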
Example 4
lsvm_score = 100 * lsvm.score(X_test, y_test)

# Evaluate kernelized SVM
ksvm = SVC(kernel="poly", degree=2, gamma=1.0).fit(X_train, y_train)
ksvm_score = 100 * ksvm.score(X_test, y_test)

# Evaluate PolynomialCountSketch + LinearSVM
ps_svm_scores = []
n_runs = 5

# To compensate for the stochasticity of the method, we make n_runs runs
for k in out_dims:
    score_avg = 0
    for _ in range(n_runs):
        ps_svm = Pipeline([
            ("PS", PolynomialCountSketch(degree=2, n_components=k)),
            ("SVM", LinearSVC()),
        ])
        score_avg += ps_svm.fit(X_train, y_train).score(X_test, y_test)
    ps_svm_scores.append(100 * score_avg / n_runs)

# Evaluate Nystroem + LinearSVM
ny_svm_scores = []
n_runs = 5

for k in out_dims:
    score_avg = 0
    for _ in range(n_runs):
        ny_svm = Pipeline([
            # Nystroem configured to match the exact polynomial kernel above
            # (degree=2, gamma=1.0, coef0=0)
            ("NY", Nystroem(kernel="poly", gamma=1.0, degree=2, coef0=0,
                            n_components=k)),
            ("SVM", LinearSVC()),
        ])
        score_avg += ny_svm.fit(X_train, y_train).score(X_test, y_test)
    ny_svm_scores.append(100 * score_avg / n_runs)
Example 5
# features (precisely, 54^4). Thanks to :class:`PolynomialCountSketch`, we can
# condense most of the discriminative information of that feature space into a
# much more compact representation. We repeat the experiment ``n_runs`` times to
# compensate for the stochastic nature of :class:`PolynomialCountSketch`.

n_runs = 3
for n_components in [250, 500, 1000, 2000]:

    ps_lsvm_time = 0
    ps_lsvm_score = 0
    for _ in range(n_runs):

        pipeline = Pipeline(steps=[
            (
                "kernel_approximator",
                PolynomialCountSketch(n_components=n_components, degree=4),
            ),
            ("linear_classifier", LinearSVC()),
        ])

        start = time.time()
        pipeline.fit(X_train, y_train)
        ps_lsvm_time += time.time() - start
        ps_lsvm_score += 100 * pipeline.score(X_test, y_test)

    ps_lsvm_time /= n_runs
    ps_lsvm_score /= n_runs

    results[f"LSVM + PS({n_components})"] = {
        "time": ps_lsvm_time,
        "score": ps_lsvm_score,
# -------------------------------------------------------
# The new :class:`~sklearn.kernel_approximation.PolynomialCountSketch`
# approximates a polynomial expansion of a feature space when used with linear
# models, but uses much less memory than
# :class:`~sklearn.preprocessing.PolynomialFeatures`.

from sklearn.datasets import fetch_covtype
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.kernel_approximation import PolynomialCountSketch
from sklearn.linear_model import LogisticRegression

X, y = fetch_covtype(return_X_y=True)
pipe = make_pipeline(MinMaxScaler(),
                     PolynomialCountSketch(degree=2, n_components=300),
                     LogisticRegression(max_iter=1000))
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    train_size=5000,
                                                    test_size=10000,
                                                    random_state=42)
pipe.fit(X_train, y_train).score(X_test, y_test)

##############################################################################
# For comparison, here is the score of a linear baseline for the same data:

linear_baseline = make_pipeline(MinMaxScaler(),
                                LogisticRegression(max_iter=1000))
linear_baseline.fit(X_train, y_train).score(X_test, y_test)
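To make the memory argument concrete, the output dimensionalities can be compared directly. A rough sketch, assuming the covtype X_train from above (54 raw features); PolynomialFeatures is used here only for illustration.

from sklearn.preprocessing import PolynomialFeatures
from sklearn.kernel_approximation import PolynomialCountSketch

# Explicit degree-2 expansion of covtype's 54 features vs. the 300-component sketch
n_explicit = PolynomialFeatures(degree=2).fit(X_train[:10]).n_output_features_
sketch = PolynomialCountSketch(degree=2, n_components=300).fit(X_train[:10])
print(n_explicit, sketch.n_components)  # 1540 explicit features vs. 300 sketched ones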
            is_all_zero = np.all(X_array == 0)
            if is_all_zero:
                print('array is all zeros')
            else:
                print('Array is good')
                choice_length = np.count_nonzero(~np.isnan(labels))

                X, y = shuffle(X_array, labels)
                X = X[:choice_length]
                y = y[:choice_length].fillna(0)

                scaler = MinMaxScaler(feature_range=(-1, 1))
                mm = make_pipeline(MinMaxScaler(), Normalizer())
                X = mm.fit_transform(X)
                rbf_feature = RBFSampler(gamma=1.5, random_state=10)
                ps = PolynomialCountSketch(degree=11, random_state=1)
                X_rbf_features = rbf_feature.fit_transform(X)
                X_poly_features = ps.fit_transform(X)
                # Compute a t-SNE embedding with n_components dimensions
                n_components = 3
                tsne = TSNE(n_components)
                tsne_result = tsne.fit_transform(X_rbf_features)
                locationFileName = os.path.join(
                    figuresDestination,
                    str(sorted(symbols)[symbolIdx]) + '_idx_' + str(idx) +
                    'date_' + str(dateIdx) + '_' + str(labelName) +
                    '_tsne_rbf_kernelised.png')

                fashion_scatter(tsne_result, y, locationFileName)

                fig = plt.figure(figsize=(16, 9))