Example #1
0
def test_polynomial_count_sketch_dense_sparse(gamma, degree, coef0):
    """Dense and CSR inputs must yield the same PolynomialCountSketch output.

    Two estimators are built from the same parameters and the same
    ``random_state``; one is fitted on the dense data and the other on a
    ``csr_matrix`` copy of it. The transformed outputs are then compared with
    ``assert_allclose``.

    ``X`` and ``Y`` are not parameters here: they are read from the enclosing
    scope (presumably module-level test data -- confirm against the file).
    """
    sketch_params = dict(
        n_components=500,
        gamma=gamma,
        degree=degree,
        coef0=coef0,
        random_state=42,
    )

    # Dense path.
    dense_sketch = PolynomialCountSketch(**sketch_params)
    dense_X = dense_sketch.fit_transform(X)
    dense_Y = dense_sketch.transform(Y)

    # Sparse path: same configuration, CSR-converted inputs.
    sparse_sketch = PolynomialCountSketch(**sketch_params)
    sparse_X = sparse_sketch.fit_transform(csr_matrix(X))
    sparse_Y = sparse_sketch.transform(csr_matrix(Y))

    assert_allclose(dense_X, sparse_X)
    assert_allclose(dense_Y, sparse_Y)
Example #2
0
def test_polynomial_count_sketch(X, Y, gamma, degree, coef0):
    """Check PolynomialCountSketch against the exact polynomial kernel.

    The exact kernel between ``X`` and ``Y`` is computed with
    ``polynomial_kernel``; the approximation is the dot product of the two
    sketched feature maps. The test then bounds the bias, the worst-case
    absolute error and the mean absolute error of the approximation.
    """
    # Reference: the exact kernel matrix.
    exact = polynomial_kernel(X, Y, gamma=gamma, degree=degree, coef0=coef0)

    # Approximation: inner products of the sketched features.
    sketch = PolynomialCountSketch(
        n_components=5000,
        gamma=gamma,
        coef0=coef0,
        degree=degree,
        random_state=42,
    )
    X_sketched = sketch.fit_transform(X)
    Y_sketched = sketch.transform(Y)
    approx = np.dot(X_sketched, Y_sketched.T)

    residual = exact - approx
    # Signed errors should average out: the estimate is close to unbiased.
    assert np.abs(np.mean(residual)) <= 0.05

    abs_residual = np.abs(residual)
    # No single entry is far off ...
    assert np.max(abs_residual) <= 0.1
    # ... and the typical entry is fairly close.
    assert np.mean(abs_residual) <= 0.05
# Now let's compare how PolynomialCountSketch and Nystroem scale as the number
# of output components grows. Start by drawing a large synthetic dataset.


def _fit_transform_seconds(transformer, data):
    """Return the seconds spent in ``transformer.fit_transform(data, None)``."""
    began = time()
    transformer.fit_transform(data, None)
    return time() - began


fakeData = np.random.randn(10000, 100)
fakeDataY = np.random.randint(0, high=10, size=10000)

out_dims = range(500, 6000, 500)

# Time PolynomialCountSketch for every output dimensionality in the grid.
# Only fit_transform is timed; building the estimator is excluded.
ps_svm_times = []
for k in out_dims:
    ps = PolynomialCountSketch(degree=2, n_components=k)
    ps_svm_times.append(_fit_transform_seconds(ps, fakeData))

# Time Nystroem over the same grid.
# This can take a while due to the inefficient training phase.
ny_svm_times = []
for k in out_dims:
    ny = Nystroem(
        kernel="poly",
        gamma=1.0,
        degree=2,
        coef0=0,
        n_components=k,
    )
    ny_svm_times.append(_fit_transform_seconds(ny, fakeData))

# Plot the timings
fig, ax = plt.subplots(figsize=(6, 4))
ax.set_title("Scalability results")
                print('array is all zeros')
            else:
                print('Array is good')
                # Number of label entries that are not NaN.
                choice_length = np.count_nonzero(~np.isnan(labels))

                # Shuffle features and labels together, keep the first
                # `choice_length` rows, and replace remaining NaN labels by 0.
                # NOTE(review): assuming `shuffle` permutes rows, the rows kept
                # here are not necessarily the non-NaN ones -- confirm intent.
                X, y = shuffle(X_array, labels)
                X = X[:choice_length]
                y = y[:choice_length].fillna(0)

                # NOTE(review): `scaler` is not used in the lines visible here;
                # the pipeline below builds its own default MinMaxScaler().
                scaler = MinMaxScaler(feature_range=(-1, 1))
                mm = make_pipeline(MinMaxScaler(), Normalizer())
                X = mm.fit_transform(X)
                # Two kernel feature maps are fitted on the scaled data.
                rbf_feature = RBFSampler(gamma=1.5, random_state=10)
                ps = PolynomialCountSketch(degree=11, random_state=1)
                X_rbf_features = rbf_feature.fit_transform(X)
                # NOTE(review): X_poly_features is not consumed in the lines
                # visible here; only the RBF features are embedded below.
                X_poly_features = ps.fit_transform(X)
                # We want a TSNE embedding with 3 dimensions (the axes created
                # below use a 3D projection).
                n_components = 3
                tsne = TSNE(n_components)
                tsne_result = tsne.fit_transform(X_rbf_features)
                # Output path for the scatter figure.
                # NOTE(review): there is no separator between the idx value and
                # 'date_' in the generated file name -- confirm this is wanted.
                locationFileName = os.path.join(
                    figuresDestination,
                    str(sorted(symbols)[symbolIdx]) + '_idx_' + str(idx) +
                    'date_' + str(dateIdx) + '_' + str(labelName) +
                    '_tsne_rbf_kernelised.png')

                fashion_scatter(tsne_result, y, locationFileName)

                # New figure with 3D axes.
                fig = plt.figure(figsize=(16, 9))
                ax = plt.axes(projection='3d')
                # ax = Axes3D(fig)