예제 #1
0
def test_random_projection_fit_transform(datatype, method):
    """Check that fit + transform keeps pairwise distances within ``eps``.

    A Gaussian or sparse random projection is fitted with ``eps = 0.2`` on
    generated blob data cast to ``datatype``. The same data is then
    transformed, and every embedded pairwise distance must lie between
    ``(1 - eps)`` and ``(1 + eps)`` times the original distance.

    Parameters
    ----------
    datatype
        dtype handed to ``astype`` for the generated data.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other value
        selects ``SparseRandomProjection``.
    """
    # pdist lives in SciPy: skip the test when SciPy is unavailable and only
    # import it once we know it is present.
    if not has_scipy():
        pytest.skip('Skipping test_random_projection_fit_transform because ' +
                    'Scipy is missing')
    from scipy.spatial.distance import pdist

    eps = 0.2

    # dataset generation (the labels are not needed by this test)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to input_type
    data = data.astype(datatype)

    # creation of model
    projection_cls = (GaussianRandomProjection if method == 'gaussian'
                      else SparseRandomProjection)
    model = projection_cls(eps=eps)

    # fitting the model, then applying the transformation
    model.fit(data)
    transformed_data = model.transform(data)

    original_pdist = pdist(data)
    embedded_pdist = pdist(transformed_data)

    # check JL lemma; one assertion per bound so a failure shows which side
    # of the distortion interval was violated
    assert np.all((1.0 - eps) * original_pdist <= embedded_pdist)
    assert np.all(embedded_pdist <= (1.0 + eps) * original_pdist)
예제 #2
0
def test_random_projection_fit_transform_default(datatype, method):
    """Check distance preservation for a default-constructed projection.

    The model is built without arguments, fitted on generated blob data cast
    to ``datatype`` and used to transform that same data. Every embedded
    pairwise distance must lie between ``(1 - eps)`` and ``(1 + eps)`` times
    the original distance, with ``eps = 0.8``.

    Parameters
    ----------
    datatype
        dtype handed to ``astype`` for the generated data.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other value
        selects ``SparseRandomProjection``.
    """
    # Tolerance used by the assertions only; it is NOT passed to the model,
    # which is constructed with its default parameters.
    eps = 0.8

    # dataset generation (the labels are not needed by this test)
    data, _ = make_blobs(n_samples=30, centers=4, n_features=5000)

    # conversion to input_type
    data = data.astype(datatype)

    # creation of model
    projection_cls = (GaussianRandomProjection if method == 'gaussian'
                      else SparseRandomProjection)
    model = projection_cls()

    # fitting the model, then applying the transformation
    model.fit(data)
    transformed_data = model.transform(data)

    # NOTE(review): pdist is not imported inside this function; presumably it
    # comes from a module-level `scipy.spatial.distance` import - confirm.
    original_pdist = pdist(data)
    embedded_pdist = pdist(transformed_data)

    # check JL lemma; one assertion per bound so a failure shows which side
    # of the distortion interval was violated
    assert np.all((1.0 - eps) * original_pdist <= embedded_pdist)
    assert np.all(embedded_pdist <= (1.0 + eps) * original_pdist)
예제 #3
0
def test_random_projection_fit(datatype, method):
    """Smoke test: fitting a random projection completes without raising.

    Parameters
    ----------
    datatype
        dtype handed to ``astype`` for the generated data.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other value
        selects ``SparseRandomProjection``.
    """
    # dataset generation (the labels are not needed by this test)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to input_type
    data = data.astype(datatype)

    # creation of model
    projection_cls = (GaussianRandomProjection if method == 'gaussian'
                      else SparseRandomProjection)
    model = projection_cls(eps=0.2)

    # fitting the model; nothing is checked beyond "fit() returned"
    model.fit(data)

    assert True  # Did not crash
def test_random_projection_fit_transform(datatype, input_type, method):
    """Check that fit + transform keeps pairwise distances within ``eps``.

    A Gaussian or sparse random projection is fitted with ``eps = 0.2`` on
    generated blob data cast to ``datatype``, supplied either as the array
    itself or as a ``cudf.DataFrame`` built column by column. Every embedded
    pairwise distance must lie between ``(1 - eps)`` and ``(1 + eps)`` times
    the original distance.

    Parameters
    ----------
    datatype
        dtype handed to ``astype`` for the generated data.
    input_type
        ``'dataframe'`` feeds the model a ``cudf.DataFrame``; any other
        value feeds it the array directly.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other value
        selects ``SparseRandomProjection``.
    """
    eps = 0.2

    # dataset generation (the labels are not needed by this test)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to input_type
    data = data.astype(datatype)

    # creation of model
    projection_cls = (GaussianRandomProjection if method == 'gaussian'
                      else SparseRandomProjection)
    model = projection_cls(eps=eps)

    # fitting the model and applying the transformation; both steps share a
    # single branch on input_type so the same object is fitted and transformed
    if input_type == 'dataframe':
        gdf = cudf.DataFrame()
        # one DataFrame column per feature, named by its index
        for i in range(data.shape[1]):
            gdf[str(i)] = np.asarray(data[:, i], dtype=datatype)
        model.fit(gdf)
        # NOTE(review): as_matrix() may be deprecated/removed in newer cuDF
        # releases - confirm against the cuDF version this test targets.
        transformed_data = model.transform(gdf).as_matrix()
    else:
        model.fit(data)
        transformed_data = model.transform(data)

    # NOTE(review): pdist is not imported inside this function; presumably it
    # comes from a module-level `scipy.spatial.distance` import - confirm.
    original_pdist = pdist(data)
    embedded_pdist = pdist(transformed_data)

    # check JL lemma; one assertion per bound so a failure shows which side
    # of the distortion interval was violated
    assert np.all((1.0 - eps) * original_pdist <= embedded_pdist)
    assert np.all(embedded_pdist <= (1.0 + eps) * original_pdist)
def test_random_projection_fit(datatype, input_type, method):
    """Smoke test: fitting a random projection completes without raising.

    Parameters
    ----------
    datatype
        dtype handed to ``astype`` for the generated data.
    input_type
        ``'dataframe'`` feeds the model a ``cudf.DataFrame`` built column by
        column; any other value feeds it the array directly.
    method
        ``'gaussian'`` selects ``GaussianRandomProjection``; any other value
        selects ``SparseRandomProjection``.
    """
    # dataset generation (the labels are not needed by this test)
    data, _ = make_blobs(n_samples=800, centers=400, n_features=3000)

    # conversion to input_type
    data = data.astype(datatype)

    # creation of model
    projection_cls = (GaussianRandomProjection if method == 'gaussian'
                      else SparseRandomProjection)
    model = projection_cls(eps=0.2)

    # fitting the model; nothing is checked beyond "fit() returned"
    if input_type == 'dataframe':
        gdf = cudf.DataFrame()
        # one DataFrame column per feature, named by its index
        for i in range(data.shape[1]):
            gdf[str(i)] = np.asarray(data[:, i], dtype=datatype)
        model.fit(gdf)
    else:
        model.fit(data)

    assert True  # Did not crash