Exemples Python de skTSVD (alias d'import de sklearn.decomposition.TruncatedSVD)

Exemple #1

0

Afficher le fichier

def test_tsvd_fit_transform(datatype, name, use_handle):
    """Check cuTSVD ``fit_transform`` output against skTSVD on one dataset.

    Parameters
    ----------
    datatype
        Test parameter; not referenced by this variant of the test.
    name
        Dataset selector: ``'blobs'`` (cuTSVD is only run, nothing is
        compared), ``'random'`` (currently skipped), anything else uses a
        small 500 x 5 Gaussian dataset.
    use_handle
        Forwarded to ``get_handle`` to obtain the handle given to cuTSVD.
    """
    if name == 'blobs':
        X, _ = make_blobs(n_samples=500000, n_features=1000, random_state=0)

    elif name == 'random':
        pytest.skip('fails when using random dataset '
                    'used by sklearn for testing')
        # Unreachable while the skip above is in place (pytest.skip raises).
        # Kept so the case can be re-enabled by deleting the skip.
        shape = 5000, 100
        rng = check_random_state(42)
        # np.prod replaces the np.product alias, which NumPy 2.0 removed.
        X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)

    else:
        n, p = 500, 5
        rng = np.random.RandomState(0)
        X = rng.randn(n, p) * .1 + np.array([3, 4, 2, 3, 5])

    # The blobs dataset is only pushed through cuTSVD; no reference result
    # is computed for it.
    compare_with_sklearn = name != 'blobs'

    if compare_with_sklearn:
        sktsvd = skTSVD(n_components=1)
        Xsktsvd = sktsvd.fit_transform(X)

    # get_handle returns a pair; only the handle is needed here.
    handle, _ = get_handle(use_handle)
    cutsvd = cuTSVD(n_components=1, handle=handle)

    Xcutsvd = cutsvd.fit_transform(X)
    cutsvd.handle.sync()

    if compare_with_sklearn:
        assert array_equal(Xcutsvd, Xsktsvd, 1e-3, with_sign=True)

Exemple #2

0

Afficher le fichier

def run_tsvd(X, n_components, algorithm, random_state, model):
    """Fit a truncated-SVD model from the chosen backend and transform ``X``.

    Parameters
    ----------
    X
        Data passed to the estimator's ``fit`` and ``transform``.
    n_components, random_state
        Forwarded to the estimator constructor.
    algorithm
        Forwarded to the sklearn and h2o4gpu estimators. For h2o4gpu,
        ``'arpack'`` is replaced by ``'cusolver'``. The cuml estimator is
        built without it.
    model
        Backend selector: ``'sklearn'``, ``'h2o4gpu'`` or ``'cuml'``.

    Returns
    -------
    The fitted estimator, with the transformed data attached as the
    ``transformed_result`` attribute.

    Raises
    ------
    NotImplementedError
        If ``model`` is not one of the supported backends.
    """
    if model == 'sklearn':
        tsvd = skTSVD(n_components=n_components,
                      algorithm=algorithm,
                      random_state=random_state)
    elif model == 'h2o4gpu':
        # Imported lazily so the backend is only required when selected.
        from h2o4gpu.solvers import TruncatedSVDH2O as h2oTSVD
        if algorithm == 'arpack':
            algorithm = 'cusolver'
        tsvd = h2oTSVD(n_components=n_components,
                       algorithm=algorithm,
                       random_state=random_state)
    elif model == 'cuml':
        # Imported lazily so the backend is only required when selected.
        from cuSKL import TruncatedSVD as cumlTSVD
        tsvd = cumlTSVD(n_components=n_components, random_state=random_state)
    else:
        raise NotImplementedError('unsupported model: %r' % (model,))

    # NOTE(review): the otherwise unused ``model`` keyword is presumably
    # read by the ``timer`` decorator to label the measurement - confirm.
    @timer
    def fit_(tsvd, X, model):
        tsvd.fit(X)
        return tsvd

    @timer
    def transform_(tsvd, X, model):
        return tsvd.transform(X)

    tsvd = fit_(tsvd, X, model=model)
    # Attach the projection to the estimator so callers get both from the
    # single return value.
    tsvd.transformed_result = transform_(tsvd, X, model=model)
    return tsvd

Exemple #3

0

Afficher le fichier

def test_tsvd_fit(datatype, input_type):
    """Fit cuTSVD and skTSVD on a six-point 2-D dataset and compare them.

    ``datatype`` is the dtype of the input arrays. When ``input_type`` is
    ``'dataframe'`` cuTSVD is fitted on a cudf DataFrame holding the same
    values column by column; otherwise it is fitted on the NumPy array.
    """
    points = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(points, dtype=datatype)

    sktsvd = skTSVD(n_components=1)
    sktsvd.fit(X)

    cutsvd = cuTSVD(n_components=1)

    if input_type != 'dataframe':
        cutsvd.fit(X)
    else:
        gdf = cudf.DataFrame()
        gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        cutsvd.fit(gdf)

    fitted_attrs = ('singular_values_', 'components_',
                    'explained_variance_ratio_')
    for attr in fitted_attrs:
        got = getattr(cutsvd, attr)
        expected = getattr(sktsvd, attr)
        # Only components_ is compared with with_sign=False.
        check_sign = attr != 'components_'
        assert array_equal(got, expected, 0.4, with_sign=check_sign)

Exemple #4

0

Afficher le fichier

Fichier : test_tsvd.py Projet : st071300/cuML

def test_tsvd_fit(datatype, name, use_handle):
    """Check the fitted attributes of cuTSVD against skTSVD on one dataset.

    Parameters
    ----------
    datatype
        Test parameter; not referenced by this variant of the test.
    name
        Dataset selector: ``'blobs'`` (cuTSVD is only fitted, nothing is
        compared), ``'random'`` (currently skipped), anything else uses a
        small 500 x 5 Gaussian dataset.
    use_handle
        Forwarded to ``get_handle`` to obtain the handle given to cuTSVD.
    """
    if name == 'blobs':
        X, _ = make_blobs(n_samples=500000,
                          n_features=1000, random_state=0)

    elif name == 'random':
        pytest.skip('fails when using random dataset '
                    'used by sklearn for testing')
        # Unreachable while the skip above is in place (pytest.skip raises).
        # Kept so the case can be re-enabled by deleting the skip.
        shape = 5000, 100
        rng = check_random_state(42)
        # np.prod replaces the np.product alias, which NumPy 2.0 removed.
        X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)

    else:
        n, p = 500, 5
        rng = np.random.RandomState(0)
        X = rng.randn(n, p) * .1 + np.array([3, 4, 2, 3, 5])

    # The blobs dataset is only pushed through cuTSVD; no reference model
    # is fitted for it.
    compare_with_sklearn = name != 'blobs'

    if compare_with_sklearn:
        sktsvd = skTSVD(n_components=1)
        sktsvd.fit(X)

    # get_handle returns a pair; only the handle is needed here.
    handle, _ = get_handle(use_handle)
    cutsvd = cuTSVD(n_components=1, handle=handle)

    cutsvd.fit(X)
    cutsvd.handle.sync()

    if compare_with_sklearn:
        for attr in ('singular_values_', 'components_',
                     'explained_variance_ratio_'):
            # Only components_ is compared with with_sign=False.
            with_sign = attr != 'components_'
            assert array_equal(getattr(cutsvd, attr), getattr(sktsvd, attr),
                               0.4, with_sign=with_sign)

Exemple #5

0

Afficher le fichier

Fichier : tsvd.py Projet : d3v3l0/offload-annotations

def run_cpu(X, y):
    """Return ``X`` projected by an ARPACK-based skTSVD model.

    ``y`` is accepted but not used. ``n_components`` and ``random_state``
    are read from the enclosing module scope.
    """
    estimator = skTSVD(
        n_components=n_components,
        algorithm="arpack",
        n_iter=5000,
        tol=0.00001,
        random_state=random_state,
    )
    return estimator.fit_transform(X)

Exemple #6

0

Afficher le fichier

def test_pca_fit_transform(datatype):
    """Compare cuTSVD ``fit_transform`` on a pygdf DataFrame with skTSVD.

    The same six 2-D points are supplied to cuTSVD as a two-column
    DataFrame and to skTSVD as a NumPy array, both using dtype
    ``datatype``. The results are compared with ``with_sign=False``.
    """
    column_values = (
        ('0', [-1, -2, -3, 1, 2, 3]),
        ('1', [-1, -1, -2, 1, 1, 2]),
    )
    gdf = pygdf.DataFrame()
    for column, values in column_values:
        gdf[column] = np.asarray(values, dtype=datatype)

    points = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(points, dtype=datatype)

    print("Calling fit_transform")
    cutsvd = cuTSVD(n_components=1)
    Xcutsvd = cutsvd.fit_transform(gdf)

    sktsvd = skTSVD(n_components=1)
    Xsktsvd = sktsvd.fit_transform(X)

    assert array_equal(Xcutsvd, Xsktsvd, 1e-3, with_sign=False)

Exemple #7

0

Afficher le fichier

def test_tsvd_fit_transform(datatype, input_type):
    """Compare cuTSVD and skTSVD ``fit_transform`` on a six-point dataset.

    ``datatype`` is the dtype of the input arrays. When ``input_type`` is
    ``'dataframe'`` cuTSVD receives a cudf DataFrame holding the same
    values column by column; otherwise it receives the NumPy array.
    """
    points = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(points, dtype=datatype)

    sktsvd = skTSVD(n_components=1)
    Xsktsvd = sktsvd.fit_transform(X)

    cutsvd = cuTSVD(n_components=1)

    if input_type != 'dataframe':
        Xcutsvd = cutsvd.fit_transform(X)
    else:
        gdf = cudf.DataFrame()
        gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        Xcutsvd = cutsvd.fit_transform(gdf)

    assert array_equal(Xcutsvd, Xsktsvd, 1e-3, with_sign=True)

Exemple #8

0

Afficher le fichier

def test_tsvd_fit(datatype, input_type,
                  name, use_handle):
    """Check the fitted attributes of cuTSVD against skTSVD on one dataset.

    Parameters
    ----------
    datatype
        dtype of the small default dataset; not applied to the other
        datasets.
    input_type
        ``'dataframe'`` fits cuTSVD on a cudf DataFrame built from ``X``;
        any other value fits it on ``X`` directly.
    name
        Dataset selector: ``'blobs'`` (cuTSVD is only fitted, nothing is
        compared), ``'random'`` (currently skipped), anything else uses a
        six-point 2-D dataset.
    use_handle
        Forwarded to ``get_handle`` to obtain the handle given to cuTSVD.
    """
    if name == 'blobs':
        X, _ = make_blobs(n_samples=500000,
                          n_features=1000, random_state=0)

    elif name == 'random':
        pytest.skip('fails when using random dataset '
                    'used by sklearn for testing')
        # Unreachable while the skip above is in place (pytest.skip raises).
        # Kept so the case can be re-enabled by deleting the skip.
        shape = 5000, 100
        rng = check_random_state(42)
        # np.prod replaces the np.product alias, which NumPy 2.0 removed.
        X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)

    else:
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
                     dtype=datatype)

    # The blobs dataset is only pushed through cuTSVD; no reference model
    # is fitted for it.
    compare_with_sklearn = name != 'blobs'

    if compare_with_sklearn:
        sktsvd = skTSVD(n_components=1)
        sktsvd.fit(X)

    # get_handle returns a pair; only the handle is needed here.
    handle, _ = get_handle(use_handle)
    cutsvd = cuTSVD(n_components=1, handle=handle)

    if input_type == 'dataframe':
        # One pandas column per feature, then converted to cudf. A separate
        # name is used so X keeps referring to the NumPy array.
        X_pandas = pd.DataFrame(
            {'fea%d' % i: X[:, i] for i in range(X.shape[1])})
        X_cudf = cudf.DataFrame.from_pandas(X_pandas)
        cutsvd.fit(X_cudf)

    else:
        cutsvd.fit(X)

    cutsvd.handle.sync()

    if compare_with_sklearn:
        for attr in ('singular_values_', 'components_',
                     'explained_variance_ratio_'):
            # Only components_ is compared with with_sign=False.
            with_sign = attr != 'components_'
            assert array_equal(getattr(cutsvd, attr), getattr(sktsvd, attr),
                               0.4, with_sign=with_sign)

Exemple #9

0

Afficher le fichier

def test_tsvd_fit_transform(datatype, input_type,
                            name, use_handle):
    """Check cuTSVD ``fit_transform`` output against skTSVD on one dataset.

    Parameters
    ----------
    datatype
        dtype of the small default dataset; not applied to the other
        datasets.
    input_type
        ``'dataframe'`` feeds cuTSVD a cudf DataFrame built from ``X``;
        any other value feeds it ``X`` directly.
    name
        Dataset selector: ``'blobs'`` (cuTSVD is only run, nothing is
        compared), ``'random'`` (currently skipped), anything else uses a
        six-point 2-D dataset.
    use_handle
        Forwarded to ``get_handle`` to obtain the handle given to cuTSVD.
    """
    if name == 'blobs':
        X, _ = make_blobs(n_samples=500000,
                          n_features=1000, random_state=0)

    elif name == 'random':
        pytest.skip('fails when using random dataset '
                    'used by sklearn for testing')
        # Unreachable while the skip above is in place (pytest.skip raises).
        # Kept so the case can be re-enabled by deleting the skip.
        shape = 5000, 100
        rng = check_random_state(42)
        # np.prod replaces the np.product alias, which NumPy 2.0 removed.
        X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)

    else:
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
                     dtype=datatype)

    # The blobs dataset is only pushed through cuTSVD; no reference result
    # is computed for it.
    compare_with_sklearn = name != 'blobs'

    if compare_with_sklearn:
        sktsvd = skTSVD(n_components=1)
        Xsktsvd = sktsvd.fit_transform(X)

    # get_handle returns a pair; only the handle is needed here.
    handle, _ = get_handle(use_handle)
    cutsvd = cuTSVD(n_components=1, handle=handle)

    if input_type == 'dataframe':
        # One pandas column per feature, then converted to cudf. A separate
        # name is used so X keeps referring to the NumPy array.
        X_pandas = pd.DataFrame(
            {'fea%d' % i: X[:, i] for i in range(X.shape[1])})
        X_cudf = cudf.DataFrame.from_pandas(X_pandas)
        Xcutsvd = cutsvd.fit_transform(X_cudf)

    else:
        Xcutsvd = cutsvd.fit_transform(X)

    cutsvd.handle.sync()

    if compare_with_sklearn:
        assert array_equal(Xcutsvd, Xsktsvd, 1e-3, with_sign=True)