コード例 #1
0
def func(m=5000, n=10, k=9):
    """Compare ``TruncatedSVD`` with ``sklearnsvd``, both using arpack.

    A seeded random ``m`` x ``n`` matrix is fitted by both estimators with
    ``algorithm="arpack"`` and ``random_state=42``.  The fitted attributes
    of each are printed and then compared with ``np.allclose`` at its
    default tolerances.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: ``n_components`` passed to both estimators.

    Raises:
        AssertionError: If any compared fitted attribute differs.
    """
    fitted_attrs = ("singular_values_", "components_",
                    "explained_variance_", "explained_variance_ratio_")

    def report(headings, model):
        # Print each heading followed by the matching fitted attribute.
        for heading, attr in zip(headings, fitted_attrs):
            print(heading)
            print(getattr(model, attr))

    np.random.seed(1234)
    X = np.random.rand(m, n)

    # Reference estimator (labelled "Exact scikit impl" in the original);
    # it is only fitted after the estimator under test has been reported.
    sklearn_tsvd = sklearnsvd(algorithm="arpack",
                              n_components=k,
                              random_state=42)

    n_rows, n_cols = X.shape
    print("SVD on " + str(n_rows) + " by " + str(n_cols) + " matrix")
    print("Original X Matrix")
    print(X)
    print("\n")
    print("Sklearn run through h2o4gpu wrapper")

    wrapper = TruncatedSVD(n_components=k,
                           algorithm="arpack",
                           random_state=42,
                           verbose=True)
    wrapper.fit(X)
    report(("h2o4gpu tsvd Singular Values",
            "h2o4gpu tsvd Components (V^T)",
            "h2o4gpu tsvd Explained Variance",
            "h2o4gpu tsvd Explained Variance Ratio"),
           wrapper)

    print("\n")
    print("sklearn run")
    sklearn_tsvd.fit(X)
    report(("Sklearn Singular Values",
            "Sklearn Components (V^T)",
            "Sklearn Explained Variance",
            "Sklearn Explained Variance Ratio"),
           sklearn_tsvd)

    # Same order as the report: singular values, components, variances.
    for attr in fitted_attrs:
        assert np.allclose(getattr(wrapper, attr),
                           getattr(sklearn_tsvd, attr))
コード例 #2
0
ファイル: test_daal_svd.py プロジェクト: yinxx/h2o4gpu
    def test_tsvd_wrapper(rows=100, cols=100, k=100):
        """Fit ``TruncatedSVD`` with the sklearn and daal backends and time both.

        The same input is fitted once per backend.  Elapsed times and the
        resulting singular values are printed.  When the ``CHECKPERFORMANCE``
        environment variable is set, the daal fit must not be slower than
        the sklearn fit.

        Args:
            rows: First argument forwarded to ``get_random_array``
                (presumably the row count -- confirm against the helper).
            cols: Second argument forwarded to ``get_random_array``
                (presumably the column count -- confirm against the helper).
            k: ``n_components`` passed to both estimators.

        Raises:
            AssertionError: If ``CHECKPERFORMANCE`` is set and the daal
                backend took longer than the sklearn backend.
        """
        indata = get_random_array(rows, cols)

        # perf_counter() is monotonic, so the measured durations cannot be
        # skewed by wall-clock adjustments the way time.time() can.
        start_sklearn = time.perf_counter()
        h2o4gpu_tsvd_sklearn = TruncatedSVD(n_components=k,
                                            verbose=True,
                                            backend='sklearn')
        h2o4gpu_tsvd_sklearn.fit(indata)
        end_sklearn = time.perf_counter()

        start_daal = time.perf_counter()
        h2o4gpu_tsvd_daal = TruncatedSVD(n_components=k,
                                         verbose=True,
                                         backend='daal')
        h2o4gpu_tsvd_daal.fit(indata)
        end_daal = time.perf_counter()

        print("H2o4GPU tsvd for backend=sklearn: {} seconds taken".format(
            end_sklearn - start_sklearn))
        print("H2o4GPU tsvd for backend=daal: {} seconds taken".format(
            end_daal - start_daal))

        # The singular values are only printed, not compared.
        sklearn_sigma = h2o4gpu_tsvd_sklearn.singular_values_
        daal_sigma = h2o4gpu_tsvd_daal.singular_values_
        print("H2o4GPU tsvd Sklearn Singular values: {}".format(sklearn_sigma))
        print("H2o4GPU tsvd Daal Singular values:    {}".format(daal_sigma))

        # The performance gate is opt-in so ordinary runs never fail on timing.
        if os.getenv("CHECKPERFORMANCE") is not None:
            assert (end_daal - start_daal <= end_sklearn - start_sklearn)
コード例 #3
0
ファイル: test_tsvd_wrapper.py プロジェクト: yinxx/h2o4gpu
def func(m=5000, n=10, k=9, algorithm="cusolver", convert_to_float32=False):
    """Fit ``TruncatedSVD`` with ``algorithm`` and compare it to ``sklearnsvd``.

    A seeded random ``m`` x ``n`` matrix (optionally cast to float32) is
    fitted by ``TruncatedSVD`` using ``algorithm`` and by ``sklearnsvd``
    using arpack.  The fitted attributes of both are printed and compared.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: ``n_components`` passed to both estimators.
        algorithm: ``algorithm`` passed to ``TruncatedSVD``.  The value
            ``"power"`` skips the second-component assertion and prints
            the maximum absolute difference instead.
        convert_to_float32: If true, cast the input to ``np.float32``
            before fitting.

    Raises:
        AssertionError: If a compared attribute differs beyond tolerance.
    """
    np.random.seed(1234)

    X = np.random.rand(m, n)
    if convert_to_float32:
        X = X.astype(np.float32)

    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) + " matrix")
    print("Original X Matrix")
    print(X)

    print("\n")
    print("H2O4GPU run")
    h2o4gpu_tsvd_sklearn_wrapper = TruncatedSVD(n_components=k,
                                                algorithm=algorithm,
                                                tol=1E-50,
                                                n_iter=200,
                                                random_state=42,
                                                verbose=True)
    h2o4gpu_tsvd_sklearn_wrapper.fit(X)
    print("h2o4gpu tsvd Singular Values")
    print(h2o4gpu_tsvd_sklearn_wrapper.singular_values_)
    print("h2o4gpu tsvd Components (V^T)")
    print(h2o4gpu_tsvd_sklearn_wrapper.components_)
    print("h2o4gpu tsvd Explained Variance")
    print(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_)
    print("h2o4gpu tsvd Explained Variance Ratio")
    print(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_ratio_)

    print("\n")
    print("Sklearn run")
    # Reference fit (labelled "Exact scikit impl" in the original).
    sklearn_tsvd = sklearnsvd(algorithm="arpack",
                              n_components=k,
                              random_state=42)
    sklearn_tsvd.fit(X)
    print("Sklearn Singular Values")
    print(sklearn_tsvd.singular_values_)
    print("Sklearn Components (V^T)")
    print(sklearn_tsvd.components_)
    print("Sklearn Explained Variance")
    print(sklearn_tsvd.explained_variance_)
    print("Sklearn Explained Variance Ratio")
    print(sklearn_tsvd.explained_variance_ratio_)

    rtol = 1E-5

    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.singular_values_,
                       sklearn_tsvd.singular_values_,
                       rtol=rtol)

    # Check components for first singular value
    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.components_[0],
                       sklearn_tsvd.components_[0],
                       rtol=rtol)

    # Check components for second singular value
    # TODO(navdeep): Why does this not match?
    if algorithm != "power":
        # Only a very loose tolerance is enforced for the second component.
        assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.components_[1],
                           sklearn_tsvd.components_[1],
                           rtol=.7)
    else:
        # For "power" the second component is not asserted, only reported.
        # Use the absolute difference so that large negative deviations are
        # not hidden behind a smaller positive maximum.
        print("Max diff of power components")
        max_abs_diff = np.max(
            np.abs(h2o4gpu_tsvd_sklearn_wrapper.components_[1] -
                   sklearn_tsvd.components_[1]))
        print(str(max_abs_diff))

    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_,
                       sklearn_tsvd.explained_variance_,
                       rtol=rtol)
    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_ratio_,
                       sklearn_tsvd.explained_variance_ratio_,
                       rtol=rtol)
コード例 #4
0
def func(m=5000, n=10, k=9, algorithm="cusolver", convert_to_float32=False):
    """Compare ``TruncatedSVD`` against arpack and randomized ``sklearnsvd``.

    Two rounds are run on the same seeded random ``m`` x ``n`` matrix:

    1. ``sklearnsvd`` with arpack versus ``TruncatedSVD`` with
       ``algorithm=[algorithm, 'randomized']``, compared at ``rtol=0.5``.
    2. ``sklearnsvd`` with the randomized solver versus ``TruncatedSVD``
       with ``n_gpus=0``, compared at ``rtol=1E-2``.

    Each fitted model has its attributes and ``get_params()`` printed.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: ``n_components`` passed to every estimator.
        algorithm: First entry of the ``algorithm`` list passed to
            ``TruncatedSVD``.
        convert_to_float32: If true, cast the input to ``np.float32``
            before fitting.

    Raises:
        AssertionError: If a compared attribute differs beyond tolerance.
    """
    fitted_attrs = ("singular_values_", "components_",
                    "explained_variance_", "explained_variance_ratio_")
    sklearn_headings = ("Sklearn Singular Values",
                        "Sklearn Components (V^T)",
                        "Sklearn Explained Variance",
                        "Sklearn Explained Variance Ratio")
    h2o4gpu_headings = ("h2o4gpu tsvd Singular Values",
                        "h2o4gpu tsvd Components (V^T)",
                        "h2o4gpu tsvd Explained Variance",
                        "h2o4gpu tsvd Explained Variance Ratio")

    def fit_and_report(model, data, headings):
        # Fit the model, print its fitted attributes and its parameters.
        model.fit(data)
        for heading, attr in zip(headings, fitted_attrs):
            print(heading)
            print(getattr(model, attr))
        print(model.get_params())
        return model

    def check_close(candidate, reference, rtol):
        # Compare every fitted attribute with the given relative tolerance.
        for attr in fitted_attrs:
            assert np.allclose(getattr(candidate, attr),
                               getattr(reference, attr),
                               rtol=rtol)

    np.random.seed(1234)
    X = np.random.rand(m, n)
    if convert_to_float32:
        X = X.astype(np.float32)

    n_rows, n_cols = X.shape
    print("SVD on " + str(n_rows) + " by " + str(n_cols) + " matrix")
    print("Original X Matrix")
    print(X)
    print("\n")

    # Round 1: arpack reference versus the wrapper.
    print("sklearn run")
    arpack_reference = fit_and_report(
        sklearnsvd(algorithm="arpack", n_components=k, random_state=42),
        X, sklearn_headings)

    print("GPU run through h2o4gpu wrapper")
    # NOTE(review): the list-valued ``algorithm`` is presumably a pair of
    # solver choices for the wrapper -- confirm against the wrapper docs.
    gpu_model = fit_and_report(
        TruncatedSVD(n_components=k,
                     algorithm=[algorithm, 'randomized'],
                     random_state=42,
                     verbose=True,
                     n_iter=500,
                     tol=1E-7),
        X, h2o4gpu_headings)

    check_close(gpu_model, arpack_reference, rtol=0.5)

    ######################

    # Round 2: randomized reference versus the wrapper with n_gpus=0.
    print("sklearn run")
    randomized_reference = fit_and_report(
        sklearnsvd(algorithm="randomized", n_components=k, random_state=42),
        X, sklearn_headings)

    print("Sklearn run through h2o4gpu wrapper using n_gpus=0")
    # NOTE: per the original author, n_iter=[1000, 400] with
    # tol=[1E-7, 1E-7] FAILS to agree (the cusolver solution seems to
    # diverge, or, less likely, randomized has a bug in the same way),
    # hence the settings below.
    cpu_model = fit_and_report(
        TruncatedSVD(n_components=k,
                     algorithm=[algorithm, 'randomized'],
                     random_state=42,
                     verbose=True,
                     n_gpus=0,
                     n_iter=[1000, 5],
                     tol=[1E-7, 1E-4]),
        X, h2o4gpu_headings)

    check_close(cpu_model, randomized_reference, rtol=1E-2)
コード例 #5
0
def func(k=9, algorithm="cusolver", rtol=1E-3):
    """Compare ``TruncatedSVD`` with arpack ``sklearnsvd`` on tiled iris data.

    The iris feature matrix is repeated 16 times along the rows and 256
    times along the columns.  Both estimators are then fitted on it, and
    their fitted attributes are printed and compared.

    Args:
        k: ``n_components`` passed to both estimators.
        algorithm: ``algorithm`` passed to ``TruncatedSVD``.
        rtol: Relative tolerance for every comparison except the second
            component row, which uses a fixed ``rtol=.7``.

    Raises:
        AssertionError: If a compared attribute differs beyond tolerance.
    """
    features = load_iris().data

    # Doubling the rows 4 times and the columns 8 times is the same as
    # tiling the matrix 2**4 = 16 times vertically and 2**8 = 256 times
    # horizontally.
    X = np.tile(features, (16, 256))

    print("\n")
    n_rows, n_cols = X.shape
    print("SVD on " + str(n_rows) + " by " + str(n_cols) + " matrix")
    print("Original X Matrix")
    print(X)

    print("\n")
    print("H2O4GPU run")
    wrapper = TruncatedSVD(
        n_components=k,
        algorithm=algorithm,
        tol=1E-50,
        n_iter=200,
        random_state=42,
        verbose=True,
    )
    wrapper.fit(X)
    print("h2o4gpu tsvd Singular Values")
    print(wrapper.singular_values_)
    print("h2o4gpu tsvd Components (V^T)")
    print(wrapper.components_)
    print("h2o4gpu tsvd Explained Variance")
    print(wrapper.explained_variance_)
    print("h2o4gpu tsvd Explained Variance Ratio")
    print(wrapper.explained_variance_ratio_)

    print("\n")
    print("Sklearn run")
    # Reference fit (labelled "Exact scikit impl" in the original).
    reference = sklearnsvd(
        algorithm="arpack",
        n_components=k,
        random_state=42,
    )
    reference.fit(X)
    print("Sklearn Singular Values")
    print(reference.singular_values_)
    print("Sklearn Components (V^T)")
    print(reference.components_)
    print("Sklearn Explained Variance")
    print(reference.explained_variance_)
    print("Sklearn Explained Variance Ratio")
    print(reference.explained_variance_ratio_)

    # Singular values
    assert np.allclose(
        wrapper.singular_values_, reference.singular_values_, rtol=rtol
    )

    # Component row for the first singular value
    assert np.allclose(
        wrapper.components_[0], reference.components_[0], rtol=rtol
    )

    # Component row for the second singular value (much looser tolerance)
    assert np.allclose(
        wrapper.components_[1], reference.components_[1], rtol=.7
    )

    # Explained variance and explained variance ratio
    assert np.allclose(
        wrapper.explained_variance_, reference.explained_variance_, rtol=rtol
    )
    assert np.allclose(
        wrapper.explained_variance_ratio_,
        reference.explained_variance_ratio_,
        rtol=rtol,
    )
コード例 #6
0
def func(m=5000, n=10, k=9, algorithm="cusolver"):
    """Fit ``TruncatedSVD`` with ``algorithm`` and compare it to ``sklearnsvd``.

    A seeded random ``m`` x ``n`` matrix is fitted by ``TruncatedSVD``
    (``n_iter=100``) and by ``sklearnsvd`` using arpack.  Singular values,
    explained variance and explained variance ratio are compared; the
    components are printed but not asserted.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: ``n_components`` passed to both estimators.
        algorithm: ``algorithm`` passed to ``TruncatedSVD``.  ``'arpack'``
            is held to ``rtol=1E-5`` everywhere; any other value uses the
            looser tolerances below.

    Raises:
        AssertionError: If a compared attribute differs beyond tolerance.
    """
    np.random.seed(1234)

    X = np.random.rand(m, n)

    # Reference estimator (labelled "Exact scikit impl" in the original);
    # it is fitted after the estimator under test.
    sklearn_tsvd = sklearnsvd(algorithm="arpack",
                              n_components=k,
                              random_state=42)

    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) + " matrix")
    print("Original X Matrix")
    print(X)
    print("\n")
    print("Sklearn run through h2o4gpu wrapper")

    h2o4gpu_tsvd_sklearn_wrapper = TruncatedSVD(n_components=k,
                                                algorithm=algorithm,
                                                random_state=42,
                                                verbose=True,
                                                n_iter=100)
    h2o4gpu_tsvd_sklearn_wrapper.fit(X)

    print("h2o4gpu tsvd Singular Values")
    print(h2o4gpu_tsvd_sklearn_wrapper.singular_values_)
    print("h2o4gpu tsvd Components (V^T)")
    print(h2o4gpu_tsvd_sklearn_wrapper.components_)
    print("h2o4gpu tsvd Explained Variance")
    print(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_)
    print("h2o4gpu tsvd Explained Variance Ratio")
    print(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_ratio_)

    print("\n")
    print("sklearn run")
    sklearn_tsvd.fit(X)
    print("Sklearn Singular Values")
    print(sklearn_tsvd.singular_values_)
    print("Sklearn Components (V^T)")
    print(sklearn_tsvd.components_)
    print("Sklearn Explained Variance")
    print(sklearn_tsvd.explained_variance_)
    print("Sklearn Explained Variance Ratio")
    print(sklearn_tsvd.explained_variance_ratio_)

    is_arpack = algorithm == 'arpack'

    sigma_rtol = 1E-5 if is_arpack else 1E-2
    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.singular_values_,
                       sklearn_tsvd.singular_values_,
                       rtol=sigma_rtol)

    # TODO(navdeep): Why does this not match?  The components_ comparison
    # (rtol 1E-5 for arpack, 1E-1 otherwise) is disabled until understood.

    variance_rtol = 1E-5 if is_arpack else 0.5
    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_,
                       sklearn_tsvd.explained_variance_,
                       rtol=variance_rtol)
    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_ratio_,
                       sklearn_tsvd.explained_variance_ratio_,
                       rtol=variance_rtol)