예제 #1
0
def func(m=5000, n=10, k=9, algorithm="cusolver", convert_to_float32=False):
    """Check the h2o4gpu ``TruncatedSVD`` wrapper against scikit-learn.

    A seeded random ``m`` x ``n`` matrix is decomposed in two rounds:

    1. scikit-learn with ``algorithm="arpack"`` vs. the wrapper, compared
       with a loose relative tolerance of 0.5;
    2. scikit-learn with ``algorithm="randomized"`` vs. the wrapper
       constructed with ``n_gpus=0``, compared with a relative tolerance
       of 1E-2.

    Args:
        m: Number of rows of the random matrix.
        n: Number of columns of the random matrix.
        k: ``n_components`` passed to every estimator.
        algorithm: First entry of the algorithm list given to the wrapper;
            the second entry is always ``'randomized'``.
        convert_to_float32: Cast the matrix to ``np.float32`` before fitting.

    Raises:
        AssertionError: If any compared attribute differs beyond tolerance.
    """
    # (label suffix, fitted attribute) in the order they are printed and
    # asserted.
    reported = (
        ("Singular Values", "singular_values_"),
        ("Components (V^T)", "components_"),
        ("Explained Variance", "explained_variance_"),
        ("Explained Variance Ratio", "explained_variance_ratio_"),
    )

    def fit_and_dump(model, data, prefix):
        """Fit ``model`` on ``data``, print its attributes and parameters."""
        model.fit(data)
        for suffix, attr in reported:
            print(prefix + " " + suffix)
            print(getattr(model, attr))
        print(model.get_params())
        return model

    def check_agreement(candidate, reference, tolerance):
        """Assert every reported attribute matches within ``tolerance``."""
        for _, attr in reported:
            assert np.allclose(getattr(candidate, attr),
                               getattr(reference, attr),
                               rtol=tolerance)

    np.random.seed(1234)
    X = np.random.rand(m, n)
    if convert_to_float32:
        X = X.astype(np.float32)

    print("SVD on {} by {} matrix".format(X.shape[0], X.shape[1]))
    print("Original X Matrix")
    print(X)
    print("\n")

    # ---- Round 1: exact (ARPACK) scikit-learn reference ----
    print("sklearn run")
    arpack_reference = fit_and_dump(
        sklearnsvd(algorithm="arpack", n_components=k, random_state=42),
        X,
        "Sklearn",
    )

    print("GPU run through h2o4gpu wrapper")
    gpu_wrapper = fit_and_dump(
        TruncatedSVD(
            n_components=k,
            algorithm=[algorithm, 'randomized'],
            random_state=42,
            verbose=True,
            n_iter=500,
            tol=1E-7,
        ),
        X,
        "h2o4gpu tsvd",
    )
    check_agreement(gpu_wrapper, arpack_reference, 0.5)

    # ---- Round 2: randomized scikit-learn reference ----
    print("sklearn run")
    randomized_reference = fit_and_dump(
        sklearnsvd(algorithm="randomized", n_components=k, random_state=42),
        X,
        "Sklearn",
    )

    print("Sklearn run through h2o4gpu wrapper using n_gpus=0")
    # NOTE: with n_iter=[1000, 400] and tol=[1E-7, 1E-7] the results fail to
    # agree; the cusolver solution seems to diverge (or, less likely, the
    # randomized path has a matching bug), hence the values used below.
    # The list-valued n_iter/tol presumably pair up with the entries of the
    # algorithm list -- TODO confirm against the wrapper implementation.
    cpu_wrapper = fit_and_dump(
        TruncatedSVD(
            n_components=k,
            algorithm=[algorithm, 'randomized'],
            random_state=42,
            verbose=True,
            n_gpus=0,
            n_iter=[1000, 5],
            tol=[1E-7, 1E-4],
        ),
        X,
        "h2o4gpu tsvd",
    )
    check_agreement(cpu_wrapper, randomized_reference, 1E-2)
예제 #2
0
def func(dataset="higgs"):
    """Time h2o4gpu and scikit-learn truncated SVD on a named dataset.

    Four solvers are timed on the same matrix, in this order: h2o4gpu
    cusolver, h2o4gpu power, scikit-learn ARPACK, scikit-learn randomized.
    Each solver is first fitted once on a small random matrix as a warm-up,
    and the process sleeps five seconds between timed runs.

    Args:
        dataset: One of ``"higgs"``, ``"covtype"``, ``"regression"`` or
            ``"year"``.

    Returns:
        Tuple ``(cusolver_seconds, sklearn_arpack_seconds, power_seconds,
        sklearn_randomized_seconds, n_rows, n_cols)``.

    Raises:
        ValueError: If ``dataset`` is not one of the recognized names.
    """
    # Load the requested dataset.
    if dataset == "higgs":
        print("Getting Higgs dataset")
        X = datasets.get_higgs()
    elif dataset == "covtype":
        print("Getting covtype dataset")
        X = datasets.get_cover_type()
    elif dataset == "regression":
        print("Getting synthetic regression data")
        X = datasets.get_synthetic_regression()
    elif dataset == "year":
        print("Getting YearPredictionMSD data")
        X = datasets.get_year()
    else:
        # ValueError is still an Exception, so existing handlers keep
        # working; str() keeps the message well-formed for non-string input.
        raise ValueError("Unrecognized dataset " + str(dataset))

    # (label suffix, fitted attribute) in the order they are printed.
    reported = (
        ("Singular Values", "singular_values_"),
        ("Components (V^T)", "components_"),
        ("Explained Variance", "explained_variance_"),
        ("Explained Variance Ratio", "explained_variance_ratio_"),
    )

    def timed_fit(model):
        """Fit ``model`` on ``X`` and return the elapsed wall-clock seconds."""
        started = time.time()
        model.fit(X)
        return time.time() - started

    def dump(model, prefix):
        """Print the reported fitted attributes of ``model``."""
        for suffix, attr in reported:
            print(prefix + " " + suffix)
            print(getattr(model, attr))

    def announce(solver_name):
        """Print the matrix banner shown before each h2o4gpu run."""
        print(solver_name + " SVD on " + str(X.shape[0]) + " by " +
              str(X.shape[1]) + " matrix")
        print("Original X Matrix")
        print(X)
        print("\n")

    # Warm start: fit every solver once on a small matrix so one-time
    # start-up costs are not included in the timings below.
    W = np.random.rand(1000, 5)
    print('h2o4gpu Cusolver Warm Start')
    h2o4gpu_tsvd_cusolver_warm = TruncatedSVDH2O(n_components=3,
                                                 algorithm="cusolver",
                                                 tol=1e-5,
                                                 n_iter=100,
                                                 random_state=42,
                                                 verbose=True)
    h2o4gpu_tsvd_cusolver_warm.fit(W)
    print('h2o4gpu Power Warm Start')
    h2o4gpu_tsvd_power_warm = TruncatedSVDH2O(n_components=3,
                                              algorithm="power",
                                              tol=1e-5,
                                              n_iter=100,
                                              random_state=42,
                                              verbose=True)
    h2o4gpu_tsvd_power_warm.fit(W)

    print('sklearn ARPACK Warm Start')
    sklearn_tsvd_arpack_warm = sklearnsvd(n_components=3,
                                          algorithm="arpack",
                                          n_iter=5,
                                          random_state=42)
    sklearn_tsvd_arpack_warm.fit(W)
    print('sklearn Randomized Warm Start')
    sklearn_tsvd_random_warm = sklearnsvd(n_components=3,
                                          algorithm="randomized",
                                          tol=1e-5,
                                          n_iter=5,
                                          random_state=42)
    sklearn_tsvd_random_warm.fit(W)

    # Set k to n-1: the ARPACK solver requires n_components to be strictly
    # smaller than the number of features, so k == n would be rejected.
    k = X.shape[1] - 1

    # Exact scikit impl
    sklearn_tsvd_arpack = sklearnsvd(algorithm="arpack",
                                     n_components=k,
                                     tol=1e-5,
                                     n_iter=5,
                                     random_state=42)

    # Randomized scikit impl
    sklearn_tsvd_random = sklearnsvd(algorithm="randomized",
                                     n_components=k,
                                     tol=1e-5,
                                     n_iter=5,
                                     random_state=42)

    # Cusolver h2o4gpu impl
    announce("Cusolver")
    print("h2o4gpu cusolver tsvd run")
    h2o4gpu_tsvd_cusolver = TruncatedSVDH2O(n_components=k,
                                            algorithm="cusolver",
                                            tol=1e-5,
                                            n_iter=100,
                                            random_state=42)
    end_time_gpu_cusolver = timed_fit(h2o4gpu_tsvd_cusolver)
    print("Total time for h2o4gpu cusolver tsvd is " +
          str(end_time_gpu_cusolver))
    dump(h2o4gpu_tsvd_cusolver, "h2o4gpu tsvd cusolver")

    print("Sleep before Power")
    time.sleep(5)

    # Power h2o4gpu impl
    announce("Power")
    print("h2o4gpu tsvd power method run")
    h2o4gpu_tsvd_power = TruncatedSVDH2O(n_components=k,
                                         algorithm="power",
                                         tol=1e-5,
                                         n_iter=100,
                                         random_state=42)
    end_time_gpu_power = timed_fit(h2o4gpu_tsvd_power)
    print("Total time for h2o4gpu tsvd is " + str(end_time_gpu_power))
    dump(h2o4gpu_tsvd_power, "h2o4gpu tsvd power")

    print("Sleep before Sklearn ARPACK")
    time.sleep(5)

    # ARPACK sklearn impl
    print("\n")
    print("ARPACK sklearn run")
    end_sk_arpack = timed_fit(sklearn_tsvd_arpack)
    print("Total time for sklearn is " + str(end_sk_arpack))
    dump(sklearn_tsvd_arpack, "Sklearn ARPACK")

    print("Sleep before Sklearn Randomized")
    time.sleep(5)

    # Randomized sklearn impl
    print("\n")
    print("Randomized sklearn randomized run")
    end_sk_randomized = timed_fit(sklearn_tsvd_random)
    print("Total time for sklearn is " + str(end_sk_randomized))
    dump(sklearn_tsvd_random, "Sklearn Random")

    return (end_time_gpu_cusolver, end_sk_arpack, end_time_gpu_power,
            end_sk_randomized, X.shape[0], X.shape[1])
예제 #3
0
파일: test_tsvd.py 프로젝트: yinxx/h2o4gpu
def func(m=5000000, n=10, k=9, convert_to_float32=False):
    """Compare h2o4gpu and scikit-learn truncated SVD reconstructions.

    A seeded random ``m`` x ``n`` matrix is decomposed with
    ``TruncatedSVDH2O`` and with scikit-learn's ARPACK solver. The fitted
    attributes of both are printed, the h2o4gpu ``inverse_transform`` is
    checked against a manual ``U * Sigma * V^T`` reconstruction, and the
    reconstruction errors of both models are reported.

    Args:
        m: Number of rows of the random matrix.
        n: Number of columns of the random matrix.
        k: ``n_components`` passed to both estimators and to ``svds``.
        convert_to_float32: Cast the matrix to ``np.float32`` before fitting.

    Returns:
        Tuple ``(h2o4gpu_mae, sklearn_mae)``: the mean absolute element-wise
        difference between each model's reconstruction and ``X``.

    Raises:
        AssertionError: If ``inverse_transform`` and the manual h2o4gpu
            reconstruction differ beyond ``rtol=1E-2``.
    """
    # (label suffix, fitted attribute) in the order they are printed.
    reported = (
        ("Singular Values", "singular_values_"),
        ("Components (V^T)", "components_"),
        ("Explained Variance", "explained_variance_"),
        ("Explained Variance Ratio", "explained_variance_ratio_"),
    )

    def dump(model, prefix):
        """Print the reported fitted attributes of ``model``."""
        for suffix, attr in reported:
            print(prefix + " " + suffix)
            print(getattr(model, attr))

    np.random.seed(1234)

    X = np.random.rand(m, n)

    if convert_to_float32:
        X = X.astype(np.float32)

    # Exact scikit impl
    sklearn_tsvd = sklearnsvd(algorithm="arpack",
                              n_components=k,
                              random_state=42)

    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) + " matrix")
    print("Original X Matrix")
    print(X)
    print("\n")
    print("h2o4gpu tsvd run")
    # The timed span deliberately includes estimator construction.
    start_time = time.time()
    h2o4gpu_tsvd = TruncatedSVDH2O(n_components=k, random_state=42)
    h2o4gpu_tsvd.fit(X)
    end_time = time.time() - start_time
    print("Total time for h2o4gpu tsvd is " + str(end_time))
    dump(h2o4gpu_tsvd, "h2o4gpu tsvd")

    print("\n")
    print("sklearn run")
    start_sk = time.time()
    sklearn_tsvd.fit(X)
    end_sk = time.time() - start_sk
    print("Total time for sklearn is " + str(end_sk))
    dump(sklearn_tsvd, "Sklearn")

    print("\n")
    print("h2o4gpu tsvd U matrix")
    print(h2o4gpu_tsvd.U)
    print("h2o4gpu tsvd V^T")
    print(h2o4gpu_tsvd.components_)
    print("h2o4gpu tsvd Sigma")
    print(h2o4gpu_tsvd.singular_values_)
    print("h2o4gpu tsvd U * Sigma")
    x_tsvd_transformed = h2o4gpu_tsvd.U * h2o4gpu_tsvd.singular_values_
    print(x_tsvd_transformed)
    print("h2o4gpu tsvd Explained Variance")
    print(np.var(x_tsvd_transformed, axis=0))

    # Reference decomposition straight from svds. It returns singular values
    # in ascending order, so reverse everything and fix the signs.
    U, Sigma, VT = svds(X, k=k, tol=0)
    Sigma = Sigma[::-1]
    U, VT = svd_flip(U[:, ::-1], VT[::-1])
    print("\n")
    print("Sklearn U matrix")
    print(U)
    print("Sklearn V^T")
    print(VT)
    print("Sklearn Sigma")
    print(Sigma)
    print("Sklearn U * Sigma")
    X_transformed = U * Sigma
    print(X_transformed)
    print("sklearn Explained Variance")
    print(np.var(X_transformed, axis=0))

    print("U shape")
    print(np.shape(h2o4gpu_tsvd.U))
    print(np.shape(U))

    print("Singular Value shape")
    print(np.shape(h2o4gpu_tsvd.singular_values_))
    print(np.shape(sklearn_tsvd.singular_values_))

    print("Components shape")
    print(np.shape(h2o4gpu_tsvd.components_))
    print(np.shape(sklearn_tsvd.components_))

    print("Reconstruction")
    # Note: fit_transform refits both models on X.
    reconstruct_h2o4gpu = h2o4gpu_tsvd.inverse_transform(
        h2o4gpu_tsvd.fit_transform(X))
    reconstruct_sklearn = sklearn_tsvd.inverse_transform(
        sklearn_tsvd.fit_transform(X))

    # Manual reconstruction sum_i sigma_i * outer(U[:, i], V^T[i, :]) written
    # as one matrix product. Materializing every m x n outer product and
    # stacking them costs k full copies of the matrix (several GB at the
    # default m), whereas (U * sigma) @ V^T needs only the result itself.
    sigma = np.asarray(h2o4gpu_tsvd.singular_values_)
    rank = sigma.shape[0]
    reconstruct_h2o4gpu_manual = np.dot(
        np.asarray(h2o4gpu_tsvd.U)[:, :rank] * sigma,
        np.asarray(h2o4gpu_tsvd.components_)[:rank, :])

    print("Check inverse_transform() vs manual reconstruction for h2o4gpu")
    rtol = 1E-2
    assert np.allclose(reconstruct_h2o4gpu,
                       reconstruct_h2o4gpu_manual,
                       rtol=rtol)

    print("original X")
    print(X)
    print("h2o4gpu reconstruction")
    print(reconstruct_h2o4gpu)
    print("sklearn reconstruction")
    print(reconstruct_sklearn)
    h2o4gpu_diff = np.subtract(reconstruct_h2o4gpu, X)
    sklearn_diff = np.subtract(reconstruct_sklearn, X)
    print("h2o4gpu diff")
    print(h2o4gpu_diff)
    print("sklearn diff")
    print(sklearn_diff)
    h2o4gpu_max_diff = np.amax(abs(h2o4gpu_diff))
    sklearn_max_diff = np.amax(abs(sklearn_diff))
    print("h2o4gpu max diff")
    print(h2o4gpu_max_diff)
    print("sklearn max diff")
    print(sklearn_max_diff)
    print("h2o4gpu mae")
    h2o4gpu_mae = np.mean(np.abs(h2o4gpu_diff))
    print(h2o4gpu_mae)
    print("sklearn mae")
    sklearn_mae = np.mean(np.abs(sklearn_diff))
    print(sklearn_mae)

    return h2o4gpu_mae, sklearn_mae
예제 #4
0
def func(k=9, algorithm="cusolver", rtol=1E-3):
    """Check the h2o4gpu ``TruncatedSVD`` wrapper on an enlarged iris matrix.

    The iris feature matrix is doubled four times along the rows and eight
    times along the columns, then decomposed by the wrapper and by
    scikit-learn's ARPACK solver, whose results must agree.

    Args:
        k: ``n_components`` passed to both estimators.
        algorithm: ``algorithm`` argument of the h2o4gpu wrapper.
        rtol: Relative tolerance for every comparison except the second
            component, which uses a fixed tolerance of 0.7.

    Raises:
        AssertionError: If a compared attribute differs beyond tolerance.
    """
    # (label suffix, fitted attribute) in the order they are printed.
    reported = (
        ("Singular Values", "singular_values_"),
        ("Components (V^T)", "components_"),
        ("Explained Variance", "explained_variance_"),
        ("Explained Variance Ratio", "explained_variance_ratio_"),
    )

    def dump(model, prefix):
        """Print the reported fitted attributes of ``model``."""
        for suffix, attr in reported:
            print(prefix + " " + suffix)
            print(getattr(model, attr))

    X = load_iris().data

    # Enlarge the matrix by repeated self-concatenation: four doublings of
    # the rows (axis 0), then eight doublings of the columns (axis 1).
    for axis, doublings in ((0, 4), (1, 8)):
        for _ in range(doublings):
            X = np.concatenate((X, X), axis=axis)

    print("\n")
    print("SVD on {} by {} matrix".format(X.shape[0], X.shape[1]))
    print("Original X Matrix")
    print(X)

    print("\n")
    print("H2O4GPU run")
    gpu_model = TruncatedSVD(
        n_components=k,
        algorithm=algorithm,
        tol=1E-50,
        n_iter=200,
        random_state=42,
        verbose=True,
    )
    gpu_model.fit(X)
    dump(gpu_model, "h2o4gpu tsvd")

    print("\n")
    print("Sklearn run")
    # Exact scikit-learn implementation used as the reference.
    reference = sklearnsvd(algorithm="arpack",
                           n_components=k,
                           random_state=42)
    reference.fit(X)
    dump(reference, "Sklearn")

    # Singular values.
    assert np.allclose(gpu_model.singular_values_,
                       reference.singular_values_,
                       rtol=rtol)

    # Component of the first singular value (strict), then of the second
    # singular value (fixed loose tolerance).
    for row, tolerance in ((0, rtol), (1, .7)):
        assert np.allclose(gpu_model.components_[row],
                           reference.components_[row],
                           rtol=tolerance)

    # Explained variance and explained variance ratio.
    for attr in ("explained_variance_", "explained_variance_ratio_"):
        assert np.allclose(getattr(gpu_model, attr),
                           getattr(reference, attr),
                           rtol=rtol)
def func(m=5000000, n=10, k=9, convert_to_float32=False):
    """Time h2o4gpu and scikit-learn truncated SVD on a random matrix.

    Four solvers are timed on the same seeded ``m`` x ``n`` matrix, in this
    order: h2o4gpu cusolver, h2o4gpu power, scikit-learn ARPACK,
    scikit-learn randomized. Each solver is first fitted once on a small
    random matrix as a warm-up, and the process sleeps five seconds between
    timed runs.

    Args:
        m: Number of rows of the random matrix.
        n: Number of columns of the random matrix.
        k: ``n_components`` passed to every timed estimator.
        convert_to_float32: Cast the matrix to ``np.float32`` before fitting.

    Returns:
        Tuple ``(cusolver_seconds, sklearn_arpack_seconds, power_seconds,
        sklearn_randomized_seconds)``.
    """
    # (label suffix, fitted attribute) in the order they are printed.
    reported = (
        ("Singular Values", "singular_values_"),
        ("Components (V^T)", "components_"),
        ("Explained Variance", "explained_variance_"),
        ("Explained Variance Ratio", "explained_variance_ratio_"),
    )

    def timed_fit(model, data):
        """Fit ``model`` on ``data``; return elapsed wall-clock seconds."""
        began = time.time()
        model.fit(data)
        return time.time() - began

    def dump(model, prefix):
        """Print the reported fitted attributes of ``model``."""
        for suffix, attr in reported:
            print(prefix + " " + suffix)
            print(getattr(model, attr))

    def announce(solver_name, data):
        """Print the matrix banner shown before each h2o4gpu run."""
        print("{} SVD on {} by {} matrix".format(solver_name, data.shape[0],
                                                 data.shape[1]))
        print("Original X Matrix")
        print(data)
        print("\n")

    np.random.seed(1234)
    X = np.random.rand(m, n)
    if convert_to_float32:
        print("Converting input matrix to float32")
        X = X.astype(np.float32)

    # Warm-up fits on a small matrix, one per solver; the fitted models are
    # kept in local names for the duration of the benchmark.
    W = np.random.rand(1000, 5)

    print('h2o4gpu Cusolver Warm Start')
    warm_cusolver = TruncatedSVDH2O(n_components=3, algorithm="cusolver",
                                    tol=1e-5, n_iter=100, random_state=42,
                                    verbose=True)
    warm_cusolver.fit(W)

    print('h2o4gpu Power Warm Start')
    warm_power = TruncatedSVDH2O(n_components=3, algorithm="power",
                                 tol=1e-5, n_iter=100, random_state=42,
                                 verbose=True)
    warm_power.fit(W)

    print('sklearn ARPACK Warm Start')
    warm_arpack = sklearnsvd(n_components=3, algorithm="arpack", n_iter=5,
                             random_state=42)
    warm_arpack.fit(W)

    print('sklearn Randomized Warm Start')
    warm_randomized = sklearnsvd(n_components=3, algorithm="randomized",
                                 tol=1e-5, n_iter=5, random_state=42)
    warm_randomized.fit(W)

    # scikit-learn estimators are built up front: exact (ARPACK) first,
    # then randomized.
    sk_arpack = sklearnsvd(algorithm="arpack", n_components=k, tol=1e-5,
                           n_iter=5, random_state=42)
    sk_randomized = sklearnsvd(algorithm="randomized", n_components=k,
                               tol=1e-5, n_iter=5, random_state=42)

    # --- h2o4gpu cusolver ---
    announce("Cusolver", X)
    print("h2o4gpu cusolver tsvd run")
    gpu_cusolver = TruncatedSVDH2O(n_components=k, algorithm="cusolver",
                                   tol=1e-5, n_iter=100, random_state=42)
    cusolver_seconds = timed_fit(gpu_cusolver, X)
    print("Total time for h2o4gpu cusolver tsvd is " + str(cusolver_seconds))
    dump(gpu_cusolver, "h2o4gpu tsvd cusolver")

    print("Sleep before Power")
    time.sleep(5)

    # --- h2o4gpu power method ---
    announce("Power", X)
    print("h2o4gpu tsvd power method run")
    gpu_power = TruncatedSVDH2O(n_components=k, algorithm="power",
                                tol=1e-5, n_iter=100, random_state=42)
    power_seconds = timed_fit(gpu_power, X)
    print("Total time for h2o4gpu tsvd is " + str(power_seconds))
    dump(gpu_power, "h2o4gpu tsvd power")

    print("Sleep before Sklearn ARPACK")
    time.sleep(5)

    # --- scikit-learn ARPACK ---
    print("\n")
    print("ARPACK sklearn run")
    arpack_seconds = timed_fit(sk_arpack, X)
    print("Total time for sklearn is " + str(arpack_seconds))
    dump(sk_arpack, "Sklearn ARPACK")

    print("Sleep before Sklearn Randomized")
    time.sleep(5)

    # --- scikit-learn randomized ---
    print("\n")
    print("Randomized sklearn randomized run")
    randomized_seconds = timed_fit(sk_randomized, X)
    print("Total time for sklearn is " + str(randomized_seconds))
    dump(sk_randomized, "Sklearn Random")

    return cusolver_seconds, arpack_seconds, power_seconds, randomized_seconds
예제 #6
0
def func(m=5000, n=10, k=9, algorithm="cusolver", convert_to_float32=False):
    """Check the h2o4gpu ``TruncatedSVD`` wrapper against scikit-learn ARPACK.

    A seeded random ``m`` x ``n`` matrix is decomposed by both
    implementations. Singular values, the first component, explained
    variance and explained variance ratio must agree within ``rtol=1E-3``.
    The second component is checked with a loose tolerance of 0.7, except
    for ``algorithm="power"``, where the largest absolute deviation is only
    printed.

    Args:
        m: Number of rows of the random matrix.
        n: Number of columns of the random matrix.
        k: ``n_components`` passed to both estimators.
        algorithm: ``algorithm`` argument of the h2o4gpu wrapper.
        convert_to_float32: Cast the matrix to ``np.float32`` before fitting.

    Raises:
        AssertionError: If a compared attribute differs beyond tolerance.
    """
    np.random.seed(1234)

    X = np.random.rand(m, n)
    if convert_to_float32:
        X = X.astype(np.float32)

    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) + " matrix")
    print("Original X Matrix")
    print(X)

    print("\n")
    print("H2O4GPU run")
    h2o4gpu_tsvd_sklearn_wrapper = TruncatedSVD(n_components=k,
                                                algorithm=algorithm,
                                                tol=1E-50,
                                                n_iter=200,
                                                random_state=42,
                                                verbose=True)
    h2o4gpu_tsvd_sklearn_wrapper.fit(X)
    print("h2o4gpu tsvd Singular Values")
    print(h2o4gpu_tsvd_sklearn_wrapper.singular_values_)
    print("h2o4gpu tsvd Components (V^T)")
    print(h2o4gpu_tsvd_sklearn_wrapper.components_)
    print("h2o4gpu tsvd Explained Variance")
    print(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_)
    print("h2o4gpu tsvd Explained Variance Ratio")
    print(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_ratio_)

    print("\n")
    print("Sklearn run")
    # Exact scikit impl
    sklearn_tsvd = sklearnsvd(algorithm="arpack",
                              n_components=k,
                              random_state=42)
    sklearn_tsvd.fit(X)
    print("Sklearn Singular Values")
    print(sklearn_tsvd.singular_values_)
    print("Sklearn Components (V^T)")
    print(sklearn_tsvd.components_)
    print("Sklearn Explained Variance")
    print(sklearn_tsvd.explained_variance_)
    print("Sklearn Explained Variance Ratio")
    print(sklearn_tsvd.explained_variance_ratio_)

    rtol = 1E-3

    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.singular_values_,
                       sklearn_tsvd.singular_values_,
                       rtol=rtol)

    # Check components for first singular value
    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.components_[0],
                       sklearn_tsvd.components_[0],
                       rtol=rtol)

    # Check components for second singular value
    # TODO (navdeep) Why does this not match?
    if algorithm == "power":
        # The power method is not asserted here; report how far off it is.
        # Use absolute differences so that a large negative deviation is
        # not hidden behind a small positive maximum.
        print("Max diff of power components")
        print(
            str(
                np.max(
                    np.abs(h2o4gpu_tsvd_sklearn_wrapper.components_[1] -
                           sklearn_tsvd.components_[1]))))
    else:
        assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.components_[1],
                           sklearn_tsvd.components_[1],
                           rtol=.7)

    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_,
                       sklearn_tsvd.explained_variance_,
                       rtol=rtol)
    assert np.allclose(h2o4gpu_tsvd_sklearn_wrapper.explained_variance_ratio_,
                       sklearn_tsvd.explained_variance_ratio_,
                       rtol=rtol)
예제 #7
0
def func(m=5000, n=10, k=9, algorithm="cusolver"):
    """Check the h2o4gpu ``TruncatedSVD`` wrapper against scikit-learn ARPACK.

    A seeded random ``m`` x ``n`` matrix is decomposed by both
    implementations. Singular values and explained variance (and its ratio)
    are compared; the tolerances are tight (1E-5) when the wrapper itself
    runs ``'arpack'`` and looser otherwise. Components are printed but not
    asserted.

    Args:
        m: Number of rows of the random matrix.
        n: Number of columns of the random matrix.
        k: ``n_components`` passed to both estimators.
        algorithm: ``algorithm`` argument of the h2o4gpu wrapper.

    Raises:
        AssertionError: If a compared attribute differs beyond tolerance.
    """
    # (label suffix, fitted attribute) in the order they are printed.
    reported = (
        ("Singular Values", "singular_values_"),
        ("Components (V^T)", "components_"),
        ("Explained Variance", "explained_variance_"),
        ("Explained Variance Ratio", "explained_variance_ratio_"),
    )

    def dump(model, prefix):
        """Print the reported fitted attributes of ``model``."""
        for suffix, attr in reported:
            print(prefix + " " + suffix)
            print(getattr(model, attr))

    np.random.seed(1234)
    X = np.random.rand(m, n)

    # Exact scikit-learn implementation, built before any output is printed.
    reference = sklearnsvd(algorithm="arpack",
                           n_components=k,
                           random_state=42)

    print("SVD on {} by {} matrix".format(X.shape[0], X.shape[1]))
    print("Original X Matrix")
    print(X)
    print("\n")
    print("Sklearn run through h2o4gpu wrapper")

    wrapped = TruncatedSVD(
        n_components=k,
        algorithm=algorithm,
        random_state=42,
        verbose=True,
        n_iter=100,
    )
    wrapped.fit(X)
    dump(wrapped, "h2o4gpu tsvd")

    print("\n")
    print("sklearn run")
    reference.fit(X)
    dump(reference, "Sklearn")

    exact = algorithm == 'arpack'

    singular_rtol = 1E-5 if exact else 1E-2
    assert np.allclose(wrapped.singular_values_,
                       reference.singular_values_,
                       rtol=singular_rtol)

    # TODO (navdeep) Why does this not match?
    # The components_ comparison is disabled; it was intended to run with
    # rtol=1E-5 for 'arpack' and rtol=1E-1 otherwise.

    variance_rtol = 1E-5 if exact else 0.5
    for attr in ("explained_variance_", "explained_variance_ratio_"):
        assert np.allclose(getattr(wrapped, attr),
                           getattr(reference, attr),
                           rtol=variance_rtol)