Esempio n. 1
0
def func(m=5000000, n=10, k=9):
    """Compare an h2o4gpu truncated SVD with scikit-learn on random data.

    A seeded ``m`` x ``n`` uniform random matrix is decomposed into ``k``
    components by ``TruncatedSVDH2O`` and by ``sklearnsvd`` (ARPACK). Both
    fits are timed, their fitted attributes are printed, and ``X`` is
    rebuilt with ``inverse_transform(fit_transform(X))`` on each estimator.

    NOTE(review): ``TruncatedSVDH2O``, ``sklearnsvd``, ``svds`` and
    ``svd_flip`` come from outside this block; presumably they are the
    h2o4gpu estimator, scikit-learn's ``TruncatedSVD``,
    ``scipy.sparse.linalg.svds`` and ``sklearn.utils.extmath.svd_flip`` --
    confirm against the file's imports.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of components requested from every solver.

    Returns:
        Tuple ``(h2o4gpu_mae, sklearn_mae)``: the mean absolute difference
        between ``X`` and each estimator's reconstruction.

    Raises:
        AssertionError: If the h2o4gpu ``inverse_transform`` result does not
            match ``U * Sigma * V^T`` rebuilt from the fitted attributes.
    """
    np.random.seed(1234)

    X = np.random.rand(m, n)

    # Exact scikit impl
    sklearn_tsvd = sklearnsvd(algorithm="arpack",
                              n_components=k,
                              random_state=42)

    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) + " matrix")
    print("Original X Matrix")
    print(X)
    print("\n")
    print("h2o4gpu tsvd run")
    start_time = time.time()
    h2o4gpu_tsvd = TruncatedSVDH2O(n_components=k)
    h2o4gpu_tsvd.fit(X)
    end_time = time.time() - start_time
    print("Total time for h2o4gpu tsvd is " + str(end_time))
    print("h2o4gpu tsvd Singular Values")
    print(h2o4gpu_tsvd.singular_values_)
    print("h2o4gpu tsvd Components (V^T)")
    print(h2o4gpu_tsvd.components_)
    print("h2o4gpu tsvd Explained Variance")
    print(h2o4gpu_tsvd.explained_variance_)
    print("h2o4gpu tsvd Explained Variance Ratio")
    print(h2o4gpu_tsvd.explained_variance_ratio_)

    print("\n")
    print("sklearn run")
    start_sk = time.time()
    sklearn_tsvd.fit(X)
    end_sk = time.time() - start_sk
    print("Total time for sklearn is " + str(end_sk))
    print("Sklearn Singular Values")
    print(sklearn_tsvd.singular_values_)
    print("Sklearn Components (V^T)")
    print(sklearn_tsvd.components_)
    print("Sklearn Explained Variance")
    print(sklearn_tsvd.explained_variance_)
    print("Sklearn Explained Variance Ratio")
    print(sklearn_tsvd.explained_variance_ratio_)

    print("\n")
    print("h2o4gpu tsvd U matrix")
    print(h2o4gpu_tsvd.U)
    print("h2o4gpu tsvd V^T")
    print(h2o4gpu_tsvd.components_)
    print("h2o4gpu tsvd Sigma")
    print(h2o4gpu_tsvd.singular_values_)
    print("h2o4gpu tsvd U * Sigma")
    x_tsvd_transformed = h2o4gpu_tsvd.U * h2o4gpu_tsvd.singular_values_
    print(x_tsvd_transformed)
    print("h2o4gpu tsvd Explained Variance")
    print(np.var(x_tsvd_transformed, axis=0))

    # Reference factors computed directly with svds. The factor order is
    # reversed and svd_flip is applied before printing (presumably to get
    # descending singular values and a fixed sign convention -- confirm).
    U, Sigma, VT = svds(X, k=k, tol=0)
    Sigma = Sigma[::-1]
    U, VT = svd_flip(U[:, ::-1], VT[::-1])
    print("\n")
    print("Sklearn U matrix")
    print(U)
    print("Sklearn V^T")
    print(VT)
    print("Sklearn Sigma")
    print(Sigma)
    print("Sklearn U * Sigma")
    X_transformed = U * Sigma
    print(X_transformed)
    print("sklearn Explained Variance")
    print(np.var(X_transformed, axis=0))

    print("U shape")
    print(np.shape(h2o4gpu_tsvd.U))
    print(np.shape(U))

    print("Singular Value shape")
    print(np.shape(h2o4gpu_tsvd.singular_values_))
    print(np.shape(sklearn_tsvd.singular_values_))

    print("Components shape")
    print(np.shape(h2o4gpu_tsvd.components_))
    print(np.shape(sklearn_tsvd.components_))

    print("Reconstruction")
    # fit_transform refits each estimator on X before inverting.
    reconstruct_h2o4gpu = h2o4gpu_tsvd.inverse_transform(
        h2o4gpu_tsvd.fit_transform(X))
    reconstruct_sklearn = sklearn_tsvd.inverse_transform(
        sklearn_tsvd.fit_transform(X))
    # U * Sigma * V^T as one matrix product. This equals the sum of the
    # per-component outer products but avoids materialising one m-by-n
    # array per component, which is prohibitive for the default m.
    reconstruct_h2o4gpu_manual = np.dot(
        h2o4gpu_tsvd.U * h2o4gpu_tsvd.singular_values_,
        h2o4gpu_tsvd.components_)
    print("Check inverse_transform() vs manual reconstruction for h2o4gpu")
    assert np.allclose(reconstruct_h2o4gpu, reconstruct_h2o4gpu_manual)
    print("original X")
    print(X)
    print("h2o4gpu reconstruction")
    print(reconstruct_h2o4gpu)
    print("sklearn reconstruction")
    print(reconstruct_sklearn)
    h2o4gpu_diff = np.subtract(reconstruct_h2o4gpu, X)
    sklearn_diff = np.subtract(reconstruct_sklearn, X)
    print("h2o4gpu diff")
    print(h2o4gpu_diff)
    print("sklearn diff")
    print(sklearn_diff)
    h2o4gpu_max_diff = np.amax(np.abs(h2o4gpu_diff))
    sklearn_max_diff = np.amax(np.abs(sklearn_diff))
    print("h2o4gpu max diff")
    print(h2o4gpu_max_diff)
    print("sklearn max diff")
    print(sklearn_max_diff)
    print("h2o4gpu mae")
    h2o4gpu_mae = np.mean(np.abs(h2o4gpu_diff))
    print(h2o4gpu_mae)
    print("sklearn mae")
    sklearn_mae = np.mean(np.abs(sklearn_diff))
    print(sklearn_mae)

    return h2o4gpu_mae, sklearn_mae
Esempio n. 2
0
def func_bench(m=2000, n=20, k=5, n_runs=5):
    """Benchmark the cusolver and power solvers and append averages to a CSV.

    Both ``TruncatedSVDH2O`` solvers are first fitted once on a small random
    matrix (the "warm start"), then each is refitted ``n_runs`` times on a
    seeded ``m`` x ``n`` random matrix with ``n_components=k``. The mean fit
    time per solver is appended as one row each to
    ``power_cusolver_avg_run.csv`` in the current working directory.

    Args:
        m: Number of rows of the benchmark matrix.
        n: Number of columns of the benchmark matrix.
        k: Number of components requested in the timed fits.
        n_runs: Number of timed repetitions per solver; the reported time is
            the mean over these runs.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1 (the mean is undefined).
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    np.random.seed(1234)

    X = np.random.rand(m, n)

    # Warm start: fit each solver once on a small matrix before timing
    # (presumably to keep one-time initialisation out of the measurements).
    W = np.random.rand(1000, 5)
    print('Cusolver Warm Start')
    h2o4gpu_tsvd_cusolver = TruncatedSVDH2O(n_components=3,
                                            algorithm="cusolver",
                                            random_state=42)
    h2o4gpu_tsvd_cusolver.fit(W)
    print('Power Warm Start')
    h2o4gpu_tsvd_power = TruncatedSVDH2O(n_components=3,
                                         algorithm="power",
                                         tol=1e-5,
                                         n_iter=100,
                                         random_state=42,
                                         verbose=True)
    h2o4gpu_tsvd_power.fit(W)

    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) +
          " matrix with k=" + str(k))
    print("\n")

    cusolver_sum_time = 0
    power_sum_time = 0
    for i in range(n_runs):
        # perf_counter is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments.
        start_time_cusolver = time.perf_counter()
        print("CUSOLVER Bencmark on iteration " + str(i))
        h2o4gpu_tsvd_cusolver.n_components = k
        h2o4gpu_tsvd_cusolver.fit(X)
        end_time_cusolver = time.perf_counter() - start_time_cusolver
        cusolver_sum_time += end_time_cusolver
        print("Took cusolver " + str(end_time_cusolver) +
              " seconds on iteration " + str(i))

        print("Sleep before Power on iteration " + str(i))
        time.sleep(5)

        start_time_power = time.perf_counter()
        print("POWER Bencmark on iteration " + str(i))
        h2o4gpu_tsvd_power.n_components = k
        h2o4gpu_tsvd_power.fit(X)
        end_time_power = time.perf_counter() - start_time_power
        power_sum_time += end_time_power
        print("Took power method " + str(end_time_power) +
              " seconds on iteration " + str(i))

    # Append one row per solver: name, mean seconds, dimensions, k.
    dim = str(m) + "by" + str(n)
    # The with-statement closes the file; no explicit close() is needed.
    with open('power_cusolver_avg_run.csv', 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile,
                               delimiter=',',
                               quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(
            ['cusolver', str(cusolver_sum_time / n_runs), dim,
             str(k)])
        csvwriter.writerow(
            ['power', str(power_sum_time / n_runs), dim, str(k)])
Esempio n. 3
0
def func(m=2000, n=20, k=5):
    """Check that the cusolver and power solvers agree on random data.

    Fits ``TruncatedSVDH2O`` with ``algorithm="cusolver"`` and with
    ``algorithm="power"`` on the same seeded ``m`` x ``n`` random matrix,
    prints the elapsed time and fitted attributes of each, and asserts that
    singular values, explained variance and explained variance ratio match
    within a relative tolerance.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of components requested from both solvers.

    Raises:
        AssertionError: If any compared attribute differs beyond tolerance.
    """
    np.random.seed(1234)
    X = np.random.rand(m, n)

    n_rows, n_cols = X.shape[0], X.shape[1]
    print(f"SVD on {n_rows} by {n_cols} matrix")
    print("\n")

    # The timed span covers the banner print, construction and fit.
    cusolver_t0 = time.time()
    print("CUSOLVER")
    cusolver_model = TruncatedSVDH2O(
        n_components=k,
        algorithm="cusolver",
        random_state=42,
    )
    cusolver_model.fit(X)
    cusolver_elapsed = time.time() - cusolver_t0
    print(f"Took cusolver {cusolver_elapsed} seconds")

    power_t0 = time.time()
    print("POWER")
    power_model = TruncatedSVDH2O(
        n_components=k,
        algorithm="power",
        tol=1E-50,
        n_iter=2000,
        random_state=42,
        verbose=True,
    )
    power_model.fit(X)
    power_elapsed = time.time() - power_t0
    print(f"Took power method {power_elapsed} seconds")

    # (heading, estimator, attribute name) in the order they are reported.
    report = (
        ("h2o4gpu cusolver components", cusolver_model, "components_"),
        ("h2o4gpu cusolver singular values", cusolver_model,
         "singular_values_"),
        ("h2o4gpu tsvd cusolver Explained Variance", cusolver_model,
         "explained_variance_"),
        ("h2o4gpu tsvd cusolver Explained Variance Ratio", cusolver_model,
         "explained_variance_ratio_"),
        ("h2o4gpu power components", power_model, "components_"),
        ("h2o4gpu power singular values", power_model, "singular_values_"),
        ("h2o4gpu tsvd power Explained Variance", power_model,
         "explained_variance_"),
        ("h2o4gpu tsvd power Explained Variance Ratio", power_model,
         "explained_variance_ratio_"),
    )
    for heading, model, attr_name in report:
        print(heading)
        print(getattr(model, attr_name))

    # (message, attribute name, relative tolerance) for each agreement check.
    checks = (
        ("Checking singular values", "singular_values_", 1E-5),
        ("Checking explained variance", "explained_variance_", 1E-3),
        ("Checking explained variance ratio", "explained_variance_ratio_",
         1E-3),
    )
    for message, attr_name, rtol in checks:
        print(message)
        assert np.allclose(getattr(cusolver_model, attr_name),
                           getattr(power_model, attr_name),
                           rtol=rtol)
def func(m=5000000, n=10, k=9, convert_to_float32=False):
    """Time four truncated-SVD solvers on the same random matrix.

    After fitting each solver once on a small matrix (the "warm start"),
    this fits, in order, ``TruncatedSVDH2O`` with cusolver,
    ``TruncatedSVDH2O`` with the power method, ``sklearnsvd`` with ARPACK and
    ``sklearnsvd`` with the randomized algorithm on a seeded ``m`` x ``n``
    random matrix, sleeping five seconds between runs and printing each
    solver's fitted attributes.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of components requested in the timed fits.
        convert_to_float32: When true, cast the input matrix to
            ``np.float32`` before fitting (the warm-start matrix is not cast).

    Returns:
        Tuple of fit durations in seconds, ordered as
        ``(h2o4gpu cusolver, sklearn ARPACK, h2o4gpu power,
        sklearn randomized)``.
    """

    def _print_fitted(prefix, model):
        # Print the four fitted attributes under "<prefix> <name>" headings.
        print(f"{prefix} Singular Values")
        print(model.singular_values_)
        print(f"{prefix} Components (V^T)")
        print(model.components_)
        print(f"{prefix} Explained Variance")
        print(model.explained_variance_)
        print(f"{prefix} Explained Variance Ratio")
        print(model.explained_variance_ratio_)

    np.random.seed(1234)
    X = np.random.rand(m, n)
    if convert_to_float32:
        print("Converting input matrix to float32")
        X = X.astype(np.float32)

    # Warm start: one throwaway fit per solver on a small matrix.
    W = np.random.rand(1000, 5)

    print('h2o4gpu Cusolver Warm Start')
    warm_cusolver = TruncatedSVDH2O(
        n_components=3,
        algorithm="cusolver",
        tol=1e-5,
        n_iter=100,
        random_state=42,
        verbose=True,
    )
    warm_cusolver.fit(W)

    print('h2o4gpu Power Warm Start')
    warm_power = TruncatedSVDH2O(
        n_components=3,
        algorithm="power",
        tol=1e-5,
        n_iter=100,
        random_state=42,
        verbose=True,
    )
    warm_power.fit(W)

    print('sklearn ARPACK Warm Start')
    warm_arpack = sklearnsvd(
        n_components=3,
        algorithm="arpack",
        n_iter=5,
        random_state=42,
    )
    warm_arpack.fit(W)

    print('sklearn Randomized Warm Start')
    warm_randomized = sklearnsvd(
        n_components=3,
        algorithm="randomized",
        tol=1e-5,
        n_iter=5,
        random_state=42,
    )
    warm_randomized.fit(W)

    # scikit-learn estimators for the timed runs: ARPACK, then randomized.
    arpack_model = sklearnsvd(
        algorithm="arpack",
        n_components=k,
        tol=1e-5,
        n_iter=5,
        random_state=42,
    )
    randomized_model = sklearnsvd(
        algorithm="randomized",
        n_components=k,
        tol=1e-5,
        n_iter=5,
        random_state=42,
    )

    n_rows, n_cols = X.shape[0], X.shape[1]

    # h2o4gpu cusolver run (only fit() is inside the timed span).
    print(f"Cusolver SVD on {n_rows} by {n_cols} matrix")
    print("Original X Matrix")
    print(X)
    print("\n")
    print("h2o4gpu cusolver tsvd run")
    cusolver_model = TruncatedSVDH2O(
        n_components=k,
        algorithm="cusolver",
        tol=1e-5,
        n_iter=100,
        random_state=42,
    )
    tick = time.time()
    cusolver_model.fit(X)
    end_time_gpu_cusolver = time.time() - tick
    print(f"Total time for h2o4gpu cusolver tsvd is {end_time_gpu_cusolver}")
    _print_fitted("h2o4gpu tsvd cusolver", cusolver_model)

    print("Sleep before Power")
    time.sleep(5)

    # h2o4gpu power-method run.
    print(f"Power SVD on {n_rows} by {n_cols} matrix")
    print("Original X Matrix")
    print(X)
    print("\n")
    print("h2o4gpu tsvd power method run")
    power_model = TruncatedSVDH2O(
        n_components=k,
        algorithm="power",
        tol=1e-5,
        n_iter=100,
        random_state=42,
    )
    tick = time.time()
    power_model.fit(X)
    end_time_gpu_power = time.time() - tick
    print(f"Total time for h2o4gpu tsvd is {end_time_gpu_power}")
    _print_fitted("h2o4gpu tsvd power", power_model)

    print("Sleep before Sklearn ARPACK")
    time.sleep(5)

    # scikit-learn ARPACK run.
    print("\n")
    print("ARPACK sklearn run")
    tick = time.time()
    arpack_model.fit(X)
    end_sk_arpack = time.time() - tick
    print(f"Total time for sklearn is {end_sk_arpack}")
    _print_fitted("Sklearn ARPACK", arpack_model)

    print("Sleep before Sklearn Randomized")
    time.sleep(5)

    # scikit-learn randomized run.
    print("\n")
    print("Randomized sklearn randomized run")
    tick = time.time()
    randomized_model.fit(X)
    end_sk_randomized = time.time() - tick
    print(f"Total time for sklearn is {end_sk_randomized}")
    _print_fitted("Sklearn Random", randomized_model)

    return (end_time_gpu_cusolver, end_sk_arpack, end_time_gpu_power,
            end_sk_randomized)