def func(m=5000, n=10, k=9, convert_to_float32=False):
    """Fit the h2o4gpu truncated SVD on a random matrix and print the results.

    A seeded ``m`` x ``n`` matrix of uniform random values is generated,
    optionally cast to float32, and decomposed with ``TruncatedSVDH2O``.
    The fit time, singular values, components, explained variance and
    explained variance ratio are printed; nothing is returned or asserted.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of components requested from the truncated SVD.
        convert_to_float32: When true, cast the input matrix to
            ``np.float32`` before fitting.
    """
    np.random.seed(1234)  # fixed seed so every run sees the same matrix
    X = np.random.rand(m, n)
    if convert_to_float32:
        X = X.astype(np.float32)

    gpu_id = 0
    total_gpu, _total_mem, _gpu_type = gpu.get_gpu_info()
    # Id 1 is the *second* device, so it may only be selected when more than
    # one GPU is present; with a single GPU we must stay on id 0.
    if total_gpu > 1:
        gpu_id = 1  # Use second gpu

    print("\n")
    print("SVD on gpu id -> " + str(gpu_id))
    print("SVD on " + str(X.shape[0]) + " by " + str(X.shape[1]) + " matrix")
    print("Original X Matrix")
    print(X)
    print("\n")

    print("h2o4gpu tsvd run")
    start_time = time.time()
    h2o4gpu_tsvd = TruncatedSVDH2O(n_components=k, gpu_id=gpu_id)
    h2o4gpu_tsvd.fit(X)
    elapsed = time.time() - start_time  # covers construction and fit
    print("Total time for h2o4gpu tsvd is " + str(elapsed))

    print("h2o4gpu tsvd Singular Values")
    print(h2o4gpu_tsvd.singular_values_)
    print("h2o4gpu tsvd Components (V^T)")
    print(h2o4gpu_tsvd.components_)
    print("h2o4gpu tsvd Explained Variance")
    print(h2o4gpu_tsvd.explained_variance_)
    print("h2o4gpu tsvd Explained Variance Ratio")
    print(h2o4gpu_tsvd.explained_variance_ratio_)
def func(m=5000000, n=10, k=9, change_gpu_id=False, use_wrappper=False,
         convert_to_float32=False):
    """Check that h2o4gpu PCA agrees with scikit-learn PCA on random data.

    A seeded ``m`` x ``n`` matrix of uniform random values is fitted with an
    h2o4gpu PCA estimator and with scikit-learn's PCA (``arpack`` solver).
    The fitted attributes of both are printed and compared with
    ``np.allclose``.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of principal components requested from both estimators.
        change_gpu_id: When true, query the available GPUs and run on the
            second one (id 1) if more than one is present.
        use_wrappper: When true, use the ``PCA`` class; otherwise use
            ``PCAH2O`` directly.
        convert_to_float32: When true, cast the input to ``np.float32`` and
            loosen the tolerance used for the column-mean comparison.

    Raises:
        AssertionError: If any compared attribute differs beyond tolerance.
    """
    np.random.seed(1234)  # fixed seed so both estimators see the same data
    X = np.random.rand(m, n)
    if convert_to_float32:
        X = X.astype(np.float32)

    gpu_id = 0
    if change_gpu_id:
        total_gpu, _total_mem, _gpu_type = gpu.get_gpu_info()
        # Id 1 is the *second* device, so it is only valid when more than
        # one GPU is present.
        if total_gpu > 1:
            gpu_id = 1  # Use second gpu

    print("\n")
    print(X)
    print(np.mean(X, axis=0))

    if use_wrappper:
        h2o4gpu_pca = PCA(n_components=k, gpu_id=gpu_id)
    else:
        h2o4gpu_pca = PCAH2O(n_components=k, gpu_id=gpu_id)
    scikit_pca = PCASklearn(n_components=k, svd_solver="arpack")

    scikit_pca.fit(X)
    h2o4gpu_pca.fit(X)

    print("Mean")
    print(h2o4gpu_pca.mean_)
    print(scikit_pca.mean_)
    if convert_to_float32:
        # Single precision input: allow a larger relative error.
        assert np.allclose(h2o4gpu_pca.mean_, scikit_pca.mean_, rtol=1e-4)
    else:
        assert np.allclose(h2o4gpu_pca.mean_, scikit_pca.mean_)

    print("Noise Variance")
    print(h2o4gpu_pca.noise_variance_)
    print(scikit_pca.noise_variance_)
    # Compare against scikit-learn; comparing the h2o4gpu value with itself
    # could never fail and therefore verified nothing.
    assert np.allclose(h2o4gpu_pca.noise_variance_,
                       scikit_pca.noise_variance_)

    print("Explained variance")
    print(h2o4gpu_pca.explained_variance_)
    print(scikit_pca.explained_variance_)
    assert np.allclose(h2o4gpu_pca.explained_variance_,
                       scikit_pca.explained_variance_)

    print("Explained variance ratio")
    print(h2o4gpu_pca.explained_variance_ratio_)
    print(scikit_pca.explained_variance_ratio_)
    assert np.allclose(h2o4gpu_pca.explained_variance_ratio_,
                       scikit_pca.explained_variance_ratio_, rtol=.1)

    print("Singular values")
    print(h2o4gpu_pca.singular_values_)
    print(scikit_pca.singular_values_)
    assert np.allclose(h2o4gpu_pca.singular_values_,
                       scikit_pca.singular_values_)

    print("Components")
    print(h2o4gpu_pca.components_)
    print(scikit_pca.components_)
    assert np.allclose(h2o4gpu_pca.components_, scikit_pca.components_,
                       rtol=.1)

    print("Num components")
    print(h2o4gpu_pca.n_components)
    print(scikit_pca.n_components)
    assert h2o4gpu_pca.n_components_ == scikit_pca.n_components_
def func(m=5000000, n=10, k=9, change_gpu_id=False, use_wrappper=False,
         convert_to_float32=False, whiten=False):
    """Check h2o4gpu PCA against scikit-learn PCA, optionally with whitening.

    A seeded ``m`` x ``n`` matrix of uniform random values is fitted with an
    h2o4gpu PCA estimator and with scikit-learn's PCA (``arpack`` solver),
    both receiving the same ``whiten`` flag. The fitted attributes of both
    are printed and compared with ``np.allclose``.

    Args:
        m: Number of rows of the random input matrix.
        n: Number of columns of the random input matrix.
        k: Number of principal components requested from both estimators.
        change_gpu_id: When true, query the available GPUs and run on the
            second one (id 1) if more than one is present.
        use_wrappper: When true, use the ``PCA`` class; otherwise use
            ``PCAH2O`` directly.
        convert_to_float32: When true, cast the input to ``np.float32`` and
            loosen the tolerance used for the column-mean comparison.
        whiten: Forwarded to both estimators. When true, the scikit-learn
            components are rescaled by hand before being compared.

    Raises:
        AssertionError: If any compared attribute differs beyond tolerance.
    """
    np.random.seed(1234)  # fixed seed so both estimators see the same data
    X = np.random.rand(m, n)
    if convert_to_float32:
        X = X.astype(np.float32)

    gpu_id = 0
    if change_gpu_id:
        total_gpu, _total_mem, _gpu_type = gpu.get_gpu_info()
        if total_gpu > 1:  # More than one gpu?
            gpu_id = 1  # Use second gpu

    print("\n")
    print(X)
    print(np.mean(X, axis=0))

    if use_wrappper:
        h2o4gpu_pca = PCA(n_components=k, gpu_id=gpu_id, whiten=whiten)
    else:
        h2o4gpu_pca = PCAH2O(n_components=k, gpu_id=gpu_id, whiten=whiten)
    scikit_pca = PCASklearn(n_components=k, svd_solver="arpack",
                            whiten=whiten)

    print("Fitting scikit PCA")
    scikit_pca.fit(X)
    print("Fitting h2o4gpu PCA")
    h2o4gpu_pca.fit(X)

    print("Column Means")
    print(h2o4gpu_pca.mean_)
    print(scikit_pca.mean_)
    if convert_to_float32:
        # Single precision input: allow a larger relative error.
        assert np.allclose(h2o4gpu_pca.mean_, scikit_pca.mean_, rtol=1e-4)
    else:
        assert np.allclose(h2o4gpu_pca.mean_, scikit_pca.mean_)

    print("Noise Variance")
    print(h2o4gpu_pca.noise_variance_)
    print(scikit_pca.noise_variance_)
    # Compare against scikit-learn; comparing the h2o4gpu value with itself
    # could never fail and therefore verified nothing.
    assert np.allclose(h2o4gpu_pca.noise_variance_,
                       scikit_pca.noise_variance_)

    print("Explained variance")
    print(h2o4gpu_pca.explained_variance_)
    print(scikit_pca.explained_variance_)
    assert np.allclose(h2o4gpu_pca.explained_variance_,
                       scikit_pca.explained_variance_)

    print("Explained variance ratio")
    print(h2o4gpu_pca.explained_variance_ratio_)
    print(scikit_pca.explained_variance_ratio_)
    assert np.allclose(h2o4gpu_pca.explained_variance_ratio_,
                       scikit_pca.explained_variance_ratio_, rtol=.1)

    print("Singular values")
    print(h2o4gpu_pca.singular_values_)
    print(scikit_pca.singular_values_)
    assert np.allclose(h2o4gpu_pca.singular_values_,
                       scikit_pca.singular_values_)

    print("Components")
    expected_components = scikit_pca.components_
    if whiten:
        # Need to manually calculate as Scikit does not store whiten
        # components. See link below:
        # https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/decomposition/pca.py#L567
        expected_components = (
            (expected_components * sqrt(X.shape[0]))
            / scikit_pca.singular_values_[:, np.newaxis]
        )
    print(h2o4gpu_pca.components_)
    print(expected_components)
    assert np.allclose(h2o4gpu_pca.components_, expected_components,
                       rtol=.1)

    print("Num components")
    print(h2o4gpu_pca.n_components)
    print(scikit_pca.n_components)
    assert h2o4gpu_pca.n_components_ == scikit_pca.n_components_