def test_output_type_context_mgr(global_output_type, context_type):
    """Check that ``cuml.using_output_type`` takes precedence inside its block.

    Labels read inside the context manager must match ``context_type``;
    labels read after the block exits must match the global output type
    that this test sets up front.
    """
    data = get_small_dataset('numba')

    # The global type is 'cupy' unless the parameter is already 'cupy',
    # in which case 'numpy' is used instead.
    global_type = 'numpy' if global_output_type == 'cupy' else 'cupy'
    cuml.set_global_output_type(global_type)

    # Fit and read the labels while the context manager is active.
    with cuml.using_output_type(context_type):
        scoped_model = cuml.DBSCAN(eps=1.0, min_samples=1)
        scoped_model.fit(data)
        scoped_labels = scoped_model.labels_

        # 'numba' results are checked through is_cuda_array rather than
        # through the test_output_types lookup.
        if context_type != 'numba':
            assert isinstance(scoped_labels, test_output_types[context_type])
        else:
            assert is_cuda_array(scoped_labels)

    # Same estimator again, this time outside the context manager.
    model = cuml.DBSCAN(eps=1.0, min_samples=1)
    model.fit(data)
    assert isinstance(model.labels_, test_output_types[global_type])
def test_default_global_output_type(input_type):
    """Fit DBSCAN on an ``input_type`` dataset and check the labels' type.

    The labels are expected to come back in the same container type as the
    input data.
    """
    data = get_small_dataset(input_type)

    model = cuml.DBSCAN(eps=1.0, min_samples=1)
    model.fit(data)
    labels = model.labels_

    # 'numba' results are checked through is_cuda_array rather than through
    # the test_output_types lookup.
    if input_type != 'numba':
        assert isinstance(labels, test_output_types[input_type])
    else:
        assert is_cuda_array(labels)
def run_gpu(X, eps, min_samples):
    """Standardize ``X`` on the GPU, cluster it with DBSCAN, return labels.

    Parameters
    ----------
    X
        Input samples. They are copied to the GPU with ``cp.array`` and
        standardized per column (``axis=0``) in that copy.
    eps
        Forwarded to ``cuml.DBSCAN`` as ``eps``.
    min_samples
        Forwarded to ``cuml.DBSCAN`` as ``min_samples``.

    Returns
    -------
    The fitted ``labels_`` converted to a host array with ``cp.asnumpy``.

    Notes
    -----
    A column whose standard deviation is zero is divided by zero during
    standardization; no special handling is applied here.
    """
    # Transfer inputs to GPU
    X = cp.array(X)
    if X.dtype.kind in 'biu':
        # The mean/std results are floating point and cannot be stored back
        # into a bool/integer buffer by the in-place ops below, so promote
        # such inputs before standardizing.
        X = X.astype(cp.float64)

    # Begin computation
    # perf_counter is monotonic, so the reported interval cannot be skewed
    # by system clock adjustments.
    t0 = time.perf_counter()
    mean = cp.mean(X, axis=0)
    std = cp.std(X, axis=0)
    cp.subtract(X, mean, out=X)
    cp.divide(X, std, out=X)
    print('Preprocessing:', time.perf_counter() - t0)

    # Run DBSCAN
    db = cuml.DBSCAN(eps=eps, min_samples=min_samples)
    db = db.fit(X)
    labels = db.labels_

    # Transfer outputs to CPU
    labels = cp.asnumpy(labels)

    return labels
# Estimator instances grouped by family; every key is the estimator's
# class name.
solver_models = {"CD": cuml.CD(), "SGD": cuml.SGD(eta0=0.005)}

cluster_models = {"KMeans": cuml.KMeans()}

decomposition_models = {
    "PCA": cuml.PCA(),
    "TruncatedSVD": cuml.TruncatedSVD(),
}

decomposition_models_xfail = {
    "GaussianRandomProjection": cuml.GaussianRandomProjection(),
    "SparseRandomProjection": cuml.SparseRandomProjection(),
}

neighbor_models = {"NearestNeighbors": cuml.NearestNeighbors()}

dbscan_model = {"DBSCAN": cuml.DBSCAN()}

umap_model = {"UMAP": cuml.UMAP()}


def unit_param(*args, **kwargs):
    """Return ``pytest.param(*args, **kwargs)`` tagged with the ``unit`` mark."""
    return pytest.param(*args, marks=pytest.mark.unit, **kwargs)


def quality_param(*args, **kwargs):
    """Return ``pytest.param(*args, **kwargs)`` tagged with the ``quality`` mark."""
    return pytest.param(*args, marks=pytest.mark.quality, **kwargs)


def stress_param(*args, **kwargs):
    """Return ``pytest.param(*args, **kwargs)`` tagged with the ``stress`` mark."""
    return pytest.param(*args, marks=pytest.mark.stress, **kwargs)
cluster_models = {"KMeans": lambda: cuml.KMeans()} decomposition_models = { "PCA": lambda: cuml.PCA(), "TruncatedSVD": lambda: cuml.TruncatedSVD(), } decomposition_models_xfail = { "GaussianRandomProjection": lambda: cuml.GaussianRandomProjection(), "SparseRandomProjection": lambda: cuml.SparseRandomProjection() } neighbor_models = {"NearestNeighbors": lambda: cuml.NearestNeighbors()} dbscan_model = {"DBSCAN": lambda: cuml.DBSCAN()} umap_model = {"UMAP": lambda: cuml.UMAP()} rf_models = { "rfc": lambda: cuml.RandomForestClassifier(), "rfr": lambda: cuml.RandomForestRegressor() } all_models = { **regression_models, **solver_models, **cluster_models, **decomposition_models, **decomposition_models_xfail, **neighbor_models,
"""Load a dataset into GPU memory""" import cudf import io, requests # download CSV file from github url = "https://github.com/plotly/datasets/raw/master/tips.csv" content = requests.get(url).content.decode('utf-8') # read CSV from memory tips_df = cudf.read_csv(io.StringIO(content)) tips_df['tip_percentage'] = tips_df['tip']/tips_df['total_bill']*100 # display average tip by dining party size print(tips_df.groupby('size').tip_percentage.mean()) import cuml # Create and populate a GPU DataFrame df_float = cudf.DataFrame() df_float['0'] = [1.0, 2.0, 5.0] df_float['1'] = [4.0, 2.0, 1.0] df_float['2'] = [4.0, 2.0, 1.0] # Setup and fit clusters dbscan_float = cuml.DBSCAN(eps=1.0, min_samples=1) dbscan_float.fit(df_float) print(dbscan_float.labels_)
# Registries of zero-argument factories keyed by estimator name. Each
# factory builds a new estimator with default arguments when called.
decomposition_models = dict(
    PCA=lambda: cuml.PCA(),
    TruncatedSVD=lambda: cuml.TruncatedSVD(),
)

decomposition_models_xfail = dict(
    GaussianRandomProjection=lambda: cuml.GaussianRandomProjection(),
    SparseRandomProjection=lambda: cuml.SparseRandomProjection(),
)

neighbor_models = dict(NearestNeighbors=lambda: cuml.NearestNeighbors())

dbscan_model = dict(DBSCAN=lambda: cuml.DBSCAN())

umap_model = dict(UMAP=lambda: cuml.UMAP())

rf_classification_model = dict(rfc=lambda: cuml.RandomForestClassifier())

rf_regression_model = dict(rfr=lambda: cuml.RandomForestRegressor())