Example #1
def test_pca_fit_transform_fp32(nrows, ncols, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.decomposition import PCA as daskPCA
    from cuml.dask.datasets import make_blobs

    X_cudf, _ = make_blobs(nrows,
                           ncols,
                           1,
                           n_parts,
                           cluster_std=1.5,
                           verbose=False,
                           random_state=10,
                           dtype=np.float32)

    wait(X_cudf)

    cupca = daskPCA(n_components=20, whiten=True)
    cupca.fit_transform(X_cudf)

    if owns_cluster:
        client.close()
        cluster.close()
Example #2
def test_pagerank():
    gc.collect()
    input_data_path = r"../datasets/karate.csv"
    # Networkx Call
    pd_df = pd.read_csv(input_data_path,
                        delimiter=' ',
                        names=['src', 'dst', 'value'])
    G = nx.Graph()
    for i in range(0, len(pd_df)):
        G.add_edge(pd_df['src'][i], pd_df['dst'][i])
    nx_pr = nx.pagerank(G, alpha=0.85)
    nx_pr = sorted(nx_pr.items(), key=lambda x: x[0])
    # Cugraph snmg pagerank Call
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)
    chunksize = dcg.get_chunksize(input_data_path)
    ddf = dask_cudf.read_csv(input_data_path,
                             chunksize=chunksize,
                             delimiter=' ',
                             names=['src', 'dst', 'value'],
                             dtype=['int32', 'int32', 'float32'])

    pr = dcg.pagerank(ddf, alpha=0.85, max_iter=50)
    res_df = pr.compute()

    err = 0
    tol = 1.0e-05
    for i in range(len(res_df)):
        if (abs(res_df['pagerank'][i] - nx_pr[i][1]) > tol * 1.1):
            err = err + 1
    print("Mismatches:", err)
    assert err < (0.01 * len(res_df))

    client.close()
    cluster.close()
Example #3
def create_cuml_distributed(X_train, y_train):
    start_time = datetime.now()
    print('init dask cluster')

    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)
    workers = client.has_what().keys()

    n_workers = len(workers)
    X_train_cudf = cudf.DataFrame.from_pandas(pd.DataFrame(X_train))
    y_train_cudf = cudf.Series(y_train)

    X_train_dask = dask_cudf.from_cudf(X_train_cudf, npartitions=n_workers)
    y_train_dask = dask_cudf.from_cudf(y_train_cudf, npartitions=n_workers)

    X_train_ddask, y_train_ddask = dask_utils.persist_across_workers(
        client, [X_train_dask, y_train_dask], workers=workers)
    print('cuml distributed initialized', datetime.now() - start_time)
    model = distributed_cuml_Rf(n_estimators=500, n_streams=64)
    # Fit on the persisted distributed frames rather than the original host arrays
    model.fit(X_train_ddask, y_train_ddask)

    wait(model.rfs)
    print('cuml distributed finished', datetime.now() - start_time)
    client.close()
    cluster.close()
    return model
Example #4
    def fit(self, data, args):
        params = self.configure(data, args)
        n_workers = None if args.gpus < 0 else args.gpus
        cluster = LocalCUDACluster(n_workers=n_workers,
                                   local_directory=args.root)
        client = Client(cluster)
        n_partitions = len(client.scheduler_info()['workers'])
        X_sliced, y_sliced = self.get_slices(n_partitions, data.X_train,
                                             data.y_train)
        X = da.concatenate(
            [da.from_array(sub_array) for sub_array in X_sliced])
        X = X.rechunk((X_sliced[0].shape[0], data.X_train.shape[1]))
        y = da.concatenate(
            [da.from_array(sub_array) for sub_array in y_sliced])
        y = y.rechunk(X.chunksize[0])
        dtrain = xgb.dask.DaskDMatrix(client, X, y)
        with Timer() as t:
            output = xgb.dask.train(client,
                                    params,
                                    dtrain,
                                    num_boost_round=args.ntrees)
        self.model = output['booster']
        client.close()
        cluster.close()
        return t.interval
Example #5
    def fit(self, data, args):
        params = self.configure(data, args)
        cluster = LocalCUDACluster(
            n_workers=None if args.gpus < 0 else args.gpus,
            local_directory=args.root,
            threads_per_worker=1)
        client = Client(cluster)
        partition_size = 10000
        if isinstance(data.X_train, np.ndarray):
            X = da.from_array(data.X_train,
                              (partition_size, data.X_train.shape[1]))
            y = da.from_array(data.y_train, partition_size)
        else:

            X = dd.from_pandas(data.X_train, chunksize=partition_size)
            y = dd.from_pandas(data.y_train, chunksize=partition_size)
        dtrain = xgb.dask.DaskDMatrix(client, X, y)
        with Timer() as t:
            output = xgb.dask.train(client,
                                    params,
                                    dtrain,
                                    num_boost_round=args.ntrees)
        self.model = output['booster']
        client.close()
        cluster.close()
        return t.interval
Example #6
def test_send_recv(n_trials):

    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    cb = CommsContext(comms_p2p=True)
    cb.init()

    cb = default_comms()

    start = time.time()
    dfs = [client.submit(func_test_send_recv,
                         cb.sessionId,
                         n_trials,
                         random.random(),
                         workers=[w])
           for wid, w in zip(range(len(cb.worker_addresses)),
                             cb.worker_addresses)]

    wait(dfs)
    print("Time: " + str(time.time() - start))

    result = list(map(lambda x: x.result(), dfs))

    print(str(result))

    assert(result)

    cb.destroy()
    client.close()
    cluster.close()
Example #7
def test_consolidation(graph_file):
    gc.collect()

    cluster = LocalCUDACluster()
    client = Client(cluster)
    chunksize = dcg.get_chunksize(graph_file)

    M = utils.read_csv_for_nx(graph_file)

    df = pd.DataFrame()
    df['source'] = pd.Series(M['0'])
    df['target'] = pd.Series(M['1'])

    ddf = dask_cudf.read_csv(graph_file,
                             chunksize=chunksize,
                             delimiter=' ',
                             names=['source', 'target', 'weight'],
                             dtype=['int32', 'int32', 'float32'],
                             header=None)

    Gnx = nx.from_pandas_edgelist(df,
                                  source='source',
                                  target='target',
                                  create_using=nx.DiGraph)
    G = cugraph.from_cudf_edgelist(ddf,
                                   source='source',
                                   destination='target',
                                   create_using=cugraph.DiGraph)

    assert compare_graphs(Gnx, G)
    Gnx.clear()
    G.clear()
    client.close()
    cluster.close()
Example #8
def get_cuda_cluster():
    from dask_cuda import LocalCUDACluster

    CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
    n_workers = min(2, len(CUDA_VISIBLE_DEVICES.split(",")))
    cluster = LocalCUDACluster(n_workers=n_workers)
    yield cluster
    cluster.close()
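
The generator above only makes sense when registered as a test fixture; a minimal sketch of that pattern, assuming pytest and a single visible GPU (the fixture and test names below are illustrative, not taken from the snippet):

import pytest
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

@pytest.fixture(scope="module")
def cuda_cluster():
    # Same setup/teardown shape as get_cuda_cluster above, registered as a fixture.
    cluster = LocalCUDACluster(n_workers=1)
    yield cluster
    cluster.close()

def test_worker_count(cuda_cluster):
    # Connect a client for the duration of the test; pytest tears the cluster down afterwards.
    with Client(cuda_cluster) as client:
        assert len(client.scheduler_info()["workers"]) >= 1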
Example #9
def cluster():

    print("Starting cluster")
    cluster = LocalCUDACluster(protocol="tcp", scheduler_port=0)
    yield cluster
    print("Closing cluster")
    cluster.close()
    print("Closed cluster")
Example #10
class MGContext:
    """Utility Context Manager to start a multi GPU context using dask_cuda
    Parameters:
    -----------
    number_of_devices : int
        Number of devices to use, verification must be done prior to call to
        ensure that there are enough devices available. If not specified, the
        cluster will be initialized to use all visible devices.
    rmm_managed_memory : bool
        True to enable managed memory (UVM) in RMM as part of the
        cluster. Default is False.
    p2p : bool
        Initialize UCX endpoints if True. Default is False.
    """
    def __init__(self,
                 number_of_devices=None,
                 rmm_managed_memory=False,
                 p2p=False):
        self._number_of_devices = number_of_devices
        self._rmm_managed_memory = rmm_managed_memory
        self._client = None
        self._p2p = p2p
        self._cluster = CUDACluster(
            n_workers=self._number_of_devices,
            rmm_managed_memory=self._rmm_managed_memory)

    @property
    def client(self):
        return self._client

    @property
    def cluster(self):
        return self._cluster

    def __enter__(self):
        self._prepare_mg()
        return self

    def _prepare_mg(self):
        self._prepare_client()
        self._prepare_comms()

    def _prepare_client(self):
        self._client = Client(self._cluster)
        self._client.wait_for_workers(self._number_of_devices)

    def _prepare_comms(self):
        Comms.initialize(p2p=self._p2p)

    def _close(self):
        Comms.destroy()
        if self._client is not None:
            self._client.close()
        if self._cluster is not None:
            self._cluster.close()

    def __exit__(self, type, value, traceback):
        self._close()
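
The docstring above describes MGContext as a context manager; a minimal usage sketch, assuming two visible GPUs (the device count and the work done inside the block are illustrative):

# Usage sketch for the MGContext defined above; assumes 2 GPUs are available.
with MGContext(number_of_devices=2, rmm_managed_memory=False, p2p=True) as mg:
    # mg.client is the dask.distributed.Client connected to the LocalCUDACluster.
    workers = mg.client.scheduler_info()["workers"]
    print(f"Running on {len(workers)} GPU workers")
# On exit, Comms.destroy() runs and the client and cluster are closed.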
Example #11
def predict_xgboost_gpu(xgb_model,
                        X,
                        data_chunksize=None,
                        n_gpus=None,
                        n_threads_per_gpu=1,
                        gpu_cluster=None,
                        client=None):
    '''
    Predicts the output for the input features X using the given 'xgb_model' running on the GPU.

    :param xgb_model: a dask XGBoost model to use for predictions
    :param X: the input features to predict on; must be either a numpy ndarray or a pandas DataFrame
    :param data_chunksize: chunk size to use for the dask dataframe; leave the default None for an automatic choice
    :param n_gpus: number of GPUs to use; the default None selects all available devices
    :param n_threads_per_gpu: number of threads per GPU
    :param gpu_cluster: an existing dask cluster to reuse. Useful when this method is called many times in quick
        succession. Note that this function does not close an externally created cluster.
    :param client: an existing dask client to reuse. Useful when this method is called many times in quick
        succession. Note that this function does not close an externally created client.
    :return:
        If the input features X are a pandas DataFrame, returns a single-column DataFrame containing the
        predictions; otherwise, if X is a numpy ndarray, returns a 1D ndarray containing the predictions.
    '''
    if gpu_cluster is None:
        local_gpus = LocalCUDACluster(n_workers=n_gpus,
                                      threads_per_worker=n_threads_per_gpu)
    else:
        local_gpus = gpu_cluster
    if client is None:
        local_dask_client = Client(local_gpus)
    else:
        local_dask_client = client

    if data_chunksize is None:
        data_chunksize = X.shape[0] // len(local_gpus.cuda_visible_devices)

    if isinstance(X, pd.DataFrame):
        ndarray = False
        X = from_pandas(X, chunksize=data_chunksize)
    else:
        ndarray = True
        X = from_array(X, chunksize=data_chunksize)

    y_predicted = dask_xgboost_predict(local_dask_client, xgb_model, X)
    y_predicted = pd.DataFrame(y_predicted)

    if client is None:
        local_dask_client.close()
    if gpu_cluster is None:
        local_gpus.close()

    if ndarray:
        return y_predicted.to_numpy()
    return y_predicted
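
A hedged call sketch for the function above, assuming 'trained_model' is a previously trained dask XGBoost model and using synthetic features (both are illustrative, not part of the snippet):

import numpy as np
import pandas as pd

# 'trained_model' is assumed to exist already (e.g. from an earlier dask XGBoost training run).
X_new = pd.DataFrame(np.random.rand(10000, 8),
                     columns=[f"f{i}" for i in range(8)])
preds = predict_xgboost_gpu(trained_model, X_new, n_gpus=2, n_threads_per_gpu=1)
print(preds.head())  # single-column DataFrame of predictions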
Example #12
def test_pagerank():
    gc.collect()
    input_data_path = r"../datasets/hibench_small/1/part-00000.csv"

    # Networkx Call
    pd_df = pd.read_csv(input_data_path, delimiter='\t', names=['src', 'dst'])
    G = nx.DiGraph()
    for i in range(0, len(pd_df)):
        G.add_edge(pd_df['src'][i], pd_df['dst'][i])
    nx_pr = nx.pagerank(G, alpha=0.85)
    nx_pr = sorted(nx_pr.items(), key=lambda x: x[0])

    # Cugraph snmg pagerank Call
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    t0 = time.time()
    chunksize = dcg.get_chunksize(input_data_path)
    ddf = dask_cudf.read_csv(input_data_path,
                             chunksize=chunksize,
                             delimiter='\t',
                             names=['src', 'dst'],
                             dtype=['int32', 'int32'])
    y = ddf.to_delayed()
    x = client.compute(y)
    wait(x)
    t1 = time.time()
    print("Reading Csv time: ", t1 - t0)
    new_ddf = dcg.drop_duplicates(x)
    t2 = time.time()
    pr = dcg.pagerank(new_ddf, alpha=0.85, max_iter=50)
    wait(pr)
    t3 = time.time()
    print("Running PR algo time: ", t3 - t2)
    t4 = time.time()
    res_df = pr.compute()
    t5 = time.time()
    print("Compute time: ", t5 - t4)
    print(res_df)
    t6 = time.time()
    # For bigdatax4, chunksize=100000000 to avoid oom on write csv
    res_df.to_csv('~/pagerank.csv', header=False, index=False)
    t7 = time.time()
    print("Write csv time: ", t7 - t6)

    # Comparison
    err = 0
    tol = 1.0e-05
    for i in range(len(res_df)):
        if (abs(res_df['pagerank'][i] - nx_pr[i][1]) > tol * 1.1):
            err = err + 1
    print("Mismatches:", err)
    assert err < (0.02 * len(res_df))

    client.close()
    cluster.close()
Example #13
    def test_tree_stats(self) -> None:
        with LocalCUDACluster(n_workers=1) as cluster:
            with Client(cluster) as client:
                local = run_tree_stats(client, "gpu_hist")

        with LocalCUDACluster(n_workers=2) as cluster:
            with Client(cluster) as client:
                distributed = run_tree_stats(client, "gpu_hist")

        assert local == distributed
Example #14
def test_default_comms_no_exist():
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)
    cb = default_comms()
    assert cb is not None

    cb2 = default_comms()
    assert cb.sessionId == cb2.sessionId
    client.close()
    cluster.close()
Example #15
def client_connection():
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize(p2p=True)

    yield client

    Comms.destroy()
    client.close()
    cluster.close()
Example #16
def ucx_cluster():
    initialize.initialize(create_cuda_context=True,
                          enable_tcp_over_ucx=enable_tcp_over_ucx,
                          enable_nvlink=enable_nvlink,
                          enable_infiniband=enable_infiniband)
    cluster = LocalCUDACluster(protocol="ucx",
                               enable_tcp_over_ucx=enable_tcp_over_ucx,
                               enable_nvlink=enable_nvlink,
                               enable_infiniband=enable_infiniband)
    yield cluster
    cluster.close()
Example #17
class MGContext:
    """Utility Context Manager to start a multi GPU context using dask_cuda

    Parameters:
    -----------

    number_of_devices : int
        Number of devices to use, verification must be done prior to call
        to ensure that there are enough devices available.
    """
    def __init__(self, number_of_devices=None, rmm_managed_memory=False):
        self._number_of_devices = number_of_devices
        self._rmm_managed_memory = rmm_managed_memory
        self._cluster = None
        self._client = None

    @property
    def client(self):
        return self._client

    @property
    def cluster(self):
        return self._cluster

    def __enter__(self):
        self._prepare_mg()
        return self

    def _prepare_mg(self):
        self._prepare_cluster()
        self._prepare_client()
        self._prepare_comms()

    def _prepare_cluster(self):
        self._cluster = CUDACluster(
            n_workers=self._number_of_devices,
            rmm_managed_memory=self._rmm_managed_memory)

    def _prepare_client(self):
        self._client = Client(self._cluster)
        self._client.wait_for_workers(self._number_of_devices)

    def _prepare_comms(self):
        Comms.initialize()

    def _close(self):
        Comms.destroy()
        if self._client is not None:
            self._client.close()
        if self._cluster is not None:
            self._cluster.close()

    def __exit__(self, type, value, traceback):
        self._close()
Example #18
def client_connection():
    # setup
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize()

    yield client

    # teardown
    Comms.destroy()
    client.close()
    cluster.close()
Example #19
def test_pca_fit(nrows, ncols, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.decomposition import TruncatedSVD as daskTPCA
    from sklearn.decomposition import TruncatedSVD

    from cuml.dask.datasets import make_blobs

    X_cudf, _ = make_blobs(nrows,
                           ncols,
                           1,
                           n_parts,
                           cluster_std=0.5,
                           verbose=False,
                           random_state=10,
                           dtype=np.float32)

    wait(X_cudf)

    X = X_cudf.compute().to_pandas().values

    cutsvd = daskTPCA(n_components=5)
    cutsvd.fit(X_cudf)

    sktsvd = TruncatedSVD(n_components=5, algorithm="arpack")
    sktsvd.fit(X)

    all_attr = [
        'singular_values_', 'components_', 'explained_variance_',
        'explained_variance_ratio_'
    ]

    if owns_cluster:
        client.close()
        cluster.close()

    for attr in all_attr:
        with_sign = False if attr in ['components_'] else True
        cuml_res = (getattr(cutsvd, attr))
        if type(cuml_res) == np.ndarray:
            cuml_res = cuml_res.as_matrix()
        skl_res = getattr(sktsvd, attr)
        if attr == 'singular_values_':
            assert array_equal(cuml_res, skl_res, 1, with_sign=with_sign)
        else:
            assert array_equal(cuml_res, skl_res, 1e-1, with_sign=with_sign)
Example #20
def test_default_comms():

    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    cb = CommsContext(comms_p2p=True, client=client)
    cb.init()

    comms = default_comms()
    assert(cb.sessionId == comms.sessionId)

    comms.destroy()
    client.close()
    cluster.close()
Example #21
def test_splitting():
    gc.collect()

    # This is an experimental setup for 300GB bigdatax8 dataset.
    # This test can be run on 16 32GB gpus. The dataset is split into 32 files.
    input_data_path = r"/datasets/pagerank_demo/1/Input-bigdatax8/edges/"
    input_files = [
        'file-00000.csv', 'file-00001.csv', 'file-00002.csv', 'file-00003.csv',
        'file-00004.csv', 'file-00005.csv', 'file-00006.csv', 'file-00007.csv',
        'file-00008.csv', 'file-00009.csv', 'file-00010.csv', 'file-00011.csv',
        'file-00012.csv', 'file-00013.csv', 'file-00014.csv', 'file-00015.csv',
        'file-00016.csv', 'file-00017.csv', 'file-00018.csv', 'file-00019.csv',
        'file-00020.csv', 'file-00021.csv', 'file-00022.csv', 'file-00023.csv',
        'file-00024.csv', 'file-00025.csv', 'file-00026.csv', 'file-00027.csv',
        'file-00028.csv', 'file-00029.csv', 'file-00030.csv', 'file-00031.csv'
    ]

    # Cugraph snmg pagerank Call
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    files = [input_data_path + f for f in input_files]

    # Read 2 files per gpu/worker and concatenate the dataframe
    # This is a work around for large files to fit memory requirements
    # of cudf.read_csv
    t0 = time.time()
    new_ddf = dcg.read_split_csv(files)
    t1 = time.time()
    print("Reading Csv time: ", t1 - t0)
    t2 = time.time()
    pr = dcg.pagerank(new_ddf, alpha=0.85, max_iter=3)
    wait(pr)
    t3 = time.time()
    print("Pagerank (Dask) time: ", t3 - t2)
    t4 = time.time()
    res_df = pr.compute()
    t5 = time.time()
    print("Compute time: ", t5 - t4)
    print(res_df)
    t6 = time.time()
    res_df.to_csv('~/pagerank.csv',
                  chunksize=40000000,
                  header=False,
                  index=False)
    t7 = time.time()
    print("Write csv time: ", t7 - t6)

    client.close()
    cluster.close()
Example #22
def test_pca_fit(nrows, ncols, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.decomposition import PCA as daskPCA
    from sklearn.decomposition import PCA

    from cuml.dask.datasets import make_blobs

    X_cudf, _ = make_blobs(nrows,
                           ncols,
                           1,
                           n_parts,
                           cluster_std=0.5,
                           verbose=False,
                           random_state=10,
                           dtype=np.float32)

    wait(X_cudf)

    X = X_cudf.compute().to_pandas().values

    cupca = daskPCA(n_components=5, whiten=True)
    cupca.fit(X_cudf)

    skpca = PCA(n_components=5, whiten=True, svd_solver="full")
    skpca.fit(X)

    from cuml.test.utils import array_equal

    all_attr = [
        'singular_values_', 'components_', 'explained_variance_',
        'explained_variance_ratio_'
    ]

    if owns_cluster:
        client.close()
        cluster.close()

    for attr in all_attr:
        with_sign = False if attr in ['components_'] else True
        cuml_res = (getattr(cupca, attr))
        if type(cuml_res) == np.ndarray:
            cuml_res = cuml_res.as_matrix()
        skl_res = getattr(skpca, attr)
        assert array_equal(cuml_res, skl_res, 1e-3, with_sign=with_sign)
Example #23
def test_end_to_end(nrows, ncols, nclusters, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.cluster import KMeans as cumlKMeans
    from dask_ml.cluster import KMeans as dmlKMeans

    from cuml.test.dask.utils import dask_make_blobs

    X_df, X_cudf = dask_make_blobs(nrows,
                                   ncols,
                                   nclusters,
                                   n_parts,
                                   cluster_std=0.1,
                                   verbose=True,
                                   random_state=10)

    wait(X_cudf)

    cumlModel = cumlKMeans(verbose=0,
                           init="k-means||",
                           n_clusters=nclusters,
                           random_state=10)
    daskmlModel1 = dmlKMeans(init="k-means||",
                             n_clusters=nclusters,
                             random_state=10)

    cumlModel.fit(X_cudf)
    daskmlModel1.fit(X_df)

    cumlLabels = cumlModel.predict(X_cudf)
    daskmlLabels1 = daskmlModel1.predict(X_df)

    from sklearn.metrics import adjusted_rand_score

    cumlPred = cumlLabels.compute().to_pandas().values
    daskmlPred1 = daskmlLabels1.compute()

    score = adjusted_rand_score(cumlPred, daskmlPred1)

    if owns_cluster:
        client.close()
        cluster.close()

    assert 1.0 == score
Example #24
    def __init__(
        self,
        cloud_type="Azure",
        model_type="RandomForest",
        data_type="Parquet",
        compute_type="single-GPU",
        verbose_estimator=False,
        CSP_paths=default_azureml_paths,
    ):

        self.CSP_paths = CSP_paths
        self.cloud_type = cloud_type
        self.model_type = model_type
        self.data_type = data_type
        self.compute_type = compute_type
        self.verbose_estimator = verbose_estimator
        self.log_to_file(
            f"\n> RapidsCloudML\n\tCompute, Data , Model, Cloud types {self.compute_type, self.data_type, self.model_type, self.cloud_type}"
        )

        # Setting up client for multi-GPU option
        if "multi" in self.compute_type:
            self.log_to_file("\n\tMulti-GPU selected")
            # This will use all GPUs on the local host by default
            cluster = LocalCUDACluster(threads_per_worker=1)
            self.client = Client(cluster)

            # Query the client for all connected workers
            self.workers = self.client.has_what().keys()
            self.n_workers = len(self.workers)
            self.log_to_file(f"\n\tClient information {self.client}")
Example #25
async def run():
    initialize(
        create_cuda_context=True,
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
    )

    async with LocalCUDACluster(
        interface="enp1s0f0",
        protocol="ucx",
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
        asynchronous=True,
    ) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            rs = da.random.RandomState(RandomState=cupy.random.RandomState)
            a = rs.normal(10, 1, (int(4e3), int(4e3)), chunks=(int(1e3), int(1e3)))
            x = a + a.T

            for i in range(100):
                print("Running iteration:", i)
                start = time.time()
                await client.compute(x)
                print("Time for iteration", i, ":", time.time() - start)
Example #26
def test_with_asyncio():
    with LocalCUDACluster() as cluster:
        with Client(cluster) as client:
            address = client.scheduler.address
            output = asyncio.run(run_from_dask_array_asyncio(address))
            assert isinstance(output['booster'], xgboost.Booster)
            assert isinstance(output['history'], dict)
Example #27
    def test_empty_dmatrix(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                parameters = {'tree_method': 'gpu_hist',
                              'debug_synchronize': True}
                run_empty_dmatrix_reg(client, parameters)
                run_empty_dmatrix_cls(client, parameters)
Example #28
    def test_gpu_hist(self, params, num_rounds, dataset):
        with LocalCUDACluster(n_workers=2) as cluster:
            with Client(cluster) as client:
                run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
                             client)
                run_gpu_hist(params, num_rounds, dataset,
                             dxgb.DaskDeviceQuantileDMatrix, client)
Example #29
def setup(dask_scheduler_file=None, rmm_pool_size=None):
    if dask_scheduler_file:
        cluster = None
        # Env var UCX_MAX_RNDV_RAILS=1 must be set too.
        initialize(
            enable_tcp_over_ucx=True,
            enable_nvlink=True,
            enable_infiniband=False,
            enable_rdmacm=False,
            #net_devices="mlx5_0:1",
        )
        client = Client(scheduler_file=dask_scheduler_file)

    else:
        tempdir_object = tempfile.TemporaryDirectory()
        cluster = LocalCUDACluster(local_directory=tempdir_object.name,
                                   rmm_pool_size=rmm_pool_size)
        client = Client(cluster)
        # add the obj to the client so it doesn't get deleted until
        # the 'client' obj gets cleaned up
        client.tempdir_object = tempdir_object
        client.wait_for_workers(len(get_visible_devices()))

    Comms.initialize(p2p=True)
    return (client, cluster)
Example #30
async def test_with_subset_of_cuda_visible_devices():
    os.environ["CUDA_VISIBLE_DEVICES"] = "2,3,6,7"
    try:
        async with LocalCUDACluster(scheduler_port=0,
                                    asynchronous=True,
                                    device_memory_limit=1) as cluster:
            async with Client(cluster, asynchronous=True) as client:
                assert len(cluster.workers) == 4

                # CUDA_VISIBLE_DEVICES cycles properly
                def get_visible_devices():
                    return os.environ["CUDA_VISIBLE_DEVICES"]

                result = await client.run(get_visible_devices)

                assert all(len(v.split(",")) == 4 for v in result.values())
                for i in range(4):
                    assert {int(v.split(",")[i])
                            for v in result.values()} == {
                                2,
                                3,
                                6,
                                7,
                            }
    finally:
        del os.environ["CUDA_VISIBLE_DEVICES"]