Example #1
# Imports needed by this snippet (the cuml logger import path is assumed):
from contextlib import redirect_stdout
from io import StringIO

import cuml.common.logger as logger


def test_redirected_logger():
    new_stdout = StringIO()

    with logger.set_level(logger.level_trace):
        # We do not test trace because CUML_LOG_TRACE is not compiled by
        # default
        test_msg = "This is a debug message"
        with redirect_stdout(new_stdout):
            logger.debug(test_msg)
        assert test_msg in new_stdout.getvalue()

        test_msg = "This is an info message"
        with redirect_stdout(new_stdout):
            logger.info(test_msg)
        assert test_msg in new_stdout.getvalue()

        test_msg = "This is a warn message"
        with redirect_stdout(new_stdout):
            logger.warn(test_msg)
        assert test_msg in new_stdout.getvalue()

        test_msg = "This is an error message"
        with redirect_stdout(new_stdout):
            logger.error(test_msg)
        assert test_msg in new_stdout.getvalue()

        test_msg = "This is a critical message"
        with redirect_stdout(new_stdout):
            logger.critical(test_msg)
        assert test_msg in new_stdout.getvalue()

    # Check that logging does not error with sys.stdout of None
    with redirect_stdout(None):
        test_msg = "This is a debug message"
        logger.debug(test_msg)
Example #2
File: comms.py Project: isVoid/cuml
    def init(self, workers=None):
        """
        Initializes the underlying comms. NCCL is required but
        UCX is only initialized if `comms_p2p == True`
        """

        self.worker_addresses = list(
            set((self.client.has_what().keys()
                 if workers is None else workers)))

        if self.nccl_initialized:
            warnings.warn("CommsContext has already been initialized.")
            return

        worker_info = self.worker_info(self.worker_addresses)
        worker_info = {w: worker_info[w] for w in self.worker_addresses}

        self.uniqueId = nccl.get_unique_id()

        self.client.run(_func_init_all,
                        self.sessionId,
                        self.uniqueId,
                        self.comms_p2p,
                        worker_info,
                        self.streams_per_handle,
                        workers=self.worker_addresses,
                        wait=True)

        self.nccl_initialized = True

        if self.comms_p2p:
            self.ucx_initialized = True

        logger.debug("Initialization complete.")
Example #3
def test_concat_memory_leak(large_clf, estimator_type):
    import gc
    import os

    try:
        import psutil
    except ImportError:
        pytest.skip("psutil not installed")

    process = psutil.Process(os.getpid())

    X, y = large_clf
    X = X.astype(np.float32)

    # Build a series of RF models
    n_models = 10
    if estimator_type == 'classification':
        base_models = [
            curfc(max_depth=10, n_estimators=100, random_state=123)
            for i in range(n_models)
        ]
        y = y.astype(np.int32)
    elif estimator_type == 'regression':
        base_models = [
            curfr(max_depth=10, n_estimators=100, random_state=123)
            for i in range(n_models)
        ]
        y = y.astype(np.float32)
    else:
        assert False

    # Pre-fit once - this is our baseline and memory usage
    # should not significantly exceed it after later fits
    for model in base_models:
        model.fit(X, y)

    # Just concatenate over and over in a loop
    concat_models = base_models[1:]
    init_model = base_models[0]
    other_handles = [
        model._obtain_treelite_handle() for model in concat_models
    ]
    init_model._concatenate_treelite_handle(other_handles)

    gc.collect()
    initial_baseline_mem = process.memory_info().rss
    for i in range(10):
        init_model._concatenate_treelite_handle(other_handles)
        gc.collect()
        used_mem = process.memory_info().rss
        logger.debug("memory at rep %2d: %d m" %
                     (i, (used_mem - initial_baseline_mem) / 1e6))

    gc.collect()
    used_mem = process.memory_info().rss
    logger.info("Final memory delta: %d" %
                ((used_mem - initial_baseline_mem) / 1e6))
    assert (used_mem - initial_baseline_mem) < 1e6
Example #4
def check_order(arr_order):
    if order != 'K' and arr_order != order:
        if fail_on_order:
            raise ValueError("Expected " + order_to_str(order) +
                             " major order, but got the opposite.")
        else:
            debug("Expected " + order_to_str(order) + " major order, "
                  "but got the opposite. Converting data, this will "
                  "result in additional memory utilization.")
            return True
    return False
Example #5
    def __init__(self,
                 data=None,
                 convert_to_dtype=False,
                 convert_index=np.int32,
                 convert_format=True):
        if not cpx.scipy.sparse.isspmatrix(data) and \
                not (has_scipy() and scipy.sparse.isspmatrix(data)):
            raise ValueError("A sparse matrix is expected as input. "
                             "Received %s" % type(data))

        check_classes = [cpx.scipy.sparse.csr_matrix]
        if has_scipy():
            check_classes.append(scipy.sparse.csr_matrix)

        if not isinstance(data, tuple(check_classes)):
            if convert_format:
                debug('Received sparse matrix in %s format but CSR is '
                      'expected. Data will be converted to CSR, but this '
                      'will require additional memory copies. If this '
                      'conversion is not desired, set '
                      'convert_format=False to raise an exception '
                      'instead.' % type(data))
                data = data.tocsr()  # currently only CSR is supported
            else:
                raise ValueError("Expected CSR matrix but received %s" %
                                 type(data))

        if not convert_to_dtype:
            convert_to_dtype = data.dtype

        if not convert_index:
            convert_index = data.indptr.dtype

        # Note: Only 32-bit indexing is supported currently.
        # In CUDA11, Cusparse provides 64-bit function calls
        # but these are not yet used in RAFT/Cuml
        self.indptr, _, _, _ = cuml.common.input_to_cuml_array(
            data.indptr,
            check_dtype=convert_index,
            convert_to_dtype=convert_index)

        self.indices, _, _, _ = cuml.common.input_to_cuml_array(
            data.indices,
            check_dtype=convert_index,
            convert_to_dtype=convert_index)

        self.data, _, _, _ = cuml.common.input_to_cuml_array(
            data.data,
            check_dtype=data.dtype,
            convert_to_dtype=convert_to_dtype)

        self.shape = data.shape
        self.dtype = self.data.dtype
        self.nnz = data.nnz
Example #6
def run_classification(datatype, penalty, loss, dims, nclasses):

    t = time.perf_counter()
    nrows, ncols = dims
    X_train, X_test, y_train, y_test = make_classification_dataset(
        datatype, nrows, ncols, nclasses)
    logger.debug(f"Data generation time: {time.perf_counter() - t} s.")

    # solving in primal is not supported by sklearn for this loss type.
    skdual = loss == 'hinge' and penalty == 'l2'
    if loss == 'hinge' and penalty == 'l1':
        pytest.skip(
            "sklearn does not support this combination of loss and penalty")

    # limit the max iterations for sklearn to reduce the max test time
    cuit = 10000
    skit = max(10, int(min(cuit, cuit * 1000 / nrows)))

    t = time.perf_counter()
    handle = cuml.Handle(n_streams=0)
    cum = cu.LinearSVC(handle=handle,
                       loss=loss,
                       penalty=penalty,
                       max_iter=cuit)
    cum.fit(X_train, y_train)
    cus = cum.score(X_test, y_test)
    handle.sync()
    t = time.perf_counter() - t
    logger.debug(f"Cuml time: {t} s.")
    t = max(5, t * SKLEARN_TIMEOUT_FACTOR)

    # cleanup cuml objects so that we can more easily fork the process
    # and test sklearn
    del cum
    X_train = X_train.get()
    X_test = X_test.get()
    y_train = y_train.get()
    y_test = y_test.get()
    gc.collect()

    try:

        def run_sklearn():
            skm = sk.LinearSVC(loss=loss,
                               penalty=penalty,
                               max_iter=skit,
                               dual=skdual)
            skm.fit(X_train, y_train)
            return skm.score(X_test, y_test)

        sks = with_timeout(timeout=t, target=run_sklearn)
        good_enough(cus, sks, nrows)
    except TimeoutError:
        pytest.skip(f"sklearn did not finish within {t} seconds.")
Example #7
def test_logger():
    logger.trace("This is a trace message")
    logger.debug("This is a debug message")
    logger.info("This is an info message")
    logger.warn("This is a warn message")
    logger.error("This is a error message")
    logger.critical("This is a critical message")

    with logger.set_level(logger.level_warn):
        assert (logger.should_log_for(logger.level_warn))
        assert (not logger.should_log_for(logger.level_info))

    with logger.set_pattern("%v"):
        logger.info("This is an info message")
Example #8
File: comms.py Project: isVoid/cuml
    def destroy(self):
        """
        Shuts down initialized comms and cleans up resources.
        """
        self.client.run(_func_destroy_all,
                        self.sessionId,
                        self.comms_p2p,
                        wait=True,
                        workers=self.worker_addresses)

        logger.debug("Destroying comms.")

        self.nccl_initialized = False
        self.ucx_initialized = False
Example #9
# Imports needed by this snippet (the cuml logger import path is assumed):
from contextlib import redirect_stdout
from io import BytesIO, TextIOWrapper

import cuml.common.logger as logger


def test_log_flush():
    stdout_buffer = BytesIO()
    new_stdout = TextIOWrapper(stdout_buffer)

    with logger.set_level(logger.level_trace):
        test_msg = "This is a debug message"
        with redirect_stdout(new_stdout):
            logger.debug(test_msg)
            assert test_msg not in stdout_buffer.getvalue().decode('utf-8')
            logger.flush()
            assert test_msg in stdout_buffer.getvalue().decode('utf-8')

    # Check that logging flush does not error with sys.stdout of None
    with redirect_stdout(None):
        logger.flush()
Example #10
def _to_dask_cudf(futures, client=None):
    """
    Convert a list of futures containing cudf Dataframes into a Dask.Dataframe
    :param futures: list[cudf.Dataframe] list of futures containing dataframes
    :param client: dask.distributed.Client Optional client to use
    :return: dask.Dataframe a dask.Dataframe
    """
    c = default_client() if client is None else client
    # Convert a list of futures containing dfs back into a dask_cudf
    dfs = [d for d in futures if d.type != type(None)]  # NOQA
    if logger.should_log_for(logger.level_debug):
        logger.debug("to_dask_cudf dfs=%s" % str(dfs))
    meta_future = c.submit(_get_meta, dfs[0], pure=False)
    meta = meta_future.result()
    return dd.from_delayed(dfs, meta=meta)
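
A minimal usage sketch for the helper above, assuming a running dask.distributed client; the _make_part helper is hypothetical and stands in for any task that builds a cudf DataFrame on a worker:

import cudf
from dask.distributed import Client


def _make_part(n):
    return cudf.DataFrame({"x": list(range(n))})


client = Client()  # local cluster for illustration
futures = [client.submit(_make_part, 5, pure=False) for _ in range(4)]
ddf = _to_dask_cudf(futures, client=client)
print(ddf.head())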
Example #11
def test_umap_mnmg(n_parts, n_rows, sampling_ratio, supervised, dataset,
                   n_neighbors, client):
    local_X, local_y = _load_dataset(dataset, n_rows)

    dist_umap = _umap_mnmg_trustworthiness(local_X, local_y, n_neighbors,
                                           supervised, n_parts, sampling_ratio)

    loc_umap = _local_umap_trustworthiness(local_X, local_y, n_neighbors,
                                           supervised)

    logger.debug(
        "\nLocal UMAP trustworthiness score : {:.2f}".format(loc_umap))
    logger.debug("UMAP MNMG trustworthiness score : {:.2f}".format(dist_umap))

    trust_diff = loc_umap - dist_umap

    assert trust_diff <= 0.1
Example #12
File: func.py Project: daxiongshu/cuml
def tree_reduce(objs, func=sum):
    """
    Performs a binary tree reduce on an associative
    and commutative function in parallel across
    Dask workers. Since this supports dask.delayed
    objects, which may not yet have been scheduled on
    workers, it does not take locality into account. As a
    result, any local reductions should be performed before
    this function is called.

    Parameters
    ----------
    objs : array-like of dask.delayed or future
        objects to reduce.
    func : Python function or dask.delayed function
        Function to use for reduction. The reduction function
        accepts a list of objects to reduce as an argument and
        produces a single reduced object.

    Returns
    -------
    reduced_result : dask.delayed or future
        if func is delayed, the result will be delayed
        if func is a future, the result will be a future
    """

    func = dask.delayed(func) \
        if not isinstance(func, Delayed) else func

    while len(objs) > 1:
        new_objs = []
        n_objs = len(objs)
        for i in range(0, n_objs, 2):
            inputs = dask.delayed(objs[i:i + 2], pure=False)
            obj = func(inputs)
            new_objs.append(obj)
        wait(new_objs)
        objs = new_objs

    logger.debug(str(objs))

    return first(objs)
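
A minimal usage sketch, assuming a dask.distributed client is running (the wait() call above needs one) and using the default func=sum:

import dask
from dask.distributed import Client

client = Client()  # local cluster for illustration
parts = [dask.delayed(x) for x in [1, 2, 3, 4]]  # e.g. local partial results
total = tree_reduce(parts)   # pairwise (binary tree) reduction with sum
print(total.compute())       # 10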
Example #13
File: comms.py Project: isVoid/cuml
    def __init__(self, comms_p2p=False, client=None, streams_per_handle=0):
        """
        Construct a new CommsContext instance
        :param comms_p2p: bool Should p2p comms be initialized?
        """
        self.client = client if client is not None else default_client()
        self.comms_p2p = comms_p2p

        self.streams_per_handle = streams_per_handle

        self.sessionId = uuid.uuid4().bytes

        self.nccl_initialized = False
        self.ucx_initialized = False

        if comms_p2p and (not is_ucx_enabled() or not has_ucp()):
            warnings.warn("ucx-py not found. UCP Integration will "
                          "be disabled.")
            self.comms_p2p = False

        logger.debug("Initializing comms!")
Example #14
    def transform(self, X):
        """
        Transform X using one-hot encoding.

        Parameters
        ----------
        X : cudf.DataFrame or cupy.ndarray
            The data to encode.

        Returns
        -------
        X_out : sparse matrix if sparse=True else a 2-d array
            Transformed input.
        """
        self._check_is_fitted()
        X = self._check_input(X)

        cols, rows = list(), list()
        col_idx = None
        j = 0

        try:
            for feature in X.columns:
                encoder = self._encoders[feature]
                col_idx = encoder.transform(X[feature])
                idx_to_keep = cp.asarray(col_idx.notnull().to_gpu_array())
                col_idx = cp.asarray(col_idx.dropna().to_gpu_array())

                # Simple test to auto upscale col_idx type as needed.
                # First, determine the maximum value we will add, assuming
                # indices increase monotonically up to len(encoder.classes_).
                # Ensure we don't go negative by clamping to 0.
                max_value = int(max(len(encoder.classes_) - 1, 0) + j)

                # If we exceed the max value, upconvert
                if max_value > np.iinfo(col_idx.dtype).max:
                    col_idx = col_idx.astype(np.min_scalar_type(max_value))
                    logger.debug("Upconverting column: '{}', to dtype: '{}', "
                                 "to support up to {} classes".format(
                                     feature, np.min_scalar_type(max_value),
                                     max_value))

                # increase indices to take previous features into account
                col_idx += j

                # Filter out rows with null values
                row_idx = cp.arange(len(X))[idx_to_keep]

                if self.drop_idx_ is not None:
                    drop_idx = self.drop_idx_[feature] + j
                    mask = cp.ones(col_idx.shape, dtype=cp.bool_)
                    mask[col_idx == drop_idx] = False
                    col_idx = col_idx[mask]
                    row_idx = row_idx[mask]
                    # account for dropped category in indices
                    col_idx[col_idx > drop_idx] -= 1
                    # account for dropped category in current cats number
                    j -= 1

                j += len(encoder.classes_)
                cols.append(col_idx)
                rows.append(row_idx)

            cols = cp.concatenate(cols)
            rows = cp.concatenate(rows)
            val = cp.ones(rows.shape[0], dtype=self.dtype)
            ohe = cupyx.scipy.sparse.coo_matrix((val, (rows, cols)),
                                                shape=(len(X), j),
                                                dtype=self.dtype)

            if not self.sparse:
                ohe = ohe.toarray()

            return ohe

        except TypeError as e:
            # Append to cols to include the column that threw the error
            cols.append(col_idx)

            # Build a string showing what the types are
            input_types_str = ", ".join([str(x.dtype) for x in cols])

            raise TypeError(
                "A TypeError occurred while calculating column "
                "category indices, most likely due to integer overflow. This "
                "can occur when columns have a large difference in the number "
                "of categories, resulting in different category code dtypes "
                "for different columns."
                "Calculated column code dtypes: {}.\n"
                "Internal Error: {}".format(input_types_str, repr(e)))
Example #15
File: kernel_utils.py Project: teju85/cuml
def cuda_kernel_factory(nvrtc_kernel_str, dtypes, kernel_name=None):
    """
    A factory wrapper function to perform some of the boiler-plate involved in
    making cuPy RawKernels type-agnostic.

    Until a better method is created, either by RAPIDS or cuPy, this function
    will perform a string search and replace of c-based datatype primitives
    in ``nvrtc_kernel_str`` using a numerical placeholder (eg. {0}, {1}) for
    the dtype in the corresponding index of tuple ``dtypes``.

    Note that the extern, function scope, and function name should not be
    included in the kernel string. These will be added by this function and
    the function name will be made unique, based on the given dtypes.

    Example
    -------

        The following kernel string with dtypes = [float, double, int]

        ({0} *a, {1} *b, {2} *c) {}

        Will become

        (float *a, double *b, int *c) {}

    Parameters
    ----------

    nvrtc_kernel_str : string
        Valid nvrtc kernel string without extern, scope, or function name.
    dtypes : tuple
        Dtypes to search for and replace in the kernel string.
    kernel_name : string, optional
        Prefix for the generated function name. Note that when this is not
        set (or is set to None), a UUID will be used instead, which will
        stop this function from being memoized.

    Returns
    -------

    raw_kernel : cupy.RawKernel
        Compiled kernel ready for use; its function name is unique, derived
        from ``kernel_name`` (or a UUID) and the dtypes.
    """

    dtype_strs = get_dtype_strs(dtypes)

    for idx, dtype in enumerate(dtypes):
        nvrtc_kernel_str = nvrtc_kernel_str.replace("{%d}" % idx,
                                                    dtype_strs[idx])

    kernel_name = "%s_%s" % (
        uuid1() if kernel_name is None else kernel_name,
        "".join(dtype_strs).replace(" ", "_"))

    nvrtc_kernel_str = "%s\nvoid %s%s" % \
                       (extern_prefix, kernel_name, nvrtc_kernel_str)

    if logger.should_log_for(logger.LEVEL_DEBUG):
        logger.debug(str(nvrtc_kernel_str))

    return cp.RawKernel(nvrtc_kernel_str, kernel_name)
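
A minimal usage sketch, assuming cupy is installed and the function above is importable; the kernel string contains only the parameter list and body, with {0} standing in for the first dtype, as described in the docstring:

import cupy as cp
import numpy as np

kernel = cuda_kernel_factory(
    "(const {0} *x, {0} *out, int n) {"
    " int tid = blockIdx.x * blockDim.x + threadIdx.x;"
    " if (tid < n) out[tid] = x[tid] + ({0})1;"
    " }",
    (np.dtype(np.float32),),
    kernel_name="add_one")

x = cp.arange(10, dtype=cp.float32)
out = cp.empty_like(x)
kernel((1,), (10,), (x, out, np.int32(10)))  # grid, block, kernel args
print(out)  # [ 1.  2. ... 10.]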
Example #16
def input_to_cuml_array(X,
                        order='F',
                        deepcopy=False,
                        check_dtype=False,
                        convert_to_dtype=False,
                        check_cols=False,
                        check_rows=False,
                        fail_on_order=False,
                        force_contiguous=True):
    """
    Convert input X to CumlArray.

    Acceptable input formats:

    * cuDF DataFrame - always returns a deep copy.
    * cuDF Series - returns by reference or as a deep copy depending on
      `deepcopy`.
    * NumPy array - always returns a copy on device.
    * CUDA array interface (CAI) compliant array (like CuPy) - returns a
      reference unless `deepcopy`=True.
    * Numba device array - returns a reference unless `deepcopy`=True.

    Parameters
    ----------

    X : cuDF.DataFrame, cuDF.Series, NumPy array, Pandas DataFrame, Pandas
        Series or any cuda_array_interface (CAI) compliant array like CuPy,
        Numba or pytorch.

    order: 'F', 'C' or 'K' (default: 'F')
        Whether to return an F-major ('F') or C-major ('C') array, or to
        keep ('K') the order of X. Used to check the order of the input.
        If fail_on_order=True, the method will raise ValueError; otherwise
        it will convert X to order `order` if needed.

    deepcopy: boolean (default: False)
        Set to True to always return a deep copy of X.

    check_dtype: np.dtype (default: False)
        Set to a np.dtype to throw an error if X is not of dtype `check_dtype`.

    convert_to_dtype: np.dtype (default: False)
        Set to a dtype if you want X to be converted to that dtype if it is
        not that dtype already.

    check_cols: int (default: False)
        Set to an int `i` to check that input X has `i` columns. Set to False
        (default) to not check at all.

    check_rows: int (default: False)
        Set to an int `i` to check that input X has `i` rows. Set to False
        (default) to not check at all.

    fail_on_order: boolean (default: False)
        Set to True if you want the method to raise a ValueError if X is not
        of order `order`.

    force_contiguous: boolean (default: True)
        Set to True to force the produced CumlArray to be contiguous. If `X`
        is not contiguous, a contiguous copy will be made.
        If False, and `X` doesn't need to be converted and is not contiguous,
        the memory underlying the CumlArray will be non contiguous.
        Only affects CAI inputs, i.e. CuPy and Numba device array views; all
        other input formats produce contiguous CumlArrays.

    Returns
    -------
    `cuml_array`: namedtuple('cuml_array', 'array n_rows n_cols dtype')

        A new CumlArray and associated data.

    """
    def check_order(arr_order):
        if order != 'K' and arr_order != order:
            if fail_on_order:
                raise ValueError("Expected " + order_to_str(order) +
                                 " major order, but got the opposite.")
            else:
                debug("Expected " + order_to_str(order) + " major order, "
                      "but got the opposite. Converting data, this will "
                      "result in additional memory utilization.")
                return True
        return False

    # dtype conversion

    # force_contiguous set to True always for now
    # upcoming CumlArray improvements will affect this
    # https://github.com/rapidsai/cuml/issues/2412
    force_contiguous = True

    if convert_to_dtype:
        X = convert_dtype(X, to_dtype=convert_to_dtype)
        check_dtype = False

    # format conversion

    if (isinstance(X, cudf.Series)):
        if X.null_count != 0:
            raise ValueError("Error: cuDF Series has missing/null values, "
                             "which are not supported by cuML.")

    # converting pandas to numpy before sending it to CumlArray
    if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
        # pandas doesn't support custom order in to_numpy
        X = cp.asarray(X.to_numpy(copy=False), order=order)

    if isinstance(X, cudf.DataFrame):
        if order == 'K':
            X_m = CumlArray(data=X.as_gpu_matrix(order='F'))
        else:
            X_m = CumlArray(data=X.as_gpu_matrix(order=order))

    elif isinstance(X, CumlArray):
        X_m = X

    elif hasattr(X, "__array_interface__") or \
            hasattr(X, "__cuda_array_interface__"):

        # Since we create the array with the correct order here, do the order
        # check now if necessary
        interface = getattr(X, "__array_interface__", None) or getattr(
            X, "__cuda_array_interface__", None)

        arr_info = ArrayInfo.from_interface(interface)

        check_order(arr_info.order)

        make_copy = False

        if force_contiguous or hasattr(X, "__array_interface__"):
            if not _check_array_contiguity(X):
                debug("Non contiguous array or view detected, a "
                      "contiguous copy of the data will be done.")
                # X = cp.array(X, order=order, copy=True)
                make_copy = True

        cp_arr = cp.array(X, copy=make_copy, order=order)

        X_m = CumlArray(data=cp_arr)

        if deepcopy:
            X_m = copy.deepcopy(X_m)

    else:
        msg = "X matrix format " + str(X.__class__) + " not supported"
        raise TypeError(msg)

    if check_dtype:
        if not isinstance(check_dtype, list):
            check_dtype = [check_dtype]

        check_dtype = [np.dtype(dtype) for dtype in check_dtype]

        if X_m.dtype not in check_dtype:
            type_str = X_m.dtype
            del X_m
            raise TypeError("Expected input to be of type in " +
                            str(check_dtype) + " but got " + str(type_str))

    # Checks based on parameters

    n_rows = X_m.shape[0]

    if len(X_m.shape) > 1:
        n_cols = X_m.shape[1]
    else:
        n_cols = 1

    if n_cols == 1 or n_rows == 1:
        order = 'K'

    if check_cols:
        if n_cols != check_cols:
            raise ValueError("Expected " + str(check_cols) +
                             " columns but got " + str(n_cols) + " columns.")

    if check_rows:
        if n_rows != check_rows:
            raise ValueError("Expected " + str(check_rows) + " rows but got " +
                             str(n_rows) + " rows.")

    if (check_order(X_m.order)):
        X_m = cp.array(X_m, copy=False, order=order)
        X_m = CumlArray(data=X_m)

    return cuml_array(array=X_m, n_rows=n_rows, n_cols=n_cols, dtype=X_m.dtype)
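
A minimal usage sketch; the document itself calls this helper as cuml.common.input_to_cuml_array (see Example #5), so that path is assumed here:

import numpy as np
import cuml

X = np.ones((10, 3), dtype=np.float32, order='C')
arr, n_rows, n_cols, dtype = cuml.common.input_to_cuml_array(
    X, order='F', convert_to_dtype=np.float64)
print(n_rows, n_cols, dtype)  # 10 3 float64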
Example #17
def random_state():
    random_state = random.randint(0, 10 ** 6)  # randint requires int bounds
    with logger.set_level(logger.level_debug):
        logger.debug("Random seed: {}".format(random_state))
    return random_state
Example #18
def make_blobs(n_samples=100, n_features=2, centers=None, cluster_std=1.0,
               n_parts=None, center_box=(-10, 10), shuffle=True,
               random_state=None, return_centers=False,
               verbosity=logger.LEVEL_INFO, order='F', dtype='float32',
               client=None):
    """
    Makes labeled Dask-Cupy arrays containing blobs
    for a randomly generated set of centroids.

    This function calls `make_blobs` from `cuml.datasets` on each Dask worker
    and aggregates them into a single Dask Dataframe.

    See Scikit-learn's `make_blobs
    <https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html>`_
    for more information.

    Parameters
    ----------

    n_samples : int
        number of rows
    n_features : int
        number of features
    centers : int or array of shape [n_centers, n_features],
        optional (default=None) The number of centers to generate, or the fixed
        center locations. If n_samples is an int and centers is None, 3 centers
        are generated. If n_samples is array-like, centers must be either None
        or an array of length equal to the length of n_samples.
    cluster_std : float (default = 1.0)
         standard deviation of points around centroid
    n_parts : int (default = None)
        number of partitions to generate (this can be greater
        than the number of workers)
    center_box : tuple (int, int) (default = (-10, 10))
         the bounding box which constrains all the centroids
    random_state : int (default = None)
         sets random seed (or use None to reinitialize each time)
    return_centers : bool, optional (default=False)
        If True, then return the centers of each cluster
    verbosity : int (default = cuml.logger.LEVEL_INFO)
         Logging level.
    shuffle : bool (default = True)
        Shuffles the samples on each worker.
    order: str, optional (default='F')
        The order of the generated samples
    dtype : str, optional (default='float32')
        Dtype of the generated samples
    client : dask.distributed.Client (optional)
             Dask client to use

    Returns
    -------
    X : dask.array backed by CuPy array of shape [n_samples, n_features]
        The input samples.
    y : dask.array backed by CuPy array of shape [n_samples]
        The output values.
    centers : dask.array backed by CuPy array of shape
        [n_centers, n_features], optional
        The centers of the underlying blobs. It is returned only if
        return_centers is True.
    """

    client = get_client(client=client)

    generator = _create_rs_generator(random_state=random_state)

    workers = list(client.scheduler_info()['workers'].keys())

    n_parts = n_parts if n_parts is not None else len(workers)
    parts_workers = (workers * n_parts)[:n_parts]

    centers, n_centers = _get_centers(generator, centers, center_box,
                                      n_samples, n_features,
                                      dtype)

    rows_per_part = max(1, int(n_samples / n_parts))

    worker_rows = [rows_per_part] * n_parts

    if rows_per_part == 1:
        worker_rows[-1] += n_samples % n_parts
    else:
        worker_rows[-1] += n_samples % rows_per_part

    worker_rows = tuple(worker_rows)

    logger.debug("Generating %d samples across %d partitions on "
                 "%d workers (total=%d samples)" %
                 (math.ceil(n_samples / len(workers)),
                  n_parts, len(workers), n_samples))

    seeds = generator.randint(n_samples, size=len(parts_workers))
    parts = [client.submit(_create_local_data,
                           part_rows,
                           n_features,
                           centers,
                           cluster_std,
                           shuffle,
                           int(seeds[idx]),
                           order,
                           dtype,
                           pure=False,
                           workers=[parts_workers[idx]])
             for idx, part_rows in enumerate(worker_rows)]

    X = [client.submit(_get_X, f, pure=False)
         for idx, f in enumerate(parts)]
    y = [client.submit(_get_labels, f, pure=False)
         for idx, f in enumerate(parts)]

    X_del = _create_delayed(X, dtype, worker_rows, n_features)
    y_del = _create_delayed(y, dtype, worker_rows)

    X_final = da.concatenate(X_del, axis=0)
    y_final = da.concatenate(y_del, axis=0)

    if return_centers:
        return X_final, y_final, centers
    else:
        return X_final, y_final
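
A minimal usage sketch, assuming a dask_cuda LocalCUDACluster and that this function is the one exposed as cuml.dask.datasets.make_blobs:

from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from cuml.dask.datasets import make_blobs

cluster = LocalCUDACluster()
client = Client(cluster)
X, y = make_blobs(n_samples=10000, n_features=10,
                  centers=5, n_parts=2, client=client)
print(X.shape, y.shape)  # (10000, 10) (10000,)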
Example #19
File: comms.py Project: isVoid/cuml
async def _func_init_all(sessionId, uniqueId, comms_p2p, worker_info,
                         streams_per_handle):

    session_state = worker_state(sessionId)
    session_state["nccl_uid"] = uniqueId
    session_state["wid"] = worker_info[get_worker().address]["rank"]
    session_state["nworkers"] = len(worker_info)

    if logger.should_log_for(logger.level_debug):
        logger.debug("Initializing NCCL")
        start = time.time()

    _func_init_nccl(sessionId, uniqueId)

    if logger.should_log_for(logger.level_debug):
        elapsed = time.time() - start
        logger.debug("NCCL Initialization took: %f seconds." % elapsed)

    if comms_p2p:
        logger.debug("Initializing UCX Endpoints")

        if logger.should_log_for(logger.level_debug):
            start = time.time()
        await _func_ucp_create_endpoints(sessionId, worker_info)

        if logger.should_log_for(logger.level_debug):
            elapsed = time.time() - start
            logger.debug("Done initializing UCX endpoints. Took: %f seconds." %
                         elapsed)
            logger.debug("Building handle")

        _func_build_handle_p2p(sessionId, streams_per_handle)

        logger.debug("Done building handle.")

    else:
        _func_build_handle(sessionId, streams_per_handle)