예제 #1
0
def test_redirected_logger():
    """Verify that logger output follows a redirected ``sys.stdout``.

    Each level from debug through critical is emitted while stdout is
    redirected to an in-memory buffer, and the message is then expected to
    be present in that buffer. Finally, a debug message is logged with
    stdout redirected to ``None`` to make sure this does not raise.
    """
    captured = StringIO()

    # Trace is deliberately absent: CUML_LOG_TRACE is not compiled by
    # default, so it is not tested here.
    cases = (
        ("debug", "This is a debug message"),
        ("info", "This is an info message"),
        ("warn", "This is a warn message"),
        ("error", "This is an error message"),
        ("critical", "This is a critical message"),
    )

    with logger.set_level(logger.level_trace):
        for level_name, expected in cases:
            with redirect_stdout(captured):
                getattr(logger, level_name)(expected)
            # The buffer accumulates across iterations; each message is
            # distinct, so a substring check is sufficient.
            assert expected in captured.getvalue()

    # Check that logging does not error with sys.stdout of None
    with redirect_stdout(None):
        logger.debug("This is a debug message")
예제 #2
0
def with_timeout(timeout, target, args=(), kwargs=None):
    """Run ``target(*args, **kwargs)`` in a forked process with a time limit.

    Don't wait if the sklearn function takes really too long.

    Parameters
    ----------
    timeout : float
        Maximum number of seconds to wait for the subprocess result.
    target : callable
        Function to execute.
    args : tuple (default: ())
        Positional arguments forwarded to `target`.
    kwargs : dict or None (default: None)
        Keyword arguments forwarded to `target`. None means no keyword
        arguments.

    Returns
    -------
    The value returned by `target`.

    Raises
    ------
    RuntimeError
        If `target` raised an exception inside the subprocess.
    TimeoutError
        If no result arrived within `timeout` seconds; the subprocess is
        terminated before raising.

    Notes
    -----
    If the "fork" start method is unavailable, `target` is called directly
    in the current process and `timeout` is not enforced.
    """
    # Avoid a shared mutable default argument.
    if kwargs is None:
        kwargs = {}

    try:
        ctx = mp.get_context('fork')
    except ValueError:
        logger.warn('"fork" multiprocessing start method is not available. '
                    'The sklearn model will run in the same process and '
                    'cannot be killed if it runs too long.')
        return target(*args, **kwargs)
    q = ctx.Queue()

    def target_res():
        # Runs in the child: always report back so the parent never waits
        # for the full timeout when the target simply failed.
        try:
            q.put((True, target(*args, **kwargs)))
        except BaseException as e:  # noqa E722
            print("Test subprocess failed with an exception: ", e)
            q.put((False, None))

    p = ctx.Process(target=target_res)
    p.start()
    try:
        success, val = q.get(True, timeout)
    except Empty:
        p.terminate()
        # Reap the terminated child so it does not linger as a zombie.
        p.join()
        raise TimeoutError()

    # The result has been fully received, so the child is about to exit;
    # joining here releases its resources.
    p.join()

    if not success:
        raise RuntimeError("Got an exception in the subprocess.")
    return val
예제 #3
0
def test_logger():
    """Smoke-test the logger: every level, level scoping and patterns.

    Emits one message per level, then checks that inside a ``set_level``
    context for the warn level, warn is reported as loggable while info is
    not, and finally logs an info message under a custom ``"%v"`` pattern.
    """
    messages = (
        ("trace", "This is a trace message"),
        ("debug", "This is a debug message"),
        ("info", "This is an info message"),
        ("warn", "This is a warn message"),
        ("error", "This is a error message"),
        ("critical", "This is a critical message"),
    )
    for level_name, text in messages:
        getattr(logger, level_name)(text)

    with logger.set_level(logger.level_warn):
        assert logger.should_log_for(logger.level_warn)
        assert not logger.should_log_for(logger.level_info)

    with logger.set_pattern("%v"):
        logger.info("This is an info message")
예제 #4
0
 def _warn_for_unused_params(self):
     """Warn if ``stop_words`` is set while ``analyzer`` is not "word"."""
     analyzer_is_not_word = self.analyzer != "word"
     # Nothing to report when the analyzer is "word" or no stop words
     # were supplied.
     if not analyzer_is_not_word or self.stop_words is None:
         return

     logger.warn(
         "The parameter 'stop_words' will not be used since "
         "'analyzer' != 'word'"
     )
예제 #5
0
def input_to_cuml_array(X,
                        order='F',
                        deepcopy=False,
                        check_dtype=False,
                        convert_to_dtype=False,
                        check_cols=False,
                        check_rows=False,
                        fail_on_order=False,
                        force_contiguous=True):
    """
    Convert input X to CumlArray.

    Acceptable input formats:

    * cuDF Dataframe - returns a deep copy always.
    * cuDF Series - returns by reference or a deep copy depending on
        `deepcopy`.
    * Numpy array - returns a copy in device always
    * cuda array interface compliant array (like Cupy) - returns a
        reference unless `deepcopy`=True.
    * numba device array - returns a reference unless deepcopy=True

    Parameters
    ----------

    X : cuDF.DataFrame, cuDF.Series, NumPy array, Pandas DataFrame, Pandas
        Series or any cuda_array_interface (CAI) compliant array like CuPy,
        Numba or pytorch.

    order: 'F', 'C' or 'K' (default: 'F')
        Whether to return a F-major ('F'),  C-major ('C') array or Keep ('K')
        the order of X. Used to check the order of the input. If
        fail_on_order=True, the method will raise ValueError,
        otherwise it will convert X to be of order `order` if needed.
        The order is not checked when the input has a single row or a
        single column.

    deepcopy: boolean (default: False)
        Set to True to always return a deep copy of X.

    check_dtype: np.dtype or list of np.dtype (default: False)
        Set to a np.dtype (or a list of them) to throw an error if X is not
        of one of the given dtypes. Ignored when `convert_to_dtype` is set.

    convert_to_dtype: np.dtype (default: False)
        Set to a dtype if you want X to be converted to that dtype if it is
        not that dtype already.

    check_cols: int (default: False)
        Set to an int `i` to check that input X has `i` columns. Set to False
        (default) to not check at all.

    check_rows: int (default: False)
        Set to an int `i` to check that input X has `i` rows. Set to False
        (default) to not check at all.

    fail_on_order: boolean (default: False)
        Set to True if you want the method to raise a ValueError if X is not
        of order `order`.

    force_contiguous: boolean (default: True)
        Set to True to force CumlArray produced to be contiguous. If `X` is
        non contiguous then a contiguous copy will be done.
        If False, and `X` doesn't need to be converted and is not contiguous,
        the underlying memory underneath the CumlArray will be non contiguous.
        Only affects CAI inputs. Only affects CuPy and Numba device array
        views, all other input methods produce contiguous CumlArrays.

    Returns
    -------
    `cuml_array`: namedtuple('cuml_array', 'array n_rows n_cols dtype')

        A new CumlArray and associated data.

    Raises
    ------
    ValueError
        If X is a cuDF Series containing nulls, if the number of rows or
        columns does not match `check_rows` / `check_cols`, or if the order
        does not match `order` while `fail_on_order` is True.
    TypeError
        If the format of X is not supported, or if its dtype is not one of
        `check_dtype`.

    """

    # dtype conversion

    if convert_to_dtype:
        X = convert_dtype(X, to_dtype=convert_to_dtype)
        # The data was just converted, so a dtype check would be redundant.
        check_dtype = False

    # format conversion

    if isinstance(X, cudf.Series):
        if X.null_count != 0:
            raise ValueError("Error: cuDF Series has missing/null values, "
                             "which are not supported by cuML.")

    # converting pandas to numpy before sending it to CumlArray
    if isinstance(X, (pd.DataFrame, pd.Series)):
        # pandas doesn't support custom order in to_numpy
        X = cp.asarray(X.to_numpy(copy=False), order=order)

    if isinstance(X, cudf.DataFrame):
        # 'K' (keep) is mapped to 'F' for DataFrame inputs.
        if order == 'K':
            X_m = CumlArray(data=X.as_gpu_matrix(order='F'))
        else:
            X_m = CumlArray(data=X.as_gpu_matrix(order=order))

    elif isinstance(X, CumlArray):
        X_m = X

    elif hasattr(X, "__array_interface__") or \
            hasattr(X, "__cuda_array_interface__"):

        # Host arrays (__array_interface__) are always checked for
        # contiguity; device arrays only when force_contiguous is set.
        if force_contiguous or hasattr(X, "__array_interface__"):
            if not _check_array_contiguity(X):
                warn("Non contiguous array or view detected, a "
                     "contiguous copy of the data will be done.")
                X = cp.array(X, order=order, copy=True)

        X_m = CumlArray(data=X)

        # NOTE(review): `deepcopy` is only honoured in this branch; a
        # CumlArray input is returned as-is above - confirm this is intended.
        if deepcopy:
            X_m = copy.deepcopy(X_m)

    else:
        msg = "X matrix format " + str(X.__class__) + " not supported"
        raise TypeError(msg)

    if check_dtype:
        # Accept either a single dtype or a list of acceptable dtypes.
        if not isinstance(check_dtype, list):
            check_dtype = [check_dtype]

        check_dtype = [np.dtype(dtype) for dtype in check_dtype]

        if X_m.dtype not in check_dtype:
            type_str = X_m.dtype
            # Drop the reference to the converted array before raising.
            del X_m
            raise TypeError("Expected input to be of type in " +
                            str(check_dtype) + " but got " + str(type_str))

    # Checks based on parameters

    n_rows = X_m.shape[0]

    if len(X_m.shape) > 1:
        n_cols = X_m.shape[1]
    else:
        n_cols = 1

    # With a single row or column the order check below is skipped.
    if n_cols == 1 or n_rows == 1:
        order = 'K'

    if check_cols:
        if n_cols != check_cols:
            raise ValueError("Expected " + str(check_cols) +
                             " columns but got " + str(n_cols) + " columns.")

    if check_rows:
        if n_rows != check_rows:
            raise ValueError("Expected " + str(check_rows) + " rows but got " +
                             str(n_rows) + " rows.")

    if order != 'K' and X_m.order != order:
        if fail_on_order:
            raise ValueError("Expected " + order_to_str(order) +
                             " major order, but got the opposite.")
        else:
            warn("Expected " + order_to_str(order) + " major order, "
                 "but got the opposite. Converting data, this will "
                 "result in additional memory utilization.")
            X_m = cp.array(X_m, copy=False, order=order)
            X_m = CumlArray(data=X_m)

    return cuml_array(array=X_m, n_rows=n_rows, n_cols=n_cols, dtype=X_m.dtype)