Example no. 1
0
def test_ivfsq_pred(qtype, encodeResidual, nrows, ncols, n_neighbors, nlist):
    """IVF-SQ kNN on well-separated blobs: neighbor-vote labels must match y."""
    params = {
        'nlist': nlist,
        'nprobe': nlist * 0.25,
        'qtype': qtype,
        'encodeResidual': encodeResidual
    }

    X, y = make_blobs(n_samples=nrows, centers=5,
                      n_features=ncols, random_state=0)

    logger.set_level(logger.level_debug)
    model = cuKNN(algorithm="ivfsq", algo_params=params)
    model.fit(X)
    neigh_ind = model.kneighbors(X, n_neighbors=n_neighbors,
                                 return_distance=False)
    # Release the GPU index before the host-side vote/classification step.
    del model
    gc.collect()

    labels, probs = predict(neigh_ind, y, n_neighbors)

    assert array_equal(labels, y)
Example no. 2
0
def test_redirected_logger():
    """Each log level's output should be captured by ``redirect_stdout``.

    The original body repeated the same redirect/emit/assert stanza five
    times; it is folded into a single data-driven loop with identical
    behavior (same messages, same levels, same order).
    """
    new_stdout = StringIO()

    with logger.set_level(logger.level_trace):
        # We do not test trace because CUML_LOG_TRACE is not compiled by
        # default.
        cases = [
            (logger.debug, "This is a debug message"),
            (logger.info, "This is an info message"),
            (logger.warn, "This is a warn message"),
            (logger.error, "This is an error message"),
            (logger.critical, "This is a critical message"),
        ]
        for log_fn, test_msg in cases:
            with redirect_stdout(new_stdout):
                log_fn(test_msg)
            assert test_msg in new_stdout.getvalue()

    # Check that logging does not error with sys.stdout of None
    with redirect_stdout(None):
        logger.debug("This is a debug message")
Example no. 3
0
def test_nearest_neighbors_sparse(shape,
                                  metric,
                                  n_neighbors,
                                  batch_size_index,
                                  batch_size_query):
    """Compare cuML brute-force sparse kNN against scikit-learn."""
    nrows, ncols, density = shape

    # A single row cannot supply more than one neighbor.
    if nrows == 1 and n_neighbors > 1:
        return

    index_mat = cp.sparse.random(nrows, ncols, format='csr',
                                 density=density, random_state=35)
    query_mat = cp.sparse.random(nrows, ncols, format='csr',
                                 density=density, random_state=38)

    if metric == 'jaccard':
        # Jaccard is defined on boolean data; binarize the inputs.
        index_mat = index_mat.astype('bool').astype('float32')
        query_mat = query_mat.astype('bool').astype('float32')

    logger.set_level(logger.level_debug)
    nn = cuKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
               algorithm="brute", output_type="numpy",
               verbose=logger.level_debug,
               algo_params={"batch_size_index": batch_size_index,
                            "batch_size_query": batch_size_query})
    nn.fit(index_mat)

    cuD, cuI = nn.kneighbors(query_mat)

    if metric not in sklearn.neighbors.VALID_METRICS_SPARSE['brute']:
        # sklearn only supports this metric on dense input.
        index_mat = index_mat.todense()
        query_mat = query_mat.todense()

    sknn = skKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
                 algorithm="brute", n_jobs=-1)
    sknn.fit(index_mat.get())

    skD, skI = sknn.kneighbors(query_mat.get())

    cp.testing.assert_allclose(cuD, skD, atol=1e-3, rtol=1e-3)

    # Jaccard & Chebyshev have a high potential for mismatched indices
    # due to duplicate distances. We can ignore the indices in this case.
    if metric not in ['jaccard', 'chebyshev']:
        cp.testing.assert_allclose(cuI, skI, atol=1e-4, rtol=1e-4)
Example no. 4
0
def test_logger():
    """Smoke-test every log level plus the level/pattern context managers."""
    emitters = (
        (logger.trace, "This is a trace message"),
        (logger.debug, "This is a debug message"),
        (logger.info, "This is an info message"),
        (logger.warn, "This is a warn message"),
        (logger.error, "This is a error message"),
        (logger.critical, "This is a critical message"),
    )
    for emit, msg in emitters:
        emit(msg)

    with logger.set_level(logger.level_warn):
        assert logger.should_log_for(logger.level_warn)
        assert not logger.should_log_for(logger.level_info)

    with logger.set_pattern("%v"):
        logger.info("This is an info message")
Example no. 5
0
def test_nearest_neighbors_sparse(nrows, ncols, density, metric, n_neighbors,
                                  batch_size_index, batch_size_query):
    """cuML brute-force sparse self-kNN must agree with scikit-learn."""
    # A single row cannot supply more than one neighbor.
    if nrows == 1 and n_neighbors > 1:
        return

    mat = cp.sparse.random(nrows, ncols, format='csr', density=density,
                           random_state=32)

    logger.set_level(logger.level_info)
    cu_model = cuKNN(metric=metric, n_neighbors=n_neighbors,
                     algorithm="brute", verbose=logger.level_debug,
                     algo_params={"batch_size_index": batch_size_index,
                                  "batch_size_query": batch_size_query})
    cu_model.fit(mat)

    cuD, cuI = cu_model.kneighbors(mat)

    host_mat = mat.get()
    sk_model = skKNN(metric=metric, n_neighbors=n_neighbors,
                     algorithm="brute", n_jobs=-1)
    sk_model.fit(host_mat)

    skD, skI = sk_model.kneighbors(host_mat)

    cp.testing.assert_allclose(cuI, skI, atol=1e-4, rtol=1e-4)
    cp.testing.assert_allclose(cuD, skD, atol=1e-3, rtol=1e-3)
Example no. 6
0
def test_log_flush():
    """Logged text reaches the raw byte buffer only after an explicit flush."""
    raw_buffer = BytesIO()
    wrapped_stdout = TextIOWrapper(raw_buffer)

    with logger.set_level(logger.level_trace):
        msg = "This is a debug message"
        with redirect_stdout(wrapped_stdout):
            logger.debug(msg)
            # The message is still held in the TextIOWrapper's buffer...
            assert msg not in raw_buffer.getvalue().decode('utf-8')
            logger.flush()
            # ...and appears in the underlying bytes only after flushing.
            assert msg in raw_buffer.getvalue().decode('utf-8')

    # Check that logging flush does not error with sys.stdout of None
    with redirect_stdout(None):
        logger.flush()
Example no. 7
0
def test_nearest_neighbors_sparse(metric, nrows, ncols, density, n_neighbors,
                                  batch_size_index, batch_size_query):
    """Brute-force sparse kNN vs. sklearn, tolerating duplicate-distance ties."""
    # A single row cannot supply more than one neighbor.
    if nrows == 1 and n_neighbors > 1:
        return

    index_mat = cupyx.scipy.sparse.random(nrows, ncols, format='csr',
                                          density=density, random_state=35)
    query_mat = cupyx.scipy.sparse.random(nrows, ncols, format='csr',
                                          density=density, random_state=38)

    if metric == 'jaccard':
        # Jaccard is defined on boolean data; binarize the inputs.
        index_mat = index_mat.astype('bool').astype('float32')
        query_mat = query_mat.astype('bool').astype('float32')

    logger.set_level(logger.level_debug)
    cu_model = cuKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
                     algorithm="brute", output_type="numpy",
                     verbose=logger.level_debug,
                     algo_params={"batch_size_index": batch_size_index,
                                  "batch_size_query": batch_size_query})
    cu_model.fit(index_mat)

    cuD, cuI = cu_model.kneighbors(query_mat)

    if metric not in sklearn.neighbors.VALID_METRICS_SPARSE['brute']:
        # sklearn only supports this metric on dense input.
        index_mat = index_mat.todense()
        query_mat = query_mat.todense()

    sk_model = skKNN(metric=metric, p=2.0, n_neighbors=n_neighbors,
                     algorithm="brute", n_jobs=-1)
    sk_model.fit(index_mat.get())

    skD, skI = sk_model.kneighbors(query_mat.get())

    # For some reason, this will occasionally fail w/ a single
    # mismatched element in CI. Allowing the single mismatch for now.
    cp.testing.assert_allclose(cuD, skD, atol=1e-5, rtol=1e-5)

    # Jaccard & Chebyshev have a high potential for mismatched indices
    # due to duplicate distances. We can ignore the indices in this case.
    if metric not in ['jaccard', 'chebyshev']:
        # The actual neighbors returned in the presence of duplicate distances
        # is non-deterministic. If we got to this point, the distances all
        # match between cuml and sklearn. We set a reasonable threshold
        # (.5% in this case) to allow differences from non-determinism.
        diffs = abs(cuI - skI)
        assert (len(diffs[diffs > 0]) / len(np.ravel(skI))) <= 0.005
Example no. 8
0
def random_state():
    random_state = random.randint(0, 1e6)
    with logger.set_level(logger.level_debug):
        logger.debug("Random seed: {}".format(random_state))
    return random_state