Exemple #1
0
def test_output_type_context_mgr(global_output_type, context_type):
    """Check that ``cuml.using_output_type`` overrides the global output type.

    Inside the context manager, ``DBSCAN.labels_`` must come back as
    ``context_type``; once the context has exited, a freshly fitted model
    must report labels in the globally configured type again.
    """
    data = get_small_dataset('numba')

    # Choose a global type for this test: 'cupy', unless the fixture already
    # supplied 'cupy', in which case fall back to 'numpy'.
    if global_output_type == 'cupy':
        outer_type = 'numpy'
    else:
        outer_type = 'cupy'
    cuml.set_global_output_type(outer_type)

    # Fit inside the context manager: the context type should win.
    with cuml.using_output_type(context_type):
        scoped_model = cuml.DBSCAN(eps=1.0, min_samples=1)
        scoped_model.fit(data)

        scoped_labels = scoped_model.labels_

        if context_type != 'numba':
            assert isinstance(scoped_labels, test_output_types[context_type])
        else:
            # 'numba' is verified through is_cuda_array rather than a
            # lookup in test_output_types.
            assert is_cuda_array(scoped_labels)

    # Fit again outside the context manager: the global type applies.
    unscoped_model = cuml.DBSCAN(eps=1.0, min_samples=1)
    unscoped_model.fit(data)

    unscoped_labels = unscoped_model.labels_
    assert isinstance(unscoped_labels, test_output_types[outer_type])
Exemple #2
0
def global_output_type(request):
    """Yield the parametrized output type, then reset cuML's global setting.

    The value handed to the consumer is ``request.param``. When the generator
    is resumed after the yield, the global output type is set back to
    ``None`` so the setting does not carry over.
    """
    yield request.param

    # Teardown: clear whatever global output type was set in the meantime.
    cuml.set_global_output_type(None)
 def check_correct_type(index):
     """Set an output type and report whether it is still in effect later.

     The type is looked up in ``test_output_types_str`` by ``index``. After
     setting it, the function waits 0.5 s and returns ``True`` only if
     ``cuml.global_settings.output_type`` still equals the value it set.
     """
     expected = test_output_types_str[index]
     # Force a race condition: index 0 delays its write by 0.1 s.
     if index == 0:
         sleep(0.1)
     set_global_output_type(expected)
     # Hold long enough for any concurrent caller to change the setting.
     sleep(0.5)
     observed = cuml.global_settings.output_type
     return observed == expected
Exemple #4
0
def test_dask_sql_sg_logistic_regression(
    datatype,
    nrows,
    ncols,
    n_parts,
    wrap_predict
):
    """Train and run a cuML LogisticRegression through SQL model statements.

    A synthetic classification problem is split into train/test parts. The
    training part is registered as table ``train_df`` and used by a
    ``CREATE MODEL`` statement; the test part is registered as ``test_df``
    and scored with ``PREDICT``. The resulting accuracy must be no more than
    0.022 below that of a ``LogisticRegression`` fitted directly on the same
    training data.
    """

    def _as_cudf_frame(features):
        # Column names are the characters with code points 0..ncols-1.
        return cudf.DataFrame(
            features,
            dtype=datatype,
            columns=[chr(code) for code in range(ncols)],
        )

    # The global output type is chosen to match the wrap_predict setting.
    cuml.set_global_output_type("input" if wrap_predict else "cudf")

    features, labels = make_classification(
        n_samples=nrows, n_features=ncols, n_informative=5, random_state=0
    )
    X_train, X_test, y_train, y_test = train_test_split(features, labels)

    # Training table: features plus the 'target' column named in the query.
    training_frame = _as_cudf_frame(X_train)
    training_frame["target"] = y_train
    training_parts = dask_cudf.from_cudf(training_frame, npartitions=n_parts)

    context = Context()
    context.create_table("train_df", training_parts)

    create_model_sql = f"""
        CREATE MODEL model WITH (
            model_class = 'cuml.linear_model.LogisticRegression',
            wrap_predict = {wrap_predict},
            target_column = 'target'
        ) AS (
            SELECT * FROM train_df
        )
    """

    context.sql(create_model_sql)

    # Baseline fitted directly, outside the SQL layer.
    baseline_model = LogisticRegression().fit(X_train, y_train)

    # Test table: features only.
    testing_frame = _as_cudf_frame(X_test)
    testing_parts = dask_cudf.from_cudf(testing_frame, npartitions=n_parts)
    context.create_table("test_df", testing_parts)

    predict_sql = """
        SELECT * FROM PREDICT(
            MODEL model,
            SELECT * FROM test_df
        )
    """

    predictions = context.sql(predict_sql).compute()
    predicted_labels = predictions["target"].to_numpy()
    score = cuml.metrics.accuracy_score(y_test, predicted_labels)

    baseline_score = baseline_model.score(X_test, y_test)
    assert score >= baseline_score - 0.022
Exemple #5
0
def test_global_output_type(global_output_type, input_type):
    """Check that ``DBSCAN.labels_`` follows the global output type.

    The model is fitted on a small dataset of ``input_type`` after the global
    output type has been set; the labels must then be of the global type
    regardless of the input type.
    """
    data = get_small_dataset(input_type)

    cuml.set_global_output_type(global_output_type)

    model = cuml.DBSCAN(eps=1.0, min_samples=1)
    model.fit(data)

    labels = model.labels_

    if global_output_type != 'numba':
        assert isinstance(labels, test_output_types[global_output_type])
    else:
        # 'numba' is verified through is_cuda_array rather than a lookup
        # in test_output_types.
        assert is_cuda_array(labels)
Exemple #6
0
 def __init__(self, operation_type: str, params: Optional[dict] = None) -> None:
     """Initialise the operation and resolve its implementation.

     Args:
         operation_type: Identifier of the operation. It is forwarded to the
             parent initialiser, passed to ``_convert_to_operation`` and
             stored as ``operation_id``.
         params: Optional parameters forwarded unchanged to the parent
             initialiser.
     """
     super().__init__(operation_type, params)
     # Resolve the implementation object for this operation type.
     self.operation_impl = self._convert_to_operation(operation_type)
     self.operation_id = operation_type
     # NOTE(review): constructing an instance changes cuML's global output
     # type to 'numpy' as a side effect — confirm this is intended for every
     # caller, since it is not restored here.
     cuml.set_global_output_type('numpy')