Exemple #1
0
def test_predict_and_score(dataset, datatype, n_neighbors, n_parts, batch_size,
                           client):
    """Compare the client-backed KNN regressor with the reference regressor.

    Both models are fitted on the same train split.  Predictions, neighbor
    indices and neighbor distances must agree according to ``exact_match``,
    and the ``r2_score``-based score of the client-backed model must be
    within ``1e-2`` of the reference score (both rounded to 3 decimals).

    For ``datatype == 'dask_array'`` the score comes from ``d_model.score``;
    for any other datatype it is recomputed by hand from the predictions.
    """
    X_train, X_test, y_train, y_test = dataset
    # Keep the un-partitioned targets around for the hand-made score below.
    host_y_test = y_test

    is_array = datatype == 'dask_array'
    is_cudf = datatype == 'dask_cudf'

    # Reference results from the non-partitioned model.
    ref_model = lKNNReg(n_neighbors=n_neighbors)
    ref_model.fit(X_train, y_train)
    ref_distances, ref_indices = ref_model.kneighbors(X_test)
    ref_predictions = ref_model.predict(X_test)
    local_out = (ref_predictions, ref_indices, ref_distances)
    handmade_local_score = round(float(r2_score(y_test, ref_predictions)), 3)

    # Partition every split, then optionally convert it with to_dask_cudf.
    X_train, X_test, y_train, y_test = [
        generate_dask_array(split, n_parts)
        for split in (X_train, X_test, y_train, y_test)
    ]
    if is_cudf:
        X_train, X_test, y_train, y_test = [
            to_dask_cudf(split, client)
            for split in (X_train, X_test, y_train, y_test)
        ]

    dist_model = dKNNReg(client=client,
                         n_neighbors=n_neighbors,
                         batch_size=batch_size)
    dist_model.fit(X_train, y_train)
    lazy_predictions, lazy_indices, lazy_distances = dist_model.predict(
        X_test, convert_dtype=True)
    distributed_out = da.compute(lazy_predictions, lazy_indices,
                                 lazy_distances)

    if is_array:
        distributed_score = round(float(dist_model.score(X_test, y_test)), 3)

    if is_cudf:
        def _as_matrix(result):
            # DataFrames convert directly; anything else gets a trailing
            # axis appended after to_array().
            if isinstance(result, DataFrame):
                return result.as_matrix()
            return result.to_array()[..., np.newaxis]

        distributed_out = [_as_matrix(result) for result in distributed_out]

    exact_match(local_out, distributed_out)

    if is_array:
        assert distributed_score == pytest.approx(handmade_local_score,
                                                  abs=1e-2)
        return

    # No model-side score was taken: score the computed predictions by hand.
    predictions = distributed_out[0]
    handmade_distributed_score = round(
        float(r2_score(host_y_test, predictions)), 3)
    assert handmade_distributed_score == pytest.approx(
        handmade_local_score, abs=1e-2)
Exemple #2
0
def test_predict(dataset, datatype, n_neighbors, n_parts, batch_size, client):
    """Check that client-backed KNN predictions agree with the reference model.

    Fits ``lKNNReg`` on the raw train split and ``dKNNReg`` on the partitioned
    one, then compares predictions, neighbor indices and neighbor distances
    with ``match_test`` and asserts that ``accuracy_score`` between both
    prediction sets exceeds 0.12.

    When ``n_parts`` is falsy, the number of partitions defaults to the number
    of entries reported by ``client.has_what()``.
    """
    X_train, X_test, y_train, y_test = dataset

    # Reference results from the non-partitioned model.
    l_model = lKNNReg(n_neighbors=n_neighbors)
    l_model.fit(X_train, y_train)
    l_distances, l_indices = l_model.kneighbors(X_test)
    l_outputs = l_model.predict(X_test)
    local_out = (l_outputs, l_indices, l_distances)

    if not n_parts:
        n_parts = len(client.has_what().keys())

    X_train = generate_dask_array(X_train, n_parts)
    X_test = generate_dask_array(X_test, n_parts)
    y_train = generate_dask_array(y_train, n_parts)

    if datatype == 'dask_cudf':
        X_train = to_dask_cudf(X_train, client)
        X_test = to_dask_cudf(X_test, client)
        y_train = to_dask_cudf(y_train, client)

    d_model = dKNNReg(client=client,
                      n_neighbors=n_neighbors,
                      batch_size=batch_size)
    d_model.fit(X_train, y_train)
    d_outputs, d_indices, d_distances = \
        d_model.predict(X_test, convert_dtype=True)
    distributed_out = da.compute(d_outputs, d_indices, d_distances)

    if datatype == 'dask_cudf':
        # DataFrames convert directly; anything else gets a trailing axis
        # appended after to_array().
        distributed_out = [
            o.as_matrix()
            if isinstance(o, DataFrame) else o.to_array()[..., np.newaxis]
            for o in distributed_out
        ]

    match_test(local_out, distributed_out)
    # The threshold must be asserted: a bare comparison expression is
    # evaluated and thrown away, so the check could never fail.
    assert accuracy_score(local_out[0], distributed_out[0]) > 0.12
def test_predict_and_score(dataset, datatype, parameters, client):
    """Compare client-backed KNN regression output and score with the reference.

    ``parameters`` unpacks to ``(n_neighbors, n_parts, batch_size)``.  The
    predictions of ``dKNNReg`` must agree with those of ``lKNNReg`` according
    to ``exact_match``, and ``d_model.score`` must be within ``1e-2`` of the
    ``r2_score`` computed from the reference predictions (both rounded to
    3 decimals).
    """
    n_neighbors, n_parts, batch_size = parameters
    X_train, X_test, y_train, y_test = dataset

    # Reference predictions and score from the non-partitioned model.
    ref_model = lKNNReg(n_neighbors=n_neighbors)
    ref_model.fit(X_train, y_train)
    ref_predictions = ref_model.predict(X_test)
    handmade_local_score = round(float(r2_score(y_test, ref_predictions)), 3)

    # Partition every split, then optionally convert it with to_dask_cudf.
    X_train, X_test, y_train, y_test = [
        generate_dask_array(split, n_parts)
        for split in (X_train, X_test, y_train, y_test)
    ]
    if datatype == 'dask_cudf':
        X_train, X_test, y_train, y_test = [
            to_dask_cudf(split, client)
            for split in (X_train, X_test, y_train, y_test)
        ]

    dist_model = dKNNReg(client=client,
                         n_neighbors=n_neighbors,
                         batch_size=batch_size)
    dist_model.fit(X_train, y_train)
    predictions = dist_model.predict(X_test, convert_dtype=True).compute()

    # DataFrame results are converted with to_numpy() before comparison.
    if isinstance(predictions, DataFrame):
        predictions = predictions.to_numpy()

    exact_match(ref_predictions, predictions)

    distributed_score = round(float(dist_model.score(X_test, y_test)), 3)
    assert distributed_score == pytest.approx(handmade_local_score, abs=1e-2)