def test_predict_and_score(dataset, datatype, n_neighbors, n_parts,
                           batch_size, client):
    """Compare distributed KNN regression output and score to a local model.

    A local ``lKNNReg`` is fitted on the raw ``dataset`` to produce the
    reference predictions, neighbor indices, distances and a rounded
    ``r2_score``. The same data is then wrapped with ``generate_dask_array``
    (and ``to_dask_cudf`` when ``datatype == 'dask_cudf'``), fed to a
    ``dKNNReg`` and the computed results are checked with ``exact_match``.

    For ``'dask_array'`` the score comes from ``dKNNReg.score``; for any
    other datatype it is recomputed by hand with ``r2_score`` from the
    computed predictions. Either way it must be within ``1e-2`` of the
    local score.
    """
    X_train, X_test, y_train, y_test = dataset
    # Keep a handle on the un-distributed targets for the hand-made score.
    host_y_test = y_test
    use_cudf = datatype == 'dask_cudf'
    use_dask_array = datatype == 'dask_array'

    # Local reference model.
    reference = lKNNReg(n_neighbors=n_neighbors)
    reference.fit(X_train, y_train)
    ref_distances, ref_indices = reference.kneighbors(X_test)
    ref_preds = reference.predict(X_test)
    local_out = (ref_preds, ref_indices, ref_distances)
    expected_score = round(float(r2_score(y_test, ref_preds)), 3)

    # Distribute the inputs.
    X_train, X_test, y_train, y_test = [
        generate_dask_array(part, n_parts)
        for part in (X_train, X_test, y_train, y_test)
    ]
    if use_cudf:
        X_train, X_test, y_train, y_test = [
            to_dask_cudf(part, client)
            for part in (X_train, X_test, y_train, y_test)
        ]

    # Distributed model.
    dist_model = dKNNReg(client=client,
                         n_neighbors=n_neighbors,
                         batch_size=batch_size)
    dist_model.fit(X_train, y_train)
    d_outputs, d_indices, d_distances = dist_model.predict(
        X_test, convert_dtype=True)
    distributed_out = da.compute(d_outputs, d_indices, d_distances)

    if use_dask_array:
        distributed_score = round(
            float(dist_model.score(X_test, y_test)), 3)

    if use_cudf:
        def _materialize(obj):
            # DataFrames go through as_matrix(); anything else through
            # to_array() with a trailing axis appended.
            if isinstance(obj, DataFrame):
                return obj.as_matrix()
            return obj.to_array()[..., np.newaxis]

        distributed_out = [_materialize(obj) for obj in distributed_out]

    exact_match(local_out, distributed_out)

    if not use_dask_array:
        # No model-side score was taken here; derive it from predictions.
        y_pred = distributed_out[0]
        distributed_score = round(float(r2_score(host_y_test, y_pred)), 3)

    assert distributed_score == pytest.approx(expected_score, abs=1e-2)
def test_predict(dataset, datatype, n_neighbors, n_parts, batch_size, client):
    """Check distributed KNN regression predictions against a local model.

    A local ``lKNNReg`` is fitted on the raw ``dataset`` to obtain reference
    predictions, neighbor indices and distances. The training and query
    data are then wrapped with ``generate_dask_array`` (and ``to_dask_cudf``
    when ``datatype == 'dask_cudf'``), a ``dKNNReg`` is fitted on them, and
    its computed outputs are compared to the reference with ``match_test``
    and an ``accuracy_score`` threshold.

    Parameters
    ----------
    dataset : tuple
        ``(X_train, X_test, y_train, y_test)``.
    datatype : str
        ``'dask_cudf'`` triggers the dask-cudf conversion path; any other
        value leaves the inputs as produced by ``generate_dask_array``.
    n_neighbors : int
        Passed to both the local and the distributed model.
    n_parts : int or None
        Number of partitions; a falsy value falls back to the number of
        entries reported by ``client.has_what()``.
    batch_size : int
        Passed to the distributed model.
    client
        Client handed to ``dKNNReg`` and ``to_dask_cudf``.
    """
    X_train, X_test, y_train, y_test = dataset

    # Local reference model.
    l_model = lKNNReg(n_neighbors=n_neighbors)
    l_model.fit(X_train, y_train)
    l_distances, l_indices = l_model.kneighbors(X_test)
    l_outputs = l_model.predict(X_test)
    local_out = (l_outputs, l_indices, l_distances)

    if not n_parts:
        # One partition per entry in the client's has_what() mapping.
        n_parts = len(client.has_what())

    X_train = generate_dask_array(X_train, n_parts)
    X_test = generate_dask_array(X_test, n_parts)
    y_train = generate_dask_array(y_train, n_parts)

    if datatype == 'dask_cudf':
        X_train = to_dask_cudf(X_train, client)
        X_test = to_dask_cudf(X_test, client)
        y_train = to_dask_cudf(y_train, client)

    # Distributed model.
    d_model = dKNNReg(client=client,
                      n_neighbors=n_neighbors,
                      batch_size=batch_size)
    d_model.fit(X_train, y_train)
    d_outputs, d_indices, d_distances = d_model.predict(
        X_test, convert_dtype=True)
    distributed_out = da.compute(d_outputs, d_indices, d_distances)

    if datatype == 'dask_cudf':
        # DataFrames go through as_matrix(); anything else through
        # to_array() with a trailing axis appended.
        distributed_out = [
            o.as_matrix() if isinstance(o, DataFrame)
            else o.to_array()[..., np.newaxis]
            for o in distributed_out
        ]

    match_test(local_out, distributed_out)
    # This comparison used to be a bare expression whose result was thrown
    # away, so the threshold was never actually enforced.
    assert accuracy_score(local_out[0], distributed_out[0]) > 0.12
def test_predict_and_score(dataset, datatype, parameters, client):
    """Compare distributed KNN regression output and score to a local model.

    ``parameters`` unpacks to ``(n_neighbors, n_parts, batch_size)``. A local
    ``lKNNReg`` provides the reference predictions and a rounded
    ``r2_score``. The data is then wrapped with ``generate_dask_array`` (and
    ``to_dask_cudf`` when ``datatype == 'dask_cudf'``), a ``dKNNReg`` is
    fitted, its computed predictions are checked with ``exact_match`` and
    its ``score`` must be within ``1e-2`` of the local score.
    """
    n_neighbors, n_parts, batch_size = parameters
    X_train, X_test, y_train, y_test = dataset

    # Local reference model.
    reference = lKNNReg(n_neighbors=n_neighbors)
    reference.fit(X_train, y_train)
    ref_preds = reference.predict(X_test)
    expected_score = round(float(r2_score(y_test, ref_preds)), 3)

    # Distribute the inputs.
    X_train, X_test, y_train, y_test = [
        generate_dask_array(part, n_parts)
        for part in (X_train, X_test, y_train, y_test)
    ]
    if datatype == 'dask_cudf':
        X_train, X_test, y_train, y_test = [
            to_dask_cudf(part, client)
            for part in (X_train, X_test, y_train, y_test)
        ]

    # Distributed model.
    dist_model = dKNNReg(client=client,
                         n_neighbors=n_neighbors,
                         batch_size=batch_size)
    dist_model.fit(X_train, y_train)
    dist_preds = dist_model.predict(X_test, convert_dtype=True).compute()
    if isinstance(dist_preds, DataFrame):
        dist_preds = dist_preds.to_numpy()

    exact_match(ref_preds, dist_preds)

    actual_score = round(float(dist_model.score(X_test, y_test)), 3)
    assert actual_score == pytest.approx(expected_score, abs=1e-2)