def test_log_loss_random(n_samples, dtype):
    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 10, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.rand(n_samples, 10))

    assert_almost_equal(log_loss(y_true, y_pred),
                        sklearn_log_loss(y_true, y_pred))

def test_roc_auc_score_random(n_samples, dtype):
    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    auc = roc_auc_score(y_true, y_pred)
    skl_auc = sklearn_roc_auc_score(y_true, y_pred)
    assert_almost_equal(auc, skl_auc)

def test_average_precision_score_random(n_samples, dtype):
    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    ap = average_precision_score(y_true, y_pred)
    skl_ap = sklearn_average_precision_score(y_true, y_pred)
    assert_almost_equal(ap, skl_ap)

def test_completeness_score_big_array(use_handle, input_range):
    a, b, _, _ = generate_random_labels(
        lambda rd: rd.randint(*input_range, int(10e4), dtype=np.int32))

    score = score_completeness(a, b, use_handle)
    ref = sk_completeness_score(a, b)
    np.testing.assert_almost_equal(score, ref, decimal=4)

def test_mutual_info_score_many_blocks(use_handle, input_range, n_samples):
    a, b, _, _ = generate_random_labels(
        lambda rd: rd.randint(*input_range, n_samples, dtype=np.int32))

    score = score_mutual_info(a, b, use_handle)
    ref = sk_mutual_info_score(a, b)
    np.testing.assert_almost_equal(score, ref, decimal=4)

def test_homogeneity_completeness_symmetry(use_handle, input_range):
    a, b, _, _ = generate_random_labels(
        lambda rd: rd.randint(*input_range, int(10e3), dtype=np.int32))

    # The two scores are symmetric under swapping the label arrays:
    # homogeneity(a, b) should equal completeness(b, a).
    hom = score_homogeneity(a, b, use_handle)
    com = score_completeness(b, a, use_handle)
    np.testing.assert_almost_equal(hom, com, decimal=4)

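# The score_homogeneity / score_completeness / score_mutual_info helpers used
# above are not shown in this excerpt. A minimal sketch of what such a wrapper
# might look like, assuming `import cuml` at module top, that
# cuml.metrics.completeness_score accepts an optional handle argument, and
# reusing the get_handle(use_handle) utility the entropy tests below rely on;
# the exact signature is an assumption, not something this excerpt confirms.
def score_completeness(ground_truth, predictions, use_handle):
    # Build an optional cuML handle (None lets the metric fall back to a
    # default one) and score the clustering on the GPU.
    handle, stream = get_handle(use_handle)
    return cuml.metrics.completeness_score(ground_truth, predictions,
                                           handle=handle)
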
def test_confusion_matrix_multiclass_subset_labels(labels):
    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 3, 10).astype(np.int32))

    ref = sk_confusion_matrix(y_true, y_pred, labels=labels)

    labels = cp.array(labels, dtype=np.int32)
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)

def test_confusion_matrix_random(n_samples, dtype, problem_type):
    upper_range = 2 if problem_type == 'binary' else 1000

    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype))

    cm = confusion_matrix(y_true, y_pred)
    ref = sk_confusion_matrix(y_true, y_pred)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)

def test_precision_recall_curve_random(n_samples, dtype):
    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_score, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    precision_using_sk, recall_using_sk, thresholds_using_sk = \
        sklearn_precision_recall_curve(y_true, y_score)

    precision, recall, thresholds = precision_recall_curve(y_true, y_score)

    assert array_equal(precision, precision_using_sk)
    assert array_equal(recall, recall_using_sk)
    assert array_equal(thresholds, thresholds_using_sk)

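# array_equal above is a test utility rather than numpy.array_equal: the cuML
# results live on the GPU and small numerical differences against scikit-learn
# are expected. A hypothetical sketch of such a helper (the reuse of the name,
# the tolerance, and the CuPy-based host transfer are all assumptions):
def array_equal(a, b, tol=1e-4):
    # Bring both operands back to host memory and compare elementwise within
    # an absolute tolerance.
    a_host = cp.asnumpy(cp.asarray(a))
    b_host = cp.asnumpy(cp.asarray(b))
    return a_host.shape == b_host.shape and np.allclose(a_host, b_host,
                                                        atol=tol)
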
def test_confusion_matrix_multiclass_subset_labels(labels, client):
    y_true, y_pred, np_y_true, np_y_pred = generate_random_labels(
        lambda rng: rng.randint(0, 3, 10).astype(np.int32), as_cupy=True)
    # Wrap the CuPy arrays as Dask arrays for the distributed metric.
    y_true, y_pred = da.from_array(y_true), da.from_array(y_pred)

    # The scikit-learn reference is computed on the host copies.
    ref = sk_confusion_matrix(np_y_true, np_y_pred, labels=labels)

    labels = cp.array(labels, dtype=np.int32)
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)

def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype):
    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 10, n_samples).astype(dtype))

    if weights_dtype == 'int':
        sample_weight = np.random.RandomState(0).randint(0, 10, n_samples)
    else:
        sample_weight = np.random.RandomState(0).rand(n_samples)

    cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    ref = sk_confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)

def test_confusion_matrix_random(n_samples, dtype, problem_type, cluster):
    # This test manages its own client from the cluster fixture and closes it
    # at the end, unlike the tests that take a ready-made client fixture.
    client = Client(cluster)
    upper_range = 2 if problem_type == 'binary' else 1000

    y_true, y_pred, np_y_true, np_y_pred = generate_random_labels(
        lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype),
        as_cupy=True)
    y_true, y_pred = da.from_array(y_true), da.from_array(y_pred)

    cm = confusion_matrix(y_true, y_pred)
    ref = sk_confusion_matrix(np_y_true, np_y_pred)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)
    client.close()

def test_entropy_random(n_samples, base, use_handle):
    handle, stream = get_handle(use_handle)

    clustering, _, _, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # generate unnormalized probabilities from the clustering
    pk = np.bincount(clustering)

    # scipy's entropy uses probabilities
    sp_S = sp_entropy(pk, base=base)
    # we use a clustering
    S = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)

    assert_almost_equal(S, sp_S, decimal=2)

def test_regression_metrics_random(n_samples, dtype, function):
    if dtype == np.float32 and n_samples == 500000:
        # stress test for float32 fails because of floating point precision
        pytest.xfail()

    y_true, y_pred, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    cuml_reg, sklearn_reg = {
        'mse': (mean_squared_error, sklearn_mse),
        'mae': (mean_absolute_error, sklearn_mae),
        'msle': (mean_squared_log_error, sklearn_msle)
    }[function]

    res = cuml_reg(y_true, y_pred, multioutput='raw_values')
    ref = sklearn_reg(y_true, y_pred, multioutput='raw_values')
    cp.testing.assert_array_almost_equal(res, ref, decimal=2)

def test_entropy_random(n_samples, base, use_handle):
    if has_scipy():
        from scipy.stats import entropy as sp_entropy
    else:
        pytest.skip('Skipping test_entropy_random because Scipy is missing')

    handle, stream = get_handle(use_handle)

    clustering, _, _, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # generate unnormalized probabilities from the clustering
    pk = np.bincount(clustering)

    # scipy's entropy uses probabilities
    sp_S = sp_entropy(pk, base=base)
    # we use a clustering
    S = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)

    assert_almost_equal(S, sp_S, decimal=2)

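# get_handle is another shared test utility that is not part of this excerpt.
# A possible sketch, assuming cuml.Handle is the handle type and that the
# stream element of the returned pair may be None (it is unused above); both
# points are assumptions rather than the project's actual helper:
def get_handle(use_handle):
    if not use_handle:
        # The metric creates or reuses a default handle internally.
        return None, None
    handle = cuml.Handle()
    return handle, None
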
def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype,
                                         client):
    y_true, y_pred, np_y_true, np_y_pred = generate_random_labels(
        lambda rng: rng.randint(0, 10, n_samples).astype(dtype), as_cupy=True)
    y_true, y_pred = da.from_array(y_true), da.from_array(y_pred)

    if weights_dtype == 'int':
        sample_weight = np.random.RandomState(0).randint(0, 10, n_samples)
    else:
        sample_weight = np.random.RandomState(0).rand(n_samples)

    # Compute the scikit-learn reference with the host weights before moving
    # them to the GPU and wrapping them as a Dask array for cuML.
    ref = sk_confusion_matrix(np_y_true, np_y_pred,
                              sample_weight=sample_weight)

    sample_weight = cp.array(sample_weight)
    sample_weight = da.from_array(sample_weight)

    cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    cp.testing.assert_array_almost_equal(ref, cm, decimal=4)

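# generate_random_labels itself is not defined in this excerpt. From its usage
# above it takes a lambda over a NumPy RandomState, accepts an as_cupy flag,
# and returns two device copies followed by the two host arrays. A minimal
# sketch under those assumptions (the fixed seed and the Numba/CuPy device
# types are guesses, not the real utility; np and cp come from the module's
# existing imports):
from numba import cuda  # assumed dependency for the default device copies


def generate_random_labels(random_generation_lambda, seed=1234, as_cupy=False):
    rng = np.random.RandomState(seed)   # fixed seed keeps the tests reproducible
    a = random_generation_lambda(rng)   # host (NumPy) labels or scores
    b = random_generation_lambda(rng)
    if as_cupy:
        return cp.array(a), cp.array(b), a, b
    # Default: hand back device arrays plus the original host arrays.
    return cuda.to_device(a), cuda.to_device(b), a, b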