def test_non_consecutive_labels_ari():
    """Regression test: ARI on predicted labels whose values have gaps.

    The two predicted labelings induce the same partition; the second one
    merely renames label 1 to 4, leaving unused values in between. Both
    are scored against the same ground truth and checked against 0.24
    with a precision argument of 2.
    """
    truth = (0, 0, 0, 1, 1, 1)
    predictions = (
        [0, 1, 0, 1, 2, 2],  # consecutive label values
        [0, 4, 0, 4, 2, 2],  # same partition, label 1 renamed to 4
    )

    # Compute every score before asserting, so both calls always run.
    scores = [adjusted_rand_score(list(truth), pred) for pred in predictions]
    for score in scores:
        assert_almost_equal(score, 0.24, 2)
def test_RxC_metrics():
    """Alternative implementations should coincide for RxC matrices.

    Runs 100 trials. Each trial draws two random integer label vectors of
    length 20 with values in 0..4, builds a ``ClusteringMetrics`` object
    from them, and checks that the metrics under test agree with the
    reference functions (``sklearn_*`` -- presumably scikit-learn's
    implementations imported under aliases; confirm against the file's
    imports).

    The random draws are not seeded here, so every run exercises a
    different set of label vectors.
    """
    # ``range`` rather than the Python 2-only ``xrange`` so that the test
    # also runs on Python 3; for 100 iterations the two are equivalent.
    for _ in range(100):
        ltrue = np.random.randint(low=0, high=5, size=(20,))
        lpred = np.random.randint(low=0, high=5, size=(20,))
        cm = ClusteringMetrics.from_labels(ltrue, lpred)

        # homogeneity, completeness, V-measure
        expected_v = cm.vi_similarity_m3()
        expected_hcv = sklearn_hcv(ltrue, lpred)
        actual_hcv = cm.entropy_scores()
        assert_array_almost_equal(actual_hcv, expected_hcv)
        # The third entropy score is compared with the value returned by
        # vi_similarity_m3().
        assert_array_almost_equal(actual_hcv[2], expected_v)

        # mutual information score
        expected_mi = sklearn_mi(ltrue, lpred)
        actual_mi = mutual_info_score(ltrue, lpred)
        assert_array_almost_equal(actual_mi, expected_mi)

        # adjusted mutual information
        expected_ami = sklearn_ami(ltrue, lpred)
        actual_ami = adjusted_mutual_info_score(ltrue, lpred)
        assert_array_almost_equal(actual_ami, expected_ami)

        # adjusted rand index
        expected_ari = sklearn_ari(ltrue, lpred)
        actual_ari = adjusted_rand_score(ltrue, lpred)
        assert_array_almost_equal(actual_ari, expected_ari)
def test_ari_nan():
    """Adjusted Rand score of two empty labelings should be NaN."""
    no_true_labels, no_pred_labels = [], []
    score = adjusted_rand_score(no_true_labels, no_pred_labels)
    score_is_nan = np.isnan(score)
    assert_true(score_is_nan)