def test_RxC_metrics():
    """Alternative implementations should coincide for RxC matrices.

    Draws 100 random pairs of 20-element label vectors with values in
    ``[0, 5)`` and checks, for each pair, that the metrics under test agree
    with the ``sklearn_*`` reference functions (presumably aliases of the
    scikit-learn implementations -- confirm against the file's imports).

    No random seed is set inside this test, so the sampled labels may
    differ between runs unless a seed is fixed elsewhere.
    """
    # ``range`` rather than ``xrange``: the latter is not a builtin on
    # Python 3, and a 100-element range is cheap on Python 2 as well.
    for _ in range(100):
        ltrue = np.random.randint(low=0, high=5, size=(20,))
        lpred = np.random.randint(low=0, high=5, size=(20,))
        cm = ClusteringMetrics.from_labels(ltrue, lpred)

        # homogeneity, completeness, V-measure
        expected_v = cm.vi_similarity_m3()
        expected_hcv = sklearn_hcv(ltrue, lpred)
        actual_hcv = cm.entropy_scores()
        assert_array_almost_equal(actual_hcv, expected_hcv)
        # The third entropy score is also cross-checked against the value
        # returned by ``vi_similarity_m3``.
        assert_array_almost_equal(actual_hcv[2], expected_v)

        # mutual information score
        expected_mi = sklearn_mi(ltrue, lpred)
        actual_mi = mutual_info_score(ltrue, lpred)
        assert_array_almost_equal(actual_mi, expected_mi)

        # adjusted mutual information
        expected_ami = sklearn_ami(ltrue, lpred)
        actual_ami = adjusted_mutual_info_score(ltrue, lpred)
        assert_array_almost_equal(actual_ami, expected_ami)

        # adjusted rand index
        expected_ari = sklearn_ari(ltrue, lpred)
        actual_ari = adjusted_rand_score(ltrue, lpred)
        assert_array_almost_equal(actual_ari, expected_ari)
def test_adjusted_mutual_info_score():
    # Compute the Adjusted Mutual Information and test against known values.
    # Each quantity is evaluated with the two labelings in both orders so
    # that symmetry is checked alongside the reference value.
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])

    # Mutual information
    mi_1 = mutual_info_score(labels_a, labels_b)
    assert_almost_equal(mi_1, 0.41022, 5)
    mi_2 = mutual_info_score(labels_b, labels_a)
    assert_almost_equal(mi_2, 0.41022, 5)

    # Expected mutual information, computed from the contingency-table
    # marginals by both implementations and divided by the grand total
    cm = ClusteringMetrics.from_labels(labels_a, labels_b)
    row_totals = np.fromiter(cm.iter_row_totals(), dtype=np.int64)
    col_totals = np.fromiter(cm.iter_col_totals(), dtype=np.int64)

    emi_1a = emi_cython(row_totals, col_totals) / cm.grand_total
    emi_1b = emi_fortran(row_totals, col_totals) / cm.grand_total
    assert_almost_equal(emi_1a, 0.15042, 5)
    assert_almost_equal(emi_1b, 0.15042, 5)

    emi_2a = emi_cython(col_totals, row_totals) / cm.grand_total
    emi_2b = emi_fortran(col_totals, row_totals) / cm.grand_total
    assert_almost_equal(emi_2a, 0.15042, 5)
    assert_almost_equal(emi_2b, 0.15042, 5)

    # Adjusted mutual information (1)
    ami_1 = adjusted_mutual_info_score(labels_a, labels_b)
    assert_almost_equal(ami_1, 0.27502, 5)
    # Swapped argument order: previously this repeated the call above
    # verbatim, so the symmetric case was never exercised.
    ami_2 = adjusted_mutual_info_score(labels_b, labels_a)
    assert_almost_equal(ami_2, 0.27502, 5)

    # Adjusted mutual information (2)
    ami_1 = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
    assert_equal(ami_1, 1.0)
    ami_2 = adjusted_mutual_info_score([2, 2, 3, 3], [1, 1, 2, 2])
    assert_equal(ami_2, 1.0)

    # Test AMI with a very large array (each labeling repeated 110 times)
    a110 = np.tile(labels_a, 110)
    b110 = np.tile(labels_b, 110)
    ami = adjusted_mutual_info_score(a110, b110)
    assert_almost_equal(ami, 0.37, 2)  # not accurate to more than 2 places