def test_emi_matlab(self):
    """Compare EMI values with reference MATLAB code

    http://www.mathworks.com/matlabcentral/fileexchange/33144-the-adjusted-mutual-information

    Checks the adjusted mutual information of two fixed 100-item
    labelings against a hard-coded constant, then checks that the
    Fortran and Cython EMI implementations agree on the same marginals.
    """
    # Adjacent string literals are concatenated by the parser; each piece
    # ends with a space so that ``split()`` sees the original token list.
    ltrue = (
        "11 11 11 11 11 11 11 10 10 10 10 13 13 13 13 13 13 13 13 13 12 "
        "12 12 12 12 15 15 15 15 15 15 15 14 14 14 14 14 17 17 17 17 16 16 16 16 "
        "16 16 19 19 19 19 19 19 19 18 18 18 18 18 18 18 20 20 20 20 20 20 1 1 1 "
        "1 3 3 2 2 2 5 5 5 4 4 4 4 7 7 7 7 7 7 7 7 7 6 6 6 9 9 9 8 8"
    ).split()
    lpred = (
        "1 19 19 13 2 20 20 8 12 5 17 10 10 13 15 20 20 6 9 8 9 10 15 "
        "14 8 11 11 10 13 17 19 5 9 1 2 20 15 19 19 12 14 1 18 18 3 2 5 8 8 7 17 "
        "17 17 16 11 11 14 17 16 6 8 13 17 1 3 7 9 9 1 5 18 13 17 13 12 20 11 4 "
        "14 19 15 13 5 13 12 16 4 4 7 6 6 8 2 16 16 18 3 7 1 10"
    ).split()

    cm = ClusteringMetrics.from_labels(ltrue, lpred)

    # NOTE(review): the expected constant presumably comes from the MATLAB
    # reference implementation linked in the docstring -- confirm.
    ami = cm.adjusted_mutual_info()
    self.assertAlmostEqual(0.0352424389209073, ami, 12)

    # np.fromiter accepts any iterable, so this works whether ``values()``
    # returns a list (Python 2) or a view object (Python 3); np.asarray
    # cannot turn a dict view into an int64 vector.
    rmarg = np.fromiter(cm.row_totals.values(), dtype=np.int64)
    cmarg = np.fromiter(cm.col_totals.values(), dtype=np.int64)

    # Both EMI back ends must agree on the same marginals.
    emi1 = emi_fortran(rmarg, cmarg)
    emi2 = emi_cython(rmarg, cmarg)
    self.assertAlmostEqual(emi1, emi2, 10)
def test_adjusted_mutual_info_score():
    """Check MI, EMI and AMI against known values for a small labeling pair.

    Every quantity is computed with both argument orders so that the
    expected symmetry is exercised, and the expected mutual information
    is computed with both the Cython and the Fortran implementation.
    """
    # Compute the Adjusted Mutual Information and test against known values
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])

    # Mutual information
    mi_1 = mutual_info_score(labels_a, labels_b)
    assert_almost_equal(mi_1, 0.41022, 5)
    mi_2 = mutual_info_score(labels_b, labels_a)
    assert_almost_equal(mi_2, 0.41022, 5)

    # Expected mutual information
    cm = ClusteringMetrics.from_labels(labels_a, labels_b)
    row_totals = np.fromiter(cm.iter_row_totals(), dtype=np.int64)
    col_totals = np.fromiter(cm.iter_col_totals(), dtype=np.int64)

    # The raw EMI results are divided by the grand total before being
    # compared with the reference value.
    emi_1a = emi_cython(row_totals, col_totals) / cm.grand_total
    emi_1b = emi_fortran(row_totals, col_totals) / cm.grand_total
    assert_almost_equal(emi_1a, 0.15042, 5)
    assert_almost_equal(emi_1b, 0.15042, 5)

    emi_2a = emi_cython(col_totals, row_totals) / cm.grand_total
    emi_2b = emi_fortran(col_totals, row_totals) / cm.grand_total
    assert_almost_equal(emi_2a, 0.15042, 5)
    assert_almost_equal(emi_2b, 0.15042, 5)

    # Adjusted mutual information (1)
    ami_1 = adjusted_mutual_info_score(labels_a, labels_b)
    assert_almost_equal(ami_1, 0.27502, 5)
    # Swapped argument order: previously this repeated the call above
    # verbatim, so the symmetry of AMI was never actually checked.
    ami_2 = adjusted_mutual_info_score(labels_b, labels_a)
    assert_almost_equal(ami_2, 0.27502, 5)

    # Adjusted mutual information (2)
    ami_1 = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
    assert_equal(ami_1, 1.0)
    ami_2 = adjusted_mutual_info_score([2, 2, 3, 3], [1, 1, 2, 2])
    assert_equal(ami_2, 1.0)

    # Test AMI with a very large array
    a110 = np.tile(labels_a, 110)
    b110 = np.tile(labels_b, 110)
    ami = adjusted_mutual_info_score(a110, b110)
    assert_almost_equal(ami, 0.37, 2)  # not accurate to more than 2 places