def test_split_join():
    """Test split-join and related metrics

    Example given in http://stats.stackexchange.com/a/25001/37267

    For each of the two clustering pairs below, one clustering can be
    obtained from the other by moving only two points: {9, 10} for the
    first pair, and {11, 12} for the second. The split-join distance is
    therefore the same for both pairs. The Mirkin and VI distances are
    larger for the first pair (C1 and C2). This is not a fault of those
    measures: the clusterings in C3 and C4 do capture more information
    than those in C1 and C2, so their similarities should be greater.
    """
    C1 = [{1, 2, 3, 4, 5, 6, 7, 8}, {9, 10, 11, 12, 13, 14, 15, 16}]
    C2 = [{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {11, 12, 13, 14, 15, 16}]
    cm = ClusteringMetrics.from_partitions(C1, C2)
    assert_equal(cm.mirkin_mismatch_coeff(normalize=False), 56)
    assert_almost_equal(cm.vi_distance(normalize=False), 0.594, 3)
    assert_equal(cm.split_join_distance(normalize=False), 4)

    C3 = [{1, 2, 3, 4}, {5, 6, 7, 8, 9, 10}, {11, 12, 13, 14, 15, 16}]
    C4 = [{1, 2, 3, 4}, {5, 6, 7, 8, 9, 10, 11, 12}, {13, 14, 15, 16}]
    cm = ClusteringMetrics.from_partitions(C3, C4)
    assert_equal(cm.mirkin_mismatch_coeff(normalize=False), 40)
    assert_almost_equal(cm.vi_distance(normalize=False), 0.520, 3)
    assert_equal(cm.split_join_distance(normalize=False), 4)
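# A minimal pure-Python reference for the split-join distance, included for
# illustration only (the helper name ``_split_join`` is ours, not part of
# the library). van Dongen's distance is d(A, B) + d(B, A), where d(A, B)
# sums, over each cluster a in A, the number of points of a left uncovered
# by the best-matching cluster in B.
def _split_join(A, B):
    def oneway(X, Y):
        return sum(len(x) - max(len(x & y) for y in Y) for x in X)
    return oneway(A, B) + oneway(B, A)

# For C1/C2 above, moving {9, 10} costs 2 in each direction, giving 4, in
# agreement with cm.split_join_distance(normalize=False).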
def test_mt_metrics():
    """Table 1 in Vilain et al. (1995)"""
    # row 1
    p1 = ["A B C D".split()]
    p2 = ["A B".split(), "C D".split()]
    cm = ClusteringMetrics.from_partitions(p1, p2)
    assert_array_almost_equal(cm.muc_scores()[:2], [1.0, 0.6667], 4)

    # row 2
    p1 = ["A B".split(), "C D".split()]
    p2 = ["A B C D".split()]
    cm = ClusteringMetrics.from_partitions(p1, p2)
    assert_array_almost_equal(cm.muc_scores()[:2], [0.6667, 1.0], 4)

    # row 3
    p1 = ["A B C D".split()]
    p2 = ["A B C D".split()]
    cm = ClusteringMetrics.from_partitions(p1, p2)
    assert_array_almost_equal(cm.muc_scores()[:2], [1.0, 1.0], 4)

    # row 4 is exactly the same as row 1

    # row 5
    p1 = ["A B C".split()]
    p2 = ["A C".split(), ["B"]]
    cm = ClusteringMetrics.from_partitions(p1, p2)
    assert_array_almost_equal(cm.muc_scores()[:2], [1.0, 0.5], 4)
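# A hedged sketch of the MUC link-based measure from Vilain et al. (1995);
# the helper ``_muc_oneway`` is illustrative and not part of the library.
# Recall = sum(|S| - |p(S)|) / sum(|S| - 1), where p(S) partitions each key
# cluster S by the response clusters; precision swaps key and response.
def _muc_oneway(key, response):
    num = den = 0
    for s in key:
        s = set(s)
        # pieces of s induced by the response, plus one singleton piece per
        # element of s absent from the response altogether
        parts = sum(1 for r in response if s & set(r))
        parts += len(s - set().union(*map(set, response)))
        num += len(s) - parts
        den += len(s) - 1
    return float(num) / den if den else 1.0

# Row 1 above: recall = (4 - 2) / (4 - 1) = 2/3 and precision = 1.0, which
# matches cm.muc_scores()[:2], assuming the return order is
# (precision, recall, f-score).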
def test_bc_metrics():
    """Examples 1 and 2, listing in Figure 9, Bagga & Baldwin (1998)"""
    p1 = ["1 2 3 4 5".split(), "6 7".split(), "8 9 A B C".split()]
    p2 = ["1 2 3 4 5".split(), "6 7 8 9 A B C".split()]
    cm = ClusteringMetrics.from_partitions(p1, p2)
    assert_array_almost_equal(cm.bc_metrics()[:2], [0.76, 1.0], 2)
    assert_array_almost_equal(cm.muc_scores()[:2], [0.9, 1.0], 4)

    p2 = ["1 2 3 4 5 8 9 A B C".split(), "6 7".split()]
    cm = ClusteringMetrics.from_partitions(p1, p2)
    assert_array_almost_equal(cm.bc_metrics()[:2], [0.58, 1.0], 2)
    assert_array_almost_equal(cm.muc_scores()[:2], [0.9, 1.0], 4)
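# A minimal sketch of B-cubed precision/recall (Bagga & Baldwin, 1998); the
# helper ``_bcubed`` is ours and assumes both partitions cover the same
# items. For each item, precision is the fraction of its response cluster
# sharing its key cluster, recall the fraction of its key cluster found in
# its response cluster; both are averaged over all items.
def _bcubed(key, response):
    key_of = {x: frozenset(c) for c in key for x in c}
    resp_of = {x: frozenset(c) for c in response for x in c}
    items = list(key_of)
    prec = sum(len(key_of[x] & resp_of[x]) / float(len(resp_of[x])) for x in items)
    rec = sum(len(key_of[x] & resp_of[x]) / float(len(key_of[x])) for x in items)
    return prec / len(items), rec / len(items)

# Example 1 above: precision = (5*1 + 2*(2/7) + 5*(5/7)) / 12 = 16/21 ~ 0.76
# and recall = 1.0, in line with cm.bc_metrics()[:2].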
def test_RxC_metrics():
    """Alternative implementations should coincide for RxC matrices"""
    for _ in xrange(100):
        ltrue = np.random.randint(low=0, high=5, size=(20,))
        lpred = np.random.randint(low=0, high=5, size=(20,))
        cm = ClusteringMetrics.from_labels(ltrue, lpred)

        # homogeneity, completeness, V-measure
        expected_v = cm.vi_similarity_m3()
        expected_hcv = sklearn_hcv(ltrue, lpred)
        actual_hcv = cm.entropy_scores()
        assert_array_almost_equal(actual_hcv, expected_hcv)
        assert_array_almost_equal(actual_hcv[2], expected_v)

        # mutual information score
        expected_mi = sklearn_mi(ltrue, lpred)
        actual_mi = mutual_info_score(ltrue, lpred)
        assert_array_almost_equal(actual_mi, expected_mi)

        # adjusted mutual information
        expected_ami = sklearn_ami(ltrue, lpred)
        actual_ami = adjusted_mutual_info_score(ltrue, lpred)
        assert_array_almost_equal(actual_ami, expected_ami)

        # adjusted Rand index
        expected_ari = sklearn_ari(ltrue, lpred)
        actual_ari = adjusted_rand_score(ltrue, lpred)
        assert_array_almost_equal(actual_ari, expected_ari)
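# A hedged sketch of the entropy-based scores being cross-checked above,
# computed directly from label sequences (the helper name is ours). With
# H(C|K) the conditional entropy of classes given clusters:
# homogeneity = 1 - H(C|K)/H(C), completeness = 1 - H(K|C)/H(K), and the
# V-measure is their harmonic mean.
import math
from collections import Counter

def _entropy_scores(ltrue, lpred):
    n = float(len(ltrue))
    H = lambda counts: -sum((c / n) * math.log(c / n) for c in counts.values() if c)
    h_true, h_pred = H(Counter(ltrue)), H(Counter(lpred))
    h_joint = H(Counter(zip(ltrue, lpred)))
    # H(C|K) = H(C, K) - H(K), and symmetrically for H(K|C)
    hom = 1.0 - (h_joint - h_pred) / h_true if h_true else 1.0
    com = 1.0 - (h_joint - h_true) / h_pred if h_pred else 1.0
    v = 2.0 * hom * com / (hom + com) if (hom + com) else 0.0
    return hom, com, v

# On the IR-book example used later in this file, this yields
# (0.371468, 0.357908, 0.364562), matching cm.entropy_scores().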
def test_emi_matlab(self):
    """Compare EMI values with reference MATLAB code

    http://www.mathworks.com/matlabcentral/fileexchange/33144-the-adjusted-mutual-information
    """
    ltrue = ("11 11 11 11 11 11 11 10 10 10 10 13 13 13 13 13 13 13 13 13 "
             "12 12 12 12 12 15 15 15 15 15 15 15 14 14 14 14 14 17 17 17 17 "
             "16 16 16 16 16 16 19 19 19 19 19 19 19 18 18 18 18 18 18 18 "
             "20 20 20 20 20 20 1 1 1 1 3 3 2 2 2 5 5 5 4 4 4 4 "
             "7 7 7 7 7 7 7 7 7 6 6 6 9 9 9 8 8").split()
    lpred = ("1 19 19 13 2 20 20 8 12 5 17 10 10 13 15 20 20 6 9 8 "
             "9 10 15 14 8 11 11 10 13 17 19 5 9 1 2 20 15 19 19 12 "
             "14 1 18 18 3 2 5 8 8 7 17 17 17 16 11 11 14 17 16 6 "
             "8 13 17 1 3 7 9 9 1 5 18 13 17 13 12 20 11 4 14 19 "
             "15 13 5 13 12 16 4 4 7 6 6 8 2 16 16 18 3 7 1 10").split()
    cm = ClusteringMetrics.from_labels(ltrue, lpred)
    ami = cm.adjusted_mutual_info()
    self.assertAlmostEqual(0.0352424389209073, ami, 12)

    rmarg = np.asarray(cm.row_totals.values(), dtype=np.int64)
    cmarg = np.asarray(cm.col_totals.values(), dtype=np.int64)
    emi1 = emi_fortran(rmarg, cmarg)
    emi2 = emi_cython(rmarg, cmarg)
    self.assertAlmostEqual(emi1, emi2, 10)
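# A hedged pure-Python reference for expected mutual information under the
# hypergeometric model of randomness (Vinh, Epps & Bailey, 2010); slow but
# simple, and ``_expected_mi`` is our name, not the library's. Judging by
# how emi_fortran/emi_cython are divided by the grand total elsewhere in
# this file, those routines appear to return N times this quantity.
from math import exp, log
from scipy.special import gammaln

def _expected_mi(row_totals, col_totals):
    N = float(sum(row_totals))
    emi = 0.0
    for a in row_totals:
        for b in col_totals:
            for nij in xrange(int(max(1, a + b - N)), int(min(a, b)) + 1):
                term = (nij / N) * log(N * nij / float(a * b))
                # log-probability of observing nij in a cell with margins
                # (a, b) under the hypergeometric null model
                logp = (gammaln(a + 1) + gammaln(b + 1)
                        + gammaln(N - a + 1) + gammaln(N - b + 1)
                        - gammaln(N + 1) - gammaln(nij + 1)
                        - gammaln(a - nij + 1) - gammaln(b - nij + 1)
                        - gammaln(N - a - b + nij + 1))
                emi += term * exp(logp)
    return emi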
def test_RxC_general():
    """General contingency-table methods"""
    for _ in xrange(100):
        size = np.random.randint(4, 100)
        a = np.random.randint(low=0, high=np.random.randint(low=2, high=100), size=(size,))
        b = np.random.randint(low=0, high=np.random.randint(low=2, high=100), size=(size,))
        cm = ClusteringMetrics.from_labels(a, b)
        assert_almost_equal(
            cm.assignment_score(model=None),
            assignment_score_slow(cm, rpad=False, cpad=False))
        assert_almost_equal(
            cm.assignment_score(model=None),
            assignment_score_slow(cm, rpad=True, cpad=True))
        for model in ['m1', 'm2r', 'm2c', 'm3']:
            assert_almost_equal(
                cm.grand_total,
                sum(cm.expected(model=model).itervalues()))
            assert_almost_equal(
                cm.assignment_score(model=model),
                cm.adjust_to_null(cm.assignment_score, model=model)[0])
            assert_almost_equal(
                cm.split_join_similarity(model=model),
                cm.adjust_to_null(cm.split_join_similarity, model=model)[0])
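# A hedged reference for the optimal-assignment score checked above,
# assuming it is the total weight of the best one-to-one row/column
# matching of the contingency table divided by the grand total. The helper
# is ours and relies on SciPy's Hungarian-algorithm solver (SciPy >= 0.17).
import numpy as np
from scipy.optimize import linear_sum_assignment

def _assignment_score(table):
    table = np.asarray(table, dtype=float)
    rows, cols = linear_sum_assignment(-table)  # negate to maximize
    return table[rows, cols].sum() / table.sum()

# e.g. _assignment_score([[1, 4, 0], [5, 1, 2], [0, 1, 3]]) gives
# (4 + 5 + 3) / 17 ~ 0.7059, the value asserted in test_IR_example below.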
def test_perfect():
    """Identical partitions should yield perfect scores"""
    p1 = [['A', 'B', 'C']]
    p2 = [['A', 'B', 'C']]
    cm = ClusteringMetrics.from_partitions(p1, p2)
    assert_almost_equal(cm.assignment_score(), 1.0, 4)
    assert_almost_equal(cm.vi_similarity(), 1.0, 4)
    assert_almost_equal(cm.split_join_similarity(), 1.0, 4)
    assert_almost_equal(cm.talburt_wang_index(), 1.0, 4)
    assert_array_almost_equal(cm.entropy_scores(), (1.0,) * 3, 4)
    assert_array_almost_equal(cm.bc_metrics(), (1.0,) * 3, 4)
    assert_array_almost_equal(cm.muc_scores(), (1.0,) * 3, 4)
def test_m1():
    """M1 model: chance-adjusted scores on uniform tables should be zero"""
    t2 = ClusteringMetrics(rows=10 * np.ones((2, 2), dtype=int))
    t8 = ClusteringMetrics(rows=10 * np.ones((8, 8), dtype=int))
    assert_almost_equal(0.0, t2.vi_similarity_m1())
    assert_almost_equal(0.0, t8.vi_similarity_m1())
    assert_almost_equal(0.0, t2.split_join_similarity_m1())
    assert_almost_equal(0.0, t8.split_join_similarity_m1())
    assert_almost_equal(0.0, t2.assignment_score_m1())
    assert_almost_equal(0.0, t8.assignment_score_m1())
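# A hedged note on why uniform tables score zero: assuming the library uses
# a chance adjustment of the familiar ARI form, a constant contingency
# table's raw score equals its expectation under the null model, so the
# numerator vanishes. A sketch (the helper name is ours):
def _adjust(raw, expected, maximum=1.0):
    return (raw - expected) / (maximum - expected)

# e.g. _adjust(0.25, 0.25) == 0.0 whenever the raw score equals its null
# expectation, regardless of table size.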
def add_incidence_metrics(args, clusters, pairs):
    """Add metrics based on the incidence matrix of classes and clusters"""
    args_metrics = args.metrics
    if set(utils.INCIDENCE_METRICS) & set(args_metrics):
        from lsh_hdc.metrics import ClusteringMetrics
        labels = clusters_to_labels(
            clusters,
            double_negs=bool(args.double_negs),
            join_negs=bool(args.join_negs)
        )
        cm = ClusteringMetrics.from_labels(*labels)

        pairwise_metrics = set(utils.PAIRWISE_METRICS) & set(args_metrics)
        append_scores(cm, pairs, pairwise_metrics)

        contingency_metrics = set(utils.CONTINGENCY_METRICS) & set(args_metrics)
        append_scores(cm, pairs, contingency_metrics)
def test_adjusted_mutual_info_score():
    # Compute the Adjusted Mutual Information and test against known values
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])

    # Mutual information
    mi_1 = mutual_info_score(labels_a, labels_b)
    assert_almost_equal(mi_1, 0.41022, 5)
    mi_2 = mutual_info_score(labels_b, labels_a)
    assert_almost_equal(mi_2, 0.41022, 5)

    # Expected mutual information
    cm = ClusteringMetrics.from_labels(labels_a, labels_b)
    row_totals = np.fromiter(cm.iter_row_totals(), dtype=np.int64)
    col_totals = np.fromiter(cm.iter_col_totals(), dtype=np.int64)
    emi_1a = emi_cython(row_totals, col_totals) / cm.grand_total
    emi_1b = emi_fortran(row_totals, col_totals) / cm.grand_total
    assert_almost_equal(emi_1a, 0.15042, 5)
    assert_almost_equal(emi_1b, 0.15042, 5)
    emi_2a = emi_cython(col_totals, row_totals) / cm.grand_total
    emi_2b = emi_fortran(col_totals, row_totals) / cm.grand_total
    assert_almost_equal(emi_2a, 0.15042, 5)
    assert_almost_equal(emi_2b, 0.15042, 5)

    # Adjusted mutual information (1): symmetric in its arguments
    ami_1 = adjusted_mutual_info_score(labels_a, labels_b)
    assert_almost_equal(ami_1, 0.27502, 5)
    ami_2 = adjusted_mutual_info_score(labels_b, labels_a)
    assert_almost_equal(ami_2, 0.27502, 5)

    # Adjusted mutual information (2)
    ami_1 = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
    assert_equal(ami_1, 1.0)
    ami_2 = adjusted_mutual_info_score([2, 2, 3, 3], [1, 1, 2, 2])
    assert_equal(ami_2, 1.0)

    # Test AMI with a very large array
    a110 = np.array([list(labels_a) * 110]).flatten()
    b110 = np.array([list(labels_b) * 110]).flatten()
    ami = adjusted_mutual_info_score(a110, b110)
    assert_almost_equal(ami, 0.37, 2)  # not accurate to more than 2 places
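# A worked check of the AMI normalization (Vinh et al., 2010), assuming the
# max-entropy denominator: AMI = (MI - EMI) / (max(H(a), H(b)) - EMI).
# Plugging in the reference values above, together with the marginal
# entropies H(labels_a) ~ 1.09508 and H(labels_b) ~ 1.07338 (natural log),
# recovers the asserted AMI to the stated precision:
mi, emi, h_a, h_b = 0.41022, 0.15042, 1.09508, 1.07338
assert abs((mi - emi) / (max(h_a, h_b) - emi) - 0.27502) < 1e-4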
def test_IR_example():
    """Test example from the IR book by Manning et al.

    The example gives 3 clusters and 17 points total. It is described at
    http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-clustering-1.html
    """
    ltrue = (0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2)
    lpred = (0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2, 2, 1, 2, 2, 2)
    cm = ClusteringMetrics.from_labels(ltrue, lpred)

    # test perfect variants
    rd = cm.row_diag()
    cd = cm.col_diag()
    assert_almost_equal(rd.assignment_score(model='m3'), 1.0, 6)
    assert_almost_equal(rd.assignment_score(model='m3', discrete=True), 1.0, 6)
    assert_almost_equal(cd.assignment_score(model='m3'), 1.0, 6)
    assert_almost_equal(cd.assignment_score(model='m3', discrete=True), 1.0, 6)

    # test that no redraws happen by default
    assert_almost_equal(cm.assignment_score(model='m3'),
                        cm.assignment_score(model='m3'), 6)

    ex = cm.expected(discrete=False)
    assert_almost_equal(ex.assignment_score(model='m3'), 0.0, 6)

    # test that H1 results in a greater score than H0
    ex = cm.expected(discrete=True)
    assert_greater(
        cm.assignment_score(model='m3'),
        ex.assignment_score(model='m3'))

    # test entropy metrics
    h, c, v = cm.entropy_scores()
    assert_almost_equal(h, 0.371468, 6)
    assert_almost_equal(c, 0.357908, 6)
    assert_almost_equal(v, 0.364562, 6)

    assert_almost_equal(cm.vi_similarity(model=None), 0.517754, 6)
    assert_almost_equal(cm.vi_similarity(model='m1'), 0.378167, 6)
    assert_almost_equal(cm.vi_similarity(model='m2r'), 0.365605, 6)
    assert_almost_equal(cm.vi_similarity(model='m2c'), 0.377165, 6)
    assert_almost_equal(cm.vi_similarity(model='m3'), 0.364562, 6)

    assert_almost_equal(cm.mirkin_match_coeff(), 0.695502, 6)
    assert_almost_equal(cm.rand_index(), 0.676471, 6)
    assert_almost_equal(cm.fowlkes_mallows(), 0.476731, 6)
    assert_almost_equal(cm.assignment_score(model=None), 0.705882, 6)
    assert_almost_equal(cm.assignment_score(model='m3'), 0.554974, 6)
    assert_almost_equal(cm.chisq_score(), 11.900000, 6)
    assert_almost_equal(cm.g_score(), 13.325845, 6)

    # test metrics based on the pairwise co-association matrix
    conf = cm.pairwise
    assert_almost_equal(conf.chisq_score(), 8.063241, 6)
    assert_almost_equal(conf.g_score(), 7.804221, 6)
    assert_almost_equal(conf.jaccard_coeff(), 0.312500, 6)
    assert_almost_equal(conf.ochiai_coeff(), 0.476731, 6)
    assert_almost_equal(conf.dice_coeff(), 0.476190, 6)
    assert_almost_equal(conf.sokal_sneath_coeff(), 0.185185, 6)
    assert_almost_equal(conf.kappa(), 0.242915, 6)
    assert_almost_equal(conf.accuracy(), 0.676471, 6)
    assert_almost_equal(conf.precision(), 0.500000, 6)
    assert_almost_equal(conf.recall(), 0.454545, 6)

    exp_tw = _talburt_wang_index(ltrue, lpred)
    act_tw = cm.talburt_wang_index()
    assert_almost_equal(exp_tw, act_tw, 6)
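# A small self-contained cross-check of the contingency table behind the
# numbers above (the helper is ours, for illustration):
from collections import Counter
import numpy as np

def _contingency(ltrue, lpred):
    counts = Counter(zip(ltrue, lpred))
    return np.array([[counts[c, k] for k in sorted(set(lpred))]
                     for c in sorted(set(ltrue))])

# For the IR example this gives [[1, 4, 0], [5, 1, 2], [0, 1, 3]]. Its
# purity, (5 + 4 + 3) / 17 ~ 0.71, matches the IR book, and the optimal
# one-to-one assignment also totals 12, consistent with
# cm.assignment_score(model=None) == 0.705882 above.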
if os.path.exists(PATH):
    print "Loading data from %s" % PATH
    with open(PATH, 'rb') as fh:
        ltrue, lpred = pickle.load(fh)
else:
    shape = (ARGS.num_samples,)
    ltrue = np.random.randint(low=0, high=ARGS.max_classes, size=shape)
    lpred = np.random.randint(low=0, high=ARGS.max_clusters, size=shape)
    print "Saving generated data to %s" % PATH
    with open(PATH, 'wb') as fh:
        pickle.dump((ltrue, lpred), fh, protocol=pickle.HIGHEST_PROTOCOL)

if ARGS.implementation == 'oo':
    from lsh_hdc.metrics import ClusteringMetrics
    cm = ClusteringMetrics.from_labels(ltrue, lpred)
    method = getattr(cm, METHODS[ARGS.method][1])
    line = "method()"
elif ARGS.implementation == 'sklearn':
    import sklearn.metrics.cluster as module
    method = getattr(module, METHODS[ARGS.method][0])
    line = "method(ltrue, lpred)"
elif ARGS.implementation == 'proposed':
    import lsh_hdc.metrics as module
    method = getattr(module, METHODS[ARGS.method][0])
    line = "method(ltrue, lpred)"
else:
    raise ValueError('Unknown value for --implementation')

print "Sanity check:"
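# The ``line`` strings above are presumably statements for a timing harness;
# a minimal sketch of how one could be executed with the standard library
# (the setup string and loop count here are our assumptions):
import timeit
timer = timeit.Timer(line, setup="from __main__ import method, ltrue, lpred")
print "%d loops: %.4f sec" % (10, timer.timeit(number=10))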
def matrix_from_labels(*args):
    ltrue, lpred = args
    return ClusteringMetrics.from_labels(ltrue, lpred)