def test_emi_matlab(self):
    """Check AMI against a MATLAB reference value and cross-check EMI backends

    The expected AMI constant comes from the reference MATLAB code at
    http://www.mathworks.com/matlabcentral/fileexchange/33144-the-adjusted-mutual-information
    """
    # Whitespace-separated label sequences; only the tokens produced by
    # split() matter, not the spacing inside the literals.
    true_labels = "11 11 11 11 11 11 11 10 10 10 10 13 13 13 13 13 13 13 13 13 12 \
    12 12 12 12 15 15 15 15 15 15 15 14 14 14 14 14 17 17 17 17 16 16 16 16 \
    16 16 19 19 19 19 19 19 19 18 18 18 18 18 18 18 20 20 20 20 20 20 1 1 1 \
    1 3 3 2 2 2 5 5 5 4 4 4 4 7 7 7 7 7 7 7 7 7 6 6 6 9 9 9 8 8".split()
    pred_labels = "1 19 19 13 2 20 20 8 12 5 17 10 10 13 15 20 20 6 9 8 9 10 15 \
    14 8 11 11 10 13 17 19 5 9 1 2 20 15 19 19 12 14 1 18 18 3 2 5 8 8 7 17 \
    17 17 16 11 11 14 17 16 6 8 13 17 1 3 7 9 9 1 5 18 13 17 13 12 20 11 4 \
    14 19 15 13 5 13 12 16 4 4 7 6 6 8 2 16 16 18 3 7 1 10".split()

    table = ClusteringMetrics.from_labels(true_labels, pred_labels)

    # Adjusted mutual information must match the reference to 12 places.
    observed_ami = table.adjusted_mutual_info()
    self.assertAlmostEqual(0.0352424389209073, observed_ami, 12)

    # Both EMI implementations receive the same marginals and must agree.
    # NOTE(review): assumes row_totals/col_totals .values() return a sequence
    # (as a Python 2 dict does) that np.asarray can convert -- confirm.
    row_marginals = np.asarray(table.row_totals.values(), dtype=np.int64)
    col_marginals = np.asarray(table.col_totals.values(), dtype=np.int64)
    emi_from_fortran = emi_fortran(row_marginals, col_marginals)
    emi_from_cython = emi_cython(row_marginals, col_marginals)
    self.assertAlmostEqual(emi_from_fortran, emi_from_cython, 10)
def test_RxC_metrics():
    """Cross-check metric implementations on random RxC label pairs

    For 100 random pairs of 20-element label vectors (values drawn from
    ``[0, 5)``), results from this package are compared with the
    corresponding ``sklearn_*`` reference functions.
    """
    # (implementation under test, reference implementation), in the order:
    # mutual information, adjusted mutual information, adjusted Rand index
    scalar_score_pairs = (
        (mutual_info_score, sklearn_mi),
        (adjusted_mutual_info_score, sklearn_ami),
        (adjusted_rand_score, sklearn_ari),
    )

    for _ in xrange(100):
        true_labels = np.random.randint(low=0, high=5, size=(20,))
        pred_labels = np.random.randint(low=0, high=5, size=(20,))
        table = ClusteringMetrics.from_labels(true_labels, pred_labels)

        # homogeneity, completeness, V-measure
        reference_v = table.vi_similarity_m3()
        reference_hcv = sklearn_hcv(true_labels, pred_labels)
        observed_hcv = table.entropy_scores()
        assert_array_almost_equal(observed_hcv, reference_hcv)
        # the third entropy score must also equal vi_similarity_m3()
        assert_array_almost_equal(observed_hcv[2], reference_v)

        # scalar scores: the reference is evaluated first, then ours
        for score_fn, reference_fn in scalar_score_pairs:
            reference_value = reference_fn(true_labels, pred_labels)
            observed_value = score_fn(true_labels, pred_labels)
            assert_array_almost_equal(observed_value, reference_value)
def test_RxC_general():
    """General contingency-table methods

    Runs 100 rounds on random label vectors of random length and random
    label range, checking the fast assignment score against the slow
    implementation and the null-model adjustments against each other.
    """
    null_models = ['m1', 'm2r', 'm2c', 'm3']

    for _ in xrange(100):
        # Draw order matters for reproducibility under a seeded RNG:
        # length, then (range, labels) for each of the two vectors.
        length = np.random.randint(4, 100)
        upper_a = np.random.randint(low=2, high=100)
        labels_a = np.random.randint(low=0, high=upper_a, size=(length,))
        upper_b = np.random.randint(low=2, high=100)
        labels_b = np.random.randint(low=0, high=upper_b, size=(length,))
        table = ClusteringMetrics.from_labels(labels_a, labels_b)

        # The unadjusted score must match the slow implementation both
        # without padding and with padding on both axes.
        for pad in (False, True):
            assert_almost_equal(
                table.assignment_score(model=None),
                assignment_score_slow(table, rpad=pad, cpad=pad))

        for model in null_models:
            # Expected table under each model keeps the grand total.
            assert_almost_equal(
                table.grand_total,
                sum(table.expected(model=model).itervalues()))

            # Calling a score with ``model=`` must equal the first element
            # returned by adjust_to_null() for the same score and model.
            for score in (table.assignment_score,
                          table.split_join_similarity):
                assert_almost_equal(
                    score(model=model),
                    table.adjust_to_null(score, model=model)[0])
def add_incidence_metrics(args, clusters, pairs):
    """Append scores derived from the class/cluster incidence matrix

    Does nothing unless at least one name in ``args.metrics`` is listed in
    ``utils.INCIDENCE_METRICS``.

    :param args: options object; ``metrics``, ``double_negs`` and
        ``join_negs`` are read from it
    :param clusters: passed to ``clusters_to_labels`` to obtain labels
    :param pairs: handed to ``append_scores`` together with the metrics
        object and the selected metric names
    """
    requested = args.metrics
    if not (set(utils.INCIDENCE_METRICS) & set(requested)):
        return

    # Imported lazily so the dependency is only loaded when needed.
    from lsh_hdc.metrics import ClusteringMetrics

    label_vectors = clusters_to_labels(
        clusters,
        double_negs=bool(args.double_negs),
        join_negs=bool(args.join_negs)
    )
    metrics_obj = ClusteringMetrics.from_labels(*label_vectors)

    # Pairwise metrics first, then contingency metrics.
    selected_pairwise = set(utils.PAIRWISE_METRICS) & set(requested)
    append_scores(metrics_obj, pairs, selected_pairwise)

    selected_contingency = set(utils.CONTINGENCY_METRICS) & set(requested)
    append_scores(metrics_obj, pairs, selected_contingency)
def test_adjusted_mutual_info_score():
    """Compute MI, EMI and AMI on fixed labelings and compare to known values

    Each quantity is evaluated with the two labelings in both orders so
    that symmetry is exercised as well as the value itself.
    """
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])

    # Mutual information
    mi_1 = mutual_info_score(labels_a, labels_b)
    assert_almost_equal(mi_1, 0.41022, 5)
    mi_2 = mutual_info_score(labels_b, labels_a)
    assert_almost_equal(mi_2, 0.41022, 5)

    # Expected mutual information (both backends, both argument orders).
    # The raw EMI values are divided by the grand total before comparison.
    cm = ClusteringMetrics.from_labels(labels_a, labels_b)
    row_totals = np.fromiter(cm.iter_row_totals(), dtype=np.int64)
    col_totals = np.fromiter(cm.iter_col_totals(), dtype=np.int64)
    emi_1a = emi_cython(row_totals, col_totals) / cm.grand_total
    emi_1b = emi_fortran(row_totals, col_totals) / cm.grand_total
    assert_almost_equal(emi_1a, 0.15042, 5)
    assert_almost_equal(emi_1b, 0.15042, 5)
    emi_2a = emi_cython(col_totals, row_totals) / cm.grand_total
    emi_2b = emi_fortran(col_totals, row_totals) / cm.grand_total
    assert_almost_equal(emi_2a, 0.15042, 5)
    assert_almost_equal(emi_2b, 0.15042, 5)

    # Adjusted mutual information (1)
    ami_1 = adjusted_mutual_info_score(labels_a, labels_b)
    assert_almost_equal(ami_1, 0.27502, 5)
    # Arguments swapped so the symmetry of AMI is actually checked; the
    # previous version repeated the (labels_a, labels_b) call verbatim.
    ami_2 = adjusted_mutual_info_score(labels_b, labels_a)
    assert_almost_equal(ami_2, 0.27502, 5)

    # Adjusted mutual information (2): relabeled but identical partitions
    ami_1 = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
    assert_equal(ami_1, 1.0)
    ami_2 = adjusted_mutual_info_score([2, 2, 3, 3], [1, 1, 2, 2])
    assert_equal(ami_2, 1.0)

    # Test AMI with a very large array (each labeling repeated 110 times)
    a110 = np.array([list(labels_a) * 110]).flatten()
    b110 = np.array([list(labels_b) * 110]).flatten()
    ami = adjusted_mutual_info_score(a110, b110)
    # not accurate to more than 2 places
    assert_almost_equal(ami, 0.37, 2)
def test_IR_example():
    """Check metric values on the clustering example from the IR book

    The example (Manning et al.) has 3 clusters and 17 points in total.
    It is described at
    http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-clustering-1.html
    """
    true_labels = (0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2)
    pred_labels = (0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 2, 2, 1, 2, 2, 2)
    table = ClusteringMetrics.from_labels(true_labels, pred_labels)

    # test perfect variants
    row_perfect = table.row_diag()
    col_perfect = table.col_diag()
    assert_almost_equal(row_perfect.assignment_score(model='m3'), 1.0, 6)
    assert_almost_equal(col_perfect.assignment_score(model='m3'), 1.0, 6)
    assert_almost_equal(
        col_perfect.assignment_score(model='m3', discrete=True), 1.0, 6)
    # (the row-diagonal check is repeated here, exactly as before)
    assert_almost_equal(row_perfect.assignment_score(model='m3'), 1.0, 6)
    assert_almost_equal(
        row_perfect.assignment_score(model='m3', discrete=True), 1.0, 6)

    # test that no redraws happen by default
    assert_almost_equal(
        table.assignment_score(model='m3'),
        table.assignment_score(model='m3'),
        6)

    null_table = table.expected(discrete=False)
    assert_almost_equal(null_table.assignment_score(model='m3'), 0.0, 6)

    # test that H1 results in greater score than H0
    null_table = table.expected(discrete=True)
    assert_greater(
        table.assignment_score(model='m3'),
        null_table.assignment_score(model='m3'))

    # test entropy metrics
    homogeneity, completeness, v_measure = table.entropy_scores()
    assert_almost_equal(homogeneity, 0.371468, 6)
    assert_almost_equal(completeness, 0.357908, 6)
    assert_almost_equal(v_measure, 0.364562, 6)

    expected_vi_by_model = (
        (None, 0.517754),
        ('m1', 0.378167),
        ('m2r', 0.365605),
        ('m2c', 0.377165),
        ('m3', 0.364562),
    )
    for model, expected in expected_vi_by_model:
        assert_almost_equal(table.vi_similarity(model=model), expected, 6)

    assert_almost_equal(table.mirkin_match_coeff(), 0.695502, 6)
    assert_almost_equal(table.rand_index(), 0.676471, 6)
    assert_almost_equal(table.fowlkes_mallows(), 0.476731, 6)
    assert_almost_equal(table.assignment_score(model=None), 0.705882, 6)
    assert_almost_equal(table.assignment_score(model='m3'), 0.554974, 6)

    assert_almost_equal(table.chisq_score(), 11.900000, 6)
    assert_almost_equal(table.g_score(), 13.325845, 6)

    # test metrics that are based on pairwise co-association matrix
    pairwise = table.pairwise
    expected_pairwise = (
        ('chisq_score', 8.063241),
        ('g_score', 7.804221),
        ('jaccard_coeff', 0.312500),
        ('ochiai_coeff', 0.476731),
        ('dice_coeff', 0.476190),
        ('sokal_sneath_coeff', 0.185185),
        ('kappa', 0.242915),
        ('accuracy', 0.676471),
        ('precision', 0.500000),
        ('recall', 0.454545),
    )
    for method_name, expected in expected_pairwise:
        assert_almost_equal(getattr(pairwise, method_name)(), expected, 6)

    # the method result must agree with the standalone helper
    reference_tw = _talburt_wang_index(true_labels, pred_labels)
    observed_tw = table.talburt_wang_index()
    assert_almost_equal(reference_tw, observed_tw, 6)
# Load cached label vectors from PATH, or generate random ones and cache
# them, then pick the callable to run according to ARGS.implementation.
if os.path.exists(PATH):
    print("Loading data from %s" % PATH)
    # NOTE(security): pickle.load can execute arbitrary code. PATH should
    # only ever point at a cache file written by this script.
    # Binary mode is required for pickle protocols above 0.
    with open(PATH, 'rb') as fh:
        ltrue, lpred = pickle.load(fh)
else:
    shape = (ARGS.num_samples,)
    ltrue = np.random.randint(low=0, high=ARGS.max_classes, size=shape)
    lpred = np.random.randint(low=0, high=ARGS.max_clusters, size=shape)
    print("Saving generated data to %s" % PATH)
    # HIGHEST_PROTOCOL is a binary format, so the file must be opened 'wb'.
    with open(PATH, 'wb') as fh:
        pickle.dump((ltrue, lpred), fh, protocol=pickle.HIGHEST_PROTOCOL)

# `method` is the callable to run and `line` is the source text that
# invokes it. METHODS[name] is indexed with [0] for the function-style
# implementations and [1] for the ClusteringMetrics method.
if ARGS.implementation == 'oo':
    from lsh_hdc.metrics import ClusteringMetrics
    cm = ClusteringMetrics.from_labels(ltrue, lpred)
    method = getattr(cm, METHODS[ARGS.method][1])
    line = "method()"
elif ARGS.implementation == 'sklearn':
    import sklearn.metrics.cluster as module
    method = getattr(module, METHODS[ARGS.method][0])
    line = "method(ltrue, lpred)"
elif ARGS.implementation == 'proposed':
    import lsh_hdc.metrics as module
    method = getattr(module, METHODS[ARGS.method][0])
    line = "method(ltrue, lpred)"
else:
    # ArgumentError takes (argument, message); passing only the message
    # raised TypeError instead of the intended error.
    raise argparse.ArgumentError(
        None, 'Unknown value for --implementation')

print("Sanity check:")
def matrix_from_labels(*args):
    """Build a ClusteringMetrics object from two label sequences

    Exactly two positional arguments are expected; they are forwarded, in
    order, to ``ClusteringMetrics.from_labels``. Any other argument count
    fails at the unpacking step with ``ValueError``.
    """
    true_labels, pred_labels = args
    return ClusteringMetrics.from_labels(true_labels, pred_labels)