Example #1
import bcubed


def test_recall():
    cdict = {0: {0}, 1: {1}, 2: {0}, 3: {1}}
    ldict = {0: {0}, 1: {1}, 2: {1}, 3: {1}}
    assert bcubed.recall(cdict, ldict) == 2 / 3

    cdict = {0: {0}, 1: {1}, 2: {0}, 3: {1}}
    ldict = {0: {0}, 1: {1}, 2: {2}, 3: {1}}
    assert bcubed.recall(cdict, ldict) == 1
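
For reference, here is a minimal from-scratch sketch of the recall computation (my own helper, not part of the library) that reproduces the 2/3 value asserted above. For the single-label dictionaries used throughout these examples, extended BCubed reduces to the classic definition:

def classic_bcubed_recall(cdict, ldict):
    # Average, over all items, of the fraction of items that share a gold
    # label with it and also share a cluster with it (itself included).
    total = 0.0
    for e1 in ldict:
        same_label = [e2 for e2 in ldict if ldict[e1] & ldict[e2]]
        same_both = [e2 for e2 in same_label if cdict[e1] & cdict[e2]]
        total += len(same_both) / len(same_label)
    return total / len(ldict)

assert abs(classic_bcubed_recall({0: {0}, 1: {1}, 2: {0}, 3: {1}},
                                 {0: {0}, 1: {1}, 2: {1}, 3: {1}}) - 2 / 3) < 1e-9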
Example #2
def compute(title, cdict, ldict):
    """Compute extended BCubed precision and recall, and print the results."""
    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)
    print("{}: precision={:.2f}, recall={:.2f}, fscore={:.2f}".format(
        title, precision, recall, fscore))
Example #3

from typing import Tuple

import bcubed


def bcubed_scores(cdict: dict, gdict: dict) -> Tuple[float, float, float]:

    precision = bcubed.precision(cdict, gdict)
    recall = bcubed.recall(cdict, gdict)
    f1_score = bcubed.fscore(precision, recall)

    return precision, recall, f1_score
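
A quick usage sketch for this wrapper (the item ids and cluster/label values below are illustrative):

cdict = {"a": {1}, "b": {1}, "c": {2}}  # predicted clustering
gdict = {"a": {1}, "b": {2}, "c": {2}}  # gold standard
p, r, f1 = bcubed_scores(cdict, gdict)
print("precision={:.2f}, recall={:.2f}, f1={:.2f}".format(p, r, f1))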
Example #4
def BCubed_Recall_score(true_labels, predicted_labels):
    ldict = {}
    cdict = {}
    for i in range(len(true_labels)):
        ldict[i] = set([true_labels[i]])
        cdict[i] = set([predicted_labels[i]])
    return bcubed.recall(cdict, ldict)
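
For example, with flat label lists (made-up values):

true_labels = [0, 0, 1, 1]
predicted_labels = [0, 1, 1, 1]
# items 0 and 1 share a gold label but are split across clusters,
# so recall comes out to 0.75 here
print(BCubed_Recall_score(true_labels, predicted_labels))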
Example #5
def computeBcubed(title, cdict, ldict):
    """Compute extended BCubed precision and recall, and print the results."""
    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)

    return precision, recall, fscore
Example #6
def compute(title, cdict, ldict):
    """Compute extended BCubed precision and recall, and print the results."""
    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)
    print("{}: precision={:.2f}, recall={:.2f}, fscore={:.2f}".format(
        title, precision, recall, fscore))
Example #7
File: scorer.py Project: ttthy/ure
def check_with_bcubed_lib(gold, pred):
    import bcubed
    ldict = dict([('item{}'.format(i), set([k])) for i, k in enumerate(gold)])
    cdict = dict([('item{}'.format(i), set([k])) for i, k in enumerate(pred)])

    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)

    print('P={} R={} F1={}'.format(precision, recall, fscore))
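
This is called with flat label sequences, for instance (values made up; the expected scores are computed by hand):

check_with_bcubed_lib(gold=[0, 0, 1], pred=[0, 1, 1])
# prints P, R and F1, all 2/3 for this input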
Example #8
def bcubed(gold_lst, predicted_lst):
    """
    Takes gold, predicted.
    Returns recall, precision, f1score
    """
    gold = {i:{cluster} for i,cluster in enumerate(gold_lst)}
    pred = {i:{cluster} for i,cluster in enumerate(predicted_lst)}
    precision = b3.precision(pred, gold)
    recall = b3.recall(pred, gold)
    return recall, precision, b3.fscore(precision, recall)
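
Note that, unlike the other wrappers here, this one returns recall first. A usage sketch with made-up labels (the library is presumably imported as b3 to avoid shadowing this function's name):

recall, precision, f1 = bcubed(["x", "x", "y"], ["x", "y", "y"])
# all three scores come out to 2/3 for this input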
Example #9
def __init__(self, truth, coms):
    self.ground_truth = self.process_input(truth)
    self.communities = self.process_input(coms)
    # FIXME: fix KeyError for nodes not in ground truth
    try:
        self.precision = bcubed.precision(self.communities, self.ground_truth)
        self.recall = bcubed.recall(self.communities, self.ground_truth)
        self.fscore = bcubed.fscore(self.precision, self.recall)
    except KeyError:
        self.precision = 0
        self.recall = 0
        self.fscore = 0
Example #10
def evaluateBCubed(goldLabels, results):
    res_map = {}
    gold_map = {}
    for i in range(0, len(results)):
        res_map[i] = set()
        res_map[i].add(results[i])
        gold_map[i] = set()
        gold_map[i].add(goldLabels[i])
    p = bcubed.precision(res_map, gold_map)
    r = bcubed.recall(res_map, gold_map)
    f = bcubed.fscore(p, r)

    return [p, r, f]
Example #11
def main():
    """Main method."""
    k = 35

    # write ground truth vocabulary to gt_input.txt and get ground truth
    # dictionary
    ldict = aggregate_input_and_ground_truths()
    logging.info("Done generating ldict and ground truth text file.")

    # if file containing clusters hasn't already been created, create it
    if not os.path.isfile("./clusters.txt"):

        preprocess()

        # train word2vec and cluster output from the full vocab
        word2vec.word2clusters("./text8-phrases-extra", "./clusters.txt", k, verbose=True, min_count=1)

        logging.info("Done training.")
        logging.info("Done creating clusters.")

    # load clusters
    clusters = word2vec.load_clusters("./clusters.txt")

    # build cluster dictionary from full vocabulary
    cdict = {}
    for i in range(0, k):
        for word in clusters.get_words_on_cluster(i):
            cdict[word] = set([i])

    logging.info("Done generating cdict.")

    # trim cluster dictionary down to only keys included in ground truths
    trimmed_cdict = {}
    for key in ldict:
        try:
            trimmed_cdict[key] = cdict[key]
        except KeyError:
            pass

    logging.info("done trimming cdict; begining scoring\n")

    # compute bcubed score
    precision = bcubed.precision(trimmed_cdict, ldict)
    recall = bcubed.recall(trimmed_cdict, ldict)
    fscore = bcubed.fscore(precision, recall)

    print "precision: {p}, \t recall: {r}, \t fscore: {f}".format(p=precision, r=recall, f=fscore)

    logging.info("done scoring\n")
Example #12

    def bcubed(self, x_test, y_test):
        ldict = {}
        cdict = {}

        labels_pred = self.predict(x_test)
        labels_pred = (labels_pred > PROB_THRESHOLD)

        for i, label in enumerate(y_test):
            ldict[i] = {int(label)}
            cdict[i] = {int(labels_pred[i])}

        precision = bcubed.precision(cdict, ldict)
        recall = bcubed.recall(cdict, ldict)
        fscore = bcubed.fscore(precision, recall)

        print('B-cubed metric:\nPrecision = {}\nRecall = {}\nF-score = {}'.format(precision, recall, fscore))
Example #13

def external_eval_clusters(y_true, y_pred):
    """
    :param y_true: true cluster ids
    :param y_pred: predicted cluster ids
    :return: external evaluation metrics of clustering quality.
    The metrics are purity, inverse purity, harmonic mean, b-cubed precision, recall and their harmonic mean.
    """
    purity = purity_score(y_true, y_pred)
    inverse_purity = purity_score(y_true, y_pred, inv=True)
    f_purity = f_purity_score(y_true, y_pred)

    ldict = {i: {cluster_idx} for i, cluster_idx in enumerate(y_true)}
    cdict = {i: {cluster_idx} for i, cluster_idx in enumerate(y_pred)}
    bcubed_precision = bcubed.precision(cdict, ldict)
    bcubed_recall = bcubed.recall(cdict, ldict)
    bcubed_fscore = bcubed.fscore(bcubed_precision, bcubed_recall)

    return purity, inverse_purity, f_purity, bcubed_precision, bcubed_recall, bcubed_fscore
Example #14
def calculate_bcubed():

    with open(str(sys.argv[1])) as predictions, open('GroundTruthClusters.csv') as labels:
        predictions.readline()
        reader = csv.reader(predictions)
        clustering = dict((rows[0], set([rows[1]])) for rows in reader)
        # print clustering

        labels.readline()
        reader = csv.reader(labels)
        truth = dict((rows[0], set([rows[1]])) for rows in reader)

        precision = bcubed.precision(clustering, truth)
        recall = bcubed.recall(clustering, truth)
        fscore = bcubed.fscore(precision, recall)

        print(precision)
        print(recall)
        print(fscore)
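
This assumes two-column CSV files with a header row, mapping an item id to a cluster id, e.g. (illustrative layout, inferred from the readline() and rows[0]/rows[1] usage):

# predictions CSV / GroundTruthClusters.csv (assumed layout):
#   item_id,cluster_id   <- header row, skipped by readline()
#   doc1,3
#   doc2,3
#   doc3,7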
Example #15
    if abs(numpy.linalg.norm(clusters - clusters_prev)) < eps:
        break

final_clust_lis = list()
for x in range(num_clusters):
    final_clust_lis.append(list())

dict_pred = {}

for i in range(len(master_word_list)):
    word = master_word_list[i]
    # if len(word_vec_dict[word]) == 0:
    #     continue
    final_clust_lis[clusters[i]].append(word)
    dict_pred[word] = dict_pred.get(word, [])
    dict_pred[word].append(clusters[i])

for key in dict_pred.keys():
    dict_pred[key] = set(dict_pred[key])

precision = bcubed.precision(dict_pred, dict_gold)
recall = bcubed.recall(dict_pred, dict_gold)

print "Precision = ", precision
print "Recall = ", recall
print "F-Score = ", (2 * precision * recall) / (precision + recall)

# print(centroids)
# print(count)
# print(counters)
Example #16
import bcubed
import b3
import numpy as np
import pdb

num_cases = 10

num_clusters = np.random.randint(1, 10, (num_cases, ))
num_labels = np.random.randint(1, 10, (num_cases, ))
num_elements = np.random.randint(1000, 2000, (num_cases, ))

for i in range(num_cases):
    L = np.random.randint(1, num_clusters[i] + 1, (num_elements[i], ))
    K = np.random.randint(1, num_labels[i] + 1, (num_elements[i], ))

    [my_f, my_p, my_r] = b3.calc_b3(L, K)

    # use a separate index so the loop variable `i` is not shadowed
    Ldict = {j: set([L[j]]) for j in range(num_elements[i])}
    Cdict = {j: set([K[j]]) for j in range(num_elements[i])}

    p = bcubed.precision(Cdict, Ldict)
    r = bcubed.recall(Cdict, Ldict)
    f = bcubed.fscore(p, r)

    # Check
    if (abs(p - my_p) > 0.0001 or abs(r - my_r) > 0.001
            or abs(f - my_f) > 0.0001):
        print("ERROR")
Example #17
def f_score(community_dict, gt_dict):  # calculating f_score
    precision = bcubed.precision(community_dict, gt_dict)
    recall = bcubed.recall(community_dict, gt_dict)
    fscore = bcubed.fscore(precision, recall)
    return fscore
Example #18
def evaluate_clustering(base_labels,
                        computed_labels,
                        data=None,
                        metric='euclidean',
                        silent=False):
    """
    Print evaluation metrics for the clustering results
    
    :param base_labels: labels from a reference clustering
    :param computed_labels: labels assigned by the clustering
    :param data: the data matrix or a list of uuids
    :param metric: metric to use for the silhouette method
    :param silent: flag, if true avoid printing
    :return:
    """

    # Converts labels list to dictionaries for the BCubed library
    base_dict = {k: {v} for k, v in dict(enumerate(base_labels)).items()}
    computed_dict = {
        k: {v}
        for k, v in dict(enumerate(computed_labels)).items()
    }
    num_clusters = len(
        set(computed_labels)) - (1 if -1 in computed_labels else 0)

    ars = metrics.adjusted_rand_score(base_labels, computed_labels)
    ami = metrics.adjusted_mutual_info_score(base_labels, computed_labels)
    fm = metrics.fowlkes_mallows_score(base_labels, computed_labels)
    h = metrics.homogeneity_score(base_labels, computed_labels)
    c = metrics.completeness_score(base_labels, computed_labels)
    # pass the evaluated clustering dict first and the reference dict second
    p = bcubed.precision(computed_dict, base_dict)
    r = bcubed.recall(computed_dict, base_dict)
    fs = bcubed.fscore(p, r)
    p_p, p_r, p_q = cluster_metrics(base_labels, computed_labels)

    if not silent:
        print('-' * 80)
        print('Clustering evaluation')
        print('Number of clusters', num_clusters)
        print('Number of distinct families', len(set(base_labels)))
        print('Adjusted Rand index:', ars)
        print('Adjusted Mutual Information:', ami)
        print('Fowlkes-Mallows:', fm)
        print('Homogeneity:', h)
        print('Completeness:', c)
        print('BCubed Precision:', p)
        print('BCubed Recall:', r)
        print('BCubed FScore:', fs)
        print('Paper Precision:', p_p)
        print('Paper Recall:', p_r)
        print('Paper Quality:', p_q)

    if data is not None:
        sh = metrics.silhouette_score(data,
                                      computed_labels,
                                      metric=metric,
                                      random_state=42)

        if not silent:
            print('Silhouette', sh)

        ret = (ars, ami, fm, h, c, p, r, fs, p_p, p_r, p_q, sh)

    else:
        ret = (ars, ami, fm, h, c, p, r, fs, p_p, p_r, p_q)

    return ret
Example #19
import bcubed
import b3
import numpy as np
import pdb

num_cases = 10

num_clusters = np.random.randint(1, 10, (num_cases,))
num_labels = np.random.randint(1, 10, (num_cases,))
num_elements = np.random.randint(1000, 2000, (num_cases,))

for i in range(num_cases):
  L = np.random.randint(1, num_clusters[i] + 1, (num_elements[i],))
  K = np.random.randint(1, num_labels[i] + 1, (num_elements[i],))

  [my_f, my_p, my_r] = b3.calc_b3(L, K)

  # separate index to avoid shadowing the loop variable `i`
  Ldict = {j: set([L[j]]) for j in range(num_elements[i])}
  Cdict = {j: set([K[j]]) for j in range(num_elements[i])}

  p = bcubed.precision(Cdict, Ldict)
  r = bcubed.recall(Cdict, Ldict)
  f = bcubed.fscore(p, r)

  # Check
  if abs(p - my_p) > 0.0001 or abs(r - my_r) > 0.001 or abs(f - my_f) > 0.0001:
    print("ERROR")

Example #20
        for ele in words:
            if ele not in set_of_all_tokens:
                sys_dict["-"].add(ele)
        gold_dict = invert_dict(gold_dict)
        sys_dict = invert_dict(sys_dict)
        print(sys_dict)
        x = 0
        y = 0
        for k, v in gold_dict.items():
            x += 1
        for k, v in sys_dict.items():
            y += 1
        if x != y:
            print(x, y)
            print(dir)
            print(gold_dict)
            print(sys_dict)
            for k in sys_dict.keys():
                if k not in gold_dict.keys():
                    print(k)
        if sys_dict:
            precision = bcubed.precision(sys_dict, gold_dict)
            recall = bcubed.recall(sys_dict, gold_dict)
            fscore = bcubed.fscore(precision, recall)
            # print(fscore)
            total_f1score += fscore
            count += 1
    # print("total", total_f1score / count)
    with open("f1score", "a") as f:
        f.write("f1score: " + str(total_f1score / count))
Example #21
    # print(len(allMentions))
    # print(cluster_gold)
    for key1, value1 in cluster_test.items():
        for key2, value2 in cluster_test[key1].items():
            cluster_test[key1][key2] = set([cluster_test[key1][key2]])
    for key1, value1 in cluster_gold.items():
        for key2, value2 in cluster_gold[key1].items():
            cluster_gold[key1][key2] = set([cluster_gold[key1][key2]])

    all_precision = []
    all_recall = []
    all_fscore = []
    for key, value in cluster_test.items():
        precision = bcubed.precision(cluster_test[key], cluster_gold[key])
        recall = bcubed.recall(cluster_test[key], cluster_gold[key])
        fscore = bcubed.fscore(precision, recall)
        print('precision: ' + str(precision))
        all_precision.append(precision)
        print('recall: ' + str(recall))
        all_recall.append(recall)
        print('fscore: ' + str(fscore))
        all_fscore.append(fscore)
        print('')
    print('avg b-cubed precision: ' + str(sum(all_precision) / len(all_precision)))
    print('avg b-cubed recall: ' + str(sum(all_recall) / len(all_recall)))
    print('avg b-cubed fscore: ' + str(sum(all_fscore) / len(all_fscore)))
    print('number of negative predictions = ' + str(sum(y_predicted == 0)) + ' and positive predictions = ' + str(sum(y_predicted == 1)))
    # TO-DOs
    # 1. Try other dimensional word embeddings
Example #22

    def _eval_clustering(self, labels_true, labels_predicted):
        # To address when COP-KMeans fails to satisfy all constraints at a k:
        if labels_predicted is None:
            # return an empty dictionary to expose in the final output
            return {"nmi": None,
                    "ami": None,
                    "ari": None,
                    "fms": None,
                    "v_measure": None,
                    "bcubed_precision": None,
                    "bcubed_recall": None,
                    "bcubed_fscore": None,
                    "Silhouette": None,
                    "Calinski_harabasz": None,
                    "Davies_Bouldin": None
                    }

        nmi = normalized_mutual_info_score(labels_true,
                                           labels_predicted,
                                           average_method="max")

        ami = adjusted_mutual_info_score(labels_true,
                                         labels_predicted,
                                         average_method="arithmetic")

        ari = adjusted_rand_score(labels_true,
                                  labels_predicted)

        v_measure = v_measure_score(labels_true,
                                    labels_predicted,
                                    beta=1.0)

        fms = fowlkes_mallows_score(labels_true,
                                    labels_predicted)

        # Reshape labels for BCubed measures
        true_dict = self._reshape_labels_as_dicts(labels_true)
        pred_dict = self._reshape_labels_as_dicts(labels_predicted)

        bcubed_precision = bcubed.precision(cdict=pred_dict, ldict=true_dict)
        bcubed_recall = bcubed.recall(cdict=pred_dict, ldict=true_dict)
        bcubed_f1 = bcubed.fscore(bcubed_precision, bcubed_recall)

        # =====================================================================
        # Unsupervised Metrics
        # =====================================================================
        if labels_predicted.nunique() not in (1, len(self.data)):
            sil = silhouette_score(X=self.data,
                                   labels=labels_predicted,
                                   metric=self.distance_metric,
                                   random_state=13712)

            ch = calinski_harabasz_score(X=self.data, labels=labels_predicted)

            dv = davies_bouldin_score(X=self.data, labels=labels_predicted)
        else:
            sil = None
            ch = None
            dv = None

        ret = {}
        ret.update({"nmi": round(nmi, 4),
                    "ami": round(ami, 4),
                    "ari": round(ari, 4),
                    "fms": round(fms, 4),
                    "v_measure": round(v_measure, 4),
                    "bcubed_precision": round(bcubed_precision, 4),
                    "bcubed_recall": round(bcubed_recall, 4),
                    "bcubed_fscore": round(bcubed_f1, 4),
                    "Silhouette": round(sil, 4
                                        ) if sil is not None else None,
                    "Calinski_harabasz": round(ch, 4
                                               ) if ch is not None else None,
                    "Davies_Bouldin": round(dv, 4
                                            ) if dv is not None else None
                    # Here goes the unsupervised indices
                    })

        return ret
Example #23
def evaluate_bcubed(judgments, gold):
    precision = bcubed.precision(judgments, gold)
    recall = bcubed.recall(judgments, gold)
    fscore = bcubed.fscore(precision, recall)
    return precision, recall, fscore
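
As in the other examples, judgments and gold map each item to a set of labels; a minimal call (values illustrative):

judgments = {"m1": {"A"}, "m2": {"A"}, "m3": {"B"}}
gold = {"m1": {"A"}, "m2": {"B"}, "m3": {"B"}}
print(evaluate_bcubed(judgments, gold))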