def compute(title, cdict, ldict):
    """Compute extended BCubed precision and recall, and print the results."""
    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)
    print("{}: precision={:.2f}, recall={:.2f}, fscore={:.2f}".format(
        title, precision, recall, fscore))
def computeBcubed(title, cdict, ldict):
    """Compute extended BCubed precision, recall, and F-score, and return them."""
    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)
    return precision, recall, fscore
from typing import Tuple

import bcubed


def bcubed_scores(cdict: dict, gdict: dict) -> Tuple[float, float, float]:
    """Return extended BCubed precision, recall, and F1 for a clustering."""
    precision = bcubed.precision(cdict, gdict)
    recall = bcubed.recall(cdict, gdict)
    f1_score = bcubed.fscore(precision, recall)
    return precision, recall, f1_score
def check_with_bcubed_lib(gold, pred):
    import bcubed
    # Map each item to a singleton set holding its gold label / cluster id.
    ldict = {'item{}'.format(i): {k} for i, k in enumerate(gold)}
    cdict = {'item{}'.format(i): {k} for i, k in enumerate(pred)}
    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)
    print('P={} R={} F1={}'.format(precision, recall, fscore))
import bcubed as b3  # assumed alias: the snippet refers to the bcubed package as `b3`


def bcubed(gold_lst, predicted_lst):
    """Take gold and predicted label lists; return recall, precision, f1score."""
    gold = {i: {cluster} for i, cluster in enumerate(gold_lst)}
    pred = {i: {cluster} for i, cluster in enumerate(predicted_lst)}
    precision = b3.precision(pred, gold)
    recall = b3.recall(pred, gold)
    return recall, precision, b3.fscore(precision, recall)
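A possible invocation of the list-based wrapper above, with made-up toy labels (any hashable label type works); note that it returns recall before precision:

gold = ["animal", "animal", "plant", "plant"]
pred = [0, 0, 0, 1]

recall, precision, f1 = bcubed(gold, pred)
print("P={:.2f} R={:.2f} F1={:.2f}".format(precision, recall, f1))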
def __init__(self, truth, coms):
    self.ground_truth = self.process_input(truth)
    self.communities = self.process_input(coms)
    # FIXME: handle KeyError for nodes not in the ground truth
    try:
        self.precision = bcubed.precision(self.communities, self.ground_truth)
        self.recall = bcubed.recall(self.communities, self.ground_truth)
        self.fscore = bcubed.fscore(self.precision, self.recall)
    except KeyError:
        self.precision = 0
        self.recall = 0
        self.fscore = 0
def evaluateBCubed(goldLabels, results):
    res_map = {}
    gold_map = {}
    for i in range(len(results)):
        res_map[i] = {results[i]}
        gold_map[i] = {goldLabels[i]}
    p = bcubed.precision(res_map, gold_map)
    r = bcubed.recall(res_map, gold_map)
    f = bcubed.fscore(p, r)
    return [p, r, f]
def main():
    """Main method."""
    k = 35

    # write ground truth vocabulary to gt_input.txt and get the ground truth
    # dictionary
    ldict = aggregate_input_and_ground_truths()
    logging.info("Done generating ldict and ground truth text file.")

    # if the file containing clusters hasn't already been created, create it
    if not os.path.isfile("./clusters.txt"):
        preprocess()
        # train word2vec and cluster the output from the full vocabulary
        word2vec.word2clusters("./text8-phrases-extra", "./clusters.txt", k,
                               verbose=True, min_count=1)
        logging.info("Done training.")
        logging.info("Done creating clusters.")

    # load clusters
    clusters = word2vec.load_clusters("./clusters.txt")

    # build the cluster dictionary from the full vocabulary
    cdict = {}
    for i in range(k):
        for word in clusters.get_words_on_cluster(i):
            cdict[word] = {i}
    logging.info("Done generating cdict.")

    # trim the cluster dictionary down to only keys included in the ground truths
    trimmed_cdict = {}
    for key in ldict.keys():
        try:
            trimmed_cdict[key] = cdict[key]
        except KeyError:
            pass
    logging.info("Done trimming cdict; beginning scoring.\n")

    # compute the BCubed scores
    precision = bcubed.precision(trimmed_cdict, ldict)
    recall = bcubed.recall(trimmed_cdict, ldict)
    fscore = bcubed.fscore(precision, recall)
    print("precision: {p}, \t recall: {r}, \t fscore: {f}".format(
        p=precision, r=recall, f=fscore))
    logging.info("Done scoring.\n")
def bcubed(self, x_test, y_test):
    ldict = {}
    cdict = {}
    labels_pred = self.predict(x_test)
    labels_pred = (labels_pred > PROB_THRESHOLD)
    for i, label in enumerate(y_test):
        ldict[i] = {int(label)}
        cdict[i] = {int(labels_pred[i])}
    precision = bcubed.precision(cdict, ldict)
    recall = bcubed.recall(cdict, ldict)
    fscore = bcubed.fscore(precision, recall)
    print('B-cubed metric:\nPrecision = {}\nRecall = {}\nF-score = {}'.format(
        precision, recall, fscore))
def external_eval_clusters(y_true, y_pred):
    """
    :param y_true: true cluster ids
    :param y_pred: predicted cluster ids
    :return: external evaluation metrics of clustering quality. The metrics
        are purity, inverse purity, their harmonic mean, and BCubed precision,
        recall, and their harmonic mean.
    """
    purity = purity_score(y_true, y_pred)
    inverse_purity = purity_score(y_true, y_pred, inv=True)
    f_purity = f_purity_score(y_true, y_pred)
    ldict = {i: {cluster_idx} for i, cluster_idx in enumerate(y_true)}
    cdict = {i: {cluster_idx} for i, cluster_idx in enumerate(y_pred)}
    bcubed_precision = bcubed.precision(cdict, ldict)
    bcubed_recall = bcubed.recall(cdict, ldict)
    bcubed_fscore = bcubed.fscore(bcubed_precision, bcubed_recall)
    return (purity, inverse_purity, f_purity,
            bcubed_precision, bcubed_recall, bcubed_fscore)
def calculate_bcubed():
    with open(str(sys.argv[1])) as predictions, \
            open('GroundTruthClusters.csv') as labels:
        predictions.readline()  # skip the header row
        reader = csv.reader(predictions)
        clustering = {rows[0]: {rows[1]} for rows in reader}
        # print(clustering)
        labels.readline()  # skip the header row
        reader = csv.reader(labels)
        truth = {rows[0]: {rows[1]} for rows in reader}
        precision = bcubed.precision(clustering, truth)
        recall = bcubed.recall(clustering, truth)
        fscore = bcubed.fscore(precision, recall)
        print(precision)
        print(recall)
        print(fscore)
def f_score(community_dict, gt_dict):
    # calculate the BCubed F-score from precision and recall
    precision = bcubed.precision(community_dict, gt_dict)
    recall = bcubed.recall(community_dict, gt_dict)
    fscore = bcubed.fscore(precision, recall)
    return fscore
def evaluate_clustering(base_labels, computed_labels, data=None,
                        metric='euclidean', silent=False):
    """
    Print evaluation metrics for the clustering results

    :param base_labels: labels from a reference clustering
    :param computed_labels: labels assigned by the clustering
    :param data: the data matrix or a list of uuids
    :param metric: metric to use for the silhouette method
    :param silent: flag, if true avoid printing
    :return:
    """

    # Convert label lists to dictionaries for the BCubed library
    base_dict = {k: {v} for k, v in enumerate(base_labels)}
    computed_dict = {k: {v} for k, v in enumerate(computed_labels)}

    num_clusters = len(set(computed_labels)) - (1 if -1 in computed_labels else 0)

    ars = metrics.adjusted_rand_score(base_labels, computed_labels)
    ami = metrics.adjusted_mutual_info_score(base_labels, computed_labels)
    fm = metrics.fowlkes_mallows_score(base_labels, computed_labels)
    h = metrics.homogeneity_score(base_labels, computed_labels)
    c = metrics.completeness_score(base_labels, computed_labels)
    # bcubed expects the candidate clustering first and the gold standard second
    p = bcubed.precision(computed_dict, base_dict)
    r = bcubed.recall(computed_dict, base_dict)
    fs = bcubed.fscore(p, r)
    p_p, p_r, p_q = cluster_metrics(base_labels, computed_labels)

    if not silent:
        print('-' * 80)
        print('Clustering evaluation')
        print('Number of clusters', num_clusters)
        print('Number of distinct families', len(set(base_labels)))
        print('Adjusted Rand index:', ars)
        print('Adjusted Mutual Information:', ami)
        print('Fowlkes-Mallows:', fm)
        print('Homogeneity:', h)
        print('Completeness:', c)
        print('BCubed Precision:', p)
        print('BCubed Recall:', r)
        print('BCubed FScore:', fs)
        print('Paper Precision:', p_p)
        print('Paper Recall:', p_r)
        print('Paper Quality:', p_q)

    if data is not None:
        sh = metrics.silhouette_score(data, computed_labels, metric=metric,
                                      random_state=42)
        if not silent:
            print('Silhouette', sh)
        ret = (ars, ami, fm, h, c, p, r, fs, p_p, p_r, p_q, sh)
    else:
        ret = (ars, ami, fm, h, c, p, r, fs, p_p, p_r, p_q)

    return ret
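Argument order matters in these calls: the bcubed package treats the first dictionary as the candidate clustering (cdict) and the second as the gold standard (ldict), so swapping them exchanges precision and recall. A minimal sketch with made-up items, where the candidate splits the single gold category {a, b, c} in two:

import bcubed

ldict = {"a": {1}, "b": {1}, "c": {1}}  # gold: one category
cdict = {"a": {1}, "b": {1}, "c": {2}}  # candidate: the category is split

print(bcubed.precision(cdict, ldict))  # 1.0: every candidate cluster is pure
print(bcubed.recall(cdict, ldict))     # ~0.56: the gold category was split
print(bcubed.precision(ldict, cdict))  # ~0.56: swapped arguments turn precision into recall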
import bcubed
import b3
import numpy as np

num_cases = 10
num_clusters = np.random.randint(1, 10, (num_cases,))
num_labels = np.random.randint(1, 10, (num_cases,))
num_elements = np.random.randint(1000, 2000, (num_cases,))

for i in range(num_cases):
    L = np.random.randint(1, num_clusters[i] + 1, (num_elements[i],))
    K = np.random.randint(1, num_labels[i] + 1, (num_elements[i],))
    my_f, my_p, my_r = b3.calc_b3(L, K)
    # j indexes elements, avoiding a shadow of the case index i
    Ldict = {j: {L[j]} for j in range(num_elements[i])}
    Cdict = {j: {K[j]} for j in range(num_elements[i])}
    p = bcubed.precision(Cdict, Ldict)
    r = bcubed.recall(Cdict, Ldict)
    f = bcubed.fscore(p, r)
    # check that the two implementations agree
    if (abs(p - my_p) > 0.0001 or abs(r - my_r) > 0.001
            or abs(f - my_f) > 0.0001):
        print("ERROR")
    # (tail of an if branch whose condition is outside this excerpt)
    gold_codings = {
        str(form): str(row["cogid"])
        for form, row in gold_forms.items()}
else:
    gold_dataset = get_dataset(args.gold)
    gold_cognatesets = cognate_sets(gold_dataset, code_column="COGID")
    gold_codings = {
        str(form): code
        for code, forms in gold_cognatesets.items()
        for form in forms}

concept_codes = {}
for concept, id in iterate_concept_and_id():
    gold_c, c = concept_codes.setdefault(concept, ([], []))
    gold_c.append(''.join(str(s) for s in gold_codings.get(id, ())))
    c.append(''.join(str(s) for s in codings.get(id, ())))

v = 0
r = 0
a = 0
b = 0
for concept, (gold_c, c) in concept_codes.items():
    v += metrics.v_measure_score(gold_c, c)
    r += metrics.adjusted_rand_score(gold_c, c)
    a += metrics.adjusted_mutual_info_score(gold_c, c)
    b += bcubed.fscore(bcubed.simple_precision(c, gold_c),
                       bcubed.simple_recall(c, gold_c))

norm = len(concept_codes)
print(args.codings, b / norm, v / norm, r / norm, a / norm)
# print(len(allMentions))
# print(cluster_gold)
# wrap every label in a singleton set, the shape bcubed expects
for key1, value1 in cluster_test.items():
    for key2, value2 in cluster_test[key1].items():
        cluster_test[key1][key2] = {cluster_test[key1][key2]}
for key1, value1 in cluster_gold.items():
    for key2, value2 in cluster_gold[key1].items():
        cluster_gold[key1][key2] = {cluster_gold[key1][key2]}

all_precision = []
all_recall = []
all_fscore = []
for key, value in cluster_test.items():
    precision = bcubed.precision(cluster_test[key], cluster_gold[key])
    recall = bcubed.recall(cluster_test[key], cluster_gold[key])
    fscore = bcubed.fscore(precision, recall)
    print('precision: ' + str(precision))
    all_precision.append(precision)
    print('recall: ' + str(recall))
    all_recall.append(recall)
    print('fscore: ' + str(fscore))
    all_fscore.append(fscore)
    print('')

print('avg b-cubed precision: ' + str(sum(all_precision) / len(all_precision)))
print('avg b-cubed recall: ' + str(sum(all_recall) / len(all_recall)))
print('avg b-cubed fscore: ' + str(sum(all_fscore) / len(all_fscore)))
print('number of negative predictions = ' + str(sum(y_predicted == 0)) +
      ' and positive predictions = ' + str(sum(y_predicted == 1)))

# TODOs
# 1. Try other dimensional word embeddings
def _eval_clustering(self, labels_true, labels_predicted):
    # When COP-KMeans fails to satisfy all constraints at a given k,
    # return a dictionary of Nones to expose in the final output.
    if labels_predicted is None:
        return {"nmi": None,
                "ami": None,
                "ari": None,
                "fms": None,
                "v_measure": None,
                "bcubed_precision": None,
                "bcubed_recall": None,
                "bcubed_fscore": None,
                "Silhouette": None,
                "Calinski_harabasz": None,
                "Davies_Bouldin": None}

    nmi = normalized_mutual_info_score(labels_true, labels_predicted,
                                       average_method="max")
    ami = adjusted_mutual_info_score(labels_true, labels_predicted,
                                     average_method="arithmetic")
    ari = adjusted_rand_score(labels_true, labels_predicted)
    v_measure = v_measure_score(labels_true, labels_predicted, beta=1.0)
    fms = fowlkes_mallows_score(labels_true, labels_predicted)

    # Reshape labels into dictionaries for the BCubed measures
    true_dict = self._reshape_labels_as_dicts(labels_true)
    pred_dict = self._reshape_labels_as_dicts(labels_predicted)

    bcubed_precision = bcubed.precision(cdict=pred_dict, ldict=true_dict)
    bcubed_recall = bcubed.recall(cdict=pred_dict, ldict=true_dict)
    bcubed_f1 = bcubed.fscore(bcubed_precision, bcubed_recall)

    # =====================================================================
    # Unsupervised Metrics
    # =====================================================================
    if labels_predicted.nunique() not in (1, len(self.data)):
        sil = silhouette_score(X=self.data, labels=labels_predicted,
                               metric=self.distance_metric,
                               random_state=13712)
        ch = calinski_harabasz_score(X=self.data, labels=labels_predicted)
        dv = davies_bouldin_score(X=self.data, labels=labels_predicted)
    else:
        sil = None
        ch = None
        dv = None

    return {"nmi": round(nmi, 4),
            "ami": round(ami, 4),
            "ari": round(ari, 4),
            "fms": round(fms, 4),
            "v_measure": round(v_measure, 4),
            "bcubed_precision": round(bcubed_precision, 4),
            "bcubed_recall": round(bcubed_recall, 4),
            "bcubed_fscore": round(bcubed_f1, 4),
            "Silhouette": round(sil, 4) if sil is not None else None,
            "Calinski_harabasz": round(ch, 4) if ch is not None else None,
            "Davies_Bouldin": round(dv, 4) if dv is not None else None}
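The method above depends on a _reshape_labels_as_dicts helper that is not part of this excerpt; a minimal sketch of what such a helper could look like, assuming the labels arrive as any iterable (e.g. a pandas Series):

@staticmethod
def _reshape_labels_as_dicts(labels):
    # item index -> singleton set of its label: the mapping bcubed expects
    return {i: {label} for i, label in enumerate(labels)}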
def evaluate_bcubed(judgments, gold):
    precision = bcubed.precision(judgments, gold)
    recall = bcubed.recall(judgments, gold)
    fscore = bcubed.fscore(precision, recall)
    return precision, recall, fscore
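Every snippet above feeds the same data shape into the library: a dict mapping each item to a set of cluster ids (cdict) and a dict mapping each item to a set of gold labels (ldict). A self-contained sketch with made-up item names, showing why the values are sets rather than scalars: extended BCubed lets one item carry several labels at once.

import bcubed

# clustering: item -> set of cluster ids; gold standard: item -> set of labels
cdict = {"item1": {"A"}, "item2": {"A", "B"}, "item3": {"B"}}
ldict = {"item1": {"x"}, "item2": {"x", "y"}, "item3": {"y"}}

precision = bcubed.precision(cdict, ldict)
recall = bcubed.recall(cdict, ldict)
print("P={:.3f} R={:.3f} F1={:.3f}".format(
    precision, recall, bcubed.fscore(precision, recall)))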