Beispiel #1
0
    def v_measure(self, beta=1):
        """Compute Rosenberg and Hirschberg's V-measure (EMNLP '07).

        The score ranges between 0 and 1 (1 is best). It combines a
        homogeneity term and a completeness term by passing them, with
        ``beta``, to ``fscore``.

        beta -- weight used to trade off homogeneity against
            completeness; the default (1) is the balanced harmonic
            mean. The original author's note says beta > 1 favors
            homogeneity; that depends on the argument convention of
            ``fscore``, which is not visible here -- TODO confirm.
        """
        # H(C): entropy of the gold cluster sizes.
        h_c = entropy_of_multinomial(self.gold_sizes.values())
        # H(K): entropy of the per-entry totals of self.by_test
        # (presumably the induced/test cluster sizes).
        h_k = entropy_of_multinomial(
            [sum(table.values()) for table in self.by_test.values()])

        # Homogeneity = 1 - H(C|K) / H(C); defined as 1 when H(C) is 0
        # to avoid dividing by zero.
        if h_c == 0:
            homogeneity = 1
        else:
            h_c_given_k = self.conditional_entropy_gold_given_test()
            homogeneity = 1 - h_c_given_k / h_c

        # Completeness = 1 - H(K|C) / H(K); defined as 1 when H(K) is 0.
        if h_k == 0:
            completeness = 1
        else:
            h_k_given_c = conditional_entropy_Y_Given_X(
                dict(self.as_confusion_items()))
            completeness = 1 - h_k_given_c / h_k

        # fscore is expected to return the (beta-weighted) harmonic mean.
        return fscore(homogeneity, completeness, beta)
Beispiel #2
0
 def normalized_vi(self):
     """Return NVI (Reichart and Rappoport '09): VI / H(C).

     Variation of information is divided by the entropy of the true
     (gold) clustering. Per the metric's definition the value is 0 for
     perfect clusterings and 1 for the single-cluster clustering, with
     'reasonable' clusterings scoring in between.

     When the gold entropy is zero the division is skipped and 0 is
     returned.
     """
     gold_entropy = entropy_of_multinomial(self.gold_sizes.values())
     # Zero gold entropy would make the ratio undefined, so report 0.
     return (0 if gold_entropy == 0
             else self.variation_of_information() / gold_entropy)
Beispiel #3
0
    def normalized_mutual_information(self):
        """Return normalized mutual information (Strehl and Ghosh, JMLR
        '02, "Cluster Ensembles", eq 2).

        Mutual information is divided by the square root of the product
        of the gold-clustering entropy and the test-clustering entropy.
        The value is between 0 and 1, and is 1 for identical
        clusterings.

        Degenerate cases (the normalizer is zero):
          * gold entropy is zero -> 1 is returned;
          * otherwise (test entropy is zero) -> 0 is returned.
        """
        # Compute each entropy once and reuse it for both the
        # normalizer and the degenerate-case check.
        h_gold = entropy_of_multinomial(self.gold_sizes.values())
        h_test = entropy_of_multinomial(
            [sum(table.values()) for table in self.by_test.values()])
        denom = sqrt(h_gold * h_test)

        if denom == 0:
            if h_gold == 0:
                # Gold clustering is entirely uninformative, so
                # anything we do is good.
                return 1
            # Induced clustering is entirely uninformative.
            return 0

        return self.mutual_information() / denom
Beispiel #4
0
 def entropy(self):
     """Return ``entropy_of_multinomial`` applied to ``self.values()``."""
     # Imported at call time, inside the method, as in the original.
     from Probably import entropy_of_multinomial
     counts = self.values()
     return entropy_of_multinomial(counts)