Example #1
0
 def proficiency2(self):
     """Return the entropy-weighted average of the pairwise proficiencies.

     For every (actual category, predicted category) pair, the value of
     ``util.bin_mutual_info`` is weighted by the product of the two
     categories' ``util.bin_entropy`` values.  The weighted sum is then
     divided by (sum of predicted entropies) * (sum of actual entropies).

     Returns:
         The weighted average, or 0 when the normalising product is zero
         (for example when there are no categories at all).  This mirrors
         the zero-entropy guard in ``proficiency_raw`` and avoids a
         ``ZeroDivisionError``.
     """
     # items()/values() instead of iteritems()/itervalues(): same results,
     # and the method also runs where the iter* methods do not exist.
     # The list is built once because it is re-walked for every predicted
     # category below.
     actual_items = list(self.actual.items())
     actual_entropy = dict(
         (ac, util.bin_entropy(self.observations, an))
         for (ac, an) in actual_items)
     weighted_sum = 0
     predicted_entropy_total = 0
     for (pc, pn) in self.predicted.items():
         pe = util.bin_entropy(self.observations, pn)
         predicted_entropy_total += pe
         if pe > 0:
             # A zero predicted entropy would zero every term of the inner
             # loop, so the pairwise work is skipped in that case.
             for (ac, an) in actual_items:
                 weighted_sum += actual_entropy[ac] * pe * util.bin_mutual_info(
                     self.observations, pn, an, self.tp.get((ac, pc), 0))
     normaliser = predicted_entropy_total * sum(actual_entropy.values())
     if normaliser == 0:
         # No weight at all: report 0 rather than dividing by zero.
         return 0
     return weighted_sum / normaliser
Example #2
0
 def proficiency2(self):
     """Weighted average of the n^2 pairwise proficiencies.

     Each (actual, predicted) category pair contributes its
     ``util.bin_mutual_info`` value, weighted by the product of the actual
     category's and the predicted category's ``util.bin_entropy``.  The
     total is normalised by the product of the summed predicted entropies
     and the summed actual entropies.

     Returns:
         The normalised weighted sum.  When the normaliser is zero (e.g.
         there are no actual or no predicted categories) the result is 0,
         matching what ``proficiency_raw`` does for zero actual entropy,
         instead of raising ``ZeroDivisionError``.
     """
     # Materialise the actual categories once: they are iterated again for
     # each predicted category.  items()/values() are used in place of the
     # iter* variants so the method does not depend on those methods.
     actual_pairs = list(self.actual.items())
     ae = dict((c, util.bin_entropy(self.observations, n))
               for (c, n) in actual_pairs)
     top = 0
     spe = 0  # sum of predicted entropies
     for (pc, pn) in self.predicted.items():
         pe = util.bin_entropy(self.observations, pn)
         spe += pe
         if pe <= 0:
             # Zero weight: every term for this predicted category is 0.
             continue
         for (ac, an) in actual_pairs:
             top += ae[ac] * pe * util.bin_mutual_info(
                 self.observations, pn, an, self.tp.get((ac, pc), 0))
     bottom = spe * sum(ae.values())
     # Guard the division: with no entropy there is nothing to average.
     return 0 if bottom == 0 else top / bottom
Example #3
0
 def proficiency_raw(self):
     """Return total same-category mutual information over total actual entropy.

     For each actual category, ``util.bin_entropy`` of its count is added to
     the entropy total, and ``util.bin_mutual_info`` between it and the
     predicted category of the same name is added to the information total.

     Returns:
         The ratio of the two totals, or 0 when the entropy total is 0.
     """
     entropy_total = 0.0
     information_total = 0.0
     for (category, actual_count) in self.actual.iteritems():
         entropy_total += util.bin_entropy(self.observations, actual_count)
         information_total += util.bin_mutual_info(
             self.observations,
             self.predicted.get(category, 0),
             actual_count,
             self.tp.get((category, category), 0))
     if entropy_total != 0:
         return information_total / entropy_total
     # No entropy in the actual categories: nothing to normalise by.
     return 0
Example #4
0
 def proficiency_raw(self):
     """Ratio of summed per-category mutual information to summed entropy.

     Walks the actual categories once, accumulating ``util.bin_entropy`` of
     each actual count and ``util.bin_mutual_info`` of each category against
     the identically named predicted category.

     Returns:
         0 if the accumulated actual entropy is 0, otherwise the
         accumulated mutual information divided by it.
     """
     total_mi = 0.0
     total_entropy = 0.0
     for c, actual_count in self.actual.iteritems():
         total_entropy += util.bin_entropy(self.observations, actual_count)
         # Missing predictions / pair counts default to 0.
         predicted_count = self.predicted.get(c, 0)
         pair_count = self.tp.get((c, c), 0)
         total_mi += util.bin_mutual_info(
             self.observations, predicted_count, actual_count, pair_count)
     return 0 if total_entropy == 0 else total_mi / total_entropy
Example #5
0
 def assignment(self):
     """Match actual to predicted categories by maximising mutual information.

     Builds a cost matrix over the taxonomy returned by
     ``self.check_taxonomy()`` (rows: actual categories, columns: predicted
     categories) whose entries are the negated ``util.bin_mutual_info`` of
     each pair, and solves it with ``munkres.Munkres``.  Matches that pair a
     category with a different one are logged as reassignments.

     Returns:
         A tuple ``(taxonomy, indexes, proficiency)``: ``taxonomy`` is the
         list of categories, ``indexes`` is the list of ``(row, col)``
         positions in ``taxonomy`` chosen by the solver, and ``proficiency``
         is the mutual information of the matching divided by the total
         actual entropy (0 when that entropy is 0).  For an empty taxonomy
         the result is ``(taxonomy, [], 0)``.
     """
     taxonomy = list(self.check_taxonomy())
     if not taxonomy:
         # Nothing to match; do not hand the solver an empty matrix.
         return (taxonomy, [], 0)
     # https://pypi.python.org/pypi/munkres
     # Imported only once the solver is actually needed.
     import munkres
     costs = []
     row_entropy = []
     for ac in taxonomy:
         an = self.actual.get(ac, 0)
         ae = util.bin_entropy(self.observations, an)
         row_entropy.append(ae)
         if ae == 0:
             # Zero-entropy rows get a flat zero-cost row.
             costs.append([0 for _pc in taxonomy])
         else:
             # negative MI because munkres minimizes costs
             costs.append([
                 -util.bin_mutual_info(self.observations,
                                       self.predicted.get(pc, 0), an,
                                       self.tp.get((ac, pc), 0))
                 for pc in taxonomy
             ])
     indexes = munkres.Munkres().compute(costs)
     mutual_information = 0
     reassigned = []
     for row, col in indexes:
         mutual_information += -costs[row][col]
         if row != col:
             ac = taxonomy[row]
             pc = taxonomy[col]
             # Matched MI as a percentage of the actual category's entropy.
             c = -100 * costs[row][col]
             if c > 0:
                 c /= row_entropy[row]
             reassigned.append(
                 (c, ac, self.actual.get(ac, 0), pc,
                  self.predicted.get(pc, 0)))
     if reassigned:
         reassigned.sort(key=operator.itemgetter(0), reverse=True)
         # NOTE(review): the logged count covers every reassignment, while
         # the listing only shows those passing the thresholds below.
         # ``warning`` replaces the deprecated ``warn`` alias.
         MuLabCat.logger.warning(
             "Reassigned %d categories:\n%s", len(reassigned), "\n".join([
                 "  Proficiency=%.2f%%: Actual [%s](%d) = Predicted [%s](%d)"
                 % (p, ac, an, pc, pn) for (p, ac, an, pc, pn) in reassigned
                 if (p >= 10 and an >= 5 and pn >= 5)
             ]))
     total_entropy = sum(row_entropy)
     return (taxonomy, indexes,
             0 if total_entropy == 0 else mutual_information /
             total_entropy)
Example #6
0
 def assignment(self):
     """Find the best one-to-one pairing of actual and predicted categories.

     The taxonomy comes from ``self.check_taxonomy()``.  Each actual
     category is a row and each predicted category a column of a cost
     matrix holding the negated ``util.bin_mutual_info`` of the pair;
     ``munkres.Munkres`` picks the pairing with minimal total cost.  Pairs
     whose row and column differ are collected and logged as reassignments.

     Returns:
         ``(taxonomy, indexes, proficiency)`` where ``indexes`` holds the
         solver's ``(row, col)`` pairs (positions in ``taxonomy``) and
         ``proficiency`` is the paired mutual information divided by the
         summed actual entropy, or 0 if that sum is 0.  An empty taxonomy
         yields ``(taxonomy, [], 0)`` without invoking the solver.
     """
     taxonomy = list(self.check_taxonomy())
     if not taxonomy:
         # An empty cost matrix is not an assignment problem; return the
         # same shape of result the normal path would produce.
         return (taxonomy, [], 0)
     # https://pypi.python.org/pypi/munkres
     # Deferred until here so the package is only required when used.
     import munkres
     costs = []
     entropies = []
     for ac in taxonomy:
         an = self.actual.get(ac, 0)
         ae = util.bin_entropy(self.observations, an)
         entropies.append(ae)
         if ae == 0:
             costs.append([0 for _pc in taxonomy])
         else:
             # negative MI because munkres minimizes costs
             costs.append([-util.bin_mutual_info(
                 self.observations, self.predicted.get(pc, 0),
                 an, self.tp.get((ac, pc), 0))
                           for pc in taxonomy])
     m = munkres.Munkres()
     indexes = m.compute(costs)
     mutual_information = 0
     reassigned = []
     for row, col in indexes:
         mutual_information += -costs[row][col]
         if row == col:
             continue
         ac = taxonomy[row]
         pc = taxonomy[col]
         # Paired MI scaled to a percentage of the row's entropy.
         c = -100 * costs[row][col]
         if c > 0:
             c /= entropies[row]
         reassigned.append(
             (c, ac, self.actual.get(ac, 0), pc, self.predicted.get(pc, 0)))
     if reassigned:
         reassigned.sort(key=operator.itemgetter(0), reverse=True)
         # NOTE(review): the count is over all reassignments; the listing
         # is restricted by the thresholds in the comprehension.
         # ``warning`` is used because ``warn`` is a deprecated alias.
         MuLabCat.logger.warning(
             "Reassigned %d categories:\n%s", len(reassigned), "\n".join(
                 ["  Proficiency=%.2f%%: Actual [%s](%d) = Predicted [%s](%d)"
                  % (p, ac, an, pc, pn)
                  for (p, ac, an, pc, pn) in reassigned
                  if (p >= 10 and an >= 5 and pn >= 5)]))
     entropy_sum = sum(entropies)
     return (taxonomy, indexes,
             0 if entropy_sum == 0 else mutual_information / entropy_sum)