def proficiency2(self):
    """Weighted average of n^2 proficiencies by entropy*entropy.

    Weights each (actual, predicted) category pair's mutual information by
    the product of the actual category's entropy and the predicted
    category's entropy, normalized by the total entropy product.
    Returns 0 when the denominator is zero (consistent with
    proficiency_raw), instead of raising ZeroDivisionError.
    """
    # Entropy of each actual category, keyed by category.
    actual_entropies = dict(
        [(c, util.bin_entropy(self.observations, n))
         for (c, n) in self.actual.iteritems()])
    top = 0.0
    predicted_entropy_sum = 0.0
    for (pc, pn) in self.predicted.iteritems():
        pe = util.bin_entropy(self.observations, pn)
        predicted_entropy_sum += pe
        if pe > 0:
            for (ac, an) in self.actual.iteritems():
                top += actual_entropies[ac] * pe * util.bin_mutual_info(
                    self.observations, pn, an, self.tp.get((ac, pc), 0))
    denominator = predicted_entropy_sum * sum(actual_entropies.itervalues())
    # Guard: all-zero entropies would otherwise divide by zero.
    if denominator == 0:
        return 0
    return top / denominator
def proficiency2 (self):
    """Weighted average of n^2 proficiencies by entropy*entropy.

    Returns 0 instead of raising ZeroDivisionError when every entropy is
    zero, matching the convention of proficiency_raw.
    """
    # Per-category entropy of the actual labels.
    ae = {}
    for (c, n) in self.actual.iteritems():
        ae[c] = util.bin_entropy(self.observations, n)
    top = 0.0
    spe = 0.0  # sum of predicted entropies
    for (pc, pn) in self.predicted.iteritems():
        pe = util.bin_entropy(self.observations, pn)
        spe += pe
        if pe <= 0:
            continue
        for (ac, an) in self.actual.iteritems():
            top += ae[ac] * pe * util.bin_mutual_info(
                self.observations, pn, an, self.tp.get((ac, pc), 0))
    denom = spe * sum(ae.itervalues())
    # Avoid ZeroDivisionError when all entropies are zero (cf. proficiency_raw).
    return top / denom if denom != 0 else 0
def proficiency_raw (self):
    """Return total mutual information over total actual-category entropy.

    Sums, per actual category, the binary entropy and the mutual
    information between actual and predicted labels; falls back to 0 when
    the actual distribution carries no entropy.
    """
    total_mi = 0.0
    total_entropy = 0.0
    for (category, count) in self.actual.iteritems():
        total_entropy += util.bin_entropy(self.observations, count)
        total_mi += util.bin_mutual_info(
            self.observations, self.predicted.get(category, 0),
            count, self.tp.get((category, category), 0))
    return total_mi / total_entropy if total_entropy != 0 else 0
def proficiency_raw(self):
    """Raw proficiency: sum of per-category mutual information divided by
    the sum of per-category actual-label entropy (0 if that sum is 0)."""
    mi_sum = 0.0
    entropy_sum = 0.0
    for cat, n_actual in self.actual.iteritems():
        entropy_sum += util.bin_entropy(self.observations, n_actual)
        mi_sum += util.bin_mutual_info(
            self.observations,
            self.predicted.get(cat, 0),
            n_actual,
            self.tp.get((cat, cat), 0))
    if entropy_sum == 0:
        return 0
    return mi_sum / entropy_sum
def assignment(self):
    # Find the optimal one-to-one matching between actual and predicted
    # categories using the Hungarian algorithm, and report any category
    # that matched a different label than its own.
    # https://pypi.python.org/pypi/munkres
    import munkres
    taxonomy = list(self.check_taxonomy())
    costs = []           # cost matrix: rows = actual, cols = predicted
    actual_entropy = []  # per-category entropy of the actual labels
    for ac in taxonomy:
        an = self.actual.get(ac, 0)
        ae = util.bin_entropy(self.observations, an)
        actual_entropy.append(ae)
        if ae == 0:
            # Zero-entropy category: no information to match on.
            costs.append([0 for _pc in taxonomy])
        else:
            # negative MI because munkres minimizes costs
            costs.append([
                -util.bin_mutual_info(self.observations,
                                      self.predicted.get(pc, 0), an,
                                      self.tp.get((ac, pc), 0))
                for pc in taxonomy])
    m = munkres.Munkres()
    indexes = m.compute(costs)  # list of (row, col) pairs of the optimum
    mutual_information = 0
    reassigned = []  # categories whose best match is a different label
    for row, col in indexes:
        # Costs are negated MI, so negate back when accumulating.
        mutual_information += -costs[row][col]
        if row != col:
            ac = taxonomy[row]
            pc = taxonomy[col]
            c = -100 * costs[row][col]  # MI scaled for percentage display
            if c > 0:
                # Normalize to a proficiency percentage for this category.
                c /= actual_entropy[row]
            reassigned.append(
                (c, ac, self.actual.get(ac, 0), pc, self.predicted.get(pc, 0)))
    if len(reassigned) > 0:
        # Log reassignments, best first; the message shows only the
        # significant ones (proficiency >= 10%, counts >= 5) although the
        # reported count covers all of them.
        reassigned.sort(key=operator.itemgetter(0), reverse=True)
        MuLabCat.logger.warn(
            "Reassigned %d categories:\n%s", len(reassigned), "\n".join(
                [" Proficiency=%.2f%%: Actual [%s](%d) = Predicted [%s](%d)"
                 % (p, ac, an, pc, pn)
                 for (p, ac, an, pc, pn) in reassigned
                 if (p >= 10 and an >= 5 and pn >= 5)]))
    # NOTE: actual_entropy is reused here, collapsing the list to its sum.
    actual_entropy = sum(actual_entropy)
    return (taxonomy, indexes,
            0 if actual_entropy == 0 else mutual_information / actual_entropy)
def assignment (self):
    """Optimal matching of actual to predicted categories.

    Builds a cost matrix of negated mutual information and solves it with
    the Hungarian algorithm (https://pypi.python.org/pypi/munkres).
    Logs categories whose optimal match differs from their own label, and
    returns (taxonomy, pairs, normalized total mutual information).
    """
    import munkres
    taxonomy = list(self.check_taxonomy())
    entropies = []    # entropy of each actual category, in taxonomy order
    cost_matrix = []
    for actual_cat in taxonomy:
        actual_count = self.actual.get(actual_cat, 0)
        entropy = util.bin_entropy(self.observations, actual_count)
        entropies.append(entropy)
        if entropy == 0:
            cost_matrix.append([0] * len(taxonomy))
        else:
            # munkres minimizes, so negate the mutual information
            row = []
            for predicted_cat in taxonomy:
                row.append(-util.bin_mutual_info(
                    self.observations, self.predicted.get(predicted_cat, 0),
                    actual_count, self.tp.get((actual_cat, predicted_cat), 0)))
            cost_matrix.append(row)
    pairs = munkres.Munkres().compute(cost_matrix)
    total_mi = 0
    moved = []  # categories matched to a label other than their own
    for row, col in pairs:
        total_mi -= cost_matrix[row][col]
        if row == col:
            continue
        actual_cat = taxonomy[row]
        predicted_cat = taxonomy[col]
        pct = -100 * cost_matrix[row][col]
        if pct > 0:
            pct /= entropies[row]  # proficiency as a percentage
        moved.append((pct, actual_cat, self.actual.get(actual_cat, 0),
                      predicted_cat, self.predicted.get(predicted_cat, 0)))
    if moved:
        moved.sort(key=operator.itemgetter(0), reverse=True)
        MuLabCat.logger.warn(
            "Reassigned %d categories:\n%s", len(moved), "\n".join(
                [" Proficiency=%.2f%%: Actual [%s](%d) = Predicted [%s](%d)"
                 % (p, ac, an, pc, pn)
                 for (p, ac, an, pc, pn) in moved
                 if (p >= 10 and an >= 5 and pn >= 5)]))
    total_entropy = sum(entropies)
    return (taxonomy, pairs,
            0 if total_entropy == 0 else total_mi / total_entropy)