def compute_metric(self, prob_metric):
    """Compute log ratios from the probability table.

    For every ``(acc, acc2)`` pair yielded by iterating ``prob_metric``,
    the ratio

        prob_metric.get_metric(acc, acc2)
        ---------------------------------------------
        prob_metric.get_metric(not-acc, acc2) + TINY_NUM

    is formed, its natural logarithm is taken, and the result is stored on
    this object with ``self.set_metric(acc, acc2, value)``.

    Args:
        prob_metric: Probability table. It must be iterable as
            ``(acc, acc2)`` pairs and provide ``get_metric(acc1, acc2)``.

    Returns:
        None. Results are written through ``self.set_metric``.
    """
    for acc, acc2 in prob_metric:
        notacc = get_not_id(acc)

        acc2_acc = prob_metric.get_metric(acc, acc2)
        acc2_notacc = prob_metric.get_metric(notacc, acc2)

        # TINY_NUM (presumably a small positive constant defined elsewhere)
        # keeps the denominator away from zero.
        lr = acc2_acc / (acc2_notacc + TINY_NUM)

        #kdrew: compute log ratio and add in TINY_NUM to avoid log(0)
        log_value = math.log(lr + TINY_NUM)
        self.set_metric(acc, acc2, log_value)
def get_metric(self, acc1, acc2):
    """Return the probability stored for, or derived from, ``acc1``/``acc2``.

    Following the in-code notation, ``get_metric(acc1, acc2)`` is
    ``P(acc2 | acc1)`` and ``get_metric(acc1, None)`` is ``P(acc1)``.
    Only values for non-negated ids are read from ``self._dict``; values
    involving a negated ("not") id are derived:

    * ``P(not a)``        = ``1 - P(a)``
    * ``P(not b | x)``    = ``1 - P(b | x)`` (``x`` negated or not)
    * ``P(b | not a)``    = ``(P(b) - P(b | a) * P(a)) / (1 - P(a))``

    Args:
        acc1: Conditioning id, possibly negated. ``None`` yields
            ``TINY_NUM``.
        acc2: Target id, possibly negated, or ``None`` to request the
            value stored for ``acc1`` alone.

    Returns:
        The looked-up or derived value. ``TINY_NUM`` is returned when
        ``acc1`` is ``None``, when a required key is missing from
        ``self._dict``, or when the numerator of ``P(b | not a)`` is not
        greater than ``TINY_NUM``.
    """
    try:
        if acc1 is None:
            return TINY_NUM

        if acc2 is None:
            if is_not_id(acc1):
                # get_not_id() is used here to recover the raw id from a
                # negated one.
                return 1.0 - self.get_metric(get_not_id(acc1), None)
            return self._dict[self._key(acc1, None)]

        #kdrew: P(notacc2 | acc1) and P(notacc2 | notacc1)
        # Both cases reduce to the complement of P(acc2 | acc1-as-given);
        # a negated acc1 is resolved by the recursive call.
        if is_not_id(acc2):
            rawacc2 = get_not_id(acc2)
            return 1.0 - self.get_metric(acc1, rawacc2)

        #kdrew: P(acc2 | notacc1)
        if is_not_id(acc1):
            rawacc1 = get_not_id(acc1)
            p1 = self.get_metric(rawacc1, None)
            p2 = self.get_metric(acc2, None)
            p2G1 = self.get_metric(rawacc1, acc2)
            numerator = p2 - p2G1 * p1

            #kdrew: tests for small numerator and returns TINY_NUM if smaller
            #kdrew: fixes problem with P(all|notall) returning 1.0
            if numerator <= TINY_NUM:
                return TINY_NUM
            return numerator / (1.0 - p1)

        return self._dict[self._key(acc1, acc2)]
    except KeyError:
        # Missing table entries are treated as (near-)zero probability.
        return TINY_NUM
def get_metric(self, acc1, acc2=None):
    """Return the frequency for ``acc1`` alone or for ``acc1`` with ``acc2``.

    Values for non-negated ids are fetched through ``Metric.get_metric``;
    values involving a negated ("not") id are derived from them by
    subtraction, using the frequency of ``ALL_TERM`` as the total:

    * ``F(not a)``        = ``F(ALL) - F(a)``
    * ``F(not a, not b)`` = ``F(ALL) - F(a) - F(b) + F(a, b)``
    * ``F(a, not b)``     = ``F(a) - F(a, b)``
    * ``F(not a, b)``     = ``F(b) - F(a, b)``

    Args:
        acc1: First id, possibly negated.
        acc2: Optional second id, possibly negated. When ``None`` the
            single-id frequency of ``acc1`` is returned.

    Returns:
        The looked-up or derived frequency. ``0`` is returned when the
        lookup raises ``KeyError`` (for a pair, only after the reversed
        argument order has also been tried).
    """
    if acc2 is None:
        try:
            if is_not_id(acc1):
                all_freq = self.get_metric(ALL_TERM)
                # NOTE(review): this is the only call in this method that
                # passes acc1.get_id() rather than acc1 to get_not_id();
                # kept as-is -- confirm which form is intended.
                return all_freq - self.get_metric(get_not_id(acc1.get_id()))
            return Metric.get_metric(self, acc1, None)
        except KeyError:
            return 0

    acc1_negated = is_not_id(acc1)

    #kdrew: F(notacc2 , notacc1)
    if acc1_negated and is_not_id(acc2):
        acc_acc2_freq = self.get_metric(get_not_id(acc1), get_not_id(acc2))
        acc_freq = self.get_metric(get_not_id(acc1))
        acc2_freq = self.get_metric(get_not_id(acc2))
        all_freq = self.get_metric(ALL_TERM)
        # Inclusion-exclusion on the total.
        return all_freq - acc_freq - acc2_freq + acc_acc2_freq

    #kdrew: F(notacc2 , acc1)
    if is_not_id(acc2):
        acc_acc2_freq = self.get_metric(acc1, get_not_id(acc2))
        acc_freq = self.get_metric(acc1)
        return acc_freq - acc_acc2_freq

    #kdrew: F(acc2 , notacc1)
    if acc1_negated:
        # Arguments are passed as (acc2, raw acc1); the reversed-order
        # fallback below makes the pair lookup order-insensitive.
        acc_acc2_freq = self.get_metric(acc2, get_not_id(acc1))
        acc2_freq = self.get_metric(acc2)
        return acc2_freq - acc_acc2_freq

    #kdrew: F(acc1, acc2)
    try:
        return Metric.get_metric(self, acc1, acc2)
    except KeyError:
        #kdrew: if the one combination of predictors doesn't work
        #kdrew: reverse them and try again
        try:
            return Metric.get_metric(self, acc2, acc1)
        except KeyError:
            return 0
def compute_mi(self, acc, acc2, prob_metric):
    """Return the mutual-information contribution of ``acc2`` given ``acc``.

    Only the two terms in which ``acc2`` is *not* negated are summed:

        sum over x in {acc, not-acc} of
            P(x, acc2) * log(P(x, acc2) / (P(x) * P(acc2) + TINY_NUM)
                             + TINY_NUM)

    where the joint is built from the table as
    ``P(x, acc2) = prob_metric.get_metric(x, acc2) * P(x)`` and the
    marginals are ``prob_metric.get_metric(x, None)``.

    Args:
        acc: Conditioning id.
        acc2: Target id.
        prob_metric: Probability table providing ``get_metric(acc1, acc2)``.

    Returns:
        The sum of the two terms described above.
    """
    not_acc = get_not_id(acc)

    acc_prob = prob_metric.get_metric(acc, None)
    acc2_prob = prob_metric.get_metric(acc2, None)
    not_acc_prob = prob_metric.get_metric(not_acc, None)

    # Joint probabilities: P(x, acc2) = P(acc2 | x) * P(x).
    acc_acc2_prob = prob_metric.get_metric(acc, acc2) * acc_prob
    not_acc_acc2_prob = prob_metric.get_metric(not_acc, acc2) * not_acc_prob

    # TINY_NUM is added inside the ratio and inside the log so that neither
    # a zero denominator nor log(0) is evaluated.
    tmp_MI = acc_acc2_prob * math.log(
        (acc_acc2_prob / (acc_prob * acc2_prob + TINY_NUM)) + TINY_NUM)
    tmp_MI += not_acc_acc2_prob * math.log(
        (not_acc_acc2_prob / (not_acc_prob * acc2_prob + TINY_NUM)) + TINY_NUM)

    #kdrew: generalize this so it can be a parameter (class level)
    #kdrew: original code only did P(acc2|acc) and P(acc2|not_acc)
    #kdrew: do not do "not" "not" because general terms wash out everything
    # The (acc, not-acc2) and (not-acc, not-acc2) terms are therefore
    # deliberately omitted, and their inputs are no longer looked up.
    # If they are ever reinstated, the (not-acc, not-acc2) joint must be
    # weighted by P(not-acc); the previous unused computation multiplied
    # it by P(acc).
    return tmp_MI