Esempio n. 1
0
	def compute_metric(self, prob_metric):
		"""
		Compute log likelihood ratios from the probability table.

		For every (acc, acc2) pair produced by iterating prob_metric, the
		ratio get_metric(acc, acc2) / get_metric(not acc, acc2) is formed
		(presumably P(acc2 | acc) / P(acc2 | not acc) -- confirm against the
		prob_metric class), its natural log is taken, and the result is
		stored through self.set_metric(acc, acc2, value).

		prob_metric -- iterable of (acc, acc2) pairs that also provides
		               get_metric(acc1, acc2)
		"""
		for acc, acc2 in prob_metric:
			notacc = get_not_id(acc)

			acc2_acc = prob_metric.get_metric(acc, acc2)
			acc2_notacc = prob_metric.get_metric(notacc, acc2)

			# TINY_NUM in the denominator keeps the division defined when
			# the "not acc" value is zero
			lr = acc2_acc / (acc2_notacc + TINY_NUM)

			#kdrew: compute log ratio and add in TINY_NUM to avoid log(0)
			log_value = math.log(lr + TINY_NUM)
			self.set_metric(acc, acc2, log_value)
Esempio n. 2
0
	def get_metric(self, acc1, acc2):
		"""
		Return the probability stored or derived for acc1, optionally
		conditioned as P(acc2 | acc1).

		Positive terms are read from self._dict using self._key(acc1, acc2).
		Values involving a "not" term (as recognised by is_not_id) are
		derived from the positive entries:

		  acc2 is None, acc1 is a "not" term:  1 - P(acc1)
		  acc2 is a "not" term:                1 - P(acc2 | acc1)
		  acc1 is a "not" term:                (P(acc2) - P(acc2|acc1) * P(acc1)) / (1 - P(acc1))

		acc1 -- conditioning term; None yields TINY_NUM
		acc2 -- second term, or None to request the value for acc1 alone

		Returns TINY_NUM when a needed entry is missing (KeyError), and when
		the P(acc2 | notacc1) numerator or denominator is too small to give
		a meaningful quotient.
		"""
		try:
			if acc1 is None:
				return TINY_NUM

			if acc2 is None:
				if is_not_id(acc1):
					return 1.0 - self.get_metric(get_not_id(acc1), None)
				return self._dict[self._key(acc1, None)]

			#kdrew: P(notacc2 | acc1) and P(notacc2 | notacc1)
			# Both cases are the complement of the value for the raw acc2,
			# with acc1 passed through unchanged.
			if is_not_id(acc2):
				return 1.0 - self.get_metric(acc1, get_not_id(acc2))

			#kdrew: P(acc2 | notacc1)
			if is_not_id(acc1):
				rawacc1 = get_not_id(acc1)
				p1 = self.get_metric(rawacc1, None)
				p2 = self.get_metric(acc2, None)
				p2G1 = self.get_metric(rawacc1, acc2)

				numerator = p2 - p2G1 * p1
				denominator = 1.0 - p1

				#kdrew: tests for small numerator and returns TINY_NUM if smaller
				#kdrew: fixes problem with P(all|notall) returning 1.0
				# A non-positive denominator (p1 >= 1.0) would otherwise
				# raise ZeroDivisionError or produce a negative value.
				if numerator <= TINY_NUM or denominator <= 0.0:
					return TINY_NUM
				return numerator / denominator

			return self._dict[self._key(acc1, acc2)]

		except KeyError:
			return TINY_NUM
Esempio n. 3
0
	def get_metric(self, acc1, acc2=None):
		"""
		Return the frequency for acc1 alone, or for acc1 together with acc2.

		Positive terms are looked up through Metric.get_metric. Values that
		involve a "not" term (as recognised by is_not_id) are derived from
		the positive values and the ALL_TERM value:

		  F(notacc1)          = F(all) - F(acc1)
		  F(notacc1, notacc2) = F(all) - F(acc1) - F(acc2) + F(acc1, acc2)
		  F(acc1, notacc2)    = F(acc1) - F(acc1, acc2)
		  F(notacc1, acc2)    = F(acc2) - F(acc1, acc2)

		acc1 -- first term
		acc2 -- second term, or None (the default) for the single-term value

		Returns 0 when the underlying lookup raises KeyError. A pair lookup
		is retried with the arguments swapped before giving up.
		"""
		if acc2 is None:
			try:
				if is_not_id(acc1):
					all_freq = self.get_metric(ALL_TERM)
					# NOTE(review): every other call in this method hands the
					# term itself to get_not_id(); here acc1.get_id() is
					# passed instead -- confirm this difference is intended.
					return all_freq - self.get_metric(get_not_id(acc1.get_id()))
				else:
					return Metric.get_metric(self, acc1, None)
			except KeyError:
				return 0

		#kdrew: F(notacc2 , notacc1)
		if is_not_id(acc1) and is_not_id(acc2):
			acc_acc2_freq = self.get_metric(get_not_id(acc1), get_not_id(acc2))
			acc_freq = self.get_metric(get_not_id(acc1))
			acc2_freq = self.get_metric(get_not_id(acc2))
			all_freq = self.get_metric(ALL_TERM)
			return all_freq - acc_freq - acc2_freq + acc_acc2_freq

		#kdrew: F(notacc2 , acc1)
		if is_not_id(acc2):
			acc_acc2_freq = self.get_metric(acc1, get_not_id(acc2))
			acc_freq = self.get_metric(acc1)
			return acc_freq - acc_acc2_freq

		#kdrew: F(acc2 , notacc1)
		if is_not_id(acc1):
			# Argument order is swapped relative to the caller's; the plain
			# pair lookup below tries both orders, so this is harmless.
			acc_acc2_freq = self.get_metric(acc2, get_not_id(acc1))
			acc2_freq = self.get_metric(acc2)
			return acc2_freq - acc_acc2_freq

		#kdrew: F(acc1, acc2)
		try:
			return Metric.get_metric(self, acc1, acc2)
		except KeyError:
			#kdrew: if the one combination of predictors doesn't work reverse them and try again
			try:
				return Metric.get_metric(self, acc2, acc1)
			except KeyError:
				return 0
Esempio n. 4
0
	def compute_mi(self, acc, acc2, prob_metric):
		"""
		Compute a partial mutual-information score between acc and acc2.

		Only the two terms in which acc2 itself occurs are summed:

		  P(acc, acc2)    * log(P(acc, acc2)    / (P(acc)    * P(acc2)))
		  P(notacc, acc2) * log(P(notacc, acc2) / (P(notacc) * P(acc2)))

		Each joint value is built as get_metric(x, acc2) * get_metric(x, None),
		i.e. it treats prob_metric.get_metric(x, acc2) as P(acc2 | x).
		TINY_NUM is added to each denominator and to each log argument so
		that neither a division by zero nor log(0) can occur.

		acc         -- first term
		acc2        -- second term
		prob_metric -- object providing get_metric(acc1, acc2)

		Returns the sum of the two terms as a float.
		"""
		not_acc = get_not_id(acc)

		acc_prob = prob_metric.get_metric(acc, None)
		acc2_prob = prob_metric.get_metric(acc2, None)
		not_acc_prob = prob_metric.get_metric(not_acc, None)

		# joint value = conditional value * marginal of the conditioning term
		acc_acc2_prob = prob_metric.get_metric(acc, acc2) * acc_prob
		not_acc_acc2_prob = prob_metric.get_metric(not_acc, acc2) * not_acc_prob

		tmp_MI = acc_acc2_prob * math.log((acc_acc2_prob / (acc_prob * acc2_prob + TINY_NUM)) + TINY_NUM)
		tmp_MI += not_acc_acc2_prob * math.log((not_acc_acc2_prob / (not_acc_prob * acc2_prob + TINY_NUM)) + TINY_NUM)

		#kdrew: generalize this so it can be a parameter (class level)
		#kdrew: original code only did P(acc2|acc) and P(acc2|not_acc)
		#kdrew: do not do "not" "not" because general terms wash out everything
		# The (acc, notacc2) and (notacc, notacc2) terms are therefore left
		# out. If they are ever added, the joint values must be
		#   P(notacc2 | acc)    * P(acc)
		#   P(notacc2 | notacc) * P(notacc)
		# (an earlier version multiplied the second one by P(acc)).

		return tmp_MI