Exemple #1
0
	def __init__(self, _OD, _paramsets={'C':100,'gamma':0.1}, parallel = True, train=True):
		"""Collect per-bin parameters and data from ``_OD``, optionally training.

		_OD        -- data object; this method reads ``bins``, ``indAttr()``,
		              ``observedContours()`` and ``indQuantities`` from it.
		_paramsets -- either a dict keyed by bin (each value being that bin's
		              parameters) or a single parameter dict used for every
		              bin that has no entry of its own.
		parallel   -- stored as ``self.parallel``.
		train      -- when true, ``self.train()`` is called before returning.
		"""
		self.OD = _OD
		self.parallel = parallel
		self.params = {}
		for b in self.OD.bins:
			# ``in`` instead of dict.has_key(): has_key no longer exists in Python 3
			if b in _paramsets:
				self.params[b] = _paramsets[b]
			else:
				# Copy so the bins do not all alias one dict (which may be the
				# shared default argument of this method).
				self.params[b] = dict(_paramsets)
		self.ind = self.OD.indAttr()
		self.dep = self.OD.observedContours()
		self.indWeights = self.OD.indQuantities

		# Filled by train(): one fitted model per bin
		self.svr = {}
		if train:
			self.train()
		self.MU = MisclassUncert(self,self.OD)
Exemple #2
0
class ExpectedDistribution:
	
	def __init__(self, _OD, _paramsets={'C':100,'gamma':0.1}, parallel = True, train=True):
		"""Collect per-bin parameters and data from ``_OD``, optionally training.

		_OD        -- data object; this method reads ``bins``, ``indAttr()``,
		              ``observedContours()`` and ``indQuantities`` from it.
		_paramsets -- either a dict keyed by bin (each value being that bin's
		              parameters) or a single parameter dict used for every
		              bin that has no entry of its own.
		parallel   -- stored as ``self.parallel``; selects the parallel code
		              paths in train() and getExpectationsAt().
		train      -- when true, ``self.train()`` is called before returning.
		"""
		self.OD = _OD
		self.parallel = parallel
		self.params = {}
		for b in self.OD.bins:
			# ``in`` instead of dict.has_key(): has_key no longer exists in Python 3
			if b in _paramsets:
				self.params[b] = _paramsets[b]
			else:
				# Copy so the bins do not all alias one dict (which may be the
				# shared default argument of this method).
				self.params[b] = dict(_paramsets)
		self.ind = self.OD.indAttr()
		self.dep = self.OD.observedContours()
		self.indWeights = self.OD.indQuantities

		# Filled by train(): one fitted model per bin
		self.svr = {}
		if train:
			self.train()
		self.MU = MisclassUncert(self,self.OD)
	
	def train(self):
		"""Fit one model per bin with ``trainBin`` and store it in ``self.svr``.

		Each call receives that bin's parameters, the independent values as a
		transposed 2-D array, that bin's dependent values and the weights.
		When ``self.parallel`` is set the calls are dispatched through
		``Parallel``/``delayed``; otherwise they run one after another.
		"""
		bins = self.OD.bins
		if self.parallel:
			fitted = Parallel(n_jobs=-1)(
				delayed(trainBin)(self.params[q], np.atleast_2d(self.ind).T, self.dep[q], self.indWeights)
				for q in bins
			)
		else:
			fitted = [
				trainBin(self.params[q], np.atleast_2d(self.ind).T, self.dep[q], self.indWeights)
				for q in bins
			]

		# Results come back in bin order, so pair them up positionally
		for q, model in zip(bins, fitted):
			self.svr[q] = model
	
	def misclassUncertainty(self, values, ignore_eps=True):
		return self.MU.misclassUncertainty(values, ignore_eps)
		
	# Get the bin distribution predicted by the SVR model
	def getExpectationsAt(self, vals, returnScaled=True, medianOnly=False):
		if medianOnly:
			results = self.svr[0.5].predict(vals)
			if not returnScaled:
				results = self.OD.unscalePoints(results)
			return results

		# get the result predicted for each input value
		results = {}
		if self.parallel and len(vals) > 1:
			predlist = Parallel(n_jobs=-1)(delayed(predictBin)(self.svr[b],vals) for b in self.OD.bins)
			for i,pred in enumerate(predlist):
				results[self.OD.bins[i]] = pred
		else:
			for b in self.OD.bins:
				results[b] = self.svr[b].predict(vals)
	
		# Flatten lines which cross
		# if b < 0.5 flatten b to its larger neighbor
		reverse_bins = self.OD.bins[::-1]
		for i,b in enumerate(reverse_bins):
			if b < 0.5:
				results[b] = np.minimum(results[b],results[reverse_bins[i-1]])
		# if b > 0.5 flatten b to its smaller neighbor
		for i,b in enumerate(self.OD.bins):
			if b > 0.5:
				results[b] = np.maximum(results[b],results[self.OD.bins[i-1]])
	
		# undo scaling to display points corresponding to the original values
		if not returnScaled:
			for b in self.OD.bins:
				results[b] = self.OD.unscalePoints(results[b])
		return results
	
	def getParams(self):
		return self.svr[0.5].get_params() 
	
	'''SurpriseCalc has been moved to S.py and split up