Example #1
	# assumes module-level imports: numpy as np, elasticNetLinReg as enet
	def _calcImpactValues1(self):
		"""Calculate the impact values as described in self.impactValues.
		For each selected coefficient, its column is removed from the full
		regressor matrix and the fit is rerun via elasticNetLinReg.fitFull;
		the resulting model error is that coefficient's impact value.
		indices	the selected coefs
		X	the full regressor matrix
		y	the response vector
		alpha	alpha values to consider
		nSamp	number of folds in cross validation
		returns: the impact values (MSE with a coef removed)
		"""
		
		indices = self.indices
		X = self._X
		y = self._y
		alpha = self._alphaRange
		nSamp = self._nSamp
		
		# initialize the impact values to zero
		n = len(indices)
		iVals = np.zeros(n)
		for i in range(n):
			index = indices[i]
			# remove the coef's column
			Xhat = np.delete(X, index, axis=1)
			# rerun the fit without that regressor
			tmp, a, b, c = enet.fitFull(Xhat, y, alpha, nSamp, self._sampling)
			# get the error; only one model is returned, so take the scalar value
			iVals[i] = tmp.errors[0]
		
		return iVals
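
A minimal standalone sketch of the same leave-one-regressor-out idea, for context. It uses scikit-learn's ElasticNetCV as a stand-in for elasticNetLinReg.fitFull, so the estimator, its parameters, and the in-sample MSE scoring are assumptions for illustration, not the original module's behavior (in sklearn terms, the elastic-net mixing parameter that this code calls alpha is l1_ratio):

# sketch only: ElasticNetCV stands in for enet.fitFull (assumption)
import numpy as np
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error

def impact_values(X, y, indices, l1_ratios=np.arange(0.1, 1.1, 0.1), n_folds=10):
	# MSE of a refit model after removing each selected column in turn;
	# larger values mean the removed regressor mattered more to the fit
	iVals = np.zeros(len(indices))
	for i, index in enumerate(indices):
		Xhat = np.delete(X, index, axis=1)  # drop one regressor column
		model = ElasticNetCV(l1_ratio=list(l1_ratios), cv=n_folds).fit(Xhat, y)
		iVals[i] = mean_squared_error(y, model.predict(Xhat))
	return iVals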
Example #2
	# assumes module-level imports: numpy as np, elasticNetLinReg as enet
	def calcFit(self, alpha=np.arange(.1, 1.1, .1), nSamp=10, sampling='cv'):
		"""Calculates the regression, using cross validation
		to determine the parameters.  Calls
		elasticNetLinReg.fitFull.
		alpha	eNet params to test
		nSamp	number of rounds for sampling
		sampling	sampling method: cross validation ('cv'),
				bootstrap ('bs'), bootstrap .632 ('bs632');
				see elasticNetLinReg.fitSampling
		return: void
		sets most properties of the model object, including the coef estimates
		"""
		# may need these later
		self._alphaRange = alpha
		self._nSamp = nSamp	
		self._sampling = sampling	

		# run the full cv fit
		# I have found that doing this iteratively, while removing unselected variables, is better
		# *** hack: the function passes back nullErr as a single value
		enm, mc, vc, nullErr = enet.fitFull(self._X, self._y, alpha, nSamp, sampling)
		self._nullErr = nullErr		
		# remember, the elastic net object typically holds many models (lambda scanning)
		# fitFull should return a single model, but many properties will be
		# in arrays of higher dimension than needed here
		# this seems kinda sloppy
		self._c = enm.coef
		self._i = enm.indices
		self._lam = np.float64(enm.lambdas[0]) # had problems with user-defined values as float
		self._al = np.float64(enm.alpha) # had problems with user-defined values as float
		self._c0 = enm.intercept[0]
		self._err = enm.errors[0]
		self._fitCalcd = True
		# the vectors from enet cover all possible regressors,
		# let's sparsify that down a bit
		# *** analysis alert
		# the typical coef will be from the full fit across the data;
		# however, the cross validation can (will) have different values at
		# each fold, which means that some coefs may be present in some
		# folds and not in others, so some non-zero mean coef values
		# will be lost here (but I ask you, are they important???)
		self._mc = mc[enm.indices]
		self._vc = vc[enm.indices]

		# OK, we have a sparse rep of the non-zero regressors...
		# or do we??
		# the original glmnet function will add indices
		# to the sparse list when they are needed, but will never remove them
		# from this list, even if the values are reset to zero later in the fit.
		# It is possible that the model we chose has zero coef values in this list;
		# let's drop those now to save on computation later!
		nonZero = np.abs(self._c) > 1E-52
		self._i = self._i[nonZero]
		self._c = self._c[nonZero]
		self._mc = self._mc[nonZero]
		self._vc = self._vc[nonZero]
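
To make the pruning step concrete, here is a small self-contained sketch of the same zero-coefficient cleanup: glmnet-style fits can leave explicit zeros in the sparse coefficient list, and the threshold filter removes them along with the companion arrays. The array values below are illustrative only, not from the original code:

import numpy as np

coef = np.array([0.8, 0.0, -0.3, 1e-60, 0.5])  # 1e-60 is numerically zero
indices = np.array([2, 5, 7, 11, 13])          # positions in the full X

nonZero = np.abs(coef) > 1E-52                 # same threshold as above
coef = coef[nonZero]                           # [ 0.8 -0.3  0.5]
indices = indices[nonZero]                     # [ 2  7 13]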