def fitSamplingNull(response, nSamp, method='cv', memlimit=None, largest=None, **kwargs):
    """Estimate the prediction error of the null (mean-only) model by resampling.

    For each of the nSamp train/validation splits, the mean of the training
    responses is used as the prediction for every validation response, and
    the mean squared error over the validation responses is recorded.

    methods:
    'cv'    cross validation with nSamp number of folds
    'bs'    bootstrap
    'bs632' bootstrap 632 (weighted average of bs and training error)

    response is indexed with the index collections returned by the sampling
    helper, so it is presumably a numpy array (confirm against callers).
    memlimit, largest and **kwargs are accepted but not used by this function.

    Returns (meanmse, varmse): the average of the per-split mean squared
    errors and their variance across splits. For 'bs632' only meanmse is
    blended with the training error; varmse is left as the variance of the
    raw bootstrap errors.

    Raises ValueError if method is not one of the names listed above.
    """
    # reject a bad method name before doing any work
    if method not in ('cv', 'bs', 'bs632'):
        raise ValueError('Sampling method not correct')

    nObs = len(response)

    # partition the observation indices via the chosen sampling method
    if method == 'cv':
        t, v = st.kFoldCV(range(nObs), nSamp, randomise=True)
    else:
        t, v = st.kRoundBS(range(nObs), nSamp)

    smse = 0
    sSqmse = 0
    for i in range(nSamp):
        # training responses define the null prediction (their mean)
        y = response[t[i]]
        # validation responses are what that prediction is scored against
        Yval = response[v[i]]
        nVal = float(len(Yval))
        mse = np.sum((Yval - np.mean(y))**2)/nVal
        # accumulate first and second moments of the per-split error
        smse = smse + mse
        sSqmse = sSqmse + mse**2

    # average over the splits; variance is E[mse^2] - E[mse]^2
    nSampFlt = float(nSamp)
    meanmse = smse/nSampFlt
    varmse = sSqmse/nSampFlt - meanmse**2

    if method == 'bs632':
        # The null model has no fitted estimator or regressors, so its
        # training (resubstitution) error is simply the error of predicting
        # the mean of the full response for every observation.
        resubmse = np.sum((response - np.mean(response))**2)/float(nObs)
        meanmse = 0.632*meanmse + (1 - 0.632)*resubmse

    return meanmse, varmse
def fitSampling(regressors, response, alpha, nSamp, method='cv', memlimit=None, largest=None, **kwargs):
    """Performs an elastic net constrained linear regression, see fit, with a
    selected sampling method to estimate errors using nSamp samplings.

    methods:
    'cv'    cross validation with nSamp number of folds
    'bs'    bootstrap
    'bs632' bootstrap 632 (weighted average of bs and training error)

    The model is first fit on all of the data; the lambda values of that
    full fit are then forced on every per-split fit so that the errors of
    the different splits line up model by model.

    Returns (err, fullEnm, allVals):
    err      the enet.ENetTrainError built from the lambdas, nSamp, the mean
             and the variance of the per-split mean squared errors, and alpha
    fullEnm  the model returned by enet.fit on all of the data, after
             setErrors(err.mErr) has been called on it
    allVals  array of shape (number of lambdas, nSamp) holding the mean
             squared error of every model on every split

    For 'bs632' only the mean error is blended with the training error of
    the full fit; the variance and allVals hold the raw bootstrap errors.

    Raises ValueError if method is not one of the names listed above.
    """
    # reject a bad method name before paying for the full fit
    if method not in ('cv', 'bs', 'bs632'):
        raise ValueError('Sampling method not correct')

    # the unpacking also requires regressors to be two dimensional
    nObs, nRegs = regressors.shape

    # get the full model fit
    fullEnm = enet.fit(regressors, response, alpha, memlimit, largest, **kwargs)

    # get the lambda values determined in the full fit
    # (going to force these lambdas for all the per-split fits)
    lam = fullEnm.lambdas

    # the lambdas may have been user defined, don't want them passed twice
    kwargs.pop('lambdas', None)

    # partition the observation indices via the chosen sampling method
    if method == 'cv':
        t, v = st.kFoldCV(range(nObs), nSamp, randomise=True)
    else:
        t, v = st.kRoundBS(range(nObs), nSamp)

    # our error is the mean squared error; we want its expected value and
    # the corresponding variance over the different splits, per model
    nModels = len(lam)
    smse = np.zeros(nModels)
    sSqmse = np.zeros(nModels)
    allVals = np.zeros((nModels, nSamp))

    # loop through the splits
    for i in range(nSamp):
        # fit on the training rows with the lambdas of the full fit
        X = regressors[t[i]]
        y = response[t[i]]
        enm = enet.fit(X, y, alpha, memlimit, largest, lambdas=lam, **kwargs)

        # get the validation values
        Xval = regressors[v[i]]
        Yval = response[v[i]]
        nVal = float(len(Yval))

        # get the predicted responses from the validation regressors
        Yhat = enm.predict(Xval)

        # the transpose is necessary to do the subtraction: afterwards the
        # rows are the models and the columns are the observations
        mse = np.sum((Yhat.T - Yval)**2, 1)/nVal

        # accumulate first and second moments of the error for each model
        smse = smse + mse
        sSqmse = sSqmse + mse**2
        allVals[:, i] = mse

    # average over the splits; variance is E[mse^2] - E[mse]^2
    nSampFlt = float(nSamp)
    meanmse = smse/nSampFlt
    varmse = sSqmse/nSampFlt - meanmse**2

    if method == 'bs632':
        # blend the bootstrap error with the training error of the full fit
        yhat = fullEnm.predict(regressors)
        resubmse = np.sum((yhat.T - response)**2, 1)/float(nObs)
        meanmse = 0.632*meanmse + (1 - 0.632)*resubmse

    # put the errors in a container and attach them to the full model
    err = enet.ENetTrainError(lam, nSamp, meanmse, varmse, [0], [0], alpha)
    err.setParamName('lambda')
    fullEnm.setErrors(err.mErr)

    return err, fullEnm, allVals