def getTStat(X, y, alpha, lam, nSamp=100):
    # Residual bootstrap to estimate the standard error of each
    # coefficient and report the t-statistic (mean / std err).
    nObs, nRegs = X.shape
    # the std err comes from a residual bootstrap, so first get the residuals
    enm = enet.fit(X, y, alpha, lambdas=[lam])
    yHat = enm.predict(X)[:, 0]
    res = y - yHat
    resCent = res - np.mean(res)
    # build the bootstrap residual response samples
    ySample = np.zeros((nObs, nSamp))
    for i in range(nSamp):
        resSample = st.sampleWR(resCent)
        ySample[:, i] = yHat + resSample
    # residual bootstrap: accumulate coefficient sums and sums of squares
    sc = np.zeros(nRegs)
    sSqc = np.zeros(nRegs)
    for i in range(nSamp):
        # the active set changes from sample to sample, so map the
        # coefficients back to the original indices
        tmpEnm = enet.fit(X, ySample[:, i], alpha, lambdas=[lam])
        sc[tmpEnm.indices] = sc[tmpEnm.indices] + tmpEnm.coef[:, 0]
        sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + tmpEnm.coef[:, 0]**2
    # averages and standard deviations
    aveCoef = sc/float(nSamp)
    sdCoef = np.sqrt(sSqc/float(nSamp) - aveCoef**2)
    # get the t-stat: because the lasso is sparse, a coefficient can be
    # zero on every sample, giving a zero standard error; clamp those
    # values to avoid division by zero
    sdCoef[sdCoef < 1E-52] = 1E-52
    tStat = np.abs(aveCoef/sdCoef)
    return tStat
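# A minimal sketch of the sampling helper used throughout this module.
# st.sampleWR is not defined in this file; it is assumed to draw a
# bootstrap sample with replacement, something like the stand-in below.
def _sampleWRSketch(values):
    # draw len(values) entries from values, with replacement
    n = len(values)
    return values[np.random.randint(0, n, size=n)]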
def estStErr(self, nSamp=100):
    X = self._X
    y = self._y
    nObs, nRegs = X.shape
    lam = self._lam
    yHat = self._yHat
    intercept = self._intercept
    globalCoef = self._globalCoef
    coefIndex = self._coefIndex
    notEmpty = self._notEmpty
    alpha = self._alpha
    # build the bootstrap residual response samples
    res = y - yHat
    resCent = res - np.mean(res)
    ySample = np.zeros((nObs, nSamp))
    self._ySample = ySample
    for i in range(nSamp):
        resSample = st.sampleWR(resCent)
        ySample[:, i] = yHat + resSample
    if notEmpty:
        # work on the selected subset
        Xhat = X[:, coefIndex]
        self._Xhat = Xhat
        nObs, nRegsHat = Xhat.shape
        sdXhat = np.sqrt(np.var(Xhat, 0))
        self._sdXhat = sdXhat
        # residual bootstrap
        sumErr = 0
        sumSqErr = 0
        sumNullErr = 0
        sumSqNullErr = 0
        sc = np.zeros(nRegsHat)
        sSqc = np.zeros(nRegsHat)
        sumSup = np.zeros(nRegsHat)
        for i in range(nSamp):
            # cross validate to get the prediction errors
            err, tmpEnm, tmpallVals = fitSampling(Xhat, ySample[:, i], alpha, 10,
                                                  method='cv', lambdas=[lam])
            sumErr = err.mErr[0] + sumErr
            sumSqErr = err.mErr[0]**2 + sumSqErr
            # cross validate to get the null model errors
            nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
            sumNullErr = sumNullErr + nullErr
            sumSqNullErr = sumSqNullErr + nullErr**2
            # the active set changes from sample to sample, so map the
            # coefficients back to the original indices
            tmpEnm = enet.fit(Xhat, ySample[:, i], alpha, lambdas=[lam])
            sc[tmpEnm.indices] = sc[tmpEnm.indices] + tmpEnm.coef[:, 0]
            sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + tmpEnm.coef[:, 0]**2
            # record the support (nonzero coefficients)
            occur = np.zeros(len(tmpEnm.coef[:, 0]))
            occur[abs(tmpEnm.coef[:, 0]) > 1E-25] = 1.0
            sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur
        # averages and variances
        aveErr = sumErr/nSamp
        self._aveErr = aveErr
        self._sdErr = np.sqrt(sumSqErr/nSamp - aveErr**2)
        aveNullErr = sumNullErr/nSamp
        self._aveNullErr = aveNullErr
        self._sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
        aveCoef = sc/nSamp
        self._aveCoef = aveCoef
        self._sdCoef = np.sqrt(sSqc/nSamp - aveCoef**2)
        self._pSup = sumSup/nSamp
    else:
        # residual bootstrap on the null model only
        sumNullErr = 0
        sumSqNullErr = 0
        for i in range(nSamp):
            # cross validate to get the null model errors
            nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
            sumNullErr = sumNullErr + nullErr
            sumSqNullErr = sumSqNullErr + nullErr**2
        # averages and variances
        aveNullErr = sumNullErr/nSamp
        sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
        self._aveNullErr = aveNullErr
        self._sdNullErr = sdNullErr
        self._aveErr = aveNullErr
        self._sdErr = sdNullErr
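# The running sums above (sc/sSqc, sumErr/sumSqErr) rely on the one-pass
# variance identity Var(x) = E[x^2] - (E[x])^2, which is why only two
# accumulators are kept across the bootstrap loop.  A quick self-contained
# check of the identity:
def _checkOnePassVariance():
    x = np.random.randn(1000)
    n = float(len(x))
    ave = np.sum(x)/n
    varOnePass = np.sum(x**2)/n - ave**2   # E[x^2] - (E[x])^2
    assert np.allclose(varOnePass, np.var(x))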
def estModel(XFull, y, nSamp=100, alphaList=np.array([1]), estErr=True,
             estImp=False, reduceX=False, params=[]):
    """Estimate a mean and standard deviation for an elastic net model
    using bootstrap residuals.

    Note: bootstrap resampling is used to select the model parameters;
    the bootstrap residuals at these parameters are then used on the
    full feature set X to calculate means and standard errors.

    Note: if estErr, then 10-fold CV is used to estimate the prediction
    error at each iteration of the bootstrap.  This costs ten extra fits
    per bootstrap residual sample, but reduces the bias in the
    prediction error.  The mean and std dev of the CV error are reported.

    Note: if params is passed, it is assumed to be a tuple with the
    (lambda, alpha) model parameters.  In that case model selection is
    bypassed and these parameters are used.
    """
    nObs, nRegsFull = XFull.shape
    # select the full model values
    if len(params) == 2:
        lam, alpha = params
        enm = enet.fit(XFull, y, alpha, lambdas=[lam])[0]
    else:
        enm = select(XFull, y, nSamp, alphaList)
        lam = enm.lambdas[0]
    yHat = enm.predict(XFull)
    intercept = enm.intercept[0]
    globalCoef = enm.coef[np.abs(enm.coef) > 1E-21]
    coefIndex = enm.indices[np.abs(enm.coef) > 1E-21]
    alpha = enm.alpha
    # reduce X now if requested
    if reduceX:
        nRegs = len(coefIndex)
        if nRegs > 0:
            X = XFull[:, coefIndex]
            nObs, _ = X.shape
    else:
        X = XFull
        nRegs = nRegsFull
    # build the bootstrap residual response samples
    res = y - yHat
    resCent = res - np.mean(res)
    ySample = np.zeros((nObs, nSamp))
    for i in range(nSamp):
        resSample = st.sampleWR(resCent)
        ySample[:, i] = yHat + resSample
    if nRegs > 0:
        # residual bootstrap
        if estErr:
            sumErr = 0
            sumSqErr = 0
            sumNullErr = 0
            sumSqNullErr = 0
        sc = np.zeros(nRegs)
        sSqc = np.zeros(nRegs)
        ac = lil_matrix((nRegs, nSamp))
        sumSup = np.zeros(nRegs)
        for i in range(nSamp):
            if estErr:
                # cross validate to get the prediction errors
                err, tmpEnm, tmpallVals = fitSampling(X, ySample[:, i], alpha, 10,
                                                      method='cv', lambdas=[lam])
                sumErr = err.mErr[0] + sumErr
                sumSqErr = err.mErr[0]**2 + sumSqErr
                # cross validate to get the null model errors
                nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
                sumNullErr = sumNullErr + nullErr
                sumSqNullErr = sumSqNullErr + nullErr**2
            # the active set changes from sample to sample, so map the
            # coefficients back to the original indices
            tmpEnm = enet.fit(X, ySample[:, i], alpha, lambdas=[lam])
            sc[tmpEnm.indices] = sc[tmpEnm.indices] + tmpEnm.coef[:, 0]
            sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + tmpEnm.coef[:, 0]**2
            if len(tmpEnm.indices) > 0:
                ac[tmpEnm.indices, i] = tmpEnm.coef
            # record the support (nonzero coefficients)
            occur = np.zeros(len(tmpEnm.coef[:, 0]))
            occur[abs(tmpEnm.coef[:, 0]) > 1E-25] = 1.0
            sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur
        # averages and variances
        if estErr:
            aveErr = sumErr/nSamp
            sdErr = np.sqrt(sumSqErr/nSamp - aveErr**2)
            aveNullErr = sumNullErr/nSamp
            sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
        aveCoef = sc/nSamp
        sdCoef = np.sqrt(sSqc/nSamp - aveCoef**2)
        # indexing gymnastics here because of the shape of the scipy matrix
        medCoef = np.array(np.median(ac.todense(), 1))[:, 0]
        pSup = sumSup/nSamp
        indices = np.arange(nRegs)[np.abs(medCoef) > 1E-21]
        # collect the results in a dict for simplicity
        solution = {}
        if estErr:
            solution['aveErr'] = aveErr
            solution['sdErr'] = sdErr
            solution['aveNullErr'] = aveNullErr
            solution['sdNullErr'] = sdNullErr
        if reduceX:
            # map back to the original indices
            solution['aveCoef'] = np.zeros(nRegsFull)
            solution['sdCoef'] = np.zeros(nRegsFull)
            solution['medCoef'] = np.zeros(nRegsFull)
            solution['pSup'] = np.zeros(nRegsFull)
            solution['aveCoef'][coefIndex] = aveCoef
            solution['sdCoef'][coefIndex] = sdCoef
            solution['medCoef'][coefIndex] = medCoef
            solution['pSup'][coefIndex] = pSup
            solution['indices'] = coefIndex[indices]
        else:
            solution['aveCoef'] = aveCoef
            solution['sdCoef'] = sdCoef
            solution['medCoef'] = medCoef
            solution['pSup'] = pSup
            solution['indices'] = indices
        nRegsHat = len(indices)
        if nRegsHat > 0 and estImp:
            Xhat = X[:, indices]
            # leave-one-out importance: error with the regressor removed
            errOutHat = np.zeros(nRegsHat)
            if nRegsHat > 1:
                for j in range(nRegsHat):
                    Xprime = np.delete(Xhat, j, axis=1)
                    # residual bootstrap
                    sumErr = 0
                    sumSqErr = 0
                    for i in range(nSamp):
                        # cross validate to get the prediction errors
                        err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10,
                                                              method='cv', lambdas=[lam])
                        sumErr = err.mErr[0] + sumErr
                        sumSqErr = err.mErr[0]**2 + sumSqErr
                    errOutHat[j] = sumErr/nSamp
            elif nRegsHat == 1:
                errOutHat[0] = aveNullErr
            # leave-only-one importance: error with the regressor alone
            errInHat = np.zeros(nRegsHat)
            for j in range(nRegsHat):
                Xprime = np.zeros((nObs, 1))
                Xprime[:, 0] = Xhat[:, j]
                # residual bootstrap
                sumErr = 0
                sumSqErr = 0
                for i in range(nSamp):
                    # cross validate to get the prediction errors
                    err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10,
                                                          method='cv', lambdas=[lam])
                    sumErr = err.mErr[0] + sumErr
                    sumSqErr = err.mErr[0]**2 + sumSqErr
                errInHat[j] = sumErr/nSamp
            errOut = np.zeros(nRegs)
            errOut[indices] = errOutHat
            solution['errOut'] = errOut
            errIn = np.zeros(nRegs)
            errIn[indices] = errInHat
            solution['errIn'] = errIn
    else:
        solution = {}
        if estErr:
            sumNullErr = 0
            sumSqNullErr = 0
            for i in range(nSamp):
                # cross validate to get the null model errors
                nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
                sumNullErr = sumNullErr + nullErr
                sumSqNullErr = sumSqNullErr + nullErr**2
            # averages and variances
            aveNullErr = sumNullErr/nSamp
            sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
            aveErr = aveNullErr
            sdErr = sdNullErr
            solution['aveErr'] = aveErr
            solution['sdErr'] = sdErr
            solution['aveNullErr'] = aveNullErr
            solution['sdNullErr'] = sdNullErr
        solution['aveCoef'] = np.zeros(nRegsFull)
        solution['sdCoef'] = np.zeros(nRegsFull)
        solution['medCoef'] = np.zeros(nRegsFull)
        solution['pSup'] = np.zeros(nRegsFull)
        solution['indices'] = np.array([])
    return solution, enm
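# A minimal usage sketch for estModel on synthetic data.  The data and
# settings here are illustrative only; estModel and its dependencies
# (enet, fitSampling, fitSamplingNull, st) are assumed to be importable
# as in this module.
def _exampleEstModel():
    np.random.seed(0)
    X = np.random.randn(100, 20)
    beta = np.zeros(20)
    beta[:3] = [2.0, -1.5, 1.0]   # three truly active regressors
    y = np.dot(X, beta) + 0.5*np.random.randn(100)
    solution, enm = estModel(X, y, nSamp=100, estErr=True)
    print(solution['indices'])                         # selected regressors
    print(solution['aveErr'], solution['aveNullErr'])  # model vs null CV error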
def runTest(X, y):
    nSamp = 250
    nObs, nRegs = X.shape
    # selection via bootstrap
    err, enm, allVals = fitSampling(X, y, 1, nSamp, method='bs')
    errV = err.mErr
    tmpIndex = np.argmin(errV)
    # the full bootstrap values at the selected model
    bsAll = allVals[tmpIndex, :]
    # other important values
    lam = enm.lambdas[tmpIndex]
    yHat = enm.predict(X)[:, tmpIndex]
    coefIndex = enm.indices
    # build the bootstrap residual response samples
    res = y - yHat
    resCent = res - np.mean(res)
    ySample = np.zeros((nObs, nSamp))
    for i in range(nSamp):
        resSample = st.sampleWR(resCent)
        ySample[:, i] = yHat + resSample
    # CV error estimated over the bootstrap residual responses
    errSample = np.zeros(nSamp)
    for i in range(nSamp):
        err, tmp, tmpallVals = fitSampling(X, ySample[:, i], 1, 10,
                                           method='cv', lambdas=[lam])
        errV = err.mErr
        # there should be exactly one value here
        if len(errV) > 1:
            raise ValueError('something wrong with bs res cv')
        errSample[i] = errV[0]
    bsResAll = errSample
    # now repeat the same steps on the selected subset
    Xhat = X[:, coefIndex]
    err, enm, allVals = fitSampling(Xhat, y, 1, nSamp, method='bs', lambdas=[lam])
    bsSub = allVals
    # CV error estimated over the bootstrap residual responses
    errSample = np.zeros(nSamp)
    for i in range(nSamp):
        err, tmpenm, tmpallVals = fitSampling(Xhat, ySample[:, i], 1, 10,
                                              method='cv', lambdas=[lam])
        errV = err.mErr
        # there should be exactly one value here
        if len(errV) > 1:
            raise ValueError('something wrong with bs res cv')
        errSample[i] = errV[0]
    bsResSub = errSample
    vals = np.zeros((4, nSamp))
    vals[0, :] = bsAll
    vals[1, :] = bsResAll
    vals[2, :] = bsSub
    vals[3, :] = bsResSub
    return vals
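# A hedged sketch of how the four rows returned by runTest might be
# summarized; the labels mirror the order bsAll, bsResAll, bsSub, bsResSub.
def _summarizeRunTest(X, y):
    vals = runTest(X, y)
    labels = ['bs full', 'bs-res cv full', 'bs subset', 'bs-res cv subset']
    for k in range(4):
        print('%-18s mean=%.4f sd=%.4f' % (labels[k], np.mean(vals[k, :]),
                                           np.std(vals[k, :])))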
def run(X, y, name):
    nSamp = 100
    alphaList = np.array([1])  # np.arange(.1,1.1,.1)
    nObs, nRegs = X.shape
    sdY = np.sqrt(np.var(y))
    # selection via bootstrap
    bestMin = 1E10
    for a in alphaList:
        tmpErr, tmpEnm, allVals = fitSampling(X, y, a, nSamp, method='bs')
        tmpErrV = tmpErr.mErr
        tmpMin = np.min(tmpErrV)
        print(tmpMin)
        if tmpMin < bestMin:
            bestMin = tmpMin
            modelIndex = np.argmin(tmpErrV)
            enm = tmpEnm
            err = tmpErr
            alpha = a
    # important values
    lam = enm.lambdas[modelIndex]
    yHat = enm.predict(X)[:, modelIndex]
    intercept = enm.intercept[modelIndex]
    globalCoef = enm.coef[np.abs(enm.coef[:, modelIndex]) > 1E-21, modelIndex]
    coefIndex = enm.indices[np.abs(enm.coef[:, modelIndex]) > 1E-21]
    notEmpty = len(coefIndex) > 0
    # build the bootstrap residual response samples
    res = y - yHat
    resCent = res - np.mean(res)
    ySample = np.zeros((nObs, nSamp))
    for i in range(nSamp):
        resSample = st.sampleWR(resCent)
        ySample[:, i] = yHat + resSample
    if notEmpty:
        # work on the selected subset
        Xhat = X[:, coefIndex]
        nObs, nRegsHat = Xhat.shape
        sdXhat = np.sqrt(np.var(Xhat, 0))
        # residual bootstrap
        sumErr = 0
        sumSqErr = 0
        sumNullErr = 0
        sumSqNullErr = 0
        sc = np.zeros(nRegsHat)
        sSqc = np.zeros(nRegsHat)
        sumSup = np.zeros(nRegsHat)
        for i in range(nSamp):
            # cross validate to get the prediction errors
            err, tmpEnm, tmpallVals = fitSampling(Xhat, ySample[:, i], alpha, 10,
                                                  method='cv', lambdas=[lam])
            sumErr = err.mErr[0] + sumErr
            sumSqErr = err.mErr[0]**2 + sumSqErr
            # cross validate to get the null model errors
            nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
            sumNullErr = sumNullErr + nullErr
            sumSqNullErr = sumSqNullErr + nullErr**2
            # the active set changes from sample to sample, so map the
            # coefficients back to the original indices
            tmpEnm = enet.fit(Xhat, ySample[:, i], alpha, lambdas=[lam])
            sc[tmpEnm.indices] = sc[tmpEnm.indices] + tmpEnm.coef[:, 0]
            sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + tmpEnm.coef[:, 0]**2
            # record the support (nonzero coefficients)
            occur = np.zeros(len(tmpEnm.coef[:, 0]))
            occur[abs(tmpEnm.coef[:, 0]) > 1E-25] = 1.0
            sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur
        # averages and variances
        aveErr = sumErr/nSamp
        sdErr = np.sqrt(sumSqErr/nSamp - aveErr**2)
        aveNullErr = sumNullErr/nSamp
        sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
        aveCoef = sc/nSamp
        sdCoef = np.sqrt(sSqc/nSamp - aveCoef**2)
        pSup = sumSup/nSamp
        # leave-one-out importance: error with the regressor removed
        codN = np.zeros(nRegsHat)
        if nRegsHat > 1:
            for j in range(nRegsHat):
                Xprime = np.delete(Xhat, j, axis=1)
                # residual bootstrap
                sumErr = 0
                sumSqErr = 0
                for i in range(nSamp):
                    # cross validate to get the prediction errors
                    err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10,
                                                          method='cv', lambdas=[lam])
                    sumErr = err.mErr[0] + sumErr
                    sumSqErr = err.mErr[0]**2 + sumSqErr
                codN[j] = sumErr/nSamp
        elif nRegsHat == 1:
            codN[0] = aveNullErr
        # leave-only-one importance: error with the regressor alone
        cod1 = np.zeros(nRegsHat)
        for j in range(nRegsHat):
            Xprime = np.zeros((nObs, 1))
            Xprime[:, 0] = Xhat[:, j]
            # residual bootstrap
            sumErr = 0
            sumSqErr = 0
            for i in range(nSamp):
                # cross validate to get the prediction errors
                err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10,
                                                      method='cv', lambdas=[lam])
                sumErr = err.mErr[0] + sumErr
                sumSqErr = err.mErr[0]**2 + sumSqErr
            cod1[j] = sumErr/nSamp
        # now estimate some p-values.  It should be noted that we want to
        # use permutation to get a real feel for random or unrelated data,
        # but we do not want to run a bootstrap for each permutation
        # (although we should); so here we use the OLS std err to get the
        # test statistic.  We record a bunch of values to look at later.
        p, tStat, tStatPerm, olsSE = regStat.netTTestPermute(Xhat, y, lam, alpha, nperm=1000)
        n, m = tStatPerm.shape
        # check whether any p-values are nan: this most likely means the
        # GPD failed the goodness-of-fit test for the tail; fall back on
        # the direct permutation estimate in that case.  Some other form
        # of automated checking might be good here.
        for i in range(n):
            if np.isnan(p[i]):
                z = tStatPerm[i, :]
                tmp = np.sum(z > tStat[i])
                p[i] = float(tmp)/float(m)
    else:
        # residual bootstrap on the null model only
        sumNullErr = 0
        sumSqNullErr = 0
        for i in range(nSamp):
            # cross validate to get the null model errors
            nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
            sumNullErr = sumNullErr + nullErr
            sumSqNullErr = sumSqNullErr + nullErr**2
        # averages and variances
        aveNullErr = sumNullErr/nSamp
        sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
        aveErr = aveNullErr
        sdErr = sdNullErr
    # we have everything; write it out
    f = open('SLR2run_'+name+'.dat', 'w')
    lam.tofile(f, sep="\t")
    f.write("\n")
    alpha.tofile(f, sep="\t")
    f.write("\n")
    intercept.tofile(f, sep="\t")
    f.write("\n")
    aveErr.tofile(f, sep="\t")
    f.write("\n")
    sdErr.tofile(f, sep="\t")
    f.write("\n")
    aveNullErr.tofile(f, sep="\t")
    f.write("\n")
    sdNullErr.tofile(f, sep="\t")
    f.write("\n")
    sdY.tofile(f, sep="\t")
    f.write("\n")
    if notEmpty:
        coefIndex.tofile(f, sep="\t")
        f.write("\n")
        sdXhat.tofile(f, sep="\t")
        f.write("\n")
        globalCoef.tofile(f, sep="\t")
        f.write("\n")
        aveCoef.tofile(f, sep="\t")
        f.write("\n")
        sdCoef.tofile(f, sep="\t")
        f.write("\n")
        pSup.tofile(f, sep="\t")
        f.write("\n")
        codN.tofile(f, sep="\t")
        f.write("\n")
        cod1.tofile(f, sep="\t")
        f.write("\n")
        p.tofile(f, sep="\t")
        f.write("\n")
        olsSE.tofile(f, sep="\t")
        f.write("\n")
    f.close()
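# The nan fallback in run computes a direct permutation p-value: the
# fraction of permuted statistics exceeding the observed one.  A minimal
# standalone version of that computation (names are illustrative):
def _empiricalPValue(tObs, tPerm):
    return float(np.sum(tPerm > tObs))/float(len(tPerm))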
def estModel(XFull, y, nSamp=100, alphaList=np.array([1]), indType='coef',
             estErr=True, estImp=True, reduceX=False, params=[]):
    """Estimate a mean, median and standard deviation for an elastic net
    model using bootstrap residuals.

    Bootstrap resampling is used to select the model parameters; the
    bootstrap residuals at those parameters are then used on the full
    feature set X to calculate the stats.  nSamp is used for both the
    selection and the stat estimates.

    Options

    *indType* determines which stat to use for the indices.  Indices
    report the nonzero entries in the sparse regression model.
    Possible types:
    coef - use the coefs from the full fit after selection (default)
    ave  - use the average coefs after the bs; typically includes many
           more regressors.  Not recommended, as the average removes the
           sparsity benefit.
    med  - use the median value after the bs; typically fewer regressors
           are chosen than with 'coef'.

    If *estErr*, then 10-fold CV is used to estimate the prediction
    error at each iteration of the bs.  This costs ten extra fits per
    bs residual sample, but reduces the bias in the prediction error.
    The mean and std dev of the CV error are reported.

    If *estImp*, then the importance of each selected regressor is
    estimated.  errOut is the error when the regressor is removed
    (multivariate error); errIn is the error when the regressor is used
    alone (univariate error).

    If *reduceX*, then the regressor matrix is reduced based on the full
    model fit after selection.  Only nonzero coefs are kept; much
    faster, but it biases the other stats.  NOTE: this was never tested
    after the last migration; it is possible the indices in the solution
    do not match the original ones.

    If *params* is passed, it is assumed to be a tuple with the
    (lambda, alpha) model parameters.  In that case model selection is
    bypassed and those parameters are used.
    """
    nObs, nRegsFull = XFull.shape
    # select the full model values
    if len(params) == 2:
        lam, alpha = params
        enm = enet.fit(XFull, y, alpha, lambdas=[lam])[0]
    else:
        enm = select(XFull, y, nSamp, alphaList)
        lam = enm.lambdas[0]
    yHat = enm.predict(XFull)
    intercept = enm.intercept[0]
    globalCoef = enm.coef[np.abs(enm.coef) > 1E-21]
    coefIndex = enm.indices[np.abs(enm.coef) > 1E-21]
    alpha = enm.alpha
    # reduce X now if requested
    if reduceX:
        nRegs = len(coefIndex)
        if nRegs > 0:
            X = XFull[:, coefIndex]
            nObs, _ = X.shape
    else:
        X = XFull
        nRegs = nRegsFull
    # build the bootstrap residual response samples
    res = y - yHat
    resCent = res - np.mean(res)
    ySample = np.zeros((nObs, nSamp))
    for i in range(nSamp):
        resSample = st.sampleWR(resCent)
        ySample[:, i] = yHat + resSample
    if nRegs > 0:
        # residual bootstrap
        if estErr:
            sumErr = 0
            sumSqErr = 0
            sumNullErr = 0
            sumSqNullErr = 0
        sc = np.zeros(nRegs)
        sSqc = np.zeros(nRegs)
        ac = lil_matrix((nRegs, nSamp))
        sumSup = np.zeros(nRegs)
        for i in range(nSamp):
            if estErr:
                # cross validate to get the prediction errors
                err, tmpEnm, tmpallVals = fitSampling(X, ySample[:, i], alpha, 10,
                                                      method='cv', lambdas=[lam])
                sumErr = err.mErr[0] + sumErr
                sumSqErr = err.mErr[0]**2 + sumSqErr
                # cross validate to get the null model errors
                nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
                sumNullErr = sumNullErr + nullErr
                sumSqNullErr = sumSqNullErr + nullErr**2
            # the active set changes from sample to sample, so map the
            # coefficients back to the original indices
            tmpEnm = enet.fit(X, ySample[:, i], alpha, lambdas=[lam])
            sc[tmpEnm.indices] = sc[tmpEnm.indices] + tmpEnm.coef[:, 0]
            sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + tmpEnm.coef[:, 0]**2
            if len(tmpEnm.indices) > 0:
                ac[tmpEnm.indices, i] = tmpEnm.coef
            # record the support (nonzero coefficients)
            occur = np.zeros(len(tmpEnm.coef[:, 0]))
            occur[abs(tmpEnm.coef[:, 0]) > 1E-25] = 1.0
            sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur
        # averages and variances
        if estErr:
            aveErr = sumErr/nSamp
            sdErr = np.sqrt(sumSqErr/nSamp - aveErr**2)
            aveNullErr = sumNullErr/nSamp
            sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
        aveCoef = sc/nSamp
        sdCoef = np.sqrt(sSqc/nSamp - aveCoef**2)
        # indexing gymnastics here because of the shape of the scipy matrix
        medCoef = np.array(np.median(ac.todense(), 1))[:, 0]
        pSup = sumSup/nSamp
        # do the index selection
        if indType == 'coef':
            indices = coefIndex
        elif indType == 'med':
            indices = np.arange(nRegs)[np.abs(medCoef) > 1E-21]
        elif indType == 'ave':
            indices = np.arange(nRegs)[np.abs(aveCoef) > 1E-21]
        else:
            raise ValueError('The indType '+indType+' is not valid.')
        # collect the results in a dict for simplicity
        solution = {}
        if estErr:
            solution['aveErr'] = aveErr
            solution['sdErr'] = sdErr
            solution['aveNullErr'] = aveNullErr
            solution['sdNullErr'] = sdNullErr
        if reduceX:
            # map back to the original indices
            solution['aveCoef'] = np.zeros(nRegsFull)
            solution['sdCoef'] = np.zeros(nRegsFull)
            solution['medCoef'] = np.zeros(nRegsFull)
            solution['pSup'] = np.zeros(nRegsFull)
            solution['aveCoef'][coefIndex] = aveCoef
            solution['sdCoef'][coefIndex] = sdCoef
            solution['medCoef'][coefIndex] = medCoef
            solution['pSup'][coefIndex] = pSup
            solution['indices'] = coefIndex[indices]
        else:
            solution['aveCoef'] = aveCoef
            solution['sdCoef'] = sdCoef
            solution['medCoef'] = medCoef
            solution['pSup'] = pSup
            solution['indices'] = indices
        nRegsHat = len(indices)
        if nRegsHat > 0 and estImp:
            Xhat = X[:, indices]
            # leave-one-out importance: error with the regressor removed
            errOutHat = np.zeros(nRegsHat)
            if nRegsHat > 1:
                for j in range(nRegsHat):
                    Xprime = np.delete(Xhat, j, axis=1)
                    # residual bootstrap
                    sumErr = 0
                    sumSqErr = 0
                    for i in range(nSamp):
                        # cross validate to get the prediction errors
                        err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10,
                                                              method='cv', lambdas=[lam])
                        sumErr = err.mErr[0] + sumErr
                        sumSqErr = err.mErr[0]**2 + sumSqErr
                    errOutHat[j] = sumErr/nSamp
            elif nRegsHat == 1:
                errOutHat[0] = aveNullErr
            # leave-only-one importance: error with the regressor alone
            errInHat = np.zeros(nRegsHat)
            for j in range(nRegsHat):
                Xprime = np.zeros((nObs, 1))
                Xprime[:, 0] = Xhat[:, j]
                # residual bootstrap
                sumErr = 0
                sumSqErr = 0
                for i in range(nSamp):
                    # cross validate to get the prediction errors
                    err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10,
                                                          method='cv', lambdas=[lam])
                    sumErr = err.mErr[0] + sumErr
                    sumSqErr = err.mErr[0]**2 + sumSqErr
                errInHat[j] = sumErr/nSamp
            errOut = np.zeros(nRegs)
            errOut[indices] = errOutHat
            solution['errOut'] = errOut
            errIn = np.zeros(nRegs)
            errIn[indices] = errInHat
            solution['errIn'] = errIn
    else:
        solution = {}
        if estErr:
            sumNullErr = 0
            sumSqNullErr = 0
            for i in range(nSamp):
                # cross validate to get the null model errors
                nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
                sumNullErr = sumNullErr + nullErr
                sumSqNullErr = sumSqNullErr + nullErr**2
            # averages and variances
            aveNullErr = sumNullErr/nSamp
            sdNullErr = np.sqrt(sumSqNullErr/nSamp - aveNullErr**2)
            aveErr = aveNullErr
            sdErr = sdNullErr
            solution['aveErr'] = aveErr
            solution['sdErr'] = sdErr
            solution['aveNullErr'] = aveNullErr
            solution['sdNullErr'] = sdNullErr
        solution['aveCoef'] = np.zeros(nRegsFull)
        solution['sdCoef'] = np.zeros(nRegsFull)
        solution['medCoef'] = np.zeros(nRegsFull)
        solution['pSup'] = np.zeros(nRegsFull)
        solution['indices'] = np.array([])
    return solution, enm
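# An illustrative comparison of the indType options on the same data;
# 'med' typically selects fewer regressors than 'coef', as noted in the
# docstring.  X and y are assumed to be defined as in the earlier sketch.
def _compareIndTypes(X, y):
    solCoef, _ = estModel(X, y, nSamp=100, indType='coef', estImp=False)
    solMed, _ = estModel(X, y, nSamp=100, indType='med', estImp=False)
    print(len(solCoef['indices']), len(solMed['indices']))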