def _calcPValues2(self, X, nperm=1000):
    # Estimate per-regressor p-values for the elastic-net fit defined by the
    # predetermined alpha and lambda parameters held on this object.
    # Permutation testing (regStat.netTTestPermute) supplies the null
    # distribution of the t-statistic, and a generalized Pareto distribution
    # is used to model the tail of the permutation distribution when possible.
    #
    # X     -- the typical regressor matrix
    # nperm -- number of permutations used to estimate the null distribution
    # The response vector is taken from self._y; returns the p-value array.
    import regStat
    response = self._y
    p, tStat, tStatPerm, _coef = regStat.netTTestPermute(
        X, response, self.alpha, self.lam, nperm)
    nTests, nPerms = tStatPerm.shape
    # A nan p-value most likely means the GPD tail model failed its goodness
    # of fit check; fall back to the direct permutation estimate there.
    # *** some other form of automated checking might be good here
    for idx in range(nTests):
        if np.isnan(p[idx]):
            exceed = np.sum(tStatPerm[idx, :] > tStat[idx])
            p[idx] = float(exceed) / float(nPerms)
    return p
def run(X, y, name):
    """Fit an elastic net to (X, y), bootstrap its statistics, and write the
    results to 'SLR2run_<name>.dat'.

    Steps, as implemented below:
      1. choose alpha/lambda by bootstrap model selection over alphaList
      2. build residual-bootstrap response samples
      3. non-empty model: bootstrap errors, null errors, coefficients,
         support probabilities, leave-one-out / leave-only-one errors and
         permutation p-values on the selected regressor subset
      4. empty model: fall back to null-model error estimates only
      5. dump everything tab-separated, one array per line

    NOTE(review): relies on module-level names np, st, enet, regStat,
    fitSampling and fitSamplingNull -- presumably imported at the top of
    this file; verify there.
    """
    nSamp = 100  # number of bootstrap / CV resamples used throughout
    alphaList = np.array([1])  # np.arange(.1,1.1,.1)
    nObs, nRegs = X.shape
    sdY = np.sqrt(np.var(y))
    # selection via bootstrap: keep the (alpha, lambda-index) pair with the
    # smallest mean bootstrap error
    bestMin = 1E10
    for a in alphaList:
        tmpErr, tmpEnm, allVals = fitSampling(X, y, a, nSamp, method='bs')
        tmpErrV = tmpErr.mErr
        tmpMin = np.min(tmpErrV)
        print tmpMin  # Python 2 print statement; progress output
        if tmpMin < bestMin:
            bestMin = tmpMin
            modelIndex = np.argmin(tmpErrV)
            enm = tmpEnm
            err = tmpErr
            alpha = a
    # important values from the winning model
    lam = enm.lambdas[modelIndex]
    yHat = enm.predict(X)[:, modelIndex]
    intercept = enm.intercept[modelIndex]
    # regressors whose coefficient is numerically non-zero
    globalCoef = enm.coef[np.abs(enm.coef[:, modelIndex]) > 1E-21, modelIndex]
    coefIndex = enm.indices[np.abs(enm.coef[:, modelIndex]) > 1E-21]
    notEmpty = len(coefIndex) > 0
    # get the bootstrap residual response samples
    res = y - yHat
    resCent = res - np.mean(res)  # centered residuals
    ySample = np.zeros((nObs, nSamp))
    for i in range(nSamp):
        resSample = st.sampleWR(resCent)  # resample residuals with replacement
        ySample[:, i] = yHat + resSample
    notEmpty = len(coefIndex) > 0  # NOTE(review): duplicate of the line above
    if notEmpty:
        # working on the selected regressor subset now
        Xhat = X[:, coefIndex]
        nObs, nRegsHat = Xhat.shape
        sdXhat = np.sqrt(np.var(Xhat, 0))
        # residual bs time: accumulate error / null-error / coefficient sums
        sumErr = 0
        sumSqErr = 0
        sumNullErr = 0
        sumSqNullErr = 0
        sc = np.zeros(nRegsHat)      # running sum of coefficients
        sSqc = np.zeros(nRegsHat)    # running sum of squared coefficients
        sumSup = np.zeros(nRegsHat)  # support (non-zero coefficient) counts
        for i in range(nSamp):
            # cv to get the errors
            err, tmpEnm, tmpallVals = fitSampling(Xhat, ySample[:, i], alpha, 10, method='cv', lambdas=[lam])
            sumErr = err.mErr[0] + sumErr
            sumSqErr = err.mErr[0]**2 + sumSqErr
            # cv over this thing to get the null model errors
            nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
            sumNullErr = sumNullErr + nullErr
            sumSqNullErr = sumSqNullErr + nullErr**2
            # need the coef; they change between resamples so we map them
            # back to the original positions via tmpEnm.indices
            tmpEnm = enet.fit(Xhat, ySample[:, i], alpha, lambdas=[lam])
            sc[tmpEnm.indices] = sc[tmpEnm.indices] + tmpEnm.coef[:, 0]
            sSqc[tmpEnm.indices] = sSqc[tmpEnm.indices] + tmpEnm.coef[:, 0]**2
            # find supports: which coefficients survived in this resample
            occur = np.zeros(len(tmpEnm.coef[:, 0]))
            occur[abs(tmpEnm.coef[:, 0]) > 1E-25] = 1.0
            sumSup[tmpEnm.indices] = sumSup[tmpEnm.indices] + occur
        # get averages and variances
        aveErr = sumErr / nSamp
        sdErr = np.sqrt(sumSqErr / nSamp - aveErr**2)
        aveNullErr = sumNullErr / nSamp
        sdNullErr = np.sqrt(sumSqNullErr / nSamp - aveNullErr**2)
        aveCoef = sc / nSamp
        sdCoef = np.sqrt(sSqc / nSamp - aveCoef**2)
        pSup = sumSup / nSamp  # per-regressor support probability
        # leave-one-out importance: mean CV error with regressor j removed
        codN = np.zeros(nRegsHat)
        if nRegsHat > 1:
            for j in range(nRegsHat):
                Xprime = np.delete(Xhat, j, axis=1)
                # residual bs time
                sumErr = 0
                sumSqErr = 0
                for i in range(nSamp):
                    # cv to get the errors
                    err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10, method='cv', lambdas=[lam])
                    sumErr = err.mErr[0] + sumErr
                    sumSqErr = err.mErr[0]**2 + sumSqErr
                codN[j] = sumErr / nSamp
        elif nRegsHat == 1:
            # removing the only regressor leaves the null model
            codN[0] = aveNullErr
        # lets do leave only one: mean CV error with regressor j alone
        cod1 = np.zeros(nRegsHat)
        for j in range(nRegsHat):
            Xprime = np.zeros((nObs, 1))
            Xprime[:, 0] = Xhat[:, j]
            # residual bs time
            sumErr = 0
            sumSqErr = 0
            for i in range(nSamp):
                # cv to get the errors
                err, tmpenm, tmpallVals = fitSampling(Xprime, ySample[:, i], alpha, 10, method='cv', lambdas=[lam])
                sumErr = err.mErr[0] + sumErr
                sumSqErr = err.mErr[0]**2 + sumSqErr
            cod1[j] = sumErr / nSamp
        # now we are going to estimate some p-values.  It should be noted:
        # we want to use permutation to get a real feel for random or
        # unrelated data, but we don't want to run a bootstrap for each
        # permutation (though we should), so here we use the ols stderr to
        # get the test stat; we record a bunch of stuff to look at later.
        # NOTE(review): arguments here are (lam, alpha) while _calcPValues2
        # passes (alpha, lam) to the same function -- one of the two
        # orderings is likely wrong; verify against regStat.netTTestPermute.
        p, tStat, tStatPerm, olsSE = regStat.netTTestPermute(Xhat, y, lam, alpha, nperm=1000)
        n, m = tStatPerm.shape
        #*****
        # would like to check if any values are nan; this most likely means
        # the gpd failed in goodness of fit for the tail -- use direct
        # permutation values as the estimate in that case
        # *** some other form of automated checking might be good here
        for i in range(n):
            if np.isnan(p[i]):
                z = tStatPerm[i, :]
                tmp = np.sum(z > tStat[i])
                p[i] = float(tmp) / float(m)
    else:
        # empty model: only null-model error statistics are available
        # residual bs time
        sumNullErr = 0
        sumSqNullErr = 0
        for i in range(nSamp):
            # cv over this thing to get the null model errors
            nullErr, a = fitSamplingNull(ySample[:, i], 10, method='cv')
            sumNullErr = sumNullErr + nullErr
            sumSqNullErr = sumSqNullErr + nullErr**2
        # get averages and variances
        aveNullErr = sumNullErr / nSamp
        sdNullErr = np.sqrt(sumSqNullErr / nSamp - aveNullErr**2)
        aveErr = aveNullErr
        sdErr = sdNullErr
    # we have it all, lets print it (tab-separated, one array per line)
    f = open('SLR2run_' + name + '.dat', 'w')
    lam.tofile(f, sep="\t")
    f.write("\n")
    alpha.tofile(f, sep="\t")
    f.write("\n")
    intercept.tofile(f, sep="\t")
    f.write("\n")
    aveErr.tofile(f, sep="\t")
    f.write("\n")
    sdErr.tofile(f, sep="\t")
    f.write("\n")
    aveNullErr.tofile(f, sep="\t")
    f.write("\n")
    sdNullErr.tofile(f, sep="\t")
    f.write("\n")
    sdY.tofile(f, sep="\t")
    f.write("\n")
    if notEmpty:
        # subset statistics only exist for a non-empty model
        coefIndex.tofile(f, sep="\t")
        f.write("\n")
        sdXhat.tofile(f, sep="\t")
        f.write("\n")
        globalCoef.tofile(f, sep="\t")
        f.write("\n")
        aveCoef.tofile(f, sep="\t")
        f.write("\n")
        sdCoef.tofile(f, sep="\t")
        f.write("\n")
        pSup.tofile(f, sep="\t")
        f.write("\n")
        codN.tofile(f, sep="\t")
        f.write("\n")
        cod1.tofile(f, sep="\t")
        f.write("\n")
        p.tofile(f, sep="\t")
        f.write("\n")
        olsSE.tofile(f, sep="\t")
        f.write("\n")
    f.close()