def lrtpvals_qqfit(nperm, lrt, lrtperm, alteqnull, alteqnullperm, qmax=None, abserr=False, fitdof=True, dof=None, nullfitfile=None):
    '''
    Fit the parameters of the null distribution using "quantile regression"
    on some fraction of the most significant data points, then compute
    p-values for the observed statistics from the fitted mixture.

    The data used to fit the null is chosen in this order:

    1. ``nperm > 0``: the permutation statistics ``lrtperm`` /
       ``alteqnullperm``.
    2. ``nullfitfile is not None``: the columns "2*(LL(alt)-LL(null))" and
       "alteqnull" read from that file.
    3. otherwise: the observed ``lrt`` / ``alteqnull`` themselves.

    Parameters
    ----------
    nperm : number of permutations; only tested for ``> 0``.
    lrt : observed test statistics, 2*(LL(alt)-LL(null)).
    lrtperm : test statistics from permutations (used only when nperm > 0).
    alteqnull : per-test indicator passed to ``chi2mixture`` for the observed
        data (presumably "alternative equals null" -- confirm against
        chi2mixture).
    alteqnullperm : same indicator for the permuted data.
    qmax, abserr, fitdof, dof : forwarded unchanged to ``c2.chi2mixture``.
    nullfitfile : optional path to stored null statistics (case 2 above).

    Returns
    -------
    tuple ``(pv_adj, mix.mixture, mix.scale, mix.dof)`` where ``pv_adj`` is
    the result of ``mix.sf`` for the observed data.
    '''
    lrt_colname = "2*(LL(alt)-LL(null))"

    # Step 1: pick the statistics on which the null mixture is fitted.
    if nperm > 0:
        logging.info("estimating mixture parameters for permuted data\nusing quantile regression of log-pvalues with qmax=" + str(qmax) + " from permutations...")
        null_lrt = lrtperm
        null_alteqnull = alteqnullperm
        fitted_on_observed = False
    elif nullfitfile is not None:
        logging.info("estimating mixture parameters for permuted data\nusing quantile regression of log-pvalues with qmax=" + str(qmax) + " from STORED permutations in " + nullfitfile)
        # read in the test statistics and alteqnull from file
        colnames = {lrt_colname, "alteqnull", "setsize"}
        import fastlmm.util.util as ut
        import numpy as np
        dat = ut.extractcols(nullfitfile, colnameset=colnames, dtypeset={lrt_colname: np.float64})
        null_lrt = dat[lrt_colname]
        null_alteqnull = dat["alteqnull"]
        fitted_on_observed = False
    else:
        logging.info("nperm = " + str(nperm) + " : No permutations were performed.")
        logging.info("estimating mixture parameters for non-permuted data\nusing quantile regression of log-pvalues with qmax=" + str(qmax) + "...")
        null_lrt = lrt
        null_alteqnull = alteqnull
        fitted_on_observed = True

    # Step 2: fit the mixture parameters (identical for all three cases).
    mix = c2.chi2mixture(lrt=null_lrt, qmax=qmax, alteqnull=null_alteqnull, abserr=abserr, fitdof=fitdof, dof=dof)
    res = mix.fit_params_Qreg()  # parameter fitting
    logging.info("# of pvals used for nullfit=" + str(res['imax']))

    # Step 3: p-values for the real data.
    if fitted_on_observed:
        # The mixture was fitted on the observed data, so sf() is called
        # without arguments, exactly as before.
        pv_adj = mix.sf()
        logging.info(" Done")
    else:
        logging.info(" Done")
        logging.info("adjusting the observed p-values ...")
        pv_adj = mix.sf(lrt=lrt, alteqnull=alteqnull)
        logging.info(" Done")

    if mix.mixture == 0:
        # Reported through the log only; this condition does not raise.
        logging.info("*****WARNING*****: only zero dof component items found")

    return pv_adj, mix.mixture, mix.scale, mix.dof
def lrtpvals_qqfit_file(filein, qmax=0.1):
    '''
    Fit the chi2 mixture null distribution to the test statistics stored in
    a tab-delimited file and print the fitted parameters.

    Parameters
    ----------
    filein : path (or file-like object accepted by ``pandas.read_csv``) of a
        tab-delimited file containing a "2*(LL(alt)-LL(null))" column.
    qmax : forwarded to ``chi2mixture`` (default 0.1).

    Prints the number of statistics read, ``mix.mixture``, and the "dof" and
    "scale" entries of the fit result. Returns None.
    '''
    import fastlmm.util.stats.chi2mixture as c2
    import numpy as np
    import pandas as pd

    colname = "2*(LL(alt)-LL(null))"
    # Only the test-statistic column is loaded; it is forced to float64.
    table = pd.read_csv(filein, delimiter='\t', dtype={colname: np.float64}, usecols=[colname])
    lrtperm = table[colname].values
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("found " + str(len(lrtperm)) + " null test stats")

    mix = c2.chi2mixture(lrt=lrtperm, qmax=qmax, alteqnull=None)
    res = mix.fit_params_Qreg()  # parameter fitting

    print("mixture (non-zero dof)=" + str(mix.mixture) + "\n")
    print("dof=" + str(res["dof"]) + "\n")
    print("scale=" + str(res["scale"]) + "\n")