def fit_logistic(X_hold, Y_hold, Firth=False, resBase=None, LRtest=True):
    """
    Fit a logistic regression with standard maximum likelihood
    (``Firth=False``) or with Firth's method (``Firth=True``).

    Parameters
    ----------
    X_hold : array-like
        Design matrix. For Firth regression it must be two-dimensional;
        column 0 is the only column kept in the null model of the
        likelihood ratio test (presumably the intercept column).
    Y_hold : array-like
        Response handed to ``GLM`` / ``fit_firth``.
    Firth : bool
        Use Firth's method instead of the plain binomial GLM.
    resBase : GLMResultsWrapper, optional
        Result of a previous GLM fit. Required when ``Firth`` is true: it is
        used as the container for the Firth estimates so the return value has
        the same class as a GLM fit. NOTE: it is modified in place and is the
        very object that is returned.
    LRtest : bool
        When true (Firth only), also fit the null model and attach
        ``llnull``, ``lrstat`` and ``lrt_pval`` to the result.

    Returns
    -------
    The fitted result object with ``params``, ``pvalues``, ``aic``, ``aicc``
    and ``bic`` available. In the Firth case ``bse``, ``llf`` and ``predict``
    are overwritten as well; ``predict`` then holds the fifth value returned
    by ``fit_firth`` rather than a method.

    Raises
    ------
    SystemExit
        Exit code 1 if ``Firth`` is true and ``resBase`` is None; exit code 2
        if ``resBase`` is not a ``GLMResultsWrapper``.
    """
    if not Firth:
        # XXX: flagged by the original author for confirmation against an
        # older logistic implementation.
        res = GLM(Y_hold, X_hold, family=families.Binomial()).fit()
        n_params = res.df_model + 1
        # AICc adjustment
        res.aicc = statsmodels.tools.eval_measures.aicc(res.llf, nobs=res.nobs, df_modelwc=n_params)
        # Correct BIC
        res.bic = statsmodels.tools.eval_measures.bic(res.llf, nobs=res.nobs, df_modelwc=n_params)
        return res

    if resBase is None:
        sys.stderr.write('resBase must be provided to do Firth regression\n')
        sys.exit(1)
    if not isinstance(resBase, statsmodels.genmod.generalized_linear_model.GLMResultsWrapper):
        sys.stderr.write('resBase must be type statsmodels.genmod.generalized_linear_model.GLMResultsWrapper\n')
        sys.exit(2)
    res = resBase

    # Do Firth's logistic regression
    rint, rbeta, rbse, rfitll, pi = fit_firth(Y_hold, X_hold, start_vec=None)
    n_obs = len(Y_hold)
    n_cols = np.shape(X_hold)[1]

    if LRtest:
        # Null model: keep column 0 only, drop every other column.
        null_X = np.delete(X_hold, np.s_[1:], axis=1)
        _, _, _, null_fitll, _ = fit_firth(Y_hold, null_X, start_vec=None)
        lrstat = -2. * (null_fitll - rfitll)
        # The full and null models differ by every dropped column, so that
        # count is the chi-square degrees of freedom (1 for a two-column
        # design). Clamp to 1 so a one-column design never yields df=0.
        lrt_df = max(n_cols - 1, 1)
        lrt_pvalue = 1.
        if lrstat > 0.:  # a non-positive statistic signals non-convergence
            lrt_pvalue = stats.chi2.sf(lrstat, lrt_df)
        res.llnull = null_fitll
        res.lrstat = lrstat
        res.lrt_pval = lrt_pvalue

    # Information criteria computed from the Firth log-likelihood.
    aicc = statsmodels.tools.eval_measures.aicc(rfitll, nobs=n_obs, df_modelwc=n_cols)
    aic = statsmodels.tools.eval_measures.aic(rfitll, nobs=n_obs, df_modelwc=n_cols)
    bic = statsmodels.tools.eval_measures.bic(rfitll, nobs=n_obs, df_modelwc=n_cols)

    # Store parameters, standard errors, likelihoods, and statistics
    res.params = np.concatenate([np.array([rint]), np.array(rbeta)])
    res.bse = rbse
    res.llf = rfitll
    res.aicc = aicc
    res.aic = aic
    res.bic = bic

    # Wald p-values. The survival function is used instead of 1 - cdf so very
    # small p-values keep their precision instead of rounding to exactly 0.
    res.pvalues = chi2.sf((res.params / res.bse) ** 2, df=1)

    # Add predicted y (this replaces the result object's predict attribute).
    res.predict = pi
    return res
def iterate_logistic(X_hold, Y_hold, fixed_columns=None, Firth=False):
    """
    Fit one logistic regression per column of ``X_hold``, always including
    the ``fixed_columns`` in the model.

    Row 0 of every output holds the fit that uses column 0 alone (the
    "constant" fit). For each later column ``i`` that is not fixed and
    contains no NaN, the model uses ``fixed_columns + [i]`` and its results
    go into row ``i``. Rows for fixed or NaN-containing columns are left at
    zero.

    Parameters
    ----------
    X_hold : numpy.ndarray
        Two-dimensional design matrix; column 0 is used for the constant fit.
    Y_hold : array-like
        Response handed to the fitting routines.
    fixed_columns : sequence of int, optional
        Column indices included in every model. Defaults to ``[0]``. Any
        sequence (list, tuple, array) is accepted; it is never modified.
    Firth : bool
        Use Firth's method instead of the standard GLM fit.

    Returns
    -------
    betas, pvalues : numpy.ndarray, shape (k, len(fixed_columns) + 1)
        Fitted coefficients and Wald p-values, ``k`` being the number of
        columns of ``X_hold``.
    aic, aicc, bic : numpy.ndarray, shape (k, 1)
        Information criteria (``aicc`` is the second-order AIC).
    """
    fixed_columns = [0] if fixed_columns is None else list(fixed_columns)

    n_terms = len(fixed_columns) + 1
    k = np.shape(X_hold)[1]
    betas = np.zeros([k, n_terms])
    pvalues = np.zeros([k, n_terms])
    aic = np.zeros([k, 1])
    aicc = np.zeros([k, 1])
    bic = np.zeros([k, 1])

    def store(row, fit):
        # Copy one fit's statistics into the output matrices.
        betas[row, :] = fit.params
        pvalues[row, :] = fit.pvalues
        aic[row] = fit.aic
        aicc[row] = fit.aicc
        bic[row] = fit.bic

    # Fit constant
    if Firth:
        # Keep column 0 only.
        null_X = np.delete(X_hold, np.s_[1:], axis=1)
        null_intercept, _, null_bse, null_fitll, _ = fit_firth(Y_hold, null_X, start_vec=None)
        # A plain GLM fit is used only as a container so the result has the
        # same class as the GLM results; its statistics are overwritten below.
        res = GLM(Y_hold, null_X, family=families.Binomial()).fit()
        n_params = res.df_model + 1
        res.aicc = statsmodels.tools.eval_measures.aicc(null_fitll, nobs=res.nobs, df_modelwc=n_params)
        res.aic = statsmodels.tools.eval_measures.aic(null_fitll, nobs=res.nobs, df_modelwc=n_params)
        res.bic = statsmodels.tools.eval_measures.bic(null_fitll, nobs=res.nobs, df_modelwc=n_params)
        res.params = np.array([null_intercept])
        # Wald p-value via the survival function: 1 - cdf would round very
        # small p-values to exactly 0.
        res.pvalues = chi2.sf((res.params / null_bse) ** 2, df=1)
    else:
        res = fit_logistic(X_hold[:, 0], Y_hold)
    store(0, res)

    # Container reused by fit_logistic for every Firth fit below.
    resBase = copy.deepcopy(res)

    # True for columns that contain no NaN at all.
    complete_column = ~np.isnan(X_hold).any(axis=0)
    for i in range(1, k):
        if not complete_column[i] or i in fixed_columns:
            continue
        columns = fixed_columns + [i]
        res = fit_logistic(X_hold[:, columns], Y_hold, Firth=Firth, resBase=resBase, LRtest=False)
        store(i, res)

    return betas, pvalues, aic, aicc, bic