def initialize(cls):
    """Fit the NB1 negative binomial model and keep the result on ``cls``.

    The model is built from ``cls.data`` via a formula with
    ``loglike_method='nb1'`` and fitted with ``cov_type='HC0'``; the fit
    result is stored as ``cls.res``.
    """
    # Imported locally, so the dependency is only needed when this runs.
    from statsmodels.discrete.discrete_model import NegativeBinomial

    model = NegativeBinomial.from_formula(
        "Days ~ C(Duration, Sum)*C(Weight, Sum)",
        cls.data,
        loglike_method='nb1',
    )
    cls.res = model.fit(cov_type='HC0')
def nb_cluster(formula, cluster, covs, coef):
    """Model a cluster of correlated features with the negative binomial.

    One ``NegativeBinomial`` model is fitted per feature in ``cluster``
    (formula ``formula``, data ``covs``, offset ``log(feature.counts)``).
    The fits are summarised by ``get_ptc``; the non-NaN p-values are then
    merged with ``zscore_combine`` using ``corr`` of the per-feature
    methylation ratios, and the ``t`` and ``coef`` entries are replaced by
    their means.

    Parameters
    ----------
    formula : str
        Model formula handed to ``NegativeBinomial.from_formula``.
    cluster : iterable
        Features exposing ``methylated`` and ``counts`` attributes.
    covs
        Data handed to ``from_formula`` (presumably a table of
        covariates -- confirm against callers).
    coef
        Forwarded to ``get_ptc`` (presumably selects the coefficient of
        interest -- confirm against ``get_ptc``).

    Returns
    -------
    dict
        The mapping produced by ``get_ptc`` with ``'p'``, ``'t'`` and
        ``'coef'`` overwritten by the combined values.  If building or
        fitting any model raises, a placeholder is returned instead with
        ``t``, ``coef``, ``p`` and ``corr`` set to NaN and ``covar`` set
        to ``"NA"``.
    """
    methylated = np.array([f.methylated for f in cluster])
    counts = np.array([f.counts for f in cluster])

    # NOTE(review): only the offset changes between the per-feature fits;
    # the methylated values are not passed to the model here, so the
    # response must come from `formula`/`covs` -- confirm this is intended.
    try:
        res = [
            NegativeBinomial.from_formula(
                formula, covs, offset=np.log(count)
            ).fit(disp=0)
            for count in counts
        ]
    except Exception:
        # Best effort: a failed fit yields a NaN placeholder rather than
        # aborting the caller.  `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        return dict(t=np.nan, coef=np.nan, covar="NA", p=np.nan, corr=np.nan)

    # Ratio used for the correlation below; true_divide keeps it fractional
    # even for integer inputs under Python 2 division semantics.
    methylations = np.true_divide(methylated, counts)

    r = get_ptc(res, coef)
    is_nan = np.isnan(r['p'])
    r['p'] = zscore_combine(r['p'][~is_nan], corr(methylations[~is_nan]))
    r['t'], r['coef'] = np.mean(r['t']), np.mean(r['coef'])
    return r
# Demo script: fit several model families on the test data and print the
# term-wise Wald tests for each of them.
test.setupClass()
# Remove the entries labelled 0, 1 and 2 before fitting.
data = test.data.drop([0, 1, 2])

res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit(use_t=False)
res_glm = glm("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit()
res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                               data).fit(cov_type='HC0')
res_poi_2 = poisson("Days ~ C(Weight) + C(Duration)",
                    data).fit(cov_type='HC0')

print('\nOLS')
print(res_ols.wald_test_terms())
print('\nGLM')
print(res_glm.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 1')
print(res_poi.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 2')
print(res_poi_2.wald_test_terms(skip_single=False))

from statsmodels.discrete.discrete_model import NegativeBinomial

# Default log-likelihood parameterisation.
res_nb2 = NegativeBinomial.from_formula("Days ~ C(Weight) * C(Duration)",
                                        data).fit()
print('\nNegative Binomial nb2')
print(res_nb2.wald_test_terms(skip_single=False))

# Same formula with loglike_method='nb1'; the banner now names nb1 (it
# previously repeated the nb2 label, which mislabelled this output).
res_nb1 = NegativeBinomial.from_formula("Days ~ C(Weight) * C(Duration)",
                                        data,
                                        loglike_method='nb1').fit(cov_type='HC0')
print('\nNegative Binomial nb1')
print(res_nb1.wald_test_terms(skip_single=False))
# Fragment of a demo script: `data` and `res_ols` are used below but are
# defined by earlier statements that are not part of this fragment.
res_glm = glm("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit()
res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                               data).fit(cov_type='HC0')
res_poi_2 = poisson("Days ~ C(Weight) + C(Duration)",
                    data).fit(cov_type='HC0')

print('\nOLS')
print(res_ols.wald_test_terms())
print('\nGLM')
print(res_glm.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 1')
print(res_poi.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 2')
print(res_poi_2.wald_test_terms(skip_single=False))

from statsmodels.discrete.discrete_model import NegativeBinomial

# Default log-likelihood parameterisation.
res_nb2 = NegativeBinomial.from_formula("Days ~ C(Weight) * C(Duration)",
                                        data).fit()
print('\nNegative Binomial nb2')
print(res_nb2.wald_test_terms(skip_single=False))

# Same formula with loglike_method='nb1'; the banner now names nb1 (it
# previously repeated the nb2 label, which mislabelled this output).
res_nb1 = NegativeBinomial.from_formula(
    "Days ~ C(Weight) * C(Duration)", data,
    loglike_method='nb1').fit(cov_type='HC0')
print('\nNegative Binomial nb1')
print(res_nb1.wald_test_terms(skip_single=False))