def test_chisquare():
    """Check ``chisquare`` against reference output of R's ``chisq.test``.

    Two sets of expected counts are tested against the same observed
    frequencies. Only the test statistic and the p-value are compared;
    the remaining reference fields are kept as printed by R.
    """
    # TODO: no tests for ``value`` yet

    # Inputs used in the R session:
    #> freq = c(1048, 660, 510, 420, 362)
    #> pr1 = c(1020, 690, 510, 420, 360)
    #> pr2 = c(1050, 660, 510, 420, 360)
    observed = np.array([1048, 660, 510, 420, 362])
    expected_1 = np.array([1020, 690, 510, 420, 360])
    expected_2 = np.array([1050, 660, 510, 420, 360])

    # Reference for the first set of expected counts.
    #> c = chisq.test(freq, p=pr1, rescale.p = TRUE)
    #> cat_items(c, "res1.")
    ref_1 = Holder()
    ref_1.statistic = 2.084086388178453
    ref_1.parameter = 4
    ref_1.p_value = 0.72029651761105
    ref_1.method = 'Chi-squared test for given probabilities'
    ref_1.data_name = 'freq'
    ref_1.observed = np.array([
        1048, 660, 510, 420, 362
    ])
    ref_1.expected = np.array([
        1020, 690, 510, 420, 360
    ])
    ref_1.residuals = np.array([
        0.876714007519206, -1.142080481440321, -2.517068894406109e-15,
        -2.773674830645328e-15, 0.105409255338946
    ])

    # Reference for the second set of expected counts.
    #> c = chisq.test(freq, p=pr2, rescale.p = TRUE)
    #> cat_items(c, "res2.")
    ref_2 = Holder()
    ref_2.statistic = 0.01492063492063492
    ref_2.parameter = 4
    ref_2.p_value = 0.999972309849908
    ref_2.method = 'Chi-squared test for given probabilities'
    ref_2.data_name = 'freq'
    ref_2.observed = np.array([
        1048, 660, 510, 420, 362
    ])
    ref_2.expected = np.array([
        1050, 660, 510, 420, 360
    ])
    ref_2.residuals = np.array([
        -0.06172133998483677, 0, -2.517068894406109e-15,
        -2.773674830645328e-15, 0.105409255338946
    ])

    cases = (
        (expected_1, ref_1),
        (expected_2, ref_2),
    )
    for expected, ref in cases:
        stat, pval = chisquare(observed, expected)
        assert_almost_equal(stat, ref.statistic, decimal=12)
        assert_almost_equal(pval, ref.p_value, decimal=13)
def log_results(log, result, source):
    """
    Log the fitting results.

    Prints the negative log-likelihood per observation to stdout and hands
    one row of goodness-of-fit figures, mixture parameters, the convergence
    flag and fit criteria to ``log.write_row``. Nothing is returned.

    Parameters
    ----------
    log : object
        Sink providing ``write_row``.
    result : object
        Fit result. Read here: ``model``, ``full_params``, ``mle_retvals``,
        ``llf``, ``aic``, ``bic`` and the method ``gof_chisquare``.
    source : object
        Data source. Read here: ``neg_shift``, ``source_data.data``,
        ``current.dir_base`` and ``current.base_names``.

    Notes
    -----
    The resulting mixture parameters are stored into a 2d array with rows
    [location in degrees (mu), shape (kappa), probability].
    """
    # Reorder the summary columns (1, 0, 2) into the layout given in Notes.
    sparams = result.model.get_summary_params(result.full_params)[:, [1, 0, 2]]
    sparams[:, 0] = tr.transform_pi_deg(tr.fix_range(sparams[:, 0]),
                                        neg_shift=source.neg_shift)
    converged = result.mle_retvals['converged']
    fit_criteria = [-result.llf, result.aic, result.bic]

    # Build the message as one string so the output is the same whether
    # ``print`` is a statement (Python 2) or a function (Python 3); the
    # former ``print 'a', b`` form is a SyntaxError on Python 3.
    llf_per_obs = fit_criteria[0] / result.model.endog.shape[0]
    print('llf / nobs: %s' % (llf_per_obs,))

    chisquare = result.gof_chisquare()

    # Chisquare test with effect size.
    alpha = 0.05  # Significance level.
    # NOTE(review): column 0 is presumably the bin position in radians and
    # column 1 the observed count per bin -- confirm against the loader.
    data = source.source_data.data
    n_obs = data[:, 1].sum()
    # Spacing of the first two positions; assumes evenly spaced bins.
    rad_diff = data[1, 0] - data[0, 0]
    pdf = result.model.pdf_mix(result.full_params, data[:, 0])
    # Density * bin width * n_obs; despite the name this looks like expected
    # counts rather than probabilities -- TODO confirm.
    probs = pdf * rad_diff * n_obs
    effect_size = gof.chisquare_effectsize(data[:, 1], probs)
    chi2 = gof.chisquare(data[:, 1], probs, value=effect_size)
    power = gof.chisquare_power(effect_size, n_obs, data.shape[0], alpha=alpha)

    chisquare_all = (list(chisquare) + [n_obs, effect_size]
                     + list(chi2) + [power])
    log.write_row(source.current.dir_base, source.current.base_names,
                  chisquare_all, sparams, converged, fit_criteria)
# Exploratory script: draw a categorical sample from ``probs`` and compare
# chi-square results under the null (``probs``) and a perturbed alternative
# (``probs_d``). ``probs``, ``nobs``, ``n_bins`` and ``chisquare`` must be
# defined before this point.
print("probs", probs)

# Alternative distribution: shift ``delta`` from the second category to the
# first, leaving the total unchanged.
probs_d = probs.copy()
delta = 0.01
probs_d[0] += delta
probs_d[1] -= delta

probs_cs = probs.cumsum()
#rvs = np.random.multinomial(n_bins, probs, size=10)
#rvs = np.round(np.random.randn(10), 2)
# For each uniform draw take the first index whose cumulative probability
# exceeds it.
rvs = np.argmax(np.random.rand(nobs, 1) < probs_cs, 1)
print(probs)

# ``minlength`` guarantees one count per category. Without it ``bincount``
# stops at the largest value drawn, so a sample that misses the last
# categories yields a shorter ``freq`` that cannot be compared with
# ``nobs*probs`` below.
freq = np.bincount(rvs, minlength=probs_cs.size)
print(freq * (1. / nobs))

print(stats.chisquare(freq, nobs*probs))
print('null', chisquare(freq, nobs*probs))
print('delta', chisquare(freq, nobs*probs_d))
chisq_null, pval_null = chisquare(freq, nobs*probs)

# effect size ?
d_null = ((freq / float(nobs) - probs)**2 / probs).sum()
print(d_null)
d_delta = ((freq / float(nobs) - probs_d)**2 / probs_d).sum()
print(d_delta)
d_null_alt = ((probs - probs_d)**2 / probs_d).sum()
print(d_null_alt)

print('\nchisquare with value')
# NOTE(review): the label says "with value" but no ``value`` argument is
# passed to ``chisquare`` here -- confirm whether that is intended.
chisq, pval = chisquare(freq, nobs*probs_d)
# NOTE(review): ``n_bins`` is passed as the degrees of freedom and
# ``0.001 * nobs`` as the noncentrality; a goodness-of-fit test usually has
# ``n_bins - 1`` degrees of freedom -- confirm.
print(stats.ncx2.sf(chisq_null, n_bins, 0.001 * nobs))
print(stats.ncx2.sf(chisq, n_bins, 0.001 * nobs))