Ejemplo n.º 1
0
def test_chisquare():
    """
    Compare ``chisquare`` with reference output from R's ``chisq.test``.

    The same observed counts are tested against two different weight
    vectors.  For each one, the statistic and p-value returned by
    ``chisquare`` must agree with the R values to 12 and 13 decimals
    respectively.  The remaining fields of the R result are stored on the
    holders for reference only; they are not asserted.
    """
    # TODO: no tests for ``value`` yet
    ref_a, ref_b = Holder(), Holder()
    method_name = 'Chi-squared test for given probabilities'

    # R session that produced the reference numbers:
    #> freq = c(1048,  660,  510,  420,  362)
    #> pr1 = c(1020,  690,  510,  420,  360)
    #> pr2 = c(1050,  660,  510,  420,  360)

    # First case.
    #> c = chisq.test(freq, p=pr1, rescale.p = TRUE)
    #> cat_items(c, "res1.")
    ref_a.statistic = 2.084086388178453
    ref_a.parameter = 4
    ref_a.p_value = 0.72029651761105
    ref_a.method = method_name
    ref_a.data_name = 'freq'
    ref_a.observed = np.array([1048, 660, 510, 420, 362])
    ref_a.expected = np.array([1020, 690, 510, 420, 360])
    ref_a.residuals = np.array([
        0.876714007519206,
        -1.142080481440321,
        -2.517068894406109e-15,
        -2.773674830645328e-15,
        0.105409255338946,
    ])

    # Second case.
    #> c = chisq.test(freq, p=pr2, rescale.p = TRUE)
    #> cat_items(c, "res2.")
    ref_b.statistic = 0.01492063492063492
    ref_b.parameter = 4
    ref_b.p_value = 0.999972309849908
    ref_b.method = method_name
    ref_b.data_name = 'freq'
    ref_b.observed = np.array([1048, 660, 510, 420, 362])
    ref_b.expected = np.array([1050, 660, 510, 420, 360])
    ref_b.residuals = np.array([
        -0.06172133998483677,
        0,
        -2.517068894406109e-15,
        -2.773674830645328e-15,
        0.105409255338946,
    ])

    # Python counterparts of the R input vectors.
    counts = np.array([1048, 660, 510, 420, 362])
    weights_a = np.array([1020, 690, 510, 420, 360])
    weights_b = np.array([1050, 660, 510, 420, 360])

    cases = ((weights_a, ref_a), (weights_b, ref_b))
    for weights, ref in cases:
        statistic, p_value = chisquare(counts, weights)
        assert_almost_equal(statistic, ref.statistic, decimal=12)
        assert_almost_equal(p_value, ref.p_value, decimal=13)
Ejemplo n.º 2
0
def test_chisquare():
    """
    Compare ``chisquare`` with reference output from R's ``chisq.test``.

    The same observed counts are tested against two different weight
    vectors.  For each one, the statistic and p-value returned by
    ``chisquare`` must agree with the R values to 12 and 13 decimals
    respectively.  The remaining fields of the R result are stored on the
    holders for reference only; they are not asserted.
    """
    # TODO: no tests for ``value`` yet
    ref_a, ref_b = Holder(), Holder()
    method_name = 'Chi-squared test for given probabilities'

    # R session that produced the reference numbers:
    #> freq = c(1048,  660,  510,  420,  362)
    #> pr1 = c(1020,  690,  510,  420,  360)
    #> pr2 = c(1050,  660,  510,  420,  360)

    # First case.
    #> c = chisq.test(freq, p=pr1, rescale.p = TRUE)
    #> cat_items(c, "res1.")
    ref_a.statistic = 2.084086388178453
    ref_a.parameter = 4
    ref_a.p_value = 0.72029651761105
    ref_a.method = method_name
    ref_a.data_name = 'freq'
    ref_a.observed = np.array([1048, 660, 510, 420, 362])
    ref_a.expected = np.array([1020, 690, 510, 420, 360])
    ref_a.residuals = np.array([
        0.876714007519206,
        -1.142080481440321,
        -2.517068894406109e-15,
        -2.773674830645328e-15,
        0.105409255338946,
    ])

    # Second case.
    #> c = chisq.test(freq, p=pr2, rescale.p = TRUE)
    #> cat_items(c, "res2.")
    ref_b.statistic = 0.01492063492063492
    ref_b.parameter = 4
    ref_b.p_value = 0.999972309849908
    ref_b.method = method_name
    ref_b.data_name = 'freq'
    ref_b.observed = np.array([1048, 660, 510, 420, 362])
    ref_b.expected = np.array([1050, 660, 510, 420, 360])
    ref_b.residuals = np.array([
        -0.06172133998483677,
        0,
        -2.517068894406109e-15,
        -2.773674830645328e-15,
        0.105409255338946,
    ])

    # Python counterparts of the R input vectors.
    counts = np.array([1048, 660, 510, 420, 362])
    weights_a = np.array([1020, 690, 510, 420, 360])
    weights_b = np.array([1050, 660, 510, 420, 360])

    cases = ((weights_a, ref_a), (weights_b, ref_b))
    for weights, ref in cases:
        statistic, p_value = chisquare(counts, weights)
        assert_almost_equal(statistic, ref.statistic, decimal=12)
        assert_almost_equal(p_value, ref.p_value, decimal=13)
Ejemplo n.º 3
0
def log_results(log, result, source):
    """
    Log the fitting results.

    Parameters
    ----------
    log : object
        Row sink; its ``write_row`` method is called once per invocation.
    result : object
        Fit result.  The attributes ``model``, ``full_params``,
        ``mle_retvals``, ``llf``, ``aic`` and ``bic`` and the method
        ``gof_chisquare()`` are read.
    source : object
        Data source.  The attributes ``neg_shift``, ``source_data.data``
        and ``current`` (``dir_base``, ``base_names``) are read.

    Notes
    -----
    The resulting mixture parameters are stored into a 2d array with rows
    [location in degrees (mu), shape (kappa), probability].
    """
    # Summary parameters with their columns reordered to (1, 0, 2).
    sparams = result.model.get_summary_params(result.full_params)[:, [1, 0, 2]]
    sparams[:, 0] = tr.transform_pi_deg(tr.fix_range(sparams[:, 0]),
                                        neg_shift=source.neg_shift)
    converged = result.mle_retvals['converged']

    # The first criterion is the *negated* log-likelihood.
    fit_criteria = [-result.llf, result.aic, result.bic]
    # Emit one pre-formatted string: this is valid syntax, and prints the
    # same text, under both Python 2 and Python 3 (the former
    # ``print 'label', value`` statement is a SyntaxError on Python 3).
    llf_per_obs = fit_criteria[0] / result.model.endog.shape[0]
    print('llf / nobs: %s' % (llf_per_obs,))

    gof_result = result.gof_chisquare()

    # Chisquare test with effect size.
    alpha = 0.05  # Significance level.
    # presumably column 0 holds the bin locations and column 1 the observed
    # counts -- verify against the data source.
    data = source.source_data.data
    n_obs = data[:, 1].sum()
    # Spacing between the first two locations; assumes the grid is uniform
    # and has at least two rows -- TODO confirm.
    rad_diff = data[1, 0] - data[0, 0]

    # Density times bin width times sample size, i.e. these are on the scale
    # of counts despite the name ``probs``.
    pdf = result.model.pdf_mix(result.full_params, data[:, 0])
    probs = pdf * rad_diff * n_obs
    effect_size = gof.chisquare_effectsize(data[:, 1], probs)
    chi2 = gof.chisquare(data[:, 1], probs, value=effect_size)
    power = gof.chisquare_power(effect_size, n_obs,
                                data.shape[0], alpha=alpha)

    # Flat row: model's own GOF result, then n_obs and effect size, then the
    # effect-size-adjusted test result, then the power.
    chisquare_all = (list(gof_result) + [n_obs, effect_size]
                     + list(chi2) + [power])

    log.write_row(source.current.dir_base, source.current.base_names,
                  chisquare_all, sparams, converged, fit_criteria)
Ejemplo n.º 4
0
def log_results(log, result, source):
    """
    Log the fitting results.

    Parameters
    ----------
    log : object
        Row sink; its ``write_row`` method is called once per invocation.
    result : object
        Fit result.  The attributes ``model``, ``full_params``,
        ``mle_retvals``, ``llf``, ``aic`` and ``bic`` and the method
        ``gof_chisquare()`` are read.
    source : object
        Data source.  The attributes ``neg_shift``, ``source_data.data``
        and ``current`` (``dir_base``, ``base_names``) are read.

    Notes
    -----
    The resulting mixture parameters are stored into a 2d array with rows
    [location in degrees (mu), shape (kappa), probability].
    """
    # Summary parameters with their columns reordered to (1, 0, 2).
    sparams = result.model.get_summary_params(result.full_params)[:, [1, 0, 2]]
    sparams[:, 0] = tr.transform_pi_deg(tr.fix_range(sparams[:, 0]),
                                        neg_shift=source.neg_shift)
    converged = result.mle_retvals['converged']

    # The first criterion is the *negated* log-likelihood.
    fit_criteria = [-result.llf, result.aic, result.bic]
    # Emit one pre-formatted string: this is valid syntax, and prints the
    # same text, under both Python 2 and Python 3 (the former
    # ``print 'label', value`` statement is a SyntaxError on Python 3).
    llf_per_obs = fit_criteria[0] / result.model.endog.shape[0]
    print('llf / nobs: %s' % (llf_per_obs,))

    gof_result = result.gof_chisquare()

    # Chisquare test with effect size.
    alpha = 0.05  # Significance level.
    # presumably column 0 holds the bin locations and column 1 the observed
    # counts -- verify against the data source.
    data = source.source_data.data
    n_obs = data[:, 1].sum()
    # Spacing between the first two locations; assumes the grid is uniform
    # and has at least two rows -- TODO confirm.
    rad_diff = data[1, 0] - data[0, 0]

    # Density times bin width times sample size, i.e. these are on the scale
    # of counts despite the name ``probs``.
    pdf = result.model.pdf_mix(result.full_params, data[:, 0])
    probs = pdf * rad_diff * n_obs
    effect_size = gof.chisquare_effectsize(data[:, 1], probs)
    chi2 = gof.chisquare(data[:, 1], probs, value=effect_size)
    power = gof.chisquare_power(effect_size, n_obs, data.shape[0], alpha=alpha)

    # Flat row: model's own GOF result, then n_obs and effect size, then the
    # effect-size-adjusted test result, then the power.
    chisquare_all = (list(gof_result) + [n_obs, effect_size]
                     + list(chi2) + [power])

    log.write_row(source.current.dir_base, source.current.base_names,
                  chisquare_all, sparams, converged, fit_criteria)
Ejemplo n.º 5
0
# Exploratory script fragment: draw categorical samples from ``probs`` and
# compare chi-square goodness-of-fit output under the generating
# probabilities and under a slightly perturbed alternative.
# NOTE(review): ``probs``, ``nobs``, ``n_bins``, ``stats`` and ``chisquare``
# are defined outside this excerpt; ``stats`` is presumably ``scipy.stats``.
print("probs", probs)
# Perturbed copy of the probabilities: ``delta`` is added to cell 0 and
# subtracted from cell 1, so the total is unchanged.
probs_d = probs.copy()
delta = 0.01
probs_d[0] += delta
probs_d[1] -= delta
probs_cs = probs.cumsum()
#rvs = np.random.multinomial(n_bins, probs, size=10)
#rvs = np.round(np.random.randn(10), 2)
# For each of ``nobs`` uniform draws, take the index of the first cumulative
# probability that exceeds it (argmax over booleans returns the first True).
# NOTE(review): a row with no True also yields index 0 -- only possible if
# the last cumulative value is below 1; confirm ``probs`` sums to 1.
rvs = np.argmax(np.random.rand(nobs,1) < probs_cs, 1)
print(probs)
# Observed relative frequency of each drawn category index.
print(np.bincount(rvs) * (1. / nobs))


# NOTE(review): ``np.bincount`` returns ``rvs.max() + 1`` entries, so ``freq``
# is shorter than ``probs`` if the highest categories are never drawn.
freq = np.bincount(rvs)
# Same data through ``stats.chisquare`` and through the local ``chisquare``,
# first against the generating probabilities, then against the perturbed ones.
print(stats.chisquare(freq, nobs*probs))
print('null', chisquare(freq, nobs*probs))
print('delta', chisquare(freq, nobs*probs_d))
chisq_null, pval_null = chisquare(freq, nobs*probs)

# effect size ?
# Each quantity below is a sum over cells of
# (first proportion - second proportion)**2 / second proportion.
# Observed proportions versus ``probs``.
d_null = ((freq / float(nobs) - probs)**2 / probs).sum()
print(d_null)
# Observed proportions versus the perturbed ``probs_d``.
d_delta = ((freq / float(nobs) - probs_d)**2 / probs_d).sum()
print(d_delta)
# ``probs`` versus ``probs_d`` directly; does not depend on the sample.
d_null_alt = ((probs - probs_d)**2 / probs_d).sum()
print(d_null_alt)

print('\nchisquare with value')
chisq, pval = chisquare(freq, nobs*probs_d)
# ``ncx2.sf`` evaluated at both statistics with second argument ``n_bins``
# and third argument ``0.001 * nobs``.
# NOTE(review): assuming scipy's ``ncx2.sf(x, df, nc)``, this uses
# df = n_bins rather than n_bins - 1 -- confirm that is intended.
print(stats.ncx2.sf(chisq_null, n_bins, 0.001 * nobs))
print(stats.ncx2.sf(chisq, n_bins, 0.001 * nobs))