def pf(q, df1, df2, ncp=0):
    """
    Cumulative distribution function of the (noncentral) F-distribution,
    following the signature of R's pf().
    """
    from scipy.stats import f, ncf
    if ncp == 0:
        result = f.cdf(x=q, dfn=df1, dfd=df2, loc=0, scale=1)
    else:
        result = ncf.cdf(x=q, dfn=df1, dfd=df2, nc=ncp, loc=0, scale=1)
    return result
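A minimal usage sketch (the inputs are illustrative): the central call reduces to scipy.stats.f.cdf, and a positive ncp shifts probability mass to the right, lowering the CDF at the same quantile.

# Illustrative check: the central case is plain scipy.stats.f.cdf.
print(pf(3.0, 2, 10))            # central F CDF, about 0.9046
print(pf(3.0, 2, 10, ncp=1.5))   # noncentral CDF, below the central value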
Example #2
import numpy as np
from scipy.stats import f, ncf

def anova2_test_power(arms, alpha, mus, n, sigma):
    # Power of the omnibus F test in an arms x 2 layout with n observations
    # per cell; mus holds the cell means and sigma the error SD.
    n_cell = arms * 2
    ndf = n_cell - 1
    ddf = n * n_cell - ndf - 1
    nu = n * np.sum((mus - np.mean(mus))**2) / sigma**2
    f_cut = f.ppf(1 - alpha, ndf, ddf)
    power = 1 - ncf.cdf(f_cut, ndf, ddf, nu)
    return power
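A usage sketch with made-up numbers, assuming mus holds one mean per cell (arms * 2 entries):

# Illustrative call: 2 arms x 2 levels = 4 cell means, n = 20 per cell.
cell_means = np.array([0.0, 0.0, 0.0, 0.5])
print(anova2_test_power(arms=2, alpha=0.05, mus=cell_means, n=20, sigma=1.0))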
Example #3
import numpy as np
from scipy.stats import f, ncf

def anovaf_test_power(arms, alpha, mus, n, sigma):
    # Power of an ANOVA F test: mus holds one mean per arm, n is the
    # per-arm sample size, sigma the error SD.
    ndf = arms - 1
    ddf = (arms - 1) * n
    # Noncentrality parameter from the spread of the means.
    nu = n * np.sum((mus - np.mean(mus))**2) / sigma**2
    f_cut = f.ppf(1 - alpha, ndf, ddf)
    power = 1 - ncf.cdf(f_cut, ndf, ddf, nu)
    return power
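A matching sketch for this variant, again with illustrative values (one mean per arm):

# Illustrative call: three arms with means 0.0, 0.3, 0.6 and n = 30 per arm.
arm_means = np.array([0.0, 0.3, 0.6])
print(anovaf_test_power(arms=3, alpha=0.05, mus=arm_means, n=30, sigma=1.0))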
Example #4
import numpy as np
from patsy import dmatrix
from scipy.stats import f, ncf


def f_power(model, design, effect_size, alpha):
    """Calculates the power of an F test.

    This calculates the probability that the F-statistic is above the
    critical value determined by alpha, given an effect of some size.

    :param model: A patsy formula for which to calculate power.
    :type model: patsy.formula
    :param design: A pandas.DataFrame representing a design.
    :type design: pandas.DataFrame
    :param effect_size: The size of the effect that the test should be able to detect (also called a signal to noise
        ratio).
    :type effect_size: float
    :param alpha: The significance level of the test.
    :type alpha: float between 0 and 1
    :returns: A list of probabilities (shown as percentages in the example below) that an F-test could detect an
        effect of the given size at the given alpha value for a particular column.

    Usage:
      >>> design = dexpy.factorial.build_factorial(4, 8)
      >>> print(dexpy.power.f_power("1 + A + B + C + D", design, 2.0, 0.05))
      [ 95.016, 49.003, 49.003, 49.003, 49.003 ]
    """
    X = dmatrix(model, design)
    residual_df = X.shape[0] - X.shape[1]

    XtXi = np.linalg.inv(np.dot(np.transpose(X), X))
    non_centrality = 1 / np.diag(XtXi)

    # Pre-calculate the critical value for 1 df, the most common case.
    crit_value = f.ppf(1 - alpha, 1, residual_df)

    power = []
    for t in range(X.shape[1]):
        # adjust_non_centrality is a helper defined elsewhere in dexpy.power.
        nc = adjust_non_centrality(non_centrality[t], X[:, t])
        nc *= effect_size * effect_size / 4.0
        p = 1 - ncf.cdf(crit_value, 1, residual_df, nc)
        power.append(p)

    return power
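The division by 4 reflects the +/-1 factor coding, where the effect (a signal-to-noise ratio) spans two coded units. For a single two-level factor in an orthogonal N-run design, diag((X'X)^-1) is 1/N, so the core calculation can be sketched without patsy (a simplification for illustration, not dexpy's API):

# Standalone sketch for one two-level factor with N runs coded -1/+1,
# so that 1 / diag((X'X)^-1) == N for the factor column.
from scipy.stats import f, ncf

def one_factor_power(N, effect_size, alpha):
    residual_df = N - 2                    # model columns: intercept + factor
    nc = N * effect_size**2 / 4.0          # noncentrality under +/-1 coding
    crit_value = f.ppf(1 - alpha, 1, residual_df)
    return 1 - ncf.cdf(crit_value, 1, residual_df, nc)

print(one_factor_power(8, 2.0, 0.05))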
Example #5
    def _compute_ppvals_33(self, pcurvetype="full"):
        family = self._df_results["family"].values
        df1, df2, stat, pvals, ncp33 = self._df_results[[
            "df1", "df2", "stat", "p", "ncp33"
        ]].to_numpy().T
        if pcurvetype == "full":
            pthresh = .05  # Only keep p-values smaller than .05
            propsig = 1 / 3  # Under 33% power, 1/3 of p-values should be lower than .05
        else:
            pthresh = .025  # Only keep p-values smaller than .025
            # We don't know what proportion of p-values should fall below .025 under 33% power, so compute it
            propsig = 3 * self._compute_prop_lower_33(.025, family, df1, df2,
                                                      pvals, ncp33)

        # Stretch the pp-values onto the [0, 1] interval.
        pp_33_f = (1 / propsig) * (ncf.cdf(stat, df1, df2, ncp33) -
                                   (1 - propsig))
        pp_33_chi = (1 / propsig) * (ncx2.cdf(stat, df1, ncp33) -
                                     (1 - propsig))
        pp_33 = np.where(family == "F", pp_33_f, pp_33_chi)
        return np.array([
            self._bound_pvals(pp) if p < pthresh else np.nan
            for (p, pp) in zip(pvals, pp_33)
        ])
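The stretch can be checked in isolation. A sketch of the "full"-curve transformation for a single F statistic, assuming ncp33 has already been solved elsewhere so that a test with that noncentrality has exactly 33% power at p = .05:

# Sketch: the pp-value of one F test under 33% power, i.e. its CDF under
# the ncp33 noncentrality, shifted and rescaled onto [0, 1]. Assumes
# ncp33 was solved elsewhere so that power at alpha = .05 equals 1/3.
from scipy.stats import ncf

def pp_33_full_f(stat, df1, df2, ncp33, propsig=1/3):
    return (1 / propsig) * (ncf.cdf(stat, df1, df2, ncp33) - (1 - propsig))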
Example #6
plt.annotate("beta= " + str(np.round(betacalc, 3)), (mean_h0 - stderr, .1 * ymax))
plt.annotate("n= " + str(n), (alpha_xval + stderr / 2, .2 * ymax))
plt.legend(loc=1)
plt.xlabel("Effect Size")
plt.ylabel("Probability")
plt.title("Sample Size and Power Calculation")
plt.show()

# Report the power achieved for the sample size as entered.
print("Power of test as entered (n={})= {:.2f}%".format(n, 100 * (1 - betacalc)))
print("[nct power= {:.2f}%]".format((1 - betacalc) * 100))

# Cross-check the power via the noncentral F distribution (t^2 ~ F(1, n-1)):
fcrit = f.ppf(1 - alpha * sided, 1, n - 1)
dsr = mean_h1 / sigma  # standardized effect size
betacalc_f = ncf.cdf(fcrit, 1, n - 1, n * dsr**2)
print("[fct power= {:.2f}%]".format((1 - betacalc_f) * 100))

def opt_n(n):
    # Returns beta (the type II error rate) as a function of sample size n.
    stderr_n = sigma / n**.5
    lam_n = (mean_h1 - mean_h0) / stderr_n
    alpha_xval_n = t.isf(alphas, df=n - 1, scale=stderr_n)
    betah_n = nct.cdf(alpha_xval_n, df=n - 1, nc=lam_n, scale=stderr_n)
    if sided == 2:
        # scale must match betah_n, since alpha_xval_n is in scaled units.
        betal_n = nct.cdf(-alpha_xval_n, df=n - 1, nc=lam_n, scale=stderr_n)
        betacalc_n = betah_n - betal_n
    else:
        betacalc_n = betah_n

    return betacalc_n
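Since opt_n returns beta as a function of n, the sample size for a target power follows from a scalar root find; a sketch assuming a target beta of 0.2 (80% power) and a bracket that contains the root:

# Solve opt_n(n) == beta_target for the required sample size; the target
# beta of 0.2 (80% power) and the bracket [2, 10000] are illustrative.
from scipy.optimize import brentq

beta_target = 0.2
n_required = brentq(lambda n: opt_n(n) - beta_target, 2, 10000)
print("n for 80% power:", int(np.ceil(n_required)))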
Example #7
import numpy as np
from scipy.stats import ncf
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)
dfn, dfd, nc = 27, 27, 0.416  # example shape parameters

# Display the probability density function (``pdf``):

x = np.linspace(ncf.ppf(0.01, dfn, dfd, nc), ncf.ppf(0.99, dfn, dfd, nc), 100)
ax.plot(x, ncf.pdf(x, dfn, dfd, nc), 'r-', lw=5, alpha=0.6, label='ncf pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = ncf(dfn, dfd, nc)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = ncf.ppf([0.001, 0.5, 0.999], dfn, dfd, nc)
np.allclose([0.001, 0.5, 0.999], ncf.cdf(vals, dfn, dfd, nc))
# True

# Generate random numbers:

r = ncf.rvs(dfn, dfd, nc, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()