Example #1
import math

import numpy as np
from scipy.stats import rankdata, f


def IPN_kendallW(X):
    """
    Kendall's coefficient of concordance (Kendall's W).
    X is a 2D numpy array (an n-by-k ratings matrix), where
    n is the number of objects and k is the number of judges.
    """
    n, k = np.shape(X)

    # rank each judge's column, with ties getting average ranks (tiedrank equivalent)
    R = np.zeros_like(X, dtype=float)
    for i in range(np.shape(X)[1]):
        R[:, i] = rankdata(X[:, i])
    R_new = np.sort(np.round(R), axis=0)
    A = np.tile(np.arange(1, n + 1), (k, 1)).T
    T = np.sum(np.array(A - R_new, dtype=bool), axis=0) + 1  # tie-correction terms
    RS = np.sum(R, axis=1)  # rank sums per object
    S = np.sum(np.square(RS)) - n * math.pow(np.mean(RS), 2)
    F = k * k * (n * n * n - n) - k * np.sum(np.power(T, 3) - T)
    W = 12.0 * S / F

    # F test of significance for W
    Fdist = W * (k - 1) / (1 - W)
    nu1 = n - 1 - (2.0 / k)
    nu2 = nu1 * (k - 1)
    p = f.sf(Fdist, nu1, nu2)  # p-value is the upper-tail probability (sf), not the density (pdf)

    return W, p, Fdist
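A minimal usage sketch, assuming the imports above; the 6x3 ratings matrix below is made-up data, not from the original project:

ratings = np.array([[1, 2, 1],
                    [2, 1, 3],
                    [3, 4, 2],
                    [4, 3, 5],
                    [5, 6, 4],
                    [6, 5, 6]], dtype=float)
W, p, Fdist = IPN_kendallW(ratings)
print("W = %.3f, F = %.3f, p = %.4f" % (W, Fdist, p))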
Example #3
def f_contrast(self):
    '''
    Returns p-value for the F contrast, H0: b0 = b1 = b2 = ... = bn = 0
    '''
    # F = ((SSR_H0 - SSR)/(k - 1)) / (SSR/(n - k))
    fvalue = ((self.ssrh0 - self.ssr) / (self.k - 1)) / (self.ssr / (self.n - self.k))
    # p-value is the upper-tail probability (sf), not the density (pdf)
    pvalue = f.sf(fvalue, self.k - 1, self.n - self.k)
    return pvalue
Example #4
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f


def plot_f_distribution(fvalue, dfn, dfd):
    # Set figure
    plt.figure(figsize=(8, 6))

    # Critical value and moments for the given degrees of freedom
    rejection_reg = f.ppf(q=.95, dfn=dfn, dfd=dfd)
    mean, var, skew, kurt = f.stats(dfn, dfd, moments='mvsk')

    x = np.linspace(f.ppf(0.01, dfn, dfd), f.ppf(0.99, dfn, dfd), 100)

    # Plot values
    plt.plot(x,
             f.pdf(x, dfn, dfd),
             alpha=0.6,
             label=' X ~ F({}, {})'.format(dfn, dfd))
    plt.axvline(x=fvalue)
    plt.vlines(rejection_reg,
               0.0,
               1.0,
               linestyles="dashdot",
               label="Crit. Value: {:.2f}".format(rejection_reg))
    plt.legend()
    plt.ylim(0.0, 1.0)
    plt.xlim(0.0, 20.0)
    plt.title('F-Distribution dfn:{}, dfd:{}'.format(dfn, dfd))
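A usage sketch for the function above, with a made-up observed F value:

plot_f_distribution(fvalue=4.2, dfn=3, dfd=40)
plt.show()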
Example #6
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f, norm


def main():
    # Input parameters
    Nminusk = 10000
    kminus1 = 2
    step = 0.001

    # Integrate the F-distribution density to get the critical value
    # (equivalently, F = f.ppf(0.95, kminus1, Nminusk))
    F = 0.
    integrate = 0
    while integrate < 0.95:
        F += step
        integrate += f.pdf(F, kminus1, Nminusk) * step
        if integrate > 0.95:
            print("F value at 95%% confidence level is %0.1f" % F)
            break

    # Plot the F-distribution
    x = np.linspace(0, 100, 1000)
    plt.plot(x, f.pdf(x, kminus1, Nminusk), color="blue", linewidth=3)
    plt.axvline(F, color="black", linestyle="--", linewidth=2)
    plt.xlim(0, 5)
    plt.xlabel('$x$')
    plt.ylabel(r'$F(x, %d, %d)$' % (kminus1, Nminusk))
    plt.title("$F(x, %d, %d)$ Distribution" % (kminus1, Nminusk))
    plt.show()

    # Calculate the required number of users
    download_rate_estimate = 0.02
    sigma2_s = download_rate_estimate * (1. - download_rate_estimate)
    N = 5.3792 * sigma2_s / (0.1 * download_rate_estimate)**2
    print("estimate of N = %d" % round(N))

    # Run the obtained results through the F test
    # (Ftest and ttest are helper functions defined elsewhere in this project)
    input_downloads = [500, 620, 490]
    download_fractions = [entry / N for entry in input_downloads]
    print("F test result = %0.4f" % Ftest(download_fractions, sigma2_s, N))

    # Perform individual t-tests
    print("The 96.6%% confidence interval is = (%0.2f %0.2f)" % (norm.interval(0.966, loc=0, scale=1)))
    for fraction in download_fractions[1:]:
        print("t value = %0.2f (for a measured download rate of %0.4f)" % (ttest(N, fraction, N, download_fractions[0]), fraction))

    return
Example #7
def df(x, df1, df2, ncp=0):
    """
    Calculates the density/point estimate of the F-distribution
    """
    from scipy.stats import f, ncf
    if ncp == 0:
        result = f.pdf(x=x, dfn=df1, dfd=df2, loc=0, scale=1)
    else:
        result = ncf.pdf(x=x, dfn=df1, dfd=df2, nc=ncp, loc=0, scale=1)
    return result
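A usage sketch; the wrapper mirrors R's df(x, df1, df2, ncp) signature:

print(df(1.0, 5, 10))           # central F density at x = 1
print(df(1.0, 5, 10, ncp=2))    # noncentral F density with noncentrality 2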
Example #8
import numpy as np
from scipy.stats import f


def app_time(x, dfn, dfd, a, b):
    mean = 0.0
    dist = np.divide(
        f.pdf(x, dfn, dfd),
        (f.cdf(b, dfn, dfd) -
         f.cdf(a, dfn, dfd)))  # F density for duration, truncated to [a, b]
    dist = np.divide(dist, np.sum(dist))  # normalization over the grid x

    for item in zip(x, dist):
        mean = mean + (item[0] * item[1])  # expectation of duration

    return dist, mean
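A usage sketch with a made-up grid and truncation bounds:

x = np.linspace(0.5, 4.0, 200)
dist, mean = app_time(x, dfn=5, dfd=10, a=0.5, b=4.0)
print("expected duration ~ %.3f" % mean)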
Example #9
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2, norm, t, f


def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))

    # simulate the chi2 distribution as a sum of n squared standard normals
    y = []
    n = 10
    for i in range(1000):
        chi2r = 0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r = chi2r + r[j]**2
        y.append(chi2r)

    ax.hist(y, density=True, alpha=0.2)  # 'normed' was removed from Matplotlib; use density
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))

    # simulate the t-distribution: t = Z / sqrt(chi2_df / df)
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the F-distribution: F = (chi2_dfn / dfn) / (chi2_dfm / dfm)
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # ratio of scaled chi-squares (no square root)
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
Example #10
    def plot_f_distrib_for_many_coefficients(self, features):
        from scipy.stats import f

        # Remove a particular subset of features
        X = np.delete(self.X, [self.features.index(_) for _ in features], 1)

        # Prediction from reduced model
        XT = X.T
        std_error_matrix = inv(XT.dot(X))
        beta = std_error_matrix.dot(XT).dot(self.y)
        y_hat = X.dot(beta)
        rss_reduced_model = np.sum((self.y - y_hat)**2)

        dfn = len(features)
        dfd = self.df

        # This should be distributed as chi squared
        # with degrees of freedom equal to number
        # of dropped features
        rss_diff = (rss_reduced_model - self.rss)
        chi_1 = rss_diff / dfn
        chi_2 = self.pop_var
        f_score = chi_1 / chi_2

        # 5% and 95% percentile
        f_05, f_95 = f.ppf([0.05, 0.95], dfn, dfd)

        x = np.linspace(0.001, 5.0)

        plt.axvline(x=f_05)
        plt.axvline(x=f_95)

        plt.scatter(f_score, f.pdf(f_score, dfn, dfd), marker='o', color='red')
        plt.plot(x, f.pdf(x, dfn, dfd), color='gray', lw=5, alpha=0.6)
        plt.title('f-distribution for dropping features: {0}'.format(features))
        plt.show()
Example #11

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, f


def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the sampling distribution of a variance ratio; unbiased
    # variances (ddof=1) of equal-scale normal samples give F(dfn, dfm)
    y = []
    for i in range(1000):
        r1 = norm.rvs(loc=5, scale=2, size=dfn + 1)
        r2 = norm.rvs(loc=3, scale=2, size=dfm + 1)
        rf = np.var(r1, ddof=1) / np.var(r2, ddof=1)
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)  # 'normed' was removed from Matplotlib
    plt.savefig('sampling_distribution.png')
Example #12

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2, f


def F_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the F-distribution: F = (chi2_dfn / dfn) / (chi2_dfm / dfm)
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # no square root in the F ratio
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('F_distribution.png')
Example #13

def getDataSizeDistribution(self):
    '''
    TODO: Read the data size distribution from an input file. It seems that we do
    not have this column in the input file, so we draw the sizes from a chosen
    distribution instead. Similar to the function above.
    :return: A ndarray of float numbers, with shape (self.deviceNum,)
    '''
    #return np.random.rand(self.deviceNum)
    #return f.pdf(np.random.uniform(0,4,self.deviceNum), 1, 1)
    #return np.random.zipf(1.5,self.deviceNum)
    if self.distribution == 'normal':
        # uniform draw on [parameter[0], parameter[1]], despite the branch name
        return (np.random.rand(self.deviceNum) * (self.parameter[1] - self.parameter[0]) + self.parameter[0])
    elif self.distribution == 'f':
        return f.pdf(np.random.uniform(0, 4, self.deviceNum), self.parameter[0], self.parameter[1])
    elif self.distribution == 'zipf':
        return np.random.zipf(self.parameter, self.deviceNum)
Example #15

def f_distribution_critical_value(f_value,
                                  df_numerator,
                                  df_denominator,
                                  loc=0,
                                  scale=1):
    """
    Evaluates the F-distribution density at an observed f-value.
    (Despite the name, this returns the pdf at f_value, not a
    ppf-based critical value.)
    :param f_value: observed f-value
    :param df_numerator:    degrees of freedom of numerator
    :param df_denominator:  degrees of freedom of denominator
    :param loc:   location parameter of the F distribution
    :param scale: scale parameter of the F distribution
    :return: the F density at f_value
    """
    return float(
        FDistribution.pdf(f_value, df_numerator, df_denominator, loc,
                          scale))
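A usage sketch; the original import is not shown, so FDistribution is assumed to be scipy.stats.f under another name:

from scipy.stats import f as FDistribution

print(f_distribution_critical_value(3.0, 5, 2))  # density of F(5, 2) at 3.0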
Example #16
def getErrorRateDistribution(self):
    if self.distributionError == 'normal':
        res = (np.random.normal(self.parameterError[0],
                                self.parameterError[1], self.deviceNum))
        # clamp rates into [0, 1)
        for i in range(res.shape[0]):
            if res[i] < 0:
                res[i] = 0
            if res[i] > 1:
                res[i] = 0.9
        return res
    elif self.distributionError == 'f':
        return f.pdf(np.random.uniform(0, 4, self.deviceNum),
                     self.parameterError[0], self.parameterError[1]) / 5
    elif self.distributionError == 'zipf':
        res = np.random.zipf(self.parameterError, self.deviceNum) / 100
        for i in range(res.shape[0]):
            if res[i] > 1:
                res[i] = 0.9
        return res
Example #17
import numpy as np
import pandas as pd
from scipy import integrate
from scipy.stats import f


def MalinowskyParameters(data, l):
    if np.shape(data)[0] < np.shape(data)[1]:
        data = np.transpose(data)
    n_row = np.shape(data)[0]  # number of rows
    n_col = np.shape(data)[1]  # number of columns
    ind = np.zeros(n_col - 1)
    ie = np.zeros(n_col - 1)
    index = range(1, n_col)
    for i in range(0, n_col - 1):
        ind[i] = (np.sqrt(
            (np.sum(l[i + 1:n_col])) /
            (n_row * (n_col - index[i])))) / (n_col - index[i])**2
        ie[i] = np.sqrt(index[i] * (np.sum(l[i + 1:n_col])) /
                        (n_row * n_col * (n_col - index[i])))
    pc = np.arange(1., n_col + 1,
                   1)  # maximum number of components (i.e. number of spectra)
    p = np.zeros(np.size(pc))
    for i in range(0, np.size(pc)):
        p[i] = (n_row - pc[i] + 1) * (n_col - pc[i] + 1)
    s1 = np.zeros(np.size(pc))
    s2 = np.zeros(np.size(pc))
    fi = np.zeros(np.size(l) - 1)
    result = np.zeros(np.size(l) - 1)
    a = pc + 1
    for i in range(0, n_col - 1):
        s1[i] = np.sum(
            (n_row - a[i:np.size(pc)] + 1) * (n_col - a[i:np.size(pc)] + 1))
    for j in range(0, n_col - 1):
        s2[j] = np.sum(l[j + 1:np.size(pc) + 1])
    for i in range(0, n_col - 1):
        fi[i] = (s1[i] / p[i]) * (l[i] / s2[i])
    for i in range(0, n_col - 1):
        # upper-tail F probability, expressed in percent
        result[i] = ((integrate.quad(lambda x: f.pdf(x, 1, (n_col - 1) - i),
                                     fi[i], np.inf))[0]) * 100
    statistic = pd.DataFrame({'IND': ind, 'IE': ie, 'F': result})
    statistic.index = statistic.index + 1
    return statistic, pc
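A usage sketch on synthetic data; taking l to be the eigenvalues of the data covariance is an assumption here, since the original call site is not shown:

import numpy as np
data = np.random.rand(50, 6)
l = np.linalg.eigvalsh(np.cov(data.T))[::-1]  # eigenvalues in descending order
statistic, pc = MalinowskyParameters(data, l)
print(statistic)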
Example #18

def getDataSizeDistribution(self):
    res = None
    if self.distributionData == 'normal':
        res = (np.random.normal(self.parameterError[0],
                                self.parameterError[1], self.deviceNum))
    elif self.distributionData == 'f':
        res = f.pdf(np.random.uniform(0, 4, self.deviceNum),
                    self.parameterData[0], self.parameterData[1])
    elif self.distributionData == 'zipf':
        res = np.random.zipf(self.parameterData, self.deviceNum)

    if not self.isRelated:
        return res
    else:
        # reorder sizes so they follow the rank order of the error rates
        index = self.allErrorRate.argsort()
        res.sort()
        finalRes = copy.deepcopy(res)
        j = 0
        for i in index:
            finalRes[i] = res[j]
            j += 1
        return finalRes
Example #19
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2

cvalues = np.linspace(0.1, 40, 100)  # assumed grid; not shown in the original snippet
plt.plot(cvalues, chi2.pdf(cvalues, 1), 'b-', label="Chi2(1)")
plt.plot(cvalues, chi2.pdf(cvalues, 5), 'r-', label="Chi2(5)")
plt.plot(cvalues, chi2.pdf(cvalues, 30), 'g-', label="Chi2(30)")
plt.legend()

#sample_chi2 = np.random.chisquare(1, 10)
#sns.distplot(sample_chi2)
plt.show()

# F-distribution

from scipy.stats import f
fvalues = np.linspace(.1, 5, 100)
# pdf(x, df1, df2): Probability density function at x of F.
plt.plot(fvalues, f.pdf(fvalues, 1, 30), 'b-', label="F(1, 30)")
plt.plot(fvalues, f.pdf(fvalues, 5, 30), 'r-', label="F(5, 30)")
plt.legend()
# cdf(x, df1, df2): Cumulative distribution function of F.
# i.e.
proba_at_f_inf_3 = f.cdf(3, 1, 30)  # P(F(1,30) < 3)
# ppf(q, df1, df2): Percent point function (inverse of cdf) at q of F.
f_at_proba_inf_95 = f.ppf(.95, 1, 30)  # q such that P(F(1,30) < q) = .95
assert np.isclose(f.cdf(f_at_proba_inf_95, 1, 30), .95)  # avoid exact float comparison
# sf(x, df1, df2): Survival function (1 - cdf) at x of F.
proba_at_f_sup_3 = f.sf(3, 1, 30)  # P(F(1,30) > 3)
assert np.isclose(proba_at_f_inf_3 + proba_at_f_sup_3, 1)
# shade the region where the upper-tail probability P(F(1,30) > x) is below 0.05
low_proba_fvalues = fvalues[fvalues > f_at_proba_inf_95]
plt.fill_between(low_proba_fvalues, 0, f.pdf(low_proba_fvalues, 1, 30),
                 alpha=.8, label="P < 0.05")
    
Example #20

    # perform detection test
    T_balanced[indice] = compute_GLRT_statistic(Y_balanced, w)
    T_unbalanced[indice] = compute_GLRT_statistic(Y_unbalanced, w)

threshold = threshold_from_pfa(pfa, N)
print("threshold: %f" % threshold)

#MARK: Display signal
xbins = np.linspace(0.001, 25, 100)
xbins_middle = (xbins[1:] + xbins[:-1]) / 2
n_balanced, bins1, p1 = plt.hist(T_balanced, bins=xbins, density=True, histtype='stepfilled')
n_unbalanced, bins2, p2 = plt.hist(T_unbalanced, bins=xbins, density=True, histtype='stepfilled')

# theoretical pdfs: central F under H0, noncentral F under H1
plt.plot(xbins_middle, f.pdf(xbins_middle, 4, (3*N-6)), 'k--', linewidth=1.5)
ncf_lambda = compute_lambda(w, sym_unbalanced, N, sigmaB_unbalanced)
plt.plot(xbins_middle, ncf.pdf(xbins_middle, 4, (3*N-6), ncf_lambda), 'k--', linewidth=1.5)
plt.axvline(threshold, color='r')
plt.show()

#MARK: export csv file
output = np.zeros((5, np.size(xbins_middle)))
if csv_on == 1:
    output[0,:] = xbins_middle
    output[1,:] = n_balanced
    output[2,:] = f.pdf(xbins_middle, 4, (3*N-6))
    output[3,:] = n_unbalanced
    output[4,:] = ncf.pdf(xbins_middle, 4, (3*N-6), ncf_lambda)
    np.savetxt(filename, output.T, header="bins,hist_balanced,pdf_balanced,hist_unbalanced,pdf_unbalanced", delimiter=",")
Example #21
def pdf(self, F):
    return f.pdf(F, self.dfn, self.dfd, loc=self.loc, scale=self.scale)
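For comparison, scipy's frozen-distribution mechanism gives the same result without a wrapper class:

from scipy.stats import f

frozen = f(10, 5, loc=0, scale=1)
print(frozen.pdf(2.0))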
Example #22
import numpy as np
from scipy.stats import f
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

dfn, dfd = 29, 18
mean, var, skew, kurt = f.stats(dfn, dfd, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(f.ppf(0.01, dfn, dfd),
                f.ppf(0.99, dfn, dfd), 100)
ax.plot(x, f.pdf(x, dfn, dfd),
       'r-', lw=5, alpha=0.6, label='f pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = f(dfn, dfd)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = f.ppf([0.001, 0.5, 0.999], dfn, dfd)
np.allclose([0.001, 0.5, 0.999], f.cdf(vals, dfn, dfd))
# True
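The scipy docs example usually continues by drawing variates and checking them against the pdf; a sketch of that step:

r = f.rvs(dfn, dfd, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()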
Example #23

'''
@Author: Runsen
@WeChat public account: 润森笔记
@Blog: https://blog.csdn.net/weixin_44510615
@Date: 2020/7/5
'''
import numpy as np

import matplotlib.pyplot as plt
from scipy.stats import f
x = np.linspace(0, 3, 100)
plt.plot(x, f.pdf(x, 20, 20), 'k-', label='y=f(x,20,20)')
plt.plot(x, f.pdf(x, 10, 10), 'r-', label='y=f(x,10,10)')
plt.plot(x, f.pdf(x, 10, 5), 'g-', label='y=f(x,10,5)')
plt.plot(x, f.pdf(x, 10, 20), 'b-', label='y=f(x,10,20)')
plt.plot(x, f.pdf(x, 5, 5), 'r--', label='y=f(x,5,5)')
plt.plot(x, f.pdf(x, 5, 10), 'g--', label='y=f(x,5,10)')
plt.plot(x, f.pdf(x, 5, 1), 'y-', label='y=f(x,5,1)')
plt.legend()
plt.show()
Example #25
from scipy.stats import f
print(f.pdf(3, 5, 2))
Example #26
def fplot(dname, ref, alt, year=2017, savename='fplotX', nbins=130):
    ref_pt, ref_rho = ref
    alt_pt, alt_rho = alt
    p1 = (ref_pt + 1) * (ref_rho + 1)
    p2 = (alt_pt + 1) * (alt_rho + 1)

    path = '{dname}/bkgtest_{ref_pt}-{ref_rho}_{alt_pt}-{alt_rho}'.format(
        dname=dname,
        ref_pt=ref_pt,
        ref_rho=ref_rho,
        alt_pt=alt_pt,
        alt_rho=alt_rho)
    print("X", path)
    base_dict = skim_gofs(get_names(path))
    alt_dict = skim_gofs(get_names(path, alt=True))

    base, alt = [], []
    for i in range(len(base_dict)):
        try:
            ia = alt_dict[i]
            ib = base_dict[i]
            alt.append(ia)
            base.append(ib)
        except KeyError:  # toy missing from one of the dicts
            pass

    if len(alt) != len(base):
        raise ValueError("Number of toys for base and ref does not match.")
    fvals = fval(base, alt, p1, p2, nbins)
    f_data = fval(
        get_vals(
            '{dname}/bkgtest_{ref_pt}-{ref_rho}_{alt_pt}-{alt_rho}/refbase.root'
            .format(dname=dname,
                    ref_pt=ref_pt,
                    ref_rho=ref_rho,
                    alt_pt=alt_pt,
                    alt_rho=alt_rho)),
        get_vals(
            '{dname}/bkgtest_{ref_pt}-{ref_rho}_{alt_pt}-{alt_rho}/refalt.root'
            .format(dname=dname,
                    ref_pt=ref_pt,
                    ref_rho=ref_rho,
                    alt_pt=alt_pt,
                    alt_rho=alt_rho)), p1, p2, nbins)[0]
    print(f_data)

    from scipy.stats import f
    x_lim = max(np.percentile(fvals, 90), f_data * 1.2)
    x = np.linspace(0, x_lim, 200)
    bins = np.linspace(0, x_lim, 30)
    width = bins[1] - bins[0]

    fig, ax = plt.subplots()
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    ax.plot(x,
            len(base) * width * f.pdf(x, p2 - p1, nbins - p2),
            color='red',
            label='F-dist, ndf({},{})'.format(p2 - p1, nbins - p2))
    ax.hist(fvals,
            bins,
            facecolor='none',
            edgecolor='black',
            histtype='stepfilled',
            lw=2,
            label="Toys, N = {}".format(len(fvals)))
    ax.hist(fvals[fvals > f_data],
            bins,
            facecolor='steelblue',
            edgecolor='gray',
            histtype='stepfilled',
            alpha=0.3,
            label='p-value = {}'.format(
                round(float(len(fvals[fvals > f_data])) / len(fvals), 3)))
    ax.annotate(
        "",
        xy=(f_data, 0),
        xycoords=trans,
        xytext=(f_data, 0.25),
        textcoords=trans,
        arrowprops=dict(lw='4',
                        color='b',
                        arrowstyle="->,head_length=1.5,head_width=0.5"),
    )
    ax.plot([], [],
            color='blue',
            lw=2,
            label="Observed = {:.3f}".format(f_data))

    title = "TF({},{}) x TF({},{})".format(ref_pt, ref_rho, alt_pt, alt_rho)
    ax.legend(title=title)
    hep.cms.label(data=True, year=year, ax=ax)
    ax.set_xlim(0, x_lim)
    xlab = r"$\frac{-2log(\lambda_1/\lambda_2)/(p_2-p_1)}{-2log\lambda_2/(n-p_2)}$"
    ax.set_xlabel(xlab, x=1, ha='right')
    ax.set_ylabel("Pseudoexperiments", y=1, ha='right')

    fig.savefig('{}.pdf'.format(savename),
                dpi=300,
                transparent=True,
                bbox_inches='tight')
    fig.savefig('{}.png'.format(savename),
                dpi=300,
                transparent=True,
                bbox_inches='tight')
Example #27
def f23test(data, B, fix):
    F = np.zeros(2, dtype=float)    # pdf
    C = np.zeros(2, dtype=float)    # p-values
    B2 = np.zeros(2, dtype=float)   # for the 2nd order EOS, assign K0 and V0 from user guesses
    B2[0] = B[0]
    B2[1] = B[-1]
                
    if data.EOS_type == GEOST_thermo.types().names[0]:   # Figure out what EOS's to use
        # If birch murnaghan
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM2_V, fjacb=GEOST_thermo.BM2_V_JACB,
                                  fjacd=GEOST_thermo.BM2_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B2, ifixb=[fix[0],fix[-1]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives
        output = odr.run()            # Output of ODR run
        ref_B2  = output.beta         # LSQ best-fit parameters
        err_B2  = output.sd_beta      # Parameter errors (1-sigma)
        f2 = GEOST_thermo.BM2_V(ref_B2,data.V)
        df2 = data.V.shape[0] - 2
                
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM3_V, fjacb=GEOST_thermo.BM3_V_JACB,
                                  fjacd=GEOST_thermo.BM3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.BM3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
        
    elif data.EOS_type == GEOST_thermo.types().names[1]:
        # If Natural strain
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS2_V, fjacb=GEOST_thermo.NS2_V_JACB, fjacd=GEOST_thermo.NS2_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B2, ifixb=[fix[0],fix[-1]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives
        output = odr.run()            # Output of ODR run
        ref_B2  = output.beta         # LSQ best-fit parameters
        err_B2  = output.sd_beta      # Parameter errors (1-sigma)
        f2 = GEOST_thermo.NS2_V(ref_B2,data.V)
        df2 = data.V.shape[0] - 2
            
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS3_V, fjacb=GEOST_thermo.NS3_V_JACB,
                                  fjacd=GEOST_thermo.NS3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.NS3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
        
    chisq_2 = float(0)
    chisq_3 = float(0)
    for i in range(data.P.shape[0]):   # Compute the chi-squared for each EOS
        chisq_2 += (data.P[i] - f2[i])**2
        chisq_3 += (data.P[i] - f3[i])**2
                
    # Compute the F-statistic
    Fx23 = (chisq_2 - chisq_3)/(chisq_3/df3)
    
    # Use Scipy's built-in F-distribution methods
    F = f_dist.pdf(Fx23, 1, df3)
        
    # Compute the P-value
    C = 1 - f_dist.cdf(Fx23, 1, df3)

    x1 = np.linspace(0.01, 2*Fx23, 128)
    pdf23 = f_dist.pdf(x1, 1, df3)
    cdf23 = f_dist.cdf(x1, 1, df3)

    # Results
    plt.figure()
    fig = plt.gcf()
    fig.canvas.manager.set_window_title("F-Test Results")  # canvas.set_window_title was removed from Matplotlib
    plt.plot(x1, pdf23, 'r', linewidth=3, alpha=0.8)
    plt.plot(x1, cdf23, 'b', linewidth=3, alpha=0.8)
    plt.plot(Fx23, f_dist.pdf(Fx23, 1, df3), 'ko')
    plt.fill_between(x1, 0, pdf23, where=f_dist.cdf(Fx23, 1, df3)<cdf23, facecolor='red', alpha=0.2)
    plt.xticks(fontsize=14)
    plt.ylim([-0.01,1.01])
    plt.title("F test for 2nd vs. 3rd order EOS", fontsize=14)
    plt.xlabel(r"$\left( \chi^{2}_{2} - \chi^{2}_{3} \right) / \left(\chi^{2}_{3} / \nu_{3} \right)$",
               fontsize=12)
    plt.ylabel(r"PDF/CDF")
    plt.legend(['PDF', 'CDF', r"$F_{X}$"], loc='upper right', numpoints=1)
    plt.text(Fx23, f_dist.pdf(Fx23, 1, df3)+0.05, "p-value= {:8.4f}".format(1-f_dist.cdf(Fx23, 1, df3)), fontsize=14)    
    plt.tight_layout()
    return [chisq_2/df2, chisq_3/df3, Fx23, f_dist.cdf(Fx23, 1, df3)]
Example #28
def fqtest(VT, P, VTerr, Perr, debye, Bhit, fix):
    F = np.zeros(2, dtype=float)    # pdf
    C = np.zeros(2, dtype=float)    # p-values
    Bhit2 = np.zeros(3, dtype=float)   # for q fixed at 1
    Bhit2[0] = Bhit[0]
    Bhit2[1] = Bhit[1]
    Bhit2[2] = 1.
    fix2 = list(fix)
    fix2[-1] = 0
    odr_model2 = odrpack.Model(fcn=debye.P_thermal, fjacb=debye.JACB, fjacd=debye.JACD)
    odr_data2  = odrpack.RealData(x=VT, y=P, sx=VTerr, sy=Perr)
    odr2 = odrpack.ODR(odr_data2, odr_model2, beta0=Bhit2, ifixb=fix2)
    odr2.set_job(deriv=1)          # Use user-supplied derivatives
    output2 = odr2.run()            # Output of ODR run
    ref_B2  = output2.beta         # LSQ best-fit parameters
    err_B2  = output2.sd_beta      # Parameter errors (1-sigma)
    f2 = debye.P_thermal(ref_B2, VT)
    df2 = VT.shape[1] - 2

    odr_model3 = odrpack.Model(fcn=debye.P_thermal, fjacb=debye.JACB, fjacd=debye.JACD)
    odr_data3  = odrpack.RealData(x=VT, y=P, sx=VTerr, sy=Perr)
    odr3 = odrpack.ODR(odr_data3, odr_model3, beta0=Bhit, ifixb=fix)
    odr3.set_job(deriv=1)            # NB: using numerical derivatives here!
    output3 = odr3.run()             # Output of ODR run
    ref_B3  = output3.beta           # LSQ best-fit parameters
    err_B3  = output3.sd_beta        # Parameter errors (1-sigma)
    f3 = debye.P_thermal(ref_B3, VT)
    df3 = VT.shape[1] - 3

    chisq_2 = float(0)
    chisq_3 = float(0)
    for i in range(P.shape[0]):   # Compute the chi-squared for each EOS
        chisq_2 += (P[i] - f2[i])**2
        chisq_3 += (P[i] - f3[i])**2

    # Compute the F-statistic
    Fx23  = (chisq_2 - chisq_3)/(chisq_3/df3)
    x1    = np.linspace(0.01, 2*Fx23, 128)
    pdf23 = f_dist.pdf(x1, 1, df3)
    cdf23 = f_dist.cdf(x1, 1, df3)

    # Use Scipy's built-in F-distribution methods
    F = f_dist.pdf(Fx23, 1, df3)
        
    # Compute the P-value
    C = 1 - f_dist.cdf(Fx23, 1, df3)

    # Finally, make the plot. Should be a 1 row 2 column plot showing
    # f-test for 2nd to 3rd order EOS and 3rd to 4th order EOS.
    plt.figure()
    fig = plt.gcf()
    fig.canvas.manager.set_window_title("F-Test Results")  # canvas.set_window_title was removed from Matplotlib
    plt.plot(x1, pdf23, 'r', linewidth=3, alpha=0.8)
    plt.plot(x1, cdf23, 'b', linewidth=3, alpha=0.8)
    plt.plot(Fx23, f_dist.pdf(Fx23, 1, df3), 'ko')
    plt.fill_between(x1, 0, pdf23, where=f_dist.cdf(Fx23, 1, df3)<cdf23, facecolor='red', alpha=0.2)
    plt.xticks(fontsize=14)
    plt.ylim([-0.01,1.01])
    plt.title("Comparing q != 1", fontsize=14)
    plt.xlabel(r"$\left( \chi^{2}_{2} - \chi^{2}_{3} \right) / \left(\chi^{2}_{3} / \nu_{3} \right)$",
               fontsize=12)
    plt.legend(['PDF', 'CDF', r"$F_{X}$"], loc='upper right', numpoints=1)
    plt.text(Fx23, f_dist.pdf(Fx23, 1, df3)+0.05, "p-value= {:8.4f}".format(1-f_dist.cdf(Fx23, 1, df3)))
    
    plt.tight_layout()    
    return [chisq_2, output2.stopreason[0], chisq_3, output3.stopreason[0], Fx23, f_dist.cdf(Fx23, 1, df3)]
Example #29
## Compute F score
n = y.size
fval = ss_reg / (ss_res / (n - 2))

'''
- Compute the p-value:
  * Plot the F(1,n) distribution for 100 f values within [10, 25]. Draw P(F(1,n)>F) ie. color the surface defined by x values larger than F below the F(1,n).
  * P(F(1,n)>F) is the p-value, compute it.
'''
## Plot the F(1, n) distribution for 100 f values within [10, 25]
## Depict P(F(1, n) > F) i.e. color the surface defined by x values larger than F below the F(1, n)
from scipy.stats import f
fvalues = np.linspace(10, 25, 100)

plt.plot(fvalues, f.pdf(fvalues, 1, 30), 'b-', label="F(1, 30)")

upper_fval_fvalues = fvalues[fvalues > fval]
plt.fill_between(upper_fval_fvalues, 0, f.pdf(upper_fval_fvalues, 1, 30), alpha=.8)

# pdf(x, df1, df2): Probability density function at x of the given RV.
plt.legend()

## P(F(1, n) > F) is the p-value, compute it

# Survival function (1 - `cdf`)
pval = f.sf(fval, 1, n - 2)



## With statsmodels
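A minimal statsmodels sketch, assuming x and y are the 1-D arrays used above:

import statsmodels.api as sm
model = sm.OLS(y, sm.add_constant(x)).fit()
print(model.fvalue, model.f_pvalue)  # same F statistic and p-value as the manual route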
Example #30
# (this fragment relies on pylab-style star imports from earlier in the script:
#  size, zeros, linspace, random, plt and scipy.stats' f)
    xsensors_m = xsensors_m - xsensors_m[0, :]
    M = size(xsensors_m, 0)
T_sec = 30
N = int(T_sec * Fs_Hz)
Lruns = 10000
Fstat = zeros(Lruns)

for ir in range(0, Lruns):
    x = random.randn(N, M)
    F = maxfstat(x, Fs_Hz, xsensors_m, gridaz_deg, gridel_deg,
                 gridc_mps)
    Fstat[ir] = F[0]
#%%

xtheo = linspace(0.5, 1.5, 100)
ytheo = f.pdf(xtheo, N, N * (M - 1))

HorizontalSize = 5
VerticalSize = 3
figsimul = plt.figure(num=2, figsize=(HorizontalSize, VerticalSize),
                      edgecolor='k', facecolor=[1, 1, 0.92])
figsimul.clf()
h1 = plt.hist(Fstat, density=True, bins=30, label='histogram')  # 'normed' was removed from Matplotlib
h2 = plt.plot(xtheo, ytheo, 'r', linewidth=2, label='Fisher')
plt.legend(loc='best')

dirfigsave = '/Users/maurice/etudes/stephenA/propal2/figures/'
tt = '%sthetafixFisher.pdf' % dirfigsave
plt.show()

figsimul.savefig(tt, format='pdf')
Example #33
print('Ftest')
##########################    F-TEST    ###############################
bigN = len(AllWave)  # Number of data points
Nparam1 = 3.
Nparam2 = 5.
dof1 = bigN - Nparam1
dof2 = bigN - Nparam2

#chi1 = 300. #428.65 #383.6 #really bad
#chi2 = 300.  #results in chisqr close to 1

ftest = (chisq1 / (dof1)) / (chisq2 / (dof2))
#RJ+BB vs RJ+BB+BB
#ftest = ( (chi1-chi2)/(dof1-dof2) ) / (chi2/dof2)

proba_at_f_pdf = f.pdf(ftest, dof1, dof2)
proba_at_f_cdf = f.cdf(ftest, dof1, dof2)  # P(F(dof1, dof2) < ftest)
f_at_proba_98 = f.ppf(.98, dof1, dof2)  # q such that P(F(dof1, dof2) < q) = .98
proba_at_norm_idf = Norm.isf(proba_at_f_cdf)
proba_at_norm_ppf = Norm.ppf(proba_at_f_cdf)

print('')
print('-----------')
print('Source:         ', Source)
print('ftest:          ', ftest)
print('proba_at_f_pdf: ', proba_at_f_pdf)
print('proba_at_f_cdf: ', proba_at_f_cdf)
print('f_at_proba_98:  ', f_at_proba_98)
print('proba_at_norm_isf: ', proba_at_norm_idf)  # inverse survival function
print('proba_at_norm_ppf: ', proba_at_norm_ppf, ' sigma')  # number of sigma away
print('-----------')
Example #35
def f34test(data, B, fix):    
    F = np.zeros(2, dtype=float)    # pdf
    C = np.zeros(2, dtype=float)    # p-values
    B3 = np.zeros(3, dtype=float)   # for the 2nd order EOS, assign K0 and V0 from user guesses
    B3[0] = B[0]
    B3[1] = B[1]
    B3[2] = B[3]
                
    if data.EOS_type == GEOST_thermo.types().names[0]:   # Figure out what EOS's to use
        # If birch murnaghan
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM3_V, fjacb=GEOST_thermo.BM3_V_JACB,
                                  fjacd=GEOST_thermo.BM3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B3, ifixb=[fix[0],fix[1],fix[3]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.BM3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
                
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM4_V, fjacb=GEOST_thermo.BM4_V_JACB,
                                  fjacd=GEOST_thermo.BM4_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B4  = output.beta         # LSQ best-fit parameters
        err_B4  = output.sd_beta      # Parameter errors (1-sigma)
        f4 = GEOST_thermo.BM4_V(ref_B4,data.V)  # evaluate the 4th-order fit (was a copy-paste of the 3rd-order line)
        df4 = data.V.shape[0] - 4
        
    elif data.EOS_type == GEOST_thermo.types().names[1]:
        # If Natural strain
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS3_V, fjacb=GEOST_thermo.NS3_V_JACB, fjacd=GEOST_thermo.NS3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B3, ifixb=[fix[0], fix[1], fix[3]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.NS3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
        
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS4_V, fjacb=GEOST_thermo.NS4_V_JACB,
                                  fjacd=GEOST_thermo.NS4_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B4  = output.beta         # LSQ best-fit parameters
        err_B4  = output.sd_beta      # Parameter errors (1-sigma)
        f4 = GEOST_thermo.NS4_V(ref_B4,data.V)
        df4 = data.V.shape[0] - 4
                    
    elif data.EOS_type == GEOST_thermo.types().names[2]:
        print("ERROR in PLOTS: Cannot do F-test using Vinet EOS.")  # 'self' is undefined in this function
    else:
        print("ERROR in PLOTS: Unrecognized value of EOS_SELECT")

    chisq_3 = float(0)
    chisq_4 = float(0)
    for i in range(data.P.shape[0]):   # Compute the chi-squared for each EOS
        chisq_3 += (data.P[i] - f3[i])**2
        chisq_4 += (data.P[i] - f4[i])**2
                
    # Compute the F-statistic
    Fx34 = (chisq_3 - chisq_4)/(chisq_4/df4)

    x1 = np.linspace(0.01, 2*Fx34, 128)
    pdf34 = f_dist.pdf(x1, 1, df4)
    cdf34 = f_dist.cdf(x1, 1, df4)

    # Use Scipy's built-in F-distribution methods
    F = f_dist.pdf(Fx34, 1, df4)
        
    # Compute the P-value
    C = 1 - f_dist.cdf(Fx34, 1, df4)

    # Finally, make the plot. Should be a 1 row 2 column plot showing
    # f-test for 2nd to 3rd order EOS and 3rd to 4th order EOS.
    plt.figure()
    fig = plt.gcf()
    fig.canvas.manager.set_window_title("F-Test Results")  # canvas.set_window_title was removed from Matplotlib
    plt.plot(x1, pdf34, 'r', linewidth=3, alpha=0.8)
    plt.plot(x1, cdf34, 'b', linewidth=3, alpha=0.8)
    plt.plot(Fx34, f_dist.pdf(Fx34, 1, df4), 'ko')
    plt.fill_between(x1, 0, pdf34, where=f_dist.cdf(Fx34, 1, df4)<cdf34, facecolor='red', alpha=0.2)
    plt.xticks(fontsize=14)
    plt.ylim([-0.01,1.01])
    plt.title("Comparing 3rd vs. 4th order EOS", fontsize=14)
    plt.xlabel(r"$\left( \chi^{2}_{3} - \chi^{2}_{4} \right) / \left(\chi^{2}_{4} / \nu_{4} \right)$",
               fontsize=12)
    plt.legend(['PDF', 'CDF', r"$F_{X}$"], loc='upper right', numpoints=1)
    plt.text(Fx34, f_dist.pdf(Fx34, 1, df4)+0.05, "p-value= {:8.4f}".format(1-f_dist.cdf(Fx34, 1, df4)), fontsize=14)
    plt.tight_layout()
    return [chisq_3/df3, chisq_4/df4, Fx34, f_dist.cdf(Fx34, 1, df4)]
Example #36
The module name for the $F$ distribution is `f`.
```
f.pdf(x, dfn, dfd)
f.cdf(x, dfn, dfd)
f.ppf(a, dfn, dfd)
f.rvs(dfn, dfd, size=1)
```
* `dfn`: numerator degrees of freedom
* `dfd`: denominator degrees of freedom

Import `f` from `scipy.stats` and draw the probability density function.

from scipy.stats import f
x = np.linspace(0.001,5,1000)
y = f.pdf(x, dfn=5, dfd=1)
plt.plot(x,y)
pass

With `dfn=5` and `dfd=1`, what is the probability that `x` is 0.1 or less?

f.cdf(0.1, dfn=5, dfd=1)

With `dfn=5` and `dfd=1`, what is the probability that `x` is 5 or greater?

1-f.cdf(5, dfn=5, dfd=1)

### Uniform Distribution

The module name for the uniform distribution is `uniform`.
```
Example #37
    )
else:
    print(
        "[2]The null hypothesis for the blocks in the two-way analysis is not rejected, so there are no differences between the blocks"
    )
print("One-way analysis")
print("The value taken from the one-way ANOVA table by rows is: ", FTU1)
if FU1 > FTU1:
    print(
        "[3]The null hypothesis for the rows (treatments) is rejected, so there is a difference between them"
    )
else:
    print(
        "[3]The null hypothesis for the rows (treatments) is not rejected, so there is no difference between them"
    )
print("The value taken from the one-way ANOVA table by columns is: ", FTU2)
if FU2 > FTU2:  # presumably the column statistic; the scraped code repeated FU1 > FTU1 here
    print(
        "[4]The null hypothesis for the columns (blocks) is rejected, so there is a difference between them"
    )
else:
    print(
        "[4]The null hypothesis for the columns (blocks) is not rejected, so there is no difference between them"
    )

from scipy.stats import f

val = f.pdf(0.05, 3, 6)
print(val)
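Note that f.pdf(0.05, 3, 6) evaluates the density at 0.05; the ANOVA-table critical value this script compares against would come from the percent point function instead:

crit = f.ppf(0.95, 3, 6)  # upper 5% critical value of F(3, 6)
print(crit)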
Example #38

# Reject H0 at level a if T2 > (p(n-1)/(n-p)) * F_{p,n-p}(a),
# where F is the F distribution.
num_dof = 2
den_dof = 45 - 2
a = .43
#rv = f.pdf(dfn=num_dof, dfd=den_dof, a, loc=0, scale=1)
#central F suppositions
rv = (2 * (45 + 1 - 2) / (45 + 1 - 2 - 1)) * f.ppf(a, num_dof, den_dof)
#rm = (2*(45+1-2)/(45+1-2-1))*f.cdf(a, num_dof, den_dof)

fig = plt.figure(figsize=(13, 8))
x = np.linspace(f.ppf(0.01, num_dof, den_dof), f.ppf(0.99, num_dof, den_dof),
                100)
x_alpha = np.linspace(f.ppf(0.95, num_dof, den_dof),
                      f.ppf(0.99, num_dof, den_dof), 100)
plt.plot(x, f.pdf(x, num_dof, den_dof), 'b-', lw=3, label='f pdf')
plt.title('Fisher PDF for dfn = {}, dfd = {}'.format(num_dof, den_dof))
plt.fill_between(x_alpha,
                 f.pdf(x_alpha, num_dof, den_dof),
                 color='r',
                 label=r'$\alpha$ = 0.05')
plt.legend(loc='upper right')
plt.savefig('./fischer.pdf')

print(t, rv)  # t is computed earlier in the original script

#NOW WE DO THE SAME WITH THE VALUES FROM THE PAIRED LINES METHOD:
x_paired = [
    0.6647315155340111, 0.6528186562246899, 0.4926493627791132,
    1.2617404216106476, 0.7570001093882107
]
Example #39
#== compute the p-value with the asymptotic distribution
#   (not independent)
ppv = pvalunderH0(FF, N, xsensors_m, Fs_Hz,
                  range_azimuth_deg,
                  range_elevation_deg, range_velocity_mps)

# p-values with the limiting Gaussian (independent) and F (independent)
ppvG = 1 - norm.cdf(FF, 1.0, sqrt(2.0 * M / (M - 1.0) / N))**Q
ppvF = 1 - f.cdf(FF, N, N * (M - 1))**Q

# pdf of the max of the limiting Gaussian (independent) and F (independent)
linx = linspace(0.69, 1.3, 200)
sigmaGlim = sqrt(2.0 * M / (M - 1.0) / N)
nu1 = N
nu2 = N * (M - 1)
pdffromF = f.pdf(linx, nu1, nu2)
pdffromFind = Q * pdffromF * (f.cdf(linx, nu1, nu2)**(Q - 1))
pdffromGind = Q * norm.pdf(linx, 1.0, sigmaGlim) * (norm.cdf(linx, 1.0, sigmaGlim)**(Q - 1))


dirfigsave = '/Users/maurice/etudes/stephenA/propal2/figures/'

#%%
#
#HorizontalSize = 6
#VerticalSize   = 6
#figpvalFoT     = plt.figure(num=1,figsize=(HorizontalSize,VerticalSize), 
#                            edgecolor='k', facecolor = [1,1,0.92]);
#plt.subplot(2,1,1)
#plt.ylabel("Frequency")
#plt.title("based on the asymptotic distribution")
Example #40

# Remember from the last chapter that we can partition the total variance in the data ($SS_{total}$) into the variance that is explained by the model ($SS_{model}$) and the variance that is not ($SS_{error}$).  We can then compute a *mean square* for each of these by dividing them by their degrees of freedom; for the error this is $N - p$ (where $p$ is the number of means that we have computed), and for the model this is $p - 1$:
#
# $$
# MS_{model} =\frac{SS_{model}}{df_{model}}= \frac{SS_{model}}{p-1}
# $$
#
# $$
# MS_{error} = \frac{SS_{error}}{df_{error}} = \frac{SS_{error}}{N - p}
# $$
#
# With ANOVA, we want to test whether the variance accounted for by the model is greater than what we would expect by chance, under the null hypothesis of no differences between means.  Whereas for the t distribution the expected value is zero under the null hypothesis, that's not the case here, since sums of squares are always positive numbers.  Fortunately, there is another standard distribution that describes how ratios of sums of squares are distributed under the null hypothesis: The *F* distribution (see figure \@ref(fig:FDist)). This distribution has two degrees of freedom, which correspond to the degrees of freedom for the numerator (which in this case is the model), and the denominator (which in this case is the error).
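# As a worked sketch of this ratio (made-up sums of squares, not the chapter's dataset):

# %%
from scipy.stats import f
SS_model, SS_error = 30.0, 120.0  # hypothetical sums of squares
N, p = 48, 3                      # hypothetical sample size and number of group means
MS_model = SS_model / (p - 1)
MS_error = SS_error / (N - p)
F_ratio = MS_model / MS_error
p_value = f.sf(F_ratio, p - 1, N - p)  # upper-tail probability under the null
print(F_ratio, p_value)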

# %%
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f

x = np.arange(0.1, 10, 0.1)
ax = sns.lineplot(x=x, y=f.pdf(x, 1, 1), color='r', label='df(1,1)')
ax = sns.lineplot(x=x, y=f.pdf(x, 50, 1), color='g', label='df(50,1)')
ax = sns.lineplot(x=x, y=f.pdf(x, 50, 10), color='b', label='df(50,10)')
ax.set(xlabel='F Values', ylabel='Density')
plt.legend()
plt.show()

# %% [markdown]
# To create an ANOVA model, we extend the idea of *dummy coding* that you encountered in the last chapter. Remember that for the t-test comparing two means, we created a single dummy variable that took the value of 1 for one of the conditions and zero for the others.  Here we extend that idea by creating two dummy variables, one that codes for the Drug 1 condition and the other that codes for the Drug 2 condition.  Just as in the t-test, we will have one condition (in this case, placebo) that doesn't have a dummy variable, and thus represents the baseline against which the others are compared; its mean defines the intercept of the model. Let's create the dummy coding for drugs 1 and 2.

# %%
df['drug1']=df['group']=='drug1'
df['drug2']=df['group']=='drug2'

# %% [markdown]
# Now we can fit a model using the same approach that we used in the previous chapter:
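# A sketch of that fit, assuming the chapter's data frame df; the column name
# 'outcome' is a placeholder, not taken from the original:

# %%
import statsmodels.formula.api as smf
model = smf.ols('outcome ~ drug1 + drug2', data=df).fit()
print(model.summary())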
Example #41
def fplot(fvals,
          f_data,
          ref,
          alt,
          year=2017,
          nbins=130,
          savename=None,
          mc=False):
    ref_pt, ref_rho = ref
    alt_pt, alt_rho = alt
    p1 = (ref_pt + 1) * (ref_rho + 1)
    p2 = (alt_pt + 1) * (alt_rho + 1)

    from scipy.stats import f
    x_lim = max(np.percentile(fvals, 95), f_data * 1.05, np.median(fvals) * 3)
    x = np.linspace(0, x_lim, 200)
    bins = np.linspace(0, x_lim, 30)
    width = bins[1] - bins[0]

    goodvals = fvals[fvals > 0]

    fig, ax = plt.subplots()
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    with np.errstate(divide='ignore'):
        ax.plot(x,
                len(goodvals) * width * f.pdf(x, p2 - p1, nbins - p2),
                color='red',
                label='F-dist, ndf({},{})'.format(p2 - p1, nbins - p2))
    ax.hist(fvals,
            bins,
            facecolor='none',
            edgecolor='black',
            histtype='stepfilled',
            lw=2,
            label="Toys > 0, N = {}".format(len(goodvals)))
    ax.hist(goodvals[goodvals > f_data],
            bins,
            facecolor='steelblue',
            edgecolor='gray',
            histtype='stepfilled',
            alpha=0.3,
            label='p-value = {}'.format(
                round(
                    float(len(goodvals[goodvals > f_data])) / len(goodvals),
                    3)))
    ax.annotate(
        "",
        xy=(f_data, 0),
        xycoords=trans,
        xytext=(f_data, 0.25),
        textcoords=trans,
        arrowprops=dict(lw='4',
                        color='b',
                        arrowstyle="->,head_length=1.5,head_width=0.5"),
    )
    ax.plot([], [],
            color='blue',
            lw=2,
            label="Observed = {:.3f}".format(f_data))

    title = "TF({},{}) x TF({},{})".format(ref_pt, ref_rho, alt_pt, alt_rho)
    ax.legend(title=title)
    hep.cms.label(data=not mc, year=year, ax=ax)
    ax.set_xlim(0, x_lim)
    xlab = r"$\frac{-2log(\lambda_1/\lambda_2)/(p_2-p_1)}{-2log\lambda_2/(n-p_2)}$"
    ax.set_xlabel(xlab, x=1, ha='right')
    ax.set_ylabel("Pseudoexperiments", y=1, ha='right')

    if savename is not None:
        fig.savefig('{}.pdf'.format(savename),
                    dpi=300,
                    transparent=True,
                    bbox_inches='tight')
        fig.savefig('{}.png'.format(savename),
                    dpi=300,
                    transparent=True,
                    bbox_inches='tight')
Example #42

import numpy as np
from scipy.stats import f
import matplotlib.pyplot as plt

x = np.arange(0, 10, .001)
plt.plot(x, f.pdf(x, 1, 1), x, f.pdf(x, 3, 8), x, f.pdf(x, 8, 3), x, f.pdf(x, 40, 40))

plt.xlim(0, 10)
plt.ylim(0, 1.5)

plt.xlabel('$x$', fontsize=20)
plt.ylabel('$P(X=x | K_1,K_2)$', fontsize=18)

plt.title('Fisher-Distribution', fontsize=20)
plt.legend(['$K_1=1, K_2=1$', '$K_1=3, K_2=8$', '$K_1=8, K_2=3$', '$K_1=40, K_2=40$'])
plt.show()