Example #1
import math

import numpy as np
from scipy.stats import rankdata, f


def IPN_kendallW(X):
    """
    Kendall's W (coefficient of concordance).
    X is a 2D numpy array (n*k ratings matrix);
    n is the number of objects and k is the number of judges.
    """
    n, k = np.shape(X)

    # Rank each judge's column; ties receive average ranks.
    # Use a float array so average ranks are not truncated.
    R = np.zeros(np.shape(X))
    for i in range(np.shape(X)[1]):
        R[:, i] = rankdata(X[:, i])
    R_new = np.sort(np.round(R), axis=0)
    A = np.tile(np.arange(1, n + 1), (k, 1)).T  # np.matlib.repmat is deprecated
    T = np.sum(np.array(A - R_new, dtype=bool), axis=0) + 1
    RS = np.sum(R, axis=1)
    S = np.sum(np.square(RS)) - n * math.pow(np.mean(RS), 2)
    F = k * k * (n * n * n - n) - k * np.sum(np.power(T, 3) - T)
    W = 12.0 * S / F

    Fdist = W * (k - 1) / (1 - W)
    nu1 = n - 1 - (2.0 / k)
    nu2 = nu1 * (k - 1)
    # p-value is the upper tail (survival function), not the density
    p = f.sf(Fdist, nu1, nu2)

    return W, p, Fdist
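
A quick usage sketch for the function above (hypothetical ratings: four objects scored by three judges):

X = np.array([[1., 2., 1.],
              [4., 3., 4.],
              [2., 1., 2.],
              [3., 4., 3.]])
W, p, Fdist = IPN_kendallW(X)
print(W, p, Fdist)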
Example #3
 def f_contrast(self):
     '''
     Returns the p-value for the F test of H0: b1 = b2 = ... = bn = 0
     '''
     # F statistic comparing the restricted (H0) model to the full model;
     # ssrh0 is taken to be the residual sum of squares under H0
     fvalue = ((self.ssrh0 - self.ssr) / (self.k - 1)) / (self.ssr / (self.n - self.k))
     # upper-tail probability gives the p-value (the pdf would give a density, not a probability)
     pvalue = f.sf(fvalue, self.k - 1, self.n - self.k)
     return pvalue
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f


def plot_f_distribution(fvalue, dfn, dfd):
    # Set figure
    plt.figure(figsize=(8, 6))

    # Critical value at the 95% level for the given degrees of freedom
    rejection_reg = f.ppf(q=.95, dfn=dfn, dfd=dfd)
    mean, var, skew, kurt = f.stats(dfn, dfd, moments='mvsk')  # moments (unused here)

    x = np.linspace(f.ppf(0.01, dfn, dfd), f.ppf(0.99, dfn, dfd), 100)

    # Plot values
    plt.plot(x,
             f.pdf(x, dfn, dfd),
             alpha=0.6,
             label=' X ~ F({}, {})'.format(dfn, dfd))
    plt.axvline(x=fvalue, label='Observed: {:.2f}'.format(fvalue))
    plt.vlines(rejection_reg,
               0.0,
               1.0,
               linestyles="dashdot",
               label="Crit. Value: {:.2f}".format(rejection_reg))
    plt.legend()
    plt.ylim(0.0, 1.0)
    plt.xlim(0.0, 20.0)
    plt.title('F-Distribution dfn:{}, dfd:{}'.format(dfn, dfd))
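
A usage sketch for the function above (the F value and degrees of freedom are made up):

plot_f_distribution(fvalue=4.2, dfn=3, dfd=40)
plt.show()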
def app_time(x, dfn, dfd, a, b):
    mean = 0.0
    dist = np.divide(f.pdf(x, dfn, dfd), (f.cdf(b, dfn, dfd) - f.cdf(a, dfn, dfd))) # f-dist for duration, truncated from a to b
    dist = np.divide(dist, np.sum(dist)) # normalization

    for xi, pi in zip(x, dist):
        mean += xi * pi  # expectation of duration

    return dist, mean
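
A usage sketch for app_time (grid and truncation bounds are made-up values):

x = np.linspace(0.5, 3.0, 50)  # grid spanning the truncation interval [a, b]
dist, mean = app_time(x, dfn=4, dfd=10, a=0.5, b=3.0)
print(mean)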
Example #6
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import f, norm
# NB: Ftest and ttest are helper functions defined elsewhere in the original script


def main():
	# Input parameters
	Nminusk = 10000
	kminus1 = 2
	step = 0.001

	# Integrate the F-distribution to get the critical value
	F = 0.
	integrate = 0
	while integrate < 0.95:
		F += step
		integrate += f.pdf(F, kminus1, Nminusk)*step
		if integrate > 0.95:
			print "F value at 95%% confidence level is %0.1f" % F
			break
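	# Note: scipy gives this 95% critical value directly via the inverse CDF,
	# without manual integration: f.ppf(0.95, kminus1, Nminusk)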

	# Plot the F-distribution
	x = np.linspace(0, 100, 1000)
	plt.plot(x, f.pdf(x, kminus1, Nminusk), color="blue", linewidth=3, label="F pdf")
	plt.axvline(F, color="black", linestyle="--", linewidth=2, label="95% critical value")
	plt.xlim(0, 5)
	plt.xlabel('$x$')
	plt.ylabel(r'$F(x, %d, %d)$' % (kminus1, Nminusk))
	plt.title("$F(x, %d, %d)$ Distribution" % (kminus1, Nminusk))
	plt.legend()
	plt.show()

	# Calculate the required number of users
	download_rate_estimate = 0.02
	sigma2_s = download_rate_estimate*(1. - download_rate_estimate)
	N = 5.3792*sigma2_s/(0.1*download_rate_estimate)**2
	print "estimate of N = %d" % round(N)

	# Run the obtained results through the F test
	input_downloads = [500, 620, 490]
	download_fractions = [entry/N for entry in input_downloads]
	print "F test result = %0.4f" % Ftest(download_fractions, sigma2_s, N)

	# Perform individual t-test
	print "The 96.6%% confidence interval is = (%0.2f %0.2f)" % (norm.interval(0.966, loc=0, scale=1))
	for fraction in download_fractions[1:]:
		print "t value = %0.2f (for a measured download rate of %0.4f)" % (ttest(N, fraction, N, download_fractions[0]), fraction)

	return
def df(x, df1, df2, ncp=0):
    """
    Calculates the density/point estimate of the F-distribution
    """
    from scipy.stats import f, ncf
    if ncp == 0:
        result = f.pdf(x=x, dfn=df1, dfd=df2, loc=0, scale=1)
    else:
        result = ncf.pdf(x=x, dfn=df1, dfd=df2, nc=ncp, loc=0, scale=1)
    return result
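
A usage sketch, mirroring R's df():

print(df(1.0, df1=3, df2=10))         # central F density at x = 1.0
print(df(1.0, df1=3, df2=10, ncp=2))  # noncentral F density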
Example #9
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2, norm, t, f


def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))
    
    # simulate the chi2 distribution: sum of n squared standard normals
    y = []
    n = 10
    for i in range(1000):
        r = norm.rvs(size=n)
        chi2r = np.sum(r ** 2)
        y.append(chi2r)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
    
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))
    
    #simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx/np.sqrt(ry/df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
    
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    
    # simulate the F-distribution: F = (chi2(dfn)/dfn) / (chi2(dfm)/dfm)
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # no square root: F is a ratio of scaled chi-squares
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
    def plot_f_distrib_for_many_coefficients(self, features):
        from scipy.stats import f
        from numpy.linalg import inv  # needed for inv() below

        # Remove a particular subset of features
        X = np.delete(self.X, [self.features.index(_) for _ in features], 1)

        # Prediction from reduced model
        XT = X.T
        std_error_matrix = inv(XT.dot(X))
        beta = std_error_matrix.dot(XT).dot(self.y)
        y_hat = X.dot(beta)
        rss_reduced_model = np.sum((self.y - y_hat)**2)

        dfn = len(features)
        dfd = self.df

        # This should be distributed as chi squared
        # with degrees of freedom equal to number
        # of dropped features
        rss_diff = (rss_reduced_model - self.rss)
        chi_1 = rss_diff / dfn
        chi_2 = self.pop_var
        f_score = chi_1 / chi_2

        # 5% and 95% percentile
        f_05, f_95 = f.ppf([0.05, 0.95], dfn, dfd)

        x = np.linspace(0.001, 5.0)

        plt.axvline(x=f_05)
        plt.axvline(x=f_95)

        plt.scatter(f_score, f.pdf(f_score, dfn, dfd), marker='o', color='red')
        plt.plot(x, f.pdf(x, dfn, dfd), color='gray', lw=5, alpha=0.6)
        plt.title('f-distribution for dropping features: {0}'.format(features))
        plt.show()
def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the sampling distribution
    y = []
    for i in range(1000):
        r1 = norm.rvs(loc=5, scale=2, size=dfn + 1)
        r2 = norm.rvs(loc=3, scale=2, size=dfm + 1)
        rf = np.var(r1, ddof=1) / np.var(r2, ddof=1)  # sample variances (ddof=1) make the ratio F(dfn, dfm)
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('sampling_distribution.png')
def F_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the F-distribution
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # ratio of scaled chi-squares, no square root
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('F_distribution.png')
 def getDataSizeDistribution(self):
     '''
     TODO: Read the data size distribution from the input file. It seems that we do not
     have this column in the input file, so we fall back on a synthetic distribution.
     Similar to the function above.
     :return: A ndarray of float numbers, with shape (self.deviceNum,)
     '''
     #return np.random.rand(self.deviceNum)
     #return f.pdf(np.random.uniform(0,4,self.deviceNum), 1, 1)
     #return np.random.zipf(1.5,self.deviceNum)
     if self.distribution=='normal':
         # NB: despite the label, this draws uniform samples scaled to [parameter[0], parameter[1]]
         return (np.random.rand(self.deviceNum)*(self.parameter[1]-self.parameter[0])+self.parameter[0])
     elif self.distribution=='f':
         return f.pdf(np.random.uniform(0,4,self.deviceNum), self.parameter[0], self.parameter[1])
     elif self.distribution=='zipf':
         return np.random.zipf(self.parameter,self.deviceNum)
Example #15
 def f_distribution_critical_value(f_value,
                                   df_numerator,
                                   df_denominator,
                                   loc=0,
                                   scale=1):
     """
     Evaluates the F-distribution density at the observed f-value.
     (Despite the name, this returns the pdf at f_value, not a critical value.)
     :param f_value: observed f-value
     :param df_numerator:    degrees of freedom of numerator
     :param df_denominator:  degrees of freedom of denominator
     :param loc:
     :param scale:
     :return: density at f_value
     """
     return float(
         FDistribution.pdf(f_value, df_numerator, df_denominator, loc,
                           scale))
Example #16
 def getErrorRateDistribution(self):
     if self.distributionError == 'normal':
         res = (np.random.normal(self.parameterError[0],
                                 self.parameterError[1], self.deviceNum))
         for i in range(res.shape[0]):
             if res[i] < 0:
                 res[i] = 0
             if res[i] > 1:
                 res[i] = 0.9
         return res
     elif self.distributionError == 'f':
         return f.pdf(np.random.uniform(0, 4, self.deviceNum),
                      self.parameterError[0], self.parameterError[1]) / 5
     elif self.distributionError == 'zipf':
         res = np.random.zipf(self.parameterError, self.deviceNum) / 100
         for i in range(res.shape[0]):
             if res[i] > 1:
                 res[i] = 0.9
         return res
Example #17
def MalinowskyParameters(data, l):
    if np.shape(data)[0] < np.shape(data)[1]:
        data = np.transpose(data)
    n_row = np.shape(data)[0]  # number of rows
    n_col = np.shape(data)[1]  # number of columns
    ind = np.zeros(n_col - 1)
    ie = np.zeros(n_col - 1)
    index = range(1, n_col)
    for i in range(0, n_col - 1):
        ind[i] = (np.sqrt(
            (np.sum(l[i + 1:n_col])) /
            (n_row * (n_col - index[i])))) / (n_col - index[i])**2
        ie[i] = np.sqrt(index[i] * (np.sum(l[i + 1:n_col])) /
                        (n_row * n_col * (n_col - index[i])))
    pc = np.arange(1., n_col + 1,
                   1)  # maximum number of components (i.e. number of spectra)
    p = np.zeros(np.size(pc))
    for i in range(0, np.size(pc)):
        p[i] = (n_row - pc[i] + 1) * (n_col - pc[i] + 1)
    s1 = np.zeros(np.size(pc))
    s2 = np.zeros(np.size(pc))
    fi = np.zeros(np.size(l) - 1)
    result = np.zeros(np.size(l) - 1)
    a = pc + 1
    for i in range(0, n_col - 1):
        s1[i] = np.sum(
            (n_row - a[i:np.size(pc)] + 1) * (n_col - a[i:np.size(pc)] + 1))
    for j in range(0, n_col - 1):
        s2[j] = np.sum(l[j + 1:np.size(pc) + 1])
    for i in range(0, n_col - 1):
        fi[i] = (s1[i] / p[i]) * (l[i] / s2[i])
    for i in range(0, n_col - 1):
        result[i] = ((integrate.quad(lambda x: f.pdf(x, 1, (n_col - 1) - i),
                                     fi[i], np.inf))[0]) * 100
    statistic = pd.DataFrame({'IND': ind, 'IE': ie, 'F': result})
    statistic.index = statistic.index + 1
    return statistic, pc
    def getDataSizeDistribution(self):
        res = None
        if self.distributionData == 'normal':
            # parameterData assumed here; the original used parameterError,
            # which looks like a copy-paste slip from the error-rate version
            res = (np.random.normal(self.parameterData[0],
                                    self.parameterData[1], self.deviceNum))
        elif self.distributionData == 'f':
            res = f.pdf(np.random.uniform(0, 4, self.deviceNum),
                        self.parameterData[0], self.parameterData[1])
        elif self.distributionData == 'zipf':
            res = np.random.zipf(self.parameterData, self.deviceNum)

        if not self.isRelated:
            return res
        else:
            index = self.allErrorRate.argsort()
            res.sort()
            finalRes = copy.deepcopy(res)
            j = 0
            for i in index:
                finalRes[i] = res[j]
                j += 1
            return finalRes
Example #19
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2

cvalues = np.linspace(0.1, 40, 200)  # x grid; the range is assumed, not given in the original snippet
plt.plot(cvalues, chi2.pdf(cvalues, 1), 'b-', label="Chi2(1)")
plt.plot(cvalues, chi2.pdf(cvalues, 5), 'r-', label="Chi2(5)")
plt.plot(cvalues, chi2.pdf(cvalues, 30), 'g-', label="Chi2(30)")
plt.legend()

#sample_chi2 = np.random.chisquare(1, 10)
#sns.distplot(sample_chi2)
plt.show()

# F-distribution

from scipy.stats import f
fvalues = np.linspace(.1, 5, 100)
# pdf(x, df1, df2): Probability density function at x of F.
plt.plot(fvalues, f.pdf(fvalues, 1, 30), 'b-', label="F(1, 30)")
plt.plot(fvalues, f.pdf(fvalues, 5, 30), 'r-', label="F(5, 30)")
plt.legend()
# cdf(x, df1, df2): Cumulative distribution function of F.
# ie.
proba_at_f_inf_3 = f.cdf(3, 1, 30) # P(F(1,30) < 3)
# ppf(q, df1, df2): Percent point function (inverse of cdf) at q of F.
f_at_proba_inf_95 = f.ppf(.95, 1, 30) # q such that P(F(1,30) < q) = .95
assert np.isclose(f.cdf(f_at_proba_inf_95, 1, 30), .95)  # compare floats with a tolerance
# sf(x, df1, df2): Survival function (1 - cdf) at x of F.
proba_at_f_sup_3 = f.sf(3, 1, 30) # P(F(1,30) > 3)
assert np.isclose(proba_at_f_inf_3 + proba_at_f_sup_3, 1)
# Shade the region where P(F(1, 30) > x) < 0.05
low_proba_fvalues = fvalues[fvalues > f_at_proba_inf_95]
plt.fill_between(low_proba_fvalues, 0, f.pdf(low_proba_fvalues, 1, 30),
                 alpha=.8, label="P < 0.05")
    
    #perform detection test
    T_balanced[indice]=compute_GLRT_statistic(Y_balanced,w)
    T_unbalanced[indice]=compute_GLRT_statistic(Y_unbalanced,w)

threshold = threshold_from_pfa(pfa, N)
print("threshold: %f" % threshold)

#MARK: Display signal
xbins = np.linspace(0.001, 25, 100)
xbins_middle = (xbins[1:] + xbins[:-1]) / 2
n_balanced, bins1, p1 = plt.hist(T_balanced, bins=xbins, density=True, histtype='stepfilled')
n_unbalanced, bins2, p2 = plt.hist(T_unbalanced, bins=xbins, density=True, histtype='stepfilled')

#theoretical pdf
plt.plot(xbins_middle,  f.pdf(xbins_middle, 4,(3*N-6)), 'k--', linewidth=1.5)
ncf_lambda=compute_lambda(w,sym_unbalanced,N,sigmaB_unbalanced)
plt.plot(xbins_middle,  ncf.pdf(xbins_middle, 4,(3*N-6),ncf_lambda), 'k--', linewidth=1.5)
plt.axvline(threshold,color='r')
plt.show()

#MARK: export csv file
output=np.zeros((5,np.size(xbins_middle)))
if csv_on==1:
    output[0,:]=xbins_middle
    output[1,:]=n_balanced
    output[2,:]=f.pdf(xbins_middle, 4,(3*N-6))
    output[3,:]=n_unbalanced
    output[4,:]=ncf.pdf(xbins_middle, 4,(3*N-6),ncf_lambda)
    np.savetxt(filename, output.T,header="bins,hist_balanced,pdf_balanced,hist_unbalanced,pdf_unbalanced", delimiter=",")
Example #21
 def pdf(self, F):
     return f.pdf(F, self.dfn, self.dfd, loc=self.loc, scale=self.scale)
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

dfn, dfd = 29, 18
mean, var, skew, kurt = f.stats(dfn, dfd, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(f.ppf(0.01, dfn, dfd),
                f.ppf(0.99, dfn, dfd), 100)
ax.plot(x, f.pdf(x, dfn, dfd),
       'r-', lw=5, alpha=0.6, label='f pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = f(dfn, dfd)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = f.ppf([0.001, 0.5, 0.999], dfn, dfd)
np.allclose([0.001, 0.5, 0.999], f.cdf(vals, dfn, dfd))
# True
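
# Generate random numbers (the next step in the scipy docs example this snippet follows):

r = f.rvs(dfn, dfd, size=1000)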
Example #23
'''
@Author: Runsen
@WeChat public account: 润森笔记 (Runsen's Notes)
@Blog: https://blog.csdn.net/weixin_44510615
@Date: 2020/7/5
'''
import numpy as np

import matplotlib.pyplot as plt
from scipy.stats import f
x = np.linspace(0, 3, 100)
plt.plot(x, f.pdf(x, 20, 20), 'k-', label='y=f(x,20,20)')
plt.plot(x, f.pdf(x, 10, 10), 'r-', label='y=f(x,10,10)')
plt.plot(x, f.pdf(x, 10, 5), 'g-', label='y=f(x,10,5)')
plt.plot(x, f.pdf(x, 10, 20), 'b-', label='y=f(x,10,20)')
plt.plot(x, f.pdf(x, 5, 5), 'r--', label='y=f(x,5,5)')
plt.plot(x, f.pdf(x, 5, 10), 'g--', label='y=f(x,5,10)')
plt.plot(x, f.pdf(x, 5, 1), 'y-', label='y=f(x,5,1)')
plt.legend()
plt.show()
Example #25
from scipy.stats import f
print(f.pdf(3, 5, 2))
Example #26
def fplot(dname, ref, alt, year=2017, savename='fplotX', nbins=130):
    ref_pt, ref_rho = ref
    alt_pt, alt_rho = alt
    p1 = (ref_pt + 1) * (ref_rho + 1)
    p2 = (alt_pt + 1) * (alt_rho + 1)

    path = '{dname}/bkgtest_{ref_pt}-{ref_rho}_{alt_pt}-{alt_rho}'.format(
        dname=dname,
        ref_pt=ref_pt,
        ref_rho=ref_rho,
        alt_pt=alt_pt,
        alt_rho=alt_rho)
    print("X", path)
    base_dict = skim_gofs(get_names(path))
    alt_dict = skim_gofs(get_names(path, alt=True))

    base, alt = [], []
    for i in range(len(base_dict)):
        try:
            ia = alt_dict[i]
            ib = base_dict[i]
            alt.append(ia)
            base.append(ib)
        except KeyError:  # skip toys missing from either dict
            pass

    if len(alt) != len(base):
        raise ValueError("Number of toys for base and ref does not match.")
    fvals = fval(base, alt, p1, p2, nbins)
    f_data = fval(
        get_vals(
            '{dname}/bkgtest_{ref_pt}-{ref_rho}_{alt_pt}-{alt_rho}/refbase.root'
            .format(dname=dname,
                    ref_pt=ref_pt,
                    ref_rho=ref_rho,
                    alt_pt=alt_pt,
                    alt_rho=alt_rho)),
        get_vals(
            '{dname}/bkgtest_{ref_pt}-{ref_rho}_{alt_pt}-{alt_rho}/refalt.root'
            .format(dname=dname,
                    ref_pt=ref_pt,
                    ref_rho=ref_rho,
                    alt_pt=alt_pt,
                    alt_rho=alt_rho)), p1, p2, nbins)[0]
    print(f_data)

    from scipy.stats import f
    x_lim = max(np.percentile(fvals, 90), f_data * 1.2)
    x = np.linspace(0, x_lim, 200)
    bins = np.linspace(0, x_lim, 30)
    width = bins[1] - bins[0]

    fig, ax = plt.subplots()
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    ax.plot(x,
            len(base) * width * f.pdf(x, p2 - p1, nbins - p2),
            color='red',
            label='F-dist, ndf({},{})'.format(p2 - p1, nbins - p2))
    ax.hist(fvals,
            bins,
            facecolor='none',
            edgecolor='black',
            histtype='stepfilled',
            lw=2,
            label="Toys, N = {}".format(len(fvals)))
    ax.hist(fvals[fvals > f_data],
            bins,
            facecolor='steelblue',
            edgecolor='gray',
            histtype='stepfilled',
            alpha=0.3,
            label='p-value = {}'.format(
                round(float(len(fvals[fvals > f_data])) / len(fvals), 3)))
    ax.annotate(
        "",
        xy=(f_data, 0),
        xycoords=trans,
        xytext=(f_data, 0.25),
        textcoords=trans,
        arrowprops=dict(lw=4,
                        color='b',
                        arrowstyle="->,head_length=1.5,head_width=0.5"),
    )
    ax.plot([], [],
            color='blue',
            lw=2,
            label="Observed = {:.3f}".format(f_data))

    title = "TF({},{}) x TF({},{})".format(ref_pt, ref_rho, alt_pt, alt_rho)
    ax.legend(title=title)
    hep.cms.label(data=True, year=year, ax=ax)
    ax.set_xlim(0, x_lim)
    xlab = r"$\frac{-2log(\lambda_1/\lambda_2)/(p_2-p_1)}{-2log\lambda_2/(n-p_2)}$"
    ax.set_xlabel(xlab, x=1, ha='right')
    ax.set_ylabel("Pseudoexperiments", y=1, ha='right')

    fig.savefig('{}.pdf'.format(savename),
                dpi=300,
                transparent=True,
                bbox_inches='tight')
    fig.savefig('{}.png'.format(savename),
                dpi=300,
                transparent=True,
                bbox_inches='tight')
Example #27
def f23test(data, B, fix):
    F = np.zeros(2, dtype=float)    # pdf
    C = np.zeros(2, dtype=float)    # p-values
    B2 = np.zeros(2, dtype=float)   # for the 2nd order EOS, assign K0 and V0 from user guesses
    B2[0] = B[0]
    B2[1] = B[-1]
                
    if data.EOS_type == GEOST_thermo.types().names[0]:   # Figure out what EOS's to use
        # If birch murnaghan
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM2_V, fjacb=GEOST_thermo.BM2_V_JACB,
                                  fjacd=GEOST_thermo.BM2_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B2, ifixb=[fix[0],fix[-1]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives
        output = odr.run()            # Output of ODR run
        ref_B2  = output.beta         # LSQ best-fit parameters
        err_B2  = output.sd_beta      # Parameter errors (1-sigma)
        f2 = GEOST_thermo.BM2_V(ref_B2,data.V)
        df2 = data.V.shape[0] - 2
                
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM3_V, fjacb=GEOST_thermo.BM3_V_JACB,
                                  fjacd=GEOST_thermo.BM3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.BM3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
        
    elif data.EOS_type == GEOST_thermo.types().names[1]:
        # If Natural strain
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS2_V, fjacb=GEOST_thermo.NS2_V_JACB, fjacd=GEOST_thermo.NS2_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B2, ifixb=[fix[0],fix[-1]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives
        output = odr.run()            # Output of ODR run
        ref_B2  = output.beta         # LSQ best-fit parameters
        err_B2  = output.sd_beta      # Parameter errors (1-sigma)
        f2 = GEOST_thermo.NS2_V(ref_B2,data.V)
        df2 = data.V.shape[0] - 2
            
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS3_V, fjacb=GEOST_thermo.NS3_V_JACB,
                                  fjacd=GEOST_thermo.NS3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.NS3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
        
    chisq_2 = float(0)
    chisq_3 = float(0)
    for i in range(data.P.shape[0]):   # Compute the chi-squared for each EOS
        chisq_2 += (data.P[i] - f2[i])**2
        chisq_3 += (data.P[i] - f3[i])**2
                
    # Compute the F-statistic
    Fx23 = (chisq_2 - chisq_3)/(chisq_3/df3)
    
    # Use Scipy's built-in F-distribution methods
    F = f_dist.pdf(Fx23, 1, df3)
        
    # Compute the P-value
    C = 1 - f_dist.cdf(Fx23, 1, df3)

    x1 = np.linspace(0.01, 2*Fx23, 128)
    pdf23 = f_dist.pdf(x1, 1, df3)
    cdf23 = f_dist.cdf(x1, 1, df3)

    # Results
    plt.figure()
    fig = plt.gcf()
    fig.canvas.manager.set_window_title("F-Test Results")  # canvas.set_window_title was removed in newer matplotlib
    plt.plot(x1, pdf23, 'r', linewidth=3, alpha=0.8)
    plt.plot(x1, cdf23, 'b', linewidth=3, alpha=0.8)
    plt.plot(Fx23, f_dist.pdf(Fx23, 1, df3), 'ko')
    plt.fill_between(x1, 0, pdf23, where=f_dist.cdf(Fx23, 1, df3)<cdf23, facecolor='red', alpha=0.2)
    plt.xticks(fontsize=14)
    plt.ylim([-0.01,1.01])
    plt.title("F test for 2nd vs. 3rd order EOS", fontsize=14)
    plt.xlabel(r"$\left( \chi^{2}_{2} - \chi^{2}_{3} \right) / \left(\chi^{2}_{3} / \nu_{3} \right)$",
               fontsize=12)
    plt.ylabel(r"PDF/CDF")
    plt.legend(['PDF', 'CDF', r"$F_{X}$"], loc='upper right', numpoints=1)
    plt.text(Fx23, f_dist.pdf(Fx23, 1, df3)+0.05, "p-value= {:8.4f}".format(1-f_dist.cdf(Fx23, 1, df3)), fontsize=14)    
    plt.tight_layout()
    return [chisq_2/df2, chisq_3/df3, Fx23, f_dist.cdf(Fx23, 1, df3)]
Example #28
def fqtest(VT, P, VTerr, Perr, debye, Bhit, fix):
    F = np.zeros(2, dtype=float)    # pdf
    C = np.zeros(2, dtype=float)    # p-values
    Bhit2 = np.zeros(3, dtype=float)   # for q fixed at 1
    Bhit2[0] = Bhit[0]
    Bhit2[1] = Bhit[1]
    Bhit2[2] = 1.
    fix2 = list(fix)
    fix2[-1] = 0
    odr_model2 = odrpack.Model(fcn=debye.P_thermal, fjacb=debye.JACB, fjacd=debye.JACD)
    odr_data2  = odrpack.RealData(x=VT, y=P, sx=VTerr, sy=Perr)
    odr2 = odrpack.ODR(odr_data2, odr_model2, beta0=Bhit2, ifixb=fix2)
    odr2.set_job(deriv=1)          # Use user-supplied derivatives
    output2 = odr2.run()            # Output of ODR run
    ref_B2  = output2.beta         # LSQ best-fit parameters
    err_B2  = output2.sd_beta      # Parameter errors (1-sigma)
    f2 = debye.P_thermal(ref_B2, VT)
    df2 = VT.shape[1] - 2

    odr_model3 = odrpack.Model(fcn=debye.P_thermal, fjacb=debye.JACB, fjacd=debye.JACD)
    odr_data3  = odrpack.RealData(x=VT, y=P, sx=VTerr, sy=Perr)
    odr3 = odrpack.ODR(odr_data3, odr_model3, beta0=Bhit, ifixb=fix)
    odr3.set_job(deriv=1)            # NB: using numerical derivatives here!
    output3 = odr3.run()             # Output of ODR run
    ref_B3  = output3.beta           # LSQ best-fit parameters
    err_B3  = output3.sd_beta        # Parameter errors (1-sigma)
    f3 = debye.P_thermal(ref_B3, VT)
    df3 = VT.shape[1] - 3

    chisq_2 = float(0)
    chisq_3 = float(0)
    for i in range(P.shape[0]):   # Compute the chi-squared for each EOS
        chisq_2 += (P[i] - f2[i])**2
        chisq_3 += (P[i] - f3[i])**2

    # Compute the F-statistic
    Fx23  = (chisq_2 - chisq_3)/(chisq_3/df3)
    x1    = np.linspace(0.01, 2*Fx23, 128)
    pdf23 = f_dist.pdf(x1, 1, df3)
    cdf23 = f_dist.cdf(x1, 1, df3)

    # Use Scipy's built-in F-distribution methods
    F = f_dist.pdf(Fx23, 1, df3)
        
    # Compute the P-value
    C = 1 - f_dist.cdf(Fx23, 1, df3)

    # Finally, make the plot. Should be a 1 row 2 column plot showing
    # f-test for 2nd to 3rd order EOS and 3rd to 4th order EOS.
    plt.figure()
    fig = plt.gcf()
    fig.canvas.manager.set_window_title("F-Test Results")  # canvas.set_window_title was removed in newer matplotlib
    plt.plot(x1, pdf23, 'r', linewidth=3, alpha=0.8)
    plt.plot(x1, cdf23, 'b', linewidth=3, alpha=0.8)
    plt.plot(Fx23, f_dist.pdf(Fx23, 1, df3), 'ko')
    plt.fill_between(x1, 0, pdf23, where=f_dist.cdf(Fx23, 1, df3)<cdf23, facecolor='red', alpha=0.2)
    plt.xticks(fontsize=14)
    plt.ylim([-0.01,1.01])
    plt.title("Comparing q != 1", fontsize=14)
    plt.xlabel(r"$\left( \chi^{2}_{2} - \chi^{2}_{3} \right) / \left(\chi^{2}_{3} / \nu_{3} \right)$",
               fontsize=12)
    plt.legend(['PDF', 'CDF', r"$F_{X}$"], loc='upper right', numpoints=1)
    plt.text(Fx23, f_dist.pdf(Fx23, 1, df3)+0.05, "p-value= {:8.4f}".format(1-f_dist.cdf(Fx23, 1, df3)))
    
    plt.tight_layout()    
    return [chisq_2, output2.stopreason[0], chisq_3, output3.stopreason[0], Fx23, f_dist.cdf(Fx23, 1, df3)]
Example #29
## Compute F score
n = y.size
fval = ss_reg / (ss_res / (n - 2))

'''
- Compute the p-value:
  * Plot the F(1,n) distribution for 100 f values within [10, 25]. Draw P(F(1,n)>F), i.e. color the surface defined by x values larger than F below the F(1,n) density.
  * P(F(1,n)>F) is the p-value; compute it.
'''
## Plot the F(1, n) distribution for 100 f values within [10, 25]
## Depict P(F(1, n) > F), i.e. color the surface defined by x values larger than F below the F(1, n) density
from scipy.stats import f
fvalues = np.linspace(10, 25, 100)

plt.plot(fvalues, f.pdf(fvalues, 1, n - 2), 'b-', label="F(1, %i)" % (n - 2))  # same dof as fval (was hardcoded 30)

upper_fval_fvalues = fvalues[fvalues > fval]
plt.fill_between(upper_fval_fvalues, 0, f.pdf(upper_fval_fvalues, 1, n - 2), alpha=.8)

# pdf(x, df1, df2): Probability density function at x of the given RV.
plt.legend()

## P(F(1, n) > F) is the p-value, compute it

# Survival function (1 - `cdf`)
pval = f.sf(fval, 1, n - 2)



## With statsmodels
Example #30
    xsensors_m = xsensors_m - xsensors_m[0, :]
    M = size(xsensors_m, 0)
T_sec = 30
N = int(T_sec * Fs_Hz)
Lruns = 10000
Fstat = zeros(Lruns)

for ir in range(0,Lruns):
    x = random.randn(N,M)
    F = maxfstat(x, Fs_Hz, xsensors_m, gridaz_deg,gridel_deg, 
                 gridc_mps)
    Fstat[ir] = F[0]
#%%

xtheo = linspace(0.5, 1.5, 100)
ytheo = f.pdf(xtheo, N, N * (M - 1))

HorizontalSize = 5
VerticalSize = 3
figsimul = plt.figure(num=2, figsize=(HorizontalSize, VerticalSize),
                      edgecolor='k', facecolor=[1, 1, 0.92])
figsimul.clf()
h1 = plt.hist(Fstat, density=True, bins=30, label='histogram')
h2 = plt.plot(xtheo,ytheo,'r',linewidth=2, label='Fisher')
plt.legend(loc='best')

dirfigsave = '/Users/maurice/etudes/stephenA/propal2/figures/'
tt='%sthetafixFisher.pdf' %dirfigsave
plt.show()

figsimul.savefig(tt,format='pdf')
Example #33
            print('Ftest')
            ##########################    F-TEST    ###############################
            bigN = len(AllWave)  # Number of data points
            Nparam1 = 3.
            Nparam2 = 5.
            dof1 = bigN - Nparam1
            dof2 = bigN - Nparam2

            #chi1 = 300. #428.65 #383.6 #really bad
            #chi2 = 300.  #results in chisqr close to 1

            ftest = (chisq1 / dof1) / (chisq2 / dof2)
            #RJ+BB vs RJ+BB+BB
            #ftest = ( (chi1-chi2)/(dof1-dof2) ) / (chi2/dof2)

            proba_at_f_pdf = f.pdf(ftest, dof1, dof2)
            proba_at_f_cdf = f.cdf(ftest, dof1, dof2)  # P(F < ftest)
            f_at_proba_98 = f.ppf(.98, dof1, dof2)  # q such that P(F < q) = .98
            proba_at_norm_isf = Norm.isf(proba_at_f_cdf)  # inverse survival function
            proba_at_norm_ppf = Norm.ppf(proba_at_f_cdf)  # number of sigma

            print('')
            print('-----------')
            print('Source:         ', Source)
            print('ftest:          ', ftest)
            print('proba_at_f_pdf: ', proba_at_f_pdf)
            print('proba_at_f_cdf: ', proba_at_f_cdf)
            print('f_at_proba_98:  ', f_at_proba_98)
            print('proba_at_norm_isf: ', proba_at_norm_isf)  # inverse survival function
            print('proba_at_norm_ppf: ', proba_at_norm_ppf, ' sigma')  # Number of sigma away
            print('-----------')
Example #35
def f34test(data, B, fix):
    F = np.zeros(2, dtype=float)    # pdf
    C = np.zeros(2, dtype=float)    # p-values
    B3 = np.zeros(3, dtype=float)   # for the 3rd order EOS, take K0, K0' and V0 from user guesses
    B3[0] = B[0]
    B3[1] = B[1]
    B3[2] = B[3]
                
    if data.EOS_type == GEOST_thermo.types().names[0]:   # Figure out what EOS's to use
        # If birch murnaghan
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM3_V, fjacb=GEOST_thermo.BM3_V_JACB,
                                  fjacd=GEOST_thermo.BM3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B3, ifixb=[fix[0],fix[1],fix[3]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.BM3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
                
        odr_model = odrpack.Model(fcn=GEOST_thermo.BM4_V, fjacb=GEOST_thermo.BM4_V_JACB,
                                  fjacd=GEOST_thermo.BM4_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B4  = output.beta         # LSQ best-fit parameters
        err_B4  = output.sd_beta      # Parameter errors (1-sigma)
        f4 = GEOST_thermo.BM4_V(ref_B4, data.V)  # was BM3_V(ref_B3, ...), a copy-paste slip
        df4 = data.V.shape[0] - 4
        
    elif data.EOS_type == GEOST_thermo.types().names[1]:
        # If Natural strain
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS3_V, fjacb=GEOST_thermo.NS3_V_JACB, fjacd=GEOST_thermo.NS3_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B3, ifixb=[fix[0], fix[1], fix[3]])
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B3  = output.beta         # LSQ best-fit parameters
        err_B3  = output.sd_beta      # Parameter errors (1-sigma)
        f3 = GEOST_thermo.NS3_V(ref_B3,data.V)
        df3 = data.V.shape[0] - 3
        
        odr_model = odrpack.Model(fcn=GEOST_thermo.NS4_V, fjacb=GEOST_thermo.NS4_V_JACB,
                                  fjacd=GEOST_thermo.NS4_V_JACD)
        odr_data  = odrpack.RealData(x=data.V, y=data.P, sx=data.Verr, sy=data.Perr)
        odr = odrpack.ODR(odr_data, odr_model, beta0=B, ifixb=fix)
        odr.set_job(deriv=3)          # Use user-supplied derivatives, but CHECK THEM!!!
        output = odr.run()            # Output of ODR run
        ref_B4  = output.beta         # LSQ best-fit parameters
        err_B4  = output.sd_beta      # Parameter errors (1-sigma)
        f4 = GEOST_thermo.NS4_V(ref_B4,data.V)
        df4 = data.V.shape[0] - 4
                    
    elif data.EOS_type == GEOST_thermo.types().names[2]:
        self.LOG_PRINT("ERROR in PLOTS: Cannot do F-test using Vinet EOS.")
    else:
        self.LOG_PRINT("ERROR in PLOTS: Unrecognized value of EOS_SELECT")

    chisq_3 = float(0)
    chisq_4 = float(0)
    for i in range(data.P.shape[0]):   # Compute the chi-squared for each EOS
        chisq_3 += (data.P[i] - f3[i])**2
        chisq_4 += (data.P[i] - f4[i])**2
                
    # Compute the F-statistic
    Fx34 = (chisq_3 - chisq_4)/(chisq_4/df4)

    x1 = np.linspace(0.01, 2*Fx34, 128)
    pdf34 = f_dist.pdf(x1, 1, df4)
    cdf34 = f_dist.cdf(x1, 1, df4)

    # Use Scipy's built-in F-distribution methods
    F = f_dist.pdf(Fx34, 1, df4)
        
    # Compute the P-value
    C = 1 - f_dist.cdf(Fx34, 1, df4)

    # Finally, make the plot. Should be a 1 row 2 column plot showing
    # f-test for 2nd to 3rd order EOS and 3rd to 4th order EOS.
    plt.figure()
    fig = plt.gcf()
    fig.canvas.manager.set_window_title("F-Test Results")  # canvas.set_window_title was removed in newer matplotlib
    plt.plot(x1, pdf34, 'r', linewidth=3, alpha=0.8)
    plt.plot(x1, cdf34, 'b', linewidth=3, alpha=0.8)
    plt.plot(Fx34, f_dist.pdf(Fx34, 1, df4), 'ko')
    plt.fill_between(x1, 0, pdf34, where=f_dist.cdf(Fx34, 1, df4)<cdf34, facecolor='red', alpha=0.2)
    plt.xticks(fontsize=14)
    plt.ylim([-0.01,1.01])
    plt.title("Comparing 3rd vs. 4th order EOS", fontsize=14)
    plt.xlabel(r"$\left( \chi^{2}_{3} - \chi^{2}_{4} \right) / \left(\chi^{2}_{4} / \nu_{4} \right)$",
               fontsize=12)
    plt.legend(['PDF', 'CDF', r"$F_{X}$"], loc='upper right', numpoints=1)
    plt.text(Fx34, f_dist.pdf(Fx34, 1, df4)+0.05, "p-value= {:8.4f}".format(1-f_dist.cdf(Fx34, 1, df4)), fontsize=14)
    plt.tight_layout()
    return [chisq_3/df3, chisq_4/df4, Fx34, f_dist.cdf(Fx34, 1, df4)]
Example #36
The module name for the $F$ distribution is `f`.
```
f.pdf(x, dfn, dfd)
f.cdf(x, dfn, dfd)
f.ppf(a, dfn, dfd)
f.rvs(dfn, dfd, size=1)
```
* `dfn`: numerator degrees of freedom
* `dfd`: denominator degrees of freedom

Import `f` from `scipy.stats` and plot the probability density function.

from scipy.stats import f
x = np.linspace(0.001, 5, 1000)
y = f.pdf(x, dfn=5, dfd=1)
plt.plot(x, y)
pass

With `dfn=5` and `dfd=1`, what is the probability that `x` is `0.1` or less?

f.cdf(0.1, dfn=5, dfd=1)

With `dfn=5` and `dfd=1`, what is the probability that `x` is `5` or more?

1-f.cdf(5, dfn=5, dfd=1)
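
A quick check (a sketch; the values are illustrative): `ppf` inverts `cdf`, and `rvs` draws random samples.

q = f.cdf(0.1, dfn=5, dfd=1)
f.ppf(q, dfn=5, dfd=1)   # recovers 0.1 (up to floating-point error)
f.rvs(dfn=5, dfd=1, size=3)  # three random draws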

### Uniform Distribution

The module name for the uniform distribution is `uniform`.
Example #37
    )
else:
    print(
        "[2] The null hypothesis for the blocks in the two-way analysis is not rejected, so there are no differences between the blocks"
    )
print("One-way analysis")
print("The value from the one-way ANOVA table for the rows is: ", FTU1)
if FU1 > FTU1:
    print(
        "[3] The null hypothesis for the rows (treatments) is rejected, so there is a difference between them"
    )
else:
    print(
        "[3] The null hypothesis for the rows (treatments) is not rejected, so there is no difference between them"
    )
print("The value from the one-way ANOVA table for the columns is: ",
      FTU2)
if FU2 > FTU2:  # the original repeated FU1 > FTU1 here; the columns test presumably uses FU2/FTU2
    print(
        "[4] The null hypothesis for the columns (blocks) is rejected, so there is a difference between them"
    )
else:
    print(
        "[4] The null hypothesis for the columns (blocks) is not rejected, so there is no difference between them"
    )

from scipy.stats import f

val = f.pdf(0.05, 3, 6)
print(val)
#Reject H0 at level a if T2 > (p(n-1)/(n-p)) * F_{p,n-p}(a), where
#F is the F distribution.
num_dof = 2
den_dof = 45 - 2
a = .43
#rv =  f.pdf(dfn=num_dof, dfd=den_dof, a, loc=0, scale=1)
#central F suppositions
rv = (2 * (45 + 1 - 2) / (45 + 1 - 2 - 1)) * f.ppf(a, num_dof, den_dof)
#rm =  (2*(45+1-2)/(45+1-2-1))*f.cdf(a, num_dof, den_dof)

fig = plt.figure(figsize=(13, 8))
x = np.linspace(f.ppf(0.01, num_dof, den_dof), f.ppf(0.99, num_dof, den_dof),
                100)
x_alpha = np.linspace(f.ppf(0.95, num_dof, den_dof),
                      f.ppf(0.99, num_dof, den_dof), 100)
plt.plot(x, f.pdf(x, num_dof, den_dof), 'b-', lw=3, label='f pdf')
plt.title('Fisher PDF for dfn = {}, dfd = {}'.format(num_dof, den_dof))
plt.fill_between(x_alpha,
                 f.pdf(x_alpha, num_dof, den_dof),
                 color='r',
                 label=r'$\alpha$ = 0.05')
plt.legend(loc='upper right')
plt.savefig('./fischer.pdf')

print(t, rv)

#NOW WE DO THE SAME WITH THE VALUES FROM THE PAIRED LINES METHOD:
x_paired = [
    0.6647315155340111, 0.6528186562246899, 0.4926493627791132,
    1.2617404216106476, 0.7570001093882107
]
Example #39
#== compute the p-value with the asymptotic distribution
#   (not independent)
ppv = pvalunderH0(FF, N, xsensors_m, Fs_Hz,
                  range_azimuth_deg,
                  range_elevation_deg, range_velocity_mps)

# p-values with the limG independent and F independent
ppvG = 1 - norm.cdf(FF, 1.0, sqrt(2.0 * M / (M - 1.0) / N)) ** Q
ppvF = 1 - f.cdf(FF, N, N * (M - 1)) ** Q

# pdf of the max of the limG independent and F independent
linx = linspace(0.69, 1.3, 200)
sigmaGlim = sqrt(2.0 * M / (M - 1.0) / N)
nu1 = N
nu2 = N * (M - 1)
pdffromF = f.pdf(linx, nu1, nu2)
pdffromFind = Q * pdffromF * (f.cdf(linx, nu1, nu2) ** (Q - 1))
pdffromGind = Q * norm.pdf(linx, 1.0, sigmaGlim) * (norm.cdf(linx, 1.0, sigmaGlim) ** (Q - 1))


dirfigsave = '/Users/maurice/etudes/stephenA/propal2/figures/'

#%%
#
#HorizontalSize = 6
#VerticalSize   = 6
#figpvalFoT     = plt.figure(num=1,figsize=(HorizontalSize,VerticalSize), 
#                            edgecolor='k', facecolor = [1,1,0.92]);
#plt.subplot(2,1,1)
#plt.ylabel("Frequency")
#plt.title("based on the asymptotic distribution")
# Remember from the last chapter that we can partition the total variance in the data ($SS_{total}$) into the variance that is explained by the model ($SS_{model}$) and the variance that is not ($SS_{error}$).  We can then compute a *mean square* for each of these by dividing them by their degrees of freedom; for the error this is $N - p$ (where $p$ is the number of means that we have computed), and for the model this is $p - 1$:
#
# $$
# MS_{model} =\frac{SS_{model}}{df_{model}}= \frac{SS_{model}}{p-1}
# $$
#
# $$
# MS_{error} = \frac{SS_{error}}{df_{error}} = \frac{SS_{error}}{N - p}
# $$
#
# With ANOVA, we want to test whether the variance accounted for by the model is greater than what we would expect by chance, under the null hypothesis of no differences between means.  Whereas for the t distribution the expected value is zero under the null hypothesis, that's not the case here, since sums of squares are always positive numbers.  Fortunately, there is another standard distribution that describes how ratios of sums of squares are distributed under the null hypothesis: The *F* distribution (see figure \@ref(fig:FDist)). This distribution has two degrees of freedom, which correspond to the degrees of freedom for the numerator (which in this case is the model), and the denominator (which in this case is the error).

# %%
from scipy.stats import f
x=np.arange(0.1,10,0.1)
ax=sns.lineplot(x=x,y=f.pdf(x, 1, 1),color='r',label='df(1,1)')
ax=sns.lineplot(x=x,y=f.pdf(x, 50, 1),color='g',label='df(50,1)')
ax=sns.lineplot(x=x,y=f.pdf(x, 50, 10),color='b',label='df(50,10)')
ax.set(xlabel='F Values', ylabel='Density')
plt.legend()
plt.show()
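
# %% [markdown]
# A minimal numeric sketch of the mean-square formulas above (the sums of squares and counts here are made up, not data from this chapter):

# %%
ss_model, ss_error = 40.0, 160.0   # hypothetical sums of squares
n_means, n_obs = 3, 24             # hypothetical p (number of means) and N (sample size)
ms_model = ss_model / (n_means - 1)
ms_error = ss_error / (n_obs - n_means)
f_ratio = ms_model / ms_error
print(f_ratio, f.sf(f_ratio, n_means - 1, n_obs - n_means))  # sf gives P(F > observed) under the null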

# %% [markdown]
# To create an ANOVA model, we extend the idea of *dummy coding* that you encountered in the last chapter. Remember that for the t-test comparing two means, we created a single dummy variable that took the value of 1 for one of the conditions and zero for the others.  Here we extend that idea by creating two dummy variables, one that codes for the Drug 1 condition and the other that codes for the Drug 2 condition.  Just as in the t-test, we will have one condition (in this case, placebo) that doesn't have a dummy variable, and thus represents the baseline against which the others are compared; its mean defines the intercept of the model. Let's create the dummy coding for drugs 1 and 2.

# %%
df['drug1']=df['group']=='drug1'
df['drug2']=df['group']=='drug2'

# %% [markdown]
# Now we can fit a model using the same approach that we used in the previous chapter:
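
# %%
# A sketch of that fit (assuming the statsmodels formula API as in the previous
# chapter; the outcome column name 'outcome' is a placeholder, not necessarily
# the one used in this dataset):
import statsmodels.formula.api as smf
model = smf.ols('outcome ~ drug1 + drug2', data=df).fit()
print(model.summary())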
def fplot(fvals,
          f_data,
          ref,
          alt,
          year=2017,
          nbins=130,
          savename=None,
          mc=False):
    ref_pt, ref_rho = ref
    alt_pt, alt_rho = alt
    p1 = (ref_pt + 1) * (ref_rho + 1)
    p2 = (alt_pt + 1) * (alt_rho + 1)

    from scipy.stats import f
    x_lim = max(np.percentile(fvals, 95), f_data * 1.05, np.median(fvals) * 3)
    x = np.linspace(0, x_lim, 200)
    bins = np.linspace(0, x_lim, 30)
    width = bins[1] - bins[0]

    goodvals = fvals[fvals > 0]

    fig, ax = plt.subplots()
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)
    with np.errstate(divide='ignore'):
        ax.plot(x,
                len(goodvals) * width * f.pdf(x, p2 - p1, nbins - p2),
                color='red',
                label='F-dist, ndf({},{})'.format(p2 - p1, nbins - p2))
    ax.hist(fvals,
            bins,
            facecolor='none',
            edgecolor='black',
            histtype='stepfilled',
            lw=2,
            label="Toys > 0, N = {}".format(len(goodvals)))
    ax.hist(goodvals[goodvals > f_data],
            bins,
            facecolor='steelblue',
            edgecolor='gray',
            histtype='stepfilled',
            alpha=0.3,
            label='p-value = {}'.format(
                round(
                    float(len(goodvals[goodvals > f_data])) / len(goodvals),
                    3)))
    ax.annotate(
        "",
        xy=(f_data, 0),
        xycoords=trans,
        xytext=(f_data, 0.25),
        textcoords=trans,
        arrowprops=dict(lw=4,
                        color='b',
                        arrowstyle="->,head_length=1.5,head_width=0.5"),
    )
    ax.plot([], [],
            color='blue',
            lw=2,
            label="Observed = {:.3f}".format(f_data))

    title = "TF({},{}) x TF({},{})".format(ref_pt, ref_rho, alt_pt, alt_rho)
    ax.legend(title=title)
    hep.cms.label(data=not mc, year=year, ax=ax)
    ax.set_xlim(0, x_lim)
    xlab = r"$\frac{-2log(\lambda_1/\lambda_2)/(p_2-p_1)}{-2log\lambda_2/(n-p_2)}$"
    ax.set_xlabel(xlab, x=1, ha='right')
    ax.set_ylabel("Pseudoexperiments", y=1, ha='right')

    if savename is not None:
        fig.savefig('{}.pdf'.format(savename),
                    dpi=300,
                    transparent=True,
                    bbox_inches='tight')
        fig.savefig('{}.png'.format(savename),
                    dpi=300,
                    transparent=True,
                    bbox_inches='tight')
import numpy as np
from scipy.stats import f
import matplotlib.pyplot as plt

x = np.arange(0, 10, .001)
plt.plot(x, f.pdf(x, 1, 1), x, f.pdf(x, 3, 8), x, f.pdf(x, 8, 3), x, f.pdf(x, 40, 40))

plt.xlim(0,10)
plt.ylim(0,1.5)

plt.xlabel('$x$',fontsize=20)
plt.ylabel('$P(X=x | K_1,K_2)$',fontsize=18)

plt.title('Fisher-Distribution',fontsize=20)
plt.legend(['$K_1=1, K_2=1$','$K_1=3, K_2=8$', '$K_1=8 ,K_2=3$', '$K_1=40, K_2=40$'])
plt.show()