def fit_binomial(x, n, alpha=0.01):
    ''' Clopper-Pearson fit of a binomial proportion.
        If n=0, the distribution is uniform in [0,1]. '''
    from scipy.stats import f
    if n == 0:
        ml = 0.5
    else:
        ml = float(x) / float(n)

    # Lower limit
    if x == 0:
        lb = 0
    else:
        nu1 = 2 * x
        nu2 = 2 * (n - x + 1)
        F = f.ppf(alpha / 2, nu1, nu2)
        lb = (nu1 * F) / (nu2 + nu1 * F)

    # Upper limit
    if x == n:
        ub = 1
    else:
        nu1 = 2 * (x + 1)
        nu2 = 2 * (n - x)
        F = f.ppf(1 - alpha / 2, nu1, nu2)
        ub = (nu1 * F) / (nu2 + nu1 * F)

    assert 0 <= lb <= ml <= ub <= 1
    # np and fit_dtype are assumed to be defined at module level
    res = np.ndarray((), dtype=fit_dtype)
    res['mean'] = ml
    res['upper'] = ub
    res['lower'] = lb
    res['confidence'] = alpha  # store the requested level rather than a constant

    res['skewed'] = not (lb < 0.5 < ub)  # True when 0.5 lies outside the interval
    return res
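A minimal sketch of how this snippet might be used. fit_dtype is not shown in the example, so the structured dtype below is an assumption based on the fields the function fills in:

import numpy as np

# hypothetical dtype matching the fields used by fit_binomial (assumption)
fit_dtype = np.dtype([('mean', 'f8'), ('lower', 'f8'), ('upper', 'f8'),
                      ('confidence', 'f8'), ('skewed', '?')])

res = fit_binomial(x=7, n=20, alpha=0.01)
print(res['lower'], res['mean'], res['upper'])  # 99% Clopper-Pearson interval around 0.35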
Example #2
def chi2_distribution():
    # assumes: import numpy as np; import matplotlib.pyplot as plt
    #          from scipy.stats import chi2, norm, t, f
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))

    # simulate the chi2 distribution: sum of squared standard normals
    y = []
    n = 10
    for i in range(1000):
        r = norm.rvs(size=n)
        y.append(np.sum(r ** 2))

    ax.hist(y, density=True, alpha=0.2)  # density=True replaces the removed 'normed' kwarg
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))

    # simulate the t-distribution: standard normal over sqrt(chi2/df)
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the F-distribution: F = (chi2/dfn) / (chi2/dfm), no square root
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
Example #3
def meanTest(data, means_old):
    """
    Use Hotelling's T-squared test to determine whether both components have equal means

    @param means_old: old mean values for one component, shape: (n_features,)

    @return: True if the means are equal, False if not
    """
    # assumes: import numpy as np; from scipy import linalg; from scipy.stats import f
    X = data
    n_samples = data.shape[0]
    n_features = data.shape[1]

    S = np.cov(X, rowvar=0) # if rowvar = 0, each row represents an observation
    Sinv = linalg.inv(S)

    m = X.mean(axis=0) - means_old
    T_squared = n_samples * np.dot(np.dot(m.T, Sinv), m)

    test_statistic = ((n_samples - n_features) * T_squared) / float(n_features*(n_samples - 1))

    alpha = 0.05
    # critical value: upper alpha quantile of F(n_features, n_samples - n_features)
    threshold = f.ppf(1 - alpha, n_features, (n_samples - n_features))

    if test_statistic <= threshold:
        print("Mean test passed")
        return True
    else:
        return False
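A quick synthetic check of the test above; the data and reference means here are made up for illustration:

import numpy as np
from scipy import linalg
from scipy.stats import f

rng = np.random.default_rng(0)
data = rng.normal(loc=[1.0, 2.0], scale=1.0, size=(200, 2))  # 200 samples, 2 features
meanTest(data, np.array([1.0, 2.0]))   # same means: expected to pass
meanTest(data, np.array([3.0, -1.0]))  # shifted means: expected to fail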
Example #4
def sampling_distribution():
    # assumes: import numpy as np; import matplotlib.pyplot as plt
    #          from scipy.stats import norm, chi2, f
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the sampling distribution of a ratio of sample variances
    y = []
    for i in range(1000):
        r1 = norm.rvs(loc=5, scale=2, size=dfn + 1)
        r2 = norm.rvs(loc=3, scale=2, size=dfm + 1)
        # unbiased variances (ddof=1) so the ratio follows F(dfn, dfm)
        rf = np.var(r1, ddof=1) / np.var(r2, ddof=1)
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)  # density=True replaces the removed 'normed' kwarg
    plt.savefig('sampling_distribution.png')


def F_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))

    # simulate the F-distribution: F = (chi2/dfn) / (chi2/dfm), no square root
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('F_distribution.png')
def Computing_UCL(num_train_beats, dim_projected, alpha):
    '''
    Computing upper control limit (UCL) for the T-squared statistic
    :param num_train_beats: number of ECG beats in training set
    :param dim_projected: dimension of the projected space
    :param alpha: predefined alpha level (0.01 in the paper)
    :return: UCL value
    '''
    n, p = num_train_beats, dim_projected
    return (p * (n - 1) * (n + 1) * f.ppf(1 - alpha, p, n - p)) / (n * (n - p))
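A small numeric exercise of the limit above; the values are chosen here purely for illustration:

from scipy.stats import f

# e.g. 1000 training beats projected onto 3 dimensions at alpha = 0.01
ucl = Computing_UCL(1000, 3, 0.01)
print(ucl)  # close to 3 * f.ppf(0.99, 3, 997), since (n-1)(n+1)/(n(n-p)) is near 1 for large n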
def calc_scipy():
    # assumes: from numpy import loadtxt; from scipy.stats import f, f_oneway
    #          DATA_PATH points at the CSV file used by this script
    # read data
    data = loadtxt(DATA_PATH, delimiter=",", skiprows=1)

    # calculation
    F_value, p_value = f_oneway(data[:, 0], data[:, 1], data[:, 2])

    # extra: critical values for comparison
    df1 = data.shape[1] - 1
    df2 = data.shape[0] * data.shape[1] - data.shape[1]
    F_dist = f.ppf(0.95, df1, df2)
    F_dist_001 = f.ppf(0.99, df1, df2)

    # output
    print('[Scipy]')
    print('F value:', F_value)
    print('p value:', p_value)
    print('F dist(0.05):', F_dist, abs(F_value) > F_dist)
    print('F dist(0.01):', F_dist_001, abs(F_value) > F_dist_001)
Example #8
    def plot(self, data, size, newdata=None):
        # assumes: import numpy as np; from scipy.stats import f
        #          cova() and var_cov() are helpers defined elsewhere in this module

        sizes = data[:, 0]
        sample = data[:, 1:]

        # group the rows by their sample number (first column)
        samples = dict()
        for n, value in zip(sizes, sample):
            if n in samples:
                samples[n] = np.vstack([samples[n], value])
            else:
                samples[n] = value

        m = len(samples.keys())   # number of samples
        n = len(samples[1])       # observations per sample
        p = len(samples[1].T)     # number of variables

        variance, S = [], []
        for i in range(m):
            mat = np.cov(samples[i + 1].T, ddof=1)
            variance.append(mat.diagonal())
            S.append(cova(mat))

        variance, S = np.array(variance), np.array(S)

        means = np.array([samples[xs + 1].mean(axis=0) for xs in range(m)])
        means_total = means.mean(axis=0)

        Smat = var_cov(variance, S)
        Smat_inv = np.linalg.inv(Smat)

        # T-squared values; the factor 5 appears to be a hard-coded subgroup size
        values = []
        for i in range(m):
            a = means[i] - means_total
            values.append(5 * a @ Smat_inv @ a.T)

        # control limits from the F distribution (0.00135/0.99865 are the 3-sigma tails)
        p1 = (p * (m - 1) * (n - 1))
        p2 = (m * n - m - p + 1)
        lcl = (p1 / p2) * f.ppf(0.00135, p, p2)
        center = (p1 / p2) * f.ppf(0.50, p, p2)
        ucl = (p1 / p2) * f.ppf(0.99865, p, p2)

        return (values, center, lcl, ucl, self._title)
Example #9
def Fmask(Fimg, dfnum, dfdenom, pvalue=1.0e-04):
    """
    Create mask for use in estimating pooled covariance based on
    an F contrast.
    """
    # FDbn appears to be the scipy.stats F distribution; Image comes from nipy

    ## TODO check nipy.algorithms.statistics.models.contrast to see if rank is
    ## correctly set -- I don't think it is right now.
    print(dfnum, dfdenom)  # debug output
    thresh = FDbn.ppf(pvalue, dfnum, dfdenom)
    return Image(np.greater(np.asarray(Fimg), thresh), Fimg.grid.copy())
Example #10
    def calculate_t2_statistics(self, k, name='t2', plot=True):
        from scipy.stats import f

        index = np.nonzero(self.y == 0)
        x_normal = self.x[index[0], :]

        # data in the new basis (pca() is a helper defined elsewhere)
        xhat_normal, wl, vl = pca(x_normal, k)

        mu = np.mean(x_normal, 0)

        n = x_normal.shape[0]
        p = 0.05
        dfn = k
        dfd = n - k
        f_crit = f.ppf(1.0 - p / 2, dfn, dfd)  # F critical value
        t2_thr = (1.0 * k * (n - 1) / (n - k)) * f_crit

        # project all data onto the PCA basis computed from normal data only
        xhat = np.real(np.dot(self.x - mu, vl))

        lamb = np.eye(k)
        for i in range(0, k):
            lamb[i, :] = wl[i] * lamb[i, :]

        t2_statistics = np.array([])
        for i in range(0, xhat.shape[0]):
            t2_statistics = np.append(t2_statistics, np.dot(
                np.dot(xhat[i, :], np.linalg.inv(lamb)), np.transpose(xhat[i, :])))

        t2_statistics = np.reshape(t2_statistics, (-1, 1))
        yhat = t2_statistics > t2_thr

        time_fault = np.nonzero(self.y != 0)[0][0]

        if plot:
            plt.semilogy(np.arange(0, xhat.shape[0]), t2_statistics)
            # repeated plot calls overlay by default (plt.hold was removed in matplotlib 3.0)
            plt.semilogy(t2_thr * np.ones_like(t2_statistics), '--g')
            plt.axvline(x=time_fault, ymin=0, ymax=1, linestyle='--', color='r')

            plt.xlabel('time (min)')
            plt.ylabel('log($t^2$ statistics)')
            plt.legend(
                ['$log(t^2)$ statistic', '$t^2$ threshold', 'fault starting time'])

            plt.title(
                '$t^2$ statistic over time for fault {:s} using {:d} principal components'.format(name, k))
            plt.savefig(
                self.path + 'figures/' + name + 't2vstime_k=' + str(k) + '.eps')
            plt.clf()
        return yhat, t2_statistics, {'lamb': lamb, 'thr': t2_thr}
def infer_by_chi(l1, l2, alpha=0.01):
    # F test for the ratio of two sample variances
    # (s_2 is a sample-variance helper defined elsewhere)
    from scipy.stats import f

    ss1 = s_2(l1)
    ss2 = s_2(l2)
    fv = ss1 / ss2
    res = f.ppf(1 - alpha, len(l1) - 1, len(l2) - 1)
    print('critical {} statistic {}'.format(res, fv))
    if res > fv:
        print('sigma1 <= sigma2')  # cannot reject H0
    else:
        print('sigma1 > sigma2')   # reject H0
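The helper s_2 is not shown in the snippet; a minimal sketch assuming it is the unbiased sample variance:

import numpy as np

def s_2(values):
    # assumed definition: unbiased sample variance
    return np.var(values, ddof=1)

infer_by_chi([2.1, 2.5, 1.9, 2.7, 2.2], [1.0, 1.1, 0.9, 1.2, 1.0], alpha=0.05)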
def binofit(x, n, alpha=0.01):
    ''' Clopper-Pearson confidence interval, ported from MATLAB's binofit. '''
    # assumes: from scipy.stats import f

    # Lower limit
    if x == 0:
        lb = 0
    else:
        nu1 = 2 * x
        nu2 = 2 * (n - x + 1)
        F = f.ppf(alpha / 2, nu1, nu2)
        lb = (nu1 * F) / (nu2 + nu1 * F)

    # Upper limit
    if x == n:
        ub = 1
    else:
        nu1 = 2 * (x + 1)
        nu2 = 2 * (n - x)
        F = f.ppf(1 - alpha / 2, nu1, nu2)
        ub = (nu1 * F) / (nu2 + nu1 * F)

    return (lb, ub)
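A cross-check of the interval against scipy's exact binomial test, which implements the same Clopper-Pearson construction (available in scipy >= 1.7):

from scipy.stats import binomtest, f

lb, ub = binofit(7, 20, alpha=0.05)
ci = binomtest(7, 20).proportion_ci(confidence_level=0.95, method='exact')
print((lb, ub))           # roughly (0.154, 0.592)
print((ci.low, ci.high))  # should agree to numerical precision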
Example #13
def gaussian_concentration(dataframe, unity, alpha=0.05):
    """
    Concentration estimate: number of points divided by the volume of the
    (1 - alpha) prediction ellipsoid of the (X, Y, Z) coordinates.
    """
    # assumes: import numpy as np; from scipy.stats import f as F
    #          from scipy.special import gamma; SI() converts the data to SI units
    if dataframe.shape[0] > 1:
        X, Y, Z = SI(dataframe, unity)
        Sigma = np.cov(np.vstack((X, Y, Z)))
        U, axes, R = np.linalg.svd(Sigma)
        p, n = axes.size, min(len(X), len(Y), len(Z))
        fppf = F.ppf(1 - alpha, p, n - p) * (n - 1) * p * (n + 1) / n / (n - p)
        axes = np.sqrt(axes * fppf)
        vol = (np.pi ** (p / 2) / gamma(p / 2 + 1) * np.prod(axes))
        return (1 - alpha) * dataframe.shape[0] / vol
    else:
        return float("nan")
Example #14
def binconf(x, n, alpha=0.05):
    # assumes: import numpy as np; from scipy.stats import f, norm
    # Clopper-Pearson (exact) limits
    nu1 = 2 * (n - x + 1)
    nu2 = 2 * x
    if x > 0:
        ll = x / (x + f.ppf(1 - alpha / 2, nu1, nu2) * (n - x + 1))
    else:
        ll = 0
    nu1p = nu2 + 2
    nu2p = nu1 - 2
    if x < n:
        pp = f.ppf(1 - alpha / 2, nu1p, nu2p)
    else:
        pp = 1
    ul = ((x + 1) * pp) / (n - x + (x + 1) * pp)
    # Wilson score interval, with adjustments at x == 1 and x == n-1
    zcrit = -1 * norm.ppf(alpha / 2)
    z2 = zcrit ** 2
    p = x / n
    cl = (p + z2 / 2 / n + np.array([-1, 1]) * zcrit * np.sqrt((p * (1 - p) + z2 / 4 / n) / n)) / (1 + z2 / n)
    if x == 1:
        cl[0] = -np.log(1 - alpha) / n
    elif x == (n - 1):
        cl[1] = 1 + np.log(1 - alpha) / n

    return ll, ul, cl
Example #15
def exact_interval(y1, y2, M1, M2, alpha):
    """Return exact bound.
    Note: All input values must be of float type.
    """
    # assumes: from scipy.stats import f
    n = y1 + y2
    df11 = 2 * (n - y1 + 1)
    df12 = 2 * y1
    df21 = 2 * (y1 + 1)
    df22 = 2 * (n - y1)
    f_lower = f.ppf(1 - alpha / 2, df11, df12)
    f_upper = f.ppf(1 - alpha / 2, df21, df22)  # the upper bound uses its own dfs
    lb_pi = y1 / (y1 + (n - y1 + 1) * f_lower)
    ub_pi = 1 - (n - y1) / (n - y1 + (y1 + 1) * f_upper)
    lb = lb_pi / (1 - lb_pi) * M2 / M1
    ub = ub_pi / (1 - ub_pi) * M2 / M1
    return (lb, ub)
Example #16
 def regiaoAbrangencia(self, PA=0.95):
     '''
     Method to evaluate the coverage region of the parameters
     '''

     Fisher = f.ppf(PA, self.NP, (self.NE * self.NY - self.NP))
     Comparacao = self.Otimizacao.best_fitness * (1 + float(self.NP) / (self.NE * self.NY - float(self.NP)) * Fisher)

     Regiao = []; Hist_Posicoes = []; Hist_Fitness = []
     for it in range(self.Otimizacao.itmax):
         for ID_particula in range(self.Otimizacao.Num_particulas):
             if self.Otimizacao.historico_fitness[it][ID_particula] <= Comparacao:
                 Regiao.append(self.Otimizacao.historico_posicoes[it][ID_particula])
             Hist_Posicoes.append(self.Otimizacao.historico_posicoes[it][ID_particula])
             Hist_Fitness.append(self.Otimizacao.historico_fitness[it][ID_particula])

     self.parametros._parametro(self.parametros.estimativa, self.parametros.matriz_covariancia, Regiao)

     return (Hist_Posicoes, Hist_Fitness)
def chapter_6(input_list):
    # assumes: import numpy as np; from numpy.linalg import inv; from scipy.stats import f
    for choice in input_list:
        if choice == 1:
            print("Already explained in the example 6.1")
        if choice == 7:
            means1 = np.matrix('204.4;556.6')
            means2 = np.matrix('130;355')
            cov_matrix1 = np.matrix('13825.3,23823.4;23823.4,73107.4')
            cov_matrix2 = np.matrix('8632,19616.7;19616.7,55964.5')
            print('cov_matrix1', cov_matrix1)
            print('cov_matrix2', cov_matrix2)
            n1, n2 = 45, 55
            s_pooled = ((n1 - 1) * cov_matrix1 + (n2 - 1) * cov_matrix2) / float(n1 + n2 - 2)
            print('s_pooled', s_pooled)
            mean_diff = means1 - means2
            print('mean_diff', mean_diff)
            T_square = np.transpose(mean_diff) * (((n1 * n2 / (n2 + n1)) * inv(s_pooled)) * mean_diff)
            print('T_square', T_square)
            c_square = (n1 + n2 - 2) * 2 * f.ppf(0.95, 2, n1 + n2 - 2 - 1) / (n1 + n2 - 2 - 1)
            print('c_square', c_square)
            a = inv(s_pooled) * mean_diff
            print('Linear Combination', a)
Example #18
    def var_var(self, alpha):
        f0 = self.S1 ** 2 / self.S2 ** 2
        n1, n2 = self.n1, self.n2
        # hypothesis tests: two-sided, upper-tailed, lower-tailed
        H1a = f.ppf(1 - alpha / 2.0, n1 - 1, n2 - 1) < f0 or f.ppf(alpha / 2.0, n1 - 1, n2 - 1) > f0
        H1b = f.ppf(alpha / 2.0, n1 - 1, n2 - 1) < f0
        H1c = f.ppf(1 - alpha / 2.0, n1 - 1, n2 - 1) > f0
        # p-values
        p1a = np.max(np.array([f.sf(f0, n1 - 1, n2 - 1), 1 - f.sf(f0, n1 - 1, n2 - 1)]))
        p1b = f.sf(f0, n1 - 1, n2 - 1)
        p1c = 1 - f.sf(f0, n1 - 1, n2 - 1)

        # confidence interval for the variance ratio sigma1^2 / sigma2^2
        c1 = self.S1 ** 2 / self.S2 ** 2 * f.ppf(alpha / 2.0, n2 - 1, n1 - 1)
        c2 = self.S1 ** 2 / self.S2 ** 2 * f.ppf(1 - alpha / 2.0, n2 - 1, n1 - 1)
        return H1a, H1b, H1c, p1a, p1b, p1c, (c1, c2)
Example #19
    def get_scale(confProb, apriori, dim, df=0):
        """
        returns confidence scale for
        apriori: True/False: apriori/aposteriori standard deviation
        dimension dim
        df: degrees of freedom
        """

        if apriori:
            # for apriori standard deviations
            from scipy.stats import chi2
            from math import sqrt

            return sqrt(chi2.ppf(confProb, dim))
        else:
            if df == 0:
                raise ConfidenceScaleError("Zero degrees of freedom")

            # for aposteriori standard deviations
            from scipy.stats import f
            from math import sqrt

            return sqrt(dim*f.ppf(confProb, dim, df))
Example #20
 def get_fisher_critical(probability, f3, f4):
     return f.ppf(probability, f3, f4)
Example #21
# assumes: model2 is a fitted statsmodels OLS model, train is the training frame,
#          ybar is the mean of the response, Profit_New has a Profit column
p = model2.df_model
n = train.shape[0]
# regression sum of squares
RSS = np.sum((model2.fittedvalues - ybar) ** 2)
# residual (error) sum of squares
ESS = np.sum(model2.resid ** 2)
# F statistic
F = (RSS / p) / (ESS / (n - p - 1))
print('F statistic:', F)
# F value reported by the model
model2.fvalue

# import the module
from scipy.stats import f
# theoretical value of the F distribution
F_theory = f.ppf(q=0.95, dfn=p, dfd=n - p - 1)
print('Theoretical F value:', F_theory)

# model summary
model2.summary()


# normality check
# histogram method
# import third-party modules
import scipy.stats as stats
# render Chinese characters and minus signs correctly
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False
# draw the histogram
sns.distplot(a=Profit_New.Profit, bins=10, fit=stats.norm, norm_hist=True)
Example #22
# assumes: read_column_from_csv, ess, rss are helpers defined elsewhere;
#          variation, coefficient_vector, x_2, x_3, k, n come from earlier code
x_4 = read_column_from_csv(column_number=2 + (variation - 1) * 4,
                           file='data/4problem.csv')
y_1 = read_column_from_csv(column_number=3 + (variation - 1) * 4,
                           file='data/4problem.csv')

y_estimation = [
    coefficient_vector[0] + coefficient_vector[1] * x_2[i] +
    coefficient_vector[2] * x_3[i] + coefficient_vector[3] * x_4[i]
    for i in range(40)
]

ess_ur = ess(y_1, y_estimation)
rss_ur = rss(y_1, y_estimation)

# Fisher dist
f_crit = f.ppf(0.95, k - 1, n - k)
f_real = ess_ur / (k - 1) / (rss_ur / (n - k))

print('F (95%, k-1, n-4) is {}'.format(f_crit))
print('ess / (k - 1) / (rss / (n - k)) is {}'.format(f_real))

if f_crit < f_real:
    print(
        'Reject H0 (b1=b2=b3=b4=0): the regression model is significant overall'
    )
else:
    print(
        'Fail to reject H0 (b1=b2=b3=b4=0): the regression model is not significant overall'
    )

print()
Example #23
# -*- coding: utf-8 -*-
'''
Goal : 
Author : Yonghan Jung, ISyE, KAIST 
Date : 15
Comment 
- 

'''

''' Library '''
import numpy as np
from scipy.stats import f

''' Function or Class '''


class Example:
    def __init__(self):
        return None


if __name__ == "__main__":
    DoF1 = 10
    DoF2 = 20

    print(f.ppf(0.90, DoF1, DoF2))
Example #24
            y_hat.append(
                b0 + b1 * x_matrix[i][0] + b2 * x_matrix[i][1] + b3 * x_matrix[i][2] + b12 * x_matrix[i][0] *
                x_matrix[i][1] +
                b13 * x_matrix[i][0] * x_matrix[i][2] + b123 * x_matrix[i][0] * x_matrix[i][1] * x_matrix[i][2])

            print(
                f"^y{chr(8321 + i)} = {b0:.2f}{b1:+.2f}*x{chr(8321 + i)}\u2081{b2:+.2f}*x{chr(8321 + i)}\u2082{b3:+.2f}*x{chr(8321 + i)}\u2083{b12:+.2f}*x{chr(8321 + i)}\u2081"
                f"*x{chr(8321 + i)}\u2082{b13:+.2f}*x{chr(8321 + i)}\u2081*x{chr(8321 + i)}\u2083{b123:+.2f}*x{chr(8321 + i)}\u2081*x{chr(8321 + i)}\u2082*x{chr(8321 + i)}\u2083 "
                f"= {y_hat[i]:.2f}")

        """FISHER"""

        d = 2
        f4 = N - d
        S2_ad = 0
        for i in range(N):
            S2_ad += (m / (N - d) * ((y_hat[i] - my_list[i]) ** 2))

        Fp = S2_ad / S2B
        Ft = f.ppf(1 - q, f4, f3)
        print("Fp:", Fp)
        print("Ft:", Ft)
        if Fp > Ft:
            print("The regression equation is not adequate to the original at significance level 0.05")
            print("To start over, enter any character in the console\n")
            if input() == 0:
                break
        else:
            print("The regression equation is adequate to the original at significance level 0.05")
            break
    title = 'Model Bias - ' + month[i]
    filename = FIG_PATH + 'u200_bias_' + month[i] + '.png'
    #interp model grid to obs grid and compute test
    winds_s4_mean_new = winds_s4_mean.u[i, :, :].interp_like(
        winds_erai_mean.u[mm[i], :, :], method='linear')
    winds_s4_sd_new = winds_s4_sd.u[i, :, :].interp_like(
        winds_erai_sd.u[mm[i], :, :], method='linear')
    SE = np.sqrt(
        np.power(winds_erai_sd.u.values[mm[i], :, :], 2) / 36 +
        np.power(winds_s4_sd_new.values, 2) / (36 * 51))
    tt = (winds_s4_mean_new.values - winds_erai_mean.u.values[mm[i], :, :])
    DF = 36 + 51 * 36 - 2
    #DF = np.power(SE, 4) / (np.power(np.power(winds_erai_sd[7 + i, :, :], 2)/ 36) / 35 +
    #			np.power(np.power(winds_s4_sd_new, 2)/ (36 * 51)) /(36 * 51 - 1))
    t_cut = t.ppf(0.025, DF)
    df_cut = f.ppf([0.025, 0.975], 35, 36 * 51 - 1)
    F = np.power(winds_erai_sd.u.values[mm[i], :, :], 2) / np.power(
        winds_s4_sd_new.values, 2)
    tt = ma.masked_array(tt,
                         mask=np.logical_and((tt / SE) > t_cut,
                                             (tt / SE) < np.abs(t_cut)))
    F = ma.masked_array(F, mask=np.logical_and(F > df_cut[0], F < df_cut[1]))
    PlotMeanStdTest(tt, F, lat_erai, lon_erai, title, filename)

#seasonal means
season = ['ASO', 'SON', 'OND', 'NDJ', 'DJF']
lmonth = ['Aug', 'Sep', 'Oct', 'Nov', 'Dec']

for i in np.arange(0, 5):
    winds_erai_seas_mean = winds_erai['u'].sel(
        **{
Example #26
# assumes: SSE, SSR, SST, Rsq, beta, sigma_i, n, p come from earlier cells;
#          import numpy as np; from scipy.stats import f, norm
MSE = SSE[0, 0] / (n - p - 1)
# compute the MSR
MSR = SSR[0, 0] / p
# compute the MST
MST = SST[0, 0] / (n - 1)

# compute the F statistic from R-squared
F = (Rsq * (n - p - 1)) / ((1 - Rsq) * p)

# define a range, i.e. how much of the curve to cover
Rango = 0.9
Ftest = f.ppf(Rango, p, n - (p + 1))
P_i = np.zeros(p)
if F > Ftest:
    tzeros = beta[:, 0] / sigma_i
    P_value = 2 * (1 - norm.cdf(tzeros))  # integrate the tails
    for i in range(p):
        if P_value[i] < 0.5:
            P_i[i] = 1
        else:
            P_i[i] = 0
else:
    # print("no luck")
    quit()
Example #27
def p_lo(alpha, n, N):
    # Clopper-Pearson lower confidence bound for a proportion with n successes out of N
    quant_lo = f.ppf(alpha, 2 * n, 2 * (N - n + 1), loc=0, scale=1)
    return n * quant_lo / (N - n + 1 + n * quant_lo)
Example #28
def cohren(f1, f2, q=0.05):
    # Cochran's critical value expressed through the F distribution
    q1 = q / f1
    fisher_value = f.ppf(q=1 - q1, dfn=f2, dfd=(f1 - 1) * f2)
    return fisher_value / (fisher_value + f1 - 1)
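A short usage sketch, assuming f1 is the number of variance estimates being compared and f2 the degrees of freedom of each:

from scipy.stats import f

Gt = cohren(8, 2)  # critical value to compare against Gp = max(S2) / sum(S2)
print(Gt)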
Example #29
    def computeUCLD(self, npc, nob, p_value, phase):
        """
        UCL (Upper Control Limit) for D-statistic
        
        .. [Ref] PCA-based multivariate statistical network monitoring for anomaly detection
            http://www.sciencedirect.com/science/article/pii/S0167404816300116
        
        Parameters
        ----------
        npc: int 
            Number of PCs
        nob: int 
            Number of observations
        p_value: float 
            p-value of the test, in (0,1]
        phase: int 
            SPC phase
            1: Phase I
            2: Phase II
            
        Return
        ------
        lim: float64
            control limit at a 1-``p_value`` confidence level.
            
        Raises
        ------
        MSPCError
            When something is going wrong during the mathematical operations
            
        Examples
        --------
        >>> from msnm.utils import datautils as tools
        >>> import numpy as np
        >>> import scipy.io as sio
        >>> from msnm.modules.ma import pca
        >>> from msnm.modules.ma import mspc

        >>> # Original data (complete workspace of matlab example in mspc.m of MEDA)
        >>> originalData = './datatest/data_adicov_mspc.mat'
          
        >>> # Calibration matrix
        >>> data = sio.loadmat(originalData)
        >>> x = data['X']
        >>> weights = np.ones((x.shape[0],1))
          
        >>> # data preprocess auto-scaled
        >>> xcs, average, scale = tools.preprocess2D(x,2,weights)
          
        >>> #PCA
        >>> pcaModel = pca.PCA()
        >>> pcaModel.setPCs(1) # like in mspc.m example
        >>> pcaModel.setData(xcs)
        >>> pcaModel.runPCA()
          
        >>> T = pcaModel.getScores()
        >>> P = pcaModel.getLoadings()
        
        >>> # Compute UCLs
        >>> mspcInstance = mspc.MSPC()
        >>> # Number of observations
        >>> nob = x.shape[0]
        >>> # Compute UCL for D-statistics with 95% of confidence level
        >>> mspcInstance.computeUCLD(1, nob, 0.05, 2)  # npc = 1, as set with setPCs(1) above
        
        >>> print("UCLd --> %f" % mspcInstance.getUCLD())
        
        """

        method_name = "computeUCLD()"

        try:

            if phase == 2:
                lim = (npc * (nob * nob - 1.0) /
                       (nob *
                        (nob - npc))) * fisher.ppf(1 - p_value, npc, nob - npc)
            else:
                lim = (nob - 1.0)**2 / nob * beta.ppf(1 - p_value, npc / 2.0,
                                                      (nob - npc - 1) / 2.0)

            # Check if the limit is an ndarray of [1x1] dimensions and get the float value
            if isinstance(lim, np.ndarray):
                lim = lim[0, 0]

            # TODO: Sometimes after computations numpy takes UCLd as complex with 0j imaginary part
            if isinstance(lim, complex):
                logging.warning(
                    "UCLd has a complex value of %s. Getting just the real part.",
                    lim)
                lim = lim.real

            self._UCLD = lim

        except Exception:
            raise MSPCError(self, sys.exc_info()[0], method_name)
Example #30
def main(n, m):
    # assumed imports for this snippet: import math, random; import numpy as np;
    # from numpy.linalg import solve; from scipy.stats import f, t;
    # from beautifultable import BeautifulTable
    x1_min = -30
    x1_max = 0
    x2_min = -35
    x2_max = 10
    x3_min = 0
    x3_max = 20
    x01 = (x1_max + x1_min) / 2
    x02 = (x2_max + x2_min) / 2
    x03 = (x3_max + x3_min) / 2

    dx1 = x1_max - x01
    dx2 = x2_max - x02
    dx3 = x3_max - x03

    xn = [[-1, -1, -1, +1, +1, +1, -1, +1, +1, +1],
          [-1, -1, +1, +1, -1, -1, +1, +1, +1, +1],
          [-1, +1, -1, -1, +1, -1, +1, +1, +1, +1],
          [-1, +1, +1, -1, -1, +1, -1, +1, +1, +1],
          [+1, -1, -1, -1, -1, +1, +1, +1, +1, +1],
          [+1, -1, +1, -1, +1, -1, -1, +1, +1, +1],
          [+1, +1, -1, +1, -1, -1, -1, +1, +1, +1],
          [+1, +1, +1, +1, +1, +1, +1, +1, +1, +1],
          [-1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0, 0],
          [+1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0, 0],
          [0, -1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0],
          [0, +1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0],
          [0, 0, -1.73, 0, 0, 0, 0, 0, 0, 2.9929],
          [0, 0, +1.73, 0, 0, 0, 0, 0, 0, 2.9929],
          [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

    x1 = [
        x1_min, x1_min, x1_min, x1_min, x1_max, x1_max, x1_max, x1_max,
        -1.73 * dx1 + x01, 1.73 * dx1 + x01, x01, x01, x01, x01, x01
    ]
    x2 = [
        x2_min, x2_min, x2_max, x2_max, x2_min, x2_min, x2_max, x2_max, x02,
        x02, -1.73 * dx2 + x02, 1.73 * dx2 + x02, x02, x02, x02
    ]
    x3 = [
        x3_min, x3_max, x3_min, x3_max, x3_min, x3_max, x3_min, x3_max, x03,
        x03, x03, x03, -1.73 * dx3 + x03, 1.73 * dx3 + x03, x03
    ]

    x1x2 = [0] * 15
    x1x3 = [0] * 15
    x2x3 = [0] * 15
    x1x2x3 = [0] * 15
    x1kv = [0] * 15
    x2kv = [0] * 15
    x3kv = [0] * 15

    for i in range(15):
        x1x2[i] = x1[i] * x2[i]
        x1x3[i] = x1[i] * x3[i]
        x2x3[i] = x2[i] * x3[i]
        x1x2x3[i] = x1[i] * x2[i] * x3[i]
        x1kv[i] = x1[i]**2
        x2kv[i] = x2[i]**2
        x3kv[i] = x3[i]**2

    tmp_list_a = list(
        zip(x1, x2, x3, x1x2, x1x3, x2x3, x1x2x3, x1kv, x2kv, x3kv))

    plan_table = BeautifulTable()
    plan_table.columns.header = [
        'X1', 'X2', 'X3', 'X1X2', 'X1X3', 'X2X3', 'X1X2X3', 'X1X1', 'X2X2',
        'X3X3'
    ]
    print("Planning matrix with naturalized coefficients X:")
    for i in range(len(tmp_list_a)):
        plan_table.rows.append(tmp_list_a[i])
    print(plan_table)

    def func(X1, X2, X3):
        y = 5.4 + 3.6 * X1 + 6.6 * X2 + 7.7 * X3 + 8.0 * X1 * X1 + 0.3 * X2 * X2 + 2.5 * X3 * X3 + 5.9 * X1 * X2 + 0.3 * X1 * X3 + 7.2 * X2 * X3 + 5.3 * X1 * X2 * X3 + random.randint(
            0, 10) - 5
        return y

    y = [[
        func(tmp_list_a[j][0], tmp_list_a[j][1], tmp_list_a[j][2])
        for _ in range(m)
    ] for j in range(15)]

    plan_y = BeautifulTable()
    plan_y.columns.header = ['y1', 'y2', 'y3']
    print("Planning matrix y:")
    for i in range(len(y)):
        plan_y.rows.append(y[i])
    print(plan_y)

    aver_y = []
    for i in range(len(y)):
        aver_y.append(np.mean(y[i], axis=0))
    print("Average response values:\n{}".format(aver_y))

    disp = []
    for i in range(len(y)):
        a = 0
        for k in y[i]:
            a += (k - np.mean(y[i], axis=0))**2
        disp.append(a / len(y[i]))
    print("Dispersion:\n{}".format(disp))

    def finds_value(num):
        a = 0
        for j in range(15):
            a += aver_y[j] * tmp_list_a[j][num - 1] / 15
        return a

    def a(f, s):
        a = 0
        for j in range(15):
            a += tmp_list_a[j][f - 1] * tmp_list_a[j][s - 1] / 15
        return a

    my = sum(aver_y) / 15
    mx = []
    for i in range(10):
        number_lst = []
        for j in range(15):
            number_lst.append(tmp_list_a[j][i])
        mx.append(sum(number_lst) / len(number_lst))

    # normal-equations matrices assembled from the helpers above
    determinant1 = [[1] + mx] + \
        [[mx[i - 1]] + [a(i, j) for j in range(1, 11)] for i in range(1, 11)]

    determinant2 = [my] + [finds_value(i) for i in range(1, 11)]

    beta = solve(determinant1, determinant2)
    print("Regression equation:")
    print(
        "y = {} + {} * X1 + {} * X2 + {} * X3 + {} * X1X2 + {} * X1X3 + {} * X2X3"
        " + {} * X1X2X3 + {} * X1^2 + {} * X2^2 + {} * X3^2".format(
            beta[0], beta[1], beta[2], beta[3], beta[4], beta[5], beta[6],
            beta[7], beta[8], beta[9], beta[10]))
    y_i = [0] * 15

    for k in range(15):
        y_i[k] = beta[0] + beta[1] * tmp_list_a[k][0] + beta[2] * tmp_list_a[k][1] + beta[3] * tmp_list_a[k][2] + \
                 beta[4] * tmp_list_a[k][3] + beta[5] * tmp_list_a[k][4] + beta[6] * tmp_list_a[k][5] + beta[7] * \
                 tmp_list_a[k][6] + beta[8] * tmp_list_a[k][7] + beta[9] * tmp_list_a[k][8] + beta[10] * tmp_list_a[k][9]

    print("Experimental values:\n{}".format(y_i))

    gp = max(disp) / sum(disp)
    gt = 0.3346  # tabulated Cochran critical value for this design
    print("\nCochran check\nGp = {}".format(gp))
    if gp < gt:
        print("Dispersions are homogeneous")
    else:
        print("Dispersions are inhomogeneous")

    sb = sum(disp) / len(disp)
    sbs = (sb / (15 * m))**0.5

    f3 = (m - 1) * n
    sign_coef = []
    insign_coef = []
    d = 11
    res = [0] * 11

    for j in range(11):
        t_pract = 0
        for i in range(15):
            if j == 0:
                t_pract += aver_y[i] / 15
            else:
                t_pract += aver_y[i] * xn[i][j - 1]
            res[j] = beta[j]
        if math.fabs(t_pract / sbs) < t.ppf(q=0.975, df=f3):
            insign_coef.append(beta[j])
            res[j] = 0
            d -= 1
        else:
            sign_coef.append(beta[j])
    print("\nStudent criterion:")
    print("Significant regression coefficients:",
          [round(i, 3) for i in sign_coef])
    print("Insignificant regression coefficients:",
          [round(i, 3) for i in insign_coef])
    y_st = []
    for i in range(15):
        y_st.append(res[0] + res[1] * x1[i] + res[2] * x2[i] + res[3] * x3[i] +
                    res[4] * x1x2[i] + res[5] * x1x3[i] + res[6] * x2x3[i] +
                    res[7] * x1x2x3[i] + res[8] * x1kv[i] + res[9] * x2kv[i] +
                    res[10] * x3kv[i])
    print("Values with the coefficients:\n{}".format(y_st))

    print("\nFisher adequacy check")
    sad = m * sum([(y_st[i] - aver_y[i])**2 for i in range(15)]) / (n - d)
    fp = sad / sb
    f4 = n - d
    print("fp =", fp)
    if fp > f.ppf(q=0.95, dfn=f4, dfd=f3) or len(insign_coef) == 2:
        print("The mathematical model is inadequate to the experimental data")
    else:
        print("The mathematical model is adequate to the experimental data")
Example #31
def linear(n, m):
    # planning_matrix_linear, regression_equation, dispersion, kriteriy_studenta,
    # kriteriy_fishera, regression and x_range are defined elsewhere in this script
    f1 = m - 1
    f2 = n
    f3 = f1 * f2
    q = 0.05

    x, y, x_norm = planning_matrix_linear(n, m, x_range)

    y_average, B = regression_equation(x, y, n)

    dispersion_arr = dispersion(y, y_average, n, m)

    temp_cohren = f.ppf(q=(1 - q / f1), dfn=f2, dfd=(f1 - 1) * f2)
    cohren_cr_table = temp_cohren / (temp_cohren + f1 - 1)
    Gp = max(dispersion_arr) / sum(dispersion_arr)

    print('\nCochran criterion check:\n')
    print(f'Computed value: Gp = {Gp}'
          f'\nTabulated value: Gt = {cohren_cr_table}')
    if Gp < cohren_cr_table:
        print(f'With probability {1 - q} the dispersions are homogeneous.')
    else:
        print("The number of experiments must be increased")
        m += 1
        linear(n, m)

    qq = (1 + 0.95) / 2
    student_cr_table = t.ppf(df=f3, q=qq)
    student_t = kriteriy_studenta(x_norm[:, 1:], y, y_average, n, m)

    print('\nTabulated Student criterion:\n', student_cr_table)
    print('Computed Student criterion:\n', student_t)
    res_student_t = [temp for temp in student_t if temp > student_cr_table]
    final_coefficients = [
        B[student_t.index(i)] for i in student_t if i in res_student_t
    ]
    print('Coefficients {} are statistically insignificant.'.format(
        [i for i in B if i not in final_coefficients]))

    y_new = []
    for j in range(n):
        y_new.append(
            regression([
                x[j][student_t.index(i)]
                for i in student_t if i in res_student_t
            ], final_coefficients))

    print(f'\nRegression equation values for {m} experiments: ')
    print(y_new)

    d = len(res_student_t)
    f4 = n - d
    Fp = kriteriy_fishera(y, y_average, y_new, n, m, d, dispersion_arr)
    Ft = f.ppf(dfn=f4, dfd=f3, q=1 - 0.05)

    print('\nFisher adequacy check:\n')
    print('Computed Fisher criterion: Fp =', Fp)
    print('Tabulated Fisher criterion: Ft =', Ft)
    if Fp < Ft:
        print('The mathematical model is adequate to the experimental data')
        return True
    else:
        print('The mathematical model is not adequate to the experimental data')
        return False
Example #32
def check(X, Y, B, n, m, norm=False):
    # dispersion, kriteriy_studenta, kriteriy_fishera, regression and
    # with_interaction_effect are defined elsewhere in this script
    f1 = m - 1
    f2 = n
    f3 = f1 * f2
    q = 0.05

    y_aver = [round(sum(i) / len(i), 3) for i in Y]
    print('\nMean value of y:', y_aver)

    dispersion_arr = dispersion(Y, y_aver, n, m)

    qq = (1 + 0.95) / 2
    student_cr_table = t.ppf(df=f3, q=qq)

    ts = kriteriy_studenta(X[:, 1:], Y, y_aver, n, m)

    temp_cohren = f.ppf(q=(1 - q / f1), dfn=f2, dfd=(f1 - 1) * f2)
    cohren_cr_table = temp_cohren / (temp_cohren + f1 - 1)
    Gp = max(dispersion_arr) / sum(dispersion_arr)

    print('Dispersion of y:', dispersion_arr)

    print(f'Gp = {Gp}')
    if Gp < cohren_cr_table:
        print(f'With probability {1 - q} the dispersions are homogeneous.')
    else:
        print("The number of experiments must be increased")
        m += 1
        with_interaction_effect(n, m)

    print('\nStudent criterion:\n', ts)
    res = [ts_i for ts_i in ts if ts_i > student_cr_table]  # avoid shadowing scipy.stats.t
    final_k = [B[i] for i in range(len(ts)) if ts[i] in res]
    print(
        '\nCoefficients {} are statistically insignificant, so they are excluded from the equation.'
        .format([round(i, 3) for i in B if i not in final_k]))

    y_new = []
    for j in range(n):
        y_new.append(
            regression([X[j][i] for i in range(len(ts)) if ts[i] in res],
                       final_k))

    print(f'\nValues of "y" with coefficients {final_k}')
    print(y_new)

    d = len(res)
    if d >= n:
        print('\nF4 <= 0')
        print('')
        return
    f4 = n - d

    Fp = kriteriy_fishera(Y, y_aver, y_new, n, m, d, dispersion_arr)

    Ft = f.ppf(dfn=f4, dfd=f3, q=1 - 0.05)

    print('\nFisher adequacy check')
    print('Fp =', Fp)
    print('Ft =', Ft)
    if Fp < Ft:
        print('The mathematical model is adequate to the experimental data')
        return True
    else:
        print('The mathematical model is not adequate to the experimental data')
        return False
from scipy.stats import f
# assumes: import numpy as np; import pandas as pd
csv_data = pd.read_csv('predict_.csv')
df = pd.DataFrame(csv_data)
df.columns = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'y1', 'yl', 'yi']
x1 = df['x1']
x2 = df['x2']
x3 = df['x3']
x4 = df['x4']
x5 = df['x5']
x6 = df['x6']
x7 = df['x7']
y1 = df['y1']
yi = df['yi']
mean_y = y1.mean()
ESS = np.sum((y1 - yi)**2)      # error (residual) sum of squares
RSS = np.sum((yi - mean_y)**2)  # regression sum of squares
TSS = np.sum((y1 - mean_y)**2)  # total sum of squares
print(ESS)
print(RSS)
print(TSS)
p = 7
# number of predictors: there are seven x variables, so p = 7
n = len(x1)
# number of observations: the length of the CSV file
F = (RSS / p) / (ESS / (n - p - 1))
F_theory = f.ppf(q=0.95, dfn=p, dfd=n - p - 1)
print(F)
print(F_theory)
# The F statistic far exceeds the theoretical F value, so we reject the null
# hypothesis: the multiple linear regression model is significant, i.e. its
# partial regression coefficients are not all zero.
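Equivalently, a p-value can be reported instead of comparing against a fixed quantile; f.sf is the survival function (1 - cdf) of the same F distribution:

from scipy.stats import f

p_value = f.sf(F, dfn=p, dfd=n - p - 1)  # probability of an F this large under H0
print(p_value)  # a value below 0.05 leads to the same rejection decision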
Example #34
def fore_chow(stock1, stock2, model, Flen, give=False, p=0, A=0, ut=0, maxp=5):
    #Flen:formation length
    if model == 1:
        model_name = 'H2'
    elif model == 2:
        model_name = 'H1*'
    else:
        model_name = 'H1'

    day1 = (np.vstack([stock1, stock2]).T)
    day1 = np.log(day1)
    h = len(day1) - Flen
    k = 2  # number of stocks
    n = Flen  # length of the formation-period data

    if give == False:
        y = (np.vstack([stock1[0:Flen], stock2[0:Flen]]).T)
        y = np.log(y)
        p = order_select(y, maxp)
        at, A, _ = para_vecm(y, model_name, p)
        #        at , A = para_vecm(y,model_name,p)
        ut = np.dot(at, at.T) / len(at.T)  #sigma_u

    Remain_A = A.copy()
    Remain_ut = ut.copy()
    Remain_p = p
    #LUKE pg184
    A = A.T
    phi_0 = np.eye(k)
    A1 = np.delete(A, 0, axis=0).T
    phi = np.hstack((np.zeros([k, 2 * (p - 1)]), phi_0))
    sigma_t = np.dot(np.dot(phi_0, ut), phi_0.T)  # sigma hat
    ut_h = []

    for i in range(1, h + 1):
        lag_mat = day1[len(day1) - i - p - 1:len(day1) - i, :]
        lag_mat = np.array(lag_mat[::-1])
        if p == 1:
            ut_h.append(lag_mat[0].T -
                        (A[0].T + np.dot(A[1:k * p + 1].T, lag_mat[1:2].T)).T)
        else:
            ut_h.append(lag_mat[0].T - (A[0].T + np.dot(
                A[1:k * p + 1].T, lag_mat[1:k * p - 1].reshape([k * p, 1]))).T)

    for i in range(h - 1):
        a = phi[:, i * 2:len(phi.T)]
        phi_i = np.dot(A1, a.T)
        sigma_t = sigma_t + np.dot(np.dot(phi_i, ut), phi_i.T)
        phi = np.hstack((phi, phi_i))
    phi = phi[:, ((p - 1) * k):len(phi.T)]
    ut_h = np.array(ut_h).reshape([1, h * 2])
    e_t = np.dot(phi, ut_h.T)

    # guard: if the inverse of sigma_t diverges, report a structural break
    try:
        tau_h = np.dot(np.dot(e_t.T, np.linalg.inv(sigma_t)), e_t) / k
    except Exception:
        return Remain_p, Remain_A, Remain_ut, 1
    else:
        if tau_h > float(f.ppf(
                0.99, k, n - k * p + 1)):  # tau_h > float(chi2.ppf(0.99, k)):
            return Remain_p, Remain_A, Remain_ut, 1  # structural break detected
        else:
            return Remain_p, Remain_A, Remain_ut, 0
Example #35
def intraclass_corr(data=None,
                    targets=None,
                    raters=None,
                    ratings=None,
                    nan_policy='raise'):
    """Intraclass correlation.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        Long-format dataframe. Data must be fully balanced.
    targets : string
        Name of column in ``data`` containing the targets.
    raters : string
        Name of column in ``data`` containing the raters.
    ratings : string
        Name of column in ``data`` containing the ratings.
    nan_policy : str
        Defines how to handle when input contains missing values (nan).
        'raise' (default) throws an error, 'omit' performs the calculations
        after deleting target(s) with one or more missing values (= listwise
        deletion).

        .. versionadded:: 0.3.0

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`
        Output dataframe:

        * ``'Type'``: ICC type
        * ``'Description'``: description of the ICC
        * ``'ICC'``: intraclass correlation
        * ``'F'``: F statistic
        * ``'df1'``: numerator degree of freedom
        * ``'df2'``: denominator degree of freedom
        * ``'pval'``: p-value
        * ``'CI95%'``: 95% confidence intervals around the ICC

    Notes
    -----
    The intraclass correlation (ICC) assesses the reliability of ratings by
    comparing the variability of different ratings of the same subject to the
    total variation across all ratings and all subjects.

    Shrout and Fleiss (1979) describe six cases of reliability of ratings done
    by :math:`k` raters on :math:`n` targets. Pingouin returns all six cases
    with corresponding F and p-values, as well as 95% confidence intervals.

    From the documentation of the ICC function in the R package psych:

    - **ICC1**: Each target is rated by a different rater and the raters are
      selected at random. This is a one-way ANOVA fixed effects model.

    - **ICC2**: A random sample of :math:`k` raters rate each target. The
      measure is one of absolute agreement in the ratings. ICC1 is sensitive
      to differences in means between raters and is a measure of absolute
      agreement.

    - **ICC3**: A fixed set of :math:`k` raters rate each target. There is no
      generalization to a larger population of raters. ICC2 and ICC3 remove
      mean differences between raters, but are sensitive to interactions.
      The difference between ICC2 and ICC3 is whether raters are seen as fixed
      or random effects.

    Then, for each of these cases, the reliability can either be estimated for
    a single rating or for the average of :math:`k` ratings. The 1 rating case
    is equivalent to the average intercorrelation, while the :math:`k` rating
    case is equivalent to the Spearman Brown adjusted reliability.
    **ICC1k**, **ICC2k**, **ICC3K** reflect the means of :math:`k` raters.

    This function has been tested against the ICC function of the R psych
    package. Note however that contrarily to the R implementation, the
    current implementation does not use linear mixed effect but regular ANOVA,
    which means that it only works with complete-case data (no missing values).

    References
    ----------
    .. [1] Shrout, P. E., & Fleiss, J. L. (1979). Intraclass correlations:
           uses in assessing rater reliability. Psychological bulletin, 86(2),
           420.

    .. [2] https://cran.r-project.org/web/packages/psych/psych.pdf

    .. [3] http://www.real-statistics.com/reliability/intraclass-correlation/

    Examples
    --------
    ICCs of wine quality assessed by 4 judges.

    >>> import pingouin as pg
    >>> data = pg.read_dataset('icc')
    >>> icc = pg.intraclass_corr(data=data, targets='Wine', raters='Judge',
    ...                          ratings='Scores').round(3)
    >>> icc.set_index("Type")
                       Description    ICC       F  df1  df2  pval         CI95%
    Type
    ICC1    Single raters absolute  0.728  11.680    7   24   0.0  [0.43, 0.93]
    ICC2      Single random raters  0.728  11.787    7   21   0.0  [0.43, 0.93]
    ICC3       Single fixed raters  0.729  11.787    7   21   0.0  [0.43, 0.93]
    ICC1k  Average raters absolute  0.914  11.680    7   24   0.0  [0.75, 0.98]
    ICC2k    Average random raters  0.914  11.787    7   21   0.0  [0.75, 0.98]
    ICC3k     Average fixed raters  0.915  11.787    7   21   0.0  [0.75, 0.98]
    """
    from pingouin import anova

    # Safety check
    assert isinstance(data, pd.DataFrame), 'data must be a dataframe.'
    assert all([v is not None for v in [targets, raters, ratings]])
    assert all([v in data.columns for v in [targets, raters, ratings]])
    assert nan_policy in ['omit', 'raise']

    # Convert data to wide-format
    data = data.pivot_table(index=targets, columns=raters, values=ratings)

    # Listwise deletion of missing values
    nan_present = data.isna().any().any()
    if nan_present:
        if nan_policy == 'omit':
            data = data.dropna(axis=0, how='any')
        else:
            raise ValueError("Either missing values are present in data or "
                             "data are unbalanced. Please remove them "
                             "manually or use nan_policy='omit'.")

    # Back to long-format
    # data_wide = data.copy()  # Optional, for PCA
    data = data.reset_index().melt(id_vars=targets, value_name=ratings)

    # Check that ratings is a numeric variable
    assert data[ratings].dtype.kind in 'bfi', 'Ratings must be numeric.'
    # Check that data are fully balanced
    # This behavior is ensured by the long-to-wide-to-long transformation
    # Unbalanced data will result in rows with missing values.
    # assert data.groupby(raters)[ratings].count().nunique() == 1

    # Extract sizes
    k = data[raters].nunique()
    n = data[targets].nunique()

    # Two-way ANOVA
    with np.errstate(invalid='ignore'):
        aov = anova(dv=ratings,
                    between=[targets, raters],
                    data=data,
                    ss_type=2)

    # Extract mean squares
    msb = aov.at[0, 'MS']
    msw = (aov.at[1, 'SS'] + aov.at[2, 'SS']) / (aov.at[1, 'DF'] +
                                                 aov.at[2, 'DF'])
    msj = aov.at[1, 'MS']
    mse = aov.at[2, 'MS']

    # Calculate ICCs
    icc1 = (msb - msw) / (msb + (k - 1) * msw)
    icc2 = (msb - mse) / (msb + (k - 1) * mse + k * (msj - mse) / n)
    icc3 = (msb - mse) / (msb + (k - 1) * mse)
    icc1k = (msb - msw) / msb
    icc2k = (msb - mse) / (msb + (msj - mse) / n)
    icc3k = (msb - mse) / msb

    # Calculate F, df, and p-values
    f1k = msb / msw
    df1 = n - 1
    df1kd = n * (k - 1)
    p1k = f.sf(f1k, df1, df1kd)

    f2k = f3k = msb / mse
    df2kd = (n - 1) * (k - 1)
    p2k = f.sf(f2k, df1, df2kd)

    # Create output dataframe
    stats = {
        'Type': ['ICC1', 'ICC2', 'ICC3', 'ICC1k', 'ICC2k', 'ICC3k'],
        'Description': [
            'Single raters absolute', 'Single random raters',
            'Single fixed raters', 'Average raters absolute',
            'Average random raters', 'Average fixed raters'
        ],
        'ICC': [icc1, icc2, icc3, icc1k, icc2k, icc3k],
        'F': [f1k, f2k, f2k, f1k, f2k, f2k],
        'df1':
        n - 1,
        'df2': [df1kd, df2kd, df2kd, df1kd, df2kd, df2kd],
        'pval': [p1k, p2k, p2k, p1k, p2k, p2k]
    }

    stats = pd.DataFrame(stats)

    # Calculate confidence intervals
    alpha = 0.05
    # Case 1 and 3
    f1l = f1k / f.ppf(1 - alpha / 2, df1, df1kd)
    f1u = f1k * f.ppf(1 - alpha / 2, df1kd, df1)
    l1 = (f1l - 1) / (f1l + (k - 1))
    u1 = (f1u - 1) / (f1u + (k - 1))
    f3l = f3k / f.ppf(1 - alpha / 2, df1, df2kd)
    f3u = f3k * f.ppf(1 - alpha / 2, df2kd, df1)
    l3 = (f3l - 1) / (f3l + (k - 1))
    u3 = (f3u - 1) / (f3u + (k - 1))
    # Case 2
    fj = msj / mse
    vn = df2kd * ((k * icc2 * fj + n * (1 + (k - 1) * icc2) - k * icc2))**2
    vd = df1 * k**2 * icc2**2 * fj**2 + \
        (n * (1 + (k - 1) * icc2) - k * icc2)**2
    v = vn / vd
    f2u = f.ppf(1 - alpha / 2, n - 1, v)
    f2l = f.ppf(1 - alpha / 2, v, n - 1)
    l2 = n * (msb - f2u * mse) / (f2u * (k * msj +
                                         (k * n - k - n) * mse) + n * msb)
    u2 = n * (f2l * msb - mse) / (k * msj +
                                  (k * n - k - n) * mse + n * f2l * msb)

    # Round the confidence intervals
    def list_round(x, decimals=2):
        for i, xi in enumerate(x):
            x[i] = np.round(xi, decimals).tolist()
        return x

    stats['CI95%'] = list_round(
        [[l1, u1], [l2, u2], [l3, u3], [1 - 1 / f1l, 1 - 1 / f1u],
         [l2 * k / (1 + l2 * (k - 1)), u2 * k / (1 + u2 * (k - 1))],
         [1 - 1 / f3l, 1 - 1 / f3u]])

    return stats
Example #36
def p_up(alpha, n, N):
    # Clopper-Pearson upper confidence bound for a proportion with n successes out of N
    quant_up = f.ppf(1 - alpha, 2 * (n + 1), 2 * (N - n), loc=0, scale=1)
    return (n + 1) * quant_up / (N - n + (n + 1) * quant_up)
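Together with p_lo from Example #27, this gives the two sides of an exact (Clopper-Pearson) binomial interval; a short sketch repeating p_lo for self-containment:

from scipy.stats import f

def p_lo(alpha, n, N):  # companion lower bound, as in Example #27
    quant_lo = f.ppf(alpha, 2 * n, 2 * (N - n + 1), loc=0, scale=1)
    return n * quant_lo / (N - n + 1 + n * quant_lo)

# 95% two-sided interval for 7 successes out of 20 trials (alpha/2 per side)
print(p_lo(0.025, 7, 20), p_up(0.025, 7, 20))  # approximately (0.154, 0.592)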
Example #37
def ellipseoid(P, y=None, z=None, pvalue=.95, units=None, show=True):
    """Calculates an ellipse(oid) as prediction interval for multivariate data.

    The prediction ellipse (or ellipsoid) is a prediction interval for a sample
    of a bivariate (or trivariate) random variable and is such that there is
    pvalue*100% of probability that a new observation will be contained in the
    ellipse (or ellipsoid) (Chew, 1966). [1]_.

    The semi-axes of the prediction ellipse(oid) are found by calculating the
    eigenvalues of the covariance matrix of the data and adjust the size of the
    semi-axes to account for the necessary prediction probability. 

    Parameters
    ----------
    P : 1-D or 2-D array_like
        For a 1-D array, P is the abscissa values of the [x,y] or [x,y,z] data.
        For a 2-D array, P is the joined values of the [x,y] or [x,y,z] data.
        The shape of the 2-D array should be (n, 2) or (n, 3) where n is the
        number of observations.
    y : 1-D array_like, optional (default = None)
        Ordinate values of the [x, y] or [x,y,z] data.
    z : 1-D array_like, optional (default = None)
        Ordinate values of the [x, y] or [x,y,z] data.
    pvalue : float, optional (default = .95)
        Desired prediction probability of the ellipse(oid).
    units : str, optional (default = None)
        Units of the input data.
    show : bool, optional (default = True)
        True (1) plots data in a matplotlib figure, False (0) to not plot.

    Returns
    -------
    volume : float
        Area of the ellipse or volume of the ellipsoid according to the inputs.
    axes : 2-D array
        Lengths of the semi-axes ellipse(oid) (largest first).
    angles : 1-D array
        Angles of the semi-axes ellipse(oid). For the ellipsoid (3D data), the
        angles are the Euler angles calculated in the XYZ sequence.
    center : 1-D array
        Centroid of the ellipse(oid).
    rotation : 2-D array
        Rotation matrix of the semi-axes of the ellipse(oid).

    Notes
    -----
    The directions and lengths of the semi-axes are found, respectively, as the
    eigenvectors and eigenvalues of the covariance matrix of the data using
    the concept of principal components analysis (PCA) [2]_ or singular value
    decomposition (SVD) [3]_.
    
    See [4]_ for a discussion about prediction and confidence intervals and
    their use in posturography.

    References
    ----------
    .. [1] http://www.jstor.org/stable/2282774.
    .. [2] http://en.wikipedia.org/wiki/Principal_component_analysis.
    .. [3] http://en.wikipedia.org/wiki/Singular_value_decomposition.
    .. [4] http://www.sciencedirect.com/science/article/pii/S0966636213005961.

    Examples
    --------
    >>> import numpy as np
    >>> from ellipseoid import ellipseoid
    >>> y = np.cumsum(np.random.randn(3000)) / 50
    >>> x = np.cumsum(np.random.randn(3000)) / 100
    >>> area, axes, angles, center, R = ellipseoid(x, y, units='cm', show=True)
    >>> P = np.random.randn(1000, 3)
    >>> P[:, 2] = P[:, 2] + P[:, 1]*.5
    >>> P[:, 1] = P[:, 1] + P[:, 0]*.5
    >>> volume, axes, angles, center, R = ellipseoid(P, units='cm', show=True)
    """

    from scipy.stats import f as F 

    P = np.array(P, ndmin=2, dtype=float)
    if P.shape[0] == 1:
        P = P.T
    elif P.shape[1] > 3:
        P = P.T
    if y is not None:
        y = np.array(y, copy=False, ndmin=2, dtype=float)
        if y.shape[0] == 1:
            y = y.T
        P = np.concatenate((P, y), axis=1)
    if z is not None:
        z = np.array(z, copy=False, ndmin=2, dtype=float)
        if z.shape[0] == 1:
            z = z.T
        P = np.concatenate((P, z), axis=1)
    # covariance matrix
    cov = np.cov(P, rowvar=0)
    # singular value decomposition
    U, s, Vt = np.linalg.svd(cov)
    # semi-axes (largest first)
    p, n = s.size, P.shape[0]
    saxes = np.sqrt(s * F.ppf(pvalue, p, dfd=n-p) * (n-1) * p * (n+1)/(n*(n-p)))
    volume = 4/3*np.pi*np.prod(saxes) if p == 3 else np.pi*np.prod(saxes)
    # rotation matrix
    R = Vt
    if s.size == 2:
        angles = np.array([np.rad2deg(np.arctan2(R[1, 0], R[0, 0])),
                           90-np.rad2deg(np.arctan2(R[1, 0], -R[0, 0]))])
    else:
        angles = rotXYZ(R, unit='deg')
    # centroid of the ellipse(oid)
    center = np.mean(P, axis=0)

    if show:
        _plot(P, volume, saxes, center, R, pvalue, units, fig=None, ax=None)

    return volume, saxes, angles, center, R
Example #38
    def prediction_ellipse(self, p_val=.95):
        """
        Prediction hyperellipsoid for multivariate data.
        __author__ = 'Marcos Duarte, https://github.com/demotu/BMC'

        Parameters
        ----------
        P : 1-D or 2-D array_like
            For a 1-D array, P is the abscissa values of the [x,y] or [x,y,z] data.
            For a 2-D array, P is the joined values of the multivariate data.
            The shape of the 2-D array should be (n, p) where n is the number of
            observations (rows) and p the number of dimensions (columns).
        p_val : float, optional (default = .95)
            Desired prediction probability of the hyperellipsoid.

        Returns
        -------
        hypervolume : float
            Hypervolume (e.g., area of the ellipse or volume of the ellipsoid).
        axes : 1-D array
            Lengths of the semi-axes hyperellipsoid (largest first).
        angles : 1-D array
            Angles of the semi-axes hyperellipsoid (only for 2D or 3D data).
            For the ellipsoid (3D data), the angles are the Euler angles
            calculated in the XYZ sequence.
        center : 1-D array
            Centroid of the hyperellipsoid.
        rotation : 2-D array
            Rotation matrix for hyperellipsoid semi-axes (only for 2D or 3D data).
        """

        from scipy.stats import f as F
        from scipy.special import gamma

        data = np.array([self.record.cop.x, self.record.cop.y]).transpose()

        # covariance matrix
        cov = np.cov(data, rowvar=0)
        # singular value decomposition
        U, s, Vt = np.linalg.svd(cov)
        p, n = s.size, data.shape[0]
        # F percent point function
        fppf = F.ppf(p_val, p, n - p) * (n - 1) * p * (n + 1) / n / (n - p)
        # semi-axes (largest first)
        saxes = np.sqrt(s * fppf)
        area = np.pi**(p / 2) / gamma(p / 2 + 1) * np.prod(saxes)
        # rotation matrix
        R = Vt
        angles = np.array([
            np.rad2deg(np.arctan2(R[1, 0], R[0, 0])),
            90 - np.rad2deg(np.arctan2(R[1, 0], -R[0, 0]))
        ])
        # centroid of the ellipse
        center = np.mean(data, axis=0)

        return type(
            '',
            (object, ),
            {
                'area': area,  # cm^2
                'saxes': saxes,  # cm
                'angles': angles,  # deg
                'center': center,  # cm
                'rot_matrix': R,
                'p_val': p_val
            })
Beispiel #40
    b_list[0] + b_list[1] * mat_X[i][0] + b_list[2] * mat_X[i][1] +
    b_list[3] * mat_X[i][2] for i in range(N)
]
print("-" * 65)
print('\nValues of y')
for i in range(N):
    print(
        f"{b_list[0]} + {b_list[1]}*x1 + {b_list[2]}*x2 + {b_list[3]}*x3 ="
        f" {b_list[0] + b_list[1] * mat_X[i][0] + b_list[2] * mat_X[i][1] + b_list[3] * mat_X[i][2]}"
    )
print("-" * 65)

print('\nCheck the model adequacy using the Fisher criterion')
Sad = (m / (N - d)) * sum((y_reg[i] - average_y[i]) ** 2 for i in range(N))
Fp = Sad / S2b
q = 0.05
F_table = f.ppf(q=1 - q, dfn=f4, dfd=f3)
print('FP  =', Fp)
if Fp > F_table:
    print('The model is inadequate at significance level 0.05')
else:
    print('The model is adequate at significance level 0.05')

print("-" * 65)
print("\nДодаткове завдання")

total_time = 0
for i in range(len(time_list)):
    print(f"Time of the {i+1}-th significance check: {time_list[i]}")
    total_time += time_list[i]
print(f"Total search time: {total_time}")
Beispiel #41
    print("Незначущі коефіцієнти регресії:", [round(i, 3) for i in coefs2])
    y_st = []
    for i in range(15):
        y_st.append(res[0] + res[1] * x1[i] + res[2] * x2[i] + res[3] * x3[i] +
                    res[4] * x1x2[i] + res[5] * x1x3[i] + res[6] * x2x3[i] +
                    res[7] * x1x2x3[i] + res[8] * x1kv[i] + res[9] * x2kv[i] +
                    res[10] * x3kv[i])
    print("Значення з отриманими коефіцієнтами:")
    for i in range(15):
        print("{:.3f}".format(y_st[i]), end=" ")
    time_st += time.perf_counter() - start_time_student
    print(
        "\n------------------------- Adequacy check by the Fisher criterion -------------------------"
    )

    start_time_fisher = time.perf_counter()
    Sad = m * sum([(y_st[i] - Y_average[i])**2 for i in range(15)]) / (n - d)
    Fp = Sad / sb
    F4 = n - d
    print("Fp =", Fp)
    if Fp < f.ppf(q=0.95, dfn=F4, dfd=F3):
        print("The regression equation is adequate at significance level 0.05")
    else:
        print("The regression equation is inadequate at significance level 0.05")
    time_f += time.perf_counter() - start_time_fisher

print("Середній час перевірки за критерієм Кохрена: ", time_koh / 10)
print("Середній час перевірки за критерієм Cтьюдента: ", time_st / 10)
print("Середній час перевірки за критерієм Фішера: ", time_f / 10)
def cochrane_teor(f1, f2, q=0.05):
    q1 = q / f1
    fischer_value = f.ppf(q=1 - q1, dfn=f2, dfd=(f1 - 1) * f2)
    return fischer_value / (fischer_value + f1 - 1)
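A minimal usage sketch of cochrane_teor (assuming scipy.stats.f is imported as f, as in the snippet above): the Cochran critical value for f1 samples with f2 degrees of freedom each.

from scipy.stats import f

# critical G to compare against max(s_i^2) / sum(s_i^2); f1 = 4 samples, f2 = 5 df each
print(cochrane_teor(f1=4, f2=5, q=0.05))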
    def UCL(self, alpha):
        P = self.NumFeature
        N = len(self.WCTrainLabel)

        # Hotelling T^2 upper control limit: P*(N-1)/(N-P) times the F quantile
        return (P * (N - 1) * f.ppf(alpha, P, N - P)) / (N - P)
Beispiel #44
# 1320.171111111111

df_between = 2
df_within = 15

MS_between = SS_between / df_between
MS_within = SS_within / df_within

# print(MS_between, MS_within)
# 660.0855555555555 8.898666666666665
F_ratio = 660.0855555555555 / 8.898666666666665

# print(F_ratio)
# 74.1780291679153

f_critical = f.ppf(.95, 2, 15)
# print(f_critical)
# 3.6823203436732412

η_squared = SS_within / (SS_between + SS_within)  # proportion of total variance within groups

# print(1 - η_squared)  # 1 - η_squared is the usual eta squared, SS_between / SS_total

# Q 25
kids_df = pd.DataFrame({
    'single': pd.Series([8, 7, 10, 6, 9]),
    'twin': pd.Series([4, 6, 7, 4, 9]),
    'triplet': pd.Series([4, 4, 7, 2, 3])
})

# print(kids_df.describe())
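As a cross-check of the by-hand ANOVA above, scipy.stats.f_oneway computes the same kind of F ratio directly from the groups; this is a sketch applied to the kids_df data from Q 25 (three groups of five, so df = (2, 12)).

from scipy.stats import f_oneway, f

single = [8, 7, 10, 6, 9]
twin = [4, 6, 7, 4, 9]
triplet = [4, 4, 7, 2, 3]

F_stat, p_value = f_oneway(single, twin, triplet)
f_critical = f.ppf(.95, 2, 12)  # 3 groups of 5 observations: df = (2, 12)
print(F_stat, p_value, f_critical)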
def chapter_5(input_list):
	confidence_int = 0.975
	for choice in input_list:
		if choice == 20:
			data = np.loadtxt(os.path.join(cwd,'T5-12.dat'))
			x1_data = np.loadtxt(os.path.join(cwd,'T5-12.dat'))[:,0]
			x2_data = np.loadtxt(os.path.join(cwd,'T5-12.dat'))[:,1]
			print 'x1_data',x1_data
			print 'x2_data',x2_data
			print 'mean_of_length data',x1_data.mean()
			print 'mean of wing length data',x2_data.mean()
			n = x1_data.shape[0]
			print 'n',n
			p = 2
			cov_matrix = np.cov(data,rowvar=0)
			#http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.inv.html
			inv_of_cov = inv(cov_matrix)
			print 'covariance matrix\n',cov_matrix
			print 'inverse of covariance matrix\n',inv_of_cov
			print 'f.ppf(0.95,p,n-p)',f.ppf(0.95,p,n-p)
			print '(n-1)*p/(n*(n-p))',float((n-1)*p)/(n*(n-p))
			#http://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.f.html
			c_square = (float((n-1)*p)/((n-p)))*(f.ppf(0.95,p,n-p))
			print 'c_square',c_square

			#simultaneous confidence intervals
			a1,a2 = np.matrix('1;0'),np.matrix('0;1')
			#Tsquare confidence interval
			half_width1 = np.sqrt(((np.transpose(a1)*cov_matrix*a1)*c_square)/n)
			upperbound_1,lowerbound_1 = x1_data.mean() + half_width1 , x1_data.mean() - half_width1
			print 'T-square upperbound_1,lowerbound_1',upperbound_1,lowerbound_1
			half_width2 = np.sqrt((np.transpose(a2)*cov_matrix*a2)*(c_square)/n)
			upperbound_2,lowerbound_2 = x2_data.mean() + half_width2 , x2_data.mean() - half_width2
			print 'T-square upperbound_2,lowerbound_2',upperbound_2,lowerbound_2

			#Bonferroni confidence interval
			half_width1 = np.sqrt((cov_matrix[0,0])/n)*(t.ppf(0.5+ (confidence_int)/2,n-1))
			upperbound_1,lowerbound_1 = x1_data.mean() + half_width1 , x1_data.mean() - half_width1
			print 'Bonferroni upperbound_1,lowerbound_1',upperbound_1,lowerbound_1
			half_width2 = np.sqrt((cov_matrix[1,1])/n)*(t.ppf(0.5+ (confidence_int)/2,n-1))
			upperbound_2,lowerbound_2 = x2_data.mean() + half_width2 , x2_data.mean() - half_width2
			print 'Bonferroni upperbound_2,lowerbound_2',upperbound_2,lowerbound_2

			print x1_data
			stats.probplot(x1_data, dist="norm", plot=pylab)
			pylab.show()

		elif choice == 22: 
			#http://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html
			data = np.loadtxt(os.path.join(cwd,'T6-10.dat'),usecols = (0,1,2))
			fuel_data,repair_data,capital_data = data[:,0],data[:,1],data[:,2]
			# #QQ plots before removal of outliers
			# stats.probplot(fuel_data, dist="norm", plot=pylab),pylab.show()
			# stats.probplot(repair_data, dist="norm", plot=pylab),pylab.show()
			# stats.probplot(capital_data, dist="norm", plot=pylab),pylab.show()
			# #scatter plots before removal of outliers
			# scatterplots(fuel_data,repair_data),scatterplots(capital_data,repair_data),scatterplots(fuel_data,capital_data)

			#removing outliers
			#http://docs.scipy.org/doc/numpy-1.10.1/reference/generated/numpy.select.html
			#fuel_data2 = np.select([fuel_data<20],[fuel_data])
			outliers_removed_data = data[(data[:,0]<20)&(data[:,0]>6)&(data[:,1]<20)&(data[:,1]>3)&(data[:,2]<25)&(data[:,2]>5)]
			#fuel_data2 = fuel_data[(fuel_data < 20) & (fuel_data > 6)]   #http://stackoverflow.com/questions/3806878/subsetting-data-in-python
			fuel_data2,repair_data2,capital_data2 = outliers_removed_data[:,0],outliers_removed_data[:,1],outliers_removed_data[:,2]
			# stats.probplot(fuel_data2, dist="norm", plot=pylab),pylab.show()
			# stats.probplot(repair_data2, dist="norm", plot=pylab),pylab.show()
			# stats.probplot(capital_data2, dist="norm", plot=pylab),pylab.show()
			#repair_data2,capital_data2 = np.select([fuel_data<20],[fuel_data])

			#confidence interval calculations
			n = outliers_removed_data.shape[0]
			cov_matrix = np.cov(outliers_removed_data,rowvar=0)
			#bonferroni confidence interval
			half_width1 = np.sqrt((cov_matrix[0,0])/n)*(t.ppf(0.5+ (confidence_int)/2,n-1))
			upperbound_1,lowerbound_1 = fuel_data2.mean() + half_width1 , fuel_data2.mean() - half_width1
			print 'Bonferroni fuel data lowerbound_1,upperbound_1',lowerbound_1,upperbound_1
			half_width2 = np.sqrt((cov_matrix[1,1])/n)*(t.ppf(0.5+ (confidence_int)/2,n-1))
			upperbound_2,lowerbound_2 = repair_data2.mean() + half_width2 , repair_data2.mean() - half_width2
			print 'Bonferroni repair_data2 lowerbound_2,upperbound_2',lowerbound_2,upperbound_2
			half_width3 = np.sqrt((cov_matrix[2,2])/n)*(t.ppf(0.5+ (confidence_int)/2,n-1))
			upperbound_3,lowerbound_3 = capital_data2.mean() + half_width3 , capital_data2.mean() - half_width3
			print 'Bonferroni capital_data2 lowerbound_3,upperbound_3',lowerbound_3,upperbound_3
			
			#T square confidence interval calculations
			p = 3
			a1,a2,a3 = np.matrix('1;0;0'),np.matrix('0;1;0'),np.matrix('0;0;1')
			c_square = (float((n-1)*p)/((n-p)))*(f.ppf(0.95,p,n-p))
			half_width1 = np.sqrt(((np.transpose(a1)*cov_matrix*a1)*c_square)/n)
			upperbound_1,lowerbound_1 = fuel_data2.mean() + half_width1 , fuel_data2.mean() - half_width1
			print 'fuel data T-square lowerbound_1,upperbound_1',lowerbound_1,upperbound_1
			half_width2 = np.sqrt(((np.transpose(a2)*cov_matrix*a2)*c_square)/n)
			upperbound_2,lowerbound_2 = repair_data2.mean() + half_width2 , repair_data2.mean() - half_width2
			print 'repair_data2 T-square lowerbound_2,upperbound_2',lowerbound_2,upperbound_2
			half_width3 = np.sqrt(((np.transpose(a3)*cov_matrix*a3)*c_square)/n)
			upperbound_3,lowerbound_3 = capital_data2.mean() + half_width3 , capital_data2.mean() - half_width3
			print 'capital_data2 T-square lowerbound_3,upperbound_3',lowerbound_3,upperbound_3

		elif choice == 30:
			n = 50   #seen from the solutions
			confidence_int = 0.95
			means = np.matrix('0.766;0.508;0.438;0.161')
			cov_matrix = np.matrix('0.856,0.635,0.173,0.096;0.635,0.568,0.128,0.067;0.173,0.127,0.171,0.039;0.096,0.067,0.039,0.043')
			print 'means\n',means
			print 'cov_matrix\n',cov_matrix
			a1,a2,a3,a4 = np.matrix('1;0;0;0'),np.matrix('0;1;0;0'),np.matrix('0;0;1;0'),np.matrix('0;0;0;1')
			a5,a6= np.matrix('1;1;1;1'),np.matrix('1;-1;0;0')
			#simultaneous confidence intervals
			c_square = chi2.ppf(confidence_int,4)
			print 'c_square',c_square
			half_width1 = np.sqrt(((np.transpose(a1)*cov_matrix*a1)*c_square)/n)
			upperbound_1,lowerbound_1 = means[0,0] + half_width1 , means[0,0] - half_width1
			print 'Petroleum Chi-square lowerbound_1,upperbound_1',lowerbound_1,upperbound_1
			# half_width2 = np.sqrt(((np.transpose(a2)*cov_matrix*a2)*c_square)/n)
			# upperbound_2,lowerbound_2 = repair_data2.mean() + half_width2 , repair_data2.mean() - half_width2
			# print 'repair_data2 T-square upperbound_1,lowerbound_1',lowerbound_2,upperbound_2
			# half_width3 = np.sqrt(((np.transpose(a3)*cov_matrix*a3)*c_square)/n)
			# upperbound_3,lowerbound_3 = capital_data2.mean() + half_width3 , capital_data2.mean() - half_width3
			# print 'capital_data2 T-square upperbound_1,lowerbound_1',lowerbound_3,upperbound_3

			half_width5 = np.sqrt(((np.transpose(a5)*cov_matrix*a5)*c_square)/n)
			upperbound_1,lowerbound_1 = means.sum() + half_width5 , means.sum() - half_width5
			print 'Petroleum Chi-square lowerbound_5,upperbound_5',lowerbound_1,upperbound_1

			half_width6 = np.sqrt(((np.transpose(a6)*cov_matrix*a6)*c_square)/n)
			upperbound_1,lowerbound_1 = np.transpose(a6)*means + half_width6 , np.transpose(a6)*means - half_width6
			print 'Petroleum Chi-square lowerbound_6,upperbound_6',lowerbound_1,upperbound_1




			#Bonferroni confidence intervals
			c_square = t.ppf((1-(1 - confidence_int)/12),n-1)
			print 'c_square',c_square 
			half_width1 = c_square * np.sqrt(((np.transpose(a1)*cov_matrix*a1)/n))
			upperbound_1,lowerbound_1 = means[0,0] + half_width1 , means[0,0] - half_width1
			print 'Petroleum Bonferroni lowerbound_1,upperbound_1',lowerbound_1,upperbound_1
def UCL_Computation(N, alpha):
    S = 1
    return (S*((N-1)**2) * f.ppf(1-alpha, S, N-S)) / (N*(N-S))
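A usage sketch for UCL_Computation above (univariate case, S = 1): since (N-1)^2 / (N(N-1)) tends to 1, the limit approaches the plain F quantile as N grows.

from scipy.stats import f

for N in (20, 50, 200):
    # the UCL converges to the F quantile itself as the (N-1)/N factor tends to 1
    print(N, UCL_Computation(N, 0.05), f.ppf(0.95, 1, N - 1))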
Beispiel #47
 def get_cochran_critical(probability, f1, f2):
     return 1 / (1 + (f2 - 1) / f.ppf(1 - (1 - probability) / f2, f1,
                                      (f2 - 1) * f1))
def pearscdf(X, mu, sigma, skew, kurt, method, k, output):

# pearspdf
#   [p,type,coefs] = pearspdf(X,mu,sigma,skew,kurt)
#
#   Returns the probability distribution density of the Pearson distribution
#   with mean `mu`, standard deviation `sigma`, skewness `skew` and
#   kurtosis `kurt`, evaluated at the values in X.
#
#   Some combinations of moments are not valid for any random variable, and in
#   particular, the kurtosis must be greater than the square of the skewness
#   plus 1.  The kurtosis of the normal distribution is defined to be 3.
#
#   The seven distribution types in the Pearson system correspond to the
#   following distributions:
#
#      Type 0: Normal distribution
#      Type 1: Four-parameter beta
#      Type 2: Symmetric four-parameter beta
#      Type 3: Three-parameter gamma
#      Type 4: Not related to any standard distribution.  Density proportional
#              to (1+((x-a)/b)^2)^(-c) * exp(-d*arctan((x-a)/b)).
#      Type 5: Inverse gamma location-scale
#      Type 6: F location-scale
#      Type 7: Student's t location-scale
#
#   Examples
#
#   See also
#       pearspdf pearsrnd mean std skewness kurtosis
#


#   References:
#      [1] Johnson, N.L., S. Kotz, and N. Balakrishnan (1994) Continuous
#          Univariate Distributions, Volume 1,  Wiley-Interscience.
#      [2] Devroye, L. (1986) Non-Uniform Random Variate Generation,
#          Springer-Verlag.

    otpt = len(output)
#    outClass = superiorfloat(mu, sigma, skew, kurt)

    if X[1] == inf:
        cdist = 1
        limstate = X[0]
    elif X[0] == -inf:
        cdist = 2
        limstate = X[1]
    else:
        cdist = 3
        limstate = X

    if sigma == 0:
        print "Warning: The standard deviation of output distribution",k,"is zero. No distribution or correlation can be calculated for it."
        if mu>=X[0] and mu<=X[1]:   #mean is in the limits
            return 1, None, inf, None, None, None, None, None, None, None, None
        else:                       #mean is outside the limits
            return 0, None, inf, None, None, None, None, None, None, None, None

    X = (X - mu) / sigma    # Z-score

    if method == 'MCS':
        beta1 = 0
        beta2 = 3
        beta3 = sigma ** 2
    else:
        beta1 = skew ** 2
        beta2 = kurt
        beta3 = sigma ** 2

    # Return NaN for illegal parameter values.
    if (sigma < 0) or (beta2 <= beta1 + 1):
        p = zeros(otpt)+nan
        #p = zeros(sizeout)+nan
        dtype = nan
        coefs = zeros((1,3))+nan
        print 'Illegal parameter values passed to pearscdf! (sigma:',sigma,'  beta1:',beta1,'  beta2:', beta2,')'
        return

    #% Classify the distribution and find the roots of c0 + c1*x + c2*x^2
    c0 = (4 * beta2 - 3 * beta1)# ./ (10*beta2 - 12*beta1 - 18);
    c1 = skew * (beta2 + 3)# ./ (10*beta2 - 12*beta1 - 18);
    c2 = (2 * beta2 - 3 * beta1 - 6)# ./ (10*beta2 - 12*beta1 - 18);

    if c1 == 0:    # symmetric dist'ns
        if beta2 == 3:
            dtype = 0
            a1 = 0
            a2 = 0
        else:
            if beta2 < 3:
                dtype = 2
            elif beta2 > 3:
                dtype = 7
            a1 = -sqrt(abs(c0 / c2))
            a2 = -a1        # symmetric roots
    elif c2 == 0:    # kurt = 3 + 1.5*skew^2
        dtype = 3
        a1 = -c0 / c1    # single root
        a2 = a1
    else:
        kappa = c1 ** 2 / (4 * c0 * c2)
        if kappa < 0:
            dtype = 1
        elif kappa < 1 - finfo(float64).eps:
            dtype = 4
        elif kappa <= 1 + finfo(float64).eps:
            dtype = 5
        else:
            dtype = 6
        # Solve the quadratic for general roots a1 and a2 and sort by their real parts
        csq=c1 ** 2 - 4 * c0 * c2
        if c1 ** 2 - 4 * c0 * c2 < 0:
            tmp = -(c1 + sign(c1) * cmath.sqrt(c1 ** 2 - 4 * c0 * c2)) / 2
        else:
            tmp = -(c1 + sign(c1) * sqrt(c1 ** 2 - 4 * c0 * c2)) / 2
        a1 = tmp / c2
        a2 = c0 / tmp
        if real(a1) > real(a2):
            a1, a2 = a2, a1

    denom = (10 * beta2 - 12 * beta1 - 18)

    if abs(denom) > sqrt(finfo(double).tiny):
        c0 = c0 / denom
        c1 = c1 / denom
        c2 = c2 / denom
        coefs = [c0, c1, c2]
    else:
        dtype = 1    # this should have happened already anyway
        # beta2 = 1.8 + 1.2*beta1, and c0, c1, and c2 -> Inf.  But a1 and a2 are
        # still finite.
        coefs = zeros((1,3))+inf

    if method == 'MCS':
        dtype = 8

    #% Generate standard (zero mean, unit variance) values
    if dtype == 0:
        # normal: standard support (-Inf,Inf)
        #     m1 = zeros(outClass);
        #     m2 = ones(outClass);
        m1 = 0
        m2 = 1
        p = norm.cdf(X[1], m1, m2) - norm.cdf(X[0], m1, m2)
        lo= norm.ppf( 3.39767E-06, mu,sigma ); 
        hi= norm.ppf( 0.999996602, mu,sigma );
        Inv1 = norm.ppf(p, 0, 1)
        #     Inv1=norm.ppf( normcdf(X[0],m1,m2), 0,1 );
        #Inv2 = norm.ppf(normcdf(X[1], m1, m2), 0, 1)

    elif dtype == 1:
        # four-parameter beta: standard support (a1,a2)
        if abs(denom) > sqrt(finfo(double).tiny):
            m1 = (c1 + a1) / (c2 * (a2 - a1))
            m2 = -(c1 + a2) / (c2 * (a2 - a1))
        else:
            # c1 and c2 -> Inf, but c1/c2 has finite limit
            m1 = c1 / (c2 * (a2 - a1))
            m2 = -c1 / (c2 * (a2 - a1))
        #     r = a1 + (a2 - a1) .* betarnd(m1+1,m2+1,sizeOut);
        X = (X - a1) / (a2 - a1)    # Transform to 0-1 interval
        #     lambda = -(a2-a1)*(m1+1)./(m1+m1+2)-a1;
        #     X = (X - lambda - a1)./(a2-a1);

        alph=m1+1
        beta=m2+1
        if alph < 1.001 and beta < 1.001:
            alph=1.001
            beta=1.001

        mode=(alph-1)/(alph+beta-2)

        if mode < 0.1: 
            if alph > beta:
                alph = max(2.0,alph)
                beta = (alph-1)/0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0,beta)
                alph = (0.1*(beta -2) +1)/(1 - 0.1)
        elif mode > 0.9:
            if alph > beta:
                alph = max(2.0,alph)
                beta =(alph-1)/0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0,beta);
                alph = (0.1*(beta -2) +1)/(1 - 0.1)

        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo=a1*sigma+mu;
        hi=a2*sigma+mu;
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( beta.cdf(X[0],m1+1,m2+1), 0,1 );
        #Inv2 = norm.ppf(beta.cdf(X[1], m1 + 1, m2 + 1), 0, 1)

        #     X = X*(a2-a1) + a1;         % Undo interval transformation
        #     r = r + (0 - a1 - (a2-a1).*(m1+1)./(m1+m2+2));
    elif dtype == 2:
        # symmetric four-parameter beta: standard support (-a1,a1)
        m = (c1 + a1) / (c2 * 2 * abs(a1))
        m1 = m
        m2 = m
        X = (X - a1) / (2 * abs(a1))
        #     r = a1 + 2*abs(a1) .* betapdf(X,m+1,m+1);

        alph=m+1;
        beta=m+1;
        if alph < 1.01: 
            alph=1.01
            beta=1.01

        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo=a1*sigma+mu;
        hi=a2*sigma+mu;
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( beta.cdf(X[0],m+1,m+1), 0,1 );
        #Inv2 = norm.ppf(beta.cdf(X[1], m + 1, m + 1), 0, 1)

        #     X = a1 + 2*abs(a1).*X;
    elif dtype == 3:
        # three-parameter gamma: standard support (a1,Inf) or (-Inf,a1)
        m = (c0 / c1 - c1) / c1
        m1 = m
        m2 = m
        X = (X - a1) / c1
        #     r = c1 .* gampdf(X,m+1,1,sizeOut) + a1;

        p = gamma.cdf(X[1], m + 1, 1) - gamma.cdf(X[0], m + 1, 1)
        lo=(gamma.ppf( 3.39767E-06, m+1, scale=1 )*c1+a1)*sigma+mu; 
        hi=(gamma.ppf( 0.999996602, m+1, scale=1 )*c1+a1)*sigma+mu;
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( gamcdf(X[0],m+1,1), 0,1 );
        #Inv2 = norm.ppf(gamcdf(X[1], m + 1, 1), 0, 1)

        #     X = c1 .* X + a1;
    elif dtype == 4:
        # Pearson IV is not a transformation of a standard distribution: density
        # proportional to (1+((x-lambda)/a)^2)^(-m) * exp(-nu*arctan((x-lambda)/a)),
        # standard support (-Inf,Inf)
        X = X * sigma + mu
        r = 6 * (beta2 - beta1 - 1) / (2 * beta2 - 3 * beta1 - 6)
        m = 1 + r / 2
        nu = -r * (r - 2) * skew / sqrt(16 * (r - 1) - beta1 * (r - 2) ** 2)
        a = sqrt(beta3 * (16 * (r - 1) - beta1 * (r - 2) ** 2)) / 4
        _lambda = mu - ((r - 2) * skew * sigma) / 4    # gives zero mean
        m1 = m
        m2 = nu
        #     X = (X - lambda)./a;
        if cdist == 1:
            p = 1 - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        elif cdist == 2:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma)
        elif cdist == 3:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma) - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        lo=norm.ppf( 3.39767E-06, mu,sigma );   
        hi=norm.ppf( 0.999996602, mu,sigma );
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( pearson4cdf(X[0],m,nu,a,lambda,mu,sigma), 0,1 );
        #Inv2 = norm.ppf(pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma), 0, 1)

        #     C = X.*a + lambda;
        #     C = diff(C);
        #     C= C(1);
        #     p = p./(sum(p)*C);
    elif dtype == 5:
        # inverse gamma location-scale: standard support (-C1,Inf) or
        # (-Inf,-C1)
        C1 = c1 / (2 * c2)
        #     r = -((c1 - C1) ./ c2) ./ gampdf(X,1./c2 - 1,1) - C1;
        X = -((c1 - C1) / c2) / (X + C1)
        m1 = c2
        m2 = 0
        p = gamma.cdf(X[1], 1. / c2 - 1, scale=1) - gamma.cdf(X[0], 1. / c2 - 1, scale=1)
        lo=(-((c1-C1)/c2)/gamma.ppf( 3.39767E-06, 1/c2 - 1, scale=1 )-C1)*sigma+mu; 
        hi=(-((c1-C1)/c2)/gamma.ppf( 0.999996602, 1/c2 - 1, scale=1 )-C1)*sigma+mu; 
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( gamcdf(X[0],1./c2 - 1,1), 0,1 );
        #Inv2 = norm.ppf(gamcdf(X[1], 1. / c2 - 1, 1), 0, 1)

        #     X = -((c1-C1)./c2)./X-C1;
    elif dtype == 6:
        # F location-scale: standard support (a2,Inf) or (-Inf,a1)
        m1 = (a1 + c1) / (c2 * (a2 - a1))
        m2 = -(a2 + c1) / (c2 * (a2 - a1))
        # a1 and a2 have the same sign, and they've been sorted so a1 < a2
        if a2 < 0:
            nu1 = 2 * (m2 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a2) / (a2 - a1) * (nu2 / nu1)
            #         r = a2 + (a2 - a1) .* (nu1./nu2) .* fpdf(X,nu1,nu2);

            p = fcdf(X[1], nu1, nu2) - fcdf(X[0], nu1, nu2)
            lo=(f.ppf( 3.39767E-06, nu1,nu2)+a2)*sigma+mu
            hi=(f.ppf( 0.999996602, nu1,nu2)+a2)*sigma+mu
            Inv1 = norm.ppf(p, 0, 1)
            #             Inv1=norm.ppf( fcdf(X[0],nu1,nu2), 0,1 );
            #Inv2 = norm.ppf(fcdf(X[1], nu1, nu2), 0, 1)

            #         X = a2 + (a2-a1).*(nu1./nu2).*X
        else:        # 0 < a1
            nu1 = 2 * (m1 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a1) / (a1 - a2) * (nu2 / nu1)
            #         r = a1 + (a1 - a2) .* (nu1./nu2) .* fpdf(X,nu1,nu2);

            p = -fcdf(X[1], nu1, nu2) + fcdf(X[0], nu1, nu2)
            hi=(-f.ppf( 3.39767E-06, nu1,nu2)+a1)*sigma+mu;
            lo=(-f.ppf( 0.999996602, nu1,nu2)+a1)*sigma+mu; 
            Inv1 = norm.ppf(p, 0, 1)
            #             Inv1=norm.ppf( fcdf(X[0],nu1,nu2), 0,1 );
            #Inv2 = norm.ppf(fcdf(X[1], nu1, nu2), 0, 1)

            #         X = a1 + (a1-a2).*(nu1./nu2).*X;
    elif dtype == 7:
        # t location-scale: standard support (-Inf,Inf)

        nu = 1. / c2 - 1
        X = X / sqrt(c0 / (1 - c2))
        m1 = nu
        m2 = 0
        p = t.cdf(X[1], nu) - t.cdf(X[0], nu)
        lo=t.ppf( 3.39767E-06, nu )*sqrt(c0/(1-c2))*sigma+mu
        hi=t.ppf( 0.999996602, nu )*sqrt(c0/(1-c2))*sigma+mu
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( tcdf(X[0],nu), 0,1 );
        #Inv2 = norm.ppf(tcdf(X[1], nu), 0, 1)

        #     p = sqrt(c0./(1-c2)).*tpdf(X,nu);
        #     X = sqrt(c0./(1-c2)).*X;
    else:
        print "ERROR: Unknown data type!"
#    elif dtype == 8:
        #Monte Carlo Simulation Histogram
#        out = kurt
#        p = skew
#        m1 = 0
#        m2 = 0

    # scale and shift
    # X = X.*sigma + mu; % Undo z-score

    if dtype != 1 and dtype != 2:
        mu_s=(mu-lo)/(hi-lo);
        sigma_s=sigma ** 2/(hi-lo) ** 2;
        alph = ((1-mu_s)/sigma_s -1/mu_s)*mu_s ** 2;
        beta = alph*(1/mu_s - 1);

    if alph >70 or beta>70:
        alph=70;
        beta=70;
        lo=mu-11.87434*sigma
        hi=2*mu-lo

    return p, dtype, Inv1, m1, m2, a1, a2, alph, beta, lo, hi
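The type selection in pearscdf boils down to the discriminant kappa = c1^2 / (4*c0*c2); the following standalone sketch isolates just that classification step (same c0, c1, c2 definitions as above, written for Python 3).

import numpy as np

def pearson_type(skew, kurt):
    # discriminant of c0 + c1*x + c2*x^2, as in pearscdf above
    beta1, beta2 = skew ** 2, kurt
    c0 = 4 * beta2 - 3 * beta1
    c1 = skew * (beta2 + 3)
    c2 = 2 * beta2 - 3 * beta1 - 6
    if c1 == 0:
        return 0 if beta2 == 3 else (2 if beta2 < 3 else 7)
    if c2 == 0:
        return 3
    kappa = c1 ** 2 / (4 * c0 * c2)
    if kappa < 0:
        return 1
    if kappa < 1 - np.finfo(np.float64).eps:
        return 4
    if kappa <= 1 + np.finfo(np.float64).eps:
        return 5
    return 6

print(pearson_type(0.0, 3.0))  # 0: normal
print(pearson_type(0.5, 3.0))  # 1: four-parameter beta (kappa < 0)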
Beispiel #49
if max(mat_disY)/sum(mat_disY) < 0.7679:
    print('The variance is homogeneous')
else:
    print('The variance is inhomogeneous')
print('-------------------------------------------------------------------------\nSIGNIFICANCE CHECK OF THE COEFFICIENTS BY THE STUDENT CRITERION:\n')
S2b = sum(mat_disY) / N
S2bs = S2b / (m * N)
Sbs = sqrt(S2bs)
print('Sbs:\n', Sbs)
bb = [sum(mat_serY[k] * tran1[i][k] for k in range(N))/N for i in range(N)]
t = [abs(bb[i])/Sbs for i in range(N)]
print('bi:\n', bb, '\nti:\n', t, '\n...\n..')
f1, f2 = m - 1, N
f3 = f1 * f2
for i in range(N):
    if t[i] < t_check.ppf(q=0.975, df=f3):
        blist[i] = 0
        d -= 1
        print('Excluding coefficient b', i, 'from the equation')
y_reg = [blist[0] + blist[1] * mat_X[i][0] + blist[2] * mat_X[i][1] + blist[3] * mat_X[i][2] for i in range(4)]
print('Values of the regression equations:\n', y_reg)
print('-------------------------------------------------------------------------\nADEQUACY CHECK BY THE FISHER CRITERION:\n')
f4 = N - d
Sad = (m / (N - d)) * sum((y_reg[i] - mat_serY[i]) ** 2 for i in range(N))
Fp = Sad / S2b
print('Number of significant coefficients:\n', d, '\nFp:\n', Fp, '\n...\n..')
if Fp > f.ppf(q=0.95, dfn=f4, dfd=f3):
    print('The regression equation is inadequate to the original at significance level 0.05')
else:
    print('The regression equation is adequate to the original at significance level 0.05')
Beispiel #50
def f(f1, f2, p):
    f1 = float(f1)
    f2 = float(f2)
    p = float(p)
    return sympify(sci_f.ppf(p, f1, f2))
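A usage sketch for the wrapper above (it assumes `from sympy import sympify` and `from scipy.stats import f as sci_f`, as the snippet implies): string arguments are accepted and a sympy number is returned.

# assuming: from sympy import sympify; from scipy.stats import f as sci_f
print(f('4', '12', '0.95'))  # 0.95 quantile of F(4, 12) as a sympy Float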
Beispiel #51
    def f_a(self):
        self.F_emp_A = self.var_A / self.var_remain if self.var_A > self.var_remain else self.var_remain / self.var_A
        self.F_cr_A = f.ppf(1 - self.alpha, self.n - 1, (self.n - 1) * (self.m - 1)) if self.var_A > self.var_remain \
            else f.ppf(1 - self.alpha, (self.n - 1) * (self.m - 1), self.n - 1)

    def f_b(self):
        self.F_emp_B = self.var_B / self.var_remain if self.var_B > self.var_remain else self.var_remain / self.var_B
        self.F_cr_B = f.ppf(1 - self.alpha, self.m - 1, ((self.m - 1) * (self.n - 1))) if self.var_B > self.var_remain \
            else f.ppf(1 - self.alpha, ((self.m - 1) * (self.n - 1)), self.m - 1)

    def f_full(self):
        self.F_emp_Full = self.var_full / self.var_remain if self.var_full > self.var_remain\
            else self.var_remain / self.var_full
        self.F_cr_Full = f.ppf(1 - self.alpha, (self.n * self.m) - 1, (self.n - 1) * (self.m - 1)) \
            if self.var_full > self.var_remain \
            else f.ppf(1 - self.alpha, (self.n - 1) * (self.m - 1), (self.n * self.m) - 1)


# data = np.array([
#     [25, 20, 30, 25],
#     [30, 40, 40, 50],
#     [23, 18, 20, 27]
# ])
pd.set_option('display.max_columns', 11)
a = AnalysisOfVariance(0.05, n=3, m=3)
a.calc()
a.data_frame()

print(f.ppf(0.95, 2, 6))
Beispiel #52
def main(n, m):
    f1 = m - 1
    f2 = n
    f3 = f1 * f2
    q = 0.05

    student = partial(t.ppf, q=1 - 0.025)
    t_student = student(df=f3)

    G_kr = cohren(f1, f2)

    x, y, x_norm = plan_matrix(n, m)
    y_aver = [round(sum(i) / len(i), 2) for i in y]

    B = find_coeff(x, y_aver, n)

    Gp = krit_cochrena(y, y_aver, n, m)
    print(f'Gp = {Gp}')
    if Gp < G_kr:
        print(f'With probability {1-q} the variances are homogeneous.')
    else:
        print("The number of experiments must be increased")
        m += 1
        main(n, m)

    ts = krit_studenta(x_norm[:, 1:], y, y_aver, n, m)
    print("Student's criterion:\n", ts)
    res = [t for t in ts if t > t_student]
    final_k = [B[ts.index(i)] for i in ts if i in res]
    print(
        'Coefficients {} are statistically insignificant, so we exclude them from the equation.'
        .format([i for i in B if i not in final_k]))

    y_new = []
    for j in range(n):
        y_new.append(reg([x[j][ts.index(i)] for i in ts if i in res], final_k))

    print(f'Values of "y" with coefficients {final_k}')
    print(y_new)

    d = len(res)
    f4 = n - d
    F_p = krit_fishera(y, y_aver, y_new, n, m, d)

    fisher = partial(f.ppf, q=1 - 0.05)
    f_t = fisher(dfn=f4, dfd=f3)

    print('Adequacy check by the Fisher criterion')
    print('F_p =', F_p)
    print('F_t =', f_t)
    if F_p < f_t:
        print('The mathematical model is adequate to the experimental data')
    else:
        print('The mathematical model is not adequate to the experimental data')

    print("-----------------------------------------------------------")
    print("Додаткове завдання")
    Qa = [0.95, 0.96, 0.97, 0.98, 0.99, 0.1, 1.05, 1.1, 1.2]

    for i in range(len(Qa)):
        # Qa entries above 1 are not valid probabilities; f.ppf returns nan for them
        f_t = f.ppf(q=Qa[i], dfn=f4, dfd=f3)
        if F_p > f_t:
            print("At q=0.05 the regression equation is not adequate")
    print("-----------------------------------------------------------")
Beispiel #53
def hyperellipsoid(P, y=None, z=None, pvalue=0.95, units=None, show=True, ax=None):
    """
    Prediction hyperellipsoid for multivariate data.

    The hyperellipsoid is a prediction interval for a sample of a multivariate
    random variable and is such that there is pvalue*100% of probability that a
    new observation will be contained inside the hyperellipsoid [1]_.  
    The hyperellipsoid is also a tolerance region such that the average or
    expected value of the proportion of the population contained in this region
    is exactly pvalue*100% (called Type 2 tolerance region by Chew (1966) [1]_).

    The directions and lengths of the semi-axes are found, respectively, as the
    eigenvectors and eigenvalues of the covariance matrix of the data using
    the concept of principal components analysis (PCA) [2]_ or singular value
    decomposition (SVD) [3]_ and the length of the semi-axes are adjusted to
    account for the necessary prediction probability.

    The volume of the hyperellipsoid is calculated with the same equation for
    the volume of a n-dimensional ball [4]_ with the radius replaced by the
    semi-axes of the hyperellipsoid.

    This function calculates the prediction hyperellipsoid for the data,
    which is considered a (finite) sample of a multivariate random variable
    with normal distribution (i.e., the F distribution is used and not
    the approximation by the chi-square distribution).

    Parameters
    ----------
    P : 1-D or 2-D array_like
        For a 1-D array, P is the abscissa values of the [x,y] or [x,y,z] data.
        For a 2-D array, P is the joined values of the multivariate data.
        The shape of the 2-D array should be (n, p) where n is the number of
        observations (rows) and p the number of dimensions (columns).
    y : 1-D array_like, optional (default = None)
        Ordinate (second coordinate) values of the [x, y] or [x, y, z] data.
    z : 1-D array_like, optional (default = None)
        Third coordinate values of the [x, y, z] data.
    pvalue : float, optional (default = .95)
        Desired prediction probability of the hyperellipsoid.
    units : str, optional (default = None)
        Units of the input data.
    show : bool, optional (default = True)
        True (1) plots data in a matplotlib figure, False (0) to not plot.
        Only the results for p=2 (ellipse) or p=3 (ellipsoid) will be plotted.
    ax : a matplotlib.axes.Axes instance (default = None)

    Returns
    -------
    hypervolume : float
        Hypervolume (e.g., area of the ellipse or volume of the ellipsoid).
    axes : 1-D array
        Lengths of the semi-axes of the hyperellipsoid (largest first).
    angles : 1-D array
        Angles of the semi-axes of the hyperellipsoid (only for 2D or 3D data).
        For the ellipsoid (3D data), the angles are the Euler angles
        calculated in the XYZ sequence.
    center : 1-D array
        Centroid of the hyperellipsoid.
    rotation : 2-D array
        Rotation matrix for hyperellipsoid semi-axes (only for 2D or 3D data).

    References
    ----------
    .. [1] http://www.jstor.org/stable/2282774
    .. [2] http://en.wikipedia.org/wiki/Principal_component_analysis
    .. [3] http://en.wikipedia.org/wiki/Singular_value_decomposition
    .. [4] http://en.wikipedia.org/wiki/Volume_of_an_n-ball

    Examples
    --------
    >>> import numpy as np
    >>> from hyperellipsoid import hyperellipsoid
    >>> y = np.cumsum(np.random.randn(3000)) / 50
    >>> x = np.cumsum(np.random.randn(3000)) / 100
    >>> area, axes, angles, center, R = hyperellipsoid(x, y, units='cm')
    >>> print('Area =', area)
    >>> print('Semi-axes =', axes)
    >>> print('Angles =', angles)
    >>> print('Center =', center)
    >>> print('Rotation matrix =\n', R)

    >>> P = np.random.randn(1000, 3)
    >>> P[:, 2] = P[:, 2] + P[:, 1]*.5
    >>> P[:, 1] = P[:, 1] + P[:, 0]*.5
    >>> volume, axes, angles, center, R = hyperellipsoid(P, units='cm')
    """

    from scipy.stats import f as F
    from scipy.special import gamma

    P = np.array(P, ndmin=2, dtype=float)
    if P.shape[0] == 1:
        P = P.T
    if y is not None:
        y = np.array(y, copy=False, ndmin=2, dtype=float)
        if y.shape[0] == 1:
            y = y.T
        P = np.concatenate((P, y), axis=1)
    if z is not None:
        z = np.array(z, copy=False, ndmin=2, dtype=float)
        if z.shape[0] == 1:
            z = z.T
        P = np.concatenate((P, z), axis=1)
    # covariance matrix
    cov = np.cov(P, rowvar=0)
    # singular value decomposition
    U, s, Vt = np.linalg.svd(cov)
    p, n = s.size, P.shape[0]
    # F percent point function
    fppf = F.ppf(pvalue, p, n - p) * (n - 1) * p * (n + 1) / n / (n - p)
    # semi-axes (largest first)
    saxes = np.sqrt(s * fppf)
    hypervolume = np.pi ** (p / 2) / gamma(p / 2 + 1) * np.prod(saxes)
    # rotation matrix
    if p == 2 or p == 3:
        R = Vt
        if s.size == 2:
            angles = np.array(
                [np.rad2deg(np.arctan2(R[1, 0], R[0, 0])), 90 - np.rad2deg(np.arctan2(R[1, 0], -R[0, 0]))]
            )
        else:
            angles = rotXYZ(R, unit="deg")
        # centroid of the hyperellipsoid
        center = np.mean(P, axis=0)
    else:
        R, angles = None, None

    if show and (p == 2 or p == 3):
        _plot(P, hypervolume, saxes, center, R, pvalue, units, ax)

    return hypervolume, saxes, angles, center, R
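The docstring above stresses that the exact F distribution is used rather than the chi-square approximation; this short sketch shows the two scalings converging as n grows (p = 2).

import numpy as np
from scipy.stats import f as F, chi2

p = 2
for n in (10, 100, 10000):
    f_scale = F.ppf(0.95, p, n - p) * (n - 1) * p * (n + 1) / (n * (n - p))
    # for large n the F-based factor approaches the chi-square quantile
    print(n, round(f_scale, 3), round(chi2.ppf(0.95, p), 3))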
Beispiel #55
 def get_independent_confidence_intervals(self, alpha=0.05):
   m = len(self.unknowns)
   n = len(self.validation_data.get_time_steps())
   f_alpha = f.ppf(1-alpha, m, n)
   c_alpha = n * self.objective_value * f_alpha / (n - m)
   return np.sqrt(c_alpha / self.get_covariance_matrix().diagonal())
Beispiel #58
def Computing_UCL(N, alpha):
    S = 1
    return (S*((N-1)**2) * f.ppf(1-alpha, S, N-S)) / (N*(N-S))
Beispiel #59
    else:
        ts[i] = False

print("Чи значимі b: " + str(ts))

f4 = k - d
print("f4: " + str(f4))
x = [[-30, -30, 0, 0],
     [10, 60, 10, 60],
     [10, 35, 35, 10]]
yj = []
for i in range(4):
    yj.append(0)
    for j in range(4):
        if ts[j]:
            if j == 0:
                yj[i] += b[0]
            else:
                yj[i] += b[j] * x[j-1][i]
print("yj: " + str(yj))

S2ad = round(m * sum([(yj[i] - yi[i])**2 for i in range(4)])/f4, 3)

Fp = round(S2ad/S2v, 3)
print("Fp: " + str(Fp))
Fcr = round(f.ppf(1 - alpha, f4, f3), 1)
print("Fcr: " + str(Fcr))
if Fp < Fcr:
    print("F-criteria: OK")
else:
    print("F-criteria: :(")
Beispiel #60
    list_ai = [round(i, 5) for i in solve(list_for_solve_a, Y_average)]
    print(
        "y = {} + {}*x1 + {}*x2 + {}*x3 + {}*x1x2 + {}*x1x3 + {}*x2x3 + {}*x1x2x3"
        .format(list_ai[0], list_ai[1], list_ai[2], list_ai[3], list_ai[4],
                list_ai[5], list_ai[6], list_ai[7]))

    print(
        "###################################################################################################################"
    )

    Gp = max(Disp_list) / sum_dispersion
    F1 = m - 1
    N = len(y1)
    F2 = N
    q1 = 0.05 / F1
    fisher_value = f.ppf(q=1 - q1, dfn=F2, dfd=(F1 - 1) * F2)
    Gt = fisher_value / (fisher_value + F1 - 1)
    print("\nGp = ", Gp, " Gt = ", Gt)

    if Gp < Gt:
        print("_____Дисперсія однорідна!_____\n")

        Dispersion_B = sum_dispersion / N
        Dispersion_beta = Dispersion_B / (m * N)
        S_beta = math.sqrt(abs(Dispersion_beta))

        beta_list = [0, 0, 0, 0, 0, 0, 0, 0]
        for i in range(len(x0_factor)):
            beta_list[0] += (Y_average[i] * x0_factor[i]) / N
            beta_list[1] += (Y_average[i] * x1_factor[i]) / N
            beta_list[2] += (Y_average[i] * x2_factor[i]) / N