def fit_binomial(x, n, alpha=0.01):
    '''
    Clopper-Pearson fit of a binomial proportion.
    If n=0, the distribution is uniform in [0, 1].
    '''
    from scipy.stats import f
    if n == 0:
        ml = 0.5
    else:
        ml = float(x) / float(n)
    # Lower limit
    if x == 0:
        lb = 0
    else:
        nu1 = 2 * x
        nu2 = 2 * (n - x + 1)
        F = f.ppf(alpha / 2, nu1, nu2)
        lb = (nu1 * F) / (nu2 + nu1 * F)
    # Upper limit
    if x == n:
        ub = 1
    else:
        nu1 = 2 * (x + 1)
        nu2 = 2 * (n - x)
        F = f.ppf(1 - alpha / 2, nu1, nu2)
        ub = (nu1 * F) / (nu2 + nu1 * F)
    assert 0 <= lb <= ml <= ub <= 1
    res = np.ndarray((), dtype=fit_dtype)
    res['mean'] = ml
    res['upper'] = ub
    res['lower'] = lb
    res['confidence'] = alpha  # was hard-coded to 0.01, ignoring the alpha argument
    res['skewed'] = not (lb < 0.5 < ub)
    return res
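# A minimal usage sketch for fit_binomial above. `fit_dtype` is not defined in
# this snippet; the structured dtype below is an assumption inferred from the
# fields the function assigns.
import numpy as np

fit_dtype = np.dtype([('mean', 'f8'), ('upper', 'f8'), ('lower', 'f8'),
                      ('confidence', 'f8'), ('skewed', '?')])

res = fit_binomial(x=7, n=20, alpha=0.05)
print(res['lower'], res['mean'], res['upper'])  # 95% Clopper-Pearson interval around 0.35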
def chi2_distribution():
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import chi2, norm, t, f

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))
    # simulate the chi2 distribution: sum of df squared standard normals
    y = []
    n = 10
    for i in range(1000):
        chi2r = 0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r = chi2r + r[j] ** 2
        y.append(chi2r)
    ax.hist(y, density=True, alpha=0.2)  # `normed` was removed from matplotlib
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))
    # simulate the t-distribution: N(0,1) over sqrt(chi2/df)
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    # simulate the F-distribution: ratio of two chi2 variables over their df
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # no square roots: F = (chi2_1/df1) / (chi2_2/df2)
        y.append(rf)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()
def meanTest(data, means_old):
    """
    Use Hotelling's T-squared test to determine if both components have equal means.
    @param means_old: old mean values for one component, shape: (n_features,)
    @return: True if the means are equal, False if not
    """
    import numpy as np
    from scipy import linalg
    from scipy.stats import f

    X = data
    n_samples = data.shape[0]
    n_features = data.shape[1]
    S = np.cov(X, rowvar=0)  # if rowvar=0, each row represents an observation
    Sinv = linalg.inv(S)
    m = X.mean(axis=0) - means_old
    T_squared = n_samples * np.dot(np.dot(m.T, Sinv), m)
    test_statistic = ((n_samples - n_features) * T_squared) / float(n_features * (n_samples - 1))
    alpha = 0.05
    # reject when the statistic exceeds the upper alpha quantile of F;
    # the original used f.ppf(alpha, ...), i.e. the lower tail
    threshold = f.ppf(1 - alpha, n_features, n_samples - n_features)
    if test_statistic <= threshold:
        print("Mean test passed")
        return True
    else:
        return False
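# Quick sanity check for meanTest, assuming the corrected threshold above:
# data drawn at the hypothesized mean should usually pass, a shifted mean fail.
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(loc=[1.0, -2.0], scale=1.0, size=(200, 2))
print(meanTest(data, np.array([1.0, -2.0])))  # expected True most of the time
print(meanTest(data, np.array([3.0, 0.0])))   # a shifted mean should fail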
def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    # simulate the sampling distribution of the variance ratio:
    # with dfn+1 and dfm+1 observations and equal population variances,
    # the ratio of unbiased sample variances follows F(dfn, dfm)
    y = []
    for i in range(1000):
        r1 = norm.rvs(loc=5, scale=2, size=dfn + 1)
        r2 = norm.rvs(loc=3, scale=2, size=dfm + 1)
        rf = np.var(r1, ddof=1) / np.var(r2, ddof=1)  # ddof=1: unbiased variances
        y.append(rf)
    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('sampling_distribution.png')
def F_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    # simulate the F-distribution
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # F = (chi2_1/df1) / (chi2_2/df2); the square roots were a bug
        y.append(rf)
    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('F_distribution.png')
def Computing_UCL(num_train_beats, dim_projected, alpha):
    '''
    Computing the upper control limit (UCL) for the Hotelling T-squared statistic.
    :param num_train_beats: number of ECG beats in the training set
    :param dim_projected: dimension of the projected space
    :param alpha: predefined alpha level (0.01 in the paper)
    :return: UCL value
    '''
    n, p = num_train_beats, dim_projected
    return (p * (n - 1) * (n + 1) * f.ppf(1 - alpha, p, n - p)) / (n * (n - p))
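# Worked call, assuming the paper's alpha = 0.01; the training-set size and
# projection dimension below are illustrative numbers, not from the source.
from scipy.stats import f

ucl = Computing_UCL(num_train_beats=500, dim_projected=5, alpha=0.01)
print(ucl)  # T-squared values above this limit would be flagged as anomalous beats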
def calc_scipy():
    # read data
    data = loadtxt(DATA_PATH, delimiter=",", skiprows=1)
    # one-way ANOVA across the three columns
    F_value, p_value = f_oneway(data[:, 0], data[:, 1], data[:, 2])
    # extra ("omake"): critical values for comparison
    df1 = data.shape[1] - 1
    df2 = data.shape[0] * data.shape[1] - data.shape[1]
    F_dist = f.ppf(0.95, df1, df2)
    F_dist_001 = f.ppf(0.99, df1, df2)
    # output
    print '[Scipy]'
    print 'F value:', F_value
    print 'p value:', p_value
    print 'F dist(0.05):', F_dist, abs(F_value) > F_dist
    print 'F dist(0.01):', F_dist_001, abs(F_value) > F_dist_001
def plot(self, data, size, newdata=None):
    sizes = data[:, 0]
    sample = data[:, 1:]
    samples = dict()
    for n, value in zip(sizes, sample):
        if n in samples:
            samples[n] = np.vstack([samples[n], value])
        else:
            samples[n] = value
    m = len(samples.keys())   # number of subgroups
    n = len(samples[1])       # observations per subgroup
    p = len(samples[1].T)     # number of variables
    variance, S = [], []
    for i in range(m):
        mat = np.cov(samples[i + 1].T, ddof=1)
        variance.append(mat.diagonal())
        S.append(cova(mat))
    variance, S = np.array(variance), np.array(S)
    means = np.array([samples[xs + 1].mean(axis=0) for xs in range(m)])
    means_total = means.mean(axis=0)
    Smat = var_cov(variance, S)
    Smat_inv = np.linalg.inv(Smat)
    values = []
    for i in range(m):
        a = means[i] - means_total
        # T-squared per subgroup; the literal 5 looks like a hard-coded
        # subgroup size and presumably should track n
        values.append(5 * a @ Smat_inv @ a.T)
    p1 = p * (m - 1) * (n - 1)
    p2 = m * n - m - p + 1
    lcl = (p1 / p2) * f.ppf(0.00135, p, p2)
    center = (p1 / p2) * f.ppf(0.50, p, p2)
    ucl = (p1 / p2) * f.ppf(0.99865, p, p2)
    return (values, center, lcl, ucl, self._title)
def Fmask(Fimg, dfnum, dfdenom, pvalue=1.0e-04):
    """
    Create a mask for use in estimating pooled covariance based on
    an F contrast.
    """
    ## TODO check nipy.algorithms.statistics.models.contrast to see if rank is
    ## correctly set -- I don't think it is right now.
    print(dfnum, dfdenom)
    thresh = FDbn.ppf(pvalue, dfnum, dfdenom)
    return Image(np.greater(np.asarray(Fimg), thresh), Fimg.grid.copy())
def calculate_t2_statistics(self, k, name='t2', plot=True):
    from scipy.stats import f
    index = np.nonzero(self.y == 0)
    x_normal = self.x[index[0], :]
    xhat_normal, wl, vl = pca(x_normal, k)  # data in the new basis
    mu = np.mean(x_normal, 0)
    n = x_normal.shape[0]
    p = 0.05
    dfn = k
    dfd = n - k
    f_density = f.ppf(1.0 - p / 2, dfn, dfd)
    t2_thr = (1.0 * k * (n - 1) / (n - k)) * f_density
    # project all data onto the PCA basis computed from normal data only
    xhat = np.real(np.dot(self.x - mu, vl))
    lamb = np.eye(k)
    for i in range(0, k):
        lamb[i, :] = wl[i] * lamb[i, :]
    lamb_inv = np.linalg.inv(lamb)  # hoisted out of the loop; lamb is fixed
    t2_statistics = np.array([])
    for i in range(0, xhat.shape[0]):
        t2_statistics = np.append(
            t2_statistics,
            np.dot(np.dot(xhat[i, :], lamb_inv), np.transpose(xhat[i, :])))
    t2_statistics = np.reshape(t2_statistics, (-1, 1))
    yhat = t2_statistics > t2_thr
    time_fault = np.nonzero(self.y != 0)[0][0]
    if plot:
        plt.semilogy(np.arange(0, xhat.shape[0]), t2_statistics)
        # plt.hold was removed from matplotlib; subsequent calls draw on the same axes
        plt.semilogy(t2_thr * np.ones_like(t2_statistics), '--g')
        plt.axvline(x=time_fault, ymin=0, ymax=1, linestyle='--', color='r')
        plt.xlabel('time (min)')
        plt.ylabel('log($t^2$ statistics)')
        plt.legend(['$log(t^2)$ statistic', '$t^2$ threshold',
                    'fault starting time'])
        plt.title('$t^2$ statistic over time for fault {:s} using {:d} '
                  'principal components'.format(name, k))
        plt.savefig(self.path + 'figures/' + name + 't2vstime_k=' + str(k) + '.eps')
        plt.clf()
    return yhat, t2_statistics, {'lamb': lamb, 'thr': t2_thr}
def infer_by_chi(l1, l2, alpha=0.01):
    ss1 = s_2(l1)
    ss2 = s_2(l2)
    fv = ss1 / ss2
    from scipy.stats import f
    res = f.ppf(1 - alpha, len(l1) - 1, len(l2) - 1)
    print('res {} fv {}'.format(res, fv))
    if res > fv:
        print('sigma1 <= sigma2')
    else:
        print('sigma1 > sigma2')
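# `s_2` is not defined in this snippet; presumably it is the unbiased sample
# variance, which matches the F test's degrees of freedom. A minimal sketch
# under that assumption:
import numpy as np

def s_2(values):
    # unbiased sample variance (divisor n - 1)
    return np.var(values, ddof=1)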
def binofit(x, n, alpha=0.01):
    '''
    Clopper-Pearson confidence bounds for a binomial proportion.
    Copied from Matlab.
    '''
    from scipy.stats import f
    # Lower limit
    if x == 0:
        lb = 0
    else:
        nu1 = 2 * x
        nu2 = 2 * (n - x + 1)
        F = f.ppf(alpha / 2, nu1, nu2)
        lb = (nu1 * F) / (nu2 + nu1 * F)
    # Upper limit
    if x == n:
        ub = 1
    else:
        nu1 = 2 * (x + 1)
        nu2 = 2 * (n - x)
        F = f.ppf(1 - alpha / 2, nu1, nu2)
        ub = (nu1 * F) / (nu2 + nu1 * F)
    return (lb, ub)
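# Usage sketch: MATLAB's binofit defaults to alpha = 0.05 (a 95% interval);
# this port defaults to 0.01, so pass alpha explicitly when comparing.
lb, ub = binofit(7, 20, alpha=0.05)
print(lb, ub)  # 95% Clopper-Pearson bounds for 7 successes in 20 trials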
def gaussian_concentration(dataframe, unity, alpha=0.05):
    """
    Concentration estimate: number of points per unit volume of the
    (1 - alpha) prediction ellipsoid fitted to the data.
    """
    if dataframe.shape[0] > 1:
        X, Y, Z = SI(dataframe, unity)
        Sigma = np.cov(np.vstack((X, Y, Z)))
        U, axes, R = np.linalg.svd(Sigma)
        p, n = axes.size, min(len(X), len(Y), len(Z))
        # scale factor for the prediction ellipsoid semi-axes
        fppf = F.ppf(1 - alpha, p, n - p) * (n - 1) * p * (n + 1) / n / (n - p)
        axes = np.sqrt(axes * fppf)
        # volume of a p-dimensional ellipsoid
        vol = (np.pi ** (p / 2) / gamma(p / 2 + 1) * np.prod(axes))
        return (1 - alpha) * dataframe.shape[0] / vol
    else:
        return float("nan")
def binconf(x, n, alpha=0.05):
    from scipy.stats import f, norm
    # exact (Clopper-Pearson) lower limit
    nu1 = 2 * (n - x + 1)
    nu2 = 2 * x
    if x > 0:
        ll = x / (x + f.ppf(1 - alpha / 2, nu1, nu2) * (n - x + 1))
    else:
        ll = 0
    # exact (Clopper-Pearson) upper limit
    nu1p = nu2 + 2
    nu2p = nu1 - 2
    if x < n:
        pp = f.ppf(1 - alpha / 2, nu1p, nu2p)
    else:
        pp = 1
    ul = ((x + 1) * pp) / (n - x + (x + 1) * pp)
    # Wilson score interval
    zcrit = -1 * norm.ppf(alpha / 2)
    z2 = zcrit ** 2
    p = x / n
    cl = (p + z2 / 2 / n + np.array([-1, 1]) * zcrit *
          np.sqrt((p * (1 - p) + z2 / 4 / n) / n)) / (1 + z2 / n)
    # boundary corrections near x = 1 and x = n - 1
    if x == 1:
        cl[0] = -np.log(1 - alpha) / n
    elif x == (n - 1):
        cl[1] = 1 + np.log(1 - alpha) / n
    return ll, ul, cl
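# Usage sketch: binconf returns both the exact (Clopper-Pearson) bounds and a
# Wilson score interval for the same data.
import numpy as np

ll, ul, wilson = binconf(x=7, n=20, alpha=0.05)
print('exact:', ll, ul)
print('wilson:', wilson[0], wilson[1])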
def exact_interval(y1, y2, M1, M2, alpha):
    """Return exact bound.

    Note: All input values must be of float type.
    """
    n = y1 + y2
    df11 = 2 * (n - y1 + 1)
    df12 = 2 * y1
    df21 = 2 * (y1 + 1)
    df22 = 2 * (n - y1)
    # separate F quantiles for each bound; the original reused the first
    # quantile for both, leaving df21/df22 unused
    f_lower = f.ppf(1 - alpha / 2, df11, df12)
    f_upper = f.ppf(1 - alpha / 2, df21, df22)
    lb_pi = y1 / (y1 + (n - y1 + 1) * f_lower)
    ub_pi = 1 - (n - y1) / (n - y1 + (y1 + 1) * f_upper)
    lb = lb_pi / (1 - lb_pi) * M2 / M1
    ub = ub_pi / (1 - ub_pi) * M2 / M1
    return (lb, ub)
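# Sanity check: the pi bounds inside exact_interval are a Clopper-Pearson
# interval for y1 out of n = y1 + y2, so with M1 == M2 the returned bounds
# should bracket the observed odds y1/y2.
lb, ub = exact_interval(y1=7.0, y2=13.0, M1=1.0, M2=1.0, alpha=0.05)
print(lb, ub)  # should bracket 7/13, about 0.538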
def regiaoAbrangencia(self, PA=0.95):
    '''
    Method to evaluate the likelihood confidence region of the parameters.
    '''
    Fisher = f.ppf(PA, self.NP, (self.NE * self.NY - self.NP))
    Comparacao = self.Otimizacao.best_fitness * (
        1 + float(self.NP) / (self.NE * self.NY - float(self.NP)) * Fisher)
    Regiao = []
    Hist_Posicoes = []
    Hist_Fitness = []
    for it in xrange(self.Otimizacao.itmax):
        for ID_particula in xrange(self.Otimizacao.Num_particulas):
            # keep only particles whose fitness falls inside the region
            if self.Otimizacao.historico_fitness[it][ID_particula] <= Comparacao:
                Regiao.append(self.Otimizacao.historico_posicoes[it][ID_particula])
            Hist_Posicoes.append(self.Otimizacao.historico_posicoes[it][ID_particula])
            Hist_Fitness.append(self.Otimizacao.historico_fitness[it][ID_particula])
    self.parametros._parametro(self.parametros.estimativa,
                               self.parametros.matriz_covariancia, Regiao)
    return (Hist_Posicoes, Hist_Fitness)
def chapter_6(input_list):
    for choice in input_list:
        if choice == 1:
            print "Already explained in example 6.1"
        if choice == 7:
            means1 = np.matrix('204.4;556.6')
            means2 = np.matrix('130;355')
            cov_matrix1 = np.matrix('13825.3,23823.4;23823.4,73107.4')
            cov_matrix2 = np.matrix('8632,19616.7;19616.7,55964.5')
            print 'cov_matrix1', cov_matrix1
            print 'cov_matrix2', cov_matrix2
            n1, n2 = 45, 55
            s_pooled = ((n1 - 1) * cov_matrix1 + (n2 - 1) * cov_matrix2) / float(n1 + n2 - 2)
            print 's_pooled', s_pooled
            mean_diff = means1 - means2
            print 'mean_diff', mean_diff
            # float() guards against Python 2 integer division in n1*n2/(n1+n2)
            T_square = np.transpose(mean_diff) * ((float(n1 * n2) / (n1 + n2)) * inv(s_pooled)) * mean_diff
            print 'T_square', T_square
            c_square = (n1 + n2 - 2) * 2 * f.ppf(0.95, 2, n1 + n2 - 2 - 1) / (n1 + n2 - 2 - 1)
            print 'c_square', c_square
            a = inv(s_pooled) * mean_diff
            print 'Linear Combination', a
def var_var(self, alpha):
    f0 = self.S1 ** 2 / self.S2 ** 2
    n1, n2 = self.n1, self.n2
    # hypothesis testing
    H1a = f.ppf(1 - alpha / 2.0, n1 - 1, n2 - 1) < f0 or f.ppf(alpha / 2.0, n1 - 1, n2 - 1) > f0
    H1b = f.ppf(alpha / 2.0, n1 - 1, n2 - 1) < f0
    H1c = f.ppf(1 - alpha / 2.0, n1 - 1, n2 - 1) > f0
    # p-values
    p1a = np.max(np.array([f.sf(f0, n1 - 1, n2 - 1), 1 - f.sf(f0, n1 - 1, n2 - 1)]))
    p1b = f.sf(f0, n1 - 1, n2 - 1)
    p1c = 1 - f.sf(f0, n1 - 1, n2 - 1)
    # confidence interval for sigma1^2 / sigma2^2
    # (the original divided S1^2 by itself, which is always 1)
    c1 = self.S1 ** 2 / self.S2 ** 2 * f.ppf(alpha / 2.0, n2 - 1, n1 - 1)
    c2 = self.S1 ** 2 / self.S2 ** 2 * f.ppf(1 - alpha / 2.0, n2 - 1, n1 - 1)
    return H1a, H1b, H1c, p1a, p1b, p1c, (c1, c2)
def get_scale(confProb, apriori, dim, df=0):
    """
    returns confidence scale for
        apriori: True/False: apriori/aposteriori standard deviation
        dimension dim
        df: degrees of freedom
    """
    if apriori:
        # for apriori standard deviations
        from scipy.stats import chi2
        from math import sqrt
        return sqrt(chi2.ppf(confProb, dim))
    else:
        if df == 0:
            raise ConfidenceScaleError, "Zero degrees of freedom"
        # for aposteriori standard deviations
        from scipy.stats import f
        from math import sqrt
        return sqrt(dim * f.ppf(confProb, dim, df))
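# Usage sketch (Python 2, matching the snippet above): scale factors that turn
# 1-sigma axes into a 95% confidence region in 2D.
print get_scale(0.95, True, 2)            # a priori: sqrt(chi2.ppf(0.95, 2)), about 2.45
print get_scale(0.95, False, 2, df=10)    # a posteriori, 10 degrees of freedom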
def get_fisher_critical(probability, f3, f4):
    return f.ppf(probability, f3, f4)
p = model2.df_model
n = train.shape[0]
# regression (explained) sum of squares
RSS = np.sum((model2.fittedvalues - ybar) ** 2)
# error (residual) sum of squares
ESS = np.sum(model2.resid ** 2)
# F statistic
F = (RSS / p) / (ESS / (n - p - 1))
print('F statistic:', F)
# F value reported by the model
model2.fvalue
# theoretical (critical) value of the F distribution
from scipy.stats import f
F_Theroy = f.ppf(q=0.95, dfn=p, dfd=n - p - 1)
print('Critical value of the F distribution:', F_Theroy)
# model summary
model2.summary()
# normality check: histogram
import scipy.stats as stats
# make Chinese labels and minus signs display correctly
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False
# draw the histogram
sns.distplot(a=Profit_New.Profit, bins=10, fit=stats.norm, norm_hist=True,
x_4 = read_column_from_csv(column_number=2 + (variation - 1) * 4,
                           file='data/4problem.csv')
y_1 = read_column_from_csv(column_number=3 + (variation - 1) * 4,
                           file='data/4problem.csv')
y_estimation = [
    coefficient_vector[0] + coefficient_vector[1] * x_2[i] +
    coefficient_vector[2] * x_3[i] + coefficient_vector[3] * x_4[i]
    for i in range(40)
]
ess_ur = ess(y_1, y_estimation)
rss_ur = rss(y_1, y_estimation)
# Fisher distribution
f_crit = f.ppf(0.95, k - 1, n - k)
f_real = ess_ur / (k - 1) / (rss_ur / (n - k))
print('F (95%, k-1, n-k) is {}'.format(f_crit))
print('ess / (k - 1) / (rss / (n - k)) is {}'.format(f_real))
if f_crit < f_real:
    print('Reject the null hypothesis b1=b2=b3=b4=0: '
          'the regression is significant as a whole')
else:
    print('Accept the null hypothesis b1=b2=b3=b4=0: '
          'the regression is not significant as a whole')
print()
# -*- coding: utf-8 -*-
'''
Goal    :
Author  : Yonghan Jung, ISyE, KAIST
Date    : 15
Comment
-
'''

''' Library '''
import numpy as np
from scipy.stats import f

''' Function or Class '''
class Example:
    def __init__(self):
        return None

if __name__ == "__main__":
    DoF1 = 10
    DoF2 = 20
    print f.ppf(0.90, DoF1, DoF2)
    y_hat.append(
        b0 + b1 * x_matrix[i][0] + b2 * x_matrix[i][1] + b3 * x_matrix[i][2] +
        b12 * x_matrix[i][0] * x_matrix[i][1] +
        b13 * x_matrix[i][0] * x_matrix[i][2] +
        b123 * x_matrix[i][0] * x_matrix[i][1] * x_matrix[i][2])
    print(
        f"^y{chr(8321 + i)} = {b0:.2f}{b1:+.2f}*x{chr(8321 + i)}\u2081{b2:+.2f}*x{chr(8321 + i)}\u2082{b3:+.2f}*x{chr(8321 + i)}\u2083{b12:+.2f}*x{chr(8321 + i)}\u2081"
        f"*x{chr(8321 + i)}\u2082{b13:+.2f}*x{chr(8321 + i)}\u2081*x{chr(8321 + i)}\u2083{b123:+.2f}*x{chr(8321 + i)}\u2081*x{chr(8321 + i)}\u2082*x{chr(8321 + i)}\u2083 "
        f"= {y_hat[i]:.2f}")

"""FISHER"""
d = 2
f4 = N - d
S2_ad = 0
for i in range(N):
    S2_ad += (m / (N - d) * ((y_hat[i] - my_list[i]) ** 2))
Fp = S2_ad / S2B
Ft = f.ppf(1 - q, f4, f3)
print("Fp:", Fp)
print("Ft:", Ft)
if Fp > Ft:
    print("The regression equation is not adequate to the original at the 0.05 significance level")
    print("To start over, enter any character in the console\n")
    if input() == 0:
        break
else:
    print("The regression equation is adequate to the original at the 0.05 significance level")
    break
title = 'Model Bias - ' + month[i]
filename = FIG_PATH + 'u200_bias_' + month[i] + '.png'
# interp model grid to obs grid and compute test
winds_s4_mean_new = winds_s4_mean.u[i, :, :].interp_like(
    winds_erai_mean.u[mm[i], :, :], method='linear')
winds_s4_sd_new = winds_s4_sd.u[i, :, :].interp_like(
    winds_erai_sd.u[mm[i], :, :], method='linear')
SE = np.sqrt(np.power(winds_erai_sd.u.values[mm[i], :, :], 2) / 36 +
             np.power(winds_s4_sd_new.values, 2) / (36 * 51))
tt = (winds_s4_mean_new.values - winds_erai_mean.u.values[mm[i], :, :])
DF = 36 + 51 * 36 - 2
#DF = np.power(SE, 4) / (np.power(np.power(winds_erai_sd[7 + i, :, :], 2)/ 36) / 35 +
#               np.power(np.power(winds_s4_sd_new, 2)/ (36 * 51)) /(36 * 51 - 1))
t_cut = t.ppf(0.025, DF)
df_cut = f.ppf([0.025, 0.975], 35, 36 * 51 - 1)
F = np.power(winds_erai_sd.u.values[mm[i], :, :], 2) / np.power(
    winds_s4_sd_new.values, 2)
# mask grid points where the t test (means) and F test (variances) are not significant
tt = ma.masked_array(tt, mask=np.logical_and((tt / SE) > t_cut,
                                             (tt / SE) < np.abs(t_cut)))
F = ma.masked_array(F, mask=np.logical_and(F > df_cut[0], F < df_cut[1]))
PlotMeanStdTest(tt, F, lat_erai, lon_erai, title, filename)
# seasonal means
season = ['ASO', 'SON', 'OND', 'NDJ', 'DJF']
lmonth = ['Aug', 'Sep', 'Oct', 'Nov', 'Dec']
for i in np.arange(0, 5):
    winds_erai_seas_mean = winds_erai['u'].sel(
        **{
MSE = SSE[0, 0] / (n - p - 1)
# compute MSR
MSR = SSR[0, 0] / p
# compute MST
MST = SST[0, 0] / (n - 1)

# compute F
# In[65]:
F = (Rsq * (n - p - 1)) / ((1 - Rsq) * p)
#F

# In[66]:
Rango = 0.9  # coverage level, i.e. how much of the curve is wanted
Ftest = f.ppf(Rango, p, n - (p + 1))
P_i = np.zeros(p)
if F > Ftest:
    tzeros = beta[:, 0] / sigma_i
    # two-sided p-values: integrate both tails (|t| guards against negative t)
    P_value = 2 * (1 - norm.cdf(np.abs(tzeros)))
    for i in range(p):
        if P_value[i] < 0.5:
            P_i[i] = 1
        else:
            P_i[i] = 0
else:
    # print("no luck")
    quit()

# In[75]:
def p_lo(alpha, n, N):
    # lower Clopper-Pearson-style bound for n successes out of N, via the F quantile
    quant_lo = f.ppf(alpha, 2 * n, 2 * (N - n + 1), loc=0, scale=1)
    return n * quant_lo / (N - n + 1 + n * quant_lo)
def cohren(f1, f2, q=0.05):
    # critical value of Cochran's C expressed through an F quantile
    q1 = q / f1
    fisher_value = f.ppf(q=1 - q1, dfn=f2, dfd=(f1 - 1) * f2)
    return fisher_value / (fisher_value + f1 - 1)
def computeUCLD(self, npc, nob, p_value, phase):
    """
    UCL (Upper Control Limit) for the D-statistic

    .. [Ref] PCA-based multivariate statistical network monitoring for
       anomaly detection
       http://www.sciencedirect.com/science/article/pii/S0167404816300116

    Parameters
    ----------
    npc: int
        Number of PCs
    nob: int
        Number of observations
    p_value: float
        p-value of the test, in (0,1]
    phase: int
        SPC phase
        1: Phase I
        2: Phase II

    Return
    ------
    lim: float64
        control limit at a 1-``p_value`` confidence level.

    Raises
    ------
    MSPCError
        When something goes wrong during the mathematical operations

    Examples
    --------
    >>> from msnm.utils import datautils as tools
    >>> import numpy as np
    >>> import scipy.io as sio
    >>> from msnm.modules.ma import pca
    >>> from msnm.modules.ma import mspc

    >>> # Original data (complete workspace of matlab example in mspc.m of MEDA)
    >>> originalData = './datatest/data_adicov_mspc.mat'

    >>> # Calibration matrix
    >>> data = sio.loadmat(originalData)
    >>> x = data['X']
    >>> weights = np.ones((x.shape[0],1))

    >>> # data preprocess auto-scaled
    >>> xcs, average, scale = tools.preprocess2D(x,2,weights)

    >>> # PCA
    >>> pcaModel = pca.PCA()
    >>> pcaModel.setPCs(1) # like in mspc.m example
    >>> pcaModel.setData(xcs)
    >>> pcaModel.runPCA()
    >>> T = pcaModel.getScores()
    >>> P = pcaModel.getLoadings()

    >>> # Compute UCLs
    >>> mspcInstance = mspc.MSPC()

    >>> # Number of observations
    >>> nob = x.shape[0]

    >>> # Compute UCL for the D-statistic with 95% confidence level
    >>> mspcInstance.computeUCLD(1, nob, 0.05, 2)
    >>> print "UCLd --> %f" % mspcInstance.getUCLD()
    """

    method_name = "computeUCLD()"

    try:
        if phase == 2:
            lim = (npc * (nob * nob - 1.0) / (nob * (nob - npc))) \
                * fisher.ppf(1 - p_value, npc, nob - npc)
        else:
            lim = (nob - 1.0) ** 2 / nob \
                * beta.ppf(1 - p_value, npc / 2.0, (nob - npc - 1) / 2.0)

        # Check if the limit is an ndarray of [1x1] dimensions and get the float value
        if isinstance(lim, np.ndarray):
            lim = lim[0, 0]

        # TODO: Sometimes after computations numpy takes UCLq as complex with 0j imaginary part
        if isinstance(lim, complex):
            logging.warn("UCLd has a complex value of %s. Getting just the real part.", lim)
            lim = lim.real

        self._UCLD = lim
    except Exception:
        raise MSPCError(self, sys.exc_info()[0], method_name)
def main(n, m):
    x1_min = -30
    x1_max = 0
    x2_min = -35
    x2_max = 10
    x3_min = 0
    x3_max = 20
    x01 = (x1_max + x1_min) / 2
    x02 = (x2_max + x2_min) / 2
    x03 = (x3_max + x3_min) / 2
    dx1 = x1_max - x01
    dx2 = x2_max - x02
    dx3 = x3_max - x03
    # normalized central composite design (star arm 1.73; 1.73**2 = 2.9929)
    xn = [[-1, -1, -1, +1, +1, +1, -1, +1, +1, +1],
          [-1, -1, +1, +1, -1, -1, +1, +1, +1, +1],
          [-1, +1, -1, -1, +1, -1, +1, +1, +1, +1],
          [-1, +1, +1, -1, -1, +1, -1, +1, +1, +1],
          [+1, -1, -1, -1, -1, +1, +1, +1, +1, +1],
          [+1, -1, +1, -1, +1, -1, -1, +1, +1, +1],
          [+1, +1, -1, +1, -1, -1, -1, +1, +1, +1],
          [+1, +1, +1, +1, +1, +1, +1, +1, +1, +1],
          [-1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0, 0],
          [+1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0, 0],
          [0, -1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0],
          [0, +1.73, 0, 0, 0, 0, 0, 0, 2.9929, 0],
          [0, 0, -1.73, 0, 0, 0, 0, 0, 0, 2.9929],
          [0, 0, +1.73, 0, 0, 0, 0, 0, 0, 2.9929],
          [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
    x1 = [x1_min, x1_min, x1_min, x1_min, x1_max, x1_max, x1_max, x1_max,
          -1.73 * dx1 + x01, 1.73 * dx1 + x01, x01, x01, x01, x01, x01]
    x2 = [x2_min, x2_min, x2_max, x2_max, x2_min, x2_min, x2_max, x2_max,
          x02, x02, -1.73 * dx2 + x02, 1.73 * dx2 + x02, x02, x02, x02]
    x3 = [x3_min, x3_max, x3_min, x3_max, x3_min, x3_max, x3_min, x3_max,
          x03, x03, x03, x03, -1.73 * dx3 + x03, 1.73 * dx3 + x03, x03]
    # interaction and quadratic columns
    x1x2 = [x1[i] * x2[i] for i in range(15)]
    x1x3 = [x1[i] * x3[i] for i in range(15)]
    x2x3 = [x2[i] * x3[i] for i in range(15)]
    x1x2x3 = [x1[i] * x2[i] * x3[i] for i in range(15)]
    x1kv = [x1[i] ** 2 for i in range(15)]
    x2kv = [x2[i] ** 2 for i in range(15)]
    x3kv = [x3[i] ** 2 for i in range(15)]
    tmp_list_a = list(zip(x1, x2, x3, x1x2, x1x3, x2x3, x1x2x3, x1kv, x2kv, x3kv))

    plan_table = BeautifulTable()
    plan_table.columns.header = ['X1', 'X2', 'X3', 'X1X2', 'X1X3', 'X2X3',
                                 'X1X2X3', 'X1X1', 'X2X2', 'X3X3']
    print("Planning matrix with naturalized coefficients X:")
    for i in range(len(tmp_list_a)):
        plan_table.rows.append(tmp_list_a[i])
    print(plan_table)

    def func(X1, X2, X3):
        y = (5.4 + 3.6 * X1 + 6.6 * X2 + 7.7 * X3 + 8.0 * X1 * X1 +
             0.3 * X2 * X2 + 2.5 * X3 * X3 + 5.9 * X1 * X2 + 0.3 * X1 * X3 +
             7.2 * X2 * X3 + 5.3 * X1 * X2 * X3 + random.randint(0, 10) - 5)
        return y

    y = [[func(tmp_list_a[j][0], tmp_list_a[j][1], tmp_list_a[j][2])
          for _ in range(m)] for j in range(15)]
    plan_y = BeautifulTable()
    plan_y.columns.header = ['y1', 'y2', 'y3']
    print("Planning matrix y:")
    for i in range(len(y)):
        plan_y.rows.append(y[i])
    print(plan_y)

    aver_y = []
    for i in range(len(y)):
        aver_y.append(np.mean(y[i], axis=0))
    print("Average response values:\n{}".format(aver_y))

    disp = []
    for i in range(len(y)):
        a = 0
        for k in y[i]:
            a += (k - np.mean(y[i], axis=0)) ** 2
        disp.append(a / len(y[i]))
    print("Dispersion:\n{}".format(disp))

    def finds_value(num):
        a = 0
        for j in range(15):
            a += aver_y[j] * tmp_list_a[j][num - 1] / 15
        return a

    def a(f, s):
        a = 0
        for j in range(15):
            a += tmp_list_a[j][f - 1] * tmp_list_a[j][s - 1] / 15
        return a

    my = sum(aver_y) / 15
    mx = []
    for i in range(10):
        number_lst = []
        for j in range(15):
            number_lst.append(tmp_list_a[j][i])
        mx.append(sum(number_lst) / len(number_lst))

    # normal-equation system, built row by row instead of as one giant literal
    determinant1 = [[1] + mx]
    for i in range(1, 11):
        determinant1.append([mx[i - 1]] + [a(i, j) for j in range(1, 11)])
    determinant2 = [my] + [finds_value(i) for i in range(1, 11)]
    beta = solve(determinant1, determinant2)
    print("Regression equation:")
    print("y = {} + {} * X1 + {} * X2 + {} * X3 + {} * X1X2 + {} * X1X3 + {} * X2X3"
          " + {} * X1X2X3 + {} * X11^2 + {} * X22^2 + {} * X33^2".format(
              beta[0], beta[1], beta[2], beta[3], beta[4], beta[5], beta[6],
              beta[7], beta[8], beta[9], beta[10]))

    y_i = [0] * 15
    for k in range(15):
        y_i[k] = beta[0] + sum(beta[j + 1] * tmp_list_a[k][j] for j in range(10))
    print("Experimental values:\n{}".format(y_i))

    gp = max(disp) / sum(disp)
    gt = 0.3346  # tabulated Cochran critical value
    print("\nCochran check\nGp = {}".format(gp))
    if gp < gt:
        print("Dispersions are homogeneous")
    else:
        print("Dispersions are inhomogeneous")

    sb = sum(disp) / len(disp)
    sbs = (sb / (15 * m)) ** 0.5
    f3 = (m - 1) * n
    sign_coef = []
    insign_coef = []
    d = 11
    res = [0] * 11
    for j in range(11):
        t_pract = 0
        for i in range(15):
            if j == 0:
                t_pract += aver_y[i] / 15
            else:
                t_pract += aver_y[i] * xn[i][j - 1]
        res[j] = beta[j]
        if math.fabs(t_pract / sbs) < t.ppf(q=0.975, df=f3):
            insign_coef.append(beta[j])
            res[j] = 0
            d -= 1
        else:
            sign_coef.append(beta[j])
    print("\nStudent criterion:")
    print("Significant regression coefficients:", [round(i, 3) for i in sign_coef])
    print("Insignificant regression coefficients:", [round(i, 3) for i in insign_coef])

    y_st = []
    for i in range(15):
        y_st.append(res[0] + res[1] * x1[i] + res[2] * x2[i] + res[3] * x3[i] +
                    res[4] * x1x2[i] + res[5] * x1x3[i] + res[6] * x2x3[i] +
                    res[7] * x1x2x3[i] + res[8] * x1kv[i] + res[9] * x2kv[i] +
                    res[10] * x3kv[i])
    print("Values with the coefficients:\n{}".format(y_st))

    print("\nFisher adequacy check")
    sad = m * sum([(y_st[i] - aver_y[i]) ** 2 for i in range(15)]) / (n - d)
    fp = sad / sb
    f4 = n - d
    print("fp =", fp)
    if fp > f.ppf(q=0.95, dfn=f4, dfd=f3) or len(insign_coef) == 2:
        print("The mathematical model is inadequate to the experimental data")
    else:
        print("The mathematical model is adequate to the experimental data")
def linear(n, m):
    f1 = m - 1
    f2 = n
    f3 = f1 * f2
    q = 0.05

    x, y, x_norm = planning_matrix_linear(n, m, x_range)
    y_average, B = regression_equation(x, y, n)

    dispersion_arr = dispersion(y, y_average, n, m)

    temp_cohren = f.ppf(q=(1 - q / f1), dfn=f2, dfd=(f1 - 1) * f2)
    cohren_cr_table = temp_cohren / (temp_cohren + f1 - 1)
    Gp = max(dispersion_arr) / sum(dispersion_arr)

    print('\nCochran test:\n')
    print(f'Calculated value: Gp = {Gp}'
          f'\nTabulated value: Gt = {cohren_cr_table}')
    if Gp < cohren_cr_table:
        print(f'With probability {1 - q} the dispersions are homogeneous.')
    else:
        print("The number of experiments must be increased")
        m += 1
        linear(n, m)

    qq = (1 + 0.95) / 2
    student_cr_table = t.ppf(df=f3, q=qq)
    student_t = kriteriy_studenta(x_norm[:, 1:], y, y_average, n, m)

    print('\nTabulated Student criterion value:\n', student_cr_table)
    print('Calculated Student criterion values:\n', student_t)
    res_student_t = [temp for temp in student_t if temp > student_cr_table]

    final_coefficients = [B[student_t.index(i)] for i in student_t
                          if i in res_student_t]
    print('Coefficients {} are statistically insignificant.'.format(
        [i for i in B if i not in final_coefficients]))

    y_new = []
    for j in range(n):
        y_new.append(regression(
            [x[j][student_t.index(i)] for i in student_t if i in res_student_t],
            final_coefficients))

    print(f'\nRegression equation values for {m} experiments: ')
    print(y_new)

    d = len(res_student_t)
    f4 = n - d

    Fp = kriteriy_fishera(y, y_average, y_new, n, m, d, dispersion_arr)
    Ft = f.ppf(dfn=f4, dfd=f3, q=1 - 0.05)
    print('\nFisher adequacy check:\n')
    print('Calculated Fisher criterion value: Fp =', Fp)
    print('Tabulated Fisher criterion value: Ft =', Ft)
    if Fp < Ft:
        print('The mathematical model is adequate to the experimental data')
        return True
    else:
        print('The mathematical model is not adequate to the experimental data')
        return False
def check(X, Y, B, n, m, norm=False):
    f1 = m - 1
    f2 = n
    f3 = f1 * f2
    q = 0.05

    y_aver = [round(sum(i) / len(i), 3) for i in Y]
    print('\nAverage value of y:', y_aver)

    dispersion_arr = dispersion(Y, y_aver, n, m)

    qq = (1 + 0.95) / 2
    student_cr_table = t.ppf(df=f3, q=qq)
    ts = kriteriy_studenta(X[:, 1:], Y, y_aver, n, m)

    temp_cohren = f.ppf(q=(1 - q / f1), dfn=f2, dfd=(f1 - 1) * f2)
    cohren_cr_table = temp_cohren / (temp_cohren + f1 - 1)
    Gp = max(dispersion_arr) / sum(dispersion_arr)

    print('Dispersion of y:', dispersion_arr)
    print(f'Gp = {Gp}')
    if Gp < cohren_cr_table:
        print(f'With probability {1 - q} the dispersions are homogeneous.')
    else:
        print("The number of experiments must be increased")
        m += 1
        with_interaction_effect(n, m)

    print('\nStudent criterion:\n', ts)
    res = [t for t in ts if t > student_cr_table]
    final_k = [B[i] for i in range(len(ts)) if ts[i] in res]
    print('\nCoefficients {} are statistically insignificant, so they are '
          'excluded from the equation.'.format(
              [round(i, 3) for i in B if i not in final_k]))

    y_new = []
    for j in range(n):
        y_new.append(regression(
            [X[j][i] for i in range(len(ts)) if ts[i] in res], final_k))

    print(f'\nValues of "y" with coefficients {final_k}')
    print(y_new)

    d = len(res)
    if d >= n:
        print('\nF4 <= 0')
        print('')
        return

    f4 = n - d
    Fp = kriteriy_fishera(Y, y_aver, y_new, n, m, d, dispersion_arr)
    Ft = f.ppf(dfn=f4, dfd=f3, q=1 - 0.05)
    print('\nFisher adequacy check')
    print('Fp =', Fp)
    print('Ft =', Ft)
    if Fp < Ft:
        print('The mathematical model is adequate to the experimental data')
        return True
    else:
        print('The mathematical model is not adequate to the experimental data')
        return False
from scipy.stats import f

csv_data = pd.read_csv('predict_.csv')
df = pd.DataFrame(csv_data)
df.columns = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'y1', 'yl', 'yi']
x1 = df['x1']
x2 = df['x2']
x3 = df['x3']
x4 = df['x4']
x5 = df['x5']
x6 = df['x6']
x7 = df['x7']
y1 = df['y1']
yi = df['yi']
mean_y = y1.mean()
ESS = np.sum((y1 - yi) ** 2)
RSS = np.sum((yi - mean_y) ** 2)
TSS = np.sum((y1 - mean_y) ** 2)
print(ESS)
print(RSS)
print(TSS)
p = 7  # number of explanatory variables: seven x columns, so p = 7
n = len(x1)  # number of observations (rows in the csv file)
F = (RSS / p) / (ESS / (n - p - 1))
F_theory = f.ppf(q=0.95, dfn=p, dfd=n - p - 1)
print(F)
print(F_theory)
# The F statistic is far larger than the theoretical F value, so the null
# hypothesis is rejected: the multiple linear regression is significant,
# and the partial regression coefficients are not all zero.
def fore_chow(stock1, stock2, model, Flen, give=False, p=0, A=0, ut=0, maxp=5):
    # Flen: formation length
    if model == 1:
        model_name = 'H2'
    elif model == 2:
        model_name = 'H1*'
    else:
        model_name = 'H1'
    day1 = (np.vstack([stock1, stock2]).T)
    day1 = np.log(day1)
    h = len(day1) - Flen
    k = 2     # number of stocks
    n = Flen  # length of the formation-period data
    if give == False:
        y = (np.vstack([stock1[0:Flen], stock2[0:Flen]]).T)
        y = np.log(y)
        p = order_select(y, maxp)
        at, A, _ = para_vecm(y, model_name, p)
        # at, A = para_vecm(y, model_name, p)
        ut = np.dot(at, at.T) / len(at.T)  # sigma_u
    Remain_A = A.copy()
    Remain_ut = ut.copy()
    Remain_p = p
    # Luetkepohl, p. 184
    A = A.T
    phi_0 = np.eye(k)
    A1 = np.delete(A, 0, axis=0).T
    phi = np.hstack((np.zeros([k, 2 * (p - 1)]), phi_0))
    sigma_t = np.dot(np.dot(phi_0, ut), phi_0.T)  # sigma hat
    ut_h = []
    for i in range(1, h + 1):
        lag_mat = day1[len(day1) - i - p - 1:len(day1) - i, :]
        lag_mat = np.array(lag_mat[::-1])
        if p == 1:
            ut_h.append(lag_mat[0].T - (A[0].T + np.dot(A[1:k * p + 1].T, lag_mat[1:2].T)).T)
        else:
            ut_h.append(lag_mat[0].T - (A[0].T + np.dot(
                A[1:k * p + 1].T, lag_mat[1:k * p - 1].reshape([k * p, 1]))).T)
    for i in range(h - 1):
        a = phi[:, i * 2:len(phi.T)]
        phi_i = np.dot(A1, a.T)
        sigma_t = sigma_t + np.dot(np.dot(phi_i, ut), phi_i.T)
        phi = np.hstack((phi, phi_i))
    phi = phi[:, ((p - 1) * k):len(phi.T)]
    ut_h = np.array(ut_h).reshape([1, h * 2])
    e_t = np.dot(phi, ut_h.T)
    # fail-safe: if inverting sigma_t diverges, report a structural break
    try:
        tau_h = np.dot(np.dot(e_t.T, np.linalg.inv(sigma_t)), e_t) / k
    except:
        return Remain_p, Remain_A, Remain_ut, 1
    else:
        if tau_h > float(f.ppf(0.99, k, n - k * p + 1)):  # or: tau_h > float(chi2.ppf(0.99, k))
            return Remain_p, Remain_A, Remain_ut, 1  # structural break
        else:
            return Remain_p, Remain_A, Remain_ut, 0
def intraclass_corr(data=None, targets=None, raters=None, ratings=None,
                    nan_policy='raise'):
    """Intraclass correlation.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        Long-format dataframe. Data must be fully balanced.
    targets : string
        Name of column in ``data`` containing the targets.
    raters : string
        Name of column in ``data`` containing the raters.
    ratings : string
        Name of column in ``data`` containing the ratings.
    nan_policy : str
        Defines how to handle when input contains missing values (nan).
        'raise' (default) throws an error, 'omit' performs the calculations
        after deleting target(s) with one or more missing values (= listwise
        deletion).

        .. versionadded:: 0.3.0

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`
        Output dataframe:

        * ``'Type'``: ICC type
        * ``'Description'``: description of the ICC
        * ``'ICC'``: intraclass correlation
        * ``'F'``: F statistic
        * ``'df1'``: numerator degree of freedom
        * ``'df2'``: denominator degree of freedom
        * ``'pval'``: p-value
        * ``'CI95%'``: 95% confidence intervals around the ICC

    Notes
    -----
    The intraclass correlation (ICC) assesses the reliability of ratings by
    comparing the variability of different ratings of the same subject to the
    total variation across all ratings and all subjects.

    Shrout and Fleiss (1979) describe six cases of reliability of ratings
    done by :math:`k` raters on :math:`n` targets. Pingouin returns all six
    cases with corresponding F and p-values, as well as 95% confidence
    intervals.

    From the documentation of the ICC function in the R package psych:

    - **ICC1**: Each target is rated by a different rater and the raters are
      selected at random. This is a one-way ANOVA fixed effects model.

    - **ICC2**: A random sample of :math:`k` raters rate each target. The
      measure is one of absolute agreement in the ratings. ICC1 is sensitive
      to differences in means between raters and is a measure of absolute
      agreement.

    - **ICC3**: A fixed set of :math:`k` raters rate each target. There is no
      generalization to a larger population of raters. ICC2 and ICC3 remove
      mean differences between raters, but are sensitive to interactions.
      The difference between ICC2 and ICC3 is whether raters are seen as
      fixed or random effects.

    Then, for each of these cases, the reliability can either be estimated for
    a single rating or for the average of :math:`k` ratings. The 1 rating case
    is equivalent to the average intercorrelation, while the :math:`k` rating
    case is equivalent to the Spearman Brown adjusted reliability.
    **ICC1k**, **ICC2k**, **ICC3k** reflect the means of :math:`k` raters.

    This function has been tested against the ICC function of the R psych
    package. Note however that contrarily to the R implementation, the
    current implementation does not use linear mixed effect but regular ANOVA,
    which means that it only works with complete-case data (no missing
    values).

    References
    ----------
    .. [1] Shrout, P. E., & Fleiss, J. L. (1979). Intraclass correlations:
           uses in assessing rater reliability. Psychological bulletin, 86(2),
           420.

    .. [2] https://cran.r-project.org/web/packages/psych/psych.pdf

    .. [3] http://www.real-statistics.com/reliability/intraclass-correlation/

    Examples
    --------
    ICCs of wine quality assessed by 4 judges.

    >>> import pingouin as pg
    >>> data = pg.read_dataset('icc')
    >>> icc = pg.intraclass_corr(data=data, targets='Wine', raters='Judge',
    ...                          ratings='Scores').round(3)
    >>> icc.set_index("Type")
                       Description    ICC       F  df1  df2  pval         CI95%
    Type
    ICC1    Single raters absolute  0.728  11.680    7   24   0.0  [0.43, 0.93]
    ICC2      Single random raters  0.728  11.787    7   21   0.0  [0.43, 0.93]
    ICC3       Single fixed raters  0.729  11.787    7   21   0.0  [0.43, 0.93]
    ICC1k  Average raters absolute  0.914  11.680    7   24   0.0  [0.75, 0.98]
    ICC2k    Average random raters  0.914  11.787    7   21   0.0  [0.75, 0.98]
    ICC3k     Average fixed raters  0.915  11.787    7   21   0.0  [0.75, 0.98]
    """
    from pingouin import anova

    # Safety check
    assert isinstance(data, pd.DataFrame), 'data must be a dataframe.'
    assert all([v is not None for v in [targets, raters, ratings]])
    assert all([v in data.columns for v in [targets, raters, ratings]])
    assert nan_policy in ['omit', 'raise']

    # Convert data to wide-format
    data = data.pivot_table(index=targets, columns=raters, values=ratings)

    # Listwise deletion of missing values
    nan_present = data.isna().any().any()
    if nan_present:
        if nan_policy == 'omit':
            data = data.dropna(axis=0, how='any')
        else:
            raise ValueError("Either missing values are present in data or "
                             "data are unbalanced. Please remove them "
                             "manually or use nan_policy='omit'.")

    # Back to long-format
    # data_wide = data.copy()  # Optional, for PCA
    data = data.reset_index().melt(id_vars=targets, value_name=ratings)

    # Check that ratings is a numeric variable
    assert data[ratings].dtype.kind in 'bfi', 'Ratings must be numeric.'
    # Check that data are fully balanced
    # This behavior is ensured by the long-to-wide-to-long transformation
    # Unbalanced data will result in rows with missing values.
    # assert data.groupby(raters)[ratings].count().nunique() == 1

    # Extract sizes
    k = data[raters].nunique()
    n = data[targets].nunique()

    # Two-way ANOVA
    with np.errstate(invalid='ignore'):
        aov = anova(dv=ratings, between=[targets, raters], data=data,
                    ss_type=2)

    # Extract mean squares
    msb = aov.at[0, 'MS']
    msw = (aov.at[1, 'SS'] + aov.at[2, 'SS']) / (aov.at[1, 'DF'] +
                                                 aov.at[2, 'DF'])
    msj = aov.at[1, 'MS']
    mse = aov.at[2, 'MS']

    # Calculate ICCs
    icc1 = (msb - msw) / (msb + (k - 1) * msw)
    icc2 = (msb - mse) / (msb + (k - 1) * mse + k * (msj - mse) / n)
    icc3 = (msb - mse) / (msb + (k - 1) * mse)
    icc1k = (msb - msw) / msb
    icc2k = (msb - mse) / (msb + (msj - mse) / n)
    icc3k = (msb - mse) / msb

    # Calculate F, df, and p-values
    f1k = msb / msw
    df1 = n - 1
    df1kd = n * (k - 1)
    p1k = f.sf(f1k, df1, df1kd)

    f2k = f3k = msb / mse
    df2kd = (n - 1) * (k - 1)
    p2k = f.sf(f2k, df1, df2kd)

    # Create output dataframe
    stats = {
        'Type': ['ICC1', 'ICC2', 'ICC3', 'ICC1k', 'ICC2k', 'ICC3k'],
        'Description': ['Single raters absolute', 'Single random raters',
                        'Single fixed raters', 'Average raters absolute',
                        'Average random raters', 'Average fixed raters'],
        'ICC': [icc1, icc2, icc3, icc1k, icc2k, icc3k],
        'F': [f1k, f2k, f2k, f1k, f2k, f2k],
        'df1': n - 1,
        'df2': [df1kd, df2kd, df2kd, df1kd, df2kd, df2kd],
        'pval': [p1k, p2k, p2k, p1k, p2k, p2k]
    }

    stats = pd.DataFrame(stats)

    # Calculate confidence intervals
    alpha = 0.05
    # Case 1 and 3
    f1l = f1k / f.ppf(1 - alpha / 2, df1, df1kd)
    f1u = f1k * f.ppf(1 - alpha / 2, df1kd, df1)
    l1 = (f1l - 1) / (f1l + (k - 1))
    u1 = (f1u - 1) / (f1u + (k - 1))
    f3l = f3k / f.ppf(1 - alpha / 2, df1, df2kd)
    f3u = f3k * f.ppf(1 - alpha / 2, df2kd, df1)
    l3 = (f3l - 1) / (f3l + (k - 1))
    u3 = (f3u - 1) / (f3u + (k - 1))
    # Case 2
    fj = msj / mse
    vn = df2kd * ((k * icc2 * fj + n * (1 + (k - 1) * icc2) - k * icc2))**2
    vd = df1 * k**2 * icc2**2 * fj**2 + \
        (n * (1 + (k - 1) * icc2) - k * icc2)**2
    v = vn / vd
    f2u = f.ppf(1 - alpha / 2, n - 1, v)
    f2l = f.ppf(1 - alpha / 2, v, n - 1)
    l2 = n * (msb - f2u * mse) / (f2u * (k * msj + (k * n - k - n) * mse) +
                                  n * msb)
    u2 = n * (f2l * msb - mse) / (k * msj + (k * n - k - n) * mse +
                                  n * f2l * msb)

    # Round the confidence intervals
    def list_round(x, decimals=2):
        for i, xi in enumerate(x):
            x[i] = np.round(xi, decimals).tolist()
        return x

    stats['CI95%'] = list_round([[l1, u1], [l2, u2], [l3, u3],
                                 [1 - 1 / f1l, 1 - 1 / f1u],
                                 [l2 * k / (1 + l2 * (k - 1)),
                                  u2 * k / (1 + u2 * (k - 1))],
                                 [1 - 1 / f3l, 1 - 1 / f3u]])
    return stats
def p_up(alpha, n, N):
    # upper Clopper-Pearson-style bound for n successes out of N, via the F quantile
    quant_up = f.ppf(1 - alpha, 2 * (n + 1), 2 * (N - n), loc=0, scale=1)
    return (n + 1) * quant_up / (N - n + (n + 1) * quant_up)
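# Usage sketch for p_lo/p_up above: one-sided binomial bounds for n successes
# out of N trials (Clopper-Pearson style, expressed through F quantiles).
from scipy.stats import f

print(p_lo(0.05, 7, 20), p_up(0.05, 7, 20))  # should bracket 7/20 = 0.35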
def ellipseoid(P, y=None, z=None, pvalue=.95, units=None, show=True):
    """Calculates an ellipse(oid) as prediction interval for multivariate data.

    The prediction ellipse (or ellipsoid) is a prediction interval for a
    sample of a bivariate (or trivariate) random variable and is such that
    there is pvalue*100% of probability that a new observation will be
    contained in the ellipse (or ellipsoid) (Chew, 1966) [1]_.

    The semi-axes of the prediction ellipse(oid) are found by calculating the
    eigenvalues of the covariance matrix of the data and adjusting the size
    of the semi-axes to account for the necessary prediction probability.

    Parameters
    ----------
    P : 1-D or 2-D array_like
        For a 1-D array, P is the abscissa values of the [x,y] or [x,y,z]
        data. For a 2-D array, P is the joined values of the [x,y] or [x,y,z]
        data. The shape of the 2-D array should be (n, 2) or (n, 3) where n
        is the number of observations.
    y : 1-D array_like, optional (default = None)
        Ordinate values of the [x, y] or [x,y,z] data.
    z : 1-D array_like, optional (default = None)
        Ordinate values of the [x, y] or [x,y,z] data.
    pvalue : float, optional (default = .95)
        Desired prediction probability of the ellipse(oid).
    units : str, optional (default = None)
        Units of the input data.
    show : bool, optional (default = True)
        True (1) plots data in a matplotlib figure, False (0) to not plot.

    Returns
    -------
    volume : float
        Area of the ellipse or volume of the ellipsoid according to the
        inputs.
    axes : 2-D array
        Lengths of the semi-axes ellipse(oid) (largest first).
    angles : 1-D array
        Angles of the semi-axes ellipse(oid). For the ellipsoid (3D data),
        the angles are the Euler angles calculated in the XYZ sequence.
    center : 1-D array
        Centroid of the ellipse(oid).
    rotation : 2-D array
        Rotation matrix of the semi-axes of the ellipse(oid).

    Notes
    -----
    The directions and lengths of the semi-axes are found, respectively, as
    the eigenvectors and eigenvalues of the covariance matrix of the data
    using the concept of principal components analysis (PCA) [2]_ or singular
    value decomposition (SVD) [3]_.

    See [4]_ for a discussion about prediction and confidence intervals and
    their use in posturography.

    References
    ----------
    .. [1] http://www.jstor.org/stable/2282774
    .. [2] http://en.wikipedia.org/wiki/Principal_component_analysis
    .. [3] http://en.wikipedia.org/wiki/Singular_value_decomposition
    .. [4] http://www.sciencedirect.com/science/article/pii/S0966636213005961

    Examples
    --------
    >>> import numpy as np
    >>> from ellipseoid import ellipseoid
    >>> y = np.cumsum(np.random.randn(3000)) / 50
    >>> x = np.cumsum(np.random.randn(3000)) / 100
    >>> area, axes, angles, center, R = ellipseoid(x, y, units='cm', show=True)
    >>> P = np.random.randn(1000, 3)
    >>> P[:, 2] = P[:, 2] + P[:, 1]*.5
    >>> P[:, 1] = P[:, 1] + P[:, 0]*.5
    >>> volume, axes, angles, center, R = ellipseoid(P, units='cm', show=True)
    """

    from scipy.stats import f as F

    P = np.array(P, ndmin=2, dtype=float)
    if P.shape[0] == 1:
        P = P.T
    elif P.shape[1] > 3:
        P = P.T
    if y is not None:
        y = np.array(y, copy=False, ndmin=2, dtype=float)
        if y.shape[0] == 1:
            y = y.T
        P = np.concatenate((P, y), axis=1)
    if z is not None:
        z = np.array(z, copy=False, ndmin=2, dtype=float)
        if z.shape[0] == 1:
            z = z.T
        P = np.concatenate((P, z), axis=1)
    # covariance matrix
    cov = np.cov(P, rowvar=0)
    # singular value decomposition
    U, s, Vt = np.linalg.svd(cov)
    # semi-axes (largest first)
    p, n = s.size, P.shape[0]
    saxes = np.sqrt(s * F.ppf(pvalue, p, dfd=n - p) * (n - 1) * p * (n + 1) / (n * (n - p)))
    volume = 4 / 3 * np.pi * np.prod(saxes) if p == 3 else np.pi * np.prod(saxes)
    # rotation matrix
    R = Vt
    if s.size == 2:
        angles = np.array([np.rad2deg(np.arctan2(R[1, 0], R[0, 0])),
                           90 - np.rad2deg(np.arctan2(R[1, 0], -R[0, 0]))])
    else:
        angles = rotXYZ(R, unit='deg')
    # centroid of the ellipse(oid)
    center = np.mean(P, axis=0)
    if show:
        _plot(P, volume, saxes, center, R, pvalue, units, fig=None, ax=None)

    return volume, saxes, angles, center, R
def prediction_ellipse(self, p_val=.95):
    """
    Prediction hyperellipsoid for multivariate data.

    __author__ = 'Marcos Duarte, https://github.com/demotu/BMC'

    Parameters
    ----------
    p_val : float, optional (default = .95)
        Desired prediction probability of the hyperellipsoid. The data are
        taken from self.record.cop (an (n, p) sample with n observations in
        rows and p dimensions in columns).

    Returns
    -------
    hypervolume : float
        Hypervolume (e.g., area of the ellipse or volume of the ellipsoid).
    axes : 1-D array
        Lengths of the semi-axes hyperellipsoid (largest first).
    angles : 1-D array
        Angles of the semi-axes hyperellipsoid (only for 2D or 3D data).
        For the ellipsoid (3D data), the angles are the Euler angles
        calculated in the XYZ sequence.
    center : 1-D array
        Centroid of the hyperellipsoid.
    rotation : 2-D array
        Rotation matrix for hyperellipsoid semi-axes (only for 2D or 3D
        data).
    """
    from scipy.stats import f as F
    from scipy.special import gamma

    data = np.array([self.record.cop.x, self.record.cop.y]).transpose()
    # covariance matrix
    cov = np.cov(data, rowvar=0)
    # singular value decomposition
    U, s, Vt = np.linalg.svd(cov)
    p, n = s.size, data.shape[0]
    # F percent point function
    fppf = F.ppf(p_val, p, n - p) * (n - 1) * p * (n + 1) / n / (n - p)
    # semi-axes (largest first)
    saxes = np.sqrt(s * fppf)
    area = np.pi ** (p / 2) / gamma(p / 2 + 1) * np.prod(saxes)
    # rotation matrix
    R = Vt
    angles = np.array([
        np.rad2deg(np.arctan2(R[1, 0], R[0, 0])),
        90 - np.rad2deg(np.arctan2(R[1, 0], -R[0, 0]))
    ])
    # centroid of the ellipse
    center = np.mean(data, axis=0)
    return type(
        '', (object, ), {
            'area': area,            # cm^2
            'saxes': saxes,          # cm
            'angles': angles,        # deg
            'center': center,        # cm
            'rot_matrix': R,
            'p_val': p_val
        })
def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    b_list[0] + b_list[1] * mat_X[i][0] + b_list[2] * mat_X[i][1] +
    b_list[3] * mat_X[i][2] for i in range(N)
]
print("-" * 65)
print('\nValues of y')
for i in range(N):
    print(
        f"{b_list[0]} + {b_list[1]}*x1 + {b_list[2]}*x2 + {b_list[3]}*x3 ="
        f" {b_list[0] + b_list[1] * mat_X[i][0] + b_list[2] * mat_X[i][1] + b_list[3] * mat_X[i][2]}"
    )
print("-" * 65)
print('\nCheck the adequacy of the model with the Fisher criterion')
# sum the squared deviations; the original squared the sum instead
Sad = (m / (N - d)) * sum((y_reg[i] - average_y[i]) ** 2 for i in range(N))
Fp = Sad / S2b
q = 0.05
F_table = f.ppf(q=1 - q, dfn=f4, dfd=f3)
print('FP =', Fp)
if Fp > F_table:
    print('The model is inadequate at the 0.05 level')
else:
    print('The model is adequate at the 0.05 level')
print("-" * 65)

print("\nAdditional task")
total_time = 0
for i in range(len(time_list)):
    print(f"Time of the {i + 1}-th significance check - {time_list[i]}")
    total_time += time_list[i]
print(f"Total search time - {total_time}")
print("Незначущі коефіцієнти регресії:", [round(i, 3) for i in coefs2]) y_st = [] for i in range(15): y_st.append(res[0] + res[1] * x1[i] + res[2] * x2[i] + res[3] * x3[i] + res[4] * x1x2[i] + res[5] * x1x3[i] + res[6] * x2x3[i] + res[7] * x1x2x3[i] + res[8] * x1kv[i] + res[9] * x2kv[i] + res[10] * x3kv[i]) print("Значення з отриманими коефіцієнтами:") for i in range(15): print("{:.3f}".format(y_st[i]), end=" ") time_st += time.perf_counter() - start_time_student print( "\n------------------------- Перевірка адекватності за критерієм Фішера -------------------------" ) start_time_fisher = time.perf_counter() Sad = m * sum([(y_st[i] - Y_average[i])**2 for i in range(15)]) / (n - d) Fp = Sad / sb F4 = n - d print("Fp =", Fp) if Fp < f.ppf(q=0.95, dfn=F4, dfd=F3): print("Рівняння регресії адекватне при рівні значимості 0.05") else: print("Рівняння регресії неадекватне при рівні значимості 0.05") time_f += time.perf_counter() - start_time_fisher print("Середній час перевірки за критерієм Кохрена: ", time_koh / 10) print("Середній час перевірки за критерієм Cтьюдента: ", time_st / 10) print("Середній час перевірки за критерієм Фішера: ", time_f / 10)
def cochrane_teor(f1, f2, q=0.05):
    # theoretical (critical) Cochran value expressed through an F quantile
    q1 = q / f1
    fischer_value = f.ppf(q=1 - q1, dfn=f2, dfd=(f1 - 1) * f2)
    return fischer_value / (fischer_value + f1 - 1)
def UCL(self, alpha):
    P = self.NumFeature
    N = len(self.WCTrainLabel)
    # Hotelling T-squared limit: P(N-1)/(N-P) * F(P, N-P); the original
    # divided by (N-1), which cancelled the numerator's (N-1)
    return (P * (N - 1) * f.ppf(alpha, P, N - P)) / (N - P)
# 1320.171111111111
df_between = 2
df_within = 15
MS_between = SS_between / df_between
MS_within = SS_within / df_within
# print(MS_between, MS_within)
# 660.0855555555555 8.898666666666665
F_ratio = MS_between / MS_within
# print(F_ratio)
# 74.1780291679153
f_critical = f.ppf(.95, 2, 15)
# print(f_critical)
# 3.6823203436732412

# eta squared (proportion of variance explained); the original computed
# SS_within / SS_total and then printed its complement
eta_squared = SS_between / (SS_between + SS_within)
# print(eta_squared)

# Q 25
kids_df = pd.DataFrame({
    'single': pd.Series([8, 7, 10, 6, 9]),
    'twin': pd.Series([4, 6, 7, 4, 9]),
    'triplet': pd.Series([4, 4, 7, 2, 3])
})
# print(kids_df.describe())
def chapter_5(input_list):
    confidence_int = 0.975
    for choice in input_list:
        if choice == 20:
            data = np.loadtxt(os.path.join(cwd, 'T5-12.dat'))
            x1_data = np.loadtxt(os.path.join(cwd, 'T5-12.dat'))[:, 0]
            x2_data = np.loadtxt(os.path.join(cwd, 'T5-12.dat'))[:, 1]
            print 'x1_data', x1_data
            print 'x2_data', x2_data
            print 'mean of length data', x1_data.mean()
            print 'mean of wing length data', x2_data.mean()
            n = x1_data.shape[0]
            print 'n', n
            p = 2
            cov_matrix = np.cov(data, rowvar=0)
            # http://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.inv.html
            inv_of_cov = inv(cov_matrix)
            print 'covariance matrix\n', cov_matrix
            print 'inverse of covariance matrix\n', inv_of_cov
            print 'f.ppf(confidence_int,p,n-p)', f.ppf(0.95, p, n - p)
            print '(n-1)*p/(n*(n-p))', float((n - 1) * p) / (n * (n - p))
            # http://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.f.html
            c_square = (float((n - 1) * p) / (n - p)) * f.ppf(0.95, p, n - p)
            print 'c_square', c_square
            # simultaneous confidence intervals
            a1, a2 = np.matrix('1;0'), np.matrix('0;1')
            # T-square confidence intervals
            half_width1 = np.sqrt(((np.transpose(a1) * cov_matrix * a1) * c_square) / n)
            upperbound_1, lowerbound_1 = x1_data.mean() + half_width1, x1_data.mean() - half_width1
            print 'T-square upperbound_1,lowerbound_1', upperbound_1, lowerbound_1
            half_width2 = np.sqrt((np.transpose(a2) * cov_matrix * a2) * c_square / n)
            upperbound_2, lowerbound_2 = x2_data.mean() + half_width2, x2_data.mean() - half_width2
            print 'T-square upperbound_2,lowerbound_2', upperbound_2, lowerbound_2
            # Bonferroni confidence intervals
            half_width1 = np.sqrt(cov_matrix[0, 0] / n) * t.ppf(0.5 + confidence_int / 2, n - 1)
            upperbound_1, lowerbound_1 = x1_data.mean() + half_width1, x1_data.mean() - half_width1
            print 'Bonferroni upperbound_1,lowerbound_1', upperbound_1, lowerbound_1
            half_width2 = np.sqrt(cov_matrix[1, 1] / n) * t.ppf(0.5 + confidence_int / 2, n - 1)
            upperbound_2, lowerbound_2 = x2_data.mean() + half_width2, x2_data.mean() - half_width2
            print 'Bonferroni upperbound_2,lowerbound_2', upperbound_2, lowerbound_2
            print x1_data
            stats.probplot(x1_data, dist="norm", plot=pylab)
            pylab.show()
        elif choice == 22:
            # http://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html
            data = np.loadtxt(os.path.join(cwd, 'T6-10.dat'), usecols=(0, 1, 2))
            fuel_data, repair_data, capital_data = data[:, 0], data[:, 1], data[:, 2]
            # # QQ plots before removal of outliers
            # stats.probplot(fuel_data, dist="norm", plot=pylab), pylab.show()
            # stats.probplot(repair_data, dist="norm", plot=pylab), pylab.show()
            # stats.probplot(capital_data, dist="norm", plot=pylab), pylab.show()
            # # scatter plots before removal of outliers
            # scatterplots(fuel_data, repair_data), scatterplots(capital_data, repair_data), scatterplots(fuel_data, capital_data)
            # removing outliers
            # http://docs.scipy.org/doc/numpy-1.10.1/reference/generated/numpy.select.html
            # fuel_data2 = np.select([fuel_data<20],[fuel_data])
            outliers_removed_data = data[(data[:, 0] < 20) & (data[:, 0] > 6) &
                                         (data[:, 1] < 20) & (data[:, 1] > 3) &
                                         (data[:, 2] < 25) & (data[:, 2] > 5)]
            # fuel_data2 = fuel_data[(fuel_data < 20) & (fuel_data > 6)]
            # http://stackoverflow.com/questions/3806878/subsetting-data-in-python
            fuel_data2, repair_data2, capital_data2 = outliers_removed_data[:, 0], outliers_removed_data[:, 1], outliers_removed_data[:, 2]
            # stats.probplot(fuel_data2, dist="norm", plot=pylab), pylab.show()
            # stats.probplot(repair_data2, dist="norm", plot=pylab), pylab.show()
            # stats.probplot(capital_data2, dist="norm", plot=pylab), pylab.show()
            # confidence interval calculations
            n = outliers_removed_data.shape[0]
            cov_matrix = np.cov(outliers_removed_data, rowvar=0)
            # Bonferroni confidence intervals
            half_width1 = np.sqrt(cov_matrix[0, 0] / n) * t.ppf(0.5 + confidence_int / 2, n - 1)
            upperbound_1, lowerbound_1 = fuel_data2.mean() + half_width1, fuel_data2.mean() - half_width1
            print 'Bonferroni fuel data lowerbound_1,upperbound_1', lowerbound_1, upperbound_1
            half_width2 = np.sqrt(cov_matrix[1, 1] / n) * t.ppf(0.5 + confidence_int / 2, n - 1)
            upperbound_2, lowerbound_2 = repair_data2.mean() + half_width2, repair_data2.mean() - half_width2
            print 'Bonferroni repair_data2 lowerbound_2,upperbound_2', lowerbound_2, upperbound_2
            half_width3 = np.sqrt(cov_matrix[2, 2] / n) * t.ppf(0.5 + confidence_int / 2, n - 1)
            upperbound_3, lowerbound_3 = capital_data2.mean() + half_width3, capital_data2.mean() - half_width3
            print 'Bonferroni capital_data2 lowerbound_3,upperbound_3', lowerbound_3, upperbound_3
            # T-square confidence interval calculations
            p = 3
            a1, a2, a3 = np.matrix('1;0;0'), np.matrix('0;1;0'), np.matrix('0;0;1')
            c_square = (float((n - 1) * p) / (n - p)) * f.ppf(0.95, p, n - p)
            half_width1 = np.sqrt(((np.transpose(a1) * cov_matrix * a1) * c_square) / n)
            upperbound_1, lowerbound_1 = fuel_data2.mean() + half_width1, fuel_data2.mean() - half_width1
            print 'fuel data T-square lowerbound_1,upperbound_1', lowerbound_1, upperbound_1
            half_width2 = np.sqrt(((np.transpose(a2) * cov_matrix * a2) * c_square) / n)
            upperbound_2, lowerbound_2 = repair_data2.mean() + half_width2, repair_data2.mean() - half_width2
            print 'repair_data2 T-square lowerbound_2,upperbound_2', lowerbound_2, upperbound_2
            half_width3 = np.sqrt(((np.transpose(a3) * cov_matrix * a3) * c_square) / n)
            upperbound_3, lowerbound_3 = capital_data2.mean() + half_width3, capital_data2.mean() - half_width3
            print 'capital_data2 T-square lowerbound_3,upperbound_3', lowerbound_3, upperbound_3
        elif choice == 30:
            n = 50  # seen from the solutions
            confidence_int = 0.95
            means = np.matrix('0.766;0.508;0.438;0.161')
            cov_matrix = np.matrix('0.856,0.635,0.173,0.096;'
                                   '0.635,0.568,0.128,0.067;'
                                   '0.173,0.127,0.171,0.039;'
                                   '0.096,0.067,0.039,0.043')
            print 'means\n', means
            print 'cov_matrix\n', cov_matrix
            a1, a2, a3, a4 = np.matrix('1;0;0;0'), np.matrix('0;1;0;0'), np.matrix('0;0;1;0'), np.matrix('0;0;0;1')
            a5, a6 = np.matrix('1;1;1;1'), np.matrix('1;-1;0;0')
            # simultaneous confidence intervals
            c_square = chi2.ppf(confidence_int, 4)
            print 'c_square', c_square
            half_width1 = np.sqrt(((np.transpose(a1) * cov_matrix * a1) * c_square) / n)
            upperbound_1, lowerbound_1 = means[0, 0] + half_width1, means[0, 0] - half_width1
            print 'Petroleum Chi-square lowerbound_1,upperbound_1', lowerbound_1, upperbound_1
            # half_width2 = np.sqrt(((np.transpose(a2)*cov_matrix*a2)*c_square)/n)
            # half_width3 = np.sqrt(((np.transpose(a3)*cov_matrix*a3)*c_square)/n)
            half_width5 = np.sqrt(((np.transpose(a5) * cov_matrix * a5) * c_square) / n)
            upperbound_5, lowerbound_5 = means.sum() + half_width5, means.sum() - half_width5
            print 'Petroleum Chi-square lowerbound_5,upperbound_5', lowerbound_5, upperbound_5
            half_width6 = np.sqrt(((np.transpose(a6) * cov_matrix * a6) * c_square) / n)
            upperbound_6, lowerbound_6 = np.transpose(a6) * means + half_width6, np.transpose(a6) * means - half_width6
            print 'Petroleum Chi-square lowerbound_6,upperbound_6', lowerbound_6, upperbound_6
            # Bonferroni confidence intervals
            c_square = t.ppf(1 - (1 - confidence_int) / 12, n - 1)
            print 'c_square', c_square
            half_width1 = c_square * np.sqrt((np.transpose(a1) * cov_matrix * a1) / n)
            upperbound_1, lowerbound_1 = means[0, 0] + half_width1, means[0, 0] - half_width1
            print 'Petroleum Bonferroni lowerbound_1,upperbound_1', lowerbound_1, upperbound_1
def UCL_Computation(N, alpha):
    S = 1
    return (S * ((N - 1) ** 2) * f.ppf(1 - alpha, S, N - S)) / (N * (N - S))
def get_cochran_critical(probability, f1, f2):
    return 1 / (1 + (f2 - 1) / f.ppf(1 - (1 - probability) / f2, f1, (f2 - 1) * f1))
import cmath
from numpy import zeros, nan, inf, sqrt, sign, real, finfo, float64, double
from scipy import stats
from scipy.stats import norm, gamma, t, f
# pearson4cdf is assumed to be provided alongside this function


def pearscdf(X, mu, sigma, skew, kurt, method, k, output):
    # pearspdf
    # [p,type,coefs] = pearspdf(X,mu,sigma,skew,kurt)
    #
    # Returns the probability distribution density of the Pearson distribution
    # with mean `mu`, standard deviation `sigma`, skewness `skew` and
    # kurtosis `kurt`, evaluated at the values in X.
    #
    # Some combinations of moments are not valid for any random variable, and in
    # particular, the kurtosis must be greater than the square of the skewness
    # plus 1. The kurtosis of the normal distribution is defined to be 3.
    #
    # The seven distribution types in the Pearson system correspond to the
    # following distributions:
    #
    # Type 0: Normal distribution
    # Type 1: Four-parameter beta
    # Type 2: Symmetric four-parameter beta
    # Type 3: Three-parameter gamma
    # Type 4: Not related to any standard distribution. Density proportional
    #         to (1+((x-a)/b)^2)^(-c) * exp(-d*arctan((x-a)/b)).
    # Type 5: Inverse gamma location-scale
    # Type 6: F location-scale
    # Type 7: Student's t location-scale
    #
    # See also
    # pearspdf pearsrnd mean std skewness kurtosis
    #
    # References:
    # [1] Johnson, N.L., S. Kotz, and N. Balakrishnan (1994) Continuous
    #     Univariate Distributions, Volume 1, Wiley-Interscience.
    # [2] Devroye, L. (1986) Non-Uniform Random Variate Generation,
    #     Springer-Verlag.

    otpt = len(output)
    # outClass = superiorfloat(mu, sigma, skew, kurt)

    if X[1] == inf:
        cdist = 1
        limstate = X[0]
    elif X[0] == -inf:
        cdist = 2
        limstate = X[1]
    else:
        cdist = 3
        limstate = X

    if sigma == 0:
        print("Warning: The standard deviation of output distribution", k,
              "is zero. No distribution or correlation can be calculated for it.")
        if mu >= X[0] and mu <= X[1]:  # mean is in the limits
            return 1, None, inf, None, None, None, None, None, None, None, None
        else:  # mean is outside the limits
            return 0, None, inf, None, None, None, None, None, None, None, None

    X = (X - mu) / sigma  # Z-score

    if method == 'MCS':
        beta1 = 0
        beta2 = 3
        beta3 = sigma ** 2
    else:
        beta1 = skew ** 2
        beta2 = kurt
        beta3 = sigma ** 2

    # Return NaN for illegal parameter values.
    if (sigma < 0) or (beta2 <= beta1 + 1):
        p = zeros(otpt) + nan
        dtype = nan
        coefs = zeros((1, 3)) + nan
        print('Illegal parameter values passed to pearscdf! (sigma:', sigma,
              ' beta1:', beta1, ' beta2:', beta2, ')')
        return

    # Classify the distribution and find the roots of c0 + c1*x + c2*x^2
    c0 = 4 * beta2 - 3 * beta1       # ./ (10*beta2 - 12*beta1 - 18)
    c1 = skew * (beta2 + 3)          # ./ (10*beta2 - 12*beta1 - 18)
    c2 = 2 * beta2 - 3 * beta1 - 6   # ./ (10*beta2 - 12*beta1 - 18)

    if c1 == 0:  # symmetric dist'ns
        if beta2 == 3:
            dtype = 0
            a1 = 0
            a2 = 0
        else:
            if beta2 < 3:
                dtype = 2
            elif beta2 > 3:
                dtype = 7
            a1 = -sqrt(abs(c0 / c2))
            a2 = -a1  # symmetric roots
    elif c2 == 0:  # kurt = 3 + 1.5*skew^2
        dtype = 3
        a1 = -c0 / c1  # single root
        a2 = a1
    else:
        kappa = c1 ** 2 / (4 * c0 * c2)
        if kappa < 0:
            dtype = 1
        elif kappa < 1 - finfo(float64).eps:
            dtype = 4
        elif kappa <= 1 + finfo(float64).eps:
            dtype = 5
        else:
            dtype = 6
        # Solve the quadratic for general roots a1 and a2 and sort by their real parts
        csq = c1 ** 2 - 4 * c0 * c2
        if csq < 0:
            tmp = -(c1 + sign(c1) * cmath.sqrt(csq)) / 2
        else:
            tmp = -(c1 + sign(c1) * sqrt(csq)) / 2
        a1 = tmp / c2
        a2 = c0 / tmp
        if real(a1) > real(a2):
            tmp = a1
            a1 = a2
            a2 = tmp

    denom = 10 * beta2 - 12 * beta1 - 18
    if abs(denom) > sqrt(finfo(double).tiny):
        c0 = c0 / denom
        c1 = c1 / denom
        c2 = c2 / denom
        coefs = [c0, c1, c2]
    else:
        dtype = 1  # this should have happened already anyway
        # beta2 = 1.8 + 1.2*beta1, and c0, c1, and c2 -> Inf. But a1 and a2 are
        # still finite.
        coefs = zeros((1, 3)) + inf

    if method == 'MCS':
        dtype = 8

    # Generate standard (zero mean, unit variance) values
    if dtype == 0:
        # normal: standard support (-Inf,Inf)
        m1 = 0
        m2 = 1
        p = norm.cdf(X[1], m1, m2) - norm.cdf(X[0], m1, m2)
        lo = norm.ppf(3.39767E-06, mu, sigma)
        hi = norm.ppf(0.999996602, mu, sigma)
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 1:
        # four-parameter beta: standard support (a1,a2)
        if abs(denom) > sqrt(finfo(double).tiny):
            m1 = (c1 + a1) / (c2 * (a2 - a1))
            m2 = -(c1 + a2) / (c2 * (a2 - a1))
        else:
            # c1 and c2 -> Inf, but c1/c2 has a finite limit
            m1 = c1 / (c2 * (a2 - a1))
            m2 = -c1 / (c2 * (a2 - a1))
        X = (X - a1) / (a2 - a1)  # Transform to 0-1 interval
        alph = m1 + 1
        beta = m2 + 1
        if alph < 1.001 and beta < 1.001:
            alph = 1.001
            beta = 1.001
        mode = (alph - 1) / (alph + beta - 2)
        if mode < 0.1:
            if alph > beta:
                alph = max(2.0, alph)
                beta = (alph - 1) / 0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0, beta)
                alph = (0.1 * (beta - 2) + 1) / (1 - 0.1)
        elif mode > 0.9:
            if alph > beta:
                alph = max(2.0, alph)
                beta = (alph - 1) / 0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0, beta)
                alph = (0.1 * (beta - 2) + 1) / (1 - 0.1)
        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo = a1 * sigma + mu
        hi = a2 * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 2:
        # symmetric four-parameter beta: standard support (-a1,a1)
        m = (c1 + a1) / (c2 * 2 * abs(a1))
        m1 = m
        m2 = m
        X = (X - a1) / (2 * abs(a1))
        alph = m + 1
        beta = m + 1
        if alph < 1.01:
            alph = 1.01
            beta = 1.01
        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo = a1 * sigma + mu
        hi = a2 * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 3:
        # three-parameter gamma: standard support (a1,Inf) or (-Inf,a1)
        m = (c0 / c1 - c1) / c1
        m1 = m
        m2 = m
        X = (X - a1) / c1
        # note: the original passed 1 positionally, which scipy reads as loc;
        # scale=1 matches MATLAB's gamcdf(X, a, 1)
        p = gamma.cdf(X[1], m + 1, scale=1) - gamma.cdf(X[0], m + 1, scale=1)
        lo = (gamma.ppf(3.39767E-06, m + 1, scale=1) * c1 + a1) * sigma + mu
        hi = (gamma.ppf(0.999996602, m + 1, scale=1) * c1 + a1) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 4:
        # Pearson IV is not a transformation of a standard distribution: density
        # proportional to (1+((x-lambda)/a)^2)^(-m) * exp(-nu*arctan((x-lambda)/a)),
        # standard support (-Inf,Inf)
        X = X * sigma + mu
        r = 6 * (beta2 - beta1 - 1) / (2 * beta2 - 3 * beta1 - 6)
        m = 1 + r / 2
        nu = -r * (r - 2) * skew / sqrt(16 * (r - 1) - beta1 * (r - 2) ** 2)
        a = sqrt(beta3 * (16 * (r - 1) - beta1 * (r - 2) ** 2)) / 4
        _lambda = mu - ((r - 2) * skew * sigma) / 4  # gives zero mean
        m1 = m
        m2 = nu
        if cdist == 1:
            p = 1 - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        elif cdist == 2:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma)
        elif cdist == 3:
            p = (pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma)
                 - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma))
        lo = norm.ppf(3.39767E-06, mu, sigma)
        hi = norm.ppf(0.999996602, mu, sigma)
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 5:
        # inverse gamma location-scale: standard support (-C1,Inf) or (-Inf,-C1)
        C1 = c1 / (2 * c2)
        X = -((c1 - C1) / c2) / (X + C1)
        m1 = c2
        m2 = 0
        p = gamma.cdf(X[1], 1. / c2 - 1, scale=1) - gamma.cdf(X[0], 1. / c2 - 1, scale=1)
        lo = (-((c1 - C1) / c2) / gamma.ppf(3.39767E-06, 1 / c2 - 1, scale=1) - C1) * sigma + mu
        hi = (-((c1 - C1) / c2) / gamma.ppf(0.999996602, 1 / c2 - 1, scale=1) - C1) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 6:
        # F location-scale: standard support (a2,Inf) or (-Inf,a1)
        m1 = (a1 + c1) / (c2 * (a2 - a1))
        m2 = -(a2 + c1) / (c2 * (a2 - a1))
        # a1 and a2 have the same sign, and they've been sorted so a1 < a2
        if a2 < 0:
            nu1 = 2 * (m2 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a2) / (a2 - a1) * (nu2 / nu1)
            p = f.cdf(X[1], nu1, nu2) - f.cdf(X[0], nu1, nu2)
            lo = (f.ppf(3.39767E-06, nu1, nu2) + a2) * sigma + mu
            hi = (f.ppf(0.999996602, nu1, nu2) + a2) * sigma + mu
            Inv1 = norm.ppf(p, 0, 1)
        else:  # 0 < a1
            nu1 = 2 * (m1 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a1) / (a1 - a2) * (nu2 / nu1)
            p = -f.cdf(X[1], nu1, nu2) + f.cdf(X[0], nu1, nu2)
            hi = (-f.ppf(3.39767E-06, nu1, nu2) + a1) * sigma + mu
            lo = (-f.ppf(0.999996602, nu1, nu2) + a1) * sigma + mu
            Inv1 = norm.ppf(p, 0, 1)
    elif dtype == 7:
        # t location-scale: standard support (-Inf,Inf)
        nu = 1. / c2 - 1
        X = X / sqrt(c0 / (1 - c2))
        m1 = nu
        m2 = 0
        p = t.cdf(X[1], nu) - t.cdf(X[0], nu)
        lo = t.ppf(3.39767E-06, nu) * sqrt(c0 / (1 - c2)) * sigma + mu
        hi = t.ppf(0.999996602, nu) * sqrt(c0 / (1 - c2)) * sigma + mu
        Inv1 = norm.ppf(p, 0, 1)
    else:
        print("ERROR: Unknown data type!")
    # elif dtype == 8:  # Monte Carlo Simulation Histogram (left unfinished
    #     out = kurt    # in the original port)
    #     p = skew
    #     m1 = 0
    #     m2 = 0

    if dtype != 1 and dtype != 2:
        mu_s = (mu - lo) / (hi - lo)
        sigma_s = sigma ** 2 / (hi - lo) ** 2
        alph = ((1 - mu_s) / sigma_s - 1 / mu_s) * mu_s ** 2
        beta = alph * (1 / mu_s - 1)
        if alph > 70 or beta > 70:
            alph = 70
            beta = 70
            lo = mu - 11.87434 * sigma
            hi = 2 * mu - lo

    return p, dtype, Inv1, m1, m2, a1, a2, alph, beta, lo, hi
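# Hedged sanity check (not from the original source): skew = 0 and kurt = 3
# should be classified as Type 0 (normal), so p must equal the normal mass on
# the interval. `output` is only used for its length, so [] suffices here.
from numpy import array
from scipy.stats import norm

p, dtype_, Inv1, m1, m2, a1, a2, alph, beta, lo, hi = pearscdf(
    array([-1.0, 1.0]), mu=0.0, sigma=1.0, skew=0.0, kurt=3.0,
    method='', k=0, output=[])
assert dtype_ == 0
print(p, norm.cdf(1.0) - norm.cdf(-1.0))  # both ~0.6827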
if max(mat_disY) / sum(mat_disY) < 0.7679:
    print('Variance is homogeneous')
else:
    print('Variance is not homogeneous')
print('-------------------------------------------------------------------------\n'
      "SIGNIFICANCE CHECK OF THE COEFFICIENTS BY STUDENT'S CRITERION:\n")
S2b = sum(mat_disY) / N
S2bs = S2b / (m * N)
Sbs = sqrt(S2bs)
print('Sbs:\n', Sbs)
bb = [sum(mat_serY[k] * tran1[i][k] for k in range(N)) / N for i in range(N)]
t = [abs(bb[i]) / Sbs for i in range(N)]
print('bi:\n', bb, '\nti:\n', t, '\n...\n..')
f1, f2 = m - 1, N
f3 = f1 * f2
for i in range(N):
    if t[i] < t_check.ppf(q=0.975, df=f3):
        blist[i] = 0
        d -= 1
        print('Excluding coefficient b', i, 'from the equation')
y_reg = [blist[0] + blist[1] * mat_X[i][0] + blist[2] * mat_X[i][1] + blist[3] * mat_X[i][2]
         for i in range(4)]
print('Values of the regression equations:\n', y_reg)
print('-------------------------------------------------------------------------\n'
      "ADEQUACY CHECK BY FISHER'S CRITERION:\n")
f4 = N - d
# note: the adequacy variance should sum the squared deviations; the original
# squared the sum of deviations and cast it to int, which looks like a bug
Sad = (m / (N - d)) * sum((y_reg[i] - mat_serY[i]) ** 2 for i in range(N))
Fp = Sad / S2b
print('Number of significant coefficients:\n', d, '\nFp:\n', Fp, '\n...\n..')
if Fp > f.ppf(q=0.95, dfn=f4, dfd=f3):
    print('The regression equation is inadequate to the original at significance level 0.05')
else:
    print('The regression equation is adequate to the original at significance level 0.05')
def f(f1, f2, p):
    # F-distribution quantile wrapped as a sympy expression
    f1 = float(f1)
    f2 = float(f2)
    p = float(p)
    return sympify(sci_f.ppf(p, f1, f2))
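# Minimal usage sketch: the wrapper shadows the name `f`, which is exactly why
# scipy's F distribution is imported under the sci_f alias.
from scipy.stats import f as sci_f
from sympy import sympify

print(f(5, 10, 0.95))  # 95th percentile of F(5, 10), returned as a sympy Float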
def f_a(self):
    self.F_emp_A = self.var_A / self.var_remain if self.var_A > self.var_remain \
        else self.var_remain / self.var_A
    self.F_cr_A = f.ppf(1 - self.alpha, self.n - 1, (self.n - 1) * (self.m - 1)) \
        if self.var_A > self.var_remain \
        else f.ppf(1 - self.alpha, (self.n - 1) * (self.m - 1), self.n - 1)

def f_b(self):
    self.F_emp_B = self.var_B / self.var_remain if self.var_B > self.var_remain \
        else self.var_remain / self.var_B
    # note: the original else-branch used (m-1)*(m-1); (m-1)*(n-1) mirrors f_a
    # and the if-branch, so it is assumed to be the intended value
    self.F_cr_B = f.ppf(1 - self.alpha, self.m - 1, (self.m - 1) * (self.n - 1)) \
        if self.var_B > self.var_remain \
        else f.ppf(1 - self.alpha, (self.m - 1) * (self.n - 1), self.m - 1)

def f_full(self):
    self.F_emp_Full = self.var_full / self.var_remain if self.var_full > self.var_remain \
        else self.var_remain / self.var_full
    self.F_cr_Full = f.ppf(1 - self.alpha, (self.n * self.m) - 1, (self.n - 1) * (self.m - 1)) \
        if self.var_full > self.var_remain \
        else f.ppf(1 - self.alpha, (self.n - 1) * (self.m - 1), (self.n * self.m) - 1)

# data = np.array([
#     [25, 20, 30, 25],
#     [30, 40, 40, 50],
#     [23, 18, 20, 27]
# ])

pd.set_option('display.max_columns', 11)
a = AnalysisOfVariance(0.05, n=3, m=3)
a.calc()
a.data_frame()
print(f.ppf(0.95, 2, 6))
def main(n, m):
    f1 = m - 1
    f2 = n
    f3 = f1 * f2
    q = 0.05
    student = partial(t.ppf, q=1 - 0.025)
    t_student = student(df=f3)
    G_kr = cohren(f1, f2)
    x, y, x_norm = plan_matrix(n, m)
    y_aver = [round(sum(i) / len(i), 2) for i in y]
    B = find_coeff(x, y_aver, n)
    Gp = krit_cochrena(y, y_aver, n, m)
    print(f'Gp = {Gp}')
    if Gp < G_kr:
        print(f'With probability {1-q} the variances are homogeneous.')
    else:
        print("The number of experiments must be increased")
        m += 1
        main(n, m)
    ts = krit_studenta(x_norm[:, 1:], y, y_aver, n, m)
    print("Student's criterion:\n", ts)
    res = [t for t in ts if t > t_student]
    final_k = [B[ts.index(i)] for i in ts if i in res]
    print('Coefficients {} are statistically insignificant, so we exclude them from the equation.'
          .format([i for i in B if i not in final_k]))
    y_new = []
    for j in range(n):
        y_new.append(reg([x[j][ts.index(i)] for i in ts if i in res], final_k))
    print(f'Values of "y" with coefficients {final_k}')
    print(y_new)
    d = len(res)
    f4 = n - d
    F_p = krit_fishera(y, y_aver, y_new, n, m, d)
    fisher = partial(f.ppf, q=1 - 0.05)
    f_t = fisher(dfn=f4, dfd=f3)
    print("Adequacy check by Fisher's criterion")
    print('F_p =', F_p)
    print('F_t =', f_t)
    if F_p < f_t:
        print('The mathematical model is adequate to the experimental data')
    else:
        print('The mathematical model is not adequate to the experimental data')
    print("-----------------------------------------------------------")
    print("Additional task")
    Qa = [0.95, 0.96, 0.97, 0.98, 0.99, 0.1, 1.05, 1.1, 1.2]
    for i in range(len(Qa)):
        # note: the original passed the loop index as q; Qa[i] is clearly the
        # intended argument (entries above 1 are invalid probabilities and
        # yield nan)
        f_t = f.ppf(q=Qa[i], dfn=f4, dfd=f3)
        if F_p > f_t:
            print("At q=0.05 the regression equation is not adequate")
    print("-----------------------------------------------------------")
def hyperellipsoid(P, y=None, z=None, pvalue=0.95, units=None, show=True, ax=None):
    """
    Prediction hyperellipsoid for multivariate data.

    The hyperellipsoid is a prediction interval for a sample of a multivariate
    random variable and is such that there is pvalue*100% of probability that a
    new observation will be contained inside the hyperellipsoid [1]_.
    The hyperellipsoid is also a tolerance region such that the average or
    expected value of the proportion of the population contained in this region
    is exactly pvalue*100% (called Type 2 tolerance region by Chew (1966) [1]_).

    The directions and lengths of the semi-axes are found, respectively, as the
    eigenvectors and eigenvalues of the covariance matrix of the data using the
    concept of principal components analysis (PCA) [2]_ or singular value
    decomposition (SVD) [3]_ and the lengths of the semi-axes are adjusted to
    account for the necessary prediction probability.

    The volume of the hyperellipsoid is calculated with the same equation for
    the volume of a n-dimensional ball [4]_ with the radius replaced by the
    semi-axes of the hyperellipsoid.

    This function calculates the prediction hyperellipsoid for the data, which
    is considered a (finite) sample of a multivariate random variable with
    normal distribution (i.e., the F distribution is used and not the
    approximation by the chi-square distribution).

    Parameters
    ----------
    P : 1-D or 2-D array_like
        For a 1-D array, P is the abscissa values of the [x,y] or [x,y,z] data.
        For a 2-D array, P is the joined values of the multivariate data. The
        shape of the 2-D array should be (n, p) where n is the number of
        observations (rows) and p the number of dimensions (columns).
    y : 1-D array_like, optional (default = None)
        Ordinate values of the [x, y] or [x, y, z] data.
    z : 1-D array_like, optional (default = None)
        Ordinate values of the [x, y, z] data.
    pvalue : float, optional (default = .95)
        Desired prediction probability of the hyperellipsoid.
    units : str, optional (default = None)
        Units of the input data.
    show : bool, optional (default = True)
        True (1) plots data in a matplotlib figure, False (0) to not plot.
        Only the results for p=2 (ellipse) or p=3 (ellipsoid) will be plotted.
    ax : a matplotlib.axes.Axes instance (default = None)

    Returns
    -------
    hypervolume : float
        Hypervolume (e.g., area of the ellipse or volume of the ellipsoid).
    axes : 1-D array
        Lengths of the semi-axes hyperellipsoid (largest first).
    angles : 1-D array
        Angles of the semi-axes hyperellipsoid (only for 2D or 3D data).
        For the ellipsoid (3D data), the angles are the Euler angles calculated
        in the XYZ sequence.
    center : 1-D array
        Centroid of the hyperellipsoid.
    rotation : 2-D array
        Rotation matrix for hyperellipsoid semi-axes (only for 2D or 3D data).

    References
    ----------
    .. [1] http://www.jstor.org/stable/2282774
    .. [2] http://en.wikipedia.org/wiki/Principal_component_analysis
    .. [3] http://en.wikipedia.org/wiki/Singular_value_decomposition
    .. [4] http://en.wikipedia.org/wiki/Volume_of_an_n-ball

    Examples
    --------
    >>> from hyperellipsoid import hyperellipsoid
    >>> y = np.cumsum(np.random.randn(3000)) / 50
    >>> x = np.cumsum(np.random.randn(3000)) / 100
    >>> area, axes, angles, center, R = hyperellipsoid(x, y, units='cm')
    >>> print('Area =', area)
    >>> print('Semi-axes =', axes)
    >>> print('Angles =', angles)
    >>> print('Center =', center)
    >>> print('Rotation matrix =\n', R)

    >>> P = np.random.randn(1000, 3)
    >>> P[:, 2] = P[:, 2] + P[:, 1]*.5
    >>> P[:, 1] = P[:, 1] + P[:, 0]*.5
    >>> volume, axes, angles, center, R = hyperellipsoid(P, units='cm')
    """
    from scipy.stats import f as F
    from scipy.special import gamma

    P = np.array(P, ndmin=2, dtype=float)
    if P.shape[0] == 1:
        P = P.T
    if y is not None:
        y = np.array(y, copy=False, ndmin=2, dtype=float)
        if y.shape[0] == 1:
            y = y.T
        P = np.concatenate((P, y), axis=1)
    if z is not None:
        z = np.array(z, copy=False, ndmin=2, dtype=float)
        if z.shape[0] == 1:
            z = z.T
        P = np.concatenate((P, z), axis=1)
    # covariance matrix
    cov = np.cov(P, rowvar=0)
    # singular value decomposition
    U, s, Vt = np.linalg.svd(cov)
    p, n = s.size, P.shape[0]
    # F percent point function
    fppf = F.ppf(pvalue, p, n - p) * (n - 1) * p * (n + 1) / n / (n - p)
    # semi-axes (largest first)
    saxes = np.sqrt(s * fppf)
    hypervolume = np.pi ** (p / 2) / gamma(p / 2 + 1) * np.prod(saxes)
    # centroid of the hyperellipsoid (placed before the rotation branch so it
    # is defined for every p; the flattened original was ambiguous here)
    center = np.mean(P, axis=0)
    # rotation matrix
    if p == 2 or p == 3:
        R = Vt
        if s.size == 2:
            angles = np.array([np.rad2deg(np.arctan2(R[1, 0], R[0, 0])),
                               90 - np.rad2deg(np.arctan2(R[1, 0], -R[0, 0]))])
        else:
            angles = rotXYZ(R, unit="deg")
    else:
        R, angles = None, None

    if show and (p == 2 or p == 3):
        _plot(P, hypervolume, saxes, center, R, pvalue, units, ax)

    return hypervolume, saxes, angles, center, R
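# Side note (hedged sketch, assumed values): the docstring stresses that the F
# distribution is used rather than the chi-square approximation; for large n
# the F-based scaling factor approaches chi2.ppf(pvalue, p).
from scipy.stats import chi2, f as F

p_demo, n_demo, pvalue_demo = 3, 1000, 0.95
f_scale = (F.ppf(pvalue_demo, p_demo, n_demo - p_demo)
           * (n_demo - 1) * p_demo * (n_demo + 1) / n_demo / (n_demo - p_demo))
print(f_scale, chi2.ppf(pvalue_demo, p_demo))  # roughly 7.87 vs 7.81 here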
def get_independent_confidence_intervals(self, alpha=0.05):
    m = len(self.unknowns)
    n = len(self.validation_data.get_time_steps())
    f_alpha = f.ppf(1 - alpha, m, n)
    c_alpha = n * self.objective_value * f_alpha / (n - m)
    return np.sqrt(c_alpha / self.get_covariance_matrix().diagonal())
def Computing_UCL(N, alpha):
    S = 1
    return (S * ((N - 1) ** 2) * f.ppf(1 - alpha, S, N - S)) / (N * (N - S))
# (fragment) the matching if of this else belongs to the preceding Student's
# t significance loop, which is not part of this excerpt
    else:
        ts[i] = False
print("Are the b coefficients significant: " + str(ts))
f4 = k - d
print("f4: " + str(f4))
x = [[-30, -30, 0, 0], [10, 60, 10, 60], [10, 35, 35, 10]]
yj = []
for i in range(4):
    yj.append(0)
    for j in range(4):
        if ts[j]:
            if j == 0:
                yj[i] += b[0]
            else:
                yj[i] += b[j] * x[j - 1][i]
print("yj: " + str(yj))
S2ad = round(m * sum([(yj[i] - yi[i]) ** 2 for i in range(4)]) / f4, 3)
Fp = round(S2ad / S2v, 3)
print("Fp: " + str(Fp))
Fcr = round(f.ppf(1 - alpha, f4, f3), 1)
print("Fcr: " + str(Fcr))
if Fp < Fcr:
    print("F-criteria: OK")
else:
    print("F-criteria: :(")
list_ai = [round(i, 5) for i in solve(list_for_solve_a, Y_average)]
print("y = {} + {}*x1 + {}*x2 + {}*x3 + {}*x1x2 + {}*x1x3 + {}*x2x3 + {}*x1x2x3"
      .format(list_ai[0], list_ai[1], list_ai[2], list_ai[3],
              list_ai[4], list_ai[5], list_ai[6], list_ai[7]))
print("###################################################################################################################")
Gp = max(Disp_list) / sum_dispersion
F1 = m - 1
N = len(y1)
F2 = N
q1 = 0.05 / F1
fisher_value = f.ppf(q=1 - q1, dfn=F2, dfd=(F1 - 1) * F2)
Gt = fisher_value / (fisher_value + F1 - 1)
print("\nGp = ", Gp, " Gt = ", Gt)
if Gp < Gt:
    print("_____Variance is homogeneous!_____\n")
    Dispersion_B = sum_dispersion / N
    Dispersion_beta = Dispersion_B / (m * N)
    S_beta = math.sqrt(abs(Dispersion_beta))
    beta_list = [0, 0, 0, 0, 0, 0, 0, 0]
    for i in range(len(x0_factor)):
        beta_list[0] += (Y_average[i] * x0_factor[i]) / N
        beta_list[1] += (Y_average[i] * x1_factor[i]) / N
        beta_list[2] += (Y_average[i] * x2_factor[i]) / N