def stats(self, SigmaR, B_Col, significance=None):
    """Run a chi-squared test of an observed variance against a fixed expected variance.

    The expected variance is hard-coded as three 3 mm components
    (``3 * 0.003**2``). Two statistics are computed and printed; the decision
    is taken by comparing ``teststatx`` with the chi-squared critical value at
    ``1 - significance`` for ``B_Col - 1`` degrees of freedom.

    Parameters
    ----------
    SigmaR : float
        Observed variance (used directly as ``observed``).
    B_Col : int
        Count used for the degrees of freedom (``dof = B_Col - 1``).
    significance : float, optional
        Significance level. When omitted the user is prompted on stdin,
        which is the original behaviour.

    Returns
    -------
    None
        Results are reported on stdout only.
    """
    # Chi-squared test
    print('Our Null Hypothesis states that the variance of our population = sample variance')

    # Variance of our radius from measured points
    observed = SigmaR
    # Expected variance (3 mm per x, y, z observation)
    expected = .003**2 + .003**2 + .003**2

    # Degrees of freedom
    dof = B_Col - 1

    # Test statistics.
    # NOTE(review): teststatx1 = dof * s^2 / sigma^2 is the classical variance
    # test statistic, but the decision below uses teststatx -- confirm that
    # this is intended.
    teststatx = B_Col * ((observed - expected)**2 / expected)
    teststatx1 = dof * (observed / expected)

    # Prompt for the significance level only when the caller did not supply it.
    # The builtin float replaces np.float, which was removed in NumPy 1.24.
    if significance is None:
        significance = float(input('Please specify the significance level: '))

    print(teststatx)
    print(teststatx1)

    # Critical value from scipy.stats.chi2 instead of a lookup table
    Chi = chi2.ppf((1 - significance), dof)

    # Reject the null hypothesis when the statistic exceeds the critical value
    if teststatx > Chi:
        print('We reject the null hypothesis at the ', significance, 'significance level')
    else:
        print('We fail to reject the null hypothesis at the ', significance, 'significance level')
    print(teststatx, dof)
def main():
    """Print a coarse probability table for a shifted, scaled chi-squared law and plot its pdf.

    The distribution is ``chi2(df=5, loc=20, scale=8)``. The pdf is sampled at
    every integer from 0 up to an even bound just above the 99% quantile.
    Summing those samples over a range approximates the probability mass of
    that range (unit spacing), which is then printed as an integer percentage.
    Finally the quantiles for several confidence levels are printed and the
    sampled pdf is drawn.
    """
    fig, ax = plt.subplots(1, 1)
    df = 5
    loc = 20
    scale = 8

    # Upper bound: 99% quantile, rounded up and forced to be even.
    valmax = int(chi2.ppf(0.99, df, loc, scale)) + 1
    if valmax % 2 != 0:
        valmax += 1

    # One sample per integer value in [0, valmax].
    x = np.linspace(0, valmax, valmax + 1)
    proba = chi2.pdf(x, df, loc, scale)
    repartition = list(zip(x.tolist(), proba.tolist()))

    nbTranche = 5
    valmin = loc
    space = (valmax - valmin) / nbTranche
    # Percentage not yet attributed to a printed bucket; what is left at the
    # end is reported as the upper tail.
    remain = 100

    tranche0 = int(100 * sum(pro for val, pro in repartition if val < valmin))
    remain -= tranche0
    print("P(v < %d) = %d" % (valmin, tranche0))

    for traidx in range(nbTranche):
        deb = valmin + traidx * space
        fin = valmin + (traidx + 1) * space
        tranche = int(100 * sum(
            pro for val, pro in repartition if deb <= val < fin))
        remain -= tranche
        print("P(%d <= v < %d) = %d" % (deb, fin, tranche))

    print("P(v >= %d) = %d" % (valmin + nbTranche * space, remain))

    for certitude in [0.6, 0.7, 0.75, 0.8, 0.85, 0.90, 0.95]:
        # The confidence level is a fraction: "%d" used to print it as 0.
        print("certi = %.2f , val = %d" %
              (certitude, int(chi2.ppf(certitude, df, loc, scale))))

    ax.plot(x, proba, 'r-', lw=5, alpha=0.6, label='chi2 pdf')
    plt.show()
def chi_squared(df):
    """Show the chi-squared pdf for ``df`` degrees of freedom.

    The curve is drawn on 100 evenly spaced points between the 1% and 99%
    quantiles of the distribution, then the figure is displayed.
    """
    figure, axes = plt.subplots(1, 1)

    # First four moments of the distribution (computed, not used by the plot).
    moments = chi2.stats(df, moments='mvsk')
    mean, var, skew, kurt = moments

    # Probability density function over the central 98% of the mass.
    lower = chi2.ppf(0.01, df)
    upper = chi2.ppf(0.99, df)
    grid = np.linspace(lower, upper, 100)
    density = chi2.pdf(grid, df)
    axes.plot(grid, density, 'r-', lw=5, alpha=0.6, label='chi2 pdf')

    plt.show()
def plot_chi2():
    """Compare stored chi-squared values with the theoretical chi-squared pdf.

    Reads the values from ``txt_files/chi2.txt`` (entries >= 200 are dropped)
    and the degrees of freedom from ``txt_files/dofs.txt``. The mean of the
    latter parametrises the reference pdf, which is drawn over a normalised
    histogram of the values.
    """
    from scipy.stats import chi2

    chi2s = np.loadtxt("txt_files/chi2.txt")
    # A boolean mask keeps exactly the entries below the cut, whatever the
    # array rank. The former np.where(...)[0] indexing selected (repeated)
    # rows when the file held a 2-D table.
    fchi2s = chi2s[chi2s < 200].flatten()

    dofs = np.loadtxt("txt_files/dofs.txt")
    df = np.mean(dofs)

    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    plt.plot(x, chi2.pdf(x, df))
    # `normed` was removed from matplotlib's hist; `density` is its replacement.
    plt.hist(fchi2s, 40, density=True)
    plt.xlabel(r"$\chi^2$", fontsize=24)
    plt.subplots_adjust(bottom=0.15)
    plt.show()
def kafang():
    """Walk through basic chi-squared calls: moments, pdf plot, cdf and ppf.

    Nothing is returned and the figure is not shown; the cdf/ppf values at
    the end are evaluated and discarded.
    """
    # The chi-squared distribution has a single parameter, which makes it
    # fairly easy to understand.
    figure, axes = plt.subplots(1, 1)
    df = 5
    mean, var, skew, kurt = chi2.stats(df, moments='mvsk')

    lower = chi2.ppf(0.01, df)
    upper = chi2.ppf(0.99, df)
    grid = np.linspace(lower, upper, 100)
    axes.plot(grid, chi2.pdf(grid, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

    # Frozen distribution object (its plot is disabled below).
    rv = chi2(df)
    # axes.plot(grid, rv.pdf(grid), 'k-', lw=2, label='frozen pdf')
    # plt.show()

    # Probability that a chi-squared value with 15 degrees of freedom is below 10
    chi2.cdf(10, df=15)
    # Inverse cdf (quantile) for a right-tail probability of 0.05
    chi2.ppf(0.95, df=10)
def test_chi2(self):
    """Exercise chi2 pdf/ppf/cdf/rvs and draw them on a single Axes.

    Checks that ``cdf`` inverts ``ppf`` at three probabilities and that the
    Axes keeps the default subplot geometry.
    """
    # presumably `self` is a unittest.TestCase -- only its assert* methods are used
    from scipy.stats import chi2
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(1, 1)
    try:
        df = 55
        x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
        ax.plot(x, chi2.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

        # Same curve through a frozen distribution object.
        rv = chi2(df)
        ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

        # cdf must invert ppf; the comparison result used to be thrown away.
        probabilities = [0.001, 0.5, 0.999]
        vals = chi2.ppf(probabilities, df)
        self.assertTrue(np.allclose(probabilities, chi2.cdf(vals, df)))

        r = chi2.rvs(df, size=1000)
        ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
        ax.legend(loc='best', frameon=False)

        # The class name in the repr differs between matplotlib releases
        # ("AxesSubplot" vs "Axes"); only the geometry is meaningful here.
        self.assertRegex(
            str(ax), r"^Axes(Subplot)?\(0\.125,0\.11;0\.775x0\.77\)$")
    finally:
        # Do not leak the figure into other tests.
        plt.close(fig)
#lM = np.log10(np.mean(10**lM_bins,1))
#visualize.NM_plot(lM,N_data,N_err,lM,N_emu)

# Persist the chi-squared values and Nfp, then reload them and compare the
# histogram of the values with the chi-squared pdf whose degrees of freedom
# are the mean of Nfp.
if add_uncertainty:
    chi2s_path = "chi2s_p%dpc.txt" % percent
else:
    chi2s_path = "chi2s.txt"
np.savetxt(chi2s_path, chi2s)
np.savetxt("Nfp.txt", Nfp)

import matplotlib.pyplot as plt
from scipy.stats import chi2

# "fontsize" is not a key of the "text" rc group; the font size lives under
# "font.size".
plt.rc('text', usetex=True)
plt.rc('font', size=20)

chi2s = np.loadtxt(chi2s_path).flatten()
Nfp = np.loadtxt("Nfp.txt")

# Make the histogram (`normed` was removed from matplotlib; use `density`).
plt.hist(chi2s, 20, density=True)

df = np.mean(Nfp)
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
plt.plot(x, chi2.pdf(x, df))
plt.xlabel(r"$\chi^2$", fontsize=24)
plt.xlim(0, 80)
plt.ylim(0, 0.1)
plt.subplots_adjust(bottom=0.15)
plt.show()
# Exploratory script: loads a CSV, draws a random integer table, evaluates
# chi-squared cdf values for its first two columns and runs an independent
# two-sample t-test on them.
from itertools import combinations

import matplotlib.pyplot as plt
import numpy
import numpy as np
import pandas as pd
from scipy.stats import chi2, chi2_contingency
from scipy.stats import ttest_ind
# from bioinfokit.analys import stat, get_data, chisq

df = pd.read_csv(r'C:\Users\ether\OneDrive\Desktop\info.csv')

deg_of_frdm = 15
data = np.random.randint(0, 5, (199, 15))
p_value_matrix = np.zeros((15, 15))

# Theoretical moments of chi2(deg_of_frdm) and the matching standard deviation.
mean, var, skew, kurt = chi2.stats(deg_of_frdm, moments='mvsk')
sigma = np.sqrt(var)
# x = np.linspace(chi2.ppf(0.01, deg_of_frdm), chi2.ppf(0.99, deg_of_frdm), 199)

first_col = data[:, 0]
second_col = data[:, 1]
stacked_columns = np.array((first_col[:, None], second_col[:, None]))

# NOTE(review): chi2.cdf takes (x, df, loc, scale) positionally, so `mean` is
# passed as the degrees of freedom and `sigma` as the location shift here --
# confirm this is what was intended.
chi2.cdf(stacked_columns, mean, sigma)
p_val = chi2.cdf(np.array((first_col, second_col)), mean, sigma)
p_val = chi2.cdf(stacked_columns.mean(), mean, sigma)

# Per-column summary statistics.
i_mean, j_mean = np.mean(first_col), np.mean(second_col)
i_std, j_std = np.std(first_col), np.std(second_col)

ttest, pval = ttest_ind(first_col, second_col)
def _inverse_or_truncated_pinv(matrix):
    """Invert ``matrix``; fall back to a truncated-SVD pseudo-inverse if it is singular."""
    try:
        return linalg.inv(matrix)
    except linalg.LinAlgError:
        u, s, vh = linalg.svd(matrix)
        # Keep the leading singular values carrying less than 95% of the
        # cumulated spectrum, and always keep the first one.
        keep = (np.cumsum(s) < np.sum(s) * .95) | ([True] + [False] * (len(s) - 1))
        inv_s = (1. / s) * keep
        return np.dot(np.dot(vh.T, np.diag(inv_s)), u.T)


def _distance_density(data, location, inv_sigma):
    """Return Mahalanobis distances, an evaluation grid and their Gaussian-kernel density."""
    Y = data - location
    R = np.sqrt((np.dot(Y, inv_sigma) * Y).sum(1))
    # Drop observations whose distance could not be computed.
    R = R[~np.isnan(R)]
    x = np.arange(0., 1.2 * np.amax(R), 0.0012 * np.amax(R))
    n = R.size
    # Kernel bandwidth
    sigma = 1.05 * np.std(R) * n ** (-0.2)
    kernel_arg = (np.tile(x, (n, 1)).T - R) / sigma
    fh = ((1 / np.sqrt(2 * np.pi)) * np.exp(-0.5 * kernel_arg ** 2)).sum(1) / (n * sigma)
    return R, x, fh


def _matched_chi2(R, p):
    """Return a frozen chi2(p) shifted and scaled to the mean and variance of ``R``."""
    diff_scale = np.sqrt(R.var() / float(chi2.stats(p, moments='v')))
    diff_loc = R.mean() - float(chi2.stats(p, scale=diff_scale, moments='m'))
    return chi2(p, loc=diff_loc, scale=diff_scale)


def _fit_errors(x, fh, template):
    """Return the (MSE, IMSE) between the density ``fh`` and ``template.pdf`` on grid ``x``."""
    squared_gap = (fh - template.pdf(x)) ** 2
    return squared_gap.mean(), 0.5 * squared_gap.sum() * (x[1] - x[0])


def test_fast_mcd(data):
    """Compare naive and robust (fast_mcd) estimates through their Mahalanobis distances.

    For each location/scatter estimate, the Mahalanobis distances of ``data``
    are computed, their density is estimated with a Gaussian kernel, and that
    density is compared with a chi-squared template (``data.shape[1]`` degrees
    of freedom) matched to the distances' mean and variance.

    Parameters
    ----------
    data : ndarray
        2-D array; ``data.shape[1]`` is used as the chi-squared degrees of
        freedom -- presumably (n_samples, n_features), confirm with callers.

    Returns
    -------
    tuple
        ``(mse_naive, mse_robust, imse_naive, imse_robust)``.
    """
    p = data.shape[1]

    ### Naive location and scatter estimates
    location = data.mean(0)
    covariance = np.cov(data.T)
    # The naive covariance is inverted here. The previous code referenced
    # `robust_covariance` before its definition and a bare `except` hid the
    # resulting NameError, so the SVD fallback was always taken.
    inv_sigma = _inverse_or_truncated_pinv(covariance)
    R, x1, fh = _distance_density(data, location, inv_sigma)
    template = _matched_chi2(R, p)
    if PLOT:
        plt.figure()
        plt.plot(x1, fh, color='blue')
        plt.plot(x1, template.pdf(x1), linestyle='--', color='blue')
    mse_naive, imse_naive = _fit_errors(x1, fh, template)
    if PLOT:
        print("MSE (naive case) =", mse_naive)
        print("IMSE (naive case) =", imse_naive)

    ### Robust location and scatter estimates
    robust_location, robust_covariance = fast_mcd(data)
    inv_sigma = _inverse_or_truncated_pinv(robust_covariance)
    R, x2, fh = _distance_density(data, robust_location, inv_sigma)
    template = _matched_chi2(R, p)
    if PLOT:
        plt.plot(x2, fh, color='green')
        plt.plot(x2, template.pdf(x2), linestyle='--', color='green')
    mse_robust, imse_robust = _fit_errors(x2, fh, template)
    if PLOT:
        print("MSE (robust case) =", mse_robust)
        print("IMSE (robust case) =", imse_robust)
        plt.legend(('empirical distribution (naive)', 'chi-2 (naive)',
                    'empirical distribution (robust)', 'chi-2 (robust)'),
                   loc='upper center', bbox_to_anchor=(0.5, 0.))
        plt.show()

    return mse_naive, mse_robust, imse_naive, imse_robust
# Flatten both images, evaluate chi2_distance between the reference image and
# the other images, then plot a weighted histogram of a fixed set of values
# (divided by df) with the reference distance marked as a vertical line.
imgh = np.reshape(img, nx*ny)
imgh1 = np.reshape(img_mask1, nx1*ny1)
df = 36*38
dof = chi2_distance(imgh, imgh1)

# Distances to the first seven rows of imght (results are not stored).
for row in range(7):
    chi2_distance(imgh, imght[row, :])

chi_sfh = np.array([502.5312995308081, 580.45729191839803, 204.19667370317001,
                    518.27719309677684, 1534.8645907539676, 1555.9265125639893])
# Equal weights summing to one, so bar heights are fractions of the sample.
weights = np.ones_like(chi_sfh)/float(len(chi_sfh))

fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
xu = np.linspace(chi2.ppf(0.01, dof), chi2.ppf(0.99, dof), 100)

# Raw strings keep the LaTeX backslashes without invalid-escape warnings.
ax.axvline(x=dof/df, color='k', linestyle='dashed', lw=4,
           label=r'UGC11680NED01 $\chi^2$')
# `normed` was removed from matplotlib's hist; `density` is its replacement.
ax.hist(chi_sfh/df, bins=10, density=False, weights=weights,
        histtype='step', lw=3,
        label=r'Mass $10<\log (M/M_{\odot})<11$ , color $2<g-r<3$')
ax.legend(loc='best', frameon=False)
ax.set_ylabel(r'Probability density $\chi ^2$')
ax.set_xlabel('$x$')
plt.show()
# Fit modified_sch to the selected bins, report the goodness of fit, store the
# fitted parameters in table t[i] and plot the data points.
popt_pdf2, pcov_pdf2 = curve_fit(
    modified_sch,
    bins_final[index],
    pdf_spinpar[index],
    sigma=yerr[index],
    # Flat sequence of initial guesses, one entry per fitted parameter.
    p0=(1e-3, 4., 0.6, 1e-3))
print('popt_pdf2 = ', popt_pdf2)

model_pdf2 = modified_sch(bins_final, *popt_pdf2)
chi2 = np.sum(
    (pdf_spinpar[index] - model_pdf2[index])**2 / (yerr[index])**2)
# Degrees of freedom = number of fitted points minus number of fitted
# parameters (the hard-coded 5 did not match the four fitted parameters).
dof = len(pdf_spinpar[index]) - len(popt_pdf2)
print('chi2 = ', chi2)
print('dof = ', dof)
chi2r = chi2 / dof
print('chi2r = ', chi2r)
# Mean of the chi-squared distribution with `dof` degrees of freedom.
chi2_expected = chi2scipy.stats(dof)[0]
print('expected chi2 = ', chi2_expected)

t[i].add_column(Column(name='pars', data=popt_pdf2, unit=''))
# NOTE(review): np.diag(pcov) holds parameter variances; one-sigma errors
# would be np.sqrt(np.diag(pcov_pdf2)) -- confirm what readers of this
# column expect before changing it.
t[i].add_column(Column(name='errors', data=np.diag(pcov_pdf2), unit=''))
outsch = os.path.join(this_dir, 'tables',
                      'schechter_HMD_lambda_z_%.3g.fit' % (z_snap))
t[i].write(outsch, overwrite=True)

ax.scatter(bins_final,
           pdf_spinpar,
           label=r'$z=%.3g$' % (z_snap),
           ls='None',
           marker='o',
           s=15)
# ax.fill_between(bins_final[index],pdf_spinpar[index] - yerr[index],pdf_spinpar[index] + yerr[index], alpha=0.4 )
import numpy as np from scipy.stats import chi2, norm import matplotlib.pyplot as plt import matplotlib.patches as mpatches # k degrees of freedom k = 5 th_mean, th_var, _, _ = chi2.stats(k, moments='mvsk') print("THEORY MEAN=", th_mean, "THEORY VARIANCE=", th_var) # prob dens function # what X is in 0.01 and 0.99 quantiles start = chi2.ppf(0.01, k) stop = chi2.ppf(0.99, k) # theoretical dist x = np.linspace(start, stop, 100) plt.plot(x, chi2.pdf(x, k), 'r-', lw=5, alpha=0.6, label='chi2 pdf') # histogram # random nums from dist r = chi2.rvs(k, size=1000) plt.hist(r, normed=True, histtype='stepfilled', alpha=0.2) plt.legend(loc='best', frameon=False) plt.title('Theoretical Dist') plt.ylabel('Prob Dens') plt.xlabel('X') plt.show() # calculate means for 1000 expirements with n samples in each def sample_mean(n): smeans = [] for x in range(1000):
def testChisquare(N, size):
    """Draw ``size`` values from ``lhw.rnchsq(N)`` and hand them to ``startWork``.

    The draws are passed together with the theoretical mean and variance of a
    chi-squared distribution with ``N`` degrees of freedom and the label
    ``"chiSquare"``.
    """
    samples = []
    for _ in range(size):
        samples.append(lhw.rnchsq(N))

    theoretical_mean, theoretical_var = chi2.stats(N, moments='mv')
    startWork(samples, theoretical_mean, theoretical_var, "chiSquare")
# Quartile-based outlier bound on the non-NaN distances:
# median + 3 * interquartile range.
sortedR = R[~np.isnan(R)].copy()
sortedR.sort()
# Quartile positions must be integers to be usable as indices; truncation
# matches what float indexing used to do on old NumPy releases.
qi, qe, qa = (len(sortedR) * np.array([0.25, 0.5, 0.75])).astype(int)
bnd = (sortedR[qa] - sortedR[qi])*3 + sortedR[qe]

### Estimate the density with a gaussian kernel
nonnan_subjects_arg = np.where(~np.isnan(R))[0]
R = R[nonnan_subjects_arg]
x = np.arange(0., 1.2*np.amax(R), 0.0012*np.amax(R))
n = R.size
# Kernel bandwidth
sigma = 1.05 * np.std(R) * n**(-0.2)
kernel_arg = (np.tile(x, (n, 1)).T - R) / sigma
fh = ((1/np.sqrt(2*np.pi)) * np.exp(-0.5*kernel_arg**2)).sum(1) / (n*sigma)

# Plot the density with the median and the outlier bound
plt.figure()
plt.plot(x, fh)
plt.vlines(sortedR[qe], 0, np.amax(fh))
plt.vlines(bnd, 0, np.amax(fh))

# Chi-2 distribution matched to the mean and variance of R
p = labels.size
diff_scale = np.sqrt(R.var() / float(chi2.stats(p, moments='v')))
diff_loc = R.mean() - float(chi2.stats(p, scale=diff_scale, moments='m'))
template = chi2(p, loc=diff_loc, scale=diff_scale)
plt.plot(x, template.pdf(x), linestyle='--', color='green')
plt.show()

# Report the entries whose distance exceeds the bound; indices are mapped back
# to the unfiltered ordering before slicing the file name.
for i in np.where(R > bnd)[0]:
    print(actual_files[nonnan_subjects_arg[i]][26:38])
# Write the parameter table, evaluate the fitted model, report its goodness
# of fit and fit a Gaussian to the distribution of normalised residuals.
out_table = os.path.join(this_dir, 'results', 'zevo', 'hsigma_params_Rvir.fit')
os.makedirs(os.path.dirname(out_table), exist_ok=True)
t.write(out_table, overwrite=True)

#fit_ = fit[np.where(counts_tot>c3)]
fit_ = h_func([s_grid_all, xoff_grid_all, spin_grid_all, redshift_all], *popt)

# log10 of the data is needed several times below; compute it once.
log_hsigma = np.log10(hsigma_all)
print('min log10(hsigma_) =', min(log_hsigma))
print('min fit_ = ', min(fit_))
print('max log10(hsigma_) =', max(log_hsigma))
print('max fit_ = ', max(fit_))

chi_2 = np.sum((log_hsigma - fit_)**2/herr_all**2)
dof = len(hsigma_all) - len(names)
chi_2r = chi_2/dof
# chi2.stats returns (mean, variance) by default.
rv = chi2.stats(dof)
print('chi2 = ', chi_2)
print('dof = ', dof)
print('chi2r = ', chi_2r)
# Only the mean is the expected chi2; the whole tuple used to be printed.
print('expected chi2 = ', rv[0])

#PDF OF THE RESIDUALS
res = ((log_hsigma - fit_)/herr_all)
pdf, b = np.histogram(res, bins=100, density=True)
# Bin centres
bins = (b[:-1]+b[1:])/2


def ga(x, x0, sigma):
    """Normalised Gaussian pdf with mean ``x0`` and standard deviation ``sigma``."""
    a = 1/sigma/np.sqrt(2*np.pi)
    return a*np.exp(-(x-x0)**2/(2*sigma**2))


par, cov = curve_fit(ga, bins, pdf)
gauss = ga(bins, *par)