Example #1
0
    def stats(self, SigmaR, B_Col, significance=None):
        """Run an upper-tailed chi-squared test on a sample variance.

        The null hypothesis is that the population variance equals the
        sample variance.  The decision statistic is the standard variance
        statistic ``dof * observed / expected``, compared against the
        chi-squared critical value at ``1 - significance``.

        Args:
            SigmaR: Observed variance of the radius from the measured points.
            B_Col: Number of observations; degrees of freedom are
                ``B_Col - 1``.
            significance: Significance level, e.g. ``0.05``.  When ``None``
                (the default) the user is prompted for it on stdin, as
                before.

        Returns:
            None.  The verdict and the intermediate statistics are printed.
        """
        # Chi Squared test
        print('Our Null Hypothesis states that the variance of our population = sample variance')

        # Variance of our radius from measured points
        observed = SigmaR

        # Expected variance (3mm per x,y,z observation)
        expected = .003**2+.003**2+.003**2

        # Calculation of degrees of freedom
        dof = B_Col - 1

        # teststatx is a Pearson-style quantity kept only as a printed
        # diagnostic: it carries the units of a variance, so it cannot be
        # compared with a chi-squared critical value.
        teststatx = B_Col * ((observed - expected)**2 / expected)
        # teststatx1 is dimensionless and is the statistic used for the
        # decision below.
        teststatx1 = dof * (observed / expected)

        # Prompt only when the caller did not supply a level.  The builtin
        # float replaces the removed np.float alias.
        if significance is None:
            significance = float(input('Please specify the significance level: '))

        print(teststatx, teststatx1)

        # Critical value from scipy.stats.chi2 instead of a printed table
        Chi = chi2.ppf(1 - significance, dof)

        # Reject when the sample variance is significantly larger than the
        # expected variance at the chosen significance level.
        if teststatx1 > Chi:
            print('We reject the null hypothesis at the ', significance, 'significance level')
        else:
            print('We fail to reject the null hypothesis at the ', significance, 'significance level')

        print(teststatx, dof)
Example #2
0
def main():
    """Tabulate and plot a shifted, scaled chi-squared distribution.

    Uses a chi-squared distribution with 5 degrees of freedom, ``loc=20``
    and ``scale=8``.  The pdf is sampled on a unit-spaced grid from 0 up to
    an even integer just above the 99% quantile.  Summing pdf samples on
    that grid gives approximate probabilities, which are printed as integer
    percentages for the region below ``loc``, five equal-width slices above
    it, and the remainder.  The quantiles for a few confidence levels are
    printed next, and finally the pdf is plotted.
    """
    fig, ax = plt.subplots(1, 1)
    df = 5
    loc = 20
    scale = 8

    # Upper grid bound: the 99% quantile rounded up, then bumped to even.
    valmax = int(chi2.ppf(0.99, df, loc, scale)) + 1
    if valmax % 2 != 0:
        valmax += 1
    # valmax + 1 points over [0, valmax] gives a spacing of exactly 1, so a
    # sum of pdf samples approximates a probability.
    x = np.linspace(0, valmax, valmax + 1)

    proba = chi2.pdf(x, df, loc, scale)
    repartition = list(zip(x.tolist(), proba.tolist()))

    nbTranche = 5
    valmin = loc
    space = (valmax - valmin) / nbTranche

    # `remain` starts at 100% and loses every slice's (truncated) share, so
    # whatever is left is reported for the final open-ended slice.
    remain = 100
    tranche0 = int(100 * sum(pro for val, pro in repartition if val < valmin))
    remain -= tranche0
    print("P(v < %d) = %d" % (valmin, tranche0))
    for traidx in range(nbTranche):
        deb = valmin + traidx * space
        fin = valmin + (traidx + 1) * space
        tranche = int(100 * sum(
            pro for val, pro in repartition if deb <= val < fin))
        remain -= tranche
        print("P(%d <= v < %d) = %d" % (deb, fin, tranche))
    print("P(v >= %d) = %d" % (valmin + nbTranche * space, remain))

    for certitude in [0.6, 0.7, 0.75, 0.8, 0.85, 0.90, 0.95]:
        # certitude is a fraction in (0, 1); "%d" truncated every level to 0.
        print("certi = %.2f , val = %d" %
              (certitude, int(chi2.ppf(certitude, df, loc, scale))))

    ax.plot(x, proba, 'r-', lw=5, alpha=0.6, label='chi2 pdf')

    plt.show()
def chi_squared(df):
    """Plot the chi-squared pdf for ``df`` degrees of freedom.

    The curve is drawn between the 1% and 99% quantiles on 100 evenly
    spaced points, then the figure is shown.
    """
    figure, axes = plt.subplots(1, 1)

    # First four moments (mean, variance, skewness, kurtosis); they are
    # computed here but not used by the plot.
    mean, var, skew, kurt = chi2.stats(df, moments='mvsk')

    # Span of the plot: central 98% of the distribution.
    lower = chi2.ppf(0.01, df)
    upper = chi2.ppf(0.99, df)
    grid = np.linspace(lower, upper, 100)

    density = chi2.pdf(grid, df)
    axes.plot(grid, density, 'r-', lw=5, alpha=0.6, label='chi2 pdf')
    plt.show()
Example #4
0
def plot_chi2():
    """Compare stored chi-squared values with the theoretical pdf.

    Reads chi-squared values from ``txt_files/chi2.txt`` (values of 200 or
    more are discarded) and degrees of freedom from ``txt_files/dofs.txt``.
    A density-normalised 40-bin histogram of the values is drawn together
    with the chi-squared pdf for the mean number of degrees of freedom.
    """
    from scipy.stats import chi2
    chi2s = np.loadtxt("txt_files/chi2.txt")
    # A boolean mask keeps exactly the elements below the cut for any array
    # shape; np.where(...)[0] would select (and repeat) whole rows when the
    # file holds a 2-D table.
    chi2s = chi2s[chi2s < 200]
    dofs = np.loadtxt("txt_files/dofs.txt")
    df = np.mean(dofs)
    fchi2s = chi2s.flatten()
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    plt.plot(x, chi2.pdf(x, df))
    # `normed` was removed from Matplotlib's hist; `density` is its
    # replacement.
    plt.hist(fchi2s, 40, density=True)
    plt.xlabel(r"$\chi_2$", fontsize=24)
    plt.subplots_adjust(bottom=0.15)
    plt.show()
def kafang():
    """Walk through basic ``scipy.stats.chi2`` calls.

    Draws the pdf for 5 degrees of freedom onto a fresh figure (the figure
    is not shown by this function), builds a frozen distribution, and
    evaluates one cdf value and one quantile whose results are discarded.
    """
    # The chi-squared distribution has a single parameter, which makes it
    # fairly easy to understand.
    figure, axes = plt.subplots(1, 1)
    dof = 5
    mean, var, skew, kurt = chi2.stats(dof, moments='mvsk')

    low_q = chi2.ppf(0.01, dof)
    high_q = chi2.ppf(0.99, dof)
    grid = np.linspace(low_q, high_q, 100)
    axes.plot(grid, chi2.pdf(grid, dof), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

    # Frozen distribution with the degrees of freedom fixed.
    frozen = chi2(dof)

    # Probability that the chi-squared value is below 10 with 15 degrees
    # of freedom.
    chi2.cdf(10, df=15)

    # Inverse cdf at a right-tail probability of 0.05 (10 degrees of
    # freedom).
    chi2.ppf(0.95, df=10)
Example #6
0
    def test_chi2(self):
        """Exercise ``scipy.stats.chi2`` and plot the results on one axes.

        Checks that ``cdf`` inverts ``ppf`` and that the axes occupy the
        default subplot position.  The pdf, the frozen pdf and a histogram
        of random variates are drawn along the way.
        """
        from scipy.stats import chi2
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1)
        # Close the figure when the test ends so figures do not pile up
        # across tests.
        self.addCleanup(plt.close, fig)

        df = 55

        x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
        ax.plot(x, chi2.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

        rv = chi2(df)
        ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

        # The round trip ppf -> cdf must give back the probabilities; the
        # comparison result was previously computed and thrown away.
        probabilities = [0.001, 0.5, 0.999]
        vals = chi2.ppf(probabilities, df)
        self.assertTrue(np.allclose(probabilities, chi2.cdf(vals, df)))

        r = chi2.rvs(df, size=1000)

        ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
        ax.legend(loc='best', frameon=False)
        # Older Matplotlib reports the axes as "AxesSubplot(...)", newer
        # releases as "Axes(...)"; accept both spellings of the same box.
        # NOTE(review): the box values assume default figure rcParams.
        self.assertRegex(
            str(ax), r"^Axes(Subplot)?\(0\.125,0\.11;0\.775x0\.77\)$")
Example #7
0
        #lM = np.log10(np.mean(10**lM_bins,1))
        #visualize.NM_plot(lM,N_data,N_err,lM,N_emu)

# Persist the chi-squared values and Nfp so the plot below can be rebuilt
# from disk.  The file name carries the uncertainty percentage when one
# was added.
if add_uncertainty:
    np.savetxt("chi2s_p%dpc.txt" % percent, chi2s)
else:
    np.savetxt("chi2s.txt", chi2s)
np.savetxt("Nfp.txt", Nfp)

import matplotlib.pyplot as plt
from scipy.stats import chi2
# The font size belongs to the 'font' rc group; passing fontsize to the
# 'text' group addresses a 'text.fontsize' rcParam that current Matplotlib
# does not define.
plt.rc('text', usetex=True)
plt.rc('font', size=20)

# Reload what was just written, flattening the chi-squared values to 1-D.
if add_uncertainty:
    chi2s = np.loadtxt("chi2s_p%dpc.txt" % percent).flatten()
else:
    chi2s = np.loadtxt("chi2s.txt").flatten()
Nfp = np.loadtxt("Nfp.txt")

# Density-normalised histogram (`density` replaces the removed `normed`).
plt.hist(chi2s, 20, density=True)
# Overlay the chi-squared pdf whose degrees of freedom are the mean of Nfp.
df = np.mean(Nfp)
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
plt.plot(x, chi2.pdf(x, df))
plt.xlabel(r"$\chi^2$", fontsize=24)
plt.xlim(0, 80)
plt.ylim(0, 0.1)
plt.subplots_adjust(bottom=0.15)
plt.show()
Example #8
0
import matplotlib.pyplot as plt
import numpy
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency, chi2
from scipy.stats import ttest_ind
# from bioinfokit.analys import stat, get_data, chisq
from itertools import combinations

# Load a CSV from a hard-coded absolute Windows path.
# NOTE(review): `df` is not referenced again in the visible code, and the
# path only resolves on the original author's machine.
df = pd.read_csv(r'C:\Users\ether\OneDrive\Desktop\info.csv')

deg_of_frdm = 15
# Synthetic data: a 199 x 15 array of random integers from
# np.random.randint(0, 5, ...).
data = np.random.randint(0, 5, (199, 15))
# 15 x 15 array of zeros; nothing in the visible code fills it in.
p_value_matrix = np.zeros((15, 15))

# Moments (mean, variance, skewness, kurtosis) of the chi-squared
# distribution with `deg_of_frdm` degrees of freedom.
mean, var, skew, kurt = chi2.stats(deg_of_frdm, moments='mvsk')
sigma = np.sqrt(var)
# x = np.linspace(chi2.ppf(0.01, deg_of_frdm), chi2.ppf(0.99, deg_of_frdm), 199)

# NOTE(review): the arguments after the first one are passed positionally
# as (mean, sigma). For scipy's chi2 these positions are presumably
# (df, loc), i.e. the moment `mean` acts as the degrees of freedom and
# `sigma` as a location shift. That reads like a normal-distribution
# calling convention -- confirm the intent before trusting these values.
# The result of this first call is discarded.
chi2.cdf(np.array((data[:, 0][:, None], data[:, 1][:, None])), mean, sigma)
# `p_val` is assigned twice; only the second assignment (cdf evaluated at
# the overall mean of columns 0 and 1) survives.
p_val = chi2.cdf(np.array((data[:, 0], data[:, 1])), mean, sigma)
p_val = chi2.cdf(np.array((data[:, 0][:, None], data[:, 1][:, None])).mean(), mean, sigma)


# Per-column summary statistics for the first two columns.
i_mean = np.mean(data[:, 0])
j_mean = np.mean(data[:, 1])
i_std = np.std(data[:, 0])
j_std = np.std(data[:, 1])
# Two-sample t-test between column 0 and column 1 (ttest_ind defaults).
ttest, pval = ttest_ind(data[:, 0], data[:, 1])

Example #9
0
def _pseudo_inverse(matrix):
    """Invert *matrix*, falling back to a truncated SVD pseudo-inverse."""
    try:
        return linalg.inv(matrix)
    except linalg.LinAlgError:
        u, s, vh = linalg.svd(matrix)
        # Keep the leading singular values whose cumulative sum stays under
        # 95% of the total, and always keep the first one.
        keep = np.cumsum(s) < np.sum(s) * .95
        keep[0] = True
        # Invert only the kept values: multiplying 1/s by the mask would
        # turn a zero singular value into inf * 0 = nan.
        inv_s = np.zeros_like(s, dtype=float)
        inv_s[keep] = 1. / s[keep]
        return np.dot(np.dot(vh.T, np.diag(inv_s)), u.T)


def _mahalanobis_distances(data, location, inv_sigma):
    """Return the non-NaN Mahalanobis distances of *data* rows to *location*."""
    Y = data - location
    R = np.sqrt((np.dot(Y, inv_sigma) * Y).sum(1))
    return R[~np.isnan(R)]


def _gaussian_kde(R):
    """Gaussian-kernel density estimate of *R* on a grid over [0, 1.2*max(R))."""
    grid = np.arange(0., 1.2 * np.amax(R), 0.0012 * np.amax(R))
    n = R.size
    sigma = 1.05 * np.std(R) * n ** (-0.2)
    kernel_arg = (np.tile(grid, (n, 1)).T - R) / sigma
    fh = ((1 / np.sqrt(2 * np.pi))
          * np.exp(-0.5 * kernel_arg ** 2)).sum(1) / (n * sigma)
    return grid, fh


def _matched_chi2(R, p):
    """Frozen chi2(p) with loc/scale chosen to match the mean and variance of *R*."""
    diff_scale = np.sqrt(R.var() / float(chi2.stats(p, moments='v')))
    diff_loc = R.mean() - float(chi2.stats(p, scale=diff_scale, moments='m'))
    return chi2(p, loc=diff_loc, scale=diff_scale)


def _chi2_fit_errors(data, location, covariance, color, label):
    """Return (MSE, IMSE) between the distance density and its chi2 template."""
    p = data.shape[1]
    R = _mahalanobis_distances(data, location, _pseudo_inverse(covariance))
    grid, fh = _gaussian_kde(R)
    if PLOT:
        plt.plot(grid, fh, color=color)
    template_pdf = _matched_chi2(R, p).pdf(grid)
    if PLOT:
        plt.plot(grid, template_pdf, linestyle='--', color=color)
    squared_error = (fh - template_pdf) ** 2
    mse = squared_error.mean()
    imse = 0.5 * squared_error.sum() * (grid[1] - grid[0])
    if PLOT:
        print("MSE (%s case) =" % label, mse)
        print("IMSE (%s case) =" % label, imse)
    return mse, imse


def test_fast_mcd(data):
    """Compare naive and robust (fast MCD) scatter estimates on *data*.

    For each estimate the Mahalanobis distances of the observations are
    computed, their density is estimated with a Gaussian kernel, and that
    density is compared with a chi-squared template (``data.shape[1]``
    degrees of freedom) whose location and scale match the mean and
    variance of the distances.  When the module-level ``PLOT`` flag is
    true, the densities are plotted and the errors printed.

    Args:
        data: 2-D array, one observation per row.

    Returns:
        Tuple ``(mse_naive, mse_robust, imse_naive, imse_robust)``.
    """
    if PLOT:
        plt.figure()

    # Naive estimates: sample mean and sample covariance.  The sample
    # covariance itself is inverted here; the previous code referenced the
    # not-yet-computed robust covariance, and the resulting NameError was
    # swallowed, silently forcing the SVD fallback.
    mse_naive, imse_naive = _chi2_fit_errors(
        data, data.mean(0), np.cov(data.T), 'blue', 'naive')

    # Robust estimates from fast MCD.
    robust_location, robust_covariance = fast_mcd(data)
    mse_robust, imse_robust = _chi2_fit_errors(
        data, robust_location, robust_covariance, 'green', 'robust')

    if PLOT:
        plt.legend(('empirical distribution (naive)', 'chi-2 (naive)',
                    'empirical distribution (robust)', 'chi-2 (robust)'),
                   loc='upper center', bbox_to_anchor=(0.5, 0.))
        plt.show()

    return mse_naive, mse_robust, imse_naive, imse_robust
# Flatten the two images to 1-D vectors before comparing them.
imgh = np.reshape(img, nx*ny)
imgh1 = np.reshape(img_mask1, nx1*ny1)


df = 36*38
# NOTE(review): despite its name, `dof` holds the chi-squared distance
# between the two flattened images, not a number of degrees of freedom.
dof = chi2_distance(imgh, imgh1)
# Distances to the individual rows of `imght`; the results are not stored.
# NOTE(review): presumably these were read off interactively to fill
# `chi_sfh` below -- confirm.
chi2_distance(imgh, imght[0,:])
chi2_distance(imgh, imght[1,:])
chi2_distance(imgh, imght[2,:])
chi2_distance(imgh, imght[3,:])
chi2_distance(imgh, imght[4,:])
chi2_distance(imgh, imght[5,:])
chi2_distance(imgh, imght[6,:])

chi_sfh = np.array([502.5312995308081, 580.45729191839803, 204.19667370317001,
                    518.27719309677684, 1534.8645907539676, 1555.9265125639893])


# Equal weights summing to 1, so the bar heights are fractions of the sample.
weights = np.ones_like(chi_sfh)/float(len(chi_sfh))
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
xu = np.linspace(chi2.ppf(0.01, dof), chi2.ppf(0.99, dof), 100)
# Raw strings keep the LaTeX backslashes without relying on invalid escape
# sequences (\c, \l, \o); the rendered text is unchanged.
ax.axvline(x=dof/df, color='k', linestyle='dashed', lw=4,
           label=r'UGC11680NED01 $\chi^2$')
# `density` replaces the `normed` keyword, which Matplotlib removed.
ax.hist(chi_sfh/df, bins=10, density=False, weights=weights, histtype='step',
        lw=3, label=r'Mass $10<\log (M/M_{\odot})<11$ , color $2<g-r<3$')
ax.legend(loc='best', frameon=False)
ax.set_ylabel(r'Probability density $\chi ^2$')
ax.set_xlabel('$x$')
plt.show()


Example #11
0
    # Fit `modified_sch` to the bins selected by `index`, using `yerr` as
    # the per-point sigma and a four-value initial guess.
    popt_pdf2, pcov_pdf2 = curve_fit(
        modified_sch,
        bins_final[index],
        pdf_spinpar[index],
        sigma=yerr[index],
        p0=[(1e-3, 4., 0.6, 1e-3)])  #,maxfev=100000)#,p0=[(1e-3,4.,0.6,1e-3)])
    print('popt_pdf2 = ', popt_pdf2)
    # Evaluate the fitted model on every bin; only the `index` subset enters
    # the chi-squared below.
    model_pdf2 = modified_sch(bins_final, *popt_pdf2)
    # Chi-squared of the fit. This local name is `chi2`; the scipy
    # distribution is reached through the `chi2scipy` name instead.
    chi2 = np.sum(
        (pdf_spinpar[index] - model_pdf2[index])**2 / (yerr[index])**2)
    # NOTE(review): 5 is subtracted although the initial guess supplies 4
    # parameters -- confirm the intended number of fitted parameters.
    dof = len(pdf_spinpar[index]) - 5
    print('chi2 = ', chi2)
    print('dof = ', dof)
    chi2r = chi2 / dof
    print('chi2r = ', chi2r)
    # First element returned by chi2scipy.stats(dof); presumably the mean
    # of the chi-squared distribution with `dof` degrees of freedom.
    chi2_expected = chi2scipy.stats(dof)[0]
    print('expexted chi2 = ', chi2_expected)

    # Store the best-fit parameters and the diagonal of pcov in table t[i].
    # NOTE(review): the 'errors' column holds the raw diagonal of
    # `pcov_pdf2`; if that matrix is a covariance these are variances, not
    # standard errors -- confirm which is wanted.
    t[i].add_column(Column(name='pars', data=popt_pdf2, unit=''))
    t[i].add_column(Column(name='errors', data=np.diag(pcov_pdf2), unit=''))
    outsch = os.path.join(this_dir, 'tables',
                          'schechter_HMD_lambda_z_%.3g.fit' % (z_snap))
    t[i].write(outsch, overwrite=True)

    # Scatter the measured values for this snapshot, labelled by z_snap.
    ax.scatter(bins_final,
               pdf_spinpar,
               label=r'$z=%.3g$' % (z_snap),
               ls='None',
               marker='o',
               s=15)
    #    ax.fill_between(bins_final[index],pdf_spinpar[index] - yerr[index],pdf_spinpar[index] + yerr[index], alpha=0.4 )
Example #12
0
import numpy as np
from scipy.stats import chi2, norm
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# k degrees of freedom
k = 5
# Only the mean and variance are needed, so request just those moments.
th_mean, th_var = chi2.stats(k, moments='mv')
print("THEORY MEAN=", th_mean, "THEORY VARIANCE=", th_var)
# Probability density function, drawn between the 0.01 and 0.99 quantiles.
start = chi2.ppf(0.01, k)
stop = chi2.ppf(0.99, k)
# Theoretical distribution
x = np.linspace(start, stop, 100)
plt.plot(x, chi2.pdf(x, k), 'r-', lw=5, alpha=0.6, label='chi2 pdf')
# Histogram of random variates drawn from the same distribution.
r = chi2.rvs(k, size=1000)
# `density` replaces the `normed` keyword, which Matplotlib removed.
plt.hist(r, density=True, histtype='stepfilled', alpha=0.2)
plt.legend(loc='best', frameon=False)
plt.title('Theoretical Dist')
plt.ylabel('Prob Dens')
plt.xlabel('X')
plt.show()


# calculate means for 1000 expirements with n samples in each
def sample_mean(n):
    smeans = []
    for x in range(1000):
Example #13
0
def testChisquare(N, size):
    """Draw ``size`` values with ``lhw.rnchsq(N)`` and pass them to ``startWork``.

    The values are handed over together with the mean and variance that
    ``chi2.stats`` reports for ``N`` degrees of freedom, under the label
    "chiSquare".
    """
    samples = []
    for _ in range(size):
        samples.append(lhw.rnchsq(N))

    expected_mean, expected_var = chi2.stats(N, moments='mv')
    startWork(samples, expected_mean, expected_var, "chiSquare")
# Sorted copy of the non-NaN distances.
sortedR = np.sort(R[~np.isnan(R)])
# Positions of the quartiles in the sorted array.  They must be integers:
# float positions are not valid array indices.  Truncation matches what
# float indexing used to do.
qi, qe, qa = (int(len(sortedR) * q) for q in (0.25, 0.5, 0.75))
# Outlier bound: median plus three times the interquartile range.
bnd = (sortedR[qa] - sortedR[qi])*3 + sortedR[qe]

### Estimate the density with a gaussian kernel
nonnan_subjects_arg = np.where(~np.isnan(R))[0]
R = R[nonnan_subjects_arg]
x = np.arange(0., 1.2*np.amax(R), 0.0012*np.amax(R))
n = R.size
sigma = 1.05 * np.std(R) * n**(-0.2)
kernel_arg = (np.tile(x, (n,1)).T - R) / sigma
fh = ((1/np.sqrt(2*np.pi)) * np.exp(-0.5*kernel_arg**2)).sum(1) / (n*sigma)
# Plot the density with vertical lines at the median and at the bound.
plt.figure()
plt.plot(x, fh)
plt.vlines(sortedR[qe], 0, np.amax(fh))
plt.vlines(bnd, 0, np.amax(fh))
# Chi-squared template with loc/scale matched to the mean and variance of R.
p = labels.size
diff_scale = np.sqrt(R.var() / float(chi2.stats(p, moments='v')))
diff_loc = R.mean() - float(chi2.stats(p, scale=diff_scale, moments='m'))
template = chi2(p, loc=diff_loc, scale=diff_scale)
plt.plot(x, template.pdf(x), linestyle='--', color='green')
plt.show()



# Report the entries above the bound.  `nonnan_subjects_arg` maps positions
# in the filtered R back to positions in `actual_files`.
for i in np.where(R > bnd)[0]:
    print(actual_files[nonnan_subjects_arg[i]][26:38])
Example #15
0
# Write the table to results/zevo, creating the directory when missing.
out_table = os.path.join(this_dir, 'results', 'zevo', 'hsigma_params_Rvir.fit')
os.makedirs(os.path.dirname(out_table), exist_ok=True)
t.write(out_table, overwrite=True)

# Evaluate the fitted model on the full grids with the best-fit parameters.
fit_ = h_func([s_grid_all, xoff_grid_all, spin_grid_all, redshift_all], *popt)

# log10 of the data is needed several times below; compute it once.
log10_hsigma_all = np.log10(hsigma_all)
print('min log10(hsigma_) =', min(log10_hsigma_all))
print('min fit_ = ', min(fit_))

print('max log10(hsigma_) =', max(log10_hsigma_all))
print('max fit_ = ', max(fit_))

# Chi-squared of the fit in log space, and its reduced value.
log_residual = log10_hsigma_all - fit_
chi_2 = np.sum(log_residual**2 / herr_all**2)
dof = len(hsigma_all) - len(names)
chi_2r = chi_2 / dof
# chi2.stats with default moments; the whole return value is printed.
rv = chi2.stats(dof)
print('chi2 = ', chi_2)
print('dof = ', dof)
print('chi2r = ', chi_2r)
print('expected chi2 = ', rv)

# PDF of the residuals: normalised 100-bin histogram and bin centres.
res = log_residual / herr_all
pdf, b = np.histogram(res, bins=100, density=True)
bins = (b[:-1] + b[1:]) / 2

def ga(x, x0, sigma):
    """Return the Gaussian density with centre ``x0`` and width ``sigma`` at ``x``."""
    # Normalisation constant 1 / (sigma * sqrt(2*pi)).
    norm_const = 1/sigma/np.sqrt(2*np.pi)
    exponent = -(x-x0)**2/(2*sigma**2)
    return norm_const*np.exp(exponent)
# Fit the Gaussian `ga` to the residual histogram (bin centres vs density).
par, cov = curve_fit(ga, bins, pdf)
# Best-fit Gaussian evaluated at the bin centres.
gauss = ga(bins, *par)