Ejemplo n.º 1
0
def var_ratio_ci_est(data1, data2, alpha):
    """Return a two-sided confidence interval for a ratio of two variances.

    The interval is centred on ``variance(data1) / variance(data2)`` and is
    scaled by the ``alpha / 2`` and ``1 - alpha / 2`` quantiles of ``f`` with
    ``len(data1) - 1`` and ``len(data2) - 1`` degrees of freedom.

    Args:
        data1: First sample (must support ``len``).
        data2: Second sample (must support ``len``).
        alpha: Total tail probability; half of it is placed in each tail.

    Returns:
        A ``(lower, upper)`` tuple.

    NOTE(review): ``f`` is presumably ``scipy.stats.f`` and ``variance`` the
    sample variance helper -- confirm against the module imports.
    """
    dfn, dfd = len(data1) - 1, len(data2) - 1
    ratio = variance(data1) / variance(data2)
    # Dividing by the larger quantile gives the lower bound, and vice versa.
    lower = ratio / f.ppf(1 - alpha / 2, dfn, dfd)
    upper = ratio / f.ppf(alpha / 2, dfn, dfd)
    return lower, upper
Ejemplo n.º 2
0
def t_test(data1, data2=None, tail='both', mu=0, equal=True):
    """Run a one-sample or two-sample t-test and return ``(t, df, p)``.

    When ``data2`` is ``None`` the mean of ``data1`` is compared with ``mu``.
    Otherwise ``mean(data1) - mean(data2)`` is compared with ``mu``: with a
    pooled standard deviation when ``equal`` is truthy, or with the unpooled
    standard error and fractional degrees of freedom when it is not.

    Args:
        data1: First sample.
        data2: Optional second sample.
        tail: ``'both'``, ``'left'`` or ``'right'``.
        mu: Hypothesised mean (one sample) or mean difference (two samples).
        equal: Whether the two population variances are treated as equal.

    Returns:
        ``(t_val, df, p)``.

    Raises:
        AssertionError: If ``tail`` is not one of the three accepted values.

    NOTE(review): ``mean``/``std``/``variance`` are external helpers, assumed
    to be sample statistics; ``t`` is presumably ``scipy.stats.t``.
    """
    assert tail in ('both', 'left',
                    'right'), 'tail should be one of "both","left","right"'

    if data2 is not None:
        size1, size2 = len(data1), len(data2)
        diff = mean(data1) - mean(data2)
        var1, var2 = variance(data1), variance(data2)
        if not equal:
            # Unequal variances: per-sample variance-of-the-mean terms.
            part1 = var1 / size1
            part2 = var2 / size2
            t_val = (diff - mu) / sqrt(part1 + part2)
            df = (part1 + part2) ** 2 / (
                part1 ** 2 / (size1 - 1) + part2 ** 2 / (size2 - 1))
        else:
            # Equal variances: pool both sample variances.
            pooled_sd = sqrt(((size1 - 1) * var1 + (size2 - 1) * var2)
                             / (size1 + size2 - 2))
            t_val = (diff - mu) / (pooled_sd * sqrt(1 / size1 + 1 / size2))
            df = size1 + size2 - 2
    else:
        size = len(data1)
        std_err = std(data1) / sqrt(size)
        t_val = (mean(data1) - mu) / std_err
        df = size - 1

    if tail == "left":
        p = t.cdf(t_val, df)
    elif tail == "both":
        p = 2 * (1 - t.cdf(abs(t_val), df))
    else:
        p = 1 - t.cdf(t_val, df)
    return t_val, df, p
Ejemplo n.º 3
0
def var_ratio_ci_est(data1, data2, alpha):
    """Confidence interval for the ratio of two unknown population variances.

    Args:
        data1: First sample.
        data2: Second sample.
        alpha: Total tail probability, split evenly between both tails.

    Returns:
        ``(lower, upper)``, each rounded to 3 decimal places.
    """
    dof1 = len(data1) - 1
    dof2 = len(data2) - 1
    ratio = variance(data1) / variance(data2)
    # The upper quantile produces the lower limit and the lower quantile the
    # upper limit, because the quantile sits in the denominator.
    lower = ratio / f.ppf(1 - alpha / 2, dof1, dof2)
    upper = ratio / f.ppf(alpha / 2, dof1, dof2)
    return round(lower, 3), round(upper, 3)
Ejemplo n.º 4
0
def var_ci_est(data, alpha):
    """Interval estimate of the variance when the mean is unknown.

    Args:
        data: The sample.
        alpha: Total tail probability, split evenly between both tails.

    Returns:
        ``(lower, upper)``, each rounded to 2 decimal places.
    """
    dof = len(data) - 1
    scaled_var = dof * variance(data)
    # Larger quantile -> lower limit; smaller quantile -> upper limit.
    limits = (
        scaled_var / chi2.ppf(1 - alpha / 2, dof),
        scaled_var / chi2.ppf(alpha / 2, dof),
    )
    return tuple(round(limit, 2) for limit in limits)
Ejemplo n.º 5
0
def f_test(data1, data2, tail="both", ratio=1):
    """F-test of the ratio of two variances against ``ratio``.

    Args:
        data1: First sample.
        data2: Second sample.
        tail: ``'both'``, ``'left'`` or ``'right'``.
        ratio: Hypothesised ratio of the two variances.

    Returns:
        ``(f_val, df1, df2, p)`` with ``df1 = len(data1) - 1`` and
        ``df2 = len(data2) - 1``.

    Raises:
        AssertionError: If ``tail`` is not one of the accepted values.
    """
    assert tail in ('both', 'left',
                    'right'), 'tail should be one of "both","left","right"'
    df1, df2 = len(data1) - 1, len(data2) - 1
    f_val = variance(data1) / variance(data2) / ratio

    left_area = f.cdf(f_val, df1, df2)
    if tail == "left":
        p = left_area
    elif tail == "both":
        # Double the smaller of the two tail areas.
        p = 2 * min(1 - left_area, left_area)
    else:
        p = 1 - left_area
    return f_val, df1, df2, p
Ejemplo n.º 6
0
def simple_linear_reg(x, y):
    assert len(x) == len(y)
    n = len(x)
    assert n > 1

    mean_x = mean(x)
    mean_y = mean(y)

    beta1 = covariance(x, y)/variance(x)
    beta0 = mean_y-beta1*mean_x

    y_hat = [beta0+beta1*e for e in x]
    ss_residual = sum((e1-e2)**2 for e1, e2 in zip(y, y_hat))
    se_model = sqrt(ss_residual/(n-2))

    t_value = beta1/(se_model/sqrt((n-1)*variance(x)))
    p = 2*(1-t.cdf(abs(t_value), n-2))

    return beta0, beta1, t_value, n-2, p
Ejemplo n.º 7
0
def t_test(data1, data2=None, tail='both', mu=0.0, equal=True):
    """Run a one-sample or two-sample t-test.

    When ``data2`` is ``None`` the mean of ``data1`` is compared with ``mu``.
    Otherwise ``mean(data1) - mean(data2)`` is compared with ``mu``, using a
    pooled standard deviation when ``equal`` is truthy and the unpooled
    standard error with fractional degrees of freedom when it is not.

    Args:
        data1: First sample.
        data2: Optional second sample.
        tail: ``'both'``, ``'left'`` or ``'right'``.
        mu: Hypothesised mean (one sample) or mean difference (two samples).
        equal: Whether the two population variances are treated as equal.

    Returns:
        ``(t_val, df, p)`` with ``t_val`` and ``df`` rounded to 2 decimals.

    Raises:
        AssertionError: If ``tail`` is not one of the accepted values.

    NOTE(review): ``mean``/``std``/``variance`` are external helpers, assumed
    to be sample statistics; ``t`` is presumably ``scipy.stats.t``.
    """

    assert tail in ['both', 'left',
                    'right'], 'tail should be one of "both", "left", "right"'

    if data2 is None:
        # Single population.
        mean_val = mean(data1)
        se = std(data1) / np.sqrt(len(data1))
        t_val = (mean_val - mu) / se
        df = len(data1) - 1
    else:
        # Two populations.
        n1 = len(data1)
        n2 = len(data2)
        mean_diff = mean(data1) - mean(data2)
        sample1_var = variance(data1)
        sample2_var = variance(data2)
        if equal:
            # Equal variances: pooled standard deviation.
            sw = np.sqrt(((n1 - 1) * sample1_var + (n2 - 1) * sample2_var) /
                         (n1 + n2 - 2))
            # Each sample contributes its own 1/n term (was 1/n2 twice).
            t_val = (mean_diff - mu) / (sw * np.sqrt(1 / n1 + 1 / n2))
            df = n1 + n2 - 2
        else:
            # Unequal variances.
            se = np.sqrt(sample1_var / n1 + sample2_var / n2)
            t_val = (mean_diff - mu) / se
            df = (sample1_var / n1 + sample2_var / n2)**2 / (
                (sample1_var / n1)**2 / (n1 - 1) + (sample2_var / n2)**2 /
                (n2 - 1))

    if tail == 'both':
        # Two-tailed test.
        p = 2 * (1 - t.cdf(abs(t_val), df))
    elif tail == 'left':
        # Left-tailed test.
        p = t.cdf(t_val, df)
    else:
        # Right-tailed test.
        p = 1 - t.cdf(t_val, df)

    return round(t_val, 2), round(df, 2), p
Ejemplo n.º 8
0
def mean_diff_ci_t_est(data1, data2, alpha, equal=True):
    """Two-sided t confidence interval for ``mean(data1) - mean(data2)``.

    Args:
        data1: First sample.
        data2: Second sample.
        alpha: Total tail probability, split evenly between both tails.
        equal: If truthy, pool the two sample variances; otherwise use the
            unpooled standard error with fractional degrees of freedom.

    Returns:
        A ``(lower, upper)`` tuple.
    """
    size1, size2 = len(data1), len(data2)
    centre = mean(data1) - mean(data2)
    var1, var2 = variance(data1), variance(data2)

    if not equal:
        part1 = var1 / size1
        part2 = var2 / size2
        df = (part1 + part2) ** 2 / (
            part1 ** 2 / (size1 - 1) + part2 ** 2 / (size2 - 1))
        std_err = sqrt(part1 + part2)
    else:
        df = size1 + size2 - 2
        pooled_sd = sqrt(((size1 - 1) * var1 + (size2 - 1) * var2) / df)
        std_err = pooled_sd * sqrt(1 / size1 + 1 / size2)

    # ppf(alpha / 2) is the lower-tail quantile; abs() gives the critical value.
    critical = abs(t.ppf(alpha / 2, df))
    return centre - std_err * critical, centre + std_err * critical
Ejemplo n.º 9
0
def mean_diff_ci_t_est(data1, data2, alpha, equal=True):
    """Confidence interval for a mean difference, population variances unknown.

    Args:
        data1: First sample.
        data2: Second sample.
        alpha: Total tail probability, split evenly between both tails.
        equal: If truthy, treat the two unknown variances as equal (pooled);
            otherwise treat them as unequal.

    Returns:
        ``(lower, upper)``, each rounded to 2 decimal places.
    """
    size1, size2 = len(data1), len(data2)
    centre = mean(data1) - mean(data2)
    var1, var2 = variance(data1), variance(data2)

    if not equal:
        # Both variances unknown and unequal.
        part1 = var1 / size1
        part2 = var2 / size2
        df = (part1 + part2) ** 2 / (
            part1 ** 2 / (size1 - 1) + part2 ** 2 / (size2 - 1))
        std_err = np.sqrt(part1 + part2)
    else:
        # Both variances unknown but equal.
        df = size1 + size2 - 2
        pooled_sd = np.sqrt(((size1 - 1) * var1 + (size2 - 1) * var2) / df)
        std_err = pooled_sd * np.sqrt(1 / size1 + 1 / size2)

    critical = abs(t.ppf(alpha / 2, df))
    margin = std_err * critical
    return round(centre - margin, 2), round(centre + margin, 2)
def var_ci_est(data, alpha):
    """Confidence interval for the variance when the population mean is unknown.

    Args:
        data: The sample.
        alpha: Total tail probability; ``alpha / 2`` is placed in each tail.

    Returns:
        A ``(lower, upper)`` tuple.
    """
    dof = len(data) - 1  # sample size minus one
    scaled_var = dof * variance(data)  # (n - 1) * sample variance

    # ppf takes the area to the LEFT of the quantile, whereas the textbook
    # derivation is written in terms of right-tail areas; hence the larger
    # quantile ends up under the lower limit.
    upper_quantile = chi2.ppf(1 - alpha / 2, dof)
    lower_quantile = chi2.ppf(alpha / 2, dof)
    return scaled_var / upper_quantile, scaled_var / lower_quantile
Ejemplo n.º 11
0
def f_test(data1, data2, tail='both', ratio=1):
    """F-test for two populations: variance ratio against ``ratio``.

    Args:
        data1: First sample.
        data2: Second sample.
        tail: ``'both'``, ``'left'`` or ``'right'``.
        ratio: Hypothesised ratio of the two variances.

    Returns:
        ``(f_val, df1, df2, p)`` with ``f_val`` rounded to 4 decimals and
        ``p`` rounded to 5 decimals.

    Raises:
        AssertionError: If ``tail`` is not one of the accepted values.
    """
    assert tail in ('both', 'left',
                    'right'), 'tail should be one of "both", "left", "right"'
    dfn, dfd = len(data1) - 1, len(data2) - 1
    f_val = variance(data1) / variance(data2) / ratio

    lower_tail = f.cdf(f_val, dfn, dfd)
    upper_tail = 1 - lower_tail
    if tail == 'left':
        # Left-tailed test.
        p = lower_tail
    elif tail == 'both':
        # Two-tailed test: double the smaller tail area.
        p = 2 * min(upper_tail, lower_tail)
    else:
        # Right-tailed test.
        p = upper_tail

    return round(f_val, 4), dfn, dfd, round(p, 5)
Ejemplo n.º 12
0
def chi2_test(data, tail="both", sigma2=1):
    """Chi-square test of a sample's variance against ``sigma2``.

    Args:
        data: The sample.
        tail: ``'both'``, ``'left'`` or ``'right'``.
        sigma2: Hypothesised variance.

    Returns:
        ``(chi2_val, df, p)`` with ``df = len(data) - 1``.

    Raises:
        AssertionError: If ``tail`` is not one of the accepted values.
    """
    assert tail in ('both', 'left',
                    'right'), 'tail should be one of "both","left","right"'
    dof = len(data) - 1
    chi2_val = dof * variance(data) / sigma2

    left_area = chi2.cdf(chi2_val, dof)
    if tail == "left":
        p = left_area
    elif tail == "both":
        # Double the smaller of the two tail areas.
        p = 2 * min(1 - left_area, left_area)
    else:
        p = 1 - left_area
    return chi2_val, dof, p
Ejemplo n.º 13
0
def chi2_test(data, tail='both', sigma2=1):
    """Chi-square variance test for a single population.

    Args:
        data: The sample.
        tail: ``'both'``, ``'left'`` or ``'right'``.
        sigma2: Hypothesised variance.

    Returns:
        ``(chi2_val, df, p)`` with ``chi2_val`` rounded to 2 decimals and
        ``df = len(data) - 1``.

    Raises:
        AssertionError: If ``tail`` is not one of the accepted values.
    """
    assert tail in ('both', 'left',
                    'right'), 'tail should be one of "both", "left", "right"'

    dof = len(data) - 1
    chi2_val = dof * variance(data) / sigma2
    lower_tail = chi2.cdf(chi2_val, dof)

    if tail == 'left':
        # Left-tailed test.
        p = lower_tail
    elif tail == 'both':
        # Two-tailed test.
        p = 2 * min(1 - lower_tail, lower_tail)
    else:
        # Right-tailed test.
        p = 1 - lower_tail

    return round(chi2_val, 2), dof, p
Ejemplo n.º 14
0
def var_ci_est(data, alpha):
    """Return a two-sided chi-square confidence interval for the variance.

    Args:
        data: The sample.
        alpha: Total tail probability, split evenly between both tails.

    Returns:
        A ``(lower, upper)`` tuple.
    """
    dof = len(data) - 1
    numerator = dof * variance(data)
    # The upper quantile bounds the interval from below, the lower from above.
    quantile_hi = chi2.ppf(1 - alpha / 2, dof)
    quantile_lo = chi2.ppf(alpha / 2, dof)
    return numerator / quantile_hi, numerator / quantile_lo
Ejemplo n.º 15
0
def var_one_sided_upper_ci_est(data,alpha):
    """Return a one-sided confidence interval giving an upper limit on the variance.

    Args:
        data: The sample.
        alpha: Tail probability placed entirely in one tail.

    Returns:
        ``(-inf, upper)``.
    """
    dof = len(data) - 1
    # ppf takes the area to the left; the upper limit divides by the LOWER
    # quantile.
    upper = dof * variance(data) / chi2.ppf(alpha, dof)
    return -inf, upper
Ejemplo n.º 16
0
def var_one_sided_lower_ci_est(data,alpha):
    """Return a one-sided confidence interval giving a lower limit on the variance.

    Args:
        data: The sample.
        alpha: Tail probability placed entirely in one tail.

    Returns:
        ``(lower, inf)``.
    """
    dof = len(data) - 1
    # ppf takes the area to the left; the lower limit divides by the UPPER
    # quantile.
    lower = dof * variance(data) / chi2.ppf(1 - alpha, dof)
    return lower, inf
Ejemplo n.º 17
0
    # Print each descriptive statistic of `data` in turn as a manual check.
    # Test frequency
    print(frequency(data))

    # Test mode
    print(mode(data))

    # Test median
    print(median(data))

    # Test mean
    print(mean(data))

    # Test range
    print(rng(data))

    # Test quartiles
    print(quartile(data))

    # Test variance
    print(variance(data))

    # Test standard deviation
    print(std(data))






Ejemplo n.º 18
0
import random
from playStats.descriptive_stats import mean, variance
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # For a range of growing sample sizes, draw a standard-normal sample and
    # plot its mean and variance against the sample size.
    sizes = []
    means_by_size = []
    vars_by_size = []
    for size in range(20, 10001, 50):
        draws = [random.gauss(0.0, 1.0) for _ in range(size)]
        sizes.append(size)
        means_by_size.append(mean(draws))
        vars_by_size.append(variance(draws))

    plt.plot(sizes, means_by_size)
    plt.plot(sizes, vars_by_size)
    plt.show()
Ejemplo n.º 19
0
import random

import matplotlib.pyplot as plt

from playStats.descriptive_stats import mean
from playStats.descriptive_stats import variance

# Monte Carlo illustration of the chi-square distribution: every simulated
# value is the sum of DEGREES_OF_FREEDOM squared standard-normal draws.
DEGREES_OF_FREEDOM = 1  # raise (e.g. to 8) to demonstrate more degrees of freedom
SAMPLE_COUNT = 50000

# Only the draws that are actually summed are generated, instead of always
# drawing eight values and discarding seven of them.
chi2 = [
    sum(random.normalvariate(0, 1) ** 2 for _ in range(DEGREES_OF_FREEDOM))
    for _ in range(SAMPLE_COUNT)
]
# Alternative demonstrations (swap in for the list above):
#   normal distribution:  [random.normalvariate(0, 1) for _ in range(SAMPLE_COUNT)]
#   uniform distribution: [random.random() for _ in range(SAMPLE_COUNT)]  # floats in [0.0, 1.0)

print(variance(chi2))  # variance of the simulated values
# plt.figure(num="Chi-square distributions with different degrees of freedom")
plt.hist(chi2, bins=30)
plt.show()

# Optional extra plot of the standard normal density (requires numpy as np):
# x = np.linspace(-5, 5, 1000)
# f = 1 / np.sqrt(2 * np.pi) * np.exp(-x**2 / 2)
# plt.plot(x, f)
# plt.show()
        总体均值未知,求方差的置信空间
        data为传入的样本; alpha为需要传入的置信水平的值
    """
    n = len(data)  #求样本容量
    s2 = variance(data)  #求样本方差

    chi2_lower_value = chi2.ppf(
        alpha / 2, n - 1)  #求坐标左侧Z面积,没错你没看错,因为数学证明的过程中是以右侧为基准的,但是scipy是以左侧为基准的
    chi2_upper_value = chi2.ppf(1 - alpha / 2, n - 1)  #求坐标右侧Z面积
    return (n - 1) * s2 / chi2_upper_value, (n - 1) * s2 / chi2_lower_value


if __name__ == '__main__':
    # Monthly income data for 18-year-olds.
    salary_18 = [1484, 785, 1598, 1366, 1716, 1020, 1716, 785, 3113, 1601]
    # Monthly income data for 35-year-olds.
    salary_35 = [902, 4508, 3809, 3923, 4276, 2065, 1601, 553, 3345, 2182]
    groups = (salary_18, salary_35)

    for salaries in groups:
        print(mean(salaries))  # point estimate of the mean monthly income
        print(mean_ci_est(salaries, 0.05))  # interval estimate of the mean
    print()

    for salaries in groups:
        print(std(salaries))  # square root of the variance point estimate
        print(variance(salaries))  # point estimate of the variance
        print(var_ci_est(salaries, 0.05))  # interval estimate of the variance
    print()
Ejemplo n.º 21
0
import random
from playStats.descriptive_stats import mean
from playStats.descriptive_stats import variance
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Draw standard-normal samples of increasing size and plot how the sample
    # mean and sample variance behave, with reference lines at 0 and 1.
    sizes = []
    sample_means = []
    sample_variances = []
    for size in range(20, 10001, 50):
        draws = [random.gauss(0.0, 1.0) for _ in range(size)]
        sizes.append(size)
        sample_means.append(mean(draws))
        sample_variances.append(variance(draws))

    plt.plot(sizes, sample_means)
    plt.axhline(0, c='r')  # reference line for the mean curve

    plt.plot(sizes, sample_variances)
    plt.axhline(1, c='b')  # reference line for the variance curve

    plt.show()