def f_test(data1, data2, tail="both", ratio=1):
    """
    F test comparing the variances of two samples.

    The statistic is ``variance(data1) / variance(data2) / ratio`` and is
    referred to an F distribution with ``len(data1) - 1`` and
    ``len(data2) - 1`` degrees of freedom.

    NOTE(review): ``variance`` is the module's helper (presumably the sample
    variance) and ``f`` is presumably ``scipy.stats.f`` -- confirm against
    the file's imports.

    :param data1: first sample (numerator variance)
    :param data2: second sample (denominator variance)
    :param tail: which tail the p-value covers: "both", "left" or "right"
    :param ratio: hypothesised variance ratio; the statistic is divided by it
    :return: tuple ``(f_val, df1, df2, p)``
    :raises AssertionError: if ``tail`` is not one of the accepted values
    """

    assert tail in ["both", "left", "right"], \
        'tail should be one of “both”, “left”, “right”'

    df1 = len(data1) - 1
    df2 = len(data2) - 1
    f_val = variance(data1) / variance(data2) / ratio

    # The upper tail uses the survival function rather than ``1 - cdf``:
    # once the cdf rounds to 1.0 the subtraction collapses to exactly 0.0,
    # whereas ``sf`` keeps small p-values accurate.
    if tail == "left":
        p = f.cdf(f_val, df1, df2)
    elif tail == "right":
        p = f.sf(f_val, df1, df2)
    else:
        # Two-sided: double the smaller of the two tail probabilities.
        p = 2 * min(f.sf(f_val, df1, df2), f.cdf(f_val, df1, df2))

    return f_val, df1, df2, p
# Example #2
# 0
def mean_diff_ci_t_est(data1, data2, alpha, equal=True):
    """
    t-based confidence interval for the difference of two means.

    The interval is centred on ``mean(data1) - mean(data2)``; its
    half-width is a standard error times the absolute ``alpha / 2``
    quantile of a t distribution.

    :param data1: first sample
    :param data2: second sample
    :param alpha: the ``alpha / 2`` t quantile sets the half-width
    :param equal: True pools the two variances (``n1 + n2 - 2`` degrees of
        freedom); False keeps them separate and uses the
        Welch-Satterthwaite degrees of freedom
    :return: tuple ``(lower, upper)``
    """
    size_a, size_b = len(data1), len(data2)
    centre = mean(data1) - mean(data2)
    var_a = variance(data1)
    var_b = variance(data2)

    if equal:
        # Variances unknown but assumed equal: pooled standard deviation.
        dof = size_a + size_b - 2
        pooled_sd = sqrt(
            ((size_a - 1) * var_a + (size_b - 1) * var_b) / dof)
        critical = abs(t.ppf(alpha / 2, dof))
        margin = pooled_sd * sqrt(1 / size_a + 1 / size_b) * critical
    else:
        # Variances unknown and not assumed equal.
        part_a = var_a / size_a
        part_b = var_b / size_b
        dof = (part_a + part_b)**2 / (
            part_a**2 / (size_a - 1) + part_b**2 / (size_b - 1))
        critical = abs(t.ppf(alpha / 2, dof))
        margin = sqrt(part_a + part_b) * critical

    return centre - margin, centre + margin
# Example #3
# 0
def var_ratio_ci_est(data1, data2, alpha):
    """
    Confidence interval for the ratio of two variances.

    Divides ``variance(data1) / variance(data2)`` by the upper and lower
    ``alpha / 2`` quantiles of an F distribution with
    ``len(data1) - 1`` and ``len(data2) - 1`` degrees of freedom.

    :param data1: first sample (numerator variance)
    :param data2: second sample (denominator variance)
    :param alpha: total tail probability, split evenly between both ends
    :return: tuple ``(lower, upper)``
    """
    dof_num = len(data1) - 1
    dof_den = len(data2) - 1
    # F quantiles cutting off alpha / 2 in each tail.
    quantile_lo = f.ppf(alpha / 2, dof_num, dof_den)
    quantile_hi = f.ppf(1 - alpha / 2, dof_num, dof_den)
    observed_ratio = variance(data1) / variance(data2)
    # Dividing by the larger quantile gives the lower bound.
    return observed_ratio / quantile_hi, observed_ratio / quantile_lo
def t_test(data1, data2=None, tail="both", mu=0, equal=True):
    """
    One-sample or two-sample t test.

    With ``data2`` omitted, the mean of ``data1`` is tested against ``mu``.
    With ``data2`` given, ``mean(data1) - mean(data2)`` is tested against
    ``mu``.

    NOTE(review): ``mean``, ``std`` and ``variance`` are the module's
    helpers (presumably sample statistics) and ``t`` is presumably
    ``scipy.stats.t`` -- confirm against the file's imports.

    :param data1: first sample
    :param data2: optional second sample; None selects the one-sample test
    :param tail: which tail the p-value covers: "both", "left" or "right"
    :param mu: hypothesised mean (one sample) or mean difference (two samples)
    :param equal: two-sample only. True pools the variances
        (``n1 + n2 - 2`` degrees of freedom); False keeps them separate and
        uses the Welch-Satterthwaite degrees of freedom
    :return: tuple ``(t_val, df, p)``
    :raises AssertionError: if ``tail`` is not one of the accepted values
    """
    assert tail in ["both", "left", "right"], \
        'tail should be one of "both", "left", "right"'

    if data2 is None:
        # One-sample test.
        mean_val = mean(data1)
        se = std(data1) / sqrt(len(data1))
        t_val = (mean_val - mu) / se
        df = len(data1) - 1
    else:
        n1 = len(data1)
        n2 = len(data2)
        mean_diff = mean(data1) - mean(data2)
        sample1_var = variance(data1)
        sample2_var = variance(data2)

        if equal:
            # Pooled standard deviation across both samples.
            sw = sqrt(((n1 - 1) * sample1_var + (n2 - 1) * sample2_var) /
                      (n1 + n2 - 2))
            t_val = (mean_diff - mu) / (sw * sqrt(1 / n1 + 1 / n2))
            df = n1 + n2 - 2
        else:
            se = sqrt(sample1_var / n1 + sample2_var / n2)
            t_val = (mean_diff - mu) / se
            # Welch-Satterthwaite approximation; df is generally fractional.
            df_numerator = (sample1_var / n1 + sample2_var / n2)**2
            df_denominator = (sample1_var / n1)**2 / (n1 - 1) + (
                sample2_var / n2)**2 / (n2 - 1)
            df = df_numerator / df_denominator

    # Upper tails use the survival function rather than ``1 - cdf``: once
    # the cdf rounds to 1.0 the subtraction collapses to exactly 0.0,
    # whereas ``sf`` keeps small p-values accurate.
    if tail == "both":
        p = 2 * t.sf(abs(t_val), df)
    elif tail == "left":
        p = t.cdf(t_val, df)
    else:
        p = t.sf(t_val, df)

    return t_val, df, p
# Example #5
# 0
def var_ci_est(data, alpha):
    """
    Confidence interval for a variance.

    Divides ``(n - 1) * variance(data)`` by the upper and lower
    ``alpha / 2`` quantiles of a chi-square distribution with ``n - 1``
    degrees of freedom.

    :param data: the sample
    :param alpha: total tail probability, split evenly between both ends
    :return: tuple ``(lower, upper)``
    """
    dof = len(data) - 1
    scaled_var = dof * variance(data)
    # Chi-square quantiles cutting off alpha / 2 in each tail.
    quantile_lo = chi2.ppf(alpha / 2, dof)
    quantile_hi = chi2.ppf(1 - alpha / 2, dof)
    # Dividing by the larger quantile gives the lower bound.
    return scaled_var / quantile_hi, scaled_var / quantile_lo
def simple_linear_reg(y, x):
    """一元线性回归"""
    assert len(x) == len(y)

    n = len(x)
    assert n > 1

    mean_x = mean(x)
    mean_y = mean(y)

    beta1 = covariance(x, y) / variance(x)
    beta0 = mean_y - beta1 * mean_x

    y_hat = [beta0 + beta1 * e for e in x]
    ss_residual = sum((e1 - e2)**2 for e1, e2 in zip(y, y_hat))
    se_model = sqrt(ss_residual / (n - 2))

    t_value = beta1 / (se_model / sqrt((n - 1) * variance(x)))
    p = 2 * (1 - t.cdf(abs(t_value), n - 2))

    return beta0, beta1, t_value, n - 2, p
def chi2_test(data, tail="both", sigma2=1):
    """
    Chi-square test of one sample's variance against a hypothesised value.

    The statistic is ``(n - 1) * variance(data) / sigma2`` and is referred
    to a chi-square distribution with ``n - 1`` degrees of freedom.

    NOTE(review): ``variance`` is the module's helper (presumably the
    sample variance) and ``chi2`` is presumably ``scipy.stats.chi2`` --
    confirm against the file's imports.

    :param data: the sample
    :param tail: which tail the p-value covers: "both", "left" or "right"
    :param sigma2: hypothesised variance; the statistic is divided by it
    :return: tuple ``(chi2_val, n - 1, p)``
    :raises AssertionError: if ``tail`` is not one of the accepted values
    """

    assert tail in ["both", "left", "right"], \
        'tail should be one of “both”, “left”, “right”'

    df = len(data) - 1
    chi2_val = df * variance(data) / sigma2

    # The upper tail uses the survival function rather than ``1 - cdf``:
    # once the cdf rounds to 1.0 the subtraction collapses to exactly 0.0,
    # whereas ``sf`` keeps small p-values accurate.
    if tail == "left":
        p = chi2.cdf(chi2_val, df)
    elif tail == "right":
        p = chi2.sf(chi2_val, df)
    else:
        # Two-sided: double the smaller of the two tail probabilities.
        p = 2 * min(chi2.sf(chi2_val, df), chi2.cdf(chi2_val, df))

    return chi2_val, df, p
    zws2 = median(data_zws2)
    zws3 = median(data_zws3)
    print("zws", zws1, zws2, zws3)  # 再次认证中位数和极端值没有关联,是集中趋势

    # 测试均值
    data_jz = [1, 2, 3, 4, 5]
    jz = mean(data_jz)
    print("jz", jz)

    # 测试极差
    data_jc = [1, 2, 3, 999]
    jc = rng(data_jc)
    print("jc", jc)

    # 测试四分位数
    data_sfws = [1, 4, 2, 3, 5]
    sfws = quartile(data_sfws)
    print(sfws)

    # 测试方差
    data_fc = [1, 4, 2, 3, 5]
    fc = variance(data_fc)
    bzc = std(data_fc)
    print(fc, bzc)

    # 测试协方差,相关系数
    score = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    happy = [1, 3, 2, 6, 4, 5, 8, 10, 9, 7]
    print(covariance(score, happy))
    print(cor(score, happy))
import random
import matplotlib.pyplot as plt
from stats.descriptive_stats import mean, variance

if __name__ == '__main__':
    # Consistency demo: draw standard-normal samples of growing size and
    # plot the sample mean and sample variance against the sample size.

    # Sample sizes used on the x axis.
    indices = []
    # Sample mean for each size.
    sample_means = []
    # Sample variance for each size.
    sample_vars = []

    for sz in range(20, 10001, 50):
        indices.append(sz)
        # Draw sz values from a Gaussian with mean 0 and standard deviation 1.
        sample = [random.gauss(0.0, 1.0) for _ in range(sz)]
        sample_means.append(mean(sample))
        sample_vars.append(variance(sample))

    # Label both series; otherwise the two curves cannot be told apart.
    plt.plot(indices, sample_means, label="sample mean")
    plt.plot(indices, sample_vars, label="sample variance")
    plt.xlabel("sample size")
    plt.legend()
    # Expected picture: as the sample size grows, the sample mean settles
    # near the population mean (0) and the sample variance settles near the
    # population variance (1). That is what consistency means here; the
    # mean does not converge to the variance.
    plt.show()