Example #1
def show_continuous():
    """Show a variety of continuous distributions"""

    x = linspace(-10, 10, 201)

    # Normal distribution
    showDistribution(x, stats.norm, stats.norm(loc=2, scale=4), "Normal Distribution", "Z", "P(Z)", "")

    # Exponential distribution
    showDistribution(x, stats.expon, stats.expon(loc=-2, scale=4), "Exponential Distribution", "X", "P(X)", "")

    # Student's t-distribution
    # ... with 4, and with 10 degrees of freedom (DOF)
    plot(x, stats.norm.pdf(x), "g")
    hold(True)  # hold() was removed in matplotlib >= 3.0; overplotting is now the default
    showDistribution(x, stats.t(4), stats.t(10), "T-Distribution", "X", "P(X)", ["normal", "t=4", "t=10"])

    # F-distribution
    # ... with (3,4) and (10,15) DOF
    showDistribution(x, stats.f(3, 4), stats.f(10, 15), "F-Distribution", "F", "P(F)", ["(3,4) DOF", "(10,15) DOF"])

    # Weibull distribution
    # ... with the shape parameter set to 1 and 2
    # Don't worry that in Python it is called "weibull_min": the "weibull_max" is
    # simply mirrored about the origin.
    showDistribution(
        arange(0, 5, 0.02),
        stats.weibull_min(1),
        stats.weibull_min(2),
        "Weibull Distribution",
        "X",
        "P(X)",
        ["k=1", "k=2"],
        xmin=0,
        xmax=4,
    )

    # Uniform distribution
    showDistribution(x, stats.uniform, "", "Uniform Distribution", "X", "P(X)", "")

    # Logistic distribution
    showDistribution(x, stats.norm, stats.logistic, "Logistic Distribution", "X", "P(X)", ["Normal", "Logistic"])

    # Lognormal distribution
    x = logspace(-9, 1, 1001) + 1e-9
    showDistribution(x, stats.lognorm(2), "", "Lognormal Distribution", "X", "lognorm(X)", "", xmin=-0.1)

    # The log-lin plot has to be done by hand:
    plot(log(x), stats.lognorm.pdf(x, 2))
    xlim(-10, 4)
    title("Lognormal Distribution")
    xlabel("log(X)")
    ylabel("lognorm(X)")
    show()
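None of these snippets define the showDistribution helper they call. Below is a minimal sketch of what such a helper might look like, assuming it overlays the pdf of one or two scipy.stats distributions and treats an empty string as an omitted argument; the parameter names are illustrative, not the original API:

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

def showDistribution(x, d1, d2, tTxt, xTxt, yTxt, legendTxt, xmin=None, xmax=None):
    """Plot the pdf of one or two distributions over x; '' skips an argument."""
    plt.plot(x, d1.pdf(x))
    if d2 != '':
        plt.plot(x, d2.pdf(x), 'r')
    if legendTxt != '':
        plt.legend(legendTxt)
    plt.xlim(np.min(x) if xmin is None else xmin,
             np.max(x) if xmax is None else xmax)
    plt.title(tTxt)
    plt.xlabel(xTxt)
    plt.ylabel(yTxt)
    plt.show()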
Example #2
def ftest(data1, data2, alpha=0.05):
    """Two-sided F-test for the equality of two population variances."""
    alpha1 = alpha
    Sd1 = np.var(data1, ddof=1)  # sample variance (np.var defaults to the population variance)
    Sd2 = np.var(data2, ddof=1)  # sample variance
    n1 = len(data1)
    n2 = len(data2)
    F = Sd1 / Sd2  # F statistic
    if F > stats.f(n1 - 1, n2 - 1).ppf(1 - alpha / 2) or F < stats.f(
            n1 - 1, n2 - 1).ppf(alpha / 2):
        print("Reject H0 at the significance level of", alpha1, ".")
    else:
        print("Fail to reject H0 at the significance level of", alpha1, ".")
def show_continuous():
    """Show a variety of continuous distributions"""
        
    x = linspace(-10,10,201)
    
    # Normal distribution
    showDistribution(x, stats.norm, stats.norm(loc=2, scale=4),
                     'Normal Distribution', 'Z', 'P(Z)','')
    
    # Exponential distribution
    showDistribution(x, stats.expon, stats.expon(loc=-2, scale=4),
                     'Exponential Distribution', 'X', 'P(X)','')
    
    # Student's t-distribution
    # ... with 4, and with 10 degrees of freedom (DOF)
    plot(x, stats.norm.pdf(x), 'g-.')
    hold(True)  # hold() was removed in matplotlib >= 3.0; overplotting is now the default
    showDistribution(x, stats.t(4), stats.t(10),
                     'T-Distribution', 'X', 'P(X)',['normal', 't=4', 't=10'])
    
    # F-distribution
    # ... with (3,4) and (10,15) DOF
    showDistribution(x, stats.f(3,4), stats.f(10,15),
                     'F-Distribution', 'F', 'P(F)',['(3,4) DOF', '(10,15) DOF'])
    
    # Weibull distribution
    # ... with the shape parameter set to 1 and 2
    # Don't worry that in Python it is called "weibull_min": the "weibull_max" is
    # simply mirrored about the origin.
    showDistribution(arange(0,5,0.02), stats.weibull_min(1), stats.weibull_min(2),
                     'Weibull Distribution', 'X', 'P(X)',['k=1', 'k=2'], xmin=0, xmax=4)
    
    # Uniform distribution
    showDistribution(x, stats.uniform,'' ,
                     'Uniform Distribution', 'X', 'P(X)','')
    
    # Logistic distribution
    showDistribution(x, stats.norm, stats.logistic,
                     'Logistic Distribution', 'X', 'P(X)',['Normal', 'Logistic'])
    
    # Lognormal distribution
    x = logspace(-9,1,1001)+1e-9
    showDistribution(x, stats.lognorm(2), '',
                     'Lognormal Distribution', 'X', 'lognorm(X)','', xmin=-0.1)
    
    # The log-lin plot has to be done by hand:
    plot(log(x), stats.lognorm.pdf(x,2))
    xlim(-10, 4)
    title('Lognormal Distribution')
    xlabel('log(X)')
    ylabel('lognorm(X)')
    show()
Example #4
def show_continuous():
    """Show a variety of continuous distributions"""
        
    x = np.linspace(-10,10,201)
    
    # Normal distribution
    showDistribution(x, stats.norm, stats.norm(loc=2, scale=4),
                     'Normal Distribution', 'Z', 'P(Z)','')
    
    # Exponential distribution
    showDistribution(x, stats.expon, stats.expon(loc=-2, scale=4),
                     'Exponential Distribution', 'X', 'P(X)','')
    
    # Student's t-distribution
    # ... with 4, and with 10 degrees of freedom (DOF)
    plt.plot(x, stats.norm.pdf(x), 'g-.')
    plt.hold(True)  # plt.hold() was removed in matplotlib >= 3.0; overplotting is now the default
    showDistribution(x, stats.t(4), stats.t(10),
                     'T-Distribution', 'X', 'P(X)',['normal', 't=4', 't=10'])
    
    # F-distribution
    # ... with (3,4) and (10,15) DOF
    showDistribution(x, stats.f(3,4), stats.f(10,15),
                     'F-Distribution', 'F', 'P(F)',['(3,4) DOF', '(10,15) DOF'])
    
    # Weibull distribution
    # ... with the shape parameter set to 1 and 2
    # Don't worry that in Python it is called "weibull_min": the "weibull_max" is
    # simply mirrored about the origin.
    showDistribution(np.arange(0,5,0.02), stats.weibull_min(1), stats.weibull_min(2),
                     'Weibull Distribution', 'X', 'P(X)',['k=1', 'k=2'], xmin=0, xmax=4)
    
    # Uniform distribution
    showDistribution(x, stats.uniform,'' ,
                     'Uniform Distribution', 'X', 'P(X)','')
    
    # Logistic distribution
    showDistribution(x, stats.norm, stats.logistic,
                     'Logistic Distribution', 'X', 'P(X)',['Normal', 'Logistic'])
    
    # Lognormal distribution
    x = np.logspace(-9,1,1001)+1e-9
    showDistribution(x, stats.lognorm(2), '',
                     'Lognormal Distribution', 'X', 'lognorm(X)','', xmin=-0.1)
    
    # The log-lin plot has to be done by hand:
    plt.plot(np.log(x), stats.lognorm.pdf(x,2))
    plt.xlim(-10, 4)
    plt.title('Lognormal Distribution')
    plt.xlabel('log(X)')
    plt.ylabel('lognorm(X)')
    plt.show()
Example #5
def twoway_anova(data, alpha):
    '''
    Two-way ANOVA: analysis of variance with two categorical factors.
    Only the case without replication is implemented here.

    Parameters
    ----------
    data : m*n table of observations at each combination of factor levels,
           a standard m * n two-dimensional matrix
    alpha: significance level for the F-tests

    Returns
    -------
    R: strength of the relation between the factors and the response,
       R**2 = (SSR + SSC) / SST
    '''
    data = np.array(data)
    k, r = np.shape(data)  # k rows, r columns
    n = k * r  # total number of observations
    mrs = np.mean(data, axis=1)  # row means
    mcs = np.mean(data, axis=0)  # column means
    mt = np.mean(data)  # grand mean

    sst = (np.var(data) * k * r).round(4)  # total sum of squares
    ssr = (np.sum((mrs - mt)**2) * r).round(4)  # between-row sum of squares
    ssc = (np.sum((mcs - mt)**2) * k).round(4)  # between-column sum of squares
    sse = (sst - ssr - ssc).round(4)  # error (residual) sum of squares
    msr = (ssr / (k - 1)).round(4)
    msc = (ssc / (r - 1)).round(4)
    mse = (sse / ((k - 1) * (r - 1))).round(4)

    rvr = st.f(k - 1, (k - 1) * (r - 1))
    fr = (msr / mse).round(4)  # test the row factor
    fr_crit = rvr.ppf(1 - alpha).round(4)  # critical F value at the given significance level
    pr = (1 - rvr.cdf(fr)).round(4)  # p-value

    rvc = st.f(r - 1, (k - 1) * (r - 1))
    fc = (msc / mse).round(4)  # test the column factor
    fc_crit = rvc.ppf(1 - alpha).round(4)  # critical F value at the given significance level
    pc = (1 - rvc.cdf(fc)).round(4)  # p-value

    print('{0:-^97}'.format(''))
    print('{0:^10}|{1:^15}|{2:^10}|{3:^15}|{4:^15}|{5:^15}|{6:^15}'.format(\
        'Source', 'SS', 'df', 'MS', 'F', 'P-Value', 'F crit'))
    print('{0:-^97}'.format(''))
    print('{0:^10}|{1:^15}|{2:^10}|{3:^15}|{4:^15}|{5:^15}|{6:^15}'.format(\
        'Rows', ssr, k - 1, msr, fr, pr, fr_crit))
    print('{0:^10}|{1:^15}|{2:^10}|{3:^15}|{4:^15}|{5:^15}|{6:^15}'.format(\
        'Cols', ssc, r - 1, msc, fc, pc, fc_crit))
    print('{0:^10}|{1:^15}|{2:^10}|{3:^15}|'.format('Errors', sse,
                                                    (k - 1) * (r - 1), mse))
    print('{0:^10}|{1:^15}|{2:^10}|'.format('Total', sst, k * r - 1))

    return (ssr + ssc) / sst
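A usage sketch for twoway_anova on a small table without replication (rows are the levels of one factor, columns of the other), assuming scipy.stats is imported as st as the function expects; the numbers are made up:

import numpy as np
import scipy.stats as st

table = [[365, 350, 343, 340],
         [345, 368, 363, 330],
         [358, 323, 353, 343]]
R2 = twoway_anova(table, alpha=0.05)  # prints the ANOVA table, returns (SSR + SSC) / SST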
Example #6
def blocked_anova(file, reps):

    data, _, treat_sums, block_sums, grand_total, n, n_treats, obs_per_treat = prep_data(
        file, reps)
    n_blocks = obs_per_treat // reps
    obs_per_block = n // n_blocks

    anova = {}
    sources = ['treats', 'blocks', 'err', 'total']
    for item in sources:
        anova[item] = {}

    # DF's
    anova['total']['DF'] = n - 1
    anova['treats']['DF'] = n_treats - 1
    anova['blocks']['DF'] = n_blocks - 1
    anova['err']['DF'] = anova['total']['DF'] - anova['treats']['DF'] - anova[
        'blocks']['DF']

    # SS's
    anova['total']['SS'] = sum_xsq(data) - ((sum_x(data)**2) / n)
    ssTreats = 0
    for i in range(n_treats):
        ssTreats += (treat_sums[i]**2) / obs_per_treat
    ssTreats -= (grand_total**2) / n
    anova['treats']['SS'] = ssTreats
    ssBlocks = 0
    for i in range(n_blocks):
        ssBlocks += (block_sums[i]**2) / obs_per_block
    ssBlocks -= (grand_total**2) / n
    anova['blocks']['SS'] = ssBlocks
    anova['err']['SS'] = anova['total']['SS'] - anova['treats']['SS'] - anova[
        'blocks']['SS']

    # MS's
    anova['treats']['MS'] = anova['treats']['SS'] / anova['treats']['DF']
    anova['blocks']['MS'] = anova['blocks']['SS'] / anova['blocks']['DF']
    anova['err']['MS'] = anova['err']['SS'] / anova['err']['DF']

    # F
    anova['treats']['F'] = anova['treats']['MS'] / anova['err']['MS']
    anova['blocks']['F'] = anova['blocks']['MS'] / anova['err']['MS']

    # p
    anova['treats']['p'] = stats.f(anova['treats']['DF'],
                                   anova['err']['DF']).sf(anova['treats']['F'])
    anova['blocks']['p'] = stats.f(anova['blocks']['DF'],
                                   anova['err']['DF']).sf(anova['blocks']['F'])

    pretty_anova_tbl(anova, sources)
Example #7
def F_test(s1, s2, n1, n2, H0, alpha=0.05):
    '''
    F-Test for comparison of two variances.  
    H0: σ1 ≤, ≥, = σ2.  
    In slides 458.  
    REQUIRE: H0 can take three values: "equal", "less", "greater".  
    RETURN: Test statistics, critical value, p-value.  
    '''
    F = s1**2 / s2**2
    if H0 == "less":
        c_value = stats.f(n1 - 1, n2 - 1).ppf(1 - alpha)
        p_value = 1 - stats.f(n1 - 1, n2 - 1).cdf(F)
    elif H0 == "greater":
        c_value = stats.f(n1 - 1, n2 - 1).ppf(alpha)
        p_value = stats.f(n1 - 1, n2 - 1).cdf(F)
    elif H0 == "equal":
        F1, F2 = F, 1 / F
        c_value1, c_value2 = stats.f(n1 - 1,
                                     n2 - 1).ppf(1 - alpha / 2), stats.f(
                                         n2 - 1, n1 - 1).ppf(1 - alpha / 2)
        F = (F1, F2)
        c_value = (c_value1, c_value2)
        p_value = 2 * min(1 - stats.f(n1 - 1, n2 - 1).cdf(F1),
                          1 - stats.f(n2 - 1, n1 - 1).cdf(F2))
    return F, c_value, p_value
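For example, testing H0: sigma1 = sigma2 with sample standard deviations 2.3 and 1.9 from samples of sizes 21 and 16 (a sketch, assuming scipy.stats is imported as stats):

F, c_value, p_value = F_test(s1=2.3, s2=1.9, n1=21, n2=16, H0="equal")
print(F, c_value, p_value)  # (F1, F2), the two upper critical values, and the two-sided p-value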
Example #8
def FProbabilitiesLowerTail(values, dfn, dfd):
    if len(values) > 0 and dfn > 0 and dfd > 0:
        outputStr = ""
        areas = []

        for val in values:
            outputStr += str(val)

            rv = stats.f(dfn, dfd, loc=0, scale=1)
            area = rv.cdf(val)
            area = "{0:.5f}".format(area)
            areas.append(area)

            if len(values) > 1 and values.index(val) < len(values) - 1:
                outputStr += ", "
            else:
                outputStr += ""

        outputStr += ", serbestlik derecesi (pay): " + str(
            dfn) + ", serbestlik derecesi (payda): " + str(dfd)
        return outputStr, areas

    elif dfn <= 0 or dfd <= 0:
        return False, "Serbestlik dereceleri 0'dan kucuk olamaz."
    else:
        return False, "Hesaplama icin gecerli degerler girilmelidir."
Example #9
def FQuantilesLowerTail(probs, dfn, dfd):
    if len(probs) > 0 and dfn > 0 and dfd > 0:
        outputStr = ""
        yArray = []

        for prob in probs:
            outputStr += str(prob)

            if prob > 0 and prob < 1:

                rv = stats.f(dfn, dfd, loc=0, scale=1)
                y = rv.ppf(prob)
                y = "{0:.5f}".format(y)
                yArray.append(y)

            else:
                yArray.append("NaN")

            if len(probs) > 1 and probs.index(prob) < len(probs) - 1:
                outputStr += ", "
            else:
                outputStr += ""

        outputStr += ", serbestlik derecesi (pay): " + str(
            dfn) + ", serbestlik derecesi (payda): " + str(dfd)
        return outputStr, yArray

    elif dfn <= 0 or dfd <= 0:
        return False, "Serbestlik dereceleri 0'dan kucuk olamaz."
    else:
        return False, "Gecerli olasilik degeri girilmelidir."
Example #10
def sediff(sv1, sv2, n1, n2, alpha, bilateral=True):
    """
    Calculate the interval estimation of the difference of 2 population variance
    (两个总体方差比的区间估计)
    
    sv1**2    pv2**2 
    ------ * -------- ~ F(n1-1, n2-1)
    sv2**2    pv1**2

    Parameters
    ----------
    sv1 : sample mean, 总体1的样本标准差
    sv2 : sample mean, 总体2的样本标准差
    n1  : sample count, 总体1的样本容量
    n2  : sample count, 总体2的样本容量
    alpha: confidence level, 置信水平
    bilateral: 是否是双侧检验
    
    Returns
    -------
    tuple(pm1, pm2) : interval estimation of diff of population proportion,
                      两个总体方差比的区间估计
    """

    rv = st.f(n1 - 1, n2 - 1)
    f1 = rv.ppf((1 - alpha) / 2)
    f2 = rv.ppf((1 + alpha) / 2)

    svdiff = sv1**2 / sv2**2
    return tuple([svdiff / f2, svdiff / f1])
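A usage sketch for a 95% two-sided interval for the variance ratio, assuming scipy.stats is imported as st; note that alpha here is the confidence level, not a significance level:

import scipy.stats as st

low, high = sediff(sv1=3.2, sv2=2.1, n1=25, n2=20, alpha=0.95)
print(low, high)  # interval estimate for sigma1**2 / sigma2**2
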
def calc_det_thresh(fstat_vals,
                    det_p_val,
                    TB_prod,
                    channel_cnt,
                    fstat_ref_peak=None):
    # assumes numpy as np, scipy.stats as stats, and
    # `from scipy.optimize import minimize_scalar`
    fstat_min = np.min(fstat_vals)
    fstat_max = np.max(fstat_vals)

    # compute reference threshold if not provided
    if fstat_ref_peak:
        fstat_peak = fstat_ref_peak
    else:

        def temp_fstat(f):
            return -stats.f(TB_prod, TB_prod * (channel_cnt - 1)).pdf(f)

        fstat_peak = minimize_scalar(temp_fstat,
                                     bracket=(fstat_min, fstat_max)).x

    # compute
    kde = stats.gaussian_kde(fstat_vals)

    def temp_kde(f):
        return -kde.pdf(f)[0]

    kde_peak = minimize_scalar(temp_kde,
                               bracket=(fstat_min, fstat_max),
                               options={
                                   'maxiter': 250
                               }).x

    return stats.f(TB_prod, TB_prod *
                   (channel_cnt - 1)).ppf(det_p_val) * (kde_peak / fstat_peak)
Example #12
def plotFDistribution(FCrit, FValue, dfModel, dfError):
    mu = 0
    x = np.linspace(0, FValue + 2, 1001)[1:]
    fig, ax = plt.subplots(figsize=(5, 3.75))
    dist = stats.f(dfModel, dfError, mu)
    plt.plot(x,
             dist.pdf(x),
             ls='-',
             c='black',
             label=r'$d_1=%i,\ d_2=%i$' % (dfModel, dfError))

    plt.xlim(0, FValue + 2)
    plt.ylim(0.0, 1.0)

    plt.annotate('F Crit\n (%s)' % FCrit,
                 xy=(FCrit, 0),
                 xytext=(FCrit - 1, 0.4),
                 arrowprops=dict(facecolor='red', shrink=0.05))
    plt.annotate('F Value\n (%s)' % FValue,
                 xy=(FValue, 0),
                 xytext=(FValue - 2, 0.2),
                 arrowprops=dict(facecolor='blue', shrink=0.05))

    plt.xlabel('$x$')
    plt.ylabel(r'$p(x|d_1, d_2)$')
    plt.title("Fisher's Distribution")

    plt.legend()
    plt.show()
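For instance, marking an observed F value of 5.21 against a critical value of 3.35 for (2, 27) degrees of freedom (assumes numpy, matplotlib.pyplot, and scipy.stats imported as np, plt, and stats):

plotFDistribution(FCrit=3.35, FValue=5.21, dfModel=2, dfError=27)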
Example #13
def PlotFDistributionDistributionFunction(dfn, dfd):
    if dfn > 0 and dfd > 0:
        main_frame = QtGui.QWidget()
        dpi = 100
        fig = Figure((5.0, 4.0), dpi=dpi)
        canvas = FigureCanvas(fig)
        canvas.setParent(main_frame)

        axes = fig.add_subplot(111)
        mpl_toolbar = NavigationToolbar(canvas, main_frame)

        hbox = QtGui.QHBoxLayout()
        vbox = QtGui.QVBoxLayout()
        vbox.addWidget(canvas)
        vbox.addWidget(mpl_toolbar)
        vbox.addLayout(hbox)
        main_frame.setLayout(vbox)

        alpha = 0.0005
        sequence = stats.f.isf(alpha, dfn, dfd)

        x = np.linspace(-sequence, sequence, 1000)  # F has support x >= 0, so the cdf is 0 for x < 0
        rv = stats.f(dfn, dfd)
        y = rv.cdf(x)

        axes.plot(x, y)
        canvas.draw()

        return main_frame
    else:
        return False, "Serbestlik derecesi 0'dan kucuk olamaz."
#---/F DISTRIBUTION
Example #14
def FProbabilitiesLowerTail(values, dfn, dfd):
    if len(values) > 0 and dfn > 0 and dfd > 0:
        outputStr = ""
        areas = []

        for val in values:
            outputStr += str(val)

            rv = stats.f(dfn, dfd, loc=0, scale=1)
            area = rv.cdf(val)
            area = "{0:.5f}".format(area)
            areas.append(area)

            if len(values) > 1 and values.index(val) < len(values) - 1:
                outputStr += ", "
            else:
                outputStr += ""

        outputStr += ", serbestlik derecesi (pay): " + str(
            dfn) + ", serbestlik derecesi (payda): " + str(dfd)
        return outputStr, areas

    elif dfn <= 0 or dfd <= 0:
        return False, "Serbestlik dereceleri 0'dan kucuk olamaz."
    else:
        return False, "Hesaplama icin gecerli degerler girilmelidir."
Example #15
def PlotFDistributionDistributionFunction(dfn, dfd):
    if dfn > 0 and dfd > 0:
        main_frame = QtGui.QWidget()
        dpi = 100
        fig = Figure((5.0, 4.0), dpi=dpi)
        canvas = FigureCanvas(fig)
        canvas.setParent(main_frame)

        axes = fig.add_subplot(111)
        mpl_toolbar = NavigationToolbar(canvas, main_frame)

        hbox = QtGui.QHBoxLayout()
        vbox = QtGui.QVBoxLayout()
        vbox.addWidget(canvas)
        vbox.addWidget(mpl_toolbar)
        vbox.addLayout(hbox)
        main_frame.setLayout(vbox)

        alpha = 0.0005
        sequence = stats.f.isf(alpha, dfn, dfd)

        x = np.linspace(-sequence, sequence, 1000)  # F has support x >= 0, so the cdf is 0 for x < 0
        rv = stats.f(dfn, dfd)
        y = rv.cdf(x)

        axes.plot(x, y)
        canvas.draw()

        return main_frame
    else:
        return False, "Serbestlik derecesi 0'dan kucuk olamaz."


#---/F DISTRIBUTION
Example #16
def param_table(results, title, pad_bottom=False):
    """Formatted standard parameter table"""
    param_data = np.c_[results.params.values[:, None],
                       results.std_errors.values[:, None],
                       results.tstats.values[:, None],
                       results.pvalues.values[:, None],
                       results.conf_int()]
    data = []
    for row in param_data:
        txt_row = []
        for i, v in enumerate(row):
            f = _str
            if i == 3:
                f = pval_format
            txt_row.append(f(v))
        data.append(txt_row)
    header = [
        'Parameter', 'Std. Err.', 'T-stat', 'P-value', 'Lower CI', 'Upper CI'
    ]
    table_stubs = list(results.params.index)
    if pad_bottom:
        # Append blank row for spacing
        data.append([''] * 6)
        table_stubs += ['']

    return SimpleTable(data,
                       stubs=table_stubs,
                       txt_fmt=fmt_params,
                       headers=header,
                       title=title)
Example #17
def three_sampling_dis():
    """
    三大抽样分布与标准正态分布
    :return:
    """
    nor_dis = stats.norm()
    chi2_dis = stats.chi2(df=app.df1)
    t_dis = stats.t(df=app.df2)
    f_dis = stats.f(dfn=app.df3, dfd=app.df4)

    x1 = np.linspace(nor_dis.ppf(0.001), nor_dis.ppf(0.999), 1000)
    x2 = np.linspace(chi2_dis.ppf(0.001), chi2_dis.ppf(0.999), 1000)
    x3 = np.linspace(t_dis.ppf(0.001), t_dis.ppf(0.999), 1000)
    x4 = np.linspace(f_dis.ppf(0.001), f_dis.ppf(0.999), 1000)
    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    ax.plot(x1, nor_dis.pdf(x1), 'r-', lw=2, label=r'N(0, 1)')
    ax.plot(x2, chi2_dis.pdf(x2), 'g-', lw=2, label=rf'$\chi^2$({app.df1})')  # rf-string avoids the invalid \c escape warning
    ax.plot(x3, t_dis.pdf(x3), 'b-', lw=2, label=f't({app.df2})')
    ax.plot(x4,
            f_dis.pdf(x4),
            'm-',
            lw=2,
            label=f'F({app.df3}, {app.df4})')

    plt.ylabel('Probability')
    plt.title(r'PDF of Three Sampling Distribution')
    ax.legend(loc='best', frameon=False)
    plt.show()
Example #18
 def calculateSampleData(self, data):
     self.l_mean = data.groupby(level=0).mean().T.mean()
     self.l_ss = ((self.l_mean - self.total_mean) ** 2).sum() * self.r * self.t
     self.l_ms = self.l_ss / (self.s - 1)
     self.l_f = self.l_ms / self.e_ms
     self.l_f_distribute = f(self.s - 1, self.r * self.s * (self.t - 1))
     self.l_p = self.l_f_distribute.sf(self.l_f)
Example #19
    def __init__(self, d1, d2):
        self.d1 = d1
        self.d2 = d2

        # set dist before calling super's __init__
        self.dist = st.f(d1, d2)
        super(F, self).__init__()
Example #20
def anova_byHand():
    """ Calculate the ANOVA by hand. While you would normally not do that, this function shows
    how the underlying values can be calculated.
    """

    # Get the data
    data = getData("altman_910.txt", subDir="..\Data\data_altman")

    # Convert them to pandas-forman and group them by their group value
    df = pd.DataFrame(data, columns=["values", "group"])
    groups = df.groupby("group")

    # The "total sum-square" is the squared deviation from the mean
    ss_total = np.sum((df["values"] - df["values"].mean()) ** 2)

    # Calculate ss_treatment and  ss_error
    (ss_treatments, ss_error) = (0, 0)
    for val, group in groups:
        ss_error += sum((group["values"] - group["values"].mean()) ** 2)
        ss_treatments += len(group) * (group["values"].mean() - df["values"].mean()) ** 2

    df_groups = len(groups) - 1
    df_residuals = len(data) - len(groups)
    F = (ss_treatments / df_groups) / (ss_error / df_residuals)
    df = stats.f(df_groups, df_residuals)
    p = df.sf(F)

    print(("ANOVA-Results: F = {0}, and p<{1}".format(F, p)))

    return (F, p)
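For comparison, scipy can compute the same one-way ANOVA directly; this sketch assumes the groups variable from the function above:

from scipy import stats

F, p = stats.f_oneway(*(g["values"] for _, g in groups))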
Example #21
def anova_byHand():
    """Calculate the ANOVA by hand"""

    # Get the data
    data = getData('altman_910.txt', subDir=r'..\Data\data_altman')

    # Convert them to pandas format and group them by their group value
    df = pd.DataFrame(data, columns=['values', 'group'])
    groups = df.groupby('group')

    # The "total sum-square" is the squared deviation from the mean
    ss_total = np.sum((df['values'] - df['values'].mean())**2)

    # Calculate ss_treatment and  ss_error
    (ss_treatments, ss_error) = (0, 0)
    for val, group in groups:
        ss_error += sum((group['values'] - group['values'].mean())**2)
        ss_treatments += len(group) * (
            group['values'].mean() - df['values'].mean())**2

    df_groups = len(groups) - 1
    df_residuals = len(data) - len(groups)
    F = (ss_treatments / df_groups) / (ss_error / df_residuals)
    df = stats.f(df_groups, df_residuals)
    p = df.sf(F)

    print('ANOVA-Results: F = {0}, and p<{1}'.format(F, p))

    return (F, p)
Example #22
def FQuantilesLowerTail(probs, dfn, dfd):
    if len(probs) > 0 and dfn > 0 and dfd > 0:
        outputStr = ""
        yArray = []

        for prob in probs:
            outputStr += str(prob)

            if prob > 0 and prob < 1:
                rv = stats.f(dfn, dfd, loc=0, scale=1)
                y = rv.ppf(prob)
                y = "{0:.5f}".format(y)
                yArray.append(y)
            else:
                yArray.append("NaN")

            if len(probs) > 1 and probs.index(prob) < len(probs) - 1:
                outputStr += ", "
            else:
                outputStr += ""

        outputStr += ", serbestlik derecesi (pay): " + str(
            dfn) + ", serbestlik derecesi (payda): " + str(dfd)
        return outputStr, yArray

    elif dfn <= 0 or dfd <= 0:
        return False, "Serbestlik dereceleri 0'dan kucuk olamaz."
    else:
        return False, "Gecerli olasilik degeri girilmelidir."
Example #23
def tt(A, B):
    # assumes `f` is a variance-equality test whose result has a .pvalue
    # attribute, and `ttest` is scipy.stats.ttest_ind
    f_p = f(A, B).pvalue
    if f_p <= 0.05:
        t_p = ttest(A, B, equal_var=False).pvalue
    elif f_p > 0.05:
        t_p = ttest(A, B, equal_var=True).pvalue
    return t_p
 def __init__(self, groups):
     k = groups.num_groups()
     n = groups.n()
     dist = stats.f(k - 1, n - k)
     super(LinearContrastHyp, self).__init__(dist=dist,
                                             kind=AltHypKind.TWO_SIDED)
     self._groups = groups
Example #25
    def statistic(self, alpha=0.05):
        x = self.train_x[:, 1]
        y = self.train_y
        y_pred = self.predict(self.train_x, add_const=False)

        k = self.num_features
        n = self.num_samples

        SSE = np.sum((y - y_pred)**2)
        SST = np.sum((y - y.mean()) * y)
        SSR = np.sum((y_pred - y_pred.mean())**2)
        sigma_e = SSE / (n - k - 1)
        F_test = SSR * (n - k - 1) / SSE / k

        F_q = stats.f(k, n - k - 1).ppf(1 - alpha)
        test_result = 'NO SIGNIFICANT LINEAR DEPENDENCE!' if F_test < F_q else 'SIGNIFICANT LINEAR DEPENDENCE!'
        print_list = [('k', k), ('n', n), ('SSE', SSE), ('SSR', SSR),
                      ('SST', SST), ('sigma_e', sigma_e)]

        if k == 1:
            Lxx = np.sum((x - x.mean()) * x)
            Lyy = SST
            Lxy = np.sum((y - y.mean()) * x)
            print_list.extend([('Lxx', Lxx), ('Lxy', Lxy), ('Lyy', Lyy)])

        print_list.extend([('F_test', F_test), ('F_q', F_q),
                           ('test_result', test_result)])

        print('=' * 30, 'statistics', '=' * 30)
        utils.pair_print(print_list)
        print('=' * (len('statistics') + 62), end='\n\n')
 def test_Normal_to_F(self):
     A, B, C, V, W, X, Y, Z = RV(Normal(mean=0, var=1)**8)
     sims = ((((A**2) + (B**2) + (C**2)) / 3) /
             (((V**2) + (W**2) + (X**2) + (Y**2) + (Z**2)) / 5)).sim(Nsim)
     cdf = stats.f(dfn=3, dfd=5).cdf
     pval = stats.kstest(sims, cdf).pvalue
     self.assertTrue(pval > .01)
Example #27
def fix_alpha(alpha, Sigma, Sigma_star):
    p = Sigma.shape[0]
    Sigma_12 = fractional_matrix_power(Sigma, 0.5)

    matrix = Sigma_12.T @ np.linalg.inv(Sigma_star) @ Sigma_12

    lambdas = np.real(np.linalg.eigvals(matrix))
    factorials = [1, 1, 2, 8]
    k = np.asarray([factorials[r] * np.sum(lambdas**r) for r in [1,1,2,3]])

    t1 = 4*k[1]*k[2]**2 + k[3]*(k[2]-k[1]**2)
    t2 = k[3]*k[1] - 2*k[2]**2
    chi_quantile = sps.chi2(p).ppf(1-alpha)
    if t1 < 10**(-5):
        a_new = 2 + (k[1]**2)/(k[2]**2)
        b_new = (k[1]**3)/k[2] + k[1]
        s1 = 2*k[1]*(k[3]*k[1] + k[2]*k[1]**2 - k[2]**2)
        s2 = 3*t2 + 2*k[2]*(k[2] + k[1]**2)
        alpha_star = 1 - sps.invgamma(a_new, scale = b_new).cdf(chi_quantile)
    elif t2 < 10**(-5):
        a_new = (k[1]**2)/k[2]
        b_new = k[2]/k[1]
        alpha_star = 1 - sps.gamma(a_new, scale = b_new).cdf(chi_quantile)
    else:
        a1 = 2*k[1]*(k[3]*k[1] + k[2]*k[1]**2 - k[2]**2)/t1
        a2 = 3 + 2*k[2]*(k[2] + k[1]**2)/t2
        alpha_star = 1 - sps.f(2*a1, 2*a2).cdf(a2*t2*chi_quantile/(a1*t1))
        
    return alpha_star
def anova_byHand():
    """ Calculate the ANOVA by hand. While you would normally not do that, this function shows
    how the underlying values can be calculated.
    """

    # Get the data
    inFile = 'altman_910.txt'
    data = np.genfromtxt(inFile, delimiter=',')

    # Convert them to pandas format and group them by their group value
    df = pd.DataFrame(data, columns=['values', 'group'])
    groups = df.groupby('group')

    # The "total sum-square" is the squared deviation from the mean
    ss_total = np.sum((df['values']-df['values'].mean())**2)
    
    # Calculate ss_treatment and  ss_error
    (ss_treatments, ss_error) = (0, 0)
    for val, group in groups:
        ss_error += sum((group['values'] - group['values'].mean())**2)
        ss_treatments += len(group) * (group['values'].mean() - df['values'].mean())**2

    df_groups = len(groups)-1
    df_residuals = len(data)-len(groups)
    F = (ss_treatments/df_groups) / (ss_error/df_residuals)
    df = stats.f(df_groups,df_residuals)
    p = df.sf(F)

    print(('ANOVA-Results: F = {0}, and p<{1}'.format(F, p)))
    
    return (F, p)
Example #29
def LoF_test(N, X, Y, alpha=0.05):
    '''
    Test for Lack of Fit.  
    H0: the linear regression model is appropriate.  
    In slides 608.  
    REQUIRE: multiple sampling for single x. N, X are 1-D lists. Y is a 2-D list.  
    RETURN: (SSE, SSE_pe, SSE_if), (Test statistics and critical value).
    '''
    k = len(N)
    n = sum(N)
    print(n, k)
    mean_Y = [sum(Y[i]) / N[i] for i in range(k)]
    SSE_pe = sum(
        sum([(Y[i][j] - mean_Y[i])**2 for j in range(N[i])]) for i in range(k))
    x = []
    for i in range(k):
        x.extend([X[i]] * N[i])
    y = []
    for i in range(k):
        y.extend(Y[i])
    model = SLR(n, x, y)
    SSE = model.SSE
    SSE_if = SSE - SSE_pe
    F = (SSE_if / (k - 2)) / (SSE_pe / (n - k))
    f = stats.f(k - 2, n - k).ppf(1 - alpha)
    return (SSE, SSE_pe, SSE_if), (F, f)
def fun7():
    print("open三大抽样分布")
    #绘制 正态分布 卡方分布 t分布 F分布
    nor_dis = stats.norm()
    chi2_dis = stats.chi2(df=eval(k_1.get()))
    t_dis = stats.t(df=eval(t_1.get()))
    f_dis = stats.f(dfn=eval(f_1.get()), dfd=eval(f_2.get()))

    x1 = np.linspace(nor_dis.ppf(0.001), nor_dis.ppf(0.999), 1000)
    x2 = np.linspace(chi2_dis.ppf(0.001), chi2_dis.ppf(0.999), 1000)
    x3 = np.linspace(t_dis.ppf(0.001), t_dis.ppf(0.999), 1000)
    x4 = np.linspace(f_dis.ppf(0.001), f_dis.ppf(0.999), 1000)
    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    ax.plot(x1, nor_dis.pdf(x1), 'r-', lw=2, label=r'N(0, 1)')
    ax.plot(x2,
            chi2_dis.pdf(x2),
            'g-',
            lw=2,
            label=r'$\chi^2$(%d)' % eval(k_1.get()))
    ax.plot(x3, t_dis.pdf(x3), 'b-', lw=2, label='t(%d)' % eval(t_1.get()))
    ax.plot(x4,
            f_dis.pdf(x4),
            'm-',
            lw=2,
            label='F(%d, %d)' % (eval(f_1.get()), eval(f_2.get())))

    plt.xlabel("x")
    plt.ylabel('Probability')
    plt.title(r'PDF of Three Sampling Distribution')
    ax.legend(loc='best', frameon=False)
    plt.grid()
    plt.show()
def f_threshold_twoway_rm(n_subjects, factor_levels, effects='A*B',
                          pvalue=0.05):
    """ Compute F-value thresholds for a two-way ANOVA

    Parameters
    ----------
    n_subjects : int
        The number of subjects to be analyzed.
    factor_levels : list-like
        The number of levels per factor.
    effects : str
        A string denoting the effect to be returned. The following
        mapping is currently supported:
            'A': main effect of A
            'B': main effect of B
            'A:B': interaction effect
            'A+B': both main effects
            'A*B': all three effects
    pvalue : float
        The p-value to be thresholded.

    Returns
    -------
    f_threshold : list | float
        list of f-values for each effect if the number of effects
        requested > 2, else float.
    """
    effect_picks = _check_effects(effects)

    f_threshold = []
    for _, df1, df2 in _iter_contrasts(n_subjects, factor_levels,
                                        effect_picks):
        f_threshold.append(stats.f(df1, df2).isf(pvalue))

    return f_threshold if len(f_threshold) > 1 else f_threshold[0]
Example #32
def anova_byHand():
    """ Calculate the ANOVA by hand. While you would normally not do that, this function shows
    how the underlying values can be calculated.
    """

    # Get the data
    data = getData('altman_910.txt', subDir='.')

    # Convert them to pandas format and group them by their group value
    df = pd.DataFrame(data, columns=['values', 'group'])
    groups = df.groupby('group')

    # The "total sum-square" is the squared deviation from the mean
    ss_total = np.sum((df['values'] - df['values'].mean())**2)

    # Calculate ss_treatment and  ss_error
    (ss_treatments, ss_error) = (0, 0)
    for val, group in groups:
        ss_error += sum((group['values'] - group['values'].mean())**2)
        ss_treatments += len(group) * (group['values'].mean() -
                                       df['values'].mean())**2

    df_groups = len(groups) - 1
    df_residuals = len(data) - len(groups)
    F = (ss_treatments / df_groups) / (ss_error / df_residuals)
    df = stats.f(df_groups, df_residuals)
    p = df.sf(F)

    print(('ANOVA-Results: F = {0}, and p<{1}'.format(F, p)))

    return (F, p)
Example #33
 def calculateColumnData(self, data):
     self.c_mean = data.mean()
     self.c_ss = ((self.c_mean - self.total_mean) ** 2).sum() * self.s * self.t
     self.c_ms = self.c_ss / (self.r - 1)
     self.c_f = self.c_ms / self.e_ms
     self.c_f_distribute = f(self.r - 1, self.r * self.s * (self.t - 1))
     self.c_p = self.c_f_distribute.sf(self.c_f)
Example #34
def p_value(t2):
    '''
    Calculate the lower-tail F probability at the transformed statistic T2
    (the conventional p-value is the upper tail, 1 - cdf)
    '''

    T2 = (sample_size-dimension)/(dimension*(sample_size-1)) * t2
    f = stats.f( dimension, sample_size-dimension)
    return  f.cdf(T2)
Example #35
def generate_matrix():
    def f(X1, X2, X3):
        from random import randrange
        y = 8.4+8.5*X1+5.7*X2+9.7*X3+8.9*X1*X1+0.2*X2*X2+0.5*X3*X3+2.0*X1*X2+0.7*X1*X3+4.3*X2*X3+9.7*X1*X2*X3 + randrange(0, 10) - 5
        return y

    matrix_with_y = [[f(matrix_x[j][0], matrix_x[j][1], matrix_x[j][2]) for i in range(m)] for j in range(N)]
    return matrix_with_y
Example #36
 def calculateCorrData(self, data):
     l_mean = pd.concat([self.l_mean] * self.r, axis=1, keys=self.corr_mean.columns)
     c_mean = pd.concat([self.c_mean] * self.s, axis=1, keys=self.corr_mean.index).T
     self.corr_ss = ((self.corr_mean - l_mean - c_mean + self.total_mean) ** 2).sum().sum() * self.t
     self.corr_ms = self.corr_ss / ((self.s - 1) * (self.r - 1))
     self.corr_f = self.corr_ms / self.e_ms
     self.corr_f_distribute = f((self.s - 1) * (self.r - 1), self.r * self.s * (self.t - 1))
     self.corr_p = self.corr_f_distribute.sf(self.corr_f)
def Chow_Dickey(pd_series,
                split,
                reg_type='c',
                auto_lag='AIC',
                max_lag=4,
                verbose=False):
    if isinstance(pd_series, pd.DataFrame):
        # coerce a single-column DataFrame to a Series
        temp_index = pd_series.index
        pd_series = pd.Series(pd_series.values.ravel(), index=temp_index)
    if isinstance(split, (pd.Timestamp, str)):  # pd._libs.tslib.Timestamp in older pandas
        mask = pd_series.index <= pd.to_datetime(split)
        split1 = pd_series[mask]
        split2 = pd_series[~mask]
    else:
        mask = pd_series.index.year <= split
        split1 = pd_series[mask]
        split2 = pd_series[~mask]

    nr_adf = adfuller(pd_series,
                      regression=reg_type,
                      autolag=auto_lag,
                      maxlag=max_lag,
                      regresults=True)[3]
    nr_lag = nr_adf.usedlag
    nr_model = nr_adf.resols
    nr_ssr = nr_model.ssr * nr_model.nobs
    param_length = nr_model.df_model + 1

    adf1 = adfuller(split1,
                    regression=reg_type,
                    autolag=None,
                    maxlag=max_lag,
                    regresults=True)[3]

    adf1_model = adf1.resols
    N1 = adf1_model.nobs
    adf1_ssr = adf1_model.ssr * N1

    adf2 = adfuller(split2,
                    regression=reg_type,
                    autolag=None,
                    maxlag=max_lag,
                    regresults=True)[3]

    adf2_model = adf2.resols
    N2 = adf2_model.nobs
    adf2_ssr = adf2_model.ssr * N2

    numerator = (nr_ssr - (adf1_ssr + adf2_ssr)) / param_length
    denominator = (adf1_ssr + adf2_ssr) / (N1 + N2 - 2 * param_length)

    F_stat = numerator / denominator

    f_dist = stat.f(param_length, N1 + N2 - 2 * param_length)

    p_val = 1 - f_dist.cdf(F_stat)

    return F_stat, p_val, nr_lag
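A usage sketch for Chow_Dickey, assuming `from statsmodels.tsa.stattools import adfuller`, scipy.stats imported as stat, and a series with a DatetimeIndex; split may be a timestamp/string or a year:

import numpy as np
import pandas as pd

idx = pd.date_range('1990-01-31', periods=240, freq='M')
y = pd.Series(np.cumsum(np.random.randn(240)), index=idx)
F_stat, p_val, used_lag = Chow_Dickey(y, split='1999-12-31')
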
def show_continuous():
    """Show a variety of continuous distributions"""
        
    x = linspace(-10,10,201)
    
    # Normal distribution
    showDistribution(x, stats.norm, stats.norm(loc=2, scale=4),
                     'Normal Distribution', 'Z', 'P(Z)','')
    
    # Exponential distribution
    showDistribution(x, stats.expon, stats.expon(loc=-2, scale=4),
                     'Exponential Distribution', 'X', 'P(X)','')
    
    # Student's t-distribution
    # ... with 4, and with 10 degrees of freedom (DOF)
    plot(x, stats.norm.pdf(x), 'g')
    hold(True)  # hold() was removed in matplotlib >= 3.0; overplotting is now the default
    showDistribution(x, stats.t(4), stats.t(10),
                     'T-Distribution', 'X', 'P(X)',['normal', 't=4', 't=10'])
    
    # F-distribution
    # ... with (3,4) and (10,15) DOF
    showDistribution(x, stats.f(3,4), stats.f(10,15),
                     'F-Distribution', 'F', 'P(F)',['(3,4) DOF', '(10,15) DOF'])
    
    # Uniform distribution
    showDistribution(x, stats.uniform,'' ,
                     'Uniform Distribution', 'X', 'P(X)','')
    
    # Logistic distribution
    showDistribution(x, stats.norm, stats.logistic,
                     'Logistic Distribution', 'X', 'P(X)',['Normal', 'Logistic'])
    
    # Lognormal distribution
    x = logspace(-9,1,1001)+1e-9
    showDistribution(x, stats.lognorm(2), '',
                     'Lognormal Distribution', 'X', 'lognorm(X)','', xmin=-0.1)
    
    # The log-lin plot has to be done by hand:
    plot(log(x), stats.lognorm.pdf(x,2))
    xlim(-10, 4)
    title('Lognormal Distribution')
    xlabel('log(X)')
    ylabel('lognorm(X)')
    show()
Example #39
def generate_matrix():
    def f(X1, X2, X3):
        from random import randrange
        y = 7.7 + 2.8 * X1 + 0.5 * X2 + 2.6 * X3 + 1.4 * X1 * X1 + 0.3 * X2 * X2 + 7.1 * X3 * X3 + 5.0 * X1 * X2 + \
            0.3 * X1 * X3 + 9.3 * X2 * X3 + 4.1 * X1 * X2 * X3 + randrange(0, 10) - 5
        return y

    matrix_with_y = [[f(matrix_x[j][0], matrix_x[j][1], matrix_x[j][2]) for i in range(m)] for j in range(N)]
    return matrix_with_y
Example #40
def generate_matrix():
    def f(X1, X2, X3):
        from random import randrange
        y = 6.7 + 9.1 * X1 + 1.6 * X2 + 9.1 * X3 + 3.3 * X1 * X1 + 0.2 * X2 * X2 + 6.1 * X3 * X3 + 8.5 * X1 * X2 + \
            0.7 * X1 * X3 + 6.6 * X2 * X3 + 8.1 * X1 * X2 * X3 + randrange(0, 10) - 5
        return y

    matrix_with_y = [[f(matrix_x[j][0], matrix_x[j][1], matrix_x[j][2]) for i in range(m)] for j in range(N)]
    return matrix_with_y
Example #41
def generate_matrix():
    def f(X1, X2, X3):
        """Генерація функції по варіанту"""
        y = (5.6 + 8.0 * X1 + 4.8 * X2 + 6.2 * X3 + 5.9 * X1 * X1 + 1.0 * X2 * X2 + 8.7 * X3 * X3 + 2.0 * X1 * X2 + \
            0.8 * X1 * X3 + 1.0 * X2 * X3 + 3.0 * X1 * X2 * X3 + randrange(0, 10) - 5)
        return y

    matrix_with_y = [[f(matrix_x[j][0], matrix_x[j][1], matrix_x[j][2]) for i in range(m)] for j in range(N)]
    return matrix_with_y
Example #42
def generate_matrix():
    def f(X1, X2, X3):
        from random import randrange
        y = 3.5 + 6.6 * X1 + 5.3 * X2 + 5.0 * X3 + 5.1 * X1 * X1 + 0.1 * X2 * X2 + 7.2 * X3 * X3 + 1.4 * X1 * X2 \
            + 0.7 * X1 * X3 + 4.2 * X2 * X3 + 7.7 * X1 * X2 * X3 + randrange(0, 10) - 5
        return y

    matrix_with_y = [[f(matrix_x[j][0], matrix_x[j][1], matrix_x[j][2]) for _ in range(m)] for j in range(N)]
    return matrix_with_y
Example #43
 def anova(arr):
     a = seasonal_matrix(arr)
     tau = a.shape[0]
     zbar = a.mean(0)
     v_zbar = zbar.var()
     v = a.ravel().var()
     stat = m * (tau - 1) / (m - 1) * v_zbar / (v - v_zbar)
     i_a = stat > stats.f(m - 1, m * (tau - 1)).ppf(.9)
     return i_a
Example #44
def generate_matrix():
    def f(X1, X2, X3):
        # my function
        y = 5.4 + 3.6 * X1 + 6.6 * X2 + 7.7 * X3 + 8.0 * X1 * X1 + 0.3 * X2 * X2 + 2.5 * X3 * X3 + 5.9 * X1 * X2 + \
            0.3 * X1 * X3 + 7.2 * X2 * X3 + 5.3 * X1 * X2 * X3 + randrange(0, 10) - 5
        return y

    matrix_with_y = [[f(matrix_x[j][0], matrix_x[j][1], matrix_x[j][2]) for i in range(m)] for j in range(N)]
    return matrix_with_y
Example #45
    def __init__(self, data):
        self.r = data.shape[1]
        self.k = data.shape[0]
        self.calculate_error_value(data)
        columnAnalysis = SingleAnalysisVariance(data)
        self.c_ss = columnAnalysis.between_group_ss
        self.c_ms = columnAnalysis.between_group_ss / (self.r - 1)
        self.c_f = self.c_ms / self.e_ms
        self.c_statistics_info = columnAnalysis.statistics_info
        self.c_f_distribute = f(self.r - 1, (self.r - 1) * (self.k - 1))
        self.c_p_value = self.c_f_distribute.sf(self.c_f)

        lineAnalysis = SingleAnalysisVariance(data.T)
        self.l_ss = lineAnalysis.between_group_ss
        self.l_ms = lineAnalysis.between_group_ss / (self.k - 1)
        self.l_f = self.l_ms / self.e_ms
        self.l_statistics_info = lineAnalysis.statistics_info
        self.l_f_distribute = f(self.k - 1, (self.r - 1) * (self.k - 1))
        self.l_p_value = self.l_f_distribute.sf(self.l_f)
Example #46
def generate_matrix():
    def f(X1, X2, X3):
        y = 0.3 + 4.1 * X1 + 2.8 * X2 + 7.8 * X3 + 1.4 * X1 * X1 + 0.2 * X2 * X2 + 2.4 * X3 * X3 + 9.7 * X1 * X2 + \
            0.6 * X1 * X3 + 4.4 * X2 * X3 + 3.4 * X1 * X2 * X3 + randrange(0, 10) - 5
        return y

    matrix_y = [[
        f(matrix_x[j][0], matrix_x[j][1], matrix_x[j][2]) for i in range(m)
    ] for j in range(N)]
    return matrix_y
Example #47
def F(d1, d2, tag=None):
    """
    An F (fisher) random variate
    
    Parameters
    ----------
    d1 : int
        Numerator degrees of freedom
    d2 : int
        Denominator degrees of freedom
    """
    assert isinstance(d1, int) and d1>1, 'd1 must be an int greater than 1'
    assert isinstance(d2, int) and d2>1, 'd2 must be an int greater than 1'
    return uv(rv=ss.f(d1, d2), tag=tag)
Example #48
def Fisher(d1, d2, tag=None):
    """
    An F (fisher) random variate
    
    Parameters
    ----------
    d1 : int
        Numerator degrees of freedom
    d2 : int
        Denominator degrees of freedom
    """
    assert int(d1)==d1 and d1>=1, 'Fisher (F) "d1" must be an integer greater than 0'
    assert int(d2)==d2 and d2>=1, 'Fisher (F) "d2" must be an integer greater than 0'
    return uv(ss.f(d1, d2), tag=tag)
Example #49
    def __init__(self):
        self.dist_equivalents = [
            #transf, stats.lognorm(1))
            (lognormalg, stats.lognorm(1)),
            #transf2
            (squarenormalg, stats.chi2(1)),
            (absnormalg, stats.halfnorm),
            (absnormalg, stats.foldnorm(1e-5)),  #try frozen
            #(negsquarenormalg, 1-stats.chi2),  # won't work as distribution
            (squaretg(10), stats.f(1, 10))]      #try both frozen


        l,s = 0.0, 1.0
        self.ppfq = [0.1,0.5,0.9]
        self.xx = [0.95,1.0,1.1]
        self.nxx = [-0.95,-1.0,-1.1]
Example #50
def f_threshold_mway_rm(n_subjects, factor_levels, effects='A*B',
                        pvalue=0.05):
    """Compute F-value thresholds for a two-way ANOVA.

    Parameters
    ----------
    n_subjects : int
        The number of subjects to be analyzed.
    factor_levels : list-like
        The number of levels per factor.
    effects : str
        A string denoting the effect to be returned. The following
        mapping is currently supported:

            * ``'A'``: main effect of A
            * ``'B'``: main effect of B
            * ``'A:B'``: interaction effect
            * ``'A+B'``: both main effects
            * ``'A*B'``: all three effects

    pvalue : float
        The p-value to be thresholded.

    Returns
    -------
    F_threshold : list | float
        list of F-values for each effect if the number of effects
        requested > 2, else float.

    See Also
    --------
    f_oneway
    f_mway_rm

    Notes
    -----
    .. versionadded:: 0.10
    """
    from scipy.stats import f
    effect_picks, _ = _map_effects(len(factor_levels), effects)

    F_threshold = []
    for _, df1, df2 in _iter_contrasts(n_subjects, factor_levels,
                                       effect_picks):
        F_threshold.append(f(df1, df2).isf(pvalue))

    return F_threshold if len(F_threshold) > 1 else F_threshold[0]
Example #51
    def setup_class(cls):
        cls.dist_equivalents = [
            #transf, stats.lognorm(1))
            #The below fails on the SPARC box with scipy 10.1
            #(lognormalg, stats.lognorm(1)),
            #transf2
           (squarenormalg, stats.chi2(1)),
           (absnormalg, stats.halfnorm),
           (absnormalg, stats.foldnorm(1e-5)),  #try frozen
           #(negsquarenormalg, 1-stats.chi2),  # won't work as distribution
           (squaretg(10), stats.f(1, 10))
            ]      #try both frozen

        l,s = 0.0, 1.0
        cls.ppfq = [0.1,0.5,0.9]
        cls.xx = [0.95,1.0,1.1]
        cls.nxx = [-0.95,-1.0,-1.1]
Example #52
 def f_oneway(self, *args):
     if args[0] is None:
         return [None,None]
     result=[]
     n=len(args)*len(args[0])
     m=len(args)
     fS=m-1
     fe=n-m
     SA=self.getSA(*args)
     Se=self.getSe(*args)
     VA=SA/fS
     Ve=Se/fe
     FA=VA/Ve
     F=f(fS,fe)
     p=F.sf(FA)
     result.append(float('%.6f'%FA))
     result.append(float('%.6f'%p))
     return [float('%.6f'%FA),float('%.6f'%p)]
Example #53
 def __init__(self, data):
     """
     :param data:  the data to analyze; it's a DataFrame
     """
     self.n = data.notnull().sum().sum()
     self.k = data.shape[1]
     self.statistics_info = self._create_statistics_info(data)
     self.total_mean = self.statistics_info["sum"].sum() / self.statistics_info["count"].sum()
     self.between_group_ss = (
         self.statistics_info["count"] * (self.statistics_info["mean"] - self.total_mean) ** 2
     ).sum()
     self.ms_between_group = self.between_group_ss / (self.k - 1)
     self.inside_group_ss = self.statistics_info["sumdiff"].sum()
     self.ms_inside_group = self.inside_group_ss / (self.n - self.k)
     self.f = self.ms_between_group / self.ms_inside_group
     self.f_distribute = f(self.k - 1, self.n - self.k)
     self.p_value = self.f_distribute.sf(self.f)
     self.t_distribute = t(self.n - self.k)
Example #54
def understand_f_fitting(sigma2):
    """
    Test function: Understanding the F scaled fitting procedure.
    
    """
    import matplotlib.pyplot as plt
    
    prms = st.f.fit(sigma2, f0=19)  # optionally also floc=0
    print(prms)
    dfn = prms[0]
    dfd = prms[1]
    scale_ = prms[3]
    loc_ = prms[2]
    x = np.linspace(st.f.ppf(0.01, dfn, dfd, scale=scale_, loc=loc_),
                    st.f.ppf(0.99, dfn, dfd, scale=scale_, loc=loc_), 100)
    
    rv = st.f(dfn, dfd, scale=scale_)#, loc=loc_)
    plt.plot(x, rv.pdf(x), color='#ee9041', lw=2)
    h = plt.hist(sigma2, density=True, color='#459db9')  # 'normed' was replaced by 'density' in matplotlib
Example #55
def f_mway_rm(data, factor_levels, effects='all',
              correction=False, return_pvals=True):
    """Compute M-way repeated measures ANOVA for fully balanced designs.

    Parameters
    ----------
    data : ndarray
        3D array where the first two dimensions are compliant
        with a subjects X conditions scheme where the first
        factor repeats slowest::

                        A1B1 A1B2 A2B1 A2B2
            subject 1   1.34 2.53 0.97 1.74
            subject ... .... .... .... ....
            subject k   2.45 7.90 3.09 4.76

        The last dimensions is thought to carry the observations
        for mass univariate analysis.
    factor_levels : list-like
        The number of levels per factor.
    effects : str | list
        A string denoting the effect to be returned. The following
        mapping is currently supported (example with 2 factors):

            * ``'A'``: main effect of A
            * ``'B'``: main effect of B
            * ``'A:B'``: interaction effect
            * ``'A+B'``: both main effects
            * ``'A*B'``: all three effects
            * ``'all'``: all effects (equals 'A*B' in a 2 way design)

        If list, effect names are used: ``['A', 'B', 'A:B']``.
    correction : bool
        The correction method to be employed if one factor has more than two
        levels. If True, sphericity correction using the Greenhouse-Geisser
        method will be applied.
    return_pvals : bool
        If True, return p-values corresponding to F-values.

    Returns
    -------
    F_vals : ndarray
        An array of F-statistics with length corresponding to the number
        of effects estimated. The shape depends on the number of effects
        estimated.
    p_vals : ndarray
        If not requested via return_pvals, defaults to an empty array.

    See Also
    --------
    f_oneway
    f_threshold_mway_rm

    Notes
    -----
    .. versionadded:: 0.10
    """
    from scipy.stats import f

    if data.ndim == 2:  # general purpose support, e.g. behavioural data
        data = data[:, :, np.newaxis]
    elif data.ndim > 3:  # let's allow for some magic here.
        data = data.reshape(
            data.shape[0], data.shape[1], np.prod(data.shape[2:]))

    effect_picks, _ = _map_effects(len(factor_levels), effects)
    n_obs = data.shape[2]
    n_replications = data.shape[0]

    # put last axis in front to 'iterate' over mass univariate instances.
    data = np.rollaxis(data, 2)
    fvalues, pvalues = [], []
    for c_, df1, df2 in _iter_contrasts(n_replications, factor_levels,
                                        effect_picks):
        y = np.dot(data, c_)
        b = np.mean(y, axis=1)[:, np.newaxis, :]
        ss = np.sum(np.sum(y * b, axis=2), axis=1)
        mse = (np.sum(np.sum(y * y, axis=2), axis=1) - ss) / (df2 / df1)
        fvals = ss / mse
        fvalues.append(fvals)
        if correction:
            # sample covariances, leave off "/ (y.shape[1] - 1)" norm because
            # it falls out.
            v = np.array([np.dot(y_.T, y_) for y_ in y])
            v = (np.array([np.trace(vv) for vv in v]) ** 2 /
                 (df1 * np.sum(np.sum(v * v, axis=2), axis=1)))
            eps = v

        df1, df2 = np.zeros(n_obs) + df1, np.zeros(n_obs) + df2
        if correction:
            # numerical imprecision can cause eps=0.99999999999999989
            # even with a single category, so never let our degrees of
            # freedom drop below 1.
            df1, df2 = [np.maximum(d[None, :] * eps, 1.) for d in (df1, df2)]

        if return_pvals:
            pvals = f(df1, df2).sf(fvals)
        else:
            pvals = np.empty(0)
        pvalues.append(pvals)

    # handle single effect returns
    return [np.squeeze(np.asarray(vv)) for vv in (fvalues, pvalues)]
Example #56
def FandPV(df1, df2, fval):
    rv = _ss.f(df1, df2)
    return 1 - rv.cdf(fval)
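For example, the upper-tail probability of F = 4.2 with (3, 20) degrees of freedom, assuming scipy.stats is imported as _ss as the snippet expects:

import scipy.stats as _ss

print(FandPV(3, 20, 4.2))  # p-value around 0.02
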
def f_twoway_rm(data, factor_levels, effects='A*B', alpha=0.05,
                   correction=False, return_pvals=True):
    """ 2 way repeated measures ANOVA for fully balanced designs

    data : ndarray
        3D array where the first two dimensions are compliant
        with a subjects X conditions scheme:

        first factor repeats slowest:

                    A1B1 A1B2 A2B1 A2B2
        subject 1   1.34 2.53 0.97 1.74
        subject ... .... .... .... ....
        subject k   2.45 7.90 3.09 4.76

        The last dimensions is thought to carry the observations
        for mass univariate analysis.
    factor_levels : list-like
        The number of levels per factor.
    effects : str
        A string denoting the effect to be returned. The following
        mapping is currently supported:
            'A': main effect of A
            'B': main effect of B
            'A:B': interaction effect
            'A+B': both main effects
            'A*B': all three effects
    alpha : float
        The significance threshold.
    correction : bool
        The correction method to be employed if one factor has more than two
        levels. If True, sphericity correction using the Greenhouse-Geisser
        method will be applied.
    return_pvals : bool
        If True, return p values corresponding to f values.

    Returns
    -------
    f_vals : ndarray
        An array of f values with length corresponding to the number
        of effects estimated. The shape depends on the number of effects
        estimated.
    p_vals : ndarray
        If not requested via return_pvals, defaults to an empty array.
    """
    if data.ndim == 2:  # general purpose support, e.g. behavioural data
        data = data[:, :, np.newaxis]
    elif data.ndim > 3:  # let's allow for some magic here.
        data = data.reshape(data.shape[0], data.shape[1],
            np.prod(data.shape[2:]))

    effect_picks = _check_effects(effects)
    n_obs = data.shape[2]
    n_replications = data.shape[0]

    # put last axis in front to 'iterate' over mass univariate instances.
    data = np.rollaxis(data, 2)
    fvalues, pvalues = [], []
    for c_, df1, df2 in _iter_contrasts(n_replications, factor_levels,
            effect_picks):
        y = np.dot(data, c_)
        b = np.mean(y, axis=1)[:, np.newaxis, :]
        ss = np.sum(np.sum(y * b, axis=2), axis=1)
        mse = (np.sum(np.sum(y * y, axis=2), axis=1) - ss) / (df2 / df1)
        fvals = ss / mse
        fvalues.append(fvals)
        if correction:
            # sample covariances, leave off "/ (y.shape[1] - 1)" norm because
            # it falls out. the below line is faster than the equivalent:
            # v = np.array([np.dot(y_.T, y_) for y_ in y])
            v = np.array(list(map(np.dot, y.swapaxes(2, 1), y)))
            v = (np.array(list(map(np.trace, v))) ** 2 /
                  (df1 * np.sum(np.sum(v * v, axis=2), axis=1)))
            eps = v

        df1, df2 = np.zeros(n_obs) + df1, np.zeros(n_obs) + df2
        if correction:
            df1, df2 = [d[None, :] * eps for d in (df1, df2)]

        if return_pvals:
            pvals = stats.f(df1, df2).sf(fvals)
        else:
            pvals = np.empty(0)
        pvalues.append(pvals)

    # handle single effect returns
    return [np.squeeze(np.asarray(v)) for v in (fvalues, pvalues)]
Example #58
plt.figure(1)
plt.plot(support[ix], rv.pdf(support[ix]), label='Actual')
plt.plot(support[ix], dens_normal.pdf()[ix], label='Scott')
plt.plot(support[ix], dens_cvls.pdf()[ix], label='CV_LS')
plt.plot(support[ix], dens_cvml.pdf()[ix], label='CV_ML')
plt.title("Nonparametric Estimation of the Density of Beta Distributed " \
          "Random Variable")
plt.legend(('Actual', 'Scott', 'CV_LS', 'CV_ML'))

# f distribution
df = 100
dn = 100
nobs = 250

support = np.random.f(dn, df, size=nobs)
rv = stats.f(df, dn)
ix = np.argsort(support)

dens_normal = KDEMultivariate(data=[support], var_type='c', bw='normal_reference')
dens_cvls = KDEMultivariate(data=[support], var_type='c', bw='cv_ls')
dens_cvml = KDEMultivariate(data=[support], var_type='c', bw='cv_ml')

plt.figure(2)
plt.plot(support[ix], rv.pdf(support[ix]), label='Actual')
plt.plot(support[ix], dens_normal.pdf()[ix], label='Scott')
plt.plot(support[ix], dens_cvls.pdf()[ix], label='CV_LS')
plt.plot(support[ix], dens_cvml.pdf()[ix], label='CV_ML')
plt.title("Nonparametric Estimation of the Density of f Distributed " \
          "Random Variable")
plt.legend(('Actual', 'Scott', 'CV_LS', 'CV_ML'))
Example #59
    def test_causality(self, equation, variables, kind='f', signif=0.05,
                       verbose=True):
        """Compute test statistic for null hypothesis of Granger-noncausality,
        general function to test joint Granger-causality of multiple variables

        Parameters
        ----------
        equation : string or int
            Equation to test for causality
        variables : sequence (of strings or ints)
            List, tuple, etc. of variables to test for Granger-causality
        kind : {'f', 'wald'}
            Perform F-test or Wald (chi-sq) test
        signif : float, default 5%
            Significance level for computing critical values for test,
            defaulting to standard 0.95 level

        Notes
        -----
        Null hypothesis is that there is no Granger-causality for the indicated
        variables. The degrees of freedom in the F-test are based on the
        number of variables in the VAR system, that is, degrees of freedom
        are equal to the number of equations in the VAR times degree of freedom
        of a single equation.

        Returns
        -------
        results : dict
        """
        if isinstance(variables, (str, int, np.integer)):  # basestring in the original Python 2 code
            variables = [variables]

        k, p = self.neqs, self.k_ar

        # number of restrictions
        N = len(variables) * self.k_ar

        # Make restriction matrix
        C = np.zeros((N, k ** 2 * p + k), dtype=float)

        eq_index = self.get_eq_index(equation)
        vinds = mat([self.get_eq_index(v) for v in variables])

        # remember, vec is column order!
        offsets = np.concatenate([k + k ** 2 * j + k * vinds + eq_index
                                  for j in range(p)])
        C[np.arange(N), offsets] = 1

        # Lutkepohl 3.6.5
        Cb = np.dot(C, vec(self.params.T))
        middle = L.inv(chain_dot(C, self.cov_params, C.T))

        # wald statistic
        lam_wald = statistic = chain_dot(Cb, middle, Cb)

        if kind.lower() == 'wald':
            df = N
            dist = stats.chi2(df)
        elif kind.lower() == 'f':
            statistic = lam_wald / N
            df = (N, k * self.df_resid)
            dist = stats.f(*df)
        else:
            raise Exception('kind %s not recognized' % kind)

        pvalue = dist.sf(statistic)
        crit_value = dist.ppf(1 - signif)

        conclusion = 'fail to reject' if statistic < crit_value else 'reject'
        results = {
            'statistic' : statistic,
            'crit_value' : crit_value,
            'pvalue' : pvalue,
            'df' : df,
            'conclusion' : conclusion,
            'signif' :  signif
        }

        if verbose:
            summ = output.causality_summary(results, variables, equation, kind)

            print(summ)

        return results
# Normal distribution
showDistribution(stats.norm, stats.norm(loc=2, scale=4),
                 'Normal Distribution', 'Z', 'P(Z)','')

# Exponential distribution
showDistribution(stats.expon, stats.expon(loc=-2, scale=4),
                 'Exponential Distribution', 'X', 'P(X)','')

# Student's t-distribution
# ... with 4, and with 10 degrees of freedom (DOF)
plot(x, stats.norm.pdf(x), 'g')
hold(True)  # hold() was removed in matplotlib >= 3.0; overplotting is now the default
showDistribution(stats.t(4), stats.t(10),
                 'T-Distribution', 'X', 'P(X)',['normal', 't=4', 't=10'])

# F-distribution
# ... with (3,4) and (10,15) DOF
showDistribution(stats.f(3,4), stats.f(10,15),
                 'F-Distribution', 'F', 'P(F)',['(3,4) DOF', '(10,15) DOF'])

# Uniform distribution
showDistribution(stats.uniform,'' ,
                 'Uniform Distribution', 'X', 'P(X)','')

# Logistic distribution
showDistribution(stats.norm, stats.logistic,
                 'Logistic Distribution', 'X', 'P(X)',['Normal', 'Logistic'])

# Lognormal distribution
x = logspace(-9,1,1001)+1e-9
showDistribution(stats.lognorm(2), '',
                 'Lognormal Distribution', 'X', 'lognorm(X)','', xmin=-0.1)