Example #1
def f_test_two_p_variance(var1, var2, n1, n2, alpha_level):
    '''
    Perform an F-test for comparing variances between two samples.
    
    Inputs:
        var1: the variance of population 1.
        var2: the variance of population 2.
        n1: sample size of population 1.
        n2: sample size of population 2.
        alpha_level: the alpha threshold for rejecting the null hypothesis (can be 0.1, 0.05, or 0.01).

    Outputs: 
        F_ratio: the ratio of variances between the two populations.
        p_value: the two-sided p-value of the test.
        (lower, upper): the confidence interval for the variance ratio at the given alpha level.
    '''
    # F-test statistic
    F_ratio = var1 / var2

    # p-value (two-sided test: the question is whether the two variances are equal)
    p_value = min(f.cdf(F_ratio, n1 - 1, n2 - 1),
                  1 - f.cdf(F_ratio, n1 - 1, n2 - 1))
    p_value = p_value * 2

    # 95% C.I. if alpha_level = 0.05.
    upper = 1 / (f.ppf(alpha_level / 2, n1 - 1, n2 - 1)) * F_ratio
    lower = 1 / (f.ppf(1 - (alpha_level / 2), n1 - 1, n2 - 1)) * F_ratio

    # Outputs
    return round(F_ratio, 4), round(p_value, 4), (round(lower,
                                                        4), round(upper, 4))
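A minimal usage sketch (not part of the source): it assumes `from scipy.stats import f` is in scope, and the variances and sample sizes below are invented for illustration.

from scipy.stats import f  # the function above calls f.cdf and f.ppf

# Invented inputs: sample variances 4.2 and 2.9, both from samples of size 25.
F_ratio, p_value, ci = f_test_two_p_variance(4.2, 2.9, 25, 25, 0.05)
print(F_ratio, p_value, ci)  # reject H0 of equal variances if p_value < 0.05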
Example #2
def mc_wrapper(model_func, data, origin, weights, iterations, stat_cutoff):
    # The actual Monte Carlo function for searching the error space.

    rng = default_rng()
    n_pop, n_params = origin.shape
    random_hops = rng.standard_normal((iterations, n_pop, n_params)) * weights

    landscape_RSS = np.ones(iterations)
    model_landscape = np.zeros((iterations, n_pop, n_params))

    origin_RSS = np.linalg.norm(data - model_func(origin))
    model_DOF = len(data) - np.prod((n_pop, n_params))

    new_hop = origin

    for idx, hop in enumerate(random_hops):

        hop_RSS = np.linalg.norm(data - model_func(new_hop))
        landscape_RSS[idx] = hop_RSS
        model_landscape[idx] = new_hop

        if ftest.cdf(hop_RSS / origin_RSS, model_DOF, model_DOF) > stat_cutoff:
            new_hop = abs(origin +
                          hop)  #prevent negative values, particularly in amp
        else:
            new_hop = abs(new_hop +
                          hop)  #prevent negative values, particularly in amp

    landscape_statistics = ftest.cdf(landscape_RSS / origin_RSS, model_DOF,
                                     model_DOF)
    return model_landscape, landscape_statistics
def f_test(data1, data2, tail="both", ratio=1):
    """
    F test based on the F distribution
    :param data1: sample values 1
    :param data2: sample values 2
    :param tail: tail type ("both", "left", or "right")
    :param ratio: hypothesized ratio of the two variances (default 1)
    :return: F value, df1, df2, p-value
    """

    assert tail in ["both", "left", "right"], \
        'tail should be one of “both”, “left”, “right”'

    n1 = len(data1)
    n2 = len(data2)
    sample1_var = variance(data1)
    sample2_var = variance(data2)
    f_val = sample1_var / sample2_var / ratio
    df1 = n1 - 1
    df2 = n2 - 1

    if tail == "both":
        p = 2 * min(1 - f.cdf(f_val, df1, df2), f.cdf(f_val, df1, df2))
    elif tail == "left":
        p = f.cdf(f_val, df1, df2)
    else:
        p = 1 - f.cdf(f_val, df1, df2)

    return f_val, df1, df2, p
Example #4
def f_test_var(data1, data2):
    """
    F Test to test hypothesis if two samples have different variances.
    H0: samples have same variances (p-value close to one).

    Parameters
    ----------
    data1: n,1 - dim array with data
    data2: n,1 - dim array with data

    Returns
    -------
    p-value of F test

    Notes
    -----
    See 3rd Edition of Numerical recipes chapter 14.2.2, p.730
    """
    var1, var2 = np.var(data1, ddof=1), np.var(data2,
                                               ddof=1)  # compute variance
    df1, df2 = len(data1) - 1, len(data2) - 1  # compute degrees of freedom
    if var1 > var2:
        prob = 2. * f.cdf(var1 / var2, df1, df2)
    else:
        prob = 2. * f.cdf(var2 / var1, df2, df1)
    if prob > 1.:
        return 2. - prob
    else:
        return prob
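A quick sanity check with synthetic data (assumed imports shown): two samples drawn from the same normal distribution should yield a p-value well above common significance levels.

import numpy as np
from scipy.stats import f  # f_test_var above uses np and f.cdf

rng = np.random.default_rng(0)
a, b = rng.normal(size=50), rng.normal(size=50)  # equal true variances
print(f_test_var(a, b))  # expect a large p-value under H0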
def perform_f_test(data1, data2, alpha, alternative):
    statistics = np.var(data1, ddof=1) / np.var(data2, ddof=1)
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    if alternative == "≠":
        print(
            f"< 0, {round(f.ppf(alpha / 2, df1, df2), 3)} > < {round(f.ppf(1 - alpha / 2, df1, df2), 3)} , ∞)"
        )
    elif alternative == "<":
        print(f"< 0, {round(f.ppf(alpha, df1, df2), 3)} >")
    elif alternative == ">":
        print(f"< {round(f.ppf(1 - alpha, df1, df2), 3)} , ∞)")
    else:
        print("Incorrect alternative")
        return
    print(f"Test statistics: {round(statistics, 4)}")
    median = f.ppf(0.5, df1, df2)
    if alternative == "≠":
        if statistics < median:
            pvalue = f.cdf(statistics, df1, df2) * 2
        else:
            pvalue = (1 - f.cdf(statistics, df1, df2)) * 2
    elif (alternative == "<"
          and statistics < median) or (alternative == ">"
                                       and statistics > median):
        pvalue = f.cdf(statistics, df1, df2)
    else:
        pvalue = 1 - f.cdf(statistics, df1, df2)

    if pvalue < alpha:
        print("H0 rejected")
    else:
        print("H0 NOT rejected")
    print(f"p-value: {round(pvalue, 4)}")
    return pvalue
Example #6
def f_test_var(data1,data2):
    """
    F Test to test hypothesis if two samples have different variances.
    H0: samples have same variances (p-value close to one).

    Parameters
    ----------
    data1: n,1 - dim array with data
    data2: n,1 - dim array with data

    Returns
    -------
    p-value of F test

    Notes
    -----
    See 3rd Edition of Numerical recipes chapter 14.2.2, p.730
    """
    var1, var2 = np.var(data1, ddof=1), np.var(data2, ddof=1)  # compute variance
    df1, df2 = len(data1) - 1, len(data2) - 1  # compute degrees of freedom
    if var1 > var2:
        prob = 2. * f.cdf(var1 / var2, df1, df2)
    else:
        prob = 2. * f.cdf(var2 / var1, df2, df1)
    if prob > 1.:
        return 2. - prob
    else:
        return prob
def anova_twoway(data):
    """双因素方差分析2×2"""
    r, s = 2, 2
    data = np.array(data)
    group_szs = np.tile(np.size(data, axis=1), (np.size(data, axis=0), 1))
    n = sum(group_szs)  # total number of observations

    # Compute means
    group_means = np.mean(data, axis=1)
    group_mean = group_means.dot(group_szs) / n
    group_i_means = np.array([mean(group_means[:2]), mean(group_means[2:])])
    group_j_means = np.array([(group_means[0] + group_means[2]) / 2,
                              (group_means[1] + group_means[3]) / 2])

    # Effects of each level of factors i and j
    group_i_effect = group_i_means - group_mean
    group_j_effect = group_j_means - group_mean
    # Interaction effects of i and j
    group_ij_effect = (group_means.reshape(2, 2) - np.tile(
        group_mean,
        (2, 2))) - np.tile(group_i_effect,
                           (2, 1)).T - np.tile(group_j_effect, (2, 1))

    # Total variation
    sst = np.sum((data - group_mean)**2)
    # Variation due to the first factor
    ss_method = ((group_i_means - group_mean)**2).dot(
        [np.sum(group_szs[:2]), np.sum(group_szs[2:])])
    # Variation due to the second factor
    ss_reward = ((group_j_means - group_mean)**2).dot([
        np.sum([group_szs[0], group_szs[2]]),
        np.sum([group_szs[1], group_szs[3]])
    ])
    # Variation due to the interaction of the two factors
    ss_mr = (group_ij_effect.reshape(1, 4)**2).dot(group_szs)
    # Variation due to all other factors (error)
    ss_error = np.sum((data - group_means.reshape(-1, 1))**2)

    # Mean square of the error term
    ms_error = ss_error / (n - r * s)
    # MS, F and p values for the first factor
    ms_method = ss_method / (r - 1)
    f_ms_method = ms_method / ms_error
    p_ms_method = 1 - f.cdf(f_ms_method, r - 1, n - r * s)
    # MS, F and p values for the second factor
    ms_reward = ss_reward / (r - 1)
    f_ms_reward = ms_reward / ms_error
    p_ms_reward = 1 - f.cdf(f_ms_reward, r - 1, n - r * s)
    # MS, F and p values for the interaction of the two factors
    ms_mr = ss_mr / (r - 1)
    f_ms_mr = ms_mr / ms_error
    p_ms_mr = 1 - f.cdf(f_ms_mr, r - 1, n - r * s)

    # Assemble the rows of the output table
    method = [r - 1, ss_method, ms_method, f_ms_method, p_ms_method]
    reward = [r - 1, ss_reward, ms_reward, f_ms_reward, p_ms_reward]
    mr = [r - 1, ss_mr, ms_mr, f_ms_mr, p_ms_mr]
    residuals = [n - r * s, ss_error, ms_error, None, None]

    return np.array([method, reward, mr, residuals]).astype(np.float32)
def app_time(x, dfn, dfd, a, b):
    mean = 0.0
    dist = np.divide(f.pdf(x, dfn, dfd), (f.cdf(b, dfn, dfd) - f.cdf(a, dfn, dfd))) # f-dist for duration, truncated from a to b
    dist = np.divide(dist, np.sum(dist)) # normalization

    for item in zip(x, dist): mean = mean + (item[0] * item[1]) # expectation of duration

    return dist, mean
Example #9
def dof(res1, v1, res2, v2):

    # Calculate chi**2 sums.
    Ea_1 = np.sum(res1**2, axis=0)
    Ea_2 = np.sum(res2**2, axis=0)

    Fobs = (Ea_1 / v1) / (Ea_2 / v2)
    P = 1 - (f.cdf(Fobs, v1, v2) - f.cdf(1 / Fobs, v1, v2))

    return P
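A hedged illustration of this helper with synthetic residual columns; the arrays and degrees of freedom below are invented.

import numpy as np
from scipy.stats import f  # dof() above uses np and f.cdf

rng = np.random.default_rng(1)
res1 = rng.normal(size=(20, 2))  # residuals from model 1, two columns
res2 = rng.normal(size=(24, 2))  # residuals from model 2
print(dof(res1, 19, res2, 23))   # two-sided P per column for the variance ratio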
Example #10
def app_time(x, dfn, dfd, a, b):
    mean = 0.0
    dist = np.divide(
        f.pdf(x, dfn, dfd),
        (f.cdf(b, dfn, dfd) -
         f.cdf(a, dfn, dfd)))  # f-dist for duration, truncated from a to b
    dist = np.divide(dist, np.sum(dist))  # normalization

    for item in zip(x, dist):
        mean = mean + (item[0] * item[1])  # expectation of duration

    return dist, mean
Example #11
def two_way(data, f1_name, f2_name):
    """Run two way analysis of variance in a factor by factor design.
       * Identify main effects for each factor.
       * Identify interaction between factors.
       * Print a table with a spss-style output.


    Parameters
    ----------
    data: ndarray
        | Each row represents a 1st factor level.
        | Each column respresents a 2nd factor level.
        | Each layer (depth dimension) is an observation.

    """

    #Sums of squares
    factor_1_effect, factor_2_effect, within_error = factor_sumofsq(data)

    total_sumofsq = np.sum((data.ravel() - data.mean())**2)

    interaction_sumofsq = total_sumofsq - factor_1_effect - factor_2_effect - within_error

    #degrees of freedom
    factor_1_df, factor_2_df = data.shape[1] - 1, data.shape[2] - 1
    error_df = (data.shape[0] - 1) * (data.shape[1] * data.shape[2])
    interaction_df = factor_1_df * factor_2_df
    #total_df = factor_1_df + factor_2_df + error_df + interaction_df

    #Mean squares
    within_mean_ssq = within_error / error_df
    f1_mean_ssq, f2_mean_ssq = factor_1_effect / factor_1_df, factor_2_effect / factor_2_df
    interaction_ssq = interaction_sumofsq / interaction_df

    #F values
    F1, F2 = f1_mean_ssq / within_mean_ssq, f2_mean_ssq / within_mean_ssq
    F_interaction = interaction_ssq / within_mean_ssq

    #P values
    p_F1 = 1 - f.cdf(F1, factor_1_df, error_df)
    p_F2 = 1 - f.cdf(F2, factor_2_df, error_df)
    p_interaction = 1 - f.cdf(F_interaction, interaction_df, error_df)

    print(
        tabulate([[f1_name, f1_mean_ssq, factor_1_df, F1, p_F1],
                  [f2_name, f2_mean_ssq, factor_2_df, F2, p_F2],
                  [
                      'Interaction', interaction_ssq, interaction_df,
                      F_interaction, p_interaction
                  ]], ['Source', 'Mean square', 'df', 'F-values', 'p-values'],
                 tablefmt='grid'))
Example #12
    def granger_causality_test(self, alpha=0.05):
        """
        Computes the Granger causality test on the bivariate VAR model

        :param alpha: (float) Significance level (0.05 by default)
        :return: (dict) Booleans 'x_granger_causes_y' and 'y_granger_causes_x'
        """

        # Get lagged matrix and the two response variables
        idx = self.lag_order + self.fit_intercept
        ydx = range(idx, self.design.shape[1])
        ydx = [0] + list(ydx) if self.fit_intercept else ydx
        xlag = self.design[:, :idx]
        ylag = self.design[:, ydx]
        x = self.response[:, 0]
        y = self.response[:, 1]

        # Regress x against lags of itself
        self.lr.fit_intercept = False
        self.lr.fit(xlag, x)
        xrss_r, xddof_r = self.lr.rss, self.lr.ddof

        # Regress y against lags of itself
        self.lr.fit(ylag, y)
        yrss_r, yddof_r = self.lr.rss, self.lr.ddof

        # Get unrestricted RSS from the original VAR model
        x_resid = self.residuals[:, 0]
        y_resid = self.residuals[:, 1]
        xrss_u = x_resid @ x_resid
        yrss_u = y_resid @ y_resid
        xddof_u = x_resid.shape[0] - self.k_params / 2
        yddof_u = y_resid.shape[0] - self.k_params / 2

        # Compute F test
        f_stat_x = ((xrss_r - xrss_u) / (xddof_r - xddof_u))
        f_stat_x *= xddof_u / xrss_u
        f_stat_y = (yrss_r - yrss_u) / (yddof_r - yddof_u)
        f_stat_y *= yddof_u / yrss_u

        # p-values for the F-test: under the null the statistic follows
        # F(ddof_r - ddof_u, ddof_u), and large values reject, so take the upper tail
        x_pval = 1 - ftest.cdf(f_stat_x, xddof_r - xddof_u, xddof_u)
        y_pval = 1 - ftest.cdf(f_stat_y, yddof_r - yddof_u, yddof_u)

        # Null hypothesis is x does not granger cause y
        result = {}
        result['x_granger_causes_y'] = x_pval < alpha
        result['y_granger_causes_x'] = y_pval < alpha

        return result
Example #13
def get_p_value(year, month):
    global info
    if year == 2018:
        info = minK_2018
        f_stat = (
            (info[month][1] - info[month][2]) / 2) / (info[month][2] /
                                                      (info[month][3] - 4))
        p_value_2018[month] = 1 - fisher_f.cdf(f_stat, 2, info[month][3] - 4)
        # p_value_2018[month] = f_stat
    elif year == 2019:
        info = minK_2019
        f_stat = (
            (info[month][1] - info[month][2]) / 2) / (info[month][2] /
                                                      (info[month][3] - 4))
        p_value_2019[month] = 1 - fisher_f.cdf(f_stat, 2, info[month][3] - 4)
Example #14
def f_compare(ndata, nparas, new_chi, best_chi, nfix=1):
    """Return the probability calculated using  the F-test.

    The null model (i.e., best-fit solution) is compared to an alternate model
    where one or more parameters are fixed.

    Parameters
    ----------
    ndata : int
        Number of data points: :math:`N`.
    nparas : int
        Number of variables in the alternate model.
    new_chi : float
        Chi-square of the alternate model.
    best_chi : float
        Chi-square of the null model.
    nfix : int
        Number of fixed parameters (default is 1).

    Returns
    -------
    prob : float
       Value of the calculated probability.

    """
    nparas = nparas + nfix
    nfree = ndata - nparas
    nfix = 1.0*nfix
    dchi = new_chi / best_chi - 1.0
    return f.cdf(dchi * nfree / nfix, nfix, nfree)
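To make the calling convention concrete, a hedged example with invented chi-square values: 100 data points, a 5-parameter alternate model, and one fixed parameter.

from scipy.stats import f  # f_compare above calls f.cdf

# Invented numbers: chi-square rises from 90.0 to 97.0 after fixing one
# of the five parameters at a trial value.
prob = f_compare(ndata=100, nparas=5, new_chi=97.0, best_chi=90.0, nfix=1)
print(prob)  # values near 1 mean fixing the parameter significantly worsens the fit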
Example #15
def ap_TS(mr, mf):

    dT, dN = mr.shape
    dT, dK = mf.shape

    valpha = np.empty((dN, 1))
    valpha_t = np.empty((dN, 1))
    mresid = np.empty((dT, dN))

    # Time-series regressions

    vones = np.ones((dT, 1))
    for i in range(0, dN):
        vres = newey(mr[:, i], np.hstack((vones, mf)).reshape(dT, dK + 1), 0)
        valpha[i] = vres.beta[0]
        valpha_t[i] = vres.tstat[0]
        mresid[:, i] = vres.resid

    ## Properties of risk premia

    vlambda = np.mean(mf, 0).transpose()
    vlambda_t = vlambda / np.sqrt(np.diag(np.cov(mf, rowvar=0)) / dT)

    ## GRS test

    dGRS = ((dT - dN - dK) / dN) * 1 / (
        1 + np.mean(mf, 0) @ np.linalg.inv(np.cov(mf, rowvar=0, bias=True))
        @ np.mean(mf, 0).transpose()) * valpha.transpose() @ np.linalg.inv(
            np.cov(mresid, rowvar=0, bias=True)) @ valpha
    dGRS_p = 1 - f.cdf(dGRS, dN, dT - dN - dK)

    return valpha, valpha_t, vlambda, vlambda_t, dGRS, dGRS_p
Example #16
def t_test(group1, group2):
    mean1 = np.mean(group1)
    mean2 = np.mean(group2)
    std1 = np.std(group1)
    std2 = np.std(group2)
    nobs1 = len(group1)
    nobs2 = len(group2)

    modified_std1 = np.sqrt(np.float32(nobs1) / np.float32(nobs1 - 1)) * std1
    modified_std2 = np.sqrt(np.float32(nobs2) / np.float32(nobs2 - 1)) * std2
    # F-test
    f1 = np.square(modified_std1) / np.square(modified_std2)
    fp = 1 - f.cdf(f1, nobs1 - 1, nobs2 - 1)
    if fp > 0.05:
        (statistic, pvalue) = stats.ttest_ind_from_stats(mean1=mean1,
                                                         std1=modified_std1,
                                                         nobs1=nobs1,
                                                         mean2=mean2,
                                                         std2=modified_std2,
                                                         nobs2=nobs2,
                                                         equal_var=True)
    else:
        (statistic, pvalue) = stats.ttest_ind_from_stats(mean1=mean1,
                                                         std1=modified_std1,
                                                         nobs1=nobs1,
                                                         mean2=mean2,
                                                         std2=modified_std2,
                                                         nobs2=nobs2,
                                                         equal_var=False)
    return [mean1, std1, mean2, std2, fp, statistic, pvalue]
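A short driver for the function above (assumed imports shown; the snippet uses `np`, `stats`, and `f` without importing them), with made-up measurements.

import numpy as np
from scipy import stats
from scipy.stats import f  # t_test above uses np, stats and f

g1 = [5.1, 4.8, 5.5, 5.0, 4.9]  # invented group measurements
g2 = [6.0, 5.8, 6.3, 6.1, 5.7]
# returns [mean1, std1, mean2, std2, fp, statistic, pvalue]
print(t_test(g1, g2))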
Example #17
def ftest(self):
    """
    Evaluates the significance of the predictors as regards the behaviour of the observations by performing
    an F-test. In particular, the null hypothesis states that the predictors do not explain the variation
    of the observations at all. The complement of the p-value of this test (1 - p_value) is returned.
    Refer to the "fstats" method if what you are looking for is the value of the f-statistic rather than
    the p-value.
    """
    corrected_data = self.corrected_data()

    # Get the error obtained when using the full model (correctors + predictors)
    prediction_error = corrected_data - self.predicted_data()

    # Now compare the variances of the errors
    # Residual Sum of Squares for restricted model
    rss1 = (corrected_data**2).sum(axis=0)

    # Residual Sum of Squares for full model
    rss2 = (prediction_error**2).sum(axis=0)

    # Degrees of freedom
    dfc = self.df_correction()
    dfp = self.df_prediction()

    n = corrected_data.shape[0]
    df1 = dfp  # degrees of freedom of rss1 - rss2
    df2 = n - dfc - dfp  # degrees of freedom of rss2

    # Compute f-scores
    var1 = (rss1 - rss2) / df1
    var2 = rss2 / df2
    f_score = var1 / var2

    # Compute p-values
    return f_stat.cdf(f_score, df1, df2)
Example #18
def test_scipy_f():
    rng = np.random.RandomState(20120407)
    x = rng.normal(size=(100)) * 4
    for m in np.arange(1, 15):
        for n in np.arange(1, 15):
            assert_array_almost_equal(f_sf(x, m, n), f.sf(x, m, n))
            assert_array_almost_equal(f_cdf(x, m, n), f.cdf(x, m, n))
Example #19
def f_test(chi1,df1,chi2,df2,red_chi = True):
    """
    F Test to compare hypothesis 1 against hypothesis 2.
    Returns the significance that hypothesis 1 is more probable than hypothesis 2,
    i.e. if close to one, hypothesis one is preferred.

    Parameters
    ----------
    chi1: n-dim array / scalar, chi^2 value of first hypothesis test 
    df1: n-dim array / scalar, degrees of freedom of first hypothesis test 
    chi2: n-dim array / scalar, chi^2 value of second hypothesis test 
    df2: n-dim array / scalar, degrees of freedom of second hypothesis test 
    red_chi: if True, F-test is calculated for reduced chi values

    Returns
    -------
    p-value of F-test (float)
    """

#    if chi1/df1 > chi2/df2:
#	prob = 2. * f.cdf(chi1/df1, chi2/df2, df1, df2)
#    else:
#	prob = 2. * f.cdf(chi2/df2, chi1/df1, df2, df1)
    if red_chi:
        fval = (chi1 / df1) / (chi2 / df2)
    else:
        fval = chi1 / chi2
    prob = 2. * f.cdf(fval, df1, df2)
    if prob > 1.:
        return 2. - prob
    else:
        return prob
Example #20
def fisher():
    global DisYs
    global F_val
    global F_cr
    AvDisYs = sum(DisYs) / len(DisYs)
    Sad = 0
    for dis in DisYs:
        Sad += dis * (m - 1)
    Sad = Sad * m / (N - d)
    F_val = Sad / AvDisYs
    x_vec = [i * 0.001 for i in range(int(10 / 0.001))]
    F_cr = None
    for i in x_vec:
        if abs(f.cdf(i, N - d, f3) - p) < 0.0001:
            F_cr = i
            break
    if not F_cr:
        print(
            "\nSomething went wrong.\nUnable to calculate critical value for Fisher's test"
        )
    elif F_cr >= F_val:
        print(
            "\nF = {}\t\t\tF_cr = {}\t\t\tF =< F_cr\nAccording to Fisher's F-test model is adequate to the original."
            .format(F_val, F_cr))
        return True
    else:
        print(
            "\nF = {}\t\t\tF_cr = {}\t\t\tF > F_cr\nAccording to Fisher's F-test model is not adequate to the original."
            .format(F_val, F_cr))
        return False
Example #21
def F_stat(multarray, labels, cdf=True):
    """
    Given a multidimensional array multarray and a set of trial labels
    (0 and 1) corresponding to the 0th axis of multarray, return an array
    of cdf values calculated from the F distribution that represents the
    ratio of means of the two label groups along the 0th dimension.
    If cdf is False, return the F statistic map.
    """
    lls = np.array(labels)  # make sure this is an array
    arr0 = multarray[lls == 0]
    arr1 = multarray[lls == 1]

    # if each element of arr0 is chi2(1), then the mean of d such
    # arrays is chi2(d)/d, and a ratio of such variables is F(d1, d2)
    chi2n = np.nanmean(arr0, axis=0)
    chi2d = np.nanmean(arr1, axis=0)

    # calculate degrees of freedom: assume 2 per pixel per trial
    nu = 2
    dfn = nu * np.sum(~np.isnan(arr0), axis=0)
    dfd = nu * np.sum(~np.isnan(arr1), axis=0)

    Fmap = chi2n / chi2d

    if cdf:
        # calculate cdf
        return fdist.cdf(Fmap, dfn, dfd)
    else:
        # return statistic itself
        return Fmap
Example #22
def assert_equality_in_groups(results, alpha=0.05, groups="groups", test_var="test_var"):
    data = pd.DataFrame(results)
    means_models = data.groupby(groups).agg({test_var: np.mean})[test_var]
    grand_mean = data[test_var].mean()

    n = len(data)
    n_models = len(means_models)

    # Degrees of freedom
    df_models = n_models - 1  # Numerator
    df_error = n - n_models  # Denominator
    df_total = df_models + df_error

    # Sum of Squares
    ss_total = sum(data[test_var].map(lambda x: (x-grand_mean)**2))
    ss_error = sum(data.apply(lambda x: (x[test_var]-means_models[x[groups]])**2, axis=1))
    #ss_models = ss_total - ss_error
    ss_models = sum(means_models.map(lambda x: (x-grand_mean)**2)*(n/n_models))

    # Mean Square (Variance)
    ms_models = ss_models / df_models
    ms_error = ss_error / df_error

    # F Statistic
    f = ms_models / ms_error

    p = 1. - F.cdf(f, df_models, df_error)
    assert p >= alpha, "Theres is statistic evidence to confirm that the measure and the std measure is " \
                       "quite diferent for alpha=%.3f:\n %s\n\nANOVA table\n%s" % (alpha, data, tabulate([
        ["Source of Variation", "DF", "SS", "MS", "F", "p-value"],
        [groups, "%d" % df_models, "%.4f" % ss_models, "%.4f" % ms_models, "%.4f" % f, "%.4f" % p],
        ["Error", "%d" % df_error, "%.4f" % ss_error, "%.4f" % ms_error, "", ""],
        ["Total", "%d" % df_total, "%.4f" % ss_total, "", "", ""]
    ]))
Example #23
def overall_anova(Xin, Yin):
    '''
        Xin : 2-D array
        Yin : 1-D array
    '''
    n = np.shape(Xin)[0]  #- Number of samples
    p = np.shape(Xin)[1]  #- Number of regression parameters

    X = np.hstack((np.vstack(np.ones(n)), Xin))
    Y = Yin

    #- Estimated regression coefficients: As a 1_D array and duplicated
    #  so each row gives all the estimated regression coefficients:
    beta = np.matmul(np.matmul(inv(np.matmul(X.T, X)), X.T), Y)
    beta_n = np.reshape(np.resize(beta, np.size(beta) * n), (n, np.size(beta)))

    #- Fitted response values (Yhat) and sum squares:
    Yhat = np.sum(beta_n * X, axis=1)
    SSR = np.sum((Yhat - np.mean(Yin))**2)
    SSE = np.sum((Yin - Yhat)**2)
    SSTO = np.sum((Yin - np.mean(Yin))**2)

    #- Mean square:
    MSR = SSR / (p - 1)
    MSE = SSE / (n - p)

    #- F-stat
    f_statistic = MSR / MSE
    p_value = 1.0 - f.cdf(f_statistic, p - 1, n - p)

    return SSR, p - 1, MSR, f_statistic, p_value, SSE, n - p, MSE, SSTO, n - 1
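A hedged smoke test on synthetic regression data; `inv` is assumed to be `numpy.linalg.inv`, which the snippet calls unqualified.

import numpy as np
from numpy.linalg import inv  # overall_anova above calls inv()
from scipy.stats import f

rng = np.random.default_rng(2)
X = rng.normal(size=(40, 2))  # two predictors
y = 1.0 + 2.0 * X[:, 0] - 0.5 * X[:, 1] + rng.normal(scale=0.3, size=40)
SSR, df_r, MSR, F, p, SSE, df_e, MSE, SSTO, df_t = overall_anova(X, y)
print(F, p)  # a strongly linear relation gives a large F and a tiny p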
Example #24
def hotelling_pval(X, mu):
    xbar = np.mean(X, axis=0)
    W = np.cov(X.T)
    n, p = X.shape
    t2 = n*np.dot(xbar-mu, np.linalg.solve(W, (xbar-mu).T))
    fstat = (n-p)*t2/p/(n-1)
    return 1-f.cdf(fstat, p, n-p)
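For illustration (invented data, assumed imports): testing whether the mean vector of a synthetic 3-variable sample is zero.

import numpy as np
from scipy.stats import f  # hotelling_pval above uses f.cdf

rng = np.random.default_rng(3)
X = rng.normal(size=(30, 3))           # 30 observations, 3 variables
print(hotelling_pval(X, np.zeros(3)))  # H0: population mean vector is 0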
def anova_oneway(data):
    k = len(data)
    assert k > 1

    group_means = [mean(group) for group in data]
    group_szs = [len(group) for group in data]
    n = sum(group_szs)
    assert n > k

    grand_mean = sum(group_mean * group_sz for group_mean,
                     group_sz in zip(group_means, group_szs))/n

    sst = sum(sum((y-grand_mean)**2 for y in group)for group in data)
    ssg = sum((group_mean-grand_mean)**2*group_sz for group_mean,
              group_sz in zip(group_means, group_szs))
    sse = sst-ssg

    dfg = k-1
    dfe = n-k
    msg = ssg/dfg
    mse = sse/dfe

    f_value = msg/mse
    p = 1-f.cdf(f_value, dfg, dfe)

    return f_value, dfg, dfe, p
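A minimal call with invented groups, assuming `mean` is `statistics.mean` (the snippet uses it unqualified) and `f` is `scipy.stats.f`.

from statistics import mean  # anova_oneway above uses mean()
from scipy.stats import f

groups = [[6.9, 5.4, 5.8, 4.6, 4.0],   # invented observations per group
          [8.3, 6.8, 7.8, 9.2, 6.5],
          [8.0, 10.5, 8.1, 6.9, 9.3]]
print(anova_oneway(groups))  # (f_value, dfg, dfe, p)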
def f_test(X, y, beta, alpha):
    ######## PERFORM F-TEST ########
    # INPUT
    # X: n by k (n=# of observations, k=# of input variables)
    # y: output target
    # beta: vector of estimated coefficient by do_linear_regression
    #       beta[0] is intercept
    #       the remained elements correspond to variables in X
    # alpha: significance level
    # OUTPUT
    # f: f-test statistic of the model
    # pvalue: p-value of f-test
    # decision: f-test result
    #           True = reject null hypothesis
    #           False = fail to reject the null hypothesis

    # TODO: F-test
    f = 0
    pvalue = 0
    decision = None
    n, p = X.shape
    MSR = cal_SS(X, y, beta)[1] / p
    MSE = cal_SS(X, y, beta)[2] / (n - p - 1)
    f = MSR / MSE
    pvalue = 1 - (fdist.cdf(f, p, n - p - 1))

    if pvalue < alpha:
        decision = True
    else:
        decision = False
    return (f, pvalue, decision)
Example #27
def ANOVA(m_list, std_list, n_list, verbose=False):
    # m_list = list of means
    # std_list = list of std devs
    # n_list = list of number of elements in each sample
    df1 = len(n_list) - 1
    m_list = np.asarray(m_list)
    std_list = np.asarray(std_list)
    n_list = np.asarray(n_list)
    df2 = np.sum(n_list) - df1 - 1
    x_hat = np.sum(n_list * m_list) / float(np.sum(n_list))
    MS_error = np.sum(n_list * np.square(std_list)) / float(df2)
    MS_group = np.sum(n_list * np.square(m_list - x_hat)) / float(df1)
    F = MS_group / MS_error
    p = 1 - f.cdf(F, df1, df2)
    if verbose:
        print('\n\n')
        print('ANOVA Summary:')
        print('df1 =', df1)
        print('df2 =', df2)
        print('SS_group =', np.sum(n_list * np.square(m_list - x_hat)))
        print('SS_error =', np.sum(n_list * np.square(std_list)))
        print('MS_group =', MS_group)
        print('MS_error =', MS_error)
        print('F =', F)
        print('p-value =', p)
        print('\n\n')
    return F, p
def anova_oneway(data):
    """单因素方差分析"""
    k = len(data)  # 类别数
    assert k > 1, '数据量得大于1'

    group_means = [mean(group) for group in data]
    group_szs = [len(group) for group in data]
    n = sum(group_szs)  # total number of observations across all groups
    assert n > k

    group_mean = sum(
        group_mean * group_sz
        for group_mean, group_sz in zip(group_means, group_szs)) / n
    sst = np.sum((np.array(data) - group_mean)**2)
    ssg = ((np.array(group_means) - group_mean)**2).dot(np.array(group_szs))
    sse = np.sum((np.array(data) - np.array(group_means).reshape(-1, 1))**2)
    assert round(sse, 2) == round(sst - ssg, 2)

    dfg = k - 1
    dfe = n - k

    msg = ssg / dfg
    mse = sse / dfe

    f_value = msg / mse
    p = 1 - f.cdf(f_value, dfg, dfe)

    return round(f_value, 2), dfg, dfe, p
Example #29
    def f_test(self, mse_A, mse_min, m):
        """F test"""
        if mse_min > mse_A:
            return False
        F = mse_A / mse_min
        p_value = f.cdf(F, m, m)      # confidence level via the CDF of the F distribution
        return (p_value > 0.95)       # return True if the confidence level exceeds 0.95
Example #30
def hotelling_pval(X, mu):
    xbar = np.mean(X, axis=0)
    W = np.cov(X.T)
    n, p = X.shape
    t2 = n * np.dot(xbar - mu, np.linalg.solve(W, (xbar - mu).T))
    fstat = (n - p) * t2 / p / (n - 1)
    return 1 - f.cdf(fstat, p, n - p)
Example #31
def GrangerTest(data, lag, kx, alpha=0.05):
    B0 = Var_fit(data, lag)
    B = vectorize(B0)
    Y, Z = Organise2(data, lag)
    k = np.shape(Y)[0]
    T = np.shape(Y)[1] + lag
    if (len(np.shape(data)) > 2):
        x = np.shape(data)
        t = x[1] * x[2]
    else:
        t = np.shape(data)[1]
    Y_BZ = Y - np.matmul(B0, Z)
    sigma = np.matmul(Y_BZ, Y_BZ.T) / t
    del Y, Y_BZ, B0

    C = Cmatrix(k, kx, lag, 0)
    C_B = np.matmul(C, B)
    ZZ_t = np.linalg.pinv(np.matmul(Z, Z.T))
    M = np.kron(ZZ_t, sigma)
    CM = np.matmul(C, M)
    CMCinv = np.linalg.pinv(np.matmul(CM, C.T))
    d1 = np.shape(C)[0]
    del CM, M, C
    temp1 = np.matmul(C_B.T, CMCinv)
    lambdaf = np.matmul(temp1, C_B)
    d2 = T - k * lag - 1
    pvalue = 1 - f.cdf(lambdaf[0] / d1, d1, d2)[0]
    result = pvalue < alpha
    return pvalue, result, lambdaf[0] / d1
Example #32
    def _f_stat_raw(self):
        """Returns the raw f-stat value."""
        from scipy.stats import f

        cols = self._x.columns

        if self._nw_lags is None:
            F = self._r2_raw / (self._r2_raw - self._r2_adj_raw)

            q = len(cols)
            if 'intercept' in cols:
                q -= 1

            shape = q, self.df_resid
            p_value = 1 - f.cdf(F, shape[0], shape[1])
            return F, shape, p_value

        k = len(cols)
        R = np.eye(k)
        r = np.zeros((k, 1))

        try:
            intercept = cols.get_loc('intercept')
            R = np.concatenate((R[0: intercept], R[intercept + 1:]))
            r = np.concatenate((r[0: intercept], r[intercept + 1:]))
        except KeyError:
            # no intercept
            pass

        return math.calc_F(R, r, self._beta_raw, self._var_beta_raw,
                           self._nobs, self.df)
def quade_test(*args):
    """Not found in either scipy or statsmodels
    Used to determine if there is at least one treatment different from the others. Note that it does not tell us which
    treatment is different or how many differences there are.

    Parameters
    ----------
    args: list or numpy array, 1-D
        An array containing the observations for each treatment. In this instance, each arg pertains to a specific
        treatment, with the indexes of each arg pertaining to a block

    Return
    ------
    q: float
        Our Q statistic, or a measure of if each treatment has identical effects
    p: float, 0 <= p <= 1
        The likelihood that our observed treatment effects would occur from a randomized block design
    """
    k = len(args)
    if k < 3:
        raise AttributeError("Quade Test not appropriate for {} levels".format(k))
    all_data = np.vstack(args).T
    b = all_data.shape[0]
    rank = np.apply_along_axis(rankdata, 1, all_data)
    rank_range = rankdata(np.ptp(all_data, axis=1))
    # S_ij = Q_i * (R_ij - (k + 1) / 2); rows are blocks, columns are treatments
    s_ij = rank_range.reshape(-1, 1) * (rank - (k + 1) / 2)
    s_j = np.sum(s_ij, axis=0)  # per-treatment totals
    a_2 = np.sum(np.power(s_ij, 2))
    B = np.sum(np.power(s_j, 2)) / b
    q = (b - 1) * B / (a_2 - B)
    p = 1 - f.cdf(q, k - 1, (b - 1) * (k - 1))
    return q, p
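A hedged usage example with invented treatment/block data; `rankdata` is assumed to be `scipy.stats.rankdata`, which the snippet calls unqualified.

import numpy as np
from scipy.stats import f, rankdata  # both used by quade_test above

# Invented data: three treatments observed over five blocks.
t1 = np.array([31., 21., 15., 12., 9.])
t2 = np.array([24., 18., 14., 10., 8.])
t3 = np.array([19., 13., 10., 7., 5.])
print(quade_test(t1, t2, t3))  # (q, p); a consistent ordering gives a small p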
Example #34
    def _f_stat_raw(self):
        """Returns the raw f-stat value."""
        from scipy.stats import f

        cols = self._x.columns

        if self._nw_lags is None:
            F = self._r2_raw / (self._r2_raw - self._r2_adj_raw)

            q = len(cols)
            if 'intercept' in cols:
                q -= 1

            shape = q, self.df_resid
            p_value = 1 - f.cdf(F, shape[0], shape[1])
            return F, shape, p_value

        k = len(cols)
        R = np.eye(k)
        r = np.zeros((k, 1))

        try:
            intercept = cols.get_loc('intercept')
            R = np.concatenate((R[0:intercept], R[intercept + 1:]))
            r = np.concatenate((r[0:intercept], r[intercept + 1:]))
        except KeyError:
            # no intercept
            pass

        return math.calc_F(R, r, self._beta_raw, self._var_beta_raw,
                           self._nobs, self.df)
Example #35
def f_compare(ndata, nparas, new_chi, best_chi, nfix=1):
    """Return the probability calculated using  the F-test.

    The null model (i.e., best-fit solution) is compared to an alternate model
    where one or more parameters are fixed.

    Parameters
    ----------
    ndata : int
        Number of data points: :math:`N`.
    nparas : int
        Number of variables in the alternate model.
    new_chi : float
        Chi-square of the alternate model.
    best_chi : float
        Chi-square of the null model.
    nfix : int
        Number of fixed parameters (default is 1).

    Returns
    -------
    prob : float
       Value of the calculated probability.

    """
    nparas = nparas + nfix
    nfree = ndata - nparas
    nfix = 1.0 * nfix
    dchi = new_chi / best_chi - 1.0
    return f.cdf(dchi * nfree / nfix, nfix, nfree)
Example #36
def regression_model(x, y):
    """ Given the samplesm, implement and run a regression model in which
        heterozygous markers are ignored (F-test)
    """
    numerator = 0
    denominator = 0
    sample_size = len(x)
    x_mean = np.mean(x)
    y_mean = np.mean(y)

    # 1. calculate beta 1, beta 0
    for i in range(sample_size):
        numerator += (x[i] - x_mean) * (y[i] - y_mean)
        denominator += np.square(x[i] - x_mean)

    beta_1 = numerator / denominator
    beta_0 = np.mean(y) - (beta_1 * np.mean(x))

    # 2. calculate Sum of Squares
    SSR = 0
    SSE = 0
    for i in range(sample_size):
        y_hat = beta_0 + (beta_1 * x[i])
        SSR += np.square(y_hat - y_mean)
        SSE += np.square(y_hat - y[i])

    SST = SSR + SSE
    R_squared = SSR / SST

    # 3.calculate F star and p val
    MSE = SSE / (sample_size - 2)
    F = SSR / MSE
    p_val = (1 - f.cdf(F, 1, sample_size - 2))

    return F, p_val
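A hedged example run on invented marker/trait arrays, with the imports the snippet itself omits.

import numpy as np
from scipy.stats import f  # regression_model above uses np and f.cdf

x = np.array([0., 0., 1., 1., 1., 2., 2.])         # invented marker dosages
y = np.array([0.9, 1.1, 1.9, 2.2, 2.0, 3.1, 2.8])  # invented trait values
print(regression_model(x, y))  # (F, p_val) for the slope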
Example #37
def f_test():
    """ Compares the best fits of two models with different numbers of
    parameters on the same data set and calculates if the higher parameter
    model result is statistically significant! """
    m1_chi_sq = float(input('Chi-Squared value from first model: '))
    p1 = float(input('Number of Parameters in first model: '))
    m2_chi_sq = float(input('Chi-Squared value from second model: '))
    p2 = float(input('Number of parameters in second model: '))
    data_bins = float(input('Number of data points/bins: '))
    null_limit = float(input('Confidence Limit (%): '))
    alpha = 100 - null_limit

    """ Calculating the F-value """
    dof1 = p2-p1
    dof2 = data_bins-p2
    numer = (m1_chi_sq - m2_chi_sq)/(p2-p1)
    denom = (m2_chi_sq)/(data_bins-p2)
    f_value = numer/denom

    """ Calculate P-value and compare to null hypothesis level of acceptance """
    p_value = 1 - f.cdf(f_value,dof1,dof2)
    p_value = p_value*100
    print('\nP-value = ',p_value,'% (probability of chance improvement)')
    if p_value >= alpha:
        print('No significant improvement in fit!')
    else:
        print('Significant improvement in fit!')
Example #38
def hotelling_t2(X, Y):

    # X and Y are 3D arrays
    # dim 0: number of features
    # dim 1: number of subjects
    # dim 2: number of mesh nodes or voxels

    nx = X.shape[1]
    ny = Y.shape[1]
    p = X.shape[0]
    Xbar = X.mean(1)
    Ybar = Y.mean(1)
    Xbar = Xbar.reshape(Xbar.shape[0], 1, Xbar.shape[1])
    Ybar = Ybar.reshape(Ybar.shape[0], 1, Ybar.shape[1])

    X_Xbar = X - Xbar
    Y_Ybar = Y - Ybar
    Wx = np.einsum('ijk,ljk->ilk', X_Xbar, X_Xbar)
    Wy = np.einsum('ijk,ljk->ilk', Y_Ybar, Y_Ybar)
    W = (Wx + Wy) / float(nx + ny - 2)
    Xbar_minus_Ybar = Xbar - Ybar
    x = np.linalg.solve(W.transpose(2, 0, 1),
                        Xbar_minus_Ybar.transpose(2, 0, 1))
    x = x.transpose(1, 2, 0)

    t2 = np.sum(Xbar_minus_Ybar * x, 0)
    t2 = t2 * float(nx * ny) / float(nx + ny)
    stat = (t2 * float(nx + ny - 1 - p) / (float(nx + ny - 2) * p))

    pval = 1 - np.squeeze(f_distrib.cdf(stat, p, nx + ny - 1 - p))
    return pval, t2
Example #39
    def my_f_compare(best_fit, new_fit):
        nonlocal called
        called += 1
        nfree = best_fit.nfree
        nfix = best_fit.nfree - new_fit.nfree
        dchi = new_fit.chisqr / best_fit.chisqr - 1.0
        return f.cdf(dchi * nfree / nfix, nfix, nfree)
Example #40
def solve_f(f_value=None, f1=None, f2=None, p=None):
    max_1_none(f_value, f1, f2, p)
    if f_value is None:
        return f(f1, f2, p)
    elif p is None:
        return sympify(sci_f.cdf(float(f_value), float(f1), float(f2)))
    else:
        raise NotImplementedError("Not implemented yet - sorry")
Example #41
def f_compare(ndata, nparams, new_chi, best_chi, nfix=1):
    """
    Returns the probability for two given parameter sets.
    nfix is the number of fixed parameters.
    """
    nparams = nparams + nfix
    nfree = 1.0*(ndata - nparams)
    return f.cdf((new_chi / best_chi - 1) * nfree/nfix, nfix, nfree)
Example #42
def f_compare(Ndata, Nparas, new_chi, best_chi, Nfix=1.):
    """
    Returns the probability for two given parameter sets.
    Nfix is the number of fixed parameters.
    """

    Nparas = Nparas + Nfix
    return f.cdf((new_chi / best_chi - 1) * (Ndata - Nparas) / Nfix,
        Nfix, Ndata - Nparas)
Example #43
def f_compare(ndata, nparas, new_chi, best_chi, nfix=1.):
    """
    Returns the probability for two given parameter sets.
    nfix is the number of fixed parameters.
    """
    nparas = nparas + nfix
    nfree = ndata - nparas
    nfix = 1.0*nfix
    dchi = new_chi / best_chi - 1.0
    return f.cdf(dchi * nfree / nfix, nfix, nfree)
Example #44
def make_test_data():
    # Make test data for stored data test
    # Run with:
    # import fisher322.tests.test_fisher as tf
    # tf.make_test_data()
    if not have_scipy:
        raise RuntimeError("Need scipy to store data")
    N = 10
    mN = 15
    nN = 15
    x = np.random.normal(size=(N,)) ** 2
    fcdf_data = np.zeros((mN, nN, N))
    for m in range(15):
        for n in range(15):
            fcdf_data[m, n][:] = f.cdf(x, m, n)
    np.savez(TEST_DATA_FNAME, fcdf_data=fcdf_data, x=x)
Example #45
def global_difference(table):
    """ Runs and F-test on the ranks. """
    if (not table.is_summary) or (table.type != AbedTableTypes.RANKS):
        return None
    N = float(len(settings.DATASETS))
    k = float(len(settings.METHODS))
    averages = next((row for _id, row in table if _id == 'Average'), None)
    av_sq = sum([pow(float(x), 2.0) for x in averages])
    chi2 = 12.0*N/(k*(k+1))*(av_sq - (k*pow(k+1, 2.0)/4.0))

    # this can happen when the ordering of methods is always the same
    try:
        Fstat = (N - 1.0)*chi2/(N*(k - 1) - chi2)
    except ZeroDivisionError:
        Fstat = float('inf')
    Fprob = 1.0 - f_dist.cdf(Fstat, k-1, (k-1)*(N-1))
    return Fstat, Fprob
Example #46
def welch_anova(*args):
    '''
    This helper function calculates Welch's ANOVA for cases where
    the homogeneity-of-variance assumption is violated.
    args here is the list of array-like samples; pandas Series fit best, since .count() and .var() are called on each
    See this web link for the derived formula:
    http://www.uvm.edu/~dhowell/gradstat/psych340/Lectures/Anova/anova2.html
    '''
    # Number of groups
    k = len(args)
    total_weight = 0
    total_weighted_sum = 0
    weight_list = []
    mean_list = []
    count_list = []
    for sample in args:
        mean = sample.mean()
        mean_list.append(mean)
        var = sample.var()
        count = sample.count()
        count_list.append(count)
        weight = count / var
        weight_list.append(weight)
        total_weight += weight
        weighted_sum = weight * mean
        total_weighted_sum += weighted_sum
    weighted_grand_mean = total_weighted_sum / total_weight
    # Next, let's find Welch's F
    total_weighted_var = 0
    crazy_sum = 0
    for w, m, n in zip(weight_list, mean_list, count_list):
        # This part is used for f_stat calculation
        element = w * ((m - weighted_grand_mean) ** 2)
        total_weighted_var += element
        denom_squared_element = (1 - w / total_weight) ** 2
        crazy_element = denom_squared_element / (n - 1)
        crazy_sum += crazy_element
    f_numer = total_weighted_var / (k - 1)
    f_denom = 1 + 2 * (k - 2) * crazy_sum / (k**2 - 1) 
    f_stat = f_numer / f_denom
    # Next, let's find Welch's degree of freedom
    df = (k**2 - 1) / (3 * crazy_sum)
    # Now determine p-value from df
    pval = 1 - f.cdf(f_stat, k - 1, df)
    return f_stat, pval
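A hedged example; the helper calls `.count()` and `.var()` on each sample, so pandas Series (whose `.var()` defaults to the ddof=1 sample variance) are a natural fit. Data are invented.

import pandas as pd
from scipy.stats import f  # welch_anova above uses f.cdf

g1 = pd.Series([14.0, 15.2, 13.8, 16.1, 14.9])  # invented groups with
g2 = pd.Series([18.9, 21.3, 19.7, 20.4, 19.2])  # unequal means and variances
g3 = pd.Series([12.1, 11.8, 13.0, 12.6, 12.3])
print(welch_anova(g1, g2, g3))  # (f_stat, pval)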
Example #47
def main(rank_file):
    ranks = []
    N = None
    with open(rank_file, 'r') as f:
        for line in f:
            _, dsets, r = line.strip().split(',')
            if N is None:
                N = int(dsets)
            ranks.append(float(r))

    k = len(ranks)
    ranksum = np.sum(np.square(ranks))
    friedman_statistic = (12.0*N/(k*(k+1)))*(ranksum - ((k * (k+1)**2) / 4.0))
    f_value = ((N - 1)*friedman_statistic) / (N*(k-1) - friedman_statistic)
    print('p-value (Friedman Statistics): %f' % (1.0 - chi2.cdf(friedman_statistic, k-1)))
    print('     p-value (Iman/Davenport): %f' % (1.0 - fdist.cdf(f_value, k-1, (k-1)*(N-1))))
    for alpha in (0.10, 0.05, 0.01):
        print('CD_%.2f: %f' % (alpha, nemenyi.critical_difference(alpha, k, N)))
Example #48
def linRegStats(A, b):
	''' Use linear regression to solve for Ax=b where A and b are known.
	Also report stats from Wald test.
	'''
	o = ones(b.size)
	A = column_stack((A, o))
	lstsqStats = linalg.lstsq(A,b)
	
	x = lstsqStats[0]
	SSE = lstsqStats[1][0]
	
	meanResponse = sum(b) / b.size
	computedResponse = dot(A,x)
	SSR = dot( computedResponse - meanResponse, computedResponse - meanResponse )
	fstat = SSR / (SSE / (b.size - 2 ));
	
	p = 1.0 - f.cdf(fstat, 1, b.size - 2)
	
	return {'betas' : x , 'F' : fstat, 'p' : p, 'df1' : 1 , 'df2' : b.size-2}
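The snippet uses bare `ones`, `column_stack`, `dot`, and `linalg`, suggesting `from numpy import *` in its original module; a hedged driver under that assumption, with synthetic data.

from numpy import *  # matches the snippet's unqualified numpy names
import numpy as np
from scipy.stats import f

A = np.arange(10.0)  # single predictor
b = 2.0 * A + 1.0 + np.random.default_rng(4).normal(scale=0.1, size=10)
print(linRegStats(A, b))  # {'betas', 'F', 'p', 'df1', 'df2'}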
Example #49
    def F(self):
        """
        Calculate the F-statistic of the model, also used in `summary()` function

        Returns
        =======
        F, tuple(F, df-1, n-df, p-val)
            F value as defined in the notes, degrees of freedom, p value

        Notes
        =====
        How the F statistic is calculated:
        .. math:: \frac{(y-\bar{y})^2 - RSS}{RSS} \cdot \frac{n - d.f.}{d.f. - 1}
        """
        n, df = self.model[1].shape
        rss = np.var(self.model[0])*n
        fval = (rss-self.rss)/self.rss*(n-df)/(df-1)
        fval = float(fval) # convert design matrix to float
        p = f.cdf(fval, df-1, n-df)
        return fval, df-1, n-df, 1-p
Example #50
def compute(data):
    N = data.size
    C = len(data.columns)

    dfc = C - 1
    dfer = N - C
    dft = N - 1

    cm = data.mean()
    tm = data.sum().sum() / N
    n = data.shape[0]

    SSC = sum((tm - cm) ** 2) * n
    MSC = SSC / dfc

    SSE = ((data - cm) ** 2).sum().sum()
    MSE = SSE / dfer

    SST = ((data - tm) ** 2).sum().sum()

    F = MSC / MSE

    alpha = 0.05
    p_value = 1 - f.cdf(F, dfc, dfer)

    print(data)
    print()
    print(pandas.DataFrame({'df': [dfc, dfer, dft],
                            'SS': [SSC, SSE, SST],
                            'MS': [MSC, MSE, ''],
                            'F': [F, '', ''],
                            'p value': [p_value, '', '']},
                           columns=['df', 'SS', 'MS', 'F', 'p value'],
                           index=['between', 'within', 'total']))
    print()
    if p_value < alpha:
        print("Reject null hypothesis")
    else:
        print("Fail to reject null hypothesis")
    print('~~~~~~~~~')
Example #51
    def cfriedman(self):
        """        
        Friedman test based on Conover 1999.

        This method uses Conover's recommendation for an improved version
        that compares to the F-distribution rather than the Chi-square.
        Generates P value and distribution information.

        Parameters
        ----------
        none
   
        Sets property
        -------------
        self.P : float
                P value
        self.distribution : list
                String describing what distribution was used for test.
        self.pairwisePs : None
                Sets to None to prevent mismatches between testing methods

        Returns
        -------
        none
        """
        try:
            # Calculate p-value based on cdf of F-distribution for T2
            df1 = self.nts-1
            df2 = (self.nblocks-1)*(self.nts-1)
            self.P = 1-f.cdf(self.T2, df1, df2)
            self.distribution = ['f.cdf', [['df1', df1], ['df2', df2]]]
            self.pairwisePs = None
        except Exception:
            self.P = None
            self.distribution = None
            self.pairwisePs = None
            print('Error in cfriedman')
Example #52
def calc_F(R, r, beta, var_beta, nobs, df):
    """
    Computes the standard F-test statistic for linear restriction
    hypothesis testing

    Parameters
    ----------
    R: ndarray (N x N)
        Restriction matrix
    r: ndarray (N x 1)
        Restriction vector
    beta: ndarray (N x 1)
        Estimated model coefficients
    var_beta: ndarray (N x N)
        Variance covariance matrix of regressors
    nobs: int
        Number of observations in model
    df: int
        Model degrees of freedom

    Returns
    -------
    F value, (q, df_resid), p value
    """
    from scipy.stats import f

    hyp = np.dot(R, beta.reshape(len(beta), 1)) - r
    RSR = np.dot(R, np.dot(var_beta, R.T))

    q = len(r)

    F = np.dot(hyp.T, np.dot(inv(RSR), hyp)).squeeze() / q

    p_value = 1 - f.cdf(F, q, nobs - df)

    return F, (q, nobs - df), p_value
Example #53
def anova_f(s, whichCat, nCategory, SigLevel):
    import numpy as np
    from scipy.stats import f 

    Xmean = np.mean(s, axis=2)
    
    df_b = nCategory - 1 
    df_w = s.shape[2] - nCategory

    SS_b = np.zeros((s.shape[0], s.shape[1])) 
    SS_w = np.zeros((s.shape[0], s.shape[1]))
    for kk in range(nCategory):
        Xk = s[:,:, whichCat[:, kk]>0]  
        Xkmean = np.mean(Xk, axis=2)
        dtemp = Xkmean-Xmean
        SS_b = SS_b + Xk.shape[2] * (dtemp * dtemp)
        for i in range(Xk.shape[2]):
            temp = (Xk[:,:,i] - Xkmean)    
            SS_w = SS_w + (temp * temp) 
    SS_t = SS_b + SS_w; 
    
    MS_b = SS_b / df_b
    MS_w = SS_w / df_w  
    XF = np.divide(MS_b, MS_w)
    PvalF = np.ones((XF.shape[0],XF.shape[1]))
    for ii in range(XF.shape[0]): 
        for jj in range(XF.shape[1]):
            PvalF[ii,jj] = 1-f.cdf(XF[ii,jj], df_b, df_w)
        
    SigF = 1.0 * (PvalF < SigLevel)
    nSigS  = np.sum(SigF, axis=0)  
    nSigT  = np.sum(SigF, axis=1) 
    statOut = {'F':XF,'Pval':PvalF,'Sig':SigF,'df_b':df_b,'df_w':df_w,'SS_b':SS_b,'SS_w':SS_w,'SS_t':SS_t,'MS_b':MS_b,'MS_w':MS_w,'nSigS':nSigS,'nSigT':nSigT}  
    #'T':XT,'Pval':PvalST,'Sig':SigST} 

    return statOut #[XF, PvalF, SigF, nSig, nSigT, df_b, df_w, SS_b, SS_w, MS_b, MS_w]
def separateRegression(response, predictor, sepData, bpChoices, predictorName, equaltestid, equaltestid2):
    results = np.zeros((len(bpChoices)-1, 4))
    print(bpChoices)
    chosencriterion = 'r2'  # max(r2) is the same criterion as min(sse)
    for bpid in range(len(bpChoices)-1):
        print(bpid)
        responseLeft, responseRight, predictorLeft, predictorRight, dataleftIdx, datarightIdx = separateData(response, predictor, sepData, bpChoices[bpid])
        leftmodel = ols.ols(responseLeft, predictorLeft,'y',predictorName)
        rightmodel = ols.ols(responseRight, predictorRight,'y',predictorName)
        results[bpid,0] = bpid
        if chosencriterion == 'r2':           
            results[bpid,1] = leftmodel.R2adj
            results[bpid,2] = rightmodel.R2adj
            #results[bpid,3] = 1 - (leftmodel.e.var() + rightmodel.e.var())/(leftmodel.y.var() + rightmodel.y.var())
            results[bpid,3] = calculateR2(leftmodel, rightmodel, np.mean(response))
            #results[bpid,3] = (leftmodel.R2 + rightmodel.R2)/2
        elif chosencriterion == 'sse':
            results[bpid,1] = leftmodel.sse * leftmodel.df_e
            results[bpid,2] = rightmodel.sse * rightmodel.df_e
            results[bpid,3] = results[bpid,1] + results[bpid,2]
            #yhatL = np.dot(leftmodel.x, leftmodel.b)
            #print results[bpid,1], np.sum((responseLeft - yhatL)**2)

    print(results)
    if chosencriterion == 'r2':
        optBP = int(results[np.argmax(results, axis = 0)[-1],0])
    elif chosencriterion == 'sse':
        optBP = int(results[np.argmin(results, axis = 0)[-1],0])

    responseLeft, responseRight, predictorLeft, predictorRight, dataleftIdx, datarightIdx = separateData(response, predictor, sepData, bpChoices[optBP])
    leftmodel = ols.ols(responseLeft, predictorLeft,'y',predictorName)
    rightmodel = ols.ols(responseRight, predictorRight,'y',predictorName)

    #equaltestid = int(equaltestid)
    if equaltestid[0] > -1:
        temppredictorLeft = predictorLeft
        temppredictorLeft[:,equaltestid[1]] = temppredictorLeft[:,equaltestid[0]] + temppredictorLeft[:,equaltestid[1]]
        temppredictorLeft = np.delete(temppredictorLeft, equaltestid[0], 1)
        temppredictorRight = predictorRight
        temppredictorRight[:,equaltestid[1]] = temppredictorRight[:,equaltestid[0]] + temppredictorRight[:,equaltestid[1]]
        temppredictorRight = np.delete(temppredictorRight, equaltestid[0], 1)
        temppredictorName = np.delete(predictorName, equaltestid[0], None)
        templeftmodel = ols.ols(responseLeft, temppredictorLeft,'y',temppredictorName)
        temprightmodel = ols.ols(responseRight, temppredictorRight,'y',temppredictorName)
        fleft = (leftmodel.R2 - templeftmodel.R2) * (leftmodel.nobs - len(predictorName) - 2) / (1 - leftmodel.R2)
        fright = (rightmodel.R2 - temprightmodel.R2) * (rightmodel.nobs - len(predictorName) - 2) / (1 - rightmodel.R2)
        pleft = 1 - f.cdf(fleft, 1, leftmodel.nobs - len(predictorName) - 2)
        pright = 1 - f.cdf(fright, 1, rightmodel.nobs - len(predictorName) - 2)

        if equaltestid2[0] > -1:
            temppredictorLeft = predictorLeft
            temppredictorLeft[:,equaltestid2[1]] = temppredictorLeft[:,equaltestid2[0]] + temppredictorLeft[:,equaltestid2[1]]
            temppredictorLeft = np.delete(temppredictorLeft, equaltestid2[0], 1)
            temppredictorRight = predictorRight
            temppredictorRight[:,equaltestid2[1]] = temppredictorRight[:,equaltestid2[0]] + temppredictorRight[:,equaltestid2[1]]
            temppredictorRight = np.delete(temppredictorRight, equaltestid2[0], 1)
            temppredictorName = np.delete(predictorName, equaltestid2[0], None)
            templeftmodel = ols.ols(responseLeft, temppredictorLeft,'y',temppredictorName)
            temprightmodel = ols.ols(responseRight, temppredictorRight,'y',temppredictorName)
            fleft = (leftmodel.R2 - templeftmodel.R2) * (leftmodel.nobs - len(predictorName) - 2) / (1 - leftmodel.R2)
            fright = (rightmodel.R2 - temprightmodel.R2) * (rightmodel.nobs - len(predictorName) - 2) / (1 - rightmodel.R2)
            pleft2 = 1 - f.cdf(fleft, 1, leftmodel.nobs - len(predictorName) - 2)
            pright2 = 1 - f.cdf(fright, 1, rightmodel.nobs - len(predictorName) - 2)

    yhatL = np.dot(leftmodel.x, leftmodel.b)
    yhatR = np.dot(rightmodel.x, rightmodel.b)
    yhat = np.zeros(len(response))

    for i in range(len(yhatL)):
        yhat[dataleftIdx[i]] = yhatL[i]

    for i in range(len(yhatR)):
        yhat[datarightIdx[i]] = yhatR[i]

    yhat = np.exp(yhat)
    fileLoc = filepath + 'separateR_model2_y_hat.csv'
    #np.savetxt(fileLoc, yhat, delimiter=',', fmt = '%s')
    print('Optimal Index:', optBP)
    print('Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', calculateR2(leftmodel, rightmodel, np.mean(response)))

    print('----------------------------- left model -----------------------------')
    print(leftmodel.summary())
    print('----------------------------- right model -----------------------------')
    print(rightmodel.summary())

    print('Optimal Index:', optBP)
    print('Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, -1])
   
    outputstring = 'before bp'
    for i in range(len(predictorName)+1):
        outputstring += ', b' + str(i) + ' = ' + "%.2f" %(leftmodel.b[i]) + '(' + "%.3f" %(leftmodel.se[i])+ ')'
    outputstring += ', with R2 = ' + "%.4f" %(leftmodel.R2adj)
    if equaltestid[0] > -1:
        outputstring += ', f1 <> f2 with pvalue = ' + "%.2f" %(pleft)
        if equaltestid2[0] > -1:
            outputstring += ', f12 <> f22 with pvalue = ' + "%.2f" %(pleft2)
    print(outputstring)

    outputstring = 'after bp'
    for i in range(len(predictorName)+1):
        outputstring += ', b' + str(i) + ' = ' + "%.2f" %(rightmodel.b[i]) + '(' + "%.3f" %(rightmodel.se[i])+ ')'
    outputstring += ', with R2 = ' + "%.4f" %(rightmodel.R2adj)
    if equaltestid[0] > -1:
        outputstring += ', f1 <> f2 with pvalue = ' + "%.2f" %(pright)
        if equaltestid2[0] > -1:
            outputstring += ', f12 <> f22 with pvalue = ' + "%.2f" %(pright2)
    print(outputstring)

    #calpredictedvalue(predictor, bpChoices[optBP], zip(leftmodel.b, rightmodel.b), 'exp_inoutflow_model2B.csv')
    #calconfidenceinterval(predictorLeft, predictorRight, [leftmodel.sse, rightmodel.sse], response, predictor, bpChoices[optBP], zip(leftmodel.b, rightmodel.b), 'ci_model2B.csv')
    return results, yhat
Example #55
    for fiber in fiber_list:
        mod = Model(lambda x, a, b: a * x + b)
        slope_displ = mod.fit(fiber.binned_exp['static_fr_mean'],
                              x=fiber.binned_exp['displ_mean'],
                              a=1, b=1).best_values['a']
        slope_force = mod.fit(fiber.binned_exp['static_fr_mean'],
                              x=fiber.binned_exp['force_mean'],
                              a=1, b=1).best_values['a']
        slope_displ_list.append(slope_displ)
        slope_force_list.append(slope_force)
    slope_displ_arr = np.array(slope_displ_list)
    slope_force_arr = np.array(slope_force_list)
    sensitivity_df = pd.DataFrame(
        np.c_[slope_displ_arr, slope_force_arr],
        index=['#' + str(i+1) for i in range(slope_displ_arr.size)],
        columns=['Displacement sensitivity (Hz/mm)',
                 'Force sensitivity (Hz/mN)'])
    for column in sensitivity_df.columns:
        sensitivity_df[column[:5] + '_normalized'] = sensitivity_df[column] /\
            sensitivity_df[column].median()
    sensitivity_df.transpose().to_excel('./csvs/sensitivity.xlsx')
    print(sensitivity_df.var())
    from scipy.stats import f, bartlett, levene
    print(f.cdf(sensitivity_df['Displ_normalized'].var() /
                sensitivity_df['Force_normalized'].var(),
          sensitivity_df.shape[0], sensitivity_df.shape[0]))
    print(bartlett(sensitivity_df['Displ_normalized'],
                   sensitivity_df['Force_normalized']))
    print(levene(sensitivity_df['Displ_normalized'],
                 sensitivity_df['Force_normalized']))
Example #56
#%
maxFZ     = geneFZ(Lruns_CLT, N, xsensors_m, Fs_Hz, range_azimuth_deg, 
                   range_elevation_deg, range_velocity_mps)
maxFF     = geneFF(Lruns_CLT, N, xsensors_m, Fs_Hz, range_azimuth_deg, 
                  range_elevation_deg, range_velocity_mps)
FF        = maxFsimul[0:ir+1,0]

#== compute the p-value with the asymptotic distribution
#   (not independent)
ppv  = pvalunderH0(FF, N, xsensors_m, Fs_Hz,
                   range_azimuth_deg, 
                   range_elevation_deg, range_velocity_mps);

# p-values with the limG independent and the F independent
ppvG = 1-norm.cdf(FF,1.0,sqrt(2.0*M/(M-1.0)/N))**Q;
ppvF = 1-f.cdf(FF,N,N*(M-1))**Q;

# pdf of the max of the limG independent and the F independent
linx        = linspace(0.69,1.3,200)
sigmaGlim   = sqrt(2.0*M/(M-1.0)/N)
nu1         = N
nu2         = N*(M-1)
pdffromF    = f.pdf(linx,nu1,nu2)
pdffromFind = Q * pdffromF * (f.cdf(linx,nu1,nu2)**(Q-1));
pdffromGind = Q * norm.pdf(linx,1.0,sigmaGlim) * (norm.cdf(linx,1.0,sigmaGlim)**(Q-1));


dirfigsave = '/Users/maurice/etudes/stephenA/propal2/figures/'

#%%
#
Example #57
def get_result_simple(Fst, d):
    return Fst, (q, d), 1 - f.cdf(Fst, q, d)

chi2_m = np.sum((BimgSky - polyValues)**2)

## Loop through polynomial degrees and store Ftest result
alpha  = 0.05   #Use a 5% "random probability" as a cutoff
# sigma  = 3.0    #Use a 3 sigma requirement for so much data
# alpha  = (1 - norm.cdf(sigma))
Ftests = []
coeffs = []
for deg in range(1,6):
    dof        = numSamp - deg - 1
    polyCoeffs = np.polyfit(BimgTimes, BimgSky, deg)
    coeffs.append(polyCoeffs)
    polyValues = np.polyval(polyCoeffs, BimgTimes)
    chi2_m1    = np.sum((BimgSky - polyValues)**2)
    Fchi       = (chi2_m - chi2_m1)/(chi2_m1/dof)
    prob       = 1.0 - f.cdf(Fchi, 1, dof)
    Ftests.append(prob < alpha)

    ## Store chi2_m1 in chi2 for use in next iteration
    chi2_m     = chi2_m1

# Find the lowest order FAILED F-test to get the highest order good fit
bestDegree = np.min(np.where([not test for test in Ftests]))

# Fit the best fitting polynomial
polyCoeffs = np.polyfit(BimgTimes, BimgSky, bestDegree)
polyValues = np.polyval(polyCoeffs, BimgTimes)

# Subtract the best fitting polynomial and save for use in the FFT
BimgSky1 = BimgSky - polyValues
def fcdf(x, d1, d2):
    result = f.cdf(x, d1, d2)
    if isnan(result):
        return betainc(d1/2., d2/2., d1*x*1./(d1*x+d2))
    return result
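This wrapper needs `isnan`, `f`, and `betainc` in scope; a hedged set of imports and a call, assuming `math.isnan` and `scipy.special.betainc` (whose regularized incomplete beta is exactly the F CDF identity used in the fallback).

from math import isnan
from scipy.stats import f
from scipy.special import betainc

print(fcdf(2.5, 3, 10))  # equals f.cdf(2.5, 3, 10); betainc is the NaN fallback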