def _games_howell_test(self):
    r"""
    Performs the Games-Howell pairwise comparison of all group means.

    Returns
    -------
    result_df : pandas.DataFrame
        One row per pair of groups with the columns ``groups``,
        ``mean_difference``, ``std_error``, ``t_value``, ``p_value``,
        ``upper_limit`` and ``lower limit``.

    Notes
    -----
    For each pair of groups :math:`i, j` the standard error is

    .. math::

        SE = \sqrt{\frac{1}{2} \left( \frac{s_i^2}{n_i} + \frac{s_j^2}{n_j} \right)}

    and the confidence limits are the mean difference plus or minus the
    studentized range quantile multiplied by :math:`SE`. Degrees of freedom
    come from the Welch-Satterthwaite equation.

    """
    combs = list(combinations(np.unique(self.group), 2))

    sample_stats = self._group_sample_statistics()
    n_groups = sample_stats['Number of Groups']
    means_d = dict(sample_stats['Group Means'])
    obs_d = dict(sample_stats['Group Observations'])
    var_d = dict(sample_stats['Group Variance'])

    group_comps = []
    mean_differences = []
    t_values = []
    p_values = []
    std_err = []
    up_conf = []
    low_conf = []

    for first, second in combs:
        diff = means_d[second] - means_d[first]

        # Squared standard error of each group mean.
        sq_err_first = var_d[first] / obs_d[first]
        sq_err_second = var_d[second] / obs_d[second]
        sq_err_sum = sq_err_first + sq_err_second

        t_val = np.absolute(diff) / np.sqrt(sq_err_sum)

        # Welch-Satterthwaite degrees of freedom.
        df_denom = (sq_err_first ** 2 / (obs_d[first] - 1) +
                    sq_err_second ** 2 / (obs_d[second] - 1))
        df = sq_err_sum ** 2 / df_denom

        # t * sqrt(2) puts the statistic on the studentized range scale.
        p_val = psturng(t_val * np.sqrt(2), n_groups, df)

        se = np.sqrt(0.5 * sq_err_sum)

        # The interval half-width is the critical quantile scaled by the
        # standard error; the quantile alone is not in the units of `diff`.
        margin = qsturng(1 - self.alpha, n_groups, df) * se

        mean_differences.append(diff)
        t_values.append(t_val)
        p_values.append(p_val)
        std_err.append(se)
        up_conf.append(diff + margin)
        low_conf.append(diff - margin)
        group_comps.append(str(first) + ' : ' + str(second))

    result_df = pd.DataFrame({'groups': group_comps,
                              'mean_difference': mean_differences,
                              'std_error': std_err,
                              't_value': t_values,
                              'p_value': p_values,
                              'upper_limit': up_conf,
                              'lower limit': low_conf})

    return result_df
def test_qstrung():
    """Compare ``qsturng`` with ``get_tukeyQcrit`` and check the ``psturng`` round trip."""
    rows = list(range(5, 21)) + [24, 30, 40, 60, 120, 9999]
    cols = np.arange(2, 11)

    for alpha in (0.01, 0.05):
        for k in cols:
            tabulated = get_tukeyQcrit(k, rows, alpha=alpha)
            computed = qsturng(1 - alpha, k, rows)
            assert_almost_equal(tabulated, computed, decimal=2)

            # roundtrip: the survival probability of the quantile is alpha
            recovered = psturng(qsturng(1 - alpha, k, rows), k, rows)
            assert_almost_equal(recovered, alpha, 5)
def test_qstrung(alpha, k):
    """Check ``qsturng`` against ``get_tukeyQcrit`` for one ``(alpha, k)`` pair."""
    rows = list(range(5, 21)) + [24, 30, 40, 60, 120, 9999]

    tabulated = get_tukeyQcrit(k, rows, alpha=alpha)
    computed = qsturng(1 - alpha, k, rows)
    assert_almost_equal(tabulated, computed, decimal=2)

    # roundtrip: the survival probability of the quantile is alpha
    recovered = psturng(qsturng(1 - alpha, k, rows), k, rows)
    assert_almost_equal(recovered, alpha, 5)
def test_qstrung():
    """Compare ``qsturng`` with ``get_tukeyQcrit`` and check the ``psturng`` round trip."""
    dense_rows = list(range(5, 21))
    rows = dense_rows + [24, 30, 40, 60, 120, 9999]
    cols = np.arange(2, 11)

    for alpha in (0.01, 0.05):
        for k in cols:
            from_table = get_tukeyQcrit(k, rows, alpha=alpha)
            from_qsturng = qsturng(1 - alpha, k, rows)
            assert_almost_equal(from_table, from_qsturng, decimal=2)

            # roundtrip
            back = psturng(qsturng(1 - alpha, k, rows), k, rows)
            assert_almost_equal(back, alpha, 5)
def _qvalue(self):
    r"""
    Looks up the studentized range value used by the test.

    Returns
    -------
    q : float
        The value ``qsturng`` returns for ``self.alpha``, ``self.k`` and
        ``self.n - self.k``.

    Notes
    -----
    :math:`q` is obtained in a way similar to the t-statistic:

    .. math::

        q_{\alpha,k,N-k}

    The studentized range distribution of :math:`q` is defined as:

    .. math::

        q_s = \frac{Y_{max} - Y_{min}}{SE}

    Here :math:`Y_{max}` and :math:`Y_{min}` are the larger and smaller
    means of the two groups being compared, and :math:`SE` is the standard
    error of the entire design.

    """
    error_df = self.n - self.k
    return qsturng(self.alpha, self.k, error_df)
def test_handful_to_tbl(self):
    """Check ``qsturng`` against a handful of reference ``(p, r, v, q)`` rows."""
    reference = (
        (0.75, 30.0, 12.0, 5.01973488482),
        (0.975, 15.0, 18.0, 6.00428263999),
        (0.1, 8.0, 11.0, 1.76248712658),
        (0.995, 6.0, 17.0, 6.13684839819),
        (0.85, 15.0, 18.0, 4.65007986215),
        (0.75, 17.0, 18.0, 4.33179650607),
        (0.75, 60.0, 16.0, 5.50520795792),
        (0.99, 100.0, 2.0, 50.3860723433),
        (0.9, 2.0, 40.0, 2.38132493732),
        (0.8, 12.0, 20.0, 4.15361239056),
        (0.675, 8.0, 14.0, 3.35011529943),
        (0.75, 30.0, 24.0, 4.77976803574),
        (0.75, 2.0, 18.0, 1.68109190167),
        (0.99, 7.0, 120.0, 5.00525918406),
        (0.8, 19.0, 15.0, 4.70694373713),
        (0.8, 15.0, 8.0, 4.80392205906),
        (0.5, 12.0, 11.0, 3.31672775449),
        (0.85, 30.0, 2.0, 10.2308503607),
        (0.675, 20.0, 18.0, 4.23706426096),
        (0.1, 60.0, 60.0, 3.69215469278),
    )

    for prob, n_means, dof, expected in reference:
        computed = qsturng(prob, n_means, dof)
        assert_almost_equal(expected, computed, 5)
def test_handful_to_ch(self):
    """Check ``qsturng`` against a handful of high-precision ``(p, r, v, q)`` rows."""
    reference = (
        (0.8699908, 10.0, 465.4956, 3.997799075635331),
        (0.8559087, 43.0, 211.7474, 5.1348419692951675),
        (0.6019187, 11.0, 386.5556, 3.3383101487698821),
        (0.658888, 51.0, 74.652, 4.8108880483153733),
        (0.6183604, 77.0, 479.8493, 4.9864059321732874),
        (0.9238978, 77.0, 787.5278, 5.7871053003022936),
        (0.8408322, 7.0, 227.3483, 3.5555798311413578),
        (0.5930279, 60.0, 325.3461, 4.7658023123882396),
        (0.6236158, 61.0, 657.5285, 4.8207812755987867),
        (0.9344575, 72.0, 846.4138, 5.8014341329259107),
        (0.8761198, 56.0, 677.8171, 5.362460718311719),
        (0.7901517, 41.0, 131.525, 4.9222831341950544),
        (0.6396423, 44.0, 624.3828, 4.6015127250083152),
        (0.8085966, 14.0, 251.4224, 4.0793058424719746),
        (0.716179, 45.0, 136.7055, 4.8055498089340087),
        (0.8204, 6.0, 290.9876, 3.3158771384085597),
        (0.8705345, 83.0, 759.6216, 5.5969334564485376),
        (0.8249085, 18.0, 661.9321, 4.3283725986180395),
        (0.9503, 2.0, 4.434, 3.7871158594867262),
        (0.7276132, 95.0, 91.43983, 5.4100384868499889),
    )

    for prob, n_means, dof, expected in reference:
        computed = qsturng(prob, n_means, dof)
        assert_almost_equal(expected, computed, 5)
def test_10000_to_ch(self):
    """Require ``qsturng`` to stay within 3% of every value stored in ``bootleg.dat``."""
    import os

    here = os.path.dirname(os.path.abspath(__file__))
    # bootleg.dat was generated by qtukey in R
    ps, rs, vs, qs = read_ch(here + '/bootleg.dat')

    expected = np.array(qs)
    relative_error = np.abs(expected - qsturng(ps, rs, vs)) / expected
    offenders = np.where(relative_error > .03)[0]
    assert_equal(np.array([]), offenders)
def get_critical_distance(x, level=0.95):
    """Return a critical distance derived from the shape of ``x``.

    ``x.shape`` is unpacked as ``(n_rows, n_cols)``. The result is the
    studentized range quantile at ``level`` for ``n_cols`` means and infinite
    degrees of freedom, multiplied by
    ``sqrt(n_cols * (n_cols + 1) / (12 * n_rows))``.
    """
    n_rows, n_cols = x.shape
    quantile = qsturng(level, n_cols, np.inf)
    scale = np.sqrt((n_cols * (n_cols + 1)) / (12 * n_rows))
    return quantile * scale
def test_10000_to_ch(self):
    """Require ``qsturng`` to stay within 3% of every value stored in ``bootleg.dat``."""
    import os

    test_dir = os.path.dirname(os.path.abspath(__file__))
    # bootleg.dat was generated by qtukey in R
    ps, rs, vs, qs = read_ch(test_dir + '/bootleg.dat')

    reference = np.array(qs)
    rel_err = np.abs(reference - qsturng(ps, rs, vs)) / reference
    bad_rows = np.where(rel_err > .03)[0]
    assert_equal(np.array([]), bad_rows)
def studentized_range(alpha=0.05, samples=10, df=140):
    """
    Look up a critical value of the studentized range distribution.
    Used in Tukey posthoc tests etc.

    :param alpha: alpha in [0,1]
    :param samples: number of samples (groups)
    :param df: degrees of freedom
    :return: ``qsturng(1 - alpha, samples, df)`` converted to ``float``
    """
    critical = qsturng(1 - alpha, samples, df)
    return float(critical)
def test_vector(self):
    # vector input -> vector output
    probs = [.8932, .9345, .9827]
    n_means = [4, 4, 4]
    dofs = [6, 6, 6]
    expected = np.array([3.98832389, 4.56835318, 6.26400894])
    assert_array_almost_equal(expected, qsturng(probs, n_means, dofs), 5)
def test_vector(self):
    """vector input -> vector output"""
    expected = np.array([3.98832389, 4.56835318, 6.26400894])
    computed = qsturng([.8932, .9345, .9827], [4, 4, 4], [6, 6, 6])
    assert_array_almost_equal(expected, computed, 5)
def test_10000_to_ch(self):
    """Require ``qsturng`` to stay within 3% of every value stored in ``bootleg.csv``."""
    import os

    curdir = os.path.dirname(os.path.abspath(__file__))
    # The data (generated by qtukey in R) is read from two directory levels
    # up, under tests/results, to work around problems getting
    # libqsturng.tests.bootleg.dat installed.
    two_levels_up = os.path.dirname(os.path.dirname(curdir))
    ps, rs, vs, qs = read_ch(two_levels_up + '/tests/results/bootleg.csv')

    expected = np.array(qs)
    relative_error = np.abs(expected - qsturng(ps, rs, vs)) / expected
    offenders = np.where(relative_error > .03)[0]
    assert_equal(np.array([]), offenders)
def get_thsd(mci):
    """Run ``tukeyhsd`` at alpha 0.05 on the group statistics of ``mci``.

    Prints element 4 of the result and asserts that the variance passed to
    ``tukeyhsd`` agrees, to 14 decimals, with the pooled within-group
    variance. Returns the ``tukeyhsd`` result.
    """
    stats = mci.groupstats
    var_ = np.var(stats.groupdemean(), ddof=len(mci.groupsunique))
    means = stats.groupmean
    nobs = stats.groupnobs

    q_crit = qsturng(0.95, len(means), (nobs - 1).sum())
    resi = tukeyhsd(means, nobs, var_, df=None, alpha=0.05, q_crit=q_crit)
    print(resi[4])

    weighted_within = (stats.groupvarwithin() * (nobs - 1)).sum()
    var2 = weighted_within / (nobs - 1).sum()
    assert_almost_equal(var_, var2, decimal=14)
    return resi
def nemenyi(k, n, critical=0.05):
    """Return the Nemenyi range for ``k`` and ``n`` at level ``critical``.

    The studentized range quantile function is used, with 1e5 degrees of
    freedom standing in for infinity.
    """
    # 1e5 is close to infinity
    studentized_range_statistics = qsturng(1 - critical, k, 1e5)
    q_alpha = studentized_range_statistics / np.sqrt(2)
    rank_scale = np.sqrt(1. * k * (k + 1) / (6 * n))
    return rank_scale * q_alpha
def create_turkey_intervals(row_mean, row_var, mse, alpha, k, b, df, pairs):
    """Build one ``ANOVATreatmentInterval`` per entry of ``pairs``.

    Each interval receives the difference ``row_mean[pair[0]] -
    row_mean[pair[1]]``, the shared half-width ``Q * sqrt(mse / b)`` where
    ``Q = qsturng(1 - alpha, k, df)``, and the pair itself. ``row_var`` is not
    used. Returns a tuple.
    """
    # Which treatment means are significantly different from each other ? (small sample)
    half_width = qsturng(1 - alpha, k, df) * numpy.sqrt(mse / b)
    return tuple(
        ANOVATreatmentInterval(
            row_mean[pair[0]] - row_mean[pair[1]], half_width, pair)
        for pair in pairs
    )
def test_100_random_values(self):
    """Round-trip 100 random ``(p, r, v)`` triples through ``qsturng`` and ``psturng``.

    ``psturng(qsturng(p, r, v), r, v)`` is compared with ``1 - p``; no
    difference may exceed 1e-5.
    """
    n = 100
    ps = np.random.random(n) * (.999 - .1) + .1
    # randint excludes its upper bound, so (2, 101) draws from the same closed
    # range 2..100 as the deprecated random_integers(2, 100, n).
    rs = np.random.randint(2, 101, n)
    vs = np.random.random(n) * 998. + 2.
    qs = qsturng(ps, rs, vs)
    estimates = psturng(qs, rs, vs)
    actuals = 1. - ps
    # NOTE(review): the comparison is signed, so only estimates that are too
    # large are flagged - confirm whether an absolute error was intended.
    errors = estimates - actuals

    assert_equal(np.array([]), np.where(errors > 1e-5)[0])
def get_thsd(mci, alpha=0.05):
    """Run ``tukeyhsd`` at level ``alpha`` on the group statistics of ``mci``.

    Asserts that the variance passed to ``tukeyhsd`` agrees, to 14 decimals,
    with the pooled within-group variance, then returns the ``tukeyhsd``
    result.
    """
    stats = mci.groupstats
    var_ = np.var(stats.groupdemean(), ddof=len(mci.groupsunique))
    means = stats.groupmean
    nobs = stats.groupnobs

    q_crit = qsturng(1 - alpha, len(means), (nobs - 1).sum())
    resi = tukeyhsd(means, nobs, var_, df=None, alpha=alpha, q_crit=q_crit)

    weighted_within = (stats.groupvarwithin() * (nobs - 1)).sum()
    var2 = weighted_within / (nobs - 1).sum()
    assert_almost_equal(var_, var2, decimal=14)
    return resi
def test_1000_random_values(self):
    """Round-trip 1000 random ``(p, r, v)`` triples through ``qsturng`` and ``psturng``.

    ``psturng(qsturng(p, r, v), r, v)`` is compared with ``1 - p``; no
    difference may exceed 1e-5.
    """
    n = 1000
    ps = np.random.random(n) * (.999 - .1) + .1
    # randint excludes its upper bound, so (2, 101) draws from the same closed
    # range 2..100 as the deprecated random_integers(2, 100, n).
    rs = np.random.randint(2, 101, n)
    vs = np.random.random(n) * 998. + 2.
    qs = qsturng(ps, rs, vs)
    estimates = psturng(qs, rs, vs)
    actuals = 1. - ps
    # NOTE(review): the comparison is signed, so only estimates that are too
    # large are flagged - confirm whether an absolute error was intended.
    errors = estimates - actuals

    assert_equal(np.array([]), np.where(errors > 1e-5)[0])
def test_100_random_values(self, reset_randomstate):
    """Round-trip 100 seeded random triples through ``qsturng`` and ``psturng``."""
    n = 100
    rng = np.random.RandomState(12345)

    # Draw order (ps, rs, vs) is kept so the seeded values are unchanged.
    ps = rng.random_sample(n) * (.999 - .1) + .1
    rs = rng.randint(2, 101, n)
    vs = rng.random_sample(n) * 998. + 2.

    estimates = psturng(qsturng(ps, rs, vs), rs, vs)
    errors = estimates - (1. - ps)
    too_large = np.where(errors > 1e-5)[0]
    assert_equal(np.array([]), too_large)
def test_100_random_values(self, reset_randomstate):
    """Round-trip 100 seeded random triples through ``qsturng`` and ``psturng``."""
    n = 100
    seeded = np.random.RandomState(12345)

    # Draw order (ps, rs, vs) is kept so the seeded values are unchanged.
    ps = seeded.random_sample(n) * (.999 - .1) + .1
    rs = seeded.randint(2, 101, n)
    vs = seeded.random_sample(n) * 998. + 2.

    quantiles = qsturng(ps, rs, vs)
    recovered = psturng(quantiles, rs, vs)
    errors = recovered - (1. - ps)
    assert_equal(np.array([]), np.where(errors > 1e-5)[0])
def test_all_to_tbl(self):
    """Require ``qsturng`` to stay within 3% of every entry reachable through ``T`` and ``R``."""
    # Same iteration order as nested loops over p, then v, then r.
    grid = [(p, v, r) for p in T for v in T[p] for r in R.keys()]

    ps = [p for p, _, _ in grid]
    rs = [r for _, _, r in grid]
    vs = [v for _, v, _ in grid]
    qs = np.array([T[p][v][R[r]] for p, v, r in grid])

    errors = np.abs(qs - qsturng(ps, rs, vs)) / qs
    assert_equal(np.array([]), np.where(errors > .03)[0])
def test_all_to_tbl(self):
    """Require ``qsturng`` to stay within 3% of every entry reachable through ``T`` and ``R``."""
    # Same iteration order as nested loops over p, then v, then r.
    triples = [(p, v, r) for p in T for v in T[p] for r in R.keys()]

    ps = [p for p, _, _ in triples]
    vs = [v for _, v, _ in triples]
    rs = [r for _, _, r in triples]
    expected = np.array([T[p][v][R[r]] for p, v, r in triples])

    relative_error = np.abs(expected - qsturng(ps, rs, vs)) / expected
    offenders = np.where(relative_error > .03)[0]
    assert_equal(np.array([]), offenders)
def t_est_all_to_tbl(self):
    """Require ``qsturng`` to stay within 3% of every entry of the ``make_tbls`` tables."""
    from statsmodels.stats.libqsturng.make_tbls import T,R

    # Same iteration order as nested loops over p, then v, then r.
    grid = [(p, v, r) for p in T for v in T[p] for r in R.keys()]

    ps = [p for p, _, _ in grid]
    rs = [r for _, _, r in grid]
    vs = [v for _, v, _ in grid]
    qs = np.array([T[p][v][R[r]] for p, v, r in grid])

    errors = np.abs(qs - qsturng(ps, rs, vs)) / qs
    assert_equal(np.array([]), np.where(errors > .03)[0])
def t_est_all_to_tbl(self):
    """Require ``qsturng`` to stay within 3% of every entry of the ``make_tbls`` tables."""
    from statsmodels.stats.libqsturng.make_tbls import T,R

    # Same iteration order as nested loops over p, then v, then r.
    grid = [(p, v, r) for p in T for v in T[p] for r in iterkeys(R)]

    ps = [p for p, _, _ in grid]
    rs = [r for _, _, r in grid]
    vs = [v for _, v, _ in grid]
    qs = np.array([T[p][v][R[r]] for p, v, r in grid])

    errors = np.abs(qs - qsturng(ps, rs, vs)) / qs
    assert_equal(np.array([]), np.where(errors > .03)[0])
def NemenyiCD(alpha: float, num_alg, num_dataset):
    """
    Computes Nemenyi's critical difference:

    * CD = q_alpha * sqrt(num_alg*(num_alg + 1)/(6*num_prob))

    where q_alpha is the critical value of the Studentized range statistic
    divided by sqrt(2). The quantile is looked up with
    ``num_alg * (num_dataset - 1)`` degrees of freedom.

    :param alpha: {0.1, 0.999}. Significance level.
    :param num_alg: number of tested algorithms.
    :param num_dataset: Number of problems/datasets where the algorithms have been tested.
    """
    # critical value of the studentized range, rescaled by sqrt(2)
    dof = num_alg * (num_dataset - 1)
    q_alpha = qsturng(p=1 - alpha, r=num_alg, v=dof) / np.sqrt(2)

    # critical difference
    rank_scale = np.sqrt(num_alg * (num_alg + 1) / (6.0 * num_dataset))
    return q_alpha * rank_scale
def pairwise_difference_nemenyi(
    avg_ranks: Dict[Method, float], n_datasets: int, alpha=0.05
):
    """Compute the Nemenyi test on all pairwise differences.

    Returns ``(sigdiff, CD)``. ``sigdiff[a][b]`` is whether the absolute
    difference of the average ranks of ``a`` and ``b`` is at least the
    critical difference ``CD``. A method is never compared with itself.
    """
    k = len(avg_ranks)
    q_alpha = qsturng(1 - alpha, k, np.inf) / np.sqrt(2)
    CD = q_alpha * np.sqrt(k * (k + 1) / (6 * n_datasets))

    sigdiff = {}
    for method, rank in avg_ranks.items():
        row = {}
        for other, other_rank in avg_ranks.items():
            if method == other:
                continue
            row[other] = abs(rank - other_rank) >= CD
        sigdiff[method] = row

    return sigdiff, CD
class Generator:
    """Generator that evaluates ``qsturng`` for a case; switched on by ``--statsmodel``."""

    # Set by init_args from the parsed command line.
    enabled = False
    display_name = "statsmodel"

    @staticmethod
    def init_parser(parser):
        """Register the ``--statsmodel`` flag on ``parser``."""
        parser.add_argument('--statsmodel', action="store_true")

    @classmethod
    def init_args(cls, arg):
        """Store the parsed ``statsmodel`` flag of ``arg`` on the class."""
        cls.enabled = arg.statsmodel

    @staticmethod
    def process(case, dop):
        """Return ``qsturng`` evaluated at ``case.p``, ``case.k`` and ``case.v``.

        ``dop`` is accepted but not used.
        """
        return qsturng(case.p, case.k, case.v)
def test_handful_to_ch(self):
    """Check ``qsturng`` against a handful of high-precision ``(p, r, v, q)`` rows."""
    reference = (
        (0.8699908, 10.0, 465.4956, 3.997799075635331),
        (0.8559087, 43.0, 211.7474, 5.1348419692951675),
        (0.6019187, 11.0, 386.5556, 3.3383101487698821),
        (0.658888, 51.0, 74.652, 4.8108880483153733),
        (0.6183604, 77.0, 479.8493, 4.9864059321732874),
        (0.9238978, 77.0, 787.5278, 5.7871053003022936),
        (0.8408322, 7.0, 227.3483, 3.5555798311413578),
        (0.5930279, 60.0, 325.3461, 4.7658023123882396),
        (0.6236158, 61.0, 657.5285, 4.8207812755987867),
        (0.9344575, 72.0, 846.4138, 5.8014341329259107),
        (0.8761198, 56.0, 677.8171, 5.362460718311719),
        (0.7901517, 41.0, 131.525, 4.9222831341950544),
        (0.6396423, 44.0, 624.3828, 4.6015127250083152),
        (0.8085966, 14.0, 251.4224, 4.0793058424719746),
        (0.716179, 45.0, 136.7055, 4.8055498089340087),
        (0.8204, 6.0, 290.9876, 3.3158771384085597),
        (0.8705345, 83.0, 759.6216, 5.5969334564485376),
        (0.8249085, 18.0, 661.9321, 4.3283725986180395),
        (0.9503, 2.0, 4.434, 3.7871158594867262),
        (0.7276132, 95.0, 91.43983, 5.4100384868499889),
    )

    for prob, n_means, dof, expected in reference:
        computed = qsturng(prob, n_means, dof)
        assert_almost_equal(expected, computed, 5)
def test_handful_to_tbl(self):
    """Check ``qsturng`` against a handful of reference ``(p, r, v, q)`` rows."""
    reference = (
        (0.75, 30.0, 12.0, 5.01973488482),
        (0.975, 15.0, 18.0, 6.00428263999),
        (0.1, 8.0, 11.0, 1.76248712658),
        (0.995, 6.0, 17.0, 6.13684839819),
        (0.85, 15.0, 18.0, 4.65007986215),
        (0.75, 17.0, 18.0, 4.33179650607),
        (0.75, 60.0, 16.0, 5.50520795792),
        (0.99, 100.0, 2.0, 50.3860723433),
        (0.9, 2.0, 40.0, 2.38132493732),
        (0.8, 12.0, 20.0, 4.15361239056),
        (0.675, 8.0, 14.0, 3.35011529943),
        (0.75, 30.0, 24.0, 4.77976803574),
        (0.75, 2.0, 18.0, 1.68109190167),
        (0.99, 7.0, 120.0, 5.00525918406),
        (0.8, 19.0, 15.0, 4.70694373713),
        (0.8, 15.0, 8.0, 4.80392205906),
        (0.5, 12.0, 11.0, 3.31672775449),
        (0.85, 30.0, 2.0, 10.2308503607),
        (0.675, 20.0, 18.0, 4.23706426096),
        (0.1, 60.0, 60.0, 3.69215469278),
    )

    for prob, n_means, dof, expected in reference:
        computed = qsturng(prob, n_means, dof)
        assert_almost_equal(expected, computed, 5)
def _critical_distance(alpha, k, n):
    """
    Determines the critical distance for the Nemenyi test with infinite degrees of freedom.

    Computed as ``qsturng(1 - alpha, k, inf) * sqrt(k * (k + 1) / (12 * n))``.
    """
    quantile = qsturng(1 - alpha, k, np.inf)
    rank_scale = np.sqrt(k * (k + 1) / (12 * n))
    return quantile * rank_scale
def _duncan_test(table, response_cols, factor_col, alpha=0.05):
    """Compare factor-level means pairwise for each response column.

    For every column in ``response_cols`` the function computes the mean and
    count of that column per level of ``factor_col``, a mean squared error
    around the level means, and one critical value per span ``p`` (2 up to
    the number of levels) from ``qsturng``. Each pair of levels is marked
    'YES' when the absolute difference of their means exceeds the critical
    value for their span, otherwise 'NO'.

    Parameters
    ----------
    table : presumably a pandas DataFrame (``groupby``, ``columns``,
        ``values`` and ``shape`` are used) - confirm against callers.
    response_cols : iterable of column names to analyse.
    factor_col : name of the grouping column.
    alpha : level used to build the probabilities passed to ``qsturng``.

    Returns
    -------
    dict
        ``{'result': result}`` where ``result`` maps
        ``'<response_col>_<factor_col>'`` to a dict with the DataFrames
        ``critical_val``, ``mean_by_factor`` and ``comp_by_factor``, plus
        ``'_repr_brtc_'`` holding ``rb.get()``.
    """
    result = dict()
    rb = BrtcReprBuilder()
    rb.addMD("""## Duncan test Result""")

    for response_col in response_cols:
        # Level means sorted from largest to smallest; the span between two
        # levels below is their distance in this ordering.
        mean_by_factor = table.groupby(factor_col).mean()[response_col].sort_values(ascending=False)
        count_by_factor = table.groupby(factor_col).count()[response_col]
        columns = list(table.columns)
        # Sum of squared deviations of every row from the mean of its own level.
        sse = np.sum([np.square(row[columns.index(response_col)] - mean_by_factor[row[columns.index(factor_col)]]) for row in table.values])
        # Error degrees of freedom: number of rows minus number of levels.
        df = table.shape[0] - count_by_factor.shape[0]
        mse = sse / df
        n = harmonic_mean(count_by_factor)
        sigma_d = np.sqrt(mse / n)

        classes = table[factor_col].unique()
        classes_cnt = len(classes)

        critical_val = dict()
        critical_val['p'] = range(2, classes_cnt + 1)
        critical_val['critical_value'] = []
        # p starts at 1 - alpha and is multiplied by (1 - alpha) per step.
        # NOTE(review): the original indentation is lost; the update of p is
        # assumed to run once per loop iteration - confirm.
        p = 1 - alpha
        for i in range(1, classes_cnt):
            # Outside [0.1, 0.999] a message is stored instead of a number.
            if p < 0.1 or p > 0.999:
                critical_val['critical_value'].append('Not statistically meaningful')
            else:
                critical_val['critical_value'].append(sigma_d * qsturng(p, i + 1, df))
            p = p * (1 - alpha)

        comp_by_factor = dict()
        comp_by_factor['compared_factors'] = []
        comp_by_factor['difference'] = []
        comp_by_factor['critical_value'] = []
        comp_by_factor['significant'] = []
        titles = mean_by_factor.index
        for i in range(classes_cnt):
            for j in range(i + 1, classes_cnt):
                title = str(titles[i]) + ' - ' + str(titles[j])
                comp_by_factor['compared_factors'].append(title)
                difference = abs(mean_by_factor[titles[i]] - mean_by_factor[titles[j]])
                comp_by_factor['difference'].append(difference)
                # Look up the critical value for the span j - i + 1.
                critical_value = critical_val['critical_value'][critical_val['p'].index(j - i + 1)]
                comp_by_factor['critical_value'].append(critical_value)
                if isinstance(critical_value, (float, int)):
                    if difference > critical_value:
                        comp_by_factor['significant'].append('YES')
                    else:
                        comp_by_factor['significant'].append('NO')
                else:
                    # Non-numeric entry: propagate the message itself.
                    comp_by_factor['significant'].append(critical_value)

        critical_val = pd.DataFrame(critical_val)
        mean_by_factor = pd.DataFrame(mean_by_factor).reset_index()
        comp_by_factor = pd.DataFrame(comp_by_factor)

        rb.addMD(strip_margin(""" | ## {response_col} by {factor_col} | | ### Critical value | {critical_val} | | ### Mean value by factor | {mean_by_factor} | | ### Difference by factor | {comp_by_factor} """.format(response_col=response_col, factor_col=factor_col, critical_val=pandasDF2MD(critical_val, num_rows=critical_val.shape[0]), mean_by_factor=pandasDF2MD(mean_by_factor, num_rows=mean_by_factor.shape[0]), comp_by_factor=pandasDF2MD(comp_by_factor, num_rows=comp_by_factor.shape[0]))))

        group = response_col + '_' + factor_col
        result[group] = dict()
        result[group]['critical_val'] = critical_val
        result[group]['mean_by_factor'] = mean_by_factor
        result[group]['comp_by_factor'] = comp_by_factor

    result['_repr_brtc_'] = rb.get()

    return {'result': result}
def q_inv(m, phi, P):
    """Return ``libqsturng.qsturng(P, m, phi)``; only the argument order differs."""
    quantile = libqsturng.qsturng(P, m, phi)
    return quantile
# One-way ANOVA from summary statistics, followed by simultaneous intervals
# for every pair of group means based on the studentized range quantile.
# n_list, mean_list, mean_total, alpha and f are defined outside this fragment.
n_T = sum(n_list)
k = len(mean_list)

# Raw sum of squared observations, hard-coded.
sum_square = 22829.14
# Total sum of squares.
SST = sum_square - n_T * np.square(mean_total)
# Treatment (between-group) sum of squares.
SSTr = sum(n_list[i] * np.square(mean_list[i]) for i in range(k)) - n_T * np.square(mean_total)
# Error sum of squares.
SSE = SST - SSTr

# The mean squares for treatments
MSTr = SSTr / (k - 1)
# The mean square error
MSE = SSE / (n_T - k)

F_stat = MSTr / MSE
p = f.sf(F_stat, dfn=k - 1, dfd=n_T - k)
# H0: all means are equal

# q (alpha, k, v)
s = np.sqrt(MSE)
q = qsturng(p=1 - alpha, r=k, v=n_T - k)
for i1 in range(k - 1):
    for i2 in range(i1 + 1, k):
        diff = mean_list[i1] - mean_list[i2]
        # The half-width is s * q * sqrt(0.5 / n_i + 0.5 / n_j).
        lower_bd = diff - s * q * np.sqrt(0.5 / n_list[i1] + 0.5 / n_list[i2])
        upper_bd = diff + s * q * np.sqrt(0.5 / n_list[i1] + 0.5 / n_list[i2])
        # Groups are reported with 1-based indices.
        print('({}, {}): ({}, {})'.format(i1 + 1, i2 + 1, lower_bd, upper_bd))

# NOTE(review): the next line is read as a comment; if it was a statement in
# the original file, `n` must be defined elsewhere - confirm.
# when all ni are equal (= n), L = 2 * s * q * np.sqrt(1 / n)
# required length
L0 = 2
# Solving L0 = 2 * s * q * sqrt(1 / n0) for n0.
n0 = 4 * np.square(s * q / L0)
def gamesHowellTest(df, target, hue):
    """Run the Games-Howell pairwise comparison of group means.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the response and the grouping column.
    target : str
        Name of the response column whose group means are compared.
    hue : str
        Name of the grouping column.

    Returns
    -------
    pandas.DataFrame
        One row per pair of groups with the columns ``groups``,
        ``mean_difference``, ``std_error``, ``t_value``, ``p_value``,
        ``upper_limit`` and ``lower limit``.
    """
    alpha = 0.05

    groups = np.unique(df[hue])
    # The studentized range is parameterised by the number of groups being
    # compared, i.e. the levels of `hue`, not the distinct response values.
    k = len(groups)

    grouped = df.groupby(hue)[target]
    group_means = dict(grouped.mean())
    group_obs = dict(grouped.size())
    group_variance = dict(grouped.var())

    combs = list(combinations(groups, 2))

    group_comps = []
    mean_differences = []
    t_values = []
    p_values = []
    std_err = []
    up_conf = []
    low_conf = []

    for first, second in combs:
        # Mean difference of the pair
        diff = group_means[second] - group_means[first]

        # Squared standard error of each group mean
        sq_err_first = group_variance[first] / group_obs[first]
        sq_err_second = group_variance[second] / group_obs[second]
        sq_err_sum = sq_err_first + sq_err_second

        # t-value of the pair
        t_val = np.abs(diff) / np.sqrt(sq_err_sum)

        # Welch-Satterthwaite degrees of freedom (named `dof` so the `df`
        # parameter is not shadowed)
        dof_denom = (sq_err_first ** 2 / (group_obs[first] - 1) +
                     sq_err_second ** 2 / (group_obs[second] - 1))
        dof = sq_err_sum ** 2 / dof_denom

        # p-value; t * sqrt(2) is on the studentized range scale
        p_val = psturng(t_val * np.sqrt(2), k, dof)

        # Standard error of the pair
        se = np.sqrt(0.5 * sq_err_sum)

        # Interval half-width: critical quantile scaled by the standard error
        margin = qsturng(1 - alpha, k, dof) * se

        mean_differences.append(diff)
        t_values.append(t_val)
        p_values.append(p_val)
        std_err.append(se)
        up_conf.append(diff + margin)
        low_conf.append(diff - margin)
        group_comps.append(str(first) + ' : ' + str(second))

    result_df = pd.DataFrame({'groups': group_comps,
                              'mean_difference': mean_differences,
                              'std_error': std_err,
                              't_value': t_values,
                              'p_value': p_values,
                              'upper_limit': up_conf,
                              'lower limit': low_conf})

    return result_df
def test_scalar(self):
    """scalar input -> scalar output"""
    expected = 4.43645545899562
    computed = qsturng(.9, 5, 6)
    assert_almost_equal(expected, computed, 5)
def cal_f(*args, alpha_level=0.05):
    """Run a one-way ANOVA and print a report.

    When the ANOVA result is statistically significant and all samples have
    the same size, a Tukey's HSD report is printed as well.

    Args:
        *args: a series of sample arrays
        alpha_level (float, optional): significance level used for the
            critical F and q values. Defaults to 0.05.

    Returns nothing; all output is printed.
    """
    # Store mean of each sample
    group_means = []
    # Concatenate all samples as a 1D numpy array
    concate_np = []
    # Store the size of each sample
    sample_sizes = []
    # Sum of squared deviations within the samples
    ss_within = 0
    # Number of samples collected
    num_groups = 0
    # Iterate over each sample and calculate its descriptive statistics
    for sample in args:
        num_groups += 1
        sample_sizes = np.append(sample_sizes, len(sample))
        single_mean = np.mean(sample)
        group_means = np.append(group_means, single_mean)
        concate_np = np.concatenate([concate_np, sample])
        ss_within += np.sum((np.subtract(sample, single_mean))**2)
    # Calc degrees of freedom for between-subject
    dof_between = num_groups - 1
    # Calc degrees of freedom for within-subject
    dof_within = np.size(concate_np) - num_groups
    # Calc grand mean of combined samples
    grand_mean = np.mean(concate_np)
    # Calc sum of squares for between-subject and the total SS
    ss_between = np.sum(sample_sizes*(group_means - grand_mean)**2)
    ss_total = ss_between + ss_within
    # Mean of SS for between and within subject respectively
    ms_between = ss_between/dof_between
    ms_within = ss_within/dof_within
    # Calc f statistic
    f_statistic = ms_between/ms_within
    # Look up f critical on given alpha level and dof
    f_critical = f.ppf(1-alpha_level, dof_between, dof_within)
    # Look up probability on calculated f statistic and dof
    p_value = f.sf(f_statistic, dof_between, dof_within)
    # Calculate effect size (eta2)
    explained_var = ss_between/ss_total
    # Rounded group means with precision=2
    group_means_ = np.around(group_means, 2)
    # Define hypothesis test result
    conclusions = ['Reject the Null as statistically significant.', 'Fail to reject the Null.']
    # Judge the result
    res = conclusions[0] if f_statistic >= f_critical else conclusions[1]
    # Report contents
    print_out = """ ================= ONE-WAY ANOVA TEST REPORT ================= Size in Each Sample: {0} Mean in Each Sample: {1} Grand Mean: {2: .2f} Total Sum of Squares: {14: .4f} Between-Groups Sum of Squares: {3: .4f} Degree of Freedom: {4} Mean Squares: {5: .4f} Within-Groups Sum of Squares: {6: .4f} Degree of Freedom: {7} Mean Squares: {8: .4f} F Statistic: {9: .4f} P Value: {10: .4f} F Critical: {11: .4f} Explained Variance (eta_sqd): {12: .4f} --------------------------------- Conclusion: {13} ==================== ONE-WAY ANOAVA END ======================= """
    # Get rid of indentation in the report
    formatted_print = textwrap.dedent(print_out)
    # Print the result
    print(formatted_print.format(sample_sizes, group_means_, grand_mean, ss_between, dof_between, ms_between, ss_within, dof_within, ms_within, f_statistic, p_value, f_critical, explained_var, res, ss_total))
    # -------- Multiple Comparison Test Part ----------- #
    if res == conclusions[1]:
        print("No Need for Tukey's HSD as non-statistically significant from Aone-way ANOVA.")
    else:
        # Check each sample size is the same or not
        flag = True
        first = sample_sizes[0]
        for i in sample_sizes:
            if first != i:
                flag = False
                break
            else:
                continue
        # Construct labels for multiple comparison results
        if flag == True:
            # Only seven labels are available; popping from an empty list
            # raises IndexError when more than seven samples are given.
            labels_choices = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
            labels_collections = []
            for i in range(len(sample_sizes)):
                label = labels_choices.pop(0)
                labels_collections.append(label)
            # NOTE(review): sample_sizes is built with np.append from an empty
            # list, so its entries are presumably floats - confirm np.repeat
            # accepts that as `repeats`.
            group_labels = np.repeat(labels_collections, repeats=sample_sizes[0])
            # Use statsmodels.stats.libqsturng.qsturng() to calculate q value
            # studentized range statistic table:
            # https://www2.stat.duke.edu/courses/Spring98/sta110c/qtable.html
            q_cirtical = qsturng(1-alpha_level, len(group_means), dof_within)
            # Calculate Tukey's hsd value
            hsd_value = q_cirtical*np.sqrt(ms_within/sample_sizes[0])
            # Generate tukey's hsd result table
            # NOTE(review): alpha_level is not passed here, so the table
            # presumably uses pairwise_tukeyhsd's own default - confirm.
            tukey_hsd = pairwise_tukeyhsd(concate_np, group_labels)
            # Report format
            hsd_res = """ =============== Multiple 
Comparison Test REPORT =============== Critical Q Value: {0:.2f} Critical Tukey's HSD: {1:.2f} {2} ================= Multiple Comparison Test END ================ """
            # Get rid of indentation in the report
            formatted_res = textwrap.dedent(hsd_res)
            print(formatted_res.format(q_cirtical, hsd_value, tukey_hsd))
        else:
            print("This version does not support Tukey's HSD analysis if sample sizes are different.")
def test_scalar(self):
    """scalar input -> scalar output"""
    reference = 4.43645545899562
    result = qsturng(.9, 5, 6)
    assert_almost_equal(reference, result, 5)
# Draw critical-difference diagrams of the average ranks at two alpha levels.
# ranks_per_technique, avg_ranks, plt, np and Orange come from outside this
# fragment.
names = list(ranks_per_technique.keys())
print(avg_ranks, len(ranks_per_technique['GENDIS']) )

from statsmodels.stats.libqsturng import qsturng

# Sources for the formula for cd:
# https://mlr.mlr-org.com/reference/generateCritDifferencesData.html
# https://scikit-learn-general.narkive.com/ebgsIj5T/critical-difference-diagram

# First diagram: alpha = 0.1, written to results/cd_diagram_1.svg.
plt.figure()
alpha = 0.1
n_methods = len(avg_ranks)
# The number of datasets is taken from the length of the 'GENDIS' entry.
n_datasets = len(ranks_per_technique['GENDIS'])
# Studentized range quantile with infinite degrees of freedom, divided by sqrt(2).
q_alpha = qsturng(1 - alpha, n_methods, np.inf) / np.sqrt(2)
cd = q_alpha * np.sqrt(n_methods * (n_methods + 1) / (6 * n_datasets))

Orange.evaluation.graph_ranks(avg_ranks, names, cd=cd, highv=n_methods - 1)
plt.gcf().set_size_inches(10, 3)
plt.subplots_adjust(left=0.25)
plt.savefig('results/cd_diagram_1.svg', format='svg')

# Second diagram: same computation with alpha = 0.05; no savefig call follows
# within this fragment.
plt.figure()
alpha = 0.05
n_methods = len(avg_ranks)
n_datasets = len(ranks_per_technique['GENDIS'])
q_alpha = qsturng(1 - alpha, n_methods, np.inf) / np.sqrt(2)
cd = q_alpha * np.sqrt(n_methods * (n_methods + 1) / (6 * n_datasets))

Orange.evaluation.graph_ranks(avg_ranks, names, cd=cd, highv=n_methods - 1)
plt.gcf().set_size_inches(10, 3)
plt.subplots_adjust(left=0.25)
def get_q_crit(k, df, alpha=0.05):
    """Return ``qsturng(1 - alpha, k, df)``, the studentized range value at level ``alpha``."""
    confidence = 1 - alpha
    return qsturng(confidence, k, df)