def friedmanchisquare(args: List(List(float))) -> (float, float):
    """
    Friedman Chi-Square is a non-parametric, one-way within-subjects ANOVA.
    This function calculates the Friedman Chi-square test for repeated
    measures and returns the result, along with the associated probability
    value. Each subject's scores are ranked across the levels, the ranks are
    summed per level, and the statistic is built from the squared rank sums.

    This version is specialized to exactly 3 levels. Only 3 levels requires
    a minimum of 10 subjects in the study.

    Usage:   friedmanchisquare(args)    args is a list of 3 score lists, one
                                        per level; the number of subjects is
                                        taken from the first list
    Returns: chi-square statistic, associated p-value (k - 1 degrees of
             freedom)
    Raises:  ValueError if the number of levels is not exactly 3
    """
    k = len(args)
    if k < 3:
        raise ValueError('Less than 3 levels. Friedman test not appropriate.')
    if k > 3:
        raise ValueError(
            'bg: specialized this code to 3 levels for Reticulated experiment')
    n = len(args[0])
    # Put the three levels side by side.
    # NOTE(review): assumes pstat.abut yields one row per subject holding that
    # subject's k scores -- the per-row ranking below relies on this layout.
    data = pstat.abut(pstat.abut(args[0], args[1]), args[2])
    # Rank each subject's scores across the levels.
    ranked_rows = [support.rankdata(row) for row in data]
    # Sum the *ranks* per level. The previous code summed the raw scores in
    # args[i] here, which discarded the ranking computed above.
    ssbn = 0
    for level_ranks in zip(*ranked_rows):
        ssbn = ssbn + sum(level_ranks) ** 2
    chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1)
    return chisq, probability.chisqprob(chisq, k - 1)
def kruskalwallish(args: List(List(float))) -> (float, float):
    """
    The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more groups,
    requiring at least 5 subjects in each group. All scores are pooled and
    ranked together; each group's rank sum is squared and divided by the
    group size, and the resulting H is divided by the tie-correction factor.

    Usage:   kruskalwallish(args)    args is a list of score lists, one per
                                     group
    Returns: H-statistic (corrected for ties), associated p-value
    Raises:  ValueError if the tie-correction factor is 0
    """
    groups = list(args)
    sizes = [len(group) for group in groups]

    # Pool every score, keeping the groups in their original order.
    pooled = []
    for group in groups:
        pooled = pooled + group

    ranked = support.rankdata(pooled)
    T = tiecorrect(ranked)

    # Walk through the pooled ranks group by group; each group's ranks sit in
    # a contiguous slice because the pooling above preserved group order.
    scaled_squares = []
    start = 0
    for size in sizes:
        group_ranks = ranked[start:start + size]
        start += size
        scaled_squares.append(sum(group_ranks) ** 2 / float(size))

    ssbn = sum(scaled_squares)
    totaln = sum(sizes)
    h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)
    df = len(groups) - 1
    if T == 0:
        raise ValueError('All numbers are identical in lkruskalwallish')
    h = h / float(T)
    return h, probability.chisqprob(h, df)
def wilcoxont(x: List(float), y: List(float)) -> (float, float):
    """
    Calculates the Wilcoxon T-test for related samples and returns the
    result. A non-parametric T-test. Pairs with a zero difference are
    dropped, the absolute differences are ranked, and the smaller of the
    positive and negative rank sums is the statistic. The probability comes
    from a normal approximation.

    Usage:   wilcoxont(x, y)
    Returns: the smaller signed-rank sum, two-tail probability estimate
    Raises:  ValueError if x and y differ in length, or if no non-zero
             differences remain (the normal approximation is then undefined)
    """
    if len(x) != len(y):
        raise ValueError('Unequal N in wilcoxont. Aborting.')

    # Zero differences carry no sign, so they are excluded from the ranking.
    d = [diff for diff in (xi - yi for xi, yi in zip(x, y)) if diff != 0]
    count = len(d)
    if count == 0:
        # Without this guard the standard error below is 0 and the function
        # would fail with an uninformative ZeroDivisionError.
        raise ValueError('All differences are zero in wilcoxont. Aborting.')

    absranked = support.rankdata([abs(diff) for diff in d])
    r_plus = 0.0
    r_minus = 0.0
    for diff, rank in zip(d, absranked):
        if diff < 0:
            r_minus = r_minus + rank
        else:
            r_plus = r_plus + rank

    wt = min(r_plus, r_minus)
    mn = count * (count + 1) * 0.25
    se = sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0)
    _z = fabs(wt - mn) / se
    prob = 2 * (1.0 - probability.zprob(_z))
    return wt, prob
def mannwhitneyu(x: List(float), y: List(float)) -> (float, float):
    """
    Calculates a Mann-Whitney U statistic on the provided scores and returns
    the result. Use only when the n in each condition is < 20 and you have 2
    independent samples of ranks. NOTE: Mann-Whitney U is significant if the
    u-obtained is LESS THAN or equal to the critical value of U found in the
    tables. Equivalent to Kruskal-Wallis H with just 2 groups.

    Usage:   mannwhitneyu(x, y)
    Returns: u-statistic (the smaller of the two U values), one-tailed
             p-value (i.e., p(z(U)))
    Raises:  ValueError if the tie-correction factor is 0
    """
    n1 = len(x)
    n2 = len(y)
    ranked = support.rankdata(x + y)
    rankx = ranked[:n1]  # ranks belonging to x; the remainder belong to y
    u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx)  # calc U for x
    u2 = n1 * n2 - u1  # remainder is U for y
    bigu = max(u1, u2)
    smallu = min(u1, u2)
    # Correction factor for tied scores.
    # NOTE(review): assumes tiecorrect returns the multiplicative variance
    # correction, which belongs under a single square root. The previous code
    # took sqrt(tiecorrect(...)) and then a second sqrt inside sd, applying
    # the root twice.
    T = tiecorrect(ranked)
    if T == 0:
        raise ValueError('All numbers are identical in lmannwhitneyu')
    sd = sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0)
    z = abs((bigu - n1 * n2 / 2.0) / sd)  # normal approximation for prob calc
    return smallu, 1.0 - probability.zprob(z)
def ranksums(x: List(float), y: List(float)) -> (float, float):
    """
    Calculates the rank sums statistic on the provided scores and returns
    the result. Use only when the n in each condition is > 20 and you have 2
    independent samples of ranks. Both samples are ranked together and the
    rank sum of x is compared with its expected value.

    Usage:   ranksums(x, y)
    Returns: a z-statistic, two-tailed p-value
    """
    size_x = len(x)
    size_y = len(y)
    pooled_ranks = support.rankdata(x + y)

    # The first size_x ranks belong to x because x leads the pooled list.
    observed = sum(pooled_ranks[:size_x])
    total_plus_one = size_x + size_y + 1
    expected = size_x * total_plus_one / 2.0
    spread = sqrt(size_x * size_y * total_plus_one / 12.0)

    z_score = (observed - expected) / spread
    two_tailed = 2 * (1.0 - probability.zprob(abs(z_score)))
    return z_score, two_tailed
# Derived fixtures: ll holds five references to the same list l; m is a float
# ramp starting at 4.0 with the entry at index 10 overwritten.
ll = 5 * [l]
m = list(map(float, range(4, LIST_SIZE + 3)))
m[10] = 34.0

print('\n\nF_oneway:')
print(anova.F_oneway([l, m]))
print(anova.F_oneway([l, l]))
# print 'F_value:',stats.F_value(l),stats.F_value(l)

print('\nSUPPORT')
# Each single-argument helper is run over l, lf, l, lf (in that order); each
# two-argument helper over the pairs (l, m), (lf, m), (l, l), (lf, l).
print('sum:', *(support.sum(seq) for seq in (l, lf, l, lf)))
print('cumsum:')
print(support.cumsum(list(map(int, l))))
print(support.cumsum(list(map(int, lf))))
print('ss:', *(support.ss(seq) for seq in (l, lf, l, lf)))
print(
    'summult:',
    *(support.summult(left, right)
      for left, right in ((l, m), (lf, m), (l, l), (lf, l))))
print(
    'sumsquared:',
    *(support.square_of_sums(seq) for seq in (l, lf, l, lf)))
print(
    'sumdiffsquared:',
    *(support.sumdiffsquared(left, right)
      for left, right in ((l, m), (lf, m), (l, l), (lf, l))))
print('shellsort:')
print(support.shellsort(m))
print(support.shellsort(l))
print('rankdata:')
print(support.rankdata(m))
print(support.rankdata(l))