def test_f_high(self):
    """f_high should match values from R over a range of df and F inputs."""
    # Each key is the argument tuple passed to f_high (dfn, dfd, F);
    # each value is the reference result obtained from R.
    expected = {
        (1, 1, 0): 1,
        (1, 1, 1): 0.5,
        (1, 1, 20): 0.1400487,
        (1, 1, 1000000): 0.0006366196,
        (1, 10, 0): 1,
        (1, 10, 5): 0.0493322,
        (1, 10, 20): 0.001193467,
        (10, 1, 0): 1,
        (10, 10, 14.7): 0.0001062585,
        # test non-integer degrees of freedom
        (13.7, 11.9, 3.8): 0.01340347,
        # used following series to track down a bug after a failed test case
        (28, 29, 2): 0.03424088,
        (28, 29, 10): 1.053019e-08,
        (28, 29, 20): 1.628245e-12,
        (28, 29, 300): 5.038791e-29,
        (28, 35, 1): 0.4946777,
        (28, 37, 1): 0.4934486,
        (28, 38, 1): 0.4928721,
        (28, 38.001, 1): 0.4928716,
        (28, 38.5, 1): 0.4925927,
        (28, 39, 1): 0.492319,
        (28, 39, 10): 1.431901e-10,
        (28, 39, 20): 1.432014e-15,
        (28, 39, 30): 1.059964e-18,
        (28, 39, 50): 8.846678e-23,
        (28, 39, 300): 1.226935e-37,
        (28, 39, 304.7): 9.08154e-38,
        (28.4, 39.2, 304.7): 5.573927e-38,
        (1032, 2050, 0): 1,
        (1032, 2050, 4.15): 1.23535e-165,
        (1032, 2050, 0.5): 1,
        (1032, 2050, 0.1): 1,
    }
    # sorted() gives a deterministic order and, unlike calling .sort() on
    # the result of items(), also works where items() returns a view.
    for key, value in sorted(expected.items()):
        self.assertFloatEqualRel(f_high(*key), value)
def f_two_sample(a, b, tails=None):
    """Returns dfn, dfd, F-value and probability for the two samples a and b.

    a, b: independent samples of scores, each a list of observations
    (numbers).

    tails: None (default, two-sided test), 'high' or 'low'.

    This implementation returns the same results as the F test in R.
    """
    num_df, denom_df, f_stat = f_value(a, b)

    if tails == 'low':
        prob = f_low(num_df, denom_df, f_stat)
    elif tails == 'high':
        prob = f_high(num_df, denom_df, f_stat)
    else:
        # Two-sided case: the side handed to fprob depends on which sample
        # has the larger variance.
        side = 'right' if var(a) >= var(b) else 'left'
        prob = fprob(num_df, denom_df, f_stat, side=side)

    return num_df, denom_df, f_stat, prob
def ANOVA_one_way(a):
    """Performs a one-way analysis of variance.

    a: a list of lists of observed values; each inner list holds the values
    of one category. The analysis needs 2 or more categories. Every inner
    list must provide Mean and Variance attributes, so making them Numbers
    objects is recommended.

    F is the between-group mean square (sum over groups of
    len(group) * (group mean - grand mean)**2, divided by dfn) over the
    within-group mean square (sum over groups of
    Variance * (len(group) - 1), divided by dfd).

    Returns dfn, dfd, F, between_MS, within_MS, group_means and the result
    of f_high(dfn, dfd, F).
    """
    means = []
    within_ss_terms = []
    pooled_values = []
    total_n = 0
    for group in a:
        size = len(group)
        total_n += size
        means.append(group.Mean)
        # Variance * (n - 1) is this group's contribution to the
        # within-group sum of squares.
        within_ss_terms.append(group.Variance * (size - 1))
        pooled_values.extend(group)
    group_means = Numbers(means)

    # within-group mean square (denominator of F)
    within_ss_terms = Numbers(within_ss_terms)
    dfd = total_n - len(group_means)
    within_MS = sum(within_ss_terms) / dfd

    # between-group mean square (numerator of F)
    grand_mean = Numbers(pooled_values).Mean
    between_ss = 0
    for group in a:
        deviation = group.Mean - grand_mean
        between_ss += deviation * deviation * len(group)
    dfn = len(group_means) - 1
    between_MS = between_ss / dfn

    F = between_MS / within_MS
    prob = f_high(dfn, dfd, F)
    return dfn, dfd, F, between_MS, within_MS, group_means, prob