def _DegreesOfFreedom(stats1, stats2):
    """Estimates degrees of freedom with the Welch-Satterthwaite equation.

    Degrees of freedom is a measure of sample size. Other tests often use
    N - 1 (N being the sample size), but Welch's t-test approximates it with
    the Welch-Satterthwaite equation instead. The result is clamped to a
    minimum of 1.0 because the first row of the t-table corresponds to
    degrees of freedom of 1.0.

    Note that the zero-variance shortcut is evaluated before the sample
    sizes are validated, so two zero-variance samples never raise.

    Args:
      stats1: A SampleStats named tuple for the first sample; its `var` and
          `size` fields are read here.
      stats2: A SampleStats named tuple for the second sample; its `var` and
          `size` fields are read here.

    Returns:
      An estimate of degrees of freedom. Guaranteed to be at least 1.0.

    Raises:
      RuntimeError: Either sample has fewer than two values (and the samples
          do not both have zero variance).
    """
    # Without any variance in either sample the equation is undefined;
    # fall back to the smallest supported value.
    no_variance = stats1.var == 0 and stats2.var == 0
    if no_variance:
        return 1.0

    if stats1.size < 2:
        raise RuntimeError('Sample 1 size < 2. Actual size: %s' % stats1.size)
    if stats2.size < 2:
        raise RuntimeError('Sample 2 size < 2. Actual size: %s' % stats2.size)

    # Numerator: (var1/n1 + var2/n2)^2.
    variance_of_means = stats1.var / stats1.size + stats2.var / stats2.size
    numerator = variance_of_means**2

    # Denominator: sum over samples of var_i^2 / (n_i^2 * (n_i - 1)).
    first_term = math_utils.Divide(
        stats1.var**2, (stats1.size**2) * (stats1.size - 1))
    second_term = math_utils.Divide(
        stats2.var**2, (stats2.size**2) * (stats2.size - 1))

    df = math_utils.Divide(numerator, first_term + second_term)

    # Never report less than the first row of the t-table.
    return max(1.0, df)
def _TValue(stats1, stats2):
    """Computes the t-statistic of Welch's t-test for two samples.

    The t value works like a signal-to-noise ratio: the difference between
    the sample means divided by the square root of the summed per-sample
    variance-over-size terms. A larger magnitude means the groups differ
    more clearly.

    Args:
      stats1: A SampleStats named tuple for the first sample; its `mean`,
          `var` and `size` fields are read here.
      stats2: A SampleStats named tuple for the second sample; its `mean`,
          `var` and `size` fields are read here.

    Returns:
      A t value, which may be negative or positive. When both samples have
      zero variance, positive infinity is returned regardless of the means
      (including when the means are equal).
    """
    # With zero variance on both sides any difference counts as a very clear
    # one, and the quotient below tends to infinity as the variance tends to
    # zero, so report an infinitely strong signal.
    zero_noise = stats1.var == 0 and stats2.var == 0
    if zero_noise:
        return float('inf')

    signal = stats1.mean - stats2.mean
    noise = math.sqrt(stats1.var / stats1.size + stats2.var / stats2.size)
    return math_utils.Divide(signal, noise)
def testDivide_UsesFloatArithmetic(self):
    """Dividing two ints must yield the exact fractional quotient."""
    # 3 / 2 would be 1 under integer (floor) division; 1.5 shows that
    # math_utils.Divide performs floating-point division.
    quotient = math_utils.Divide(3, 2)
    self.assertEqual(1.5, quotient)
def testDivide_ByZero_ReturnsZero(self):
    """Dividing by zero must produce NaN rather than raising.

    NOTE: despite the "ReturnsZero" suffix in the method name, the value
    asserted here is NaN, not zero.
    """
    quotient = math_utils.Divide(1, 0)
    self.assertTrue(math.isnan(quotient))
def _Normalize(values):
    """Makes a series with the same shape but with variance = 1, mean = 0.

    Each value is shifted by the series mean and then scaled by the standard
    deviation of the shifted series.

    Args:
      values: A series of numbers.

    Returns:
      A list with one normalized entry per input value.
    """
    center = math_utils.Mean(values)
    centered = [value - center for value in values]
    spread = math_utils.StandardDeviation(centered)
    # NOTE(review): for a constant series `spread` is presumably 0, so every
    # entry is whatever math_utils.Divide yields for a zero divisor --
    # confirm against math_utils.
    normalized = [math_utils.Divide(offset, spread) for offset in centered]
    return normalized