Example #1
0
def _DegreesOfFreedom(stats1, stats2):
    """Estimates degrees of freedom with the Welch-Satterthwaite equation.

    Degrees of freedom is a measure of sample size. For Welch's t-test the
    degrees of freedom are not simply N - 1; they are approximated from the
    variances and sizes of both samples. The result is clamped to at least
    1.0 because the first row of the t-table is for 1.0 degree of freedom.

    Args:
      stats1: A SampleStats named tuple for the first sample.
      stats2: A SampleStats named tuple for the second sample.

    Returns:
      An estimate of degrees of freedom, never less than 1.0.

    Raises:
      RuntimeError: Either sample has fewer than two values.
    """
    if stats1.var == 0 and stats2.var == 0:
        # No variance in either sample: use the smallest valid value.
        return 1.0
    if stats1.size < 2:
        raise RuntimeError('Sample 1 size < 2. Actual size: %s' % stats1.size)
    if stats2.size < 2:
        raise RuntimeError('Sample 2 size < 2. Actual size: %s' % stats2.size)
    pooled = stats1.var / stats1.size + stats2.var / stats2.size
    term1 = math_utils.Divide(stats1.var**2,
                              (stats1.size**2) * (stats1.size - 1))
    term2 = math_utils.Divide(stats2.var**2,
                              (stats2.size**2) * (stats2.size - 1))
    estimate = math_utils.Divide(pooled**2, term1 + term2)
    return max(1.0, estimate)
Example #2
0
def _TValue(stats1, stats2):
    """Computes the t-statistic of two samples using Welch's t-test formula.

    The t-value can be read as a signal-to-noise ratio: a larger magnitude
    indicates the two groups differ more clearly.

    Args:
      stats1: A SampleStats named tuple for the first sample.
      stats2: A SampleStats named tuple for the second sample.

    Returns:
      A t value, which may be negative or positive.
    """
    if stats1.var == 0 and stats2.var == 0:
        # With zero variance in both samples, any difference in means is an
        # unambiguous difference; also, as variance approaches zero the
        # quotient in the formula approaches infinity.
        return float('inf')
    signal = stats1.mean - stats2.mean
    noise = math.sqrt(stats1.var / stats1.size + stats2.var / stats2.size)
    return math_utils.Divide(signal, noise)
Example #3
0
 def testDivide_UsesFloatArithmetic(self):
     # 3 / 2 must be 1.5, not 1: Divide performs true (float) division.
     quotient = math_utils.Divide(3, 2)
     self.assertEqual(1.5, quotient)
Example #4
0
 def testDivide_ByZero_ReturnsNan(self):
     # The assertion checks for NaN, so the method name must say so: the old
     # name ("ReturnsZero") contradicted the behavior actually verified.
     # Division by zero yields NaN rather than raising ZeroDivisionError.
     self.assertTrue(math.isnan(math_utils.Divide(1, 0)))
Example #5
0
def _Normalize(values):
    """Returns a same-shaped series rescaled to mean 0 and variance 1."""
    center = math_utils.Mean(values)
    centered = [value - center for value in values]
    spread = math_utils.StandardDeviation(centered)
    return [math_utils.Divide(value, spread) for value in centered]