Beispiel #1
0
def Test(expected, observed, num_trials=1000, stir=-1):
    """Run a simulation to estimate the p-value of the observed values.

    The chi-squared statistic of `observed` against `expected` is used as
    the threshold; the p-value is the fraction of simulated samples whose
    chi-squared statistic is at least that large.  The statistic, the
    number of trials, the largest simulated statistic and the p-value are
    printed to stdout.

    Args:
      expected: Hist of expected values
      observed: Hist of observed values
      num_trials: how many simulations to run (must be at least 1)
      stir: how much to stir the simulated vat of nuts
            (-1 means perfect mixing)

    Returns:
      float p-value

    Raises:
      ValueError: if num_trials is less than 1
    """
    # Without at least one trial there is nothing to take the max of and
    # the p-value would be a division by zero, so fail early and clearly.
    if num_trials < 1:
        raise ValueError(
            'num_trials must be at least 1, got %r' % (num_trials,))

    # compute the chi-squared stat
    threshold = ChiSquared(expected, observed)
    # Single-argument print(...) with %-formatting emits the same text on
    # Python 2 and Python 3.
    print('chi-squared %s' % (threshold,))

    print('simulated %d trials' % num_trials)
    num_nuts = observed.Total()
    cdf = thinkstats2.MakeCdfFromHist(expected)

    # chi-squared statistic of each simulated sample, in trial order
    chi2s = []
    for _ in range(num_trials):
        simulated = SimulateSample(expected, cdf, num_nuts, stir=stir)
        chi2s.append(ChiSquared(expected, simulated))

    print('max chi2 %s' % (max(chi2s),))

    # number of simulated statistics at least as large as the observed one
    count = sum(1 for chi2 in chi2s if chi2 >= threshold)

    # float() keeps true division on Python 2 as well
    pvalue = float(count) / num_trials
    print('p-value %s' % (pvalue,))

    return pvalue
    def testCdf(self):
        """Check Cdf lookups, inverse lookups, sampling and constructors."""
        values = [1, 2, 2, 3, 5]
        pmf = thinkstats2.Pmf(values)
        hist = thinkstats2.Hist(values)

        cdf = thinkstats2.Cdf(pmf)
        self.assertEqual(len(str(cdf)), 37)

        # indexing the Cdf at x = 0 is compared exactly, x = 1..6 approximately
        self.assertEqual(cdf[0], 0)
        for x, wanted in enumerate([0.2, 0.6, 0.8, 0.8, 1, 1], 1):
            self.assertAlmostEqual(cdf[x], wanted)

        # the same lookups, requested for a whole sequence at once
        wanted_probs = [0, 0.2, 0.6, 0.8, 0.8, 1, 1]
        for got, wanted in zip(cdf.Probs(range(7)), wanted_probs):
            self.assertAlmostEqual(got, wanted)

        # Value(p) for probabilities from 0 to 1
        value_table = [
            (0, 1), (0.1, 1), (0.2, 1),
            (0.3, 2), (0.4, 2), (0.5, 2), (0.6, 2),
            (0.7, 3), (0.8, 3),
            (0.9, 5), (1, 5),
        ]
        for p, wanted in value_table:
            self.assertEqual(cdf.Value(p), wanted)

        # array version of Value over eleven evenly spaced probabilities
        quantiles = cdf.ValueArray(np.linspace(0, 1, 11))
        self.assertTrue(
            (quantiles == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())

        # seeding numpy's generator makes the drawn sample reproducible
        np.random.seed(17)
        sample = cdf.Sample(7)
        self.assertListEqual(sample.tolist(), [2, 2, 1, 1, 3, 3, 3])

        # a Cdf built from a Pmf may carry floating-point representation
        # error, so its probabilities are compared approximately
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertAlmostEqual(cdf[2], 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        # remaining Pmf-derived constructors; each is built lazily so that
        # construction and checking stay interleaved
        pmf_based = [
            lambda: thinkstats2.MakeCdfFromPmf(pmf),
            lambda: thinkstats2.MakeCdfFromItems(pmf.Items()),
            lambda: thinkstats2.Cdf(pmf.d),
            lambda: thinkstats2.MakeCdfFromDict(pmf.d),
        ]
        for build in pmf_based:
            cdf = build()
            self.assertEqual(len(cdf), 4)
            self.assertAlmostEqual(cdf.Prob(2), 0.6)
            self.assertEqual(cdf.Value(0.6), 2)

        # constructors fed with counts or raw values are compared exactly
        count_based = [
            lambda: thinkstats2.Cdf(hist),
            lambda: thinkstats2.MakeCdfFromHist(hist),
            lambda: thinkstats2.Cdf(values),
            lambda: thinkstats2.MakeCdfFromList(values),
            lambda: thinkstats2.Cdf(Counter(values)),
        ]
        for build in count_based:
            cdf = build()
            self.assertEqual(len(cdf), 4)
            self.assertEqual(cdf.Prob(2), 0.6)
            self.assertEqual(cdf.Value(0.6), 2)

        # a copy of the last Cdf answers the same queries
        duplicate = cdf.Copy()
        self.assertEqual(duplicate.Prob(2), 0.6)
        self.assertEqual(duplicate.Value(0.6), 2)
    def testCdf(self):
        """Check that every way of constructing a Cdf gives the same result.

        Each Cdf is built from the same five values and must have four
        entries, give 0.6 for Prob(2), and give 2 for Value(0.6).

        Uses assertEqual / assertAlmostEqual rather than the deprecated
        assertEquals / assertAlmostEquals aliases, which newer unittest
        versions no longer provide.
        """
        t = [1, 2, 2, 3, 5]
        pmf = thinkstats2.Pmf(t)
        hist = thinkstats2.Hist(t)

        cdf = thinkstats2.Cdf(pmf)
        self.assertEqual(len(str(cdf)), 40)

        # when you make a Cdf from a Pmf, you might get some floating
        # point representation error, hence the approximate comparisons
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertAlmostEqual(cdf[2], 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.MakeCdfFromPmf(pmf)
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.Cdf(pmf.Items())
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.MakeCdfFromItems(pmf.Items())
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.Cdf(pmf.d)
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.MakeCdfFromDict(pmf.d)
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        # from here on the Cdfs are built from counts or raw values and
        # the probabilities are compared exactly
        cdf = thinkstats2.Cdf(hist)
        self.assertEqual(len(cdf), 4)
        self.assertEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.MakeCdfFromHist(hist)
        self.assertEqual(len(cdf), 4)
        self.assertEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.Cdf(t)
        self.assertEqual(len(cdf), 4)
        self.assertEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.MakeCdfFromList(t)
        self.assertEqual(len(cdf), 4)
        self.assertEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        cdf = thinkstats2.Cdf(Counter(t))
        self.assertEqual(len(cdf), 4)
        self.assertEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

        # a copy must answer the same queries as the Cdf it came from
        cdf2 = cdf.Copy()
        self.assertEqual(cdf2.Prob(2), 0.6)
        self.assertEqual(cdf2.Value(0.6), 2)