def Test(expected, observed, num_trials=1000, stir=-1):
    """Run a simulation to estimate the p-value of the observed values.

    The chi-squared statistic of `observed` against `expected` is used as
    the threshold.  The p-value is the fraction of simulated samples whose
    chi-squared statistic (against `expected`) is at least that threshold.

    The threshold, the number of trials, the largest simulated statistic
    and the resulting p-value are printed to stdout.

    Args:
        expected: Hist of expected values
        observed: Hist of observed values
        num_trials: how many simulations to run (must be at least 1)
        stir: how much to stir the simulated vat of nuts
              (-1 means perfect mixing)

    Returns:
        float p-value

    Raises:
        ValueError: if num_trials is less than 1; with no trials there is
            nothing to take a maximum of or to divide by.
    """
    # Fail early with a clear message instead of the opaque
    # "max() arg is an empty sequence" error further down.
    if num_trials < 1:
        raise ValueError('num_trials must be at least 1, got %r'
                         % (num_trials,))

    # compute the chi-squared stat of the real observation
    threshold = ChiSquared(expected, observed)

    # Single-argument print(...) calls produce the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('chi-squared %s' % (threshold,))
    print('simulated %d trials' % num_trials)

    num_nuts = observed.Total()
    cdf = thinkstats2.MakeCdfFromHist(expected)

    # chi-squared statistic of each simulated sample
    chi2s = [
        ChiSquared(expected,
                   SimulateSample(expected, cdf, num_nuts, stir=stir))
        for _ in range(num_trials)
    ]

    # how many simulated samples are at least as extreme as the observation
    count = sum(1 for chi2 in chi2s if chi2 >= threshold)

    print('max chi2 %s' % (max(chi2s),))

    # float() keeps this a true division on Python 2 as well
    pvalue = float(count) / num_trials
    print('p-value %s' % (pvalue,))

    return pvalue
def testCdf(self):
    """Exercise Cdf lookups, sampling and every way of constructing a Cdf."""
    t = [1, 2, 2, 3, 5]
    pmf = thinkstats2.Pmf(t)
    hist = thinkstats2.Hist(t)

    cdf = thinkstats2.Cdf(pmf)
    self.assertEqual(len(str(cdf)), 37)

    # cumulative probability via indexing, including values not in the data
    self.assertEqual(cdf[0], 0)
    indexed = [(1, 0.2), (2, 0.6), (3, 0.8), (4, 0.8), (5, 1), (6, 1)]
    for value, prob in indexed:
        self.assertAlmostEqual(cdf[value], prob)

    # the same lookups, done in bulk
    wanted_probs = [0, 0.2, 0.6, 0.8, 0.8, 1, 1]
    for got, want in zip(cdf.Probs(range(7)), wanted_probs):
        self.assertAlmostEqual(got, want)

    # inverse lookups, one probability at a time
    inverse = [
        (0, 1), (0.1, 1), (0.2, 1),
        (0.3, 2), (0.4, 2), (0.5, 2), (0.6, 2),
        (0.7, 3), (0.8, 3),
        (0.9, 5), (1, 5),
    ]
    for prob, value in inverse:
        self.assertEqual(cdf.Value(prob), value)

    # inverse lookups, done in bulk
    values = cdf.ValueArray(np.linspace(0, 1, 11))
    self.assertTrue((values == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())

    # sampling with a fixed seed
    np.random.seed(17)
    sample = cdf.Sample(7)
    self.assertListEqual(sample.tolist(), [2, 2, 1, 1, 3, 3, 3])

    # a Cdf built from Pmf probabilities may carry floating-point
    # representation error, so those are compared approximately
    self.assertEqual(len(cdf), 4)
    self.assertAlmostEqual(cdf.Prob(2), 0.6)
    self.assertAlmostEqual(cdf[2], 0.6)
    self.assertEqual(cdf.Value(0.6), 2)

    # Factories are invoked lazily so each Cdf is constructed right before
    # its own assertions run.
    from_probs = [
        lambda: thinkstats2.MakeCdfFromPmf(pmf),
        lambda: thinkstats2.MakeCdfFromItems(pmf.Items()),
        lambda: thinkstats2.Cdf(pmf.d),
        lambda: thinkstats2.MakeCdfFromDict(pmf.d),
    ]
    for build in from_probs:
        cdf = build()
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

    # these constructions are compared exactly
    from_counts = [
        lambda: thinkstats2.Cdf(hist),
        lambda: thinkstats2.MakeCdfFromHist(hist),
        lambda: thinkstats2.Cdf(t),
        lambda: thinkstats2.MakeCdfFromList(t),
        lambda: thinkstats2.Cdf(Counter(t)),
    ]
    for build in from_counts:
        cdf = build()
        self.assertEqual(len(cdf), 4)
        self.assertEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

    # a copy of the last Cdf (built from a Counter) answers the same way
    cdf2 = cdf.Copy()
    self.assertEqual(cdf2.Prob(2), 0.6)
    self.assertEqual(cdf2.Value(0.6), 2)
def testCdf(self):
    """Check that every supported way of building a Cdf gives the same result.

    For the data [1, 2, 2, 3, 5] each Cdf must have 4 distinct values, a
    cumulative probability of 0.6 at 2, and 2 as the value at
    probability 0.6.

    Uses assertEqual / assertAlmostEqual rather than the deprecated
    assertEquals / assertAlmostEquals aliases, which were removed from
    unittest in Python 3.12.
    """
    t = [1, 2, 2, 3, 5]
    pmf = thinkstats2.Pmf(t)
    hist = thinkstats2.Hist(t)

    cdf = thinkstats2.Cdf(pmf)
    self.assertEqual(len(str(cdf)), 40)

    # when you make a Cdf from a Pmf, you might get some floating
    # point representation error
    self.assertEqual(len(cdf), 4)
    self.assertAlmostEqual(cdf.Prob(2), 0.6)
    self.assertAlmostEqual(cdf[2], 0.6)
    self.assertEqual(cdf.Value(0.6), 2)

    # Constructions based on Pmf probabilities: compare approximately.
    # Each factory is called lazily so the Cdf is built immediately
    # before its own assertions.
    approximate = [
        lambda: thinkstats2.MakeCdfFromPmf(pmf),
        lambda: thinkstats2.Cdf(pmf.Items()),
        lambda: thinkstats2.MakeCdfFromItems(pmf.Items()),
        lambda: thinkstats2.Cdf(pmf.d),
        lambda: thinkstats2.MakeCdfFromDict(pmf.d),
    ]
    for make_cdf in approximate:
        cdf = make_cdf()
        self.assertEqual(len(cdf), 4)
        self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

    # Constructions based on a Hist, the raw list or a Counter:
    # compare exactly.
    exact = [
        lambda: thinkstats2.Cdf(hist),
        lambda: thinkstats2.MakeCdfFromHist(hist),
        lambda: thinkstats2.Cdf(t),
        lambda: thinkstats2.MakeCdfFromList(t),
        lambda: thinkstats2.Cdf(Counter(t)),
    ]
    for make_cdf in exact:
        cdf = make_cdf()
        self.assertEqual(len(cdf), 4)
        self.assertEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

    # a copy of the last Cdf (built from a Counter) must answer the same way
    cdf2 = cdf.Copy()
    self.assertEqual(cdf2.Prob(2), 0.6)
    self.assertEqual(cdf2.Value(0.6), 2)