def _make_normal_model(self, weights, root, xmax=175,
                       xlabel='adult weight (kg)', axis=None):
    """Plot the CDF of the weights together with a normal model of them.

    The model parameters are the trimmed mean and variance of a sorted
    slice copy of weights; the sample size, mean, variance and sigma are
    printed along the way.

    Args:
        self: not used by the body.
        weights: sliceable sequence of weights.
        root: first positional argument handed to _05_myplot._save.
        xmax: third argument to the normal-CDF renderer and right edge of
            the default axis.
        xlabel: label for the x axis.
        axis: axis limits; [0, xmax, 0, 1] is used when this is falsy.
    """
    data_cdf = _13_Cdf._make_cdf_from_list(weights)
    pyplot.clf()

    # Sort a slice copy rather than `weights` itself.
    ordered = weights[:]
    ordered.sort()
    mean, variance = _03_thinkstats._trimmed_mean_var(ordered)
    print('n, Mean, Var', len(weights), mean, variance)

    std = math.sqrt(variance)
    print('Sigma', std)

    model_xs, model_ps = continuous._render_normal_cdf(mean, std, xmax)
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.7')

    data_xs, data_ps = data_cdf._render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

    limits = axis or [0, xmax, 0, 1]
    _05_myplot._save(root,
                     title='Adult weight',
                     xlabel=xlabel,
                     ylabel='CDF',
                     axis=limits)
def _plot_ages(resp):
    """Plot the distribution of ages.

    Args:
        resp: object whose `records` iterable yields items with an `age`
            attribute.
    """
    age_cdf = _13_Cdf._make_cdf_from_list(
        [record.age for record in resp.records])

    _05_myplot._clf()
    _05_myplot._cdf(age_cdf)
    _05_myplot._show()
def testRender(self):
    """Check the value and probability sequences returned by _render."""
    sample = [2, 1, 3, 2, 5]
    expected_vs = [1, 1, 2, 2, 3, 3, 5, 5]
    expected_ps = [0.0, 0.2, 0.2, 0.6, 0.6, 0.8, 0.8, 1.0]

    vs, ps = _13_Cdf._make_cdf_from_list(sample, 'bob')._render()

    self.assertEqual(vs, expected_vs)
    for actual, wanted in zip(ps, expected_ps):
        self.assertAlmostEqual(actual, wanted)
def _check_cdf2():
    """Compare chi2 values from the simulation with a chi-squared dist.

    1000 simulated values are each passed through scipy's chi-squared CDF
    with 3 degrees of freedom; the CDF of the results is plotted and shown.
    """
    df = 3
    simulated = [_simulate_chi2() for _ in range(1000)]
    transformed = [scipy.stats.chi2.cdf(value, df) for value in simulated]

    _05_myplot._cdf(_13_Cdf._make_cdf_from_list(transformed))
    _05_myplot._show()
def testItems(self):
    """Check that _items yields the expected (value, probability) pairs.

    The result is materialized and its length is asserted before the
    element-wise comparison: zip() stops at the shorter input, so without
    the length check missing or extra items would go unnoticed.
    """
    t = [2, 1, 3, 2, 5]
    cdf = _13_Cdf._make_cdf_from_list(t, 'bob')

    # list() so the length check works whether _items returns a list or
    # a one-shot iterator.
    items = list(cdf._items())
    expected = [(1, 0.2), (2, 0.6), (3, 0.8), (5, 1.0)]

    self.assertEqual(len(items), len(expected))
    for p1, p2 in zip(items, expected):
        for x1, x2 in zip(p1, p2):
            self.assertEqual(x1, x2)
def _resample(cdf, n=10000):
    """Plot cdf next to the CDF of n values sampled from it.

    Args:
        cdf: Cdf object; its _sample method supplies the new values.
        n: number of values requested from cdf._sample.

    The figure is saved with root 'resample_cdf'.
    """
    resampled_cdf = _13_Cdf._make_cdf_from_list(cdf._sample(n), 'resampled')

    _05_myplot._clf()
    _05_myplot._cdfs([cdf, resampled_cdf])
    _05_myplot._save(root='resample_cdf',
                     title='CDF',
                     xlabel='weight in oz',
                     ylabel='CDF(x)')
def _process(table, name):
    """Run the age and weight analyses on this table.

    cumulative._process(table, name) runs first; the Pmf and Cdf objects
    built afterwards are named with table.name.

    Creates instance variables:
        ages: sequence of int ages in years
        age_pmf: Pmf object
        age_cdf: Cdf object
        weights: sequence of total weight in ounces
        weight_cdf: Cdf object

    Records whose field equals the string 'NA' are skipped.
    """
    cumulative._process(table, name)

    ages = [rec.agepreg for rec in table.records if rec.agepreg != 'NA']
    table.ages = ages
    table.age_pmf = _04_Pmf._make_pmf_from_list(ages, table.name)
    table.age_cdf = _13_Cdf._make_cdf_from_list(ages, table.name)

    weights = [rec.totalwgt_oz for rec in table.records
               if rec.totalwgt_oz != 'NA']
    table.weights = weights
    table.weight_cdf = _13_Cdf._make_cdf_from_list(weights, table.name)
def testProb(self):
    """Check _prob below, at, between and above the sample values."""
    cdf = _13_Cdf._make_cdf_from_list([2, 1, 3, 2, 5], 'bob')

    # (x, expected cdf._prob(x))
    cases = [
        (-1, 0.0),
        (1, 0.2),
        (2, 0.6),
        (2.5, 0.6),
        (4, 0.8),
        (5, 1.0),
        (7, 1.0),
    ]
    for x, expected in cases:
        self.assertEqual(cdf._prob(x), expected)
def _make_example():
    """Make a simple example CDF and save it with root 'example_cdf'."""
    example_cdf = _13_Cdf._make_cdf_from_list([2, 1, 3, 2, 5])

    _05_myplot._clf()
    _05_myplot._cdf(example_cdf)

    save_options = dict(
        root='example_cdf',
        title='CDF',
        xlabel='x',
        ylabel='CDF(x)',
        axis=[0, 6, 0, 1],
        legend=False,
    )
    _05_myplot._save(**save_options)
def _winsorize(xs, p=0.01):
    """Compresses outliers.

    The bounds are cdf._value(p) and cdf._value(1 - p) for the CDF built
    from xs. The bounds and the sorted values lying outside them are
    printed.

    Args:
        xs: values to process; iterated more than once.
        p: probability passed to cdf._value for the lower bound; the upper
            bound uses 1 - p.

    Returns:
        A new list with every value clamped into [low, high].
    """
    cdf = _13_Cdf._make_cdf_from_list(xs)
    low = cdf._value(p)
    high = cdf._value(1 - p)
    print(low, high)

    print(sorted(x for x in xs if x < low or x > high))

    return [min(max(low, x), high) for x in xs]
def main():
    """Plot the CDF of the relay running speeds, saved as 'relay_cdf'."""
    speeds = _10_relay._get_speeds(_10_relay._read_results())

    # plot the distribution of actual speeds
    speed_cdf = _13_Cdf._make_cdf_from_list(speeds, 'speeds')
    _05_myplot._cdf(speed_cdf)
    _05_myplot._save(root='relay_cdf',
                     title='CDF of running speed',
                     xlabel='speed (mph)',
                     ylabel='probability')
def _check_cdf():
    """Compare chi2 values from simulation with chi2 distributions.

    Curves from _chi2_cdf for df = 1, 2 and 3 are drawn first, then the
    CDF of 1000 simulated values; the figure is saved as 'khan3' (png).
    """
    for df in (1, 2, 3):
        curve_xs, curve_ys = _chi2_cdf(df=df, high=15)
        pyplot.plot(curve_xs, curve_ys, label=df)

    simulated = [_simulate_chi2() for _ in range(1000)]
    _05_myplot._cdf(_13_Cdf._make_cdf_from_list(simulated))

    _05_myplot._save(root='khan3',
                     xlabel='chi2 value',
                     ylabel="CDF",
                     formats=['png'])
def _plot_cdfs(samples):
    """Make CDFs showing the distribution of outliers.

    Args:
        samples: mapping from label to an iterable of values. For each
            label, only the values below 150 go into that label's CDF.

    The figure is saved with root 'bayes_height_cdfs'.
    """
    # .items() instead of .iteritems(): the latter exists only on Python 2
    # dicts and raises AttributeError on Python 3, while .items() works
    # on both.
    cdfs = []
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]
        cdfs.append(_13_Cdf._make_cdf_from_list(outliers, label))

    _05_myplot._clf()
    _05_myplot._cdfs(cdfs)
    _05_myplot._save(root='bayes_height_cdfs',
                     title='CDF of height',
                     xlabel='Reported height (cm)',
                     ylabel='CDF')
def _make_figure(xmin=100, alpha=1.7, mu=150, sigma=25):
    """Make a figure of the CDF of height in ParetoWorld next to a normal one.

    10000 values are drawn for each distribution and both CDFs are saved
    in one figure with root 'pareto_world2'.

    Args:
        xmin: parameter of the Pareto distribution
        alpha: parameter of the Pareto distribution
        mu: parameter of the Normal distribution
        sigma: parameter of the Normal distribution
    """
    pareto_sample = [xmin * random.paretovariate(alpha) for _ in range(10000)]
    pareto_cdf = _13_Cdf._make_cdf_from_list(pareto_sample, name='pareto')

    normal_sample = [random.normalvariate(mu, sigma) for _ in range(10000)]
    normal_cdf = _13_Cdf._make_cdf_from_list(normal_sample, name='normal')

    _05_myplot._clf()
    _05_myplot._cdfs([pareto_cdf, normal_cdf])
    _05_myplot._save(root='pareto_world2',
                     title='Pareto World',
                     xlabel='height (cm)',
                     ylabel='CDF')
def _process(table, name):
    """Run the weight analyses on this table.

    _06_descriptive._process(table, name) runs first; the Pmf and Cdf
    built afterwards are named with table.name.

    Creates instance variables:
        weights: sequence of int total weights in ounces
        weight_pmf: Pmf object
        weight_cdf: Cdf object

    Records whose totalwgt_oz equals the string 'NA' are skipped.
    """
    _06_descriptive._process(table, name)

    weights = [rec.totalwgt_oz for rec in table.records
               if rec.totalwgt_oz != 'NA']
    table.weights = weights
    table.weight_pmf = _04_Pmf._make_pmf_from_list(weights, table.name)
    table.weight_cdf = _13_Cdf._make_cdf_from_list(weights, table.name)
def main(script):
    """Plot the distribution of the intervals between sorted birthdays.

    Args:
        script: not used by the body.

    The figure is saved with root 'intervals'.
    """
    # read 'em and sort 'em
    birthdays = _read_birthdays()
    birthdays.sort()

    # compute the intervals in days
    days = [delta.days for delta in _diff(birthdays)]

    # make and plot the CCDF on a log scale.
    interval_cdf = _13_Cdf._make_cdf_from_list(days, name='intervals')
    scale = _05_myplot._cdf(interval_cdf, transform='exponential')
    _05_myplot._save(root='intervals', xlabel='days', ylabel='ccdf', **scale)
def testValue(self):
    """Check _value across [0, 1] and that out-of-range input raises."""
    cdf = _13_Cdf._make_cdf_from_list([2, 1, 3, 2, 5], 'bob')

    # (probability, expected cdf._value(probability))
    cases = [
        (0.0, 1),
        (0.1, 1),
        (0.2, 1),
        (0.3, 2),
        (0.4, 2),
        (0.5, 2),
        (0.6, 2),
        (0.7, 3),
        (0.8, 3),
        (0.9, 5),
        (1.0, 5),
    ]
    for prob, expected in cases:
        self.assertEqual(cdf._value(prob), expected)

    for out_of_range in (-0.1, 1.1):
        self.assertRaises(ValueError, cdf._value, out_of_range)
def _make_cdfs(lens):
    """Save the 'slashdot' CDF on log-x axes and its complement on log-log.

    Args:
        lens: values handed to _13_Cdf._make_cdf_from_list.
    """
    x_label = 'Number of friends/foes'
    slashdot_cdf = _13_Cdf._make_cdf_from_list(lens, 'slashdot')

    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf)
    _05_myplot._save(root='slashdot.logx',
                     xlabel=x_label,
                     ylabel='CDF',
                     xscale='log')

    # NOTE(review): this figure is drawn with complement=True but is still
    # labelled 'CDF' -- confirm whether the label is intentional.
    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf, complement=True)
    _05_myplot._save(root='slashdot.loglog',
                     xlabel=x_label,
                     ylabel='CDF',
                     xscale='log',
                     yscale='log')
def _make_figures():
    """Save the population CDF figures and the log-population rankit plot.

    Reads the populations, prints how many there are, and saves three
    figures: 'populations', 'populations_logx' and 'populations_loglog'
    (the last drawn with complement=True). The sorted natural logs of the
    populations are then passed to _17_rankit._make_normal_plot.
    """
    pops = _21_populations._read_data()
    print(len(pops))

    cdf = _13_Cdf._make_cdf_from_list(pops, 'populations')

    # One entry per figure: (root, extra _cdf options, extra _save options).
    figures = [
        ('populations', {}, {'ylabel': 'CDF'}),
        ('populations_logx', {}, {'ylabel': 'CDF', 'xscale': 'log'}),
        ('populations_loglog',
         {'complement': True},
         {'ylabel': 'Complementary CDF', 'yscale': 'log', 'xscale': 'log'}),
    ]
    for root, cdf_options, save_options in figures:
        _05_myplot._clf()
        _05_myplot._cdf(cdf, **cdf_options)
        _05_myplot._save(root=root,
                         title='City/Town Populations',
                         xlabel='population',
                         legend=False,
                         **save_options)

    log_pops = sorted(math.log(x) for x in pops)
    _17_rankit._make_normal_plot(log_pops, 'populations_rankit')
def _make_normal_model(weights):
    """Plot the CDF of birthweights with a normal model.

    The mean, variance and sigma of the model are printed; the figure is
    saved as 'nsfg_birthwgt_model'.

    Args:
        weights: birth weights, passed both to the trimmed estimator and
            to the CDF constructor.
    """
    # estimate parameters: trimming outliers yields a better fit
    mean, variance = _03_thinkstats._trimmed_mean_var(weights, p=0.01)
    print('Mean, Var', mean, variance)

    # plot the model
    std = math.sqrt(variance)
    print('Sigma', std)
    model_xs, model_ps = _render_normal_cdf(mean, std, 200)

    pyplot.clf()
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.8')

    # plot the data
    data_xs, data_ps = _13_Cdf._make_cdf_from_list(weights)._render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

    _05_myplot._save('nsfg_birthwgt_model',
                     title='Birth weights',
                     xlabel='birth weight (oz)',
                     ylabel='CDF')
def testMakeCdfFromList(self):
    """Build a Cdf from a small list and run it through self.checkCdf."""
    sample = [2, 1, 3, 2, 5]
    self.checkCdf(_13_Cdf._make_cdf_from_list(sample, 'bob'))
def testMean(self):
    """Check that _mean of the sample Cdf is approximately 13/5."""
    sample = [2, 1, 3, 2, 5]
    expected_mean = 13.0 / 5.0

    actual_mean = _13_Cdf._make_cdf_from_list(sample, 'bob')._mean()

    self.assertAlmostEqual(actual_mean, expected_mean)