def _scatter_plot(root, heights, weights, alpha=1.0):
    """Scatter weights against heights and hand the figure to `_05_myplot._save`.

    Args:
        root: forwarded to `_05_myplot._save` as `root`.
        heights: x values for the scatter.
        weights: y values for the scatter.
        alpha: marker opacity passed to `pyplot.scatter`.
    """
    pyplot.scatter(heights, weights, alpha=alpha, edgecolors='none')

    # Fixed axis window and labels used for the saved figure.
    save_options = dict(
        root=root,
        xlabel='Height (cm)',
        ylabel='Weight (kg)',
        axis=[140, 210, 20, 200],
        legend=False,
    )
    _05_myplot._save(**save_options)
def _hex_bin(root, heights, weights, cmap=matplotlib.cm.Blues):
    """Draw a hexbin plot of weights against heights and hand it to `_05_myplot._save`.

    Args:
        root: forwarded to `_05_myplot._save` as `root`.
        heights: x values for the hexbin.
        weights: y values for the hexbin.
        cmap: colormap passed to `pyplot.hexbin`.
    """
    pyplot.hexbin(heights, weights, cmap=cmap)

    # Same labels and axis window as the scatter version of this plot.
    save_options = dict(
        root=root,
        xlabel='Height (cm)',
        ylabel='Weight (kg)',
        axis=[140, 210, 20, 200],
        legend=False,
    )
    _05_myplot._save(**save_options)
def main():
    """Estimate a posterior for several samples and plot them in a 2x2 grid.

    One posterior comes from a fixed six-value sample; three more come from
    `random.expovariate` samples of size 10, 20 and 40. All share the same
    uniform prior and are saved together under root 'posteriors'.
    """
    rate = 1.2
    prior = _make_uniform_suite(0.5, 1.5, 1000)

    # Fixed sample first, so it lands in the first subplot.
    book_sample = [2.675, 0.198, 1.152, 0.787, 2.717, 4.269]
    posteriors = [
        _estimate_parameter(prior, book_sample, 'post%d' % len(book_sample))
    ]

    # Then randomly generated samples of increasing size.
    for size in [10, 20, 40]:
        draws = [random.expovariate(rate) for _ in range(size)]
        posteriors.append(_estimate_parameter(prior, draws, 'post%d' % size))

    # One subplot per posterior; subplot positions are 1-based.
    for position, posterior in enumerate(posteriors, start=1):
        pyplot.subplot(2, 2, position)
        _05_myplot._pmf(posterior)
        pyplot.xlabel('lambda')
        pyplot.ylabel('Posterior probability')
        pyplot.legend()

    _05_myplot._save(root='posteriors')
def _make_figures(firsts, others):
    """Plot Hists and Pmfs for the pregnancy length.

    Args:
        firsts: object exposing `.hist` and `.pmf`.
        others: object exposing `.hist` and `.pmf`.

    Two figures are handed to `_05_myplot._save`: 'nsfg_hist' and 'nsfg_pmf'.
    """
    # NOTE: the previous `bar_options` list was built but never passed to
    # anything, so it has been removed.

    # make the histogram
    axis = [23, 46, 0, 2700]
    _hists([firsts.hist, others.hist])
    _05_myplot._save(root='nsfg_hist',
                     title='Histogram',
                     xlabel='weeks',
                     ylabel='frequency',
                     axis=axis)

    # make the PMF (same x window, probability scale on y)
    axis = [23, 46, 0, 0.6]
    _hists([firsts.pmf, others.pmf])
    _05_myplot._save(root='nsfg_pmf',
                     title='PMF',
                     xlabel='weeks',
                     ylabel='probability',
                     axis=axis)
def _make_figure(firsts, others):
    """Plot, per table, the conditional probability for weeks 35 through 45.

    For each week the table's PMF is conditioned with `_condition_on_weeks`
    and the probability of that same week is read back. Each value is
    printed as it is computed, then one line per table is plotted and the
    figure is saved under root "conditional".
    """
    weeks = range(35, 46)

    # Map from PMF name to its list of conditional probabilities.
    probs = {}
    for table in (firsts, others):
        label = table.pmf.name
        series = []
        for week in weeks:
            conditioned = _condition_on_weeks(table.pmf, week)
            value = conditioned._prob(week)
            print(week, value, table.pmf.name)
            series.append(value)
        probs[label] = series

    # One plotted line per table.
    pyplot.clf()
    for label, series in probs.items():
        pyplot.plot(weeks, series, label=label)
        print(label, series)

    _05_myplot._save(
        root="conditional",
        xlabel="weeks",
        ylabel=r"Prob{x $=$ weeks | x $\geq$ weeks}",
        title="Conditional Probability",
    )
def _make_normal_model(self, weights, root, xmax=175, xlabel='adult weight (kg)', axis=None):
    """Plot the empirical CDF of `weights` over a normal-CDF model.

    The model's mean and variance come from
    `_03_thinkstats._trimmed_mean_var` applied to a sorted copy of `weights`.

    Args:
        weights: sequence of values; it is copied before sorting.
        root: first positional argument forwarded to `_05_myplot._save`.
        xmax: upper x limit for the model curve and the default axis.
        xlabel: x-axis label for the saved figure.
        axis: optional axis limits; a falsy value selects [0, xmax, 0, 1].
    """
    empirical = _13_Cdf._make_cdf_from_list(weights)
    pyplot.clf()

    # Sort a copy so the caller's sequence keeps its order.
    ordered = weights[:]
    ordered.sort()
    mu, var = _03_thinkstats._trimmed_mean_var(ordered)
    print('n, Mean, Var', len(weights), mu, var)

    sigma = math.sqrt(var)
    print('Sigma', sigma)

    # Model curve first (thick, grey), then the data on top (thin, blue).
    model_xs, model_ps = continuous._render_normal_cdf(mu, sigma, xmax)
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.7')

    data_xs, data_ps = empirical._render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

    if not axis:
        axis = [0, xmax, 0, 1]
    _05_myplot._save(root,
                     title='Adult weight',
                     xlabel=xlabel,
                     ylabel='CDF',
                     axis=axis)
def main():
    """Plot the PMF and CDF of relay speeds after biasing with `_bias_pmf`.

    Speeds are read through `_10_relay`, turned into a PMF, and biased with
    `_bias_pmf(pmf, 7.5, ...)`. Two figures are saved: 'observed_speeds'
    (PMF) and 'observed_speeds_cdf' (CDF).
    """
    speeds = _10_relay._get_speeds(_10_relay._read_results())

    # The unbiased PMF is only an input to the biasing step; it is not plotted.
    actual = _04_Pmf._make_pmf_from_list(speeds, 'actual speeds')

    # 7.5 is presumably the observer's speed in mph -- confirm against _bias_pmf.
    observed = _bias_pmf(actual, 7.5, name='observed speeds')

    _05_myplot._clf()
    _05_myplot._hist(observed)
    _05_myplot._save(
        root='observed_speeds',
        title='PMF of running speed',
        xlabel='speed (mph)',
        ylabel='probability',
    )

    observed_cdf = _13_Cdf._make_cdf_from_pmf(observed)

    _05_myplot._clf()
    _05_myplot._cdf(observed_cdf)
    _05_myplot._save(
        root='observed_speeds_cdf',
        title='CDF of running speed',
        xlabel='speed (mph)',
        ylabel='cumulative probability',
    )
def _make_spaghetti(iters=1000, lines=100, n=300, thresh=0.05, index=2):
    """Make a spaghetti plot of simulated random-walk lines.

    Args:
        iters: number of simulations to run
        lines: number of simulated lines to draw (the first `lines` runs)
        n: number of trials to simulate
        thresh: threshold value; drawn as a red horizontal line unless None
        index: which element of the `_simulate` result to plot; also picks
            the y-axis label and the output root 'khan<index>'

    Prints `iters` and the fraction of runs for which `_crosses` was true.
    """
    pyplot.clf()

    if thresh is not None:
        # Horizontal reference line at the threshold.
        pyplot.plot([1, n], [thresh, thresh], color='red', alpha=1, linewidth=2)

    crossings = 0.0
    for run in range(iters):
        series = _simulate(0.5, 0.5, n)[index]
        xs, ys = zip(*series)

        if _crosses(ys, thresh):
            crossings += 1

        # Every run is counted, but only the first `lines` runs are drawn.
        if run < lines:
            pyplot.plot(xs, ys, alpha=0.2)

    print(iters, crossings / iters)

    labels = ['Difference in success rate', 'chi-squared stat', 'p-value']
    _05_myplot._save(
        root='khan%d' % index,
        xlabel='Number of trials',
        ylabel=labels[index],
        title='A-B test random walk',
        formats=['png'],
    )
def _resample(cdf, n=10000):
    """Draw `n` values from `cdf` and plot the resampled CDF beside the original.

    Args:
        cdf: object exposing `_sample(n)`.
        n: number of values to draw.

    The figure is saved under root 'resample_cdf'.
    """
    resampled = _13_Cdf._make_cdf_from_list(cdf._sample(n), 'resampled')

    _05_myplot._clf()
    _05_myplot._cdfs([cdf, resampled])
    _05_myplot._save(
        root='resample_cdf',
        title='CDF',
        xlabel='weight in oz',
        ylabel='CDF(x)',
    )
def _make_normal_cdf():
    """Plot the curve returned by `_render_normal_cdf(2.0, 0.5, 4.0)`.

    The figure is saved as 'normal_cdf' without a legend.
    """
    curve_xs, curve_ps = _render_normal_cdf(2.0, 0.5, 4.0)

    pyplot.clf()
    pyplot.plot(curve_xs, curve_ps, linewidth=2)

    save_options = dict(title='Normal CDF', xlabel='x', ylabel='CDF', legend=False)
    _05_myplot._save('normal_cdf', **save_options)
def _log_cdf_time_interval():
    """Plot the complementary CDF of the birth intervals on a log y-scale.

    Intervals come from `_calc_time_interval`; the figure is saved under
    root "baby_birth_interval_logccdf".
    """
    intervals = _calc_time_interval()
    interval_pmf = _04_Pmf._make_pmf_from_list(intervals, "baby birth interval")
    interval_cdf = _13_Cdf._make_cdf_from_pmf(interval_pmf, "baby birth interval")

    _05_myplot._clf()
    # Linear x, log y, with the complement requested from the plot helper.
    _05_myplot._cdf(interval_cdf, complement=True, xscale="linear", yscale="log")

    save_options = {
        "root": "baby_birth_interval_logccdf",
        "title": "LogCCDF of baby birth interval",
        "xlabel": "interval(minutes)",
        "ylabel": "LogCCdf",
    }
    _05_myplot._save(**save_options)
def _make_example():
    """Plot the CDF of the small sample [2, 1, 3, 2, 5].

    The figure is saved under root 'example_cdf' without a legend.
    """
    values = [2, 1, 3, 2, 5]
    example_cdf = _13_Cdf._make_cdf_from_list(values)

    _05_myplot._clf()
    _05_myplot._cdf(example_cdf)

    save_options = dict(
        root='example_cdf',
        title='CDF',
        xlabel='x',
        ylabel='CDF(x)',
        axis=[0, 6, 0, 1],
        legend=False,
    )
    _05_myplot._save(**save_options)
def main():
    """Plot the CDF of the relay running speeds and save it as 'relay_cdf'."""
    speeds = _10_relay._get_speeds(_10_relay._read_results())

    # Distribution of the actual speeds.
    speed_cdf = _13_Cdf._make_cdf_from_list(speeds, 'speeds')
    _05_myplot._cdf(speed_cdf)

    save_options = dict(
        root='relay_cdf',
        title='CDF of running speed',
        xlabel='speed (mph)',
        ylabel='probability',
    )
    _05_myplot._save(**save_options)
def _make_line_plot(age_bins):
    """Plot the mean weight of each age bin as a line and save it.

    Args:
        age_bins: mapping from bin value to a collection of weights
            (presumably a dict built by the caller -- confirm).

    Bins are plotted in sorted order; the figure is saved under root
    'agemodel_line' without a legend.
    """
    # `.items()` works on Python 2 and 3; `.iteritems()` is Python 2 only.
    ordered = sorted(age_bins.items())

    # `age_bin` rather than `bin`, which would shadow the builtin.
    xs = [age_bin for age_bin, _ in ordered]
    ys = [_03_thinkstats._mean(weights) for _, weights in ordered]

    _05_myplot._plot(xs, ys, 'bs-')
    _05_myplot._save(root='agemodel_line',
                     xlabel="Mother's age (years)",
                     ylabel='Mean birthweight (oz)',
                     legend=False)
def _check_cdf():
    """Compare simulated chi2 values with chi2 CDFs for df = 1, 2, 3.

    The three reference curves from `_chi2_cdf` are plotted first, then the
    CDF of 1000 values from `_simulate_chi2`. Saved under root 'khan3' as PNG.
    """
    # Reference curves, one per degree of freedom.
    for dof in (1, 2, 3):
        curve_xs, curve_ys = _chi2_cdf(df=dof, high=15)
        pyplot.plot(curve_xs, curve_ys, label=dof)

    # Empirical CDF of the simulated statistics.
    simulated = [_simulate_chi2() for _ in range(1000)]
    simulated_cdf = _13_Cdf._make_cdf_from_list(simulated)
    _05_myplot._cdf(simulated_cdf)

    _05_myplot._save(
        root='khan3',
        xlabel='chi2 value',
        ylabel="CDF",
        formats=['png'],
    )
def _normal_prob_plot(samples):
    """Make a normal probability plot for each sample in samples.

    Args:
        samples: mapping from label to sample; labels must be 'male' or
            'female' because they also select the marker.

    Raises:
        KeyError: if a label has no entry in the marker table.

    The figure is shown (`show=True`) and not given a `root`.
    """
    pyplot.clf()

    markers = dict(male='b', female='g')

    # `.items()` works on Python 2 and 3; `.iteritems()` is Python 2 only.
    for label, sample in samples.items():
        _normal_plot(sample, label, markers[label], jitter=0.0)

    # No root is passed here; root='bayes_height_normal' was left disabled.
    _05_myplot._save(show=True,
                     title='Normal probability plot',
                     xlabel='Standard normal',
                     ylabel='Reported height (cm)')
def _plot_cdfs(samples):
    """Make CDFs showing the distribution of outliers.

    Args:
        samples: mapping from label to a sample of values.

    For each sample only the values below 150 are kept. One CDF per label
    is plotted and the figure is saved under root 'bayes_height_cdfs'.
    """
    cdfs = []

    # `.items()` works on Python 2 and 3; `.iteritems()` is Python 2 only.
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]
        cdfs.append(_13_Cdf._make_cdf_from_list(outliers, label))

    _05_myplot._clf()
    _05_myplot._cdfs(cdfs)
    _05_myplot._save(root='bayes_height_cdfs',
                     title='CDF of height',
                     xlabel='Reported height (cm)',
                     ylabel='CDF')
def _make_figures(pmf, biased_pmf):
    """Plot the CDFs of the unbiased and biased PMFs on a log x-scale.

    A few summary values are printed along the way. The figure is saved
    under root 'slashdot.logx'.
    """
    unbiased = _13_Cdf._make_cdf_from_pmf(pmf, 'unbiased')
    print('unbiased median', unbiased._percentile(50))
    for limit in (100, 1000):
        print('percent < %d' % limit, unbiased._prob(limit))

    biased = _13_Cdf._make_cdf_from_pmf(biased_pmf, 'biased')
    print('biased median', biased._percentile(50))

    _05_myplot._clf()
    _05_myplot._cdfs([unbiased, biased])

    save_options = dict(
        root='slashdot.logx',
        xlabel='Number of friends/foes',
        ylabel='CDF',
        xscale='log',
    )
    _05_myplot._save(**save_options)
def main(script):
    """Plot the distribution of day gaps between consecutive sorted birthdays.

    Args:
        script: accepted but not used by this function.

    The keyword options returned by `_05_myplot._cdf` are forwarded to
    `_05_myplot._save`, which saves under root 'intervals'.
    """
    # Read and sort, so differences are between neighbouring dates.
    birthdays = _read_birthdays()
    birthdays.sort()

    # Intervals expressed in whole days.
    days = [gap.days for gap in _diff(birthdays)]

    interval_cdf = _13_Cdf._make_cdf_from_list(days, name='intervals')

    # The plot helper returns scale options that the save call must receive.
    scale_options = _05_myplot._cdf(interval_cdf, transform='exponential')
    _05_myplot._save(root='intervals', xlabel='days', ylabel='ccdf', **scale_options)
def _make_cdfs(lens):
    """Plot the CDF of `lens` on log-x axes, and its complement on log-log axes.

    Args:
        lens: list of values used to build the CDF.

    Figures are saved as 'slashdot.logx' and 'slashdot.loglog'.
    """
    slashdot_cdf = _13_Cdf._make_cdf_from_list(lens, 'slashdot')
    shared_labels = dict(xlabel='Number of friends/foes', ylabel='CDF')

    # CDF with a logarithmic x axis.
    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf)
    _05_myplot._save(root='slashdot.logx', xscale='log', **shared_labels)

    # Complementary CDF with both axes logarithmic.
    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf, complement=True)
    _05_myplot._save(root='slashdot.loglog', xscale='log', yscale='log', **shared_labels)
def _make_figures(pool, firsts, others):
    """Create the age/weight figures: four CDF plots and one hexbin plot.

    Args:
        pool: object exposing `age_cdf` and `weight_cdf`; also passed to
            `_get_age_weight`.
        firsts: object exposing `age_cdf` and `weight_cdf`.
        others: object exposing `age_cdf` and `weight_cdf`.
    """
    age_title = "Distribution of mother's age"
    weight_title = "Distribution of birth weight"

    def render(draw, what, **save_options):
        # Clear the figure, draw, then save: the shared shape of every CDF plot.
        _05_myplot._clf()
        draw(what)
        _05_myplot._save(**save_options)

    # CDFs over everyone (single curve each, so no legend).
    render(_05_myplot._cdf, pool.age_cdf,
           root='agemodel_age_cdf', title=age_title,
           xlabel='age (years)', ylabel='CDF', legend=False)
    render(_05_myplot._cdf, pool.weight_cdf,
           root='agemodel_weight_cdf', title=weight_title,
           xlabel='birth weight (oz)', ylabel='CDF', legend=False)

    # First babies versus others.
    render(_05_myplot._cdfs, [firsts.age_cdf, others.age_cdf],
           root='agemodel_age_cdfs', title=age_title,
           xlabel='age (years)', ylabel='CDF')
    render(_05_myplot._cdfs, [firsts.weight_cdf, others.weight_cdf],
           root='agemodel_weight_cdfs', title=weight_title,
           xlabel='birth weight (oz)', ylabel='CDF')

    # Age against weight, drawn as a hexbin.
    ages, weights = _get_age_weight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    _05_myplot._save(root='agemodel_scatter',
                     xlabel='Age (years)',
                     ylabel='Birth weight (oz)',
                     legend=False)
def _make_diff_figure(firsts, others):
    """Bar-plot the difference between the two PMFs for weeks 35 through 45.

    Each bar is 100 * (firsts probability - others probability) for that
    week. The figure is saved under root 'nsfg_diffs'.
    """
    weeks = range(35, 46)

    # Scaled by 100, matching the y-axis label.
    diffs = [
        100 * (firsts.pmf._prob(week) - others.pmf._prob(week))
        for week in weeks
    ]

    pyplot.clf()
    pyplot.bar(weeks, diffs, align='center')

    save_options = dict(
        root='nsfg_diffs',
        title='Difference in PMFs',
        xlabel='weeks',
        ylabel='100 (PMF$_{first}$ - PMF$_{other}$)',
        legend=False,
    )
    _05_myplot._save(**save_options)
def _make_pareto_cdf():
    """Plot `_pareto_cdf` for heights with xmin=100 and alpha=1.7.

    The curve uses 50 evenly spaced x values in [0, 1000). The value of
    `_pareto_median(xmin, alpha)` is printed, and the figure is saved as
    'pareto_world1'.
    """
    steps = 50
    upper = 1000.0
    xmin = 100
    alpha = 1.7

    xs = [upper * i / steps for i in range(steps)]
    ps = [_pareto_cdf(x, alpha, xmin) for x in xs]
    print('Median', _pareto_median(xmin, alpha))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)

    save_options = dict(
        title='Pareto CDF',
        xlabel='height (cm)',
        ylabel='CDF',
        legend=False,
    )
    _05_myplot._save('pareto_world1', **save_options)
def _make_pareto_cdf():
    """Generate a plot of the Pareto CDF with xmin=0.5 and alpha=1.0.

    The curve uses 50 evenly spaced x values in [0, 10). The CDF value at
    x = 10 is printed, and the figure is saved as 'pareto_cdf'.
    """
    n = 50
    x_max = 10.0  # named x_max so the builtin `max` is not shadowed
    xs = [x_max * i / n for i in range(n)]

    xmin = 0.5
    alpha = 1.0
    ps = [_pareto_cdf(x, alpha, xmin) for x in xs]

    # Argument order is (x, alpha, xmin), as in the comprehension above.
    # The earlier call passed xmin as x and 10 as xmin, so it did not
    # report the fraction at or below 10.
    print('Fraction <= 10', _pareto_cdf(10, alpha, xmin))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    _05_myplot._save('pareto_cdf',
                     title='Pareto CDF',
                     xlabel='x',
                     ylabel='CDF',
                     legend=False)
def _make_expo_cdf():
    """Generate a plot of the exponential CDF with lam=2.0.

    The curve uses 40 evenly spaced x values in [0, 2.5). A check value is
    printed, and the figure is saved as 'expo_cdf'.
    """
    n = 40
    x_max = 2.5  # named x_max so the builtin `max` is not shadowed
    xs = [x_max * i / n for i in range(n)]

    lam = 2.0
    ps = [_expo_cdf(x, lam) for x in xs]

    # For the standard exponential CDF, this is the x where it reaches 0.95.
    percentile = -math.log(0.05) / lam

    # Argument order is (x, lam), as in the comprehension above; the earlier
    # call passed them the other way round.
    print('Fraction <= ', percentile, _expo_cdf(percentile, lam))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    _05_myplot._save('expo_cdf',
                     title='Exponential CDF',
                     xlabel='x',
                     ylabel='CDF',
                     legend=False)
def _plot_marginals(suite):
    """Plot the two marginal CDFs of a 2-D joint distribution side by side.

    Args:
        suite: passed to `_compute_marginals`; its `name` is appended to the
            output root 'bayes_height_marginals_'.
    """
    pmf_m, pmf_s = _compute_marginals(suite)

    pyplot.clf()
    pyplot.figure(1, figsize=(7, 4))

    # (subplot position, marginal PMF, CDF name, x-axis label)
    panels = (
        (1, pmf_m, 'mu', 'Mean height (cm)'),
        (2, pmf_s, 'sigma', 'Std Dev height (cm)'),
    )
    for position, marginal, cdf_name, xlabel in panels:
        pyplot.subplot(1, 2, position)
        marginal_cdf = _13_Cdf._make_cdf_from_pmf(marginal, cdf_name)
        _05_myplot._cdf(marginal_cdf)
        pyplot.xlabel(xlabel)
        pyplot.ylabel('CDF')

    _05_myplot._save(root='bayes_height_marginals_%s' % suite.name)
def main():
    """Update uniform priors for two doors and plot both posteriors.

    Door A is updated with evidence (3, 2) and Door C with (3, 10). The
    result of `_total_probability` is printed and the figure is saved
    under root 'blinky' as PDF and PNG.
    """
    print('pae', 0.3 / (0.3 + 3.0 / 13))

    door_a = _make_uniform_suite(0.0, 1.0, 101, name='Door A')
    _update(door_a, (3, 2))

    door_c = _make_uniform_suite(0.0, 1.0, 101, name='Door C')
    _update(door_c, (3, 10))

    print(_total_probability(door_a, door_c, _prob_winning))

    # Both posteriors on one figure.
    _05_myplot._pmfs([door_a, door_c])

    save_options = dict(
        root='blinky',
        formats=['pdf', 'png'],
        title='Probability of blinking',
        xlabel='P(blink)',
        ylabel='Posterior probability',
    )
    _05_myplot._save(**save_options)
def _make_normal_plot(ys, root=None, line_options=None, **options):
    """Make a normal probability plot.

    Sorted `ys` are plotted against a sorted sample of the same size drawn
    with `random.normalvariate(0.0, 1.0)`.

    Args:
        ys: sequence of values
        root: first positional argument forwarded to `_05_myplot._save`
        line_options: optional dictionary of options for pyplot.plot;
            None (the default) means no extra options
        options: dictionary of options for `_05_myplot._save`
    """
    # A `{}` default is a single dict shared by every call; use a None
    # sentinel so each call gets its own empty dict.
    if line_options is None:
        line_options = {}

    # TODO: when n is small, generate a larger sample and desample
    n = len(ys)
    xs = [random.normalvariate(0.0, 1.0) for _ in range(n)]

    pyplot.clf()
    pyplot.plot(sorted(xs), sorted(ys), 'b.', markersize=3, **line_options)

    _05_myplot._save(root,
                     xlabel='Standard normal values',
                     legend=False,
                     **options)
def _make_figures(pool, firsts, others):
    """Plot birth-weight PMFs and CDFs for first babies versus others.

    Args:
        pool: accepted but not used by this function.
        firsts: object exposing `weight_pmf` and `weight_cdf` (drawn in blue).
        others: object exposing `weight_pmf` and `weight_cdf` (drawn in orange).

    Figures are saved as 'nsfg_birthwgt_pmf' and 'nsfg_birthwgt_cdf'.
    """
    groups = ((firsts, 'blue'), (others, 'orange'))

    # PMFs of birth weight, one histogram per group.
    _05_myplot._clf()
    for table, color in groups:
        _05_myplot._hist(table.weight_pmf, linewidth=0, color=color)
    _05_myplot._save(
        root='nsfg_birthwgt_pmf',
        title='Birth weight PMF',
        xlabel='weight (ounces)',
        ylabel='probability',
    )

    # CDFs of birth weight, one curve per group.
    _05_myplot._clf()
    for table, color in groups:
        _05_myplot._cdf(table.weight_cdf, linewidth=2, color=color)
    _05_myplot._save(
        root='nsfg_birthwgt_cdf',
        title='Birth weight CDF',
        xlabel='weight (ounces)',
        ylabel='probability',
        axis=[0, 200, 0, 1],
    )
def main():
    """Update a uniform prior with evidence 60 and plot the posterior.

    The prior is built by `_make_uniform_suite(1, 200, 200)`. The result of
    `_credible_interval(posterior, 90)` is printed and the figure is saved
    under root 'locomotive'.
    """
    upper_bound = 200
    prior = _make_uniform_suite(1, upper_bound, upper_bound)
    prior.name = 'prior'

    # Work on a copy so the prior itself is left as built.
    posterior = prior._copy()
    _update(posterior, 60)
    posterior.name = 'posterior'

    print(_credible_interval(posterior, 90))

    # Plot the posterior distribution.
    pyplot.subplots_adjust(wspace=0.4, left=0.15)
    _05_myplot._pmf(posterior, linewidth=2)

    save_options = dict(
        root='locomotive',
        title='Locomotive problem',
        xlabel='Number of trains',
        ylabel='Posterior probability',
    )
    _05_myplot._save(**save_options)