Example #1
0
 def _scatter_plot(root, heights, weights, alpha=1.0):
     """Scatter-plot weights against heights and finish via ``_05_myplot._save``.

     ``alpha`` is forwarded to ``pyplot.scatter``; marker edges are turned
     off. ``root`` is passed through to ``_save`` unchanged.
     """
     pyplot.scatter(heights, weights, alpha=alpha, edgecolors='none')

     # fixed labels and axis bounds used for the height/weight figure
     save_options = dict(
         root=root,
         xlabel='Height (cm)',
         ylabel='Weight (kg)',
         axis=[140, 210, 20, 200],
         legend=False,
     )
     _05_myplot._save(**save_options)
Example #2
0
 def _hex_bin(root, heights, weights, cmap=matplotlib.cm.Blues):
     """Draw a hexbin plot of weights against heights and finish via ``_05_myplot._save``.

     ``cmap`` is forwarded to ``pyplot.hexbin``; ``root`` is passed through
     to ``_save`` unchanged.
     """
     pyplot.hexbin(heights, weights, cmap=cmap)

     # same labels and axis bounds as the height/weight scatter figure
     save_options = dict(
         root=root,
         xlabel='Height (cm)',
         ylabel='Weight (kg)',
         axis=[140, 210, 20, 200],
         legend=False,
     )
     _05_myplot._save(**save_options)
Example #3
0
def main():
    """Estimate a parameter from several samples and plot each posterior.

    One fixed six-value sample is used first, followed by random
    exponential samples of size 10, 20 and 40 drawn with rate 1.2.  Every
    sample is combined with the same uniform prior through
    ``_estimate_parameter``; the resulting posteriors are drawn in a 2x2
    subplot grid and handed to ``_05_myplot._save``.
    """
    rate = 1.2
    prior = _make_uniform_suite(0.5, 1.5, 1000)

    # the fixed sample goes first
    fixed_sample = [2.675, 0.198, 1.152, 0.787, 2.717, 4.269]
    label = 'post%d' % len(fixed_sample)
    posteriors = [_estimate_parameter(prior, fixed_sample, label)]

    # then randomly generated samples of increasing size
    for size in (10, 20, 40):
        draws = [random.expovariate(rate) for _ in range(size)]
        label = 'post%d' % size
        posteriors.append(_estimate_parameter(prior, draws, label))

    # one subplot per posterior, numbered from 1
    for position, posterior in enumerate(posteriors, start=1):
        pyplot.subplot(2, 2, position)
        _05_myplot._pmf(posterior)
        pyplot.xlabel('lambda')
        pyplot.ylabel('Posterior probability')
        pyplot.legend()

    _05_myplot._save(root='posteriors')
Example #4
0
def _make_figures(firsts, others):
    """Plot Hists and Pmfs for the pregnancy length.

    Draws ``firsts.hist`` / ``others.hist`` and then ``firsts.pmf`` /
    ``others.pmf`` with ``_hists``, handing each figure to
    ``_05_myplot._save`` under the roots 'nsfg_hist' and 'nsfg_pmf'.

    Args:
        firsts: object exposing ``hist`` and ``pmf`` attributes
        others: object exposing ``hist`` and ``pmf`` attributes
    """
    # make the histogram
    axis = [23, 46, 0, 2700]
    _hists([firsts.hist, others.hist])
    _05_myplot._save(root='nsfg_hist',
                     title='Histogram',
                     xlabel='weeks',
                     ylabel='frequency',
                     axis=axis)

    # make the PMF
    axis = [23, 46, 0, 0.6]
    _hists([firsts.pmf, others.pmf])
    _05_myplot._save(root='nsfg_pmf',
                     title='PMF',
                     xlabel='weeks',
                     ylabel='probability',
                     axis=axis)
Example #5
0
def _make_figure(firsts, others):
    """Plot one line of conditional probabilities per table.

    For every week in 35..45 the table's PMF is passed through
    ``_condition_on_weeks`` and the probability of that same week is read
    from the result.  Each value is printed as it is computed, and each
    finished series is printed again when it is plotted.
    """
    weeks = range(35, 46)

    # maps a table's PMF name to its list of conditional probabilities
    probs = {}
    for table in (firsts, others):
        key = table.pmf.name
        probs[key] = []
        for week in weeks:
            conditioned = _condition_on_weeks(table.pmf, week)
            p = conditioned._prob(week)
            print(week, p, table.pmf.name)
            probs[key].append(p)

    # one line per table
    pyplot.clf()
    for key, series in probs.items():
        pyplot.plot(weeks, series, label=key)
        print(key, series)

    save_options = dict(
        root="conditional",
        xlabel="weeks",
        ylabel=r"Prob{x $=$ weeks | x $\geq$ weeks}",
        title="Conditional Probability",
    )
    _05_myplot._save(**save_options)
Example #6
0
    def _make_normal_model(self,
                           weights,
                           root,
                           xmax=175,
                           xlabel='adult weight (kg)',
                           axis=None):
        """Plot a normal-CDF model on top of the empirical CDF of ``weights``.

        The model's mean and variance come from
        ``_03_thinkstats._trimmed_mean_var`` applied to a sorted copy of
        ``weights``; the statistics are printed along the way.  When ``axis``
        is falsy the bounds default to ``[0, xmax, 0, 1]``.
        """
        empirical = _13_Cdf._make_cdf_from_list(weights)

        pyplot.clf()

        # sort a copy so the caller's sequence keeps its order
        ordered = weights[:]
        ordered.sort()
        mu, var = _03_thinkstats._trimmed_mean_var(ordered)
        print('n, Mean, Var', len(weights), mu, var)

        sigma = math.sqrt(var)
        print('Sigma', sigma)

        # model curve first, data curve drawn over it
        model_xs, model_ps = continuous._render_normal_cdf(mu, sigma, xmax)
        pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.7')

        data_xs, data_ps = empirical._render()
        pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

        bounds = axis if axis else [0, xmax, 0, 1]
        _05_myplot._save(root,
                         title='Adult weight',
                         xlabel=xlabel,
                         ylabel='CDF',
                         axis=bounds)
Example #7
0
def main():
    """Plot the PMF and CDF of the biased ("observed") running speeds.

    Speeds are read through ``_10_relay``, turned into a PMF, biased with
    ``_bias_pmf`` (second argument 7.5), and the biased distribution is drawn
    as a histogram and as a CDF.
    """
    speeds = _10_relay._get_speeds(_10_relay._read_results())

    # distribution of actual speeds (not plotted itself, only used as input)
    actual = _04_Pmf._make_pmf_from_list(speeds, 'actual speeds')

    # the biased distribution seen by the observer
    observed = _bias_pmf(actual, 7.5, name='observed speeds')

    _05_myplot._clf()
    _05_myplot._hist(observed)
    hist_options = dict(root='observed_speeds',
                        title='PMF of running speed',
                        xlabel='speed (mph)',
                        ylabel='probability')
    _05_myplot._save(**hist_options)

    observed_cdf = _13_Cdf._make_cdf_from_pmf(observed)

    _05_myplot._clf()
    _05_myplot._cdf(observed_cdf)
    cdf_options = dict(root='observed_speeds_cdf',
                       title='CDF of running speed',
                       xlabel='speed (mph)',
                       ylabel='cumulative probability')
    _05_myplot._save(**cdf_options)
Example #8
0
def _make_spaghetti(iters=1000, lines=100, n=300, thresh=0.05, index=2):
    """
    Draws a spaghetti plot from repeated ``_simulate`` runs.

    Prints ``iters`` and the fraction of runs for which ``_crosses``
    reported true.

    Args:
        iters:  number of simulations to run
        lines:  number of lines to plot
        n:      number of trials to simulate
        thresh: threshold p-value; drawn as a red line unless None
        index:  which element of the ``_simulate`` result to plot; also
                selects the y-axis label and the output root
    """
    pyplot.clf()
    if thresh is not None:
        pyplot.plot([1, n], [thresh, thresh], color='red', alpha=1, linewidth=2)

    crossings = 0.0
    for run in range(iters):
        series = _simulate(0.5, 0.5, n)[index]
        xs, ys = zip(*series)
        if _crosses(ys, thresh):
            crossings += 1

        # only the first `lines` runs are drawn; all runs are counted
        if run < lines:
            pyplot.plot(xs, ys, alpha=0.2)

    print(iters, crossings / iters)

    y_labels = ('Difference in success rate', 'chi-squared stat', 'p-value')

    _05_myplot._save(root='khan%d' % index,
                     xlabel='Number of trials',
                     ylabel=y_labels[index],
                     title='A-B test random walk',
                     formats=['png'])
Example #9
0
def _resample(cdf, n=10000):
    """Plot ``cdf`` next to a CDF rebuilt from ``n`` values sampled from it."""
    resampled = _13_Cdf._make_cdf_from_list(cdf._sample(n), 'resampled')

    _05_myplot._clf()
    _05_myplot._cdfs([cdf, resampled])

    save_options = dict(root='resample_cdf',
                        title='CDF',
                        xlabel='weight in oz',
                        ylabel='CDF(x)')
    _05_myplot._save(**save_options)
Example #10
0
def _make_normal_cdf():
    """Plot the curve returned by ``_render_normal_cdf(2.0, 0.5, 4.0)``."""
    curve_xs, curve_ps = _render_normal_cdf(2.0, 0.5, 4.0)

    pyplot.clf()
    pyplot.plot(curve_xs, curve_ps, linewidth=2)

    save_options = dict(title='Normal CDF',
                        xlabel='x',
                        ylabel='CDF',
                        legend=False)
    _05_myplot._save('normal_cdf', **save_options)
Example #11
0
def _log_cdf_time_interval():
    """Plot the complementary CDF of the birth intervals with a log y-axis.

    The intervals come from ``_calc_time_interval``; they are turned into a
    PMF and then a CDF before plotting.
    """
    intervals = _calc_time_interval()
    interval_pmf = _04_Pmf._make_pmf_from_list(intervals, "baby birth interval")
    interval_cdf = _13_Cdf._make_cdf_from_pmf(interval_pmf, "baby birth interval")

    _05_myplot._clf()
    _05_myplot._cdf(interval_cdf, complement=True, xscale="linear", yscale="log")

    save_options = dict(
        root="baby_birth_interval_logccdf",
        title="LogCCDF of baby birth interval",
        xlabel="interval(minutes)",
        ylabel="LogCCdf",
    )
    _05_myplot._save(**save_options)
Example #12
0
def _make_example():
    """Plot the CDF of the small fixed list [2, 1, 3, 2, 5]."""
    values = [2, 1, 3, 2, 5]
    example_cdf = _13_Cdf._make_cdf_from_list(values)

    _05_myplot._clf()
    _05_myplot._cdf(example_cdf)

    save_options = dict(root='example_cdf',
                        title='CDF',
                        xlabel='x',
                        ylabel='CDF(x)',
                        axis=[0, 6, 0, 1],
                        legend=False)
    _05_myplot._save(**save_options)
Example #13
0
def main():
    """Plot the CDF of the running speeds read through ``_10_relay``."""
    speeds = _10_relay._get_speeds(_10_relay._read_results())

    # distribution of actual speeds, as a CDF
    speed_cdf = _13_Cdf._make_cdf_from_list(speeds, 'speeds')
    _05_myplot._cdf(speed_cdf)

    save_options = dict(root='relay_cdf',
                        title='CDF of running speed',
                        xlabel='speed (mph)',
                        ylabel='probability')
    _05_myplot._save(**save_options)
Example #14
0
def _make_line_plot(age_bins):
    """Plot the mean of each bin's weights against the bin key.

    Args:
        age_bins: mapping from bin key to a collection of weights; bins are
                  plotted in sorted key order
    """
    xs = []
    ys = []
    # dict.items() replaces the Python-2-only iteritems(), which raises
    # AttributeError on Python 3; `age_bin` avoids shadowing builtin `bin`
    for age_bin, weights in sorted(age_bins.items()):
        xs.append(age_bin)
        ys.append(_03_thinkstats._mean(weights))

    _05_myplot._plot(xs, ys, 'bs-')
    _05_myplot._save(root='agemodel_line',
                     xlabel="Mother's age (years)",
                     ylabel='Mean birthweight (oz)',
                     legend=False)
Example #15
0
def _check_cdf():
    """Overlay ``_chi2_cdf`` curves for df 1..3 with a CDF of simulated values.

    The simulated CDF is built from 1000 calls to ``_simulate_chi2``.
    """
    for df in (1, 2, 3):
        curve_xs, curve_ys = _chi2_cdf(df=df, high=15)
        pyplot.plot(curve_xs, curve_ys, label=df)

    simulated = [_simulate_chi2() for _ in range(1000)]
    simulated_cdf = _13_Cdf._make_cdf_from_list(simulated)
    _05_myplot._cdf(simulated_cdf)

    save_options = dict(root='khan3',
                        xlabel='chi2 value',
                        ylabel="CDF",
                        formats=['png'])
    _05_myplot._save(**save_options)
Example #16
0
def _normal_prob_plot(samples):
    """Makes a normal probability plot for each sample in samples.

    Args:
        samples: mapping from label to sample; every label must be a key of
                 the marker table below ('male' or 'female'), otherwise a
                 KeyError is raised
    """
    pyplot.clf()

    markers = dict(male='b', female='g')

    # dict.items() replaces the Python-2-only iteritems(), which raises
    # AttributeError on Python 3
    for label, sample in samples.items():
        _normal_plot(sample, label, markers[label], jitter=0.0)

    _05_myplot._save(show=True,
                     # root='bayes_height_normal',
                     title='Normal probability plot',
                     xlabel='Standard normal',
                     ylabel='Reported height (cm)')
Example #17
0
def _plot_cdfs(samples):
    """Make CDFs showing the distribution of outliers.

    Only values below 150 from each sample are kept.

    Args:
        samples: mapping from label to a sequence of values
    """
    cdfs = []
    # dict.items() replaces the Python-2-only iteritems(), which raises
    # AttributeError on Python 3
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]
        cdfs.append(_13_Cdf._make_cdf_from_list(outliers, label))

    _05_myplot._clf()
    _05_myplot._cdfs(cdfs)
    _05_myplot._save(root='bayes_height_cdfs',
                     title='CDF of height',
                     xlabel='Reported height (cm)',
                     ylabel='CDF')
Example #18
0
def _make_figures(pmf, biased_pmf):
    """Plot the CDFs of the unbiased and biased PMFs on a log x-axis.

    Also prints the 50th percentile of both CDFs and the unbiased CDF's
    ``_prob`` at 100 and 1000.
    """
    unbiased_cdf = _13_Cdf._make_cdf_from_pmf(pmf, 'unbiased')
    print('unbiased median', unbiased_cdf._percentile(50))
    for bound in (100, 1000):
        print('percent < %d' % bound, unbiased_cdf._prob(bound))

    biased_cdf = _13_Cdf._make_cdf_from_pmf(biased_pmf, 'biased')
    print('biased median', biased_cdf._percentile(50))

    _05_myplot._clf()
    _05_myplot._cdfs([unbiased_cdf, biased_cdf])

    save_options = dict(root='slashdot.logx',
                        xlabel='Number of friends/foes',
                        ylabel='CDF',
                        xscale='log')
    _05_myplot._save(**save_options)
Example #19
0
def main(script):
    """Plot the distribution of gaps, in days, between sorted birthdays.

    ``script`` is accepted but not used.  The keyword options returned by
    ``_05_myplot._cdf`` are forwarded to ``_05_myplot._save``.
    """
    # read and sort the birthdays
    birthdays = _read_birthdays()
    birthdays.sort()

    # gaps between birthdays, expressed in whole days
    gaps_in_days = [gap.days for gap in _diff(birthdays)]

    # plot the CDF with the 'exponential' transform
    interval_cdf = _13_Cdf._make_cdf_from_list(gaps_in_days, name='intervals')
    scale_options = _05_myplot._cdf(interval_cdf, transform='exponential')
    _05_myplot._save(root='intervals',
                     xlabel='days',
                     ylabel='ccdf',
                     **scale_options)
Example #20
0
def _make_cdfs(lens):
    """Plot the CDF of ``lens`` on a log x-axis, then its complement on log-log axes."""
    slashdot_cdf = _13_Cdf._make_cdf_from_list(lens, 'slashdot')
    x_label = 'Number of friends/foes'

    # CDF, log-scaled x
    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf)
    logx_options = dict(root='slashdot.logx',
                        xlabel=x_label,
                        ylabel='CDF',
                        xscale='log')
    _05_myplot._save(**logx_options)

    # complementary CDF, both axes log-scaled
    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf, complement=True)
    loglog_options = dict(root='slashdot.loglog',
                          xlabel=x_label,
                          ylabel='CDF',
                          xscale='log',
                          yscale='log')
    _05_myplot._save(**loglog_options)
Example #21
0
def _make_figures(pool, firsts, others):
    """Produce the age/weight figures: four CDF plots and one hexbin plot.

    ``pool``, ``firsts`` and ``others`` must expose ``age_cdf`` and
    ``weight_cdf`` attributes (only ``pool``'s are used for the pooled
    plots); ``pool`` is also passed to ``_get_age_weight``.
    """
    age_title = "Distribution of mother's age"
    weight_title = "Distribution of birth weight"
    age_label = 'age (years)'
    weight_label = 'birth weight (oz)'

    # pooled CDF of ages
    _05_myplot._clf()
    _05_myplot._cdf(pool.age_cdf)
    _05_myplot._save(root='agemodel_age_cdf', title=age_title,
                     xlabel=age_label, ylabel='CDF', legend=False)

    # pooled CDF of weights
    _05_myplot._clf()
    _05_myplot._cdf(pool.weight_cdf)
    _05_myplot._save(root='agemodel_weight_cdf', title=weight_title,
                     xlabel=weight_label, ylabel='CDF', legend=False)

    # age CDFs, first babies versus others
    _05_myplot._clf()
    _05_myplot._cdfs([firsts.age_cdf, others.age_cdf])
    _05_myplot._save(root='agemodel_age_cdfs', title=age_title,
                     xlabel=age_label, ylabel='CDF')

    # weight CDFs, first babies versus others
    _05_myplot._clf()
    _05_myplot._cdfs([firsts.weight_cdf, others.weight_cdf])
    _05_myplot._save(root='agemodel_weight_cdfs', title=weight_title,
                     xlabel=weight_label, ylabel='CDF')

    # ages against weights, drawn as a hexbin plot
    ages, weights = _get_age_weight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    _05_myplot._save(root='agemodel_scatter',
                     xlabel='Age (years)',
                     ylabel='Birth weight (oz)',
                     legend=False)
Example #22
0
def _make_diff_figure(firsts, others):
    """Bar-plot 100 * (firsts' probability - others' probability) for weeks 35..45."""
    weeks = range(35, 46)
    diffs = [
        100 * (firsts.pmf._prob(week) - others.pmf._prob(week))
        for week in weeks
    ]

    pyplot.clf()
    pyplot.bar(weeks, diffs, align='center')

    save_options = dict(root='nsfg_diffs',
                        title='Difference in PMFs',
                        xlabel='weeks',
                        ylabel='100 (PMF$_{first}$ - PMF$_{other}$)',
                        legend=False)
    _05_myplot._save(**save_options)
Example #23
0
def _make_pareto_cdf():
    """Plot ``_pareto_cdf`` (xmin=100, alpha=1.7) at 50 points from 0 up to, but excluding, 1000.

    Also prints the value of ``_pareto_median`` for the same parameters.
    """
    steps = 50
    upper = 1000.0
    xs = [upper * step / steps for step in range(steps)]

    xmin = 100
    alpha = 1.7
    ps = [_pareto_cdf(x, alpha, xmin) for x in xs]
    print('Median', _pareto_median(xmin, alpha))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)

    save_options = dict(title='Pareto CDF',
                        xlabel='height (cm)',
                        ylabel='CDF',
                        legend=False)
    _05_myplot._save('pareto_world1', **save_options)
Example #24
0
def _make_pareto_cdf():
    """Generates a plot of the Pareto CDF.

    Evaluates ``_pareto_cdf`` with xmin=0.5 and alpha=1.0 at 50 points from
    0 up to, but excluding, 10, and prints the CDF value at x=10.
    """
    steps = 50
    upper = 10.0
    xs = [upper * step / steps for step in range(steps)]

    xmin = 0.5
    alpha = 1.0
    ps = [_pareto_cdf(x, alpha, xmin) for x in xs]

    # argument order is (x, alpha, xmin), as in the call above; the previous
    # call passed xmin as x and 10 as xmin, so it did not report the
    # fraction at or below 10
    print('Fraction <= 10', _pareto_cdf(10, alpha, xmin))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    _05_myplot._save('pareto_cdf',
                     title='Pareto CDF',
                     xlabel='x',
                     ylabel='CDF',
                     legend=False)
Example #25
0
def _make_expo_cdf():
    """Generates a plot of the exponential CDF.

    Evaluates ``_expo_cdf`` with lam=2.0 at 40 points from 0 up to, but
    excluding, 2.5, and prints the CDF value at x = -ln(0.05) / lam.
    """
    steps = 40
    upper = 2.5
    xs = [upper * step / steps for step in range(steps)]

    lam = 2.0
    ps = [_expo_cdf(x, lam) for x in xs]

    percentile = -math.log(0.05) / lam
    # argument order is (x, lam), as in the call above; the previous call
    # had the two arguments swapped
    print('Fraction <= ', percentile, _expo_cdf(percentile, lam))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    _05_myplot._save('expo_cdf',
                     title='Exponential CDF',
                     xlabel='x',
                     ylabel='CDF',
                     legend=False)
Example #26
0
def _plot_marginals(suite):
    """Plot the CDFs of the two marginals of ``suite`` side by side.

    The marginals come from ``_compute_marginals``; ``suite.name`` is used
    in the output root.
    """
    pmf_m, pmf_s = _compute_marginals(suite)

    pyplot.clf()
    pyplot.figure(1, figsize=(7, 4))

    # (marginal PMF, CDF name, x-axis label), one entry per panel
    panels = (
        (pmf_m, 'mu', 'Mean height (cm)'),
        (pmf_s, 'sigma', 'Std Dev height (cm)'),
    )
    for position, (marginal, cdf_name, x_label) in enumerate(panels, start=1):
        pyplot.subplot(1, 2, position)
        marginal_cdf = _13_Cdf._make_cdf_from_pmf(marginal, cdf_name)
        _05_myplot._cdf(marginal_cdf)
        pyplot.xlabel(x_label)
        pyplot.ylabel('CDF')

    _05_myplot._save(root='bayes_height_marginals_%s' % suite.name)
Example #27
0
def main():
    """Update two uniform suites with their evidence and plot both as PMFs.

    Prints a fixed ratio first, then the result of ``_total_probability``
    for the two updated suites.
    """
    print('pae', 0.3 / (0.3 + 3.0 / 13))

    door_a = _make_uniform_suite(0.0, 1.0, 101, name='Door A')
    _update(door_a, (3, 2))

    door_c = _make_uniform_suite(0.0, 1.0, 101, name='Door C')
    _update(door_c, (3, 10))

    print(_total_probability(door_a, door_c, _prob_winning))

    # plot both suites after their updates
    _05_myplot._pmfs([door_a, door_c])
    save_options = dict(root='blinky',
                        formats=['pdf', 'png'],
                        title='Probability of blinking',
                        xlabel='P(blink)',
                        ylabel='Posterior probability')
    _05_myplot._save(**save_options)
Example #28
0
def _make_normal_plot(ys, root=None, line_options=None, **options):
    """
    Makes a normal probability plot.

    Sorted ``ys`` are plotted against an equally sized, sorted sample of
    standard-normal random values.

    Args:
        ys:           sequence of values
        root:         passed as the first argument to myplot.Save
        line_options: dictionary of options for pyplot.plot; None (the
                      default) means no extra options
        options:      dictionary of options for myplot.Save
    """
    # None replaces the former mutable `{}` default, which was a single dict
    # shared by every call
    if line_options is None:
        line_options = {}

    # TODO: when n is small, generate a larger sample and desample
    n = len(ys)
    xs = [random.normalvariate(0.0, 1.0) for _ in range(n)]

    pyplot.clf()
    pyplot.plot(sorted(xs), sorted(ys), 'b.', markersize=3, **line_options)

    _05_myplot._save(root,
                     xlabel='Standard normal values',
                     legend=False,
                     **options)
Example #29
0
def _make_figures(pool, firsts, others):
    """Plot birth-weight PMFs and CDFs for first babies (blue) and others (orange).

    ``pool`` is accepted but not used.
    """
    colors = ('blue', 'orange')
    groups = (firsts, others)

    # PMFs of birth weight, one histogram per group
    _05_myplot._clf()
    for group, color in zip(groups, colors):
        _05_myplot._hist(group.weight_pmf, linewidth=0, color=color)
    _05_myplot._save(root='nsfg_birthwgt_pmf',
                     title='Birth weight PMF',
                     xlabel='weight (ounces)',
                     ylabel='probability')

    # CDFs of birth weight, one curve per group
    _05_myplot._clf()
    for group, color in zip(groups, colors):
        _05_myplot._cdf(group.weight_cdf, linewidth=2, color=color)
    _05_myplot._save(root='nsfg_birthwgt_cdf',
                     title='Birth weight CDF',
                     xlabel='weight (ounces)',
                     ylabel='probability',
                     axis=[0, 200, 0, 1])
Example #30
0
def main():
    """Update a uniform prior with evidence 60 and plot the resulting posterior.

    Prints the result of ``_credible_interval(posterior, 90)`` before
    plotting.
    """
    upper_bound = 200
    prior = _make_uniform_suite(1, upper_bound, upper_bound)
    prior.name = 'prior'

    # update a copy so the prior itself is left as built
    posterior = prior._copy()
    _update(posterior, 60)
    posterior.name = 'posterior'

    print(_credible_interval(posterior, 90))

    # plot the posterior distribution
    pyplot.subplots_adjust(wspace=0.4, left=0.15)
    _05_myplot._pmf(posterior, linewidth=2)

    save_options = dict(root='locomotive',
                        title='Locomotive problem',
                        xlabel='Number of trains',
                        ylabel='Posterior probability')
    _05_myplot._save(**save_options)