Ejemplo n.º 1
0
    def _make_normal_model(self,
                           weights,
                           root,
                           xmax=175,
                           xlabel='adult weight (kg)',
                           axis=None):
        """Plot the empirical CDF of `weights` on top of a normal model.

        The model parameters come from _03_thinkstats._trimmed_mean_var,
        applied to a sorted copy of `weights`. The sample size, mean,
        variance and sigma are printed along the way.

        Args:
            weights: sequence of values; copied via slicing, then sorted
            root:    first argument handed to _05_myplot._save
            xmax:    upper bound given to the model renderer and used as
                     the right edge of the default axis
            xlabel:  label for the x axis
            axis:    axis limits; when falsy, [0, xmax, 0, 1] is used
        """
        data_cdf = _13_Cdf._make_cdf_from_list(weights)

        pyplot.clf()

        # Work on a sorted copy so the caller's sequence keeps its order.
        ordered = weights[:]
        ordered.sort()
        mu, var = _03_thinkstats._trimmed_mean_var(ordered)
        print('n, Mean, Var', len(weights), mu, var)

        sigma = math.sqrt(var)
        print('Sigma', sigma)

        # Thick grey curve: the fitted normal model.
        model_xs, model_ps = continuous._render_normal_cdf(mu, sigma, xmax)
        pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.7')

        # Thin blue curve: the data itself.
        data_xs, data_ps = data_cdf._render()
        pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

        plot_axis = axis if axis else [0, xmax, 0, 1]
        _05_myplot._save(root,
                         title='Adult weight',
                         xlabel=xlabel,
                         ylabel='CDF',
                         axis=plot_axis)
Ejemplo n.º 2
0
def _plot_ages(resp):
    """Show a CDF plot of the `age` attribute of every record in `resp`."""
    age_cdf = _13_Cdf._make_cdf_from_list([rec.age for rec in resp.records])

    # Start from a clean figure, draw the CDF, then display it.
    _05_myplot._clf()
    _05_myplot._cdf(age_cdf)
    _05_myplot._show()
Ejemplo n.º 3
0
    def testRender(self):
        """Check the value and probability sequences returned by _render()."""
        t = [2, 1, 3, 2, 5]
        cdf = _13_Cdf._make_cdf_from_list(t, 'bob')

        vs, ps = cdf._render()
        self.assertEqual(vs, [1, 1, 2, 2, 3, 3, 5, 5])

        expected_ps = [0.0, 0.2, 0.2, 0.6, 0.6, 0.8, 0.8, 1.0]
        # zip() stops at the shorter sequence, so pin the length first;
        # otherwise a truncated or empty `ps` would pass vacuously.
        self.assertEqual(len(ps), len(expected_ps))
        for got, expected in zip(ps, expected_ps):
            self.assertAlmostEqual(got, expected)
Ejemplo n.º 4
0
def _check_cdf2():
    """Compare chi2 values from the simulation with a chi-squared dist.

    Runs _simulate_chi2() 1000 times, maps each result through the
    chi-squared CDF with 3 degrees of freedom, and shows the CDF of the
    mapped values.
    """
    df = 3
    simulated = [_simulate_chi2() for _ in range(1000)]
    mapped = [scipy.stats.chi2.cdf(value, df) for value in simulated]

    _05_myplot._cdf(_13_Cdf._make_cdf_from_list(mapped))
    _05_myplot._show()
Ejemplo n.º 5
0
    def testItems(self):
        """Check the (value, cumulative probability) pairs from _items()."""
        t = [2, 1, 3, 2, 5]
        cdf = _13_Cdf._make_cdf_from_list(t, 'bob')

        # Materialise the result so its length can be checked even when
        # _items() hands back a lazy iterator.
        items = list(cdf._items())
        expected = [(1, 0.2), (2, 0.6), (3, 0.8), (5, 1.0)]

        # zip() truncates to the shorter input; without this check a
        # missing pair (or an empty result) would go unnoticed.
        self.assertEqual(len(items), len(expected))
        for p1, p2 in zip(items, expected):
            for x1, x2 in zip(p1, p2):
                self.assertEqual(x1, x2)
Ejemplo n.º 6
0
def _resample(cdf, n=10000):
    """Plot `cdf` next to the CDF of a sample drawn from it.

    Args:
        cdf: object providing _sample(n)
        n:   number of values requested from cdf._sample
    """
    resampled_cdf = _13_Cdf._make_cdf_from_list(cdf._sample(n), 'resampled')

    _05_myplot._clf()
    _05_myplot._cdfs([cdf, resampled_cdf])
    _05_myplot._save(
        root='resample_cdf',
        title='CDF',
        xlabel='weight in oz',
        ylabel='CDF(x)',
    )
Ejemplo n.º 7
0
def _process(table, name):
    """
    Runs various analyses on this table.

    Delegates to cumulative._process(table, name) first, then sets
    these attributes on `table`:
        ages:       agepreg of every record whose agepreg is not 'NA'
        age_pmf:    Pmf object built from ages
        age_cdf:    Cdf object built from ages
        weights:    totalwgt_oz of every record whose totalwgt_oz is not 'NA'
        weight_cdf: Cdf object built from weights
    """
    cumulative._process(table, name)

    # Age distributions, skipping records with a missing age.
    table.ages = [rec.agepreg for rec in table.records
                  if rec.agepreg != 'NA']
    table.age_pmf = _04_Pmf._make_pmf_from_list(table.ages, table.name)
    table.age_cdf = _13_Cdf._make_cdf_from_list(table.ages, table.name)

    # Weight distribution, skipping records with a missing weight.
    table.weights = [rec.totalwgt_oz for rec in table.records
                     if rec.totalwgt_oz != 'NA']
    table.weight_cdf = _13_Cdf._make_cdf_from_list(table.weights, table.name)
Ejemplo n.º 8
0
    def testProb(self):
        """Check _prob() below, at, between and above the sample values."""
        sample = [2, 1, 3, 2, 5]
        cdf = _13_Cdf._make_cdf_from_list(sample, 'bob')

        # (x, expected cumulative probability at x)
        cases = [
            (-1, 0.0),
            (1, 0.2),
            (2, 0.6),
            (2.5, 0.6),
            (4, 0.8),
            (5, 1.0),
            (7, 1.0),
        ]
        for x, expected in cases:
            self.assertEqual(cdf._prob(x), expected)
Ejemplo n.º 9
0
def _make_example():
    """Build the CDF of a five-element sample and save its plot.

    The figure is saved under the root 'example_cdf' with fixed axis
    limits and no legend.
    """
    example_cdf = _13_Cdf._make_cdf_from_list([2, 1, 3, 2, 5])

    _05_myplot._clf()
    _05_myplot._cdf(example_cdf)
    _05_myplot._save(
        root='example_cdf',
        title='CDF',
        xlabel='x',
        ylabel='CDF(x)',
        axis=[0, 6, 0, 1],
        legend=False,
    )
Ejemplo n.º 10
0
def _winsorize(xs, p=0.01):
    """Compresses outliers.

    Looks up the bounds cdf._value(p) and cdf._value(1 - p) on the CDF
    of `xs`, prints the bounds and the sorted values falling outside
    them, and returns a new list with every value clamped to the bounds.

    Args:
        xs: sequence of values (iterated more than once)
        p:  probability passed to cdf._value for the lower bound;
            1 - p is used for the upper bound

    Returns:
        list with the same length as xs
    """
    cdf = _13_Cdf._make_cdf_from_list(xs)
    low = cdf._value(p)
    high = cdf._value(1 - p)
    print(low, high)

    # Report what is about to be clamped.
    print(sorted(x for x in xs if x < low or x > high))

    return [min(max(low, x), high) for x in xs]
Ejemplo n.º 11
0
def main():
    """Read the relay results and save a CDF plot of the running speeds."""
    speeds = _10_relay._get_speeds(_10_relay._read_results())

    # plot the distribution of actual speeds
    speed_cdf = _13_Cdf._make_cdf_from_list(speeds, 'speeds')
    _05_myplot._cdf(speed_cdf)
    _05_myplot._save(
        root='relay_cdf',
        title='CDF of running speed',
        xlabel='speed (mph)',
        ylabel='probability',
    )
Ejemplo n.º 12
0
def _check_cdf():
    """Compare chi2 values from simulation with chi2 distributions.

    Draws the curves returned by _chi2_cdf for df = 1, 2 and 3, then
    overlays the CDF of 1000 _simulate_chi2() results and saves the
    figure as PNG under the root 'khan3'.
    """
    # Reference curves, one per degree of freedom.
    for dof in [1, 2, 3]:
        curve_xs, curve_ys = _chi2_cdf(df=dof, high=15)
        pyplot.plot(curve_xs, curve_ys, label=dof)

    # Simulated values.
    simulated = [_simulate_chi2() for _ in range(1000)]
    _05_myplot._cdf(_13_Cdf._make_cdf_from_list(simulated))

    _05_myplot._save(
        root='khan3',
        xlabel='chi2 value',
        ylabel="CDF",
        formats=['png'],
    )
Ejemplo n.º 13
0
def _plot_cdfs(samples):
    """Make CDFs showing the distribution of outliers.

    For each (label, sample) pair in `samples`, keeps the values below
    150 and builds a CDF from them, labelled with `label`. All CDFs are
    drawn on one figure and saved under the root 'bayes_height_cdfs'.

    Args:
        samples: mapping from label to a sequence of values
    """
    cdfs = []
    # .items() works on Python 2 and 3; dict.iteritems() was removed in
    # Python 3 and would raise AttributeError there.
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]
        cdfs.append(_13_Cdf._make_cdf_from_list(outliers, label))

    _05_myplot._clf()
    _05_myplot._cdfs(cdfs)
    _05_myplot._save(root='bayes_height_cdfs',
                     title='CDF of height',
                     xlabel='Reported height (cm)',
                     ylabel='CDF')
Ejemplo n.º 14
0
def _make_figure(xmin=100, alpha=1.7, mu=150, sigma=25):
    """
    Makes a figure showing the CDF of height in ParetoWorld.

    Draws 10000 Pareto variates scaled by xmin and 10000 normal
    variates, then plots both CDFs on one figure saved under the root
    'pareto_world2'.

    Args:
        xmin:  parameter of the Pareto distribution
        alpha: parameter of the Pareto distribution
        mu:    parameter of the Normal distribution
        sigma: parameter of the Normal distribution
    """
    # The Pareto sample is drawn first so the order of random-number
    # consumption stays fixed.
    pareto_sample = [xmin * random.paretovariate(alpha) for _ in range(10000)]
    pareto_cdf = _13_Cdf._make_cdf_from_list(pareto_sample, name='pareto')

    normal_sample = [random.normalvariate(mu, sigma) for _ in range(10000)]
    normal_cdf = _13_Cdf._make_cdf_from_list(normal_sample, name='normal')

    _05_myplot._clf()
    _05_myplot._cdfs([pareto_cdf, normal_cdf])
    _05_myplot._save(
        root='pareto_world2',
        title='Pareto World',
        xlabel='height (cm)',
        ylabel='CDF',
    )
Ejemplo n.º 15
0
def _process(table, name):
    """
    Runs various analyses on this table.

    Delegates to _06_descriptive._process(table, name) first, then sets
    these attributes on `table`:
        weights:    totalwgt_oz of every record whose totalwgt_oz is not 'NA'
        weight_pmf: Pmf object built from weights
        weight_cdf: Cdf object built from weights
    """
    _06_descriptive._process(table, name)

    # Skip records with a missing total weight.
    table.weights = [rec.totalwgt_oz for rec in table.records
                     if rec.totalwgt_oz != 'NA']

    table.weight_pmf = _04_Pmf._make_pmf_from_list(table.weights, table.name)
    table.weight_cdf = _13_Cdf._make_cdf_from_list(table.weights, table.name)
Ejemplo n.º 16
0
def main(script):
    """Plot the distribution of the gaps, in days, between sorted birthdays.

    Args:
        script: not used in the body
    """
    # read 'em and sort 'em
    birthdays = _read_birthdays()
    birthdays.sort()

    # compute the intervals in days
    days = [delta.days for delta in _diff(birthdays)]

    # make and plot the CCDF on a log scale; the value returned by _cdf
    # is forwarded to _save as keyword arguments.
    interval_cdf = _13_Cdf._make_cdf_from_list(days, name='intervals')
    scale = _05_myplot._cdf(interval_cdf, transform='exponential')
    _05_myplot._save(
        root='intervals',
        xlabel='days',
        ylabel='ccdf',
        **scale
    )
Ejemplo n.º 17
0
    def testValue(self):
        """Check _value() across [0, 1] and its rejection of values outside."""
        sample = [2, 1, 3, 2, 5]
        cdf = _13_Cdf._make_cdf_from_list(sample, 'bob')

        # (probability, expected value)
        cases = [
            (0.0, 1),
            (0.1, 1),
            (0.2, 1),
            (0.3, 2),
            (0.4, 2),
            (0.5, 2),
            (0.6, 2),
            (0.7, 3),
            (0.8, 3),
            (0.9, 5),
            (1.0, 5),
        ]
        for p, expected in cases:
            self.assertEqual(cdf._value(p), expected)

        # Probabilities outside [0, 1] must be rejected.
        for bad_p in (-0.1, 1.1):
            self.assertRaises(ValueError, cdf._value, bad_p)
Ejemplo n.º 18
0
def _make_cdfs(lens):
    """Save two plots of the CDF of `lens`.

    The first ('slashdot.logx') is the CDF on a log x scale. The second
    ('slashdot.loglog') is drawn with complement=True on log-log scales.
    """
    slashdot_cdf = _13_Cdf._make_cdf_from_list(lens, 'slashdot')
    friends_label = 'Number of friends/foes'

    # CDF, log x axis
    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf)
    _05_myplot._save(
        root='slashdot.logx',
        xlabel=friends_label,
        ylabel='CDF',
        xscale='log',
    )

    # complement, both axes log
    _05_myplot._clf()
    _05_myplot._cdf(slashdot_cdf, complement=True)
    _05_myplot._save(
        root='slashdot.loglog',
        xlabel=friends_label,
        ylabel='CDF',
        xscale='log',
        yscale='log',
    )
Ejemplo n.º 19
0
def _make_figures():
    """Save the population figures: three CDF plots and a normal plot.

    Prints the number of populations read, then saves the CDF on linear
    axes, on a log x axis, and as a complementary CDF on log-log axes.
    Finally passes the sorted natural logs of the populations to
    _17_rankit._make_normal_plot.
    """
    pops = _21_populations._read_data()
    print(len(pops))

    pop_cdf = _13_Cdf._make_cdf_from_list(pops, 'populations')

    # Options shared by all three CDF figures.
    shared = dict(title='City/Town Populations',
                  xlabel='population',
                  legend=False)

    # linear axes
    _05_myplot._clf()
    _05_myplot._cdf(pop_cdf)
    _05_myplot._save(root='populations', ylabel='CDF', **shared)

    # log x axis
    _05_myplot._clf()
    _05_myplot._cdf(pop_cdf)
    _05_myplot._save(root='populations_logx',
                     ylabel='CDF',
                     xscale='log',
                     **shared)

    # complementary CDF, log-log
    _05_myplot._clf()
    _05_myplot._cdf(pop_cdf, complement=True)
    _05_myplot._save(root='populations_loglog',
                     ylabel='Complementary CDF',
                     yscale='log',
                     xscale='log',
                     **shared)

    log_pops = sorted(math.log(pop) for pop in pops)
    _17_rankit._make_normal_plot(log_pops, 'populations_rankit')
Ejemplo n.º 20
0
def _make_normal_model(weights):
    """Plot the CDF of birthweights with a normal model.

    The model's mean and variance come from
    _03_thinkstats._trimmed_mean_var(weights, p=0.01). They are printed
    together with sigma. The figure is saved under the root
    'nsfg_birthwgt_model'.
    """
    # estimate parameters: trimming outliers yields a better fit
    mu, var = _03_thinkstats._trimmed_mean_var(weights, p=0.01)
    print('Mean, Var', mu, var)

    sigma = math.sqrt(var)
    print('Sigma', sigma)

    # model curve (thick, light grey)
    model_xs, model_ps = _render_normal_cdf(mu, sigma, 200)
    pyplot.clf()
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.8')

    # data curve (thin, blue)
    data_xs, data_ps = _13_Cdf._make_cdf_from_list(weights)._render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

    _05_myplot._save(
        'nsfg_birthwgt_model',
        title='Birth weights',
        xlabel='birth weight (oz)',
        ylabel='CDF',
    )
Ejemplo n.º 21
0
 def testMakeCdfFromList(self):
     """Build a Cdf from a list and run it through self.checkCdf."""
     sample = [2, 1, 3, 2, 5]
     self.checkCdf(_13_Cdf._make_cdf_from_list(sample, 'bob'))
Ejemplo n.º 22
0
 def testMean(self):
     """Check that _mean() of the sample's Cdf is 13/5."""
     sample = [2, 1, 3, 2, 5]
     sample_cdf = _13_Cdf._make_cdf_from_list(sample, 'bob')
     expected_mean = 13.0 / 5.0
     self.assertAlmostEqual(sample_cdf._mean(), expected_mean)