def MakeParetoCdf2():
    """Plots and saves the Pareto CDF of height in Pareto World.

    Renders a Pareto CDF with xmin=100 and alpha=1.7 over the range
    0 to 1000 (n=100), plots it, and saves the figure under the root
    name 'analytic_pareto_height' with the legend turned off.
    """
    min_height, shape = 100, 1.7
    heights, probs = thinkstats2.RenderParetoCdf(
        min_height, shape, 0, 1000.0, n=100)
    thinkplot.Plot(heights, probs)

    # collect the figure options first so the Save call stays short
    save_options = dict(root='analytic_pareto_height',
                        title='Pareto CDF',
                        xlabel='height (cm)',
                        ylabel='CDF',
                        legend=False)
    thinkplot.Save(**save_options)
def MakeParetoCdf():
    """Plots and saves Pareto CDFs for three values of alpha.

    All curves share xmin=0.5 and are rendered over the range 0 to 10
    (n=100); each one is labeled with its alpha.  The figure is saved
    under the root name 'analytic_pareto_cdf'.
    """
    lower_bound = 0.5
    shapes = [2.0, 1.0, 0.5]

    # one plot slot per curve
    thinkplot.PrePlot(3)
    for shape in shapes:
        values, probs = thinkstats2.RenderParetoCdf(
            lower_bound, shape, 0, 10.0, n=100)
        curve_label = r'$\alpha=%g$' % shape
        thinkplot.Plot(values, probs, label=curve_label)

    save_options = dict(root='analytic_pareto_cdf',
                        title='Pareto CDF',
                        xlabel='x',
                        ylabel='CDF')
    thinkplot.Save(**save_options)
def MakeFigures():
    """Plots the CDF of populations in several forms.

    On a log-log scale the tail of the CCDF looks like a straight line,
    which suggests a Pareto distribution, but that turns out to be misleading.

    On a log-x scale the distribution has the characteristic sigmoid of
    a lognormal distribution.

    The normal probability plot of log(sizes) confirms that the data fit the
    lognormal model very well.

    Many phenomena that have been described with Pareto models can be described
    as well, or better, with lognormal models.

    Reads the populations with ReadData(), prints how many there are, and
    saves two figures: 'populations_pareto' (CCDF of log10 population with
    a Pareto model, log-scaled y axis) and 'populations_normal' (CDF of
    log10 population with a normal model, next to a normal probability
    plot of log10 population).
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))

    # everything below works on log10(population)
    log_pops = np.log10(pops)
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # pareto plot
    # the model xs are populations, so take log10 to share the x axis with
    # cdf_log; 1-ys turns the model CDF into a CCDF
    xs, ys = thinkstats2.RenderParetoCdf(xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(xs), 1-ys, label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CCDF',
                     yscale='log')
    thinkplot.Save(root='populations_pareto')

    # lognormal plot
    thinkplot.PrePlot(cols=2)

    # normal model with the mean and std of log10(population)
    mu, sigma = log_pops.mean(), log_pops.std()
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=8)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CDF')

    # second panel: normal probability plot of log10(population)
    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z',
                     ylabel='log10 population',
                     xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')
Esempio n. 4
0
def MakeFigures(df):
    """Plots the CDF of income in several forms.

    Saves three figures: 'hinc_linear' (CDF), 'hinc_pareto' (CCDF with a
    Pareto model on log-log axes) and 'hinc_normal' (CDF of log10 income
    with a normal model).

    df: object whose `income` and `ps` attributes expose `.values`
        (presumably a pandas DataFrame -- confirm against the caller)
    """
    xs, ps = df.income.values, df.ps.values
    cdf = SmoothCdf(xs, ps, label='data')
    cdf_log = SmoothCdf(np.log10(xs), ps, label='data')

    # linear plot
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='hinc_linear', xlabel='household income', ylabel='CDF')

    # pareto plot
    # for the model I chose parameters by hand to fit the tail
    xs, ys = thinkstats2.RenderParetoCdf(xmin=55000,
                                         alpha=2.5,
                                         low=0,
                                         high=250000)
    thinkplot.Plot(xs, 1 - ys, label='model', color='0.8')

    # NOTE(review): the x values plotted here are raw incomes on a
    # log-scaled axis, so the 'log10 household income' label may be
    # misleading -- confirm before changing it
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Save(root='hinc_pareto',
                   xlabel='log10 household income',
                   ylabel='CCDF',
                   xscale='log',
                   yscale='log')

    # lognormal plot
    # the model is centered on the median of log10(income); a
    # percentile-based estimate of std would be IQR / 1.349, but std is
    # chosen by hand to match the upper tail
    median = cdf_log.Percentile(50)
    std = 0.35
    print(median, std)

    xs, ps = thinkstats2.RenderNormalCdf(median, std, low=3.5, high=5.5)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Save(root='hinc_normal',
                   xlabel='log10 household income',
                   ylabel='CDF')
Esempio n. 5
0
# Pass the raw Area_income column to MakeNormalPlot (defined elsewhere),
# then label and display the resulting figure.
MakeNormalPlot(advertisement_data.Area_income)
thinkplot.Config(
    title='Area_income, normal plot',
    xlabel='CDF',
    ylabel='area income in dollars',
    loc='upper left',
)
thinkplot.show()


# Repeat on a log10 scale: log10(Area_income) is passed to MakeNormalModel
# (defined elsewhere), then the figure is labeled and displayed.
log_Area_income = np.log10(advertisement_data.Area_income)
MakeNormalModel(log_Area_income)
# the data is income in dollars, so the axis unit is log10 dollars, not kg
thinkplot.Config(title='Area Income, log scale',
                 xlabel='Area income (log10 dollars)',
                 ylabel='CDF', loc='upper right')
thinkplot.show()


# CDFs of Area_income on the raw and on the log10 scale.
cdf = thinkstats2.Cdf(advertisement_data.Area_income, label='data')
cdf_log = thinkstats2.Cdf(
    np.log10(advertisement_data.Area_income),
    label='data',
)

# Pareto model; 1-ys turns the model CDF into a CCDF.
xs, ys = thinkstats2.RenderParetoCdf(
    xmin=13996,
    alpha=2.5,
    low=0,
    high=79484,
)
thinkplot.Plot(xs, 1-ys, label='model', color='0.8')

# Data CCDF on top of the model, both axes log-scaled.
thinkplot.Cdf(cdf, complement=True)
thinkplot.Config(
    xlabel='log10 household income',
    ylabel='CCDF',
    xscale='log',
    yscale='log',
    loc='lower left',
)

thinkplot.show()


Esempio n. 6
0
# Title, axis labels and legend position for the adult-weight lognormal plot.
thinkplot.Config(
    title='Adult weight, lognormal plot',
    xlabel='Weight (log10 kg)',
    ylabel='CDF',
    loc='upper left',
)

#%% [markdown]
# ## Pareto distribution
#
# Here's what the Pareto CDF looks like with a range of parameters.

#%%
# All curves share the same xmin; only alpha varies.
xmin = 0.5

thinkplot.PrePlot(3)
for alpha in (2.0, 1.0, 0.5):
    xs, ps = thinkstats2.RenderParetoCdf(xmin, alpha, 0, 10.0, n=100)
    thinkplot.Plot(
        xs,
        ps,
        label=r'$\alpha=%g$' % alpha,
    )

thinkplot.Config(
    title='Pareto CDF',
    xlabel='x',
    ylabel='CDF',
    loc='lower right',
)

#%% [markdown]
# The distribution of populations for cities and towns is sometimes said to be Pareto-like.

#%%
import populations

# Load the population data through populations.ReadData() and report how
# many entries were read.
pops = populations.ReadData()
print('Number of cities/towns', len(pops))
Esempio n. 7
0
# Load the population data through populations.ReadData() and report how
# many entries were read.
pops = populations.ReadData()
print('Number of cities/towns', len(pops))

#%%
# CDF of the raw populations.
cdf = thinkstats2.Cdf(pops)
thinkplot.Cdf(cdf)
thinkplot.Config(
    xlabel='population',
    ylabel='CDF',
)

#%%
# CCDF of log10(population) with a Pareto model on a log-scaled y axis.
log_pops = np.log10(pops)
log_cdf = thinkstats2.Cdf(log_pops, label='data')
thinkplot.Cdf(log_cdf, complement=True)
xmin = 5000
alpha = 1.4
xs, ys = thinkstats2.RenderParetoCdf(xmin=xmin, alpha=alpha, low=0, high=1.0e7)
# the model xs are populations, so take log10 to share the x axis with
# log_cdf; 1 - ys turns the model CDF into a CCDF
thinkplot.Plot(np.log10(xs),
               1 - ys,
               label=r'model $x_m={}$  $\alpha={}$'.format(xmin, alpha))
thinkplot.Config(yscale='log', xlabel='log10 population', ylabel='CCDF')

#%%
# CDF of log10(population) with a normal model whose parameters come from
# TrimmedMeanVar with p=0.01.
thinkplot.Cdf(log_cdf)
mu, var = thinkstats2.TrimmedMeanVar(log_pops, p=0.01)
sigma = np.sqrt(var)

# render the model over mu +/- 4 sigma
xmin, xmax = mu - 4.0 * sigma, mu + 4.0 * sigma
xs, ys = thinkstats2.RenderNormalCdf(mu, sigma, xmin, xmax)
thinkplot.plot(
    xs,
    ys,
    label=r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma),
)