def MakeParetoCdf2():
    """Plots and saves the Pareto CDF used for heights in "Pareto World".

    Calls thinkstats2.RenderParetoCdf with xmin=100 and alpha=1.7 over
    low=0, high=1000.0 with n=100, plots the returned arrays, and saves
    the figure under the root name 'analytic_pareto_height'.
    """
    minimum_height = 100
    shape = 1.7

    heights, probs = thinkstats2.RenderParetoCdf(
        xmin=minimum_height,
        alpha=shape,
        low=0,
        high=1000.0,
        n=100,
    )
    thinkplot.Plot(heights, probs)

    # A single curve is drawn, so the legend is switched off.
    thinkplot.Save(
        root='analytic_pareto_height',
        title='Pareto CDF',
        xlabel='height (cm)',
        ylabel='CDF',
        legend=False,
    )
def MakeParetoCdf():
    """Plots and saves the Pareto CDF for several values of alpha.

    One curve is drawn for each alpha in (2.0, 1.0, 0.5), all sharing
    xmin=0.5 and rendered over low=0, high=10.0 with n=100. The figure is
    saved under the root name 'analytic_pareto_cdf'.
    """
    lower_bound = 0.5
    shapes = (2.0, 1.0, 0.5)

    # Announce the number of curves before plotting them.
    thinkplot.PrePlot(3)
    for shape in shapes:
        curve_xs, curve_ps = thinkstats2.RenderParetoCdf(
            xmin=lower_bound, alpha=shape, low=0, high=10.0, n=100)
        curve_label = r'$\alpha=%g$' % shape
        thinkplot.Plot(curve_xs, curve_ps, label=curve_label)

    thinkplot.Save(
        root='analytic_pareto_cdf',
        title='Pareto CDF',
        xlabel='x',
        ylabel='CDF',
    )
def MakeFigures():
    """Plots the CDF of populations in several forms.

    On a log-log scale the tail of the CCDF looks like a straight line,
    which suggests a Pareto distribution, but that turns out to be
    misleading.

    On a log-x scale the distribution has the characteristic sigmoid of
    a lognormal distribution.

    The normal probability plot of log(sizes) confirms that the data fit
    the lognormal model very well.

    Many phenomena that have been described with Pareto models can be
    described as well, or better, with lognormal models.

    Reads the populations with ReadData(), prints how many there are, and
    saves two figures: 'populations_pareto' and 'populations_normal'.
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))

    # Every plot below works on log10(population); the CDF of the raw
    # populations was never used, so it is no longer built.
    log_pops = np.log10(pops)
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # pareto plot: model and data CCDF against log10 population, log y axis
    xs, ys = thinkstats2.RenderParetoCdf(
        xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(xs), 1 - ys, label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CCDF',
                     yscale='log')
    thinkplot.Save(root='populations_pareto')

    # lognormal plot: two panels side by side
    thinkplot.PrePlot(cols=2)

    # left panel: normal model fitted with the sample mean and std of
    # log10(population), drawn with the data CDF
    mu, sigma = log_pops.mean(), log_pops.std()
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=8)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CDF')

    # right panel: normal probability plot of log10(population)
    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z',
                     ylabel='log10 population',
                     xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')
def MakeFigures(df):
    """Plots the CDF of household income on three different scales.

    Saves 'hinc_linear' (plain CDF), 'hinc_pareto' (CCDF with a Pareto
    model on log-log axes) and 'hinc_normal' (CDF of log10 income with a
    normal model).

    df: object exposing `income` and `ps` attributes, each with `.values`
    """
    incomes = df.income.values
    probs = df.ps.values

    income_cdf = SmoothCdf(incomes, probs, label='data')
    log_income_cdf = SmoothCdf(np.log10(incomes), probs, label='data')

    # --- linear scale ---
    thinkplot.Cdf(income_cdf)
    thinkplot.Save(root='hinc_linear',
                   xlabel='household income',
                   ylabel='CDF')

    # --- Pareto model on log-log axes ---
    # The model parameters were picked by hand to fit the tail.
    model_xs, model_ps = thinkstats2.RenderParetoCdf(
        xmin=55000, alpha=2.5, low=0, high=250000)
    thinkplot.Plot(model_xs, 1 - model_ps, label='model', color='0.8')

    thinkplot.Cdf(income_cdf, complement=True)
    thinkplot.Save(root='hinc_pareto',
                   xlabel='log10 household income',
                   ylabel='CCDF',
                   xscale='log',
                   yscale='log')

    # --- lognormal model ---
    # mu and sigma are estimated from percentiles of log10 income;
    # for a normal distribution the IQR is about 1.349 sigma.
    median = log_income_cdf.Percentile(50)
    upper_quartile = log_income_cdf.Percentile(75)
    lower_quartile = log_income_cdf.Percentile(25)
    iqr = upper_quartile - lower_quartile
    std = iqr / 1.349

    # The percentile-based estimate is then overridden by a hand-picked
    # value chosen to match the upper tail.
    std = 0.35
    print(median, std)

    model_xs, model_ps = thinkstats2.RenderNormalCdf(
        median, std, low=3.5, high=5.5)
    thinkplot.Plot(model_xs, model_ps, label='model', color='0.8')

    thinkplot.Cdf(log_income_cdf)
    thinkplot.Save(root='hinc_normal',
                   xlabel='log10 household income',
                   ylabel='CDF')
# Normal plot of the raw area income.
# NOTE(review): MakeNormalPlot is defined outside this view; the axis
# labels below are kept as written -- confirm they match what it draws.
MakeNormalPlot(advertisement_data.Area_income)
thinkplot.Config(title='Area_income, normal plot',
                 xlabel='CDF',
                 ylabel='area income in dollars',
                 loc='upper left')
thinkplot.show()

# Normal model of log10(area income).
log_Area_income = np.log10(advertisement_data.Area_income)
MakeNormalModel(log_Area_income)
# Income is measured in dollars (see the previous plot), not kilograms;
# the "kg" unit was left over from a weight plot.
thinkplot.Config(title='Area Income, log scale',
                 xlabel='Area income (log10 dollars)',
                 ylabel='CDF',
                 loc='upper right')
thinkplot.show()

# CCDF of area income with a Pareto model, both axes on a log scale.
cdf = thinkstats2.Cdf(advertisement_data.Area_income, label='data')
cdf_log = thinkstats2.Cdf(np.log10(advertisement_data.Area_income),
                          label='data')

xs, ys = thinkstats2.RenderParetoCdf(
    xmin=13996, alpha=2.5, low=0, high=79484)
thinkplot.Plot(xs, 1 - ys, label='model', color='0.8')

thinkplot.Cdf(cdf, complement=True)
thinkplot.Config(xlabel='log10 household income',
                 ylabel='CCDF',
                 xscale='log',
                 yscale='log',
                 loc='lower left')
thinkplot.show()
thinkplot.Config(
    title='Adult weight, lognormal plot',
    xlabel='Weight (log10 kg)',
    ylabel='CDF',
    loc='upper left',
)

#%% [markdown]
# ## Pareto distribution
#
# Here's what the Pareto CDF looks like with a range of parameters.

#%%
xmin = 0.5

# Three curves are drawn, one per value of alpha.
thinkplot.PrePlot(3)
for alpha in (2.0, 1.0, 0.5):
    xs, ps = thinkstats2.RenderParetoCdf(
        xmin=xmin, alpha=alpha, low=0, high=10.0, n=100)
    thinkplot.Plot(xs, ps, label=r'$\alpha=%g$' % alpha)

thinkplot.Config(
    title='Pareto CDF',
    xlabel='x',
    ylabel='CDF',
    loc='lower right',
)

#%% [markdown]
# The distribution of populations for cities and towns is sometimes said to be Pareto-like.

#%%
import populations

pops = populations.ReadData()
print('Number of cities/towns', len(pops))
pops = populations.ReadData()
print('Number of cities/towns', len(pops))

#%%
# CDF of the raw populations on a linear scale.
cdf = thinkstats2.Cdf(pops)
thinkplot.Cdf(cdf)
thinkplot.Config(xlabel="population", ylabel="CDF")

#%%
# CCDF of log10(population) with a Pareto model, log y axis.
log_pops = np.log10(pops)
log_cdf = thinkstats2.Cdf(log_pops, label='data')
thinkplot.Cdf(log_cdf, complement=True)

xmin = 5000
alpha = 1.4
xs, ys = thinkstats2.RenderParetoCdf(
    xmin=xmin, alpha=alpha, low=0, high=1.0e7)
# The model is rendered in population units, so its x values are
# converted to log10 to match the data curve.
thinkplot.Plot(np.log10(xs), 1 - ys,
               label=r'model $x_m={}$ $\alpha={}$'.format(xmin, alpha))
thinkplot.Config(yscale='log', xlabel='log10 population', ylabel='CCDF')

#%%
# CDF of log10(population) with a normal model whose mean and variance
# come from TrimmedMeanVar with p=0.01.
thinkplot.Cdf(log_cdf)

mu, var = thinkstats2.TrimmedMeanVar(log_pops, p=0.01)
sigma = np.sqrt(var)

# Render the model over mu +/- 4 sigma (xmin is rebound here from the
# Pareto parameter above to the lower plotting bound).
xmin = mu - 4.0 * sigma
xmax = mu + 4.0 * sigma
xs, ys = thinkstats2.RenderNormalCdf(mu, sigma, xmin, xmax)
thinkplot.plot(xs, ys,
               label=r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma))