def process_noise(signal, root='red'):
    """Plots a noise signal in several forms and saves each figure.

    Makes a 0.5 second wave from the signal at a framerate of 11025,
    saves seven figures through thinkplot.save (named root + 'noise0'
    through root + 'noise6'), and prints the slope returned by
    spectrum.estimate_slope().

    signal: object that provides make_wave(duration, framerate)
    root: string prefix for the names passed to thinkplot.save
    """
    wave = signal.make_wave(duration=0.5, framerate=11025)

    # 0: waveform (only the first 0.1 seconds are plotted)
    segment = wave.segment(duration=0.1)
    segment.plot(linewidth=1, alpha=0.5)
    thinkplot.save(root=root + 'noise0',
                   xlabel='time (s)',
                   ylabel='amplitude')

    spectrum = wave.make_spectrum()

    # 1: spectrum
    spectrum.plot_power(linewidth=1, alpha=0.5)
    thinkplot.save(root=root + 'noise1',
                   xlabel='frequency (Hz)',
                   ylabel='power density')

    slope, _, _, _, _ = spectrum.estimate_slope()
    # A single pre-formatted argument prints the same text under both
    # Python 2 and Python 3 (the old print statement is a SyntaxError
    # on Python 3).
    print('estimated slope %s' % (slope,))

    # 2: integrated spectrum
    integ = spectrum.make_integrated_spectrum()
    integ.plot_power()
    thinkplot.save(root=root + 'noise2',
                   xlabel='frequency (Hz)',
                   ylabel='normalized power')

    # 3: log-log spectral density
    spectrum.plot_power(low=1, linewidth=1, alpha=0.5)
    thinkplot.save(root=root + 'noise3',
                   xlabel='frequency (Hz)',
                   ylabel='power density',
                   xscale='log',
                   yscale='log')

    # 4: CDF of power density
    cdf = thinkstats2.MakeCdfFromList(spectrum.power)
    thinkplot.cdf(cdf)
    thinkplot.save(root=root + 'noise4',
                   xlabel='power density',
                   ylabel='CDF')

    # 5: CCDF of power density, log-y
    thinkplot.cdf(cdf, complement=True)
    thinkplot.save(root=root + 'noise5',
                   xlabel='power density',
                   ylabel='log(CCDF)',
                   yscale='log')

    # 6: normal probability plots of the real and imaginary parts
    thinkstats2.NormalProbabilityPlot(spectrum.real,
                                      label='real',
                                      data_color='#253494')
    # NOTE(review): the imaginary part is shifted by -50, presumably so
    # the two curves do not overlap -- confirm.
    thinkstats2.NormalProbabilityPlot(spectrum.imag - 50,
                                      label='imag-50',
                                      data_color='#1D91C0')
    thinkplot.save(root=root + 'noise6',
                   xlabel='normal sample',
                   ylabel='power density')
def plot_gaussian_noise():
    """Plots the real and imaginary spectrum parts of Gaussian noise.

    Calls thinkdsp.random_seed(18), makes a 0.5 second wave of
    uncorrelated Gaussian noise at a framerate of 48000, and draws a
    normal probability plot of the real part and of the imaginary part
    of its spectrum in two subplots.  The figure is saved with
    root='noise1'.
    """
    thinkdsp.random_seed(18)

    noise = thinkdsp.UncorrelatedGaussianNoise()
    noise_wave = noise.make_wave(duration=0.5, framerate=48000)
    noise_spectrum = noise_wave.make_spectrum()

    # first subplot: real part
    thinkplot.preplot(2, cols=2)
    thinkstats2.NormalProbabilityPlot(noise_spectrum.real, label='real')
    thinkplot.config(
        xlabel='Normal sample',
        ylabel='Amplitude',
        ylim=[-250, 250],
        loc='lower right',
    )

    # second subplot: imaginary part
    thinkplot.subplot(2)
    thinkstats2.NormalProbabilityPlot(noise_spectrum.imag, label='imag')
    thinkplot.config(
        xlabel='Normal sample',
        ylim=[-250, 250],
        loc='lower right',
    )

    thinkplot.save(root='noise1')
def MakeFigures():
    """Plots the CDF of populations in several forms.

    On a log-log scale the tail of the CCDF looks like a straight line,
    which suggests a Pareto distribution, but that turns out to be misleading.

    On a log-x scale the distribution has the characteristic sigmoid of
    a lognormal distribution.

    The normal probability plot of log(sizes) confirms that the
    data fit the lognormal model very well.

    Many phenomena that have been described with Pareto models can be
    described as well, or better, with lognormal models.

    Reads the populations with ReadData, prints how many there are, and
    saves two figures with roots 'populations_pareto' and
    'populations_normal'.
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))

    # Every plot below works on the log10 populations, so only that CDF
    # is built (the CDF of the raw populations was never used).
    log_pops = np.log10(pops)
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # pareto plot
    xs, ys = thinkstats2.RenderParetoCdf(xmin=5000, alpha=1.4, low=0, high=1e7)
    # 1-ys turns the model CDF into a CCDF to match complement=True below
    thinkplot.Plot(np.log10(xs), 1-ys, label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CCDF',
                     yscale='log')
    thinkplot.Save(root='populations_pareto')

    # lognormal plot
    thinkplot.PrePlot(cols=2)

    # normal model with the sample mean and standard deviation of log_pops
    mu, sigma = log_pops.mean(), log_pops.std()
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=8)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CDF')

    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z',
                     ylabel='log10 population',
                     xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')
def NormalPlotSamples(samples, plot=1, ylabel=''):
    """Draws one normal probability plot per sample, each in its own subplot.

    samples: sequence of (n, sample) pairs; n appears in the subplot title
    plot: subplot index used for the first pair; later pairs count up by one
    ylabel: string passed as the y-axis label of every subplot
    """
    for offset, (n, sample) in enumerate(samples):
        thinkplot.SubPlot(plot + offset)
        thinkstats2.NormalProbabilityPlot(sample)

        subplot_title = 'n=%d' % n
        thinkplot.Config(
            title=subplot_title,
            legend=False,
            xticks=[],
            yticks=[],
            ylabel=ylabel,
        )
def PlotAdultWeights(live):
    """Draws a normal probability plot of log10 simulated adult weights.

    live: DataFrame of live births; its totalwgt_lb column is read

    results:

    With n=40 the distribution is approximately lognormal except for
    the lowest weights.

    The actual distribution might deviate from lognormal because it is
    a mixture of people at different ages, or because annual weight
    gains are correlated.
    """
    n = 40
    iters = 1000

    # drop missing birth weights before using them as simulation input
    weights_at_birth = live.totalwgt_lb.dropna().values

    adult_weights = [
        GenerateAdultWeight(weights_at_birth, n)
        for _ in range(iters)
    ]

    thinkstats2.NormalProbabilityPlot(np.log10(adult_weights))
    thinkplot.Show(
        xlabel='standard normal values',
        ylabel='adult weight (log10 lbs)',
    )
#%% thinkplot.PrePlot(cols=2) mu, sigma = log_pops.mean(), log_pops.std() xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=8) thinkplot.Plot(xs, ps, label='model', color='0.8') thinkplot.Cdf(cdf_log) thinkplot.Config(xlabel='log10 population', ylabel='CDF', loc='lower right') #%% [markdown] # Here's a normal probability plot for the log-populations. The model fits the data well except in the right tail, where the biggest cities are bigger than expected. #%% thinkstats2.NormalProbabilityPlot(log_pops, label='data') thinkplot.Config(xlabel='Random variate', ylabel='log10 population', xlim=[-5, 5]) #%% [markdown] # ## Random variates # # When we have an analytic CDF, we can sometimes invert it to generate random values. The following function generates values from an exponential distribution. #%% import random def expovariate(lam): p = random.random()