Example #1
0
def window_plot():
    """Draw a sinusoid, a Hamming window, and the windowed wave in three rows.

    The figure is written out through thinkplot under the root name
    'windowing2'.
    """
    sinusoid = thinkdsp.SinSignal(freq=440)
    # 10.25 periods: the segment deliberately ends part-way through a cycle.
    duration = sinusoid.period * 10.25
    plain = sinusoid.make_wave(duration)
    windowed = sinusoid.make_wave(duration)

    # Wrap the raw Hamming coefficients in a Wave so they can be plotted
    # with the same framerate as the signal.
    coefficients = numpy.hamming(len(plain.ys))
    window = thinkdsp.Wave(coefficients, plain.framerate)

    windowed.hamming()

    thinkplot.preplot(rows=3, cols=1)
    pyplot.subplots_adjust(wspace=0.3, hspace=0.3,
                           right=0.95, left=0.1,
                           top=0.95, bottom=0.05)

    # Every panel shares the same axis limits; only the last one is labelled.
    panels = [
        (plain, {}),
        (window, {}),
        (windowed, {'xlabel': 'time (s)'}),
    ]
    for position, (wave, extra) in enumerate(panels, start=1):
        thinkplot.subplot(position)
        wave.plot()
        thinkplot.Config(axis=[0, duration, -1.07, 1.07], **extra)

    thinkplot.save(root='windowing2')
Example #2
0
def main():
    """Show four views of the values read from 'mystery0.dat'.

    The panels are titled 'Naive Pmf', 'Binned Hist', 'KDE PDF' and 'CDF'.
    """
    data = read_file('mystery0.dat')

    raw_pmf = thinkstats2.MakePmfFromList(data)
    cdf = thinkstats2.MakeCdfFromList(data)

    # Evaluate the estimated density on 101 evenly spaced points spanning
    # the observed range.
    estimated = thinkstats2.EstimatedPdf(data)
    low, high = min(data), max(data)
    kde_pmf = estimated.MakePmf(numpy.linspace(low, high, 101))

    binned_pmf = thinkstats2.MakePmfFromList(BinData(data, low, high, 51))

    # (subplot position, drawing call, panel title)
    panels = [
        (1, lambda: thinkplot.Hist(raw_pmf, width=0.1), 'Naive Pmf'),
        (2, lambda: thinkplot.Hist(binned_pmf), 'Binned Hist'),
        (3, lambda: thinkplot.Pmf(kde_pmf), 'KDE PDF'),
        (4, lambda: thinkplot.Cdf(cdf), 'CDF'),
    ]
    for position, draw, title in panels:
        thinkplot.SubPlot(2, 2, position)
        draw()
        thinkplot.Config(title=title)

    thinkplot.Show()
Example #3
0
def MakeBabyBoom():
    """Plot CDF of interarrival time on log and linear scales.

    Saves two figures:
      'analytic_interarrivals': the CDF next to the CCDF on a log-y scale.
      'analytic_interarrivals_model': the CCDF of the data together with a
          sample drawn from an exponential model.
    """
    # compute the interarrival times
    df = ReadBabyBoom()
    diffs = df.minutes.diff()
    cdf = thinkstats2.Cdf(diffs, label='actual')

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Config(xlabel='minutes',
                     ylabel='CCDF',
                     yscale='log',
                     legend=False)

    thinkplot.Save(root='analytic_interarrivals')

    n = len(diffs)
    # random.expovariate takes the event rate (1 / mean gap).  The gaps are
    # in minutes, so the rate must be events per minute.
    # NOTE(review): assumes the constants mean 44 events in 24 hours; the
    # previous `44 / 24 * 60.0` multiplied by 60 instead of dividing, which
    # gave a rate of 110 per minute (60 with Python 2 integer division).
    lam = 44.0 / (24 * 60)
    sample = [random.expovariate(lam) for _ in range(n)]
    model = thinkstats2.Cdf(sample, label='model')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([cdf, model], complement=True)
    thinkplot.Save(root='analytic_interarrivals_model',
                   title='Time between births',
                   xlabel='minutes',
                   ylabel='CCDF',
                   yscale='log')
Example #4
0
def PlotRemainingLifetime(sf1, sf2):
    """Plot remaining-lifetime curves for two survival functions side by side.

    The left panel uses the default summary of sf1; the right panel uses the
    median (50th percentile) of sf2 with np.inf as the filler value.  The
    figure is saved under the root name 'survival6'.

    sf1: SurvivalFunction for pregnancy length
    sf2: SurvivalFunction for age at first marriage
    """
    def median(pmf):
        return pmf.Percentile(50)

    thinkplot.PrePlot(cols=2)
    thinkplot.Plot(sf1.RemainingLifetime())
    thinkplot.Config(title='remaining pregnancy length',
                     xlabel='weeks',
                     ylabel='mean remaining weeks')

    thinkplot.SubPlot(2)
    thinkplot.Plot(sf2.RemainingLifetime(filler=np.inf, func=median))
    thinkplot.Config(title='years until first marriage',
                     ylim=[0, 15],
                     xlim=[11, 31],
                     xlabel='age (years)',
                     ylabel='median remaining years')

    thinkplot.Save(root='survival6', formats=FORMATS)
Example #5
0
def MakeFigures(df):
    """Save model-CDF and normal-probability figures for weights.

    Each figure has two panels: the raw weights on the left and their
    base-10 logarithm on the right.  The figures are saved as
    'brfss_weight' and 'brfss_weight_normal'.

    df: DataFrame with a wtkg2 column; missing values are dropped
    """
    weights = df.wtkg2.dropna()
    log_weights = np.log10(weights)

    # (root name, drawing function, left-panel options, right-panel options)
    figures = [
        ('brfss_weight',
         MakeNormalModel,
         dict(xlabel='adult weight (kg)', ylabel='CDF'),
         dict(xlabel='adult weight (log10 kg)')),
        ('brfss_weight_normal',
         MakeNormalPlot,
         dict(xlabel='z', ylabel='weights (kg)'),
         dict(xlabel='z', ylabel='weights (log10 kg)')),
    ]

    for root, draw, linear_options, log_options in figures:
        thinkplot.PrePlot(cols=2)
        draw(weights)
        thinkplot.Config(**linear_options)

        thinkplot.SubPlot(2)
        draw(log_weights)
        thinkplot.Config(**log_options)

        thinkplot.Save(root=root)
Example #6
0
def MakePdfs(greq, less):
    """Show estimated birth-weight PDFs for two groups in adjacent panels.

    greq: frame whose totalwgt_lb column is plotted as 'greater/equal to 30'
    less: frame whose totalwgt_lb column is plotted as 'less than 30'
    """
    # Missing weights are dropped before the density is estimated.
    estimates = [
        (thinkstats2.EstimatedPdf(greq.totalwgt_lb.dropna()),
         'greater/equal to 30'),
        (thinkstats2.EstimatedPdf(less.totalwgt_lb.dropna()),
         'less than 30'),
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (pdf, label) in enumerate(estimates, start=1):
        thinkplot.SubPlot(position)
        thinkplot.Pdf(pdf, label=label)
        thinkplot.Config(xlabel='Birth weight (lbs)', ylabel='PDF')
    thinkplot.Show()
Example #7
0
def pmf_stuff(width, x_low, x_high, third, pmf_one, pmf_two, label,
              y_axis_scale):
    """Show two PMFs as paired bars (left panel) and as overlaid PMFs (right).

    width: bar width used for both histograms
    x_low, x_high: first two entries of the axis list
    third: third entry of the axis list
        (presumably the lower y limit -- confirm against thinkplot.Config)
    pmf_one, pmf_two: the two PMFs to compare
    label: x-axis label used on both panels
    y_axis_scale: fourth entry of the axis list
        (presumably the upper y limit -- confirm against thinkplot.Config)
    """
    axis = [x_low, x_high, third, y_axis_scale]

    # Opposite alignments put the two sets of bars next to each other
    # instead of on top of each other.
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(pmf_one, align='right', width=width)
    thinkplot.Hist(pmf_two, align='left', width=width)
    thinkplot.Config(xlabel=label, ylabel='PMF', axis=axis)

    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([pmf_one, pmf_two])
    thinkplot.Config(xlabel=label, ylabel='PMF', axis=axis)
    thinkplot.Show()
def MakeFigures():
    """Plot the distribution of populations against Pareto and lognormal models.

    Two figures are saved:
      'populations_pareto': CCDF of log10 population on a log-y scale with a
          Pareto model.  The tail looks like a straight line, which suggests
          a Pareto distribution, but that turns out to be misleading.
      'populations_normal': CDF of log10 population with a normal model,
          next to a normal probability plot of log10 population.  The
          sigmoid CDF and the probability plot indicate that a lognormal
          model describes the data well.

    Many phenomena that have been described with Pareto models can be
    described as well, or better, with lognormal models.
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))

    log_pops = np.log10(pops)
    cdf = thinkstats2.Cdf(pops, label='data')
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # Figure 1: Pareto model drawn as a CCDF on the same log10 x-axis.
    pareto_xs, pareto_ps = thinkstats2.RenderParetoCdf(
        xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(pareto_xs), 1 - pareto_ps,
                   label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True)
    thinkplot.Config(xlabel='log10 population', ylabel='CCDF', yscale='log')
    thinkplot.Save(root='populations_pareto')

    # Figure 2, left panel: normal model fitted to the log10 populations.
    thinkplot.PrePlot(cols=2)

    mu, sigma = log_pops.mean(), log_pops.std()
    normal_xs, normal_ps = thinkstats2.RenderNormalCdf(
        mu, sigma, low=0, high=8)
    thinkplot.Plot(normal_xs, normal_ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Config(xlabel='log10 population', ylabel='CDF')

    # Figure 2, right panel: normal probability plot.
    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z', ylabel='log10 population', xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')
Example #9
0
def PlotSurvivalFunctions(sf_map, predict_flag=False):
    """Draw a 10th-90th percentile band and a median line for each group.

    Groups are drawn in descending order of name.  The median line is drawn
    only when predict_flag is false.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual
    """
    thinkplot.PrePlot(len(sf_map))

    for name, sf_seq in sorted(sf_map.items(), reverse=True):
        # Skip groups with no survival functions or an empty first one.
        if len(sf_seq) == 0 or len(sf_seq[0]) == 0:
            continue

        ts, bands = MakeSurvivalCI(sf_seq, [10, 50, 90])
        thinkplot.FillBetween(ts, bands[0], bands[2], color='gray')

        if predict_flag:
            continue
        thinkplot.Plot(ts, bands[1], label='19%d' % name)

    thinkplot.Config(xlabel='age (years)',
                     ylabel='prob unmarried',
                     xlim=[14, 45],
                     ylim=[0, 1],
                     legend=True,
                     loc='upper right')
Example #10
0
def main():
    """Show the CDF of the 'ps' column on a log-x scale and Pareto-transformed.

    The DataFrame is printed after the figure is shown.
    """
    df = ReadData()
    cdf = thinkstats2.Cdf(df['ps'])

    thinkplot.PrePlot(rows=1, cols=2)

    # (panel title, options forwarded to thinkplot.Cdf)
    panels = [
        ('logx', {'xscale': 'log'}),
        ('pareto', {'transform': 'pareto'}),
    ]
    for position, (title, cdf_options) in enumerate(panels, start=1):
        thinkplot.SubPlot(position)
        # thinkplot.Cdf returns options that are passed on to Config.
        scale = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **scale)

    thinkplot.show(legend=False)

    print(df)
Example #11
0
def MakeArrivalDepartureDelayScatterPlots(flights):
    """Save a jittered scatter plot of departure delay against arrival delay.

    A random sample of 10000 rows is plotted and the figure is saved under
    the root name 'ArrivalDepartureDelayScatterplot'.

    flights: DataFrame of flights to sample from
    """
    sample = thinkstats2.SampleRows(flights, 10000)

    # Two columns are reserved, but only the first panel is drawn.
    thinkplot.PrePlot(cols=2)

    departure, arrival = GetArrivalDepartureDelay(sample,
                                                  hjitter=1.3,
                                                  wjitter=0.5)

    thinkplot.Scatter(arrival, departure, alpha=1)
    thinkplot.Config(xlabel='arrival delay (min)',
                     ylabel='departure delay (min)',
                     legend=False)

    thinkplot.Save(root='ArrivalDepartureDelayScatterplot')
Example #12
0
def SimulateSample(lam=2, n=10, m=1000):
    """Simulate the sampling distribution of 1/mean as an estimator of lam.

    Prints the standard error and the 5th-95th percentile interval of the
    estimates, then plots their CDF with the interval ends marked.

    lam: parameter of an exponential distribution
    n: sample size
    m: number of iterations

    returns: RMSE of the estimates relative to lam
    """
    def mark(x, height=1):
        # light vertical line from 0 up to `height` at position x
        thinkplot.Plot([x, x], [0, height], color='0.8', linewidth=3)

    # One estimate per iteration: the reciprocal of the sample mean.
    estimates = [1 / np.mean(np.random.exponential(1 / lam, n))
                 for _ in range(m)]

    stderr = RMSE(estimates, lam)
    print('standard error', stderr)

    cdf = thinkstats2.Cdf(estimates)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)
    for bound in ci:
        mark(bound)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Config(root='estimation2',
                     xlabel='estimate',
                     ylabel='CDF',
                     title='Sampling distribution')

    return stderr
Example #13
0
def Sample(lam=2, iters=1000):
    """Simulate the sampling distribution of 1/mean for n = 5, 10 and 15.

    The standard error and the 5th-95th percentile interval are printed for
    every n; the CDF is plotted only for n = 10.

    lam: parameter of an exponential distribution
    iters: number of simulated samples per value of n
    """
    def mark(x, height=1):
        # light vertical line from 0 up to `height` at position x
        thinkplot.Plot([x, x], [0, height], color='0.8', linewidth=3)

    # repeat for multiple values of n
    for n in [5, 10, 15]:
        estimates = [1 / np.mean(np.random.exponential(1 / lam, n))
                     for _ in range(iters)]

        stderr = RMSE(estimates, lam)
        print("The standard error for n = {} is: {:.5f}".format(n, stderr))

        cdf = thinkstats2.Cdf(estimates)
        ci = cdf.Percentile(5), cdf.Percentile(95)
        print("The 90'%' confidence interval is: {}".format(ci))

        if n != 10:
            continue

        # mark the lower/upper ends of the interval, then plot the CDF
        for bound in ci:
            mark(bound)

        thinkplot.Cdf(cdf)
        thinkplot.Config(xlabel='estimate',
                         ylabel='CDF',
                         title='Sampling Distribution')
def ScatterPlot(ages, weights, alpha=1.0):
    """Scatter weights against ages on fixed axis limits, without a legend.

    ages: values for the x-axis
    weights: values for the y-axis
    alpha: alpha value passed to thinkplot.Scatter
    """
    thinkplot.Scatter(ages, weights, alpha=alpha)

    options = dict(xlabel='age (years)',
                   ylabel='weight (lbs)',
                   xlim=[10, 45],
                   ylim=[0, 15],
                   legend=False)
    thinkplot.Config(**options)
Example #15
0
def MakeStep(greq, less):
    """Show the pregnancy-length PMFs of two groups on one set of axes.

    greq: frame whose prglngth column is labelled 'greater/equal to 30'
    less: frame whose prglngth column is labelled 'less than 30'
    """
    groups = [(greq, 'greater/equal to 30'), (less, 'less than 30')]
    pmfs = [thinkstats2.Pmf(group.prglngth, label=label)
            for group, label in groups]

    thinkplot.Pmfs(pmfs)
    thinkplot.Config(xlabel='Pregnancy length(weeks)', axis=[0, 50, 0, 0.6])
    thinkplot.Show()
Example #16
0
def PlotScatter(age, wgt, xmin, xmax, ymin, ymax):
    """Show a scatter plot of birth weight against age within given limits.

    age: values for the x-axis
    wgt: values for the y-axis
    xmin, xmax: x-axis limits
    ymin, ymax: y-axis limits
    """
    thinkplot.Scatter(age, wgt, alpha=1.0)

    options = dict(xlabel='Age (Years)',
                   ylabel='Birth Weight (lbs)',
                   xlim=[xmin, xmax],
                   ylim=[ymin, ymax],
                   legend=False)
    thinkplot.Config(**options)
    thinkplot.Show()
def PlotDailies(dailies):
    """Plot daily prices for each quality in its own row and save the figure.

    The layout has three rows.  Only the first row carries a title, and only
    the third row keeps its x ticks (rotated by 30 degrees).  The figure is
    saved under the root name 'timeseries1'.

    dailies: map from name to DataFrame
    """
    thinkplot.PrePlot(rows=3)

    for row, (name, daily) in enumerate(dailies.items(), start=1):
        thinkplot.SubPlot(row)

        heading = 'price per gram ($)' if row == 1 else ''
        thinkplot.Config(ylim=[0, 20], title=heading)
        thinkplot.Scatter(daily.ppg, s=10, label=name)

        if row == 3:
            pyplot.xticks(rotation=30)
        else:
            # hide the x ticks on every row except the third
            thinkplot.Config(xticks=[])

    thinkplot.Save(root='timeseries1', formats=FORMATS)
def HexBin(ages, weights, bins=None):
    """Draw a hexbin plot of weight against age and label the axes.

    ages: sequence of float
    weights: sequence of float
    bins: 'log' or None for linear
    """
    thinkplot.HexBin(ages, weights, bins=bins)

    options = dict(xlabel='age (years)', ylabel='weight (lbs)', legend=False)
    thinkplot.Config(**options)
Example #19
0
def MakeCdfs(male, female):
    """Show the CDFs of baby weight for males and females on one plot.

    male: frame whose totalwgt_lb column is labelled 'Male'
    female: frame whose totalwgt_lb column is labelled 'Female'
    """
    groups = [(male, 'Male'), (female, 'Female')]
    cdfs = [thinkstats2.Cdf(group.totalwgt_lb, label=label)
            for group, label in groups]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(cdfs)
    thinkplot.Config(xlabel='Baby Weight (Lbs)',
                     ylabel='CDF',
                     title='Baby Weights')
    thinkplot.Show()
Example #20
0
def MakeCdfs(male, female):
    """Show the CDFs of weekend alcohol consumption for males and females.

    male: frame whose alcwknd column is labelled 'Male'
    female: frame whose alcwknd column is labelled 'Female'
    """
    groups = [(male, 'Male'), (female, 'Female')]
    cdfs = [thinkstats2.Cdf(group.alcwknd, label=label)
            for group, label in groups]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(cdfs)
    thinkplot.Config(xlabel='Alcohol Consumed (grams)',
                     ylabel='CDF',
                     title='Weekend Alcohol Consumption')
    thinkplot.Show()
def PrintDiffMeansOneSided(
        data,
        title="CDF of sampling distribution of null hypothesis",
        label="difference in mean album score"):
    """Run a one-sided difference-in-means test, show its CDF, print the p-value.

    data: input passed to hyp.DiffMeansOneSided
    title: title of the plot
    label: x-axis label of the plot
    """
    test = hyp.DiffMeansOneSided(data)
    # The p-value is computed before plotting and printed once the plot
    # has been shown.
    p_value = test.PValue()

    test.PlotCdf(label='CDF')
    tp.Config(loc=2)
    tp.Show(xlabel=label, ylabel='CDF', title=title)

    print("Calculated p-value:", p_value)
Example #22
0
def MakePmfs(greq, less):
    """Show the birth-weight PMFs of two groups with opposite alignments.

    greq: frame whose totalwgt_lb column is labelled 'greater/equal to 30'
    less: frame whose totalwgt_lb column is labelled 'less than 30'
    """
    bar_width = .4 / 16

    pmf_greq = thinkstats2.Pmf(greq.totalwgt_lb, label='greater/equal to 30')
    pmf_less = thinkstats2.Pmf(less.totalwgt_lb, label='less than 30')

    # The 'less' group is drawn first, aligned left; 'greq' is aligned right.
    for pmf, side in [(pmf_less, 'left'), (pmf_greq, 'right')]:
        thinkplot.Pmf(pmf, align=side, width=bar_width)

    thinkplot.Config(axis=[0, 15, 0, 0.04])
    thinkplot.Show()
Example #23
0
def MakeCdfs(greq, less):
    """Show the birth-weight CDFs of two groups and print their medians.

    greq: frame whose totalwgt_lb column is labelled 'greater/equal to 30'
    less: frame whose totalwgt_lb column is labelled 'less than 30'
    """
    greqcdf = thinkstats2.Cdf(greq.totalwgt_lb, label='greater/equal to 30')
    lesscdf = thinkstats2.Cdf(less.totalwgt_lb, label='less than 30')
    thinkplot.PrePlot(2)
    thinkplot.Cdfs([greqcdf, lesscdf])
    thinkplot.Config(xlabel='Weight (lbs)', ylabel='CDF')
    thinkplot.Show()

    # A single pre-formatted argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function; the old
    # `print 'text', value` form is a SyntaxError on Python 3.
    print('Greater/equal to 30 50th percentile: %s' % greqcdf.Percentile(50))
    print('Less than 30 50th percentile: %s' % lesscdf.Percentile(50))
Example #24
0
def MakePmfs(greq, less):
    """Show pregnancy-length histograms of two groups as paired bars.

    greq: frame whose prglngth column is labelled 'greater/equal to 30'
    less: frame whose prglngth column is labelled 'less than 30'
    """
    bar_width = 0.45

    pmf_greq = thinkstats2.Pmf(greq.prglngth, label='greater/equal to 30')
    pmf_less = thinkstats2.Pmf(less.prglngth, label='less than 30')

    # The 'less' group is drawn first, aligned left; 'greq' is aligned right.
    for pmf, side in [(pmf_less, 'left'), (pmf_greq, 'right')]:
        thinkplot.Hist(pmf, align=side, width=bar_width)

    thinkplot.Config(axis=[0, 50, 0, 0.6])
    thinkplot.Show()
def MakeBabyBoom():
    """Save the CDF and log-scale CCDF of interarrival times side by side.

    The figure is saved under the root name 'analytic_interarrivals'.
    """
    # interarrival times are the successive differences of the minutes column
    interarrivals = ReadBabyBoom().minutes.diff()
    cdf = thinkstats2.Cdf(interarrivals, label='actual')

    # left panel: CDF on linear axes
    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    linear_options = dict(xlabel='minutes', ylabel='CDF', legend=False)
    thinkplot.Config(**linear_options)

    # right panel: complementary CDF on a log-y scale
    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    log_options = dict(xlabel='minutes',
                       ylabel='CCDF',
                       yscale='log',
                       legend=False)
    thinkplot.Config(**log_options)

    thinkplot.Save(root='analytic_interarrivals', legend=False)
Example #26
0
def MakeStep(male, female):
    """Show the PMFs of weekend alcohol consumption for males and females.

    male: frame whose alcwknd column is labelled 'Male'
    female: frame whose alcwknd column is labelled 'Female'
    """
    groups = [(male, 'Male'), (female, 'Female')]
    pmfs = [thinkstats2.Pmf(group.alcwknd, label=label)
            for group, label in groups]

    thinkplot.Pmfs(pmfs)
    thinkplot.Config(xlabel='Alcohol Consumption (grams)',
                     ylabel='PMF',
                     axis=[0, 800, 0, 0.1],
                     title='Weekend Alcohol Consumption')
    thinkplot.Show()
Example #27
0
def PlotFilled(daily, name):
    """Plot the EWMA and filled data.

    The filled prices are drawn as a scatter plot labelled with `name`, and
    the EWMA series is drawn as a line on top.

    daily: DataFrame of daily prices
    name: string
    """
    filled = FillMissing(daily, span=30)
    thinkplot.Scatter(filled.ppg, s=15, alpha=0.2, label=name)
    thinkplot.Plot(filled.ewma, label='EWMA', color='#ff7f00')
    plt.xticks(rotation=30)
    # NOTE(review): the caption was passed as `label=`, which does not look
    # like an axis-label option (every other Config call in this file uses
    # xlabel/ylabel); it is now passed as `ylabel=` so it captions the
    # price axis.
    thinkplot.Config(ylabel='Price per gram ($)')
    thinkplot.Show()
Example #28
0
def PlotSurvival(complete):
    """Plot the survival curve with the CDF, and the hazard curve below it.

    The values of the CDF and survival function at 13, and of the hazard
    function at 39, are printed along the way.

    complete: list of complete lifetimes
    """
    thinkplot.PrePlot(3, rows=2)

    cdf = thinkstats2.Cdf(complete, label='cdf')
    survival = MakeSurvivalFromCdf(cdf, label='survival')
    for curve in (cdf, survival):
        print(curve[13])

    # first panel: survival function with the CDF drawn faintly
    thinkplot.Plot(survival)
    thinkplot.Cdf(cdf, alpha=0.2)
    thinkplot.Config()

    # second panel: hazard function
    thinkplot.SubPlot(2)
    hazard = survival.MakeHazardFunction(label='hazard')
    print(hazard[39])
    thinkplot.Plot(hazard)
    thinkplot.Config(ylim=[0, 0.75])
Example #29
0
def ScatterPlot(heights, weights, alpha=1.0):
    """Scatter weight against height on fixed axes, without a legend.

    heights: sequence of float
    weights: sequence of float
    alpha: float
    """
    thinkplot.Scatter(heights, weights, alpha=alpha)

    options = dict(xlabel='height (cm)',
                   ylabel='weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)
    thinkplot.Config(**options)
Example #30
0
def HexBin(heights, weights, bins=None):
    """Draw a hexbin plot of weight against height on fixed axes.

    heights: sequence of float
    weights: sequence of float
    bins: 'log' or None for linear
    """
    thinkplot.HexBin(heights, weights, bins=bins)

    options = dict(xlabel='height (cm)',
                   ylabel='weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)
    thinkplot.Config(**options)