Python SubPlotの例、thinkplot.SubPlot Pythonの例

コード例 #1

0

ファイルを表示

def main():
    """Show four summaries of the values in 'mystery0.dat' on a 2x2 grid.

    Panels, in subplot order: the raw PMF drawn as bars, the PMF of the
    output of BinData, a PMF evaluated from the estimated PDF, and the CDF.
    """
    values = read_file('mystery0.dat')

    raw_pmf = thinkstats2.MakePmfFromList(values)
    value_cdf = thinkstats2.MakeCdfFromList(values)

    estimate = thinkstats2.EstimatedPdf(values)
    lo, hi = min(values), max(values)
    # evaluate the estimated PDF at 101 evenly spaced points across the range
    smooth_pmf = estimate.MakePmf(numpy.linspace(lo, hi, 101))

    binned_pmf = thinkstats2.MakePmfFromList(BinData(values, lo, hi, 51))

    # (drawing callable, panel title) in subplot order
    panels = [
        (lambda: thinkplot.Hist(raw_pmf, width=0.1), 'Naive Pmf'),
        (lambda: thinkplot.Hist(binned_pmf), 'Binned Hist'),
        (lambda: thinkplot.Pmf(smooth_pmf), 'KDE PDF'),
        (lambda: thinkplot.Cdf(value_cdf), 'CDF'),
    ]
    for position, (draw, title) in enumerate(panels, start=1):
        thinkplot.SubPlot(2, 2, position)
        draw()
        thinkplot.Config(title=title)

    thinkplot.Show()

コード例 #2

0

ファイルを表示

def MakeFigures(df):
    """Save two two-panel figures for weights and log10 weights.

    df: DataFrame with a wtkg2 column; missing values are dropped

    Each figure draws the linear-scale weights in the first panel and the
    log10 weights in the second.  'brfss_weight' uses MakeNormalModel and
    'brfss_weight_normal' uses MakeNormalPlot.
    """
    kg = df.wtkg2.dropna()
    log_kg = np.log10(kg)

    # (drawing function, output root, first-panel options, second-panel options)
    figures = [
        (MakeNormalModel, 'brfss_weight',
         dict(xlabel='adult weight (kg)', ylabel='CDF'),
         dict(xlabel='adult weight (log10 kg)')),
        (MakeNormalPlot, 'brfss_weight_normal',
         dict(xlabel='z', ylabel='weights (kg)'),
         dict(xlabel='z', ylabel='weights (log10 kg)')),
    ]
    for draw, root, linear_options, log_options in figures:
        thinkplot.PrePlot(cols=2)
        draw(kg)
        thinkplot.Config(**linear_options)

        thinkplot.SubPlot(2)
        draw(log_kg)
        thinkplot.Config(**log_options)

        thinkplot.Save(root=root)

コード例 #3

0

ファイルを表示

ファイル: scatter.py プロジェクト: avinashalapati09/dsc530

def MakeFigures(df):
    """Save two two-panel figures of height against weight.

    df: DataFrame passed to SampleRows and GetHeightWeight

    'scatter1': plain scatter of a 5000-row sample, then the same sample
    with jitter.  'scatter2': the jittered sample drawn with alpha=0.1,
    then a hexbin plot of the jittered values from the whole of df.
    """
    subset = thinkstats2.SampleRows(df, 5000)

    # first figure, left panel: sample without jitter
    thinkplot.PrePlot(cols=2)
    raw_h, raw_w = GetHeightWeight(subset)
    ScatterPlot(raw_h, raw_w)

    # first figure, right panel: sample with jitter
    thinkplot.SubPlot(2)
    jit_h, jit_w = GetHeightWeight(subset, hjitter=1.3, wjitter=0.5)
    ScatterPlot(jit_h, jit_w)

    thinkplot.Save(root='scatter1')

    # second figure, left panel: the same jittered sample, mostly transparent
    thinkplot.PrePlot(cols=2)
    ScatterPlot(jit_h, jit_w, alpha=0.1)

    # second figure, right panel: hexbin over every row of df
    thinkplot.SubPlot(2)
    all_h, all_w = GetHeightWeight(df, hjitter=1.3, wjitter=0.5)
    HexBin(all_h, all_w)
    thinkplot.Save(root='scatter2')

コード例 #4

0

ファイルを表示

ファイル: chap8ex.py プロジェクト: seppomerimaa/ThinkStats2

def ex3():
    """Simulate games and show four panels about the results and their RMSE.

    Panels: histogram of 1000 SimulateGame results for lam=4; their CDF with
    vertical lines at the 5th and 95th percentiles; RMSE against lam for
    lam in 1..14; RMSE against the number of simulated games.
    """
    def VertLine(x, y=1):
        """Draw a light grey vertical line from (x, 0) to (x, y)."""
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    lam = 4
    totals = [SimulateGame(lam=lam) for _ in range(1000)]
    print('RMSE', RMSE(totals, lam))
    totals_hist = thinkstats2.Hist(totals)
    totals_cdf = thinkstats2.Cdf(totals)

    thinkplot.PrePlot(rows=2, cols=2)
    thinkplot.SubPlot(1)
    thinkplot.Hist(totals_hist)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(totals_cdf)
    for percent in (5, 95):
        VertLine(totals_cdf.Percentile(percent))

    thinkplot.SubPlot(3)
    # lambda vs. rmse (original author's note: rmse goes up as lambda goes up)
    rates = range(1, 15)
    rate_errors = []
    for rate in rates:
        games = [SimulateGame(lam=rate) for _ in range(1000)]
        rate_errors.append(RMSE(games, rate))
    thinkplot.Plot(rates, rate_errors)

    thinkplot.SubPlot(4)
    # m vs. rmse (original author's note: rmse may go down very slowly as m
    # goes up, but that is not at all clear)
    sizes = np.arange(10, 1000, 10)
    size_errors = [RMSE([SimulateGame() for _ in range(size)], 4)
                   for size in sizes]
    thinkplot.Plot(sizes, size_errors)

    thinkplot.show()

コード例 #5

0

ファイルを表示

ファイル: 9_HO1.py プロジェクト: fullern1/previouscode

def MakePdfs(greq, less):
    """Show the estimated PDFs of totalwgt_lb for two groups side by side.

    greq: DataFrame drawn in the left panel, labelled 'greater/equal to 30'
    less: DataFrame drawn in the right panel, labelled 'less than 30'

    Missing totalwgt_lb values are dropped before estimating each PDF.
    """
    # both PDFs are estimated before any plotting starts
    panels = [
        (thinkstats2.EstimatedPdf(greq.totalwgt_lb.dropna()),
         'greater/equal to 30'),
        (thinkstats2.EstimatedPdf(less.totalwgt_lb.dropna()),
         'less than 30'),
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (pdf, label) in enumerate(panels, start=1):
        thinkplot.SubPlot(position)
        thinkplot.Pdf(pdf, label=label)
        thinkplot.Config(xlabel='Birth weight (lbs)', ylabel='PDF')
    thinkplot.Show()

コード例 #6

0

ファイルを表示

def PlotRemainingLifetime(sf1, sf2):
    """Save a two-panel figure of remaining lifetimes, rooted at 'survival6'.

    sf1: SurvivalFunction for pregnancy length (left panel, default
         RemainingLifetime summary)
    sf2: SurvivalFunction for age at first marriage (right panel, 50th
         percentile of the remaining lifetime, with filler=np.inf)
    """
    def median(pmf):
        """Return the 50th percentile of pmf."""
        return pmf.Percentile(50)

    thinkplot.PrePlot(cols=2)
    thinkplot.Plot(sf1.RemainingLifetime())
    thinkplot.Config(title='remaining pregnancy length',
                     xlabel='weeks',
                     ylabel='mean remaining weeks')

    thinkplot.SubPlot(2)
    thinkplot.Plot(sf2.RemainingLifetime(filler=np.inf, func=median))
    thinkplot.Config(title='years until first marriage',
                     ylim=[0, 15],
                     xlim=[11, 31],
                     xlabel='age (years)',
                     ylabel='median remaining years')

    thinkplot.Save(root='survival6', formats=FORMATS)

コード例 #7

0

ファイルを表示

ファイル: hinc.py プロジェクト: seppomerimaa/ThinkStats2

def main():
    """Show the CDF of the 'ps' column on log-x and pareto scales, then print the data."""
    df = ReadData()
    cdf = thinkstats2.Cdf(df['ps'])

    # (panel title, keyword arguments for thinkplot.Cdf)
    panels = [
        ('logx', dict(xscale='log')),
        ('pareto', dict(transform='pareto')),
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (title, cdf_options) in enumerate(panels, start=1):
        thinkplot.SubPlot(position)
        # thinkplot.Cdf returns options that are forwarded to Config
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    thinkplot.show(legend=False)

    print(df)

コード例 #8

0

ファイルを表示

ファイル: analytic.py プロジェクト: wu12345/ThinkStats2

def MakeBabyBoom():
    """Plot CDF of interarrival time on log and linear scales.

    Saves two figures:
      analytic_interarrivals: CDF (left) and log-y CCDF (right) of the
          differences between consecutive values of df.minutes
      analytic_interarrivals_model: log-y CCDF of the data together with a
          random exponential sample of the same length
    """
    # compute the interarrival times
    df = ReadBabyBoom()
    diffs = df.minutes.diff()
    cdf = thinkstats2.Cdf(diffs, label='actual')

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Config(xlabel='minutes',
                     ylabel='CCDF',
                     yscale='log',
                     legend=False)

    thinkplot.Save(root='analytic_interarrivals')

    n = len(diffs)
    # random.expovariate takes the rate, i.e. 1 / mean interval.  The data
    # are in minutes, so the rate must be events per minute.  Assuming the
    # constants mean 44 births in 24 hours, that is 44 / (24 * 60); the
    # previous expression, 44 / 24 * 60.0, evaluated to 110 per minute.
    lam = 44.0 / (24 * 60)
    sample = [random.expovariate(lam) for _ in range(n)]
    model = thinkstats2.Cdf(sample, label='model')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([cdf, model], complement=True)
    thinkplot.Save(root='analytic_interarrivals_model',
                   title='Time between births',
                   xlabel='minutes',
                   ylabel='CCDF',
                   yscale='log')

コード例 #9

0

ファイルを表示

def MakeHists(live):
    """Save a two-panel figure of the agepreg histogram.

    live: DataFrame with an agepreg column; values are floored before
          counting

    The left panel draws the Hist as bars and the right panel draws the same
    Hist with thinkplot.Pmf.  Output root: 'probability_agepreg_hist'.
    """
    age_hist = thinkstats2.Hist(np.floor(live.agepreg), label='agepreg')
    bounds = [0, 45, 0, 700]

    thinkplot.PrePlot(2, cols=2)

    thinkplot.SubPlot(1)
    thinkplot.Hist(age_hist)
    thinkplot.Config(xlabel='years', ylabel='frequency', axis=bounds)

    thinkplot.SubPlot(2)
    thinkplot.Pmf(age_hist)

    thinkplot.Save(root='probability_agepreg_hist',
                   xlabel='years',
                   axis=bounds)

コード例 #10

0

ファイルを表示

ファイル: test_models.py プロジェクト: RachelONelson/DSC530

def main(script, filename='mystery0.dat'):
    """Show the CDF of a data file under six views on a 2x3 grid.

    script: not used in the body
    filename: file passed to ReadFile; also the label of the first panel
    """
    values = ReadFile(filename)
    value_cdf = thinkstats2.Cdf(values)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    # panel 1: untransformed CDF
    thinkplot.SubPlot(1)
    thinkplot.Cdf(value_cdf, color='C0', label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    # panels 2 and 3: thinkplot.Cdf returns options that Config then applies
    thinkplot.SubPlot(2)
    logx_options = thinkplot.Cdf(value_cdf, xscale='log', color='C0')
    thinkplot.Config(title='CDF on log-x scale', ylabel='CDF', **logx_options)

    thinkplot.SubPlot(3)
    expo_options = thinkplot.Cdf(value_cdf, transform='exponential',
                                 color='C0')
    thinkplot.Config(title='CCDF on log-y scale', ylabel='log CCDF',
                     **expo_options)

    # panel 4: normal probability plot of the raw values
    thinkplot.SubPlot(4)
    normal_xs, sorted_ys = thinkstats2.NormalProbability(values)
    thinkplot.Plot(normal_xs, sorted_ys, color='C0')
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    thinkplot.SubPlot(5)
    pareto_options = thinkplot.Cdf(value_cdf, transform='pareto', color='C0')
    thinkplot.Config(title='CCDF on log-log scale', ylabel='log CCDF',
                     **pareto_options)

    thinkplot.SubPlot(6)
    weibull_options = thinkplot.Cdf(value_cdf, transform='weibull',
                                    color='C0')
    thinkplot.Config(title='CCDF on loglog-y log-x scale',
                     ylabel='log log CCDF',
                     **weibull_options)

    thinkplot.Show(legend=False)

コード例 #11

0

ファイルを表示

def main():
    """Show the CDF of 'mystery0.dat' under six views on a 2x3 grid."""
    values = read_file('mystery0.dat')
    value_cdf = thinkstats2.MakeCdfFromList(values)

    def scaled_panel(position, title, **cdf_options):
        """Draw the CDF in one panel and apply the options thinkplot.Cdf returns."""
        thinkplot.SubPlot(2, 3, position)
        axis_options = thinkplot.Cdf(value_cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    # panel 1 does not forward the options returned by thinkplot.Cdf
    thinkplot.SubPlot(2, 3, 1)
    thinkplot.Cdf(value_cdf)
    thinkplot.Config(title='linear')

    scaled_panel(2, 'logx', xscale='log')
    scaled_panel(3, 'expo', transform='exponential')

    # panel 4: normal probability plot of the raw values
    thinkplot.SubPlot(2, 3, 4)
    normal_xs, sorted_ys = thinkstats2.NormalProbability(values)
    thinkplot.Plot(normal_xs, sorted_ys)
    thinkplot.Config(title='normal')

    scaled_panel(5, 'pareto', transform='pareto')
    scaled_panel(6, 'weibull', transform='weibull')

    thinkplot.Show()

コード例 #12

0

ファイルを表示

def main(script, filename='mystery0.dat'):
    """Show the CDF of a data file under six views on a 2x3 grid.

    script: not used in the body
    filename: file passed to ReadFile
    """
    values = ReadFile(filename)
    value_cdf = thinkstats2.Cdf(values)

    def scaled_panel(position, title, **cdf_options):
        """Draw the CDF in one panel and apply the options thinkplot.Cdf returns."""
        thinkplot.SubPlot(position)
        axis_options = thinkplot.Cdf(value_cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    thinkplot.PrePlot(rows=2, cols=3)

    # panel 1 does not forward the options returned by thinkplot.Cdf
    thinkplot.SubPlot(1)
    thinkplot.Cdf(value_cdf)
    thinkplot.Config(title='linear')

    scaled_panel(2, 'logx', xscale='log')
    scaled_panel(3, 'expo', transform='exponential')

    # panel 4: normal probability plot of the raw values
    thinkplot.SubPlot(4)
    normal_xs, sorted_ys = thinkstats2.NormalProbability(values)
    thinkplot.Plot(normal_xs, sorted_ys)
    thinkplot.Config(title='normal')

    scaled_panel(5, 'pareto', transform='pareto')
    scaled_panel(6, 'weibull', transform='weibull')

    thinkplot.Show(legend=False)

コード例 #13

0

ファイルを表示

ファイル: project.py プロジェクト: fullern1/previouscode

def MakeHists(male, female):
    """Show histograms of the alcwknd column for men and women side by side.

    male: DataFrame with an alcwknd column (left panel, y axis up to 600)
    female: DataFrame with an alcwknd column (right panel, y axis up to 1200)
    """
    # (data, upper y limit, panel title)
    panels = [
        (male, 600, 'Weekend Alcohol Consumption for Men'),
        (female, 1200, 'Weekend Alcohol Consumption for Women'),
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (group, y_max, title) in enumerate(panels, start=1):
        counts = thinkstats2.Hist(group.alcwknd)
        thinkplot.SubPlot(position)
        # the panel is configured before the bars are drawn, as in the original
        thinkplot.Config(axis=[0, 800, 0, y_max],
                         ylabel='Number of people',
                         xlabel='Alcohol consumed (grams)',
                         title=title)
        thinkplot.Hist(counts, alpha=1)
    thinkplot.Show()

コード例 #14

0

ファイルを表示

ファイル: populations.py プロジェクト: UnderPaidMathematician/ThinkStats2

def MakeFigures():
    """Save figures comparing population data with Pareto and lognormal models.

    'populations_pareto' draws the CCDF of log10 population on a log y axis
    together with a Pareto model.  On that scale the tail looks like a
    straight line, which suggests a Pareto distribution, but that turns out
    to be misleading.

    'populations_normal' has two panels: the CDF of log10 population with a
    normal model, which shows the characteristic sigmoid of a lognormal
    distribution, and a normal probability plot of log10 population, which
    confirms that the data fit the lognormal model very well.

    Many phenomena that have been described with Pareto models can be
    described as well, or better, with lognormal models.
    """
    populations = ReadData()
    print('Number of cities/towns', len(populations))

    log_populations = np.log10(populations)
    # the linear-scale Cdf is built here but not drawn below
    cdf = thinkstats2.Cdf(populations, label='data')
    log_cdf = thinkstats2.Cdf(log_populations, label='data')

    # pareto plot: model curve in grey, then the data CCDF
    pareto_xs, pareto_ps = thinkstats2.RenderParetoCdf(
        xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(pareto_xs), 1-pareto_ps,
                   label='model', color='0.8')

    thinkplot.Cdf(log_cdf, complement=True)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CCDF',
                     yscale='log')
    thinkplot.Save(root='populations_pareto')

    # lognormal plot, left panel: normal model fitted to the log10 values
    thinkplot.PrePlot(cols=2)

    mean, std = log_populations.mean(), log_populations.std()
    normal_xs, normal_ps = thinkstats2.RenderNormalCdf(
        mean, std, low=0, high=8)
    thinkplot.Plot(normal_xs, normal_ps, label='model', color='0.8')

    thinkplot.Cdf(log_cdf)
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CDF')

    # lognormal plot, right panel: normal probability plot
    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_populations, label='data')
    thinkplot.Config(xlabel='z',
                     ylabel='log10 population',
                     xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')

コード例 #15

0

ファイルを表示

def pmf_stuff(width, x_low, x_high, third, pmf_one, pmf_two, label,
              y_axis_scale):
    """Show two PMFs as side-by-side bars and as step functions.

    width: bar width used in the bar panel
    x_low, x_high: x-axis limits
    third: lower y-axis limit
    pmf_one, pmf_two: the two PMFs to compare
    label: x-axis label for both panels
    y_axis_scale: upper y-axis limit
    """
    bounds = [x_low, x_high, third, y_axis_scale]

    # left panel: bars aligned to opposite sides of each value
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(pmf_one, align='right', width=width)
    thinkplot.Hist(pmf_two, align='left', width=width)
    thinkplot.Config(xlabel=label, ylabel='PMF', axis=bounds)

    # right panel: both PMFs drawn with thinkplot.Pmfs
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([pmf_one, pmf_two])
    thinkplot.Config(xlabel=label, ylabel='PMF', axis=bounds)
    thinkplot.Show()

コード例 #16

0

ファイルを表示

ファイル: Chap3PMF.py プロジェクト: pansh94/Maths_Stats

def plot_bar_step(first_pmf, other_pmf):
    """Show two PMFs as a bar graph (left) and with thinkplot.Pmfs (right).

    first_pmf: PMF drawn with left-aligned bars
    other_pmf: PMF drawn with right-aligned bars

    PrePlot's cols argument lays the two panels out side by side.
    """
    bar_width = 0.5

    # bar graph
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(first_pmf, align="left", width=bar_width)
    thinkplot.Hist(other_pmf, align="right", width=bar_width)
    thinkplot.Config(xlabel="weeks",
                     ylabel="probability",
                     axis=[27, 46, 0, 0.6])

    # step graph
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([first_pmf, other_pmf])
    thinkplot.Show(xlabel="weeks", axis=[27, 46, 0, 0.6])

コード例 #17

0

ファイルを表示

def NormalPlotSamples(samples, plot=1, ylabel=''):
    """Draw one normal probability plot per sample in consecutive subplots.

    samples: iterable of (n, sample) pairs; n appears in the panel title
    plot: index of the subplot used for the first sample
    ylabel: y-axis label applied to every panel
    """
    position = plot
    for size, values in samples:
        thinkplot.SubPlot(position)
        thinkstats2.NormalProbabilityPlot(values)

        # ticks and legend are turned off for every panel
        thinkplot.Config(title='n=%d' % size,
                         legend=False,
                         xticks=[],
                         yticks=[],
                         ylabel=ylabel)
        position = position + 1

コード例 #18

0

ファイルを表示

ファイル: timeseries.py プロジェクト: crskbel-ca/PythonBayesianStatistics

def PlotDailies(dailies):
    """Save a three-row figure of daily prices, one row per entry.

    dailies: map from name to DataFrame with a ppg column

    Only the first row gets a title and only the third row keeps its x
    ticks (rotated by 30 degrees).  Output root: 'timeseries1'.
    """
    thinkplot.PrePlot(rows=3)
    for row, (name, daily) in enumerate(dailies.items(), start=1):
        thinkplot.SubPlot(row)
        if row == 1:
            title = 'price per gram ($)'
        else:
            title = ''
        thinkplot.Config(ylim=[0, 20], title=title)
        thinkplot.Scatter(daily.ppg, s=10, label=name)
        if row != 3:
            thinkplot.Config(xticks=[])
        else:
            pyplot.xticks(rotation=30)

    thinkplot.Save(root='timeseries1', formats=FORMATS)

コード例 #19

0

ファイルを表示

ファイル: timeseries.py プロジェクト: husseingb/ThinkStats2

def MakeAcfPlot(dailies):
    """Save a two-panel figure of autocorrelation functions ('timeseries9').

    dailies: map from category name to DataFrame of daily prices

    The left panel calls PlotAutoCorrelation with add_weekly=False and the
    right panel with add_weekly=True; both share the same axis limits and
    legend location.
    """
    shared = dict(axis=[0, 41, -0.2, 0.2], loc='lower right')

    thinkplot.PrePlot(cols=2)
    PlotAutoCorrelation(dailies, add_weekly=False)
    thinkplot.Config(ylabel='correlation', xlabel='lag (day)', **shared)

    thinkplot.SubPlot(2)
    PlotAutoCorrelation(dailies, add_weekly=True)
    thinkplot.Save(root='timeseries9', xlabel='lag (days)', **shared)

コード例 #20

0

ファイルを表示

ファイル: analytic.py プロジェクト: AxleMaxGit/python-data-science-projects

def MakeBabyBoom():
    """Save the CDF and log-y CCDF of interarrival times side by side.

    The interarrival times are the differences between consecutive values of
    the minutes column returned by ReadBabyBoom.  Output root:
    'analytic_interarrivals'.
    """
    births = ReadBabyBoom()
    gaps = births.minutes.diff()
    gap_cdf = thinkstats2.Cdf(gaps, label='actual')

    # left panel: CDF on linear axes
    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(gap_cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    # right panel: complementary CDF on a log y axis
    thinkplot.SubPlot(2)
    thinkplot.Cdf(gap_cdf, complement=True)
    ccdf_options = dict(xlabel='minutes',
                        ylabel='CCDF',
                        yscale='log',
                        legend=False)
    thinkplot.Config(**ccdf_options)

    thinkplot.Save(root='analytic_interarrivals', legend=False)

コード例 #21

0

ファイルを表示

def PlotMarriageData(resp):
    """Save a two-row figure of the hazard and survival functions.

    resp: DataFrame of respondents, passed to EstimateMarriageSurvival

    returns: the survival function from EstimateMarriageSurvival

    Output root: 'survival2'.
    """
    hazard, survival = EstimateMarriageSurvival(resp)

    # top row: hazard function
    thinkplot.PrePlot(rows=2)
    thinkplot.Plot(hazard)
    thinkplot.Config(ylabel='hazard', legend=False)

    # bottom row: survival function, configured through Save
    thinkplot.SubPlot(2)
    thinkplot.Plot(survival)
    save_options = dict(xlabel='age (years)',
                        ylabel='prob unmarried',
                        ylim=[0, 1],
                        legend=False,
                        formats=FORMATS)
    thinkplot.Save(root='survival2', **save_options)
    return survival

コード例 #22

0

ファイルを表示

def PlotSurvival(complete):
    """Draw survival and hazard curves in a two-row layout.

    complete: list of complete lifetimes

    Also prints cdf[13], sf[13] and hf[39].  The figure is neither shown nor
    saved here.
    """
    thinkplot.PrePlot(3, rows=2)

    lifetime_cdf = thinkstats2.Cdf(complete, label='cdf')
    survival = MakeSurvivalFromCdf(lifetime_cdf, label='survival')
    for curve in (lifetime_cdf, survival):
        print(curve[13])

    # first row: survival curve with the CDF drawn faintly alongside it
    thinkplot.Plot(survival)
    thinkplot.Cdf(lifetime_cdf, alpha=0.2)
    thinkplot.Config()

    # second row: hazard function derived from the survival curve
    thinkplot.SubPlot(2)
    hazard = survival.MakeHazardFunction(label='hazard')
    print(hazard[39])
    thinkplot.Plot(hazard)
    thinkplot.Config(ylim=[0, 0.75])

コード例 #23

0

ファイルを表示

ファイル: timeseries.py プロジェクト: crskbel-ca/PythonBayesianStatistics

def PlotRollingMean(daily, name):
    """Plots rolling mean and EWMA.

    daily: DataFrame of daily prices with a ppg column
    name: string label for the scattered data points

    Saves a two-panel figure rooted at 'timeseries10'.
    """
    # reindex over every date from the first to the last index value so the
    # 30-row windows below cover 30 consecutive dates
    dates = pandas.date_range(daily.index.min(), daily.index.max())
    reindexed = daily.reindex(dates)

    thinkplot.PrePlot(cols=2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    # pandas.rolling_mean has been removed from pandas; Series.rolling with
    # the same window is the replacement
    roll_mean = reindexed.ppg.rolling(30).mean()
    thinkplot.Plot(roll_mean, label='rolling mean')
    pyplot.xticks(rotation=30)
    thinkplot.Config(ylabel='price per gram ($)')

    thinkplot.SubPlot(2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    # pandas.ewma has been removed from pandas; Series.ewm with the same
    # span is the replacement
    ewma = reindexed.ppg.ewm(span=30).mean()
    thinkplot.Plot(ewma, label='EWMA')
    pyplot.xticks(rotation=30)
    thinkplot.Save(root='timeseries10', formats=FORMATS)

コード例 #24

0

ファイルを表示

def MakeFigures(firsts, others):
    """Save figures comparing the prglngth PMFs of two groups.

    firsts: DataFrame with a prglngth column (labelled 'first')
    others: DataFrame with a prglngth column (labelled 'other')

    'probability_nsfg_pmf' shows the two PMFs as bars and as steps;
    'probability_nsfg_diffs' shows their difference, in percentage points,
    for weeks 35 through 45.
    """
    first_pmf = thinkstats2.Pmf(firsts.prglngth, label='first')
    other_pmf = thinkstats2.Pmf(others.prglngth, label='other')
    bar_width = 0.45
    bounds = [27, 46, 0, 0.6]

    # left panel: bars aligned to opposite sides of each week
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(first_pmf, align='right', width=bar_width)
    thinkplot.Hist(other_pmf, align='left', width=bar_width)
    thinkplot.Config(xlabel='weeks',
                     ylabel='probability',
                     axis=bounds)

    # right panel: both PMFs drawn with thinkplot.Pmfs
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([first_pmf, other_pmf])
    thinkplot.Save(root='probability_nsfg_pmf',
                   xlabel='weeks',
                   axis=bounds)

    # difference between the two PMFs, scaled to percentage points
    weeks = range(35, 46)
    diffs = [100 * (first_pmf.Prob(week) - other_pmf.Prob(week))
             for week in weeks]

    thinkplot.Bar(weeks, diffs)
    thinkplot.Save(root='probability_nsfg_diffs',
                   title='Difference in PMFs',
                   xlabel='weeks',
                   ylabel='percentage points',
                   legend=False)

コード例 #25

0

ファイルを表示

ファイル: survival.py プロジェクト: husseingb/ThinkStats2

def PlotMarriageData():
    """Save a two-row figure of the hazard and survival functions.

    Reads respondents with chap01ex_soln.ReadFemResp, derives agemarry and
    age by dividing differences of the cm* columns by 12.0, and estimates
    the hazard function from respondents with evrmarry == 1 (complete) and
    evrmarry == 0 (ongoing).  Output root: 'survival2'.
    """
    resp = chap01ex_soln.ReadFemResp()
    # 9997-9999 are replaced with NaN.  The result is assigned back to the
    # column: calling replace(..., inplace=True) on the Series returned by
    # attribute access is chained assignment and does not update the frame
    # under pandas Copy-on-Write.
    resp['cmmarrhx'] = resp.cmmarrhx.replace([9997, 9998, 9999], np.nan)

    # presumably the cm* columns are month counts, hence the division by 12
    # to get years -- TODO confirm against the data dictionary
    resp['agemarry'] = (resp.cmmarrhx - resp.cmbirth) / 12.0
    resp['age'] = (resp.cmintvw - resp.cmbirth) / 12.0

    complete = resp[resp.evrmarry == 1].agemarry
    ongoing = resp[resp.evrmarry == 0].age

    hf = EstimateHazardFunction(complete, ongoing, label='hazard')
    sf = hf.MakeSurvival(label='survival')

    # top row: hazard function
    thinkplot.PrePlot(rows=2)
    thinkplot.Plot(hf)
    thinkplot.Config()

    # bottom row: survival function
    thinkplot.SubPlot(2)
    thinkplot.Plot(sf)
    thinkplot.Save(root='survival2', xlabel='age (years)', ylim=[0, 1])

コード例 #26

0

ファイルを表示

def main():
    """Update two pairs of normal PMFs, plot them, and print the comparisons.

    p1/p2 and q1/q2 each start as copies of the same PMF from
    MakeNormalPmf(0, 1, 3, n=101).  Four Update calls are applied, the last
    one with False, before the p and q pairs are drawn in separate rows.
    """
    def labelled_pair(first_label, second_label):
        """Return a fresh normal PMF and a relabelled copy of it."""
        first = thinkbayes2.MakeNormalPmf(0, 1, 3, n=101)
        first.label = first_label
        return first, first.Copy(label=second_label)

    p1, p2 = labelled_pair('p1', 'p2')
    q1, q2 = labelled_pair('q1', 'q2')

    # every p is updated against every q; only the last call passes False
    p1, q1 = Update(p1, q1, True)
    p1, q2 = Update(p1, q2, True)
    p2, q1 = Update(p2, q1, True)
    p2, q2 = Update(p2, q2, False)

    thinkplot.PrePlot(num=4, rows=2)
    thinkplot.Pmfs([p1, p2])
    thinkplot.Config(legend=True)

    thinkplot.SubPlot(2)
    thinkplot.Pmfs([q1, q2])
    thinkplot.Show()

    for label, result in (('Prob p1 > p2', p1 > p2),
                          ('Prob q1 > q2', q1 > q2)):
        print(label, result)

コード例 #27

0

ファイルを表示

def compareDetroitAirport(flights):
    """Create PMFs to compare arrival delays at Detroit (DTW) with other airports.

       Per JD Power: Detroit Metropolitan Wayne County Airport ranks highest in passenger satisfaction among mega airports with a score of 786.
       https://www.jdpower.com/business/press-releases/2019-north-america-airport-satisfaction-study

    flights: DataFrame with DESTINATION_AIRPORT and ARRIVAL_DELAY columns

    For each of four delay ranges, saves a bar-style figure and a step-style
    figure of the two PMFs, eight files in total.
    """
    detroit = flights[flights.DESTINATION_AIRPORT == 'DTW']
    # The comparison group is every flight NOT arriving at DTW.  The filter
    # previously tested the AIRLINE column against the airport code 'DTW',
    # which excluded nothing.
    others = flights[flights.DESTINATION_AIRPORT != 'DTW']
    detroit_pmf = thinkstats2.Pmf(detroit.ARRIVAL_DELAY,
                                  label='Detroit Metro Arrival Delay')
    other_pmf = thinkstats2.Pmf(others.ARRIVAL_DELAY, label='other')
    width = 0.45

    # (low, high) x-axis limits in minutes, one pair of figures per range
    for low, high in [(-100, 100), (-30, 30), (-60, 0), (0, 60)]:
        span = '%d to %d' % (low, high)
        stem = '%dto%dDetroitDelay' % (low, high)
        options = dict(title=span + ' min Arrival Delay',
                       xlabel='detroit metro arrival delay',
                       ylabel='probability ' + span + ' mins',
                       axis=[low, high, 0, 0.032])

        # bar version: bars aligned to opposite sides of each value
        thinkplot.PrePlot(2, cols=2)
        thinkplot.Hist(detroit_pmf, align='right', width=width)
        thinkplot.Hist(other_pmf, align='left', width=width)
        thinkplot.Save(root=stem + 'BarPMF', **options)

        # step version
        thinkplot.PrePlot(2)
        thinkplot.SubPlot(2)
        thinkplot.Pmfs([detroit_pmf, other_pmf])
        thinkplot.Save(root=stem + 'StepPMF', **options)

コード例 #28

0

ファイルを表示

def compareDay4(flights):
    """Create PMFs to compare Day 4 (Thursday) arrival delays with other days.

    Day 4 (Thursday) was chosen by the original author because it showed the
    most flights in the scatterplot.

    flights: DataFrame with DAY and ARRIVAL_DELAY columns

    For each of four delay ranges, saves a bar-style figure and a step-style
    figure of the two PMFs, eight files in total.
    """
    on_day = flights.DAY == 4
    day_pmf = thinkstats2.Pmf(flights[on_day].ARRIVAL_DELAY,
                              label="Day 4 Arrival Delay")
    other_pmf = thinkstats2.Pmf(flights[flights.DAY != 4].ARRIVAL_DELAY,
                                label='other')
    bar_width = 0.45

    # (low, high) x-axis limits in minutes, one pair of figures per range
    for low, high in [(-100, 100), (-30, 30), (-60, 0), (0, 60)]:
        span = '%d to %d' % (low, high)
        stem = 'Thursday%dto%dArrivalDelay' % (low, high)
        options = dict(title=span + ' min Arrival Delay',
                       xlabel="day 4 arrival delay",
                       ylabel='probability ' + span + ' mins',
                       axis=[low, high, 0, 0.032])

        # bar version: bars aligned to opposite sides of each value
        thinkplot.PrePlot(2, cols=2)
        thinkplot.Hist(day_pmf, align='right', width=bar_width)
        thinkplot.Hist(other_pmf, align='left', width=bar_width)
        thinkplot.Save(root=stem + 'BarPMF', **options)

        # step version
        thinkplot.PrePlot(2)
        thinkplot.SubPlot(2)
        thinkplot.Pmfs([day_pmf, other_pmf])
        thinkplot.Save(root=stem + 'StepPMF', **options)

コード例 #29

0

ファイルを表示

    def RunModel(self):
        """Shuffle self.fake_data in place and return it.

        returns: the same self.fake_data object, reordered by
        np.random.shuffle
        """
        shuffled = self.fake_data
        np.random.shuffle(shuffled)
        return shuffled


## main scripts
# Runs only when the file is executed directly: loads the transactions,
# plots one scatter row per entry of dailies, then fits a linear model to each.
if __name__ == '__main__':

    ## read csv and group by quality and day
    # parse_dates=[5] asks pandas to parse the column at index 5 as dates
    transactions = pd.read_csv('mj-clean.csv', parse_dates=[5])
    dailies = GroupByQualityAndDay(transactions)

    ## plot time series by quality
    thinkplot.PrePlot(rows=3)
    for i, (name, daily) in enumerate(dailies.items()):
        thinkplot.SubPlot(i + 1)
        # only the first row carries the title
        title = 'Price per gram ($)' if i == 0 else ''
        thinkplot.Config(ylim=[0, 20], title=title)
        thinkplot.Scatter(daily.ppg, s=10, label=name)
        if i == 2:
            # third row keeps its x ticks, rotated by 30 degrees
            plt.xticks(rotation=30)
            thinkplot.Config()
        else:
            # other rows hide their x ticks
            thinkplot.Config(xticks=[])

    plt.show()

    ## calculate linear regressions for each quality

    # NOTE(review): model and results are not used in the visible part of
    # this snippet; it appears to be cut off here.
    for name, daily in dailies.items():
        model, results = RunLinearModel(daily)

コード例 #30

0

ファイルを表示

ファイル: ch7.py プロジェクト: smithb16/ThinkStats2

    ## make HexBin plot
    thinkplot.HexBin(heights, weights)
    thinkplot.Show(xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

    ## now use entire dataset
    heights_all, weights_all = sample.htm3, sample.wtkg2
    heights_all = Jitter(heights, 1.4)
    weights_all = Jitter(weights, 0.5)

    ## make scatter plot
    thinkplot.PrePlot(num=2, cols=2)
    thinkplot.SubPlot(1)
    thinkplot.Scatter(heights_all, weights_all, alpha=0.1, s=10)
    thinkplot.Show(xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

    thinkplot.SubPlot(2)
    thinkplot.HexBin(heights_all, weights_all)
    thinkplot.Show(xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

    ## bin data
    cleaned = df.dropna(subset=['htm3', 'wtkg2'])