def MakeFigures(firsts, others):
    """Plot Hists and Pmfs for the pregnancy length.

    firsts: object exposing `hist` and `pmf` attributes
    others: object exposing `hist` and `pmf` attributes

    Two figures are handed to myplot.Save, with roots 'nsfg_hist' and
    'nsfg_pmf'.
    """
    # make the histogram
    axis = [23, 46, 0, 2700]
    Hists([firsts.hist, others.hist])
    myplot.Save(root='nsfg_hist',
                title='Histogram',
                xlabel='weeks',
                ylabel='frequency',
                axis=axis)

    # make the PMF (same x range, y range rescaled for probabilities)
    axis = [23, 46, 0, 0.6]
    Hists([firsts.pmf, others.pmf])
    myplot.Save(root='nsfg_pmf',
                title='PMF',
                xlabel='weeks',
                ylabel='probability',
                axis=axis)
def NormalProbPlot(samples):
    """Draws a normal probability plot for every sample in samples.

    samples: map from label to sample; each label must be 'male' or
             'female', otherwise the style lookup raises KeyError
    """
    pyplot.clf()

    # third argument handed to NormalPlot for each label
    style_by_label = {'male': 'b', 'female': 'g'}

    for label, values in samples.iteritems():
        NormalPlot(values, label, style_by_label[label], jitter=0.0)

    # no root is passed here (root='bayes_height_normal' is disabled)
    save_options = dict(title='Normal probability plot',
                        xlabel='Standard normal',
                        ylabel='Reported height (cm)')
    myplot.Save(show=True, **save_options)
def PlotCdfs(samples):
    """Plots one CDF per sample, restricted to the low outliers.

    samples: map from label to sequence of values; only values
             below 150 go into each CDF
    """
    cdfs = [
        Cdf.MakeCdfFromList([value for value in values if value < 150], label)
        for label, values in samples.iteritems()
    ]

    myplot.Clf()
    myplot.Cdfs(cdfs)
    myplot.Save(root='bayes_height_cdfs',
                title='CDF of height',
                xlabel='Reported height (cm)',
                ylabel='CDF')
def PlotDiffs(filename='heri.0', root='heri1', flag=False):
    """Plots the (x, y) pairs read from a file as a dotted series.

    filename: passed to ReadData, which yields the (x, y) pairs
    root: root passed to myplot.Save
    flag: if true, RunFit is called on the data before plotting
    """
    pyplot.clf()

    xs, ys = zip(*ReadData(filename))
    if flag:
        RunFit(xs, ys)

    pyplot.plot(xs, ys, 'b.:', markersize=15)

    save_options = dict(title='Yearly changes',
                        xlabel='',
                        ylabel='percentage points',
                        axis=[1972, 2013, -1.2, 2.1])
    myplot.Save(root=root, **save_options)
def MakeFigures():
    """Plots the CDF of populations on three scales, plus a rankit plot.

    Reads the data with populations.ReadData, prints how many values
    there are, and saves figures with roots 'populations',
    'populations_logx' and 'populations_loglog'.  The sorted logs of the
    values are then passed to rankit.MakeNormalPlot.
    """
    pops = populations.ReadData()
    print(len(pops))

    cdf = Cdf.MakeCdfFromList(pops, 'populations')

    # options common to all three figures
    shared = dict(title='City/Town Populations',
                  xlabel='population',
                  legend=False)

    # (root, options for myplot.Cdf, figure-specific options for myplot.Save)
    figures = [
        ('populations',
         {},
         dict(ylabel='CDF')),
        ('populations_logx',
         {},
         dict(ylabel='CDF', xscale='log')),
        ('populations_loglog',
         dict(complement=True),
         dict(ylabel='Complementary CDF', yscale='log', xscale='log')),
    ]

    for root, cdf_options, save_options in figures:
        save_options.update(shared)
        myplot.Clf()
        myplot.Cdf(cdf, **cdf_options)
        myplot.Save(root=root, **save_options)

    log_pops = sorted(math.log(x) for x in pops)
    rankit.MakeNormalPlot(log_pops, 'populations_rankit')
def PlotProbs(filename='p.heri.31'):
    """Plots the series read from a file over four fixed vertical bars.

    filename: passed to ReadData, which yields the (x, y) pairs

    The figure is handed to myplot.Save with root 'heri2'.
    """
    pyplot.clf()

    # wide light-gray vertical bars spanning y = 0..1 at fixed x positions
    for x in [1975.5, 1984.5, 1998.5, 2006.5]:
        pyplot.plot([x, x], [0, 1], color='0.8', linewidth=10)

    data = ReadData(filename)
    xs, ys = zip(*data)

    # The colour is given once, via the keyword.  The original format
    # string 'bo-' also encoded blue, which duplicates the keyword.
    pyplot.plot(xs, ys, 'o-', color='blue', linewidth=2, markersize=6)

    myplot.Save(root='heri2',
                title='Location of changepoints',
                xlabel='',
                ylabel='cumulative probability',
                axis=[1972, 2010, 0, 1])
def PlotOneSimulation(xscale='linear'):
    """Plots one simulation and saves it under a scale-specific root.

    xscale: x-axis scale name; passed to pyplot.xscale and embedded in
            the root handed to myplot.Save ('world_record_sim_<xscale>')
    """
    pyplot.clf()
    PlotSimulation(100000)

    if xscale == 'linear':
        # fixed axis limits and tick positions for the linear plot
        pyplot.axis([-0.1, 1.1, 0.0, 0.7])
        pyplot.xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
    else:
        pyplot.subplots_adjust(bottom=0.15)

    # NOTE(review): assumed to run for every xscale, not only in the
    # else branch -- the original layout was lost; confirm.
    pyplot.xscale(xscale)

    myplot.Save(root='world_record_sim_%s' % xscale,
                title='Simulated world record progression',
                xlabel='Fraction of population tested',
                ylabel='Max potential seen')
def main():
    """Plots the biased PMF of relay speeds and saves it as 'runner' (png)."""
    speeds = relay.GetSpeeds(relay.ReadResults())

    # first argument of BiasPmf is presumably the observer's speed in
    # mph -- TODO confirm against BiasPmf
    observed = BiasPmf(7, Pmf.MakePmfFromList(speeds, 'speeds'))

    myplot.Hist(observed)
    myplot.Save(formats=['png'],
                root='runner',
                title='PMF of observed speed',
                xlabel='speed (mph)',
                ylabel='probability')
def CheckCdf():
    """Compares simulated chi2 values with chi2 distributions.

    Draws the curves from Chi2Cdf for df = 1, 2, 3, overlays the CDF of
    1000 values from SimulateChi2, and saves the figure as 'khan3' (png).
    """
    for df in (1, 2, 3):
        xs, ys = Chi2Cdf(df=df, high=15)
        pyplot.plot(xs, ys, label=df)

    simulated = [SimulateChi2() for _ in range(1000)]
    myplot.Cdf(Cdf.MakeCdfFromList(simulated))

    myplot.Save(root='khan3',
                xlabel='chi2 value',
                ylabel="CDF",
                formats=['png'])
def MakeNormalPlot(ys, root=None, line_options=None, **options):
    """Makes a normal probability plot.

    Args:
        ys: sequence of values
        root: passed as the first positional argument of myplot.Save
        line_options: dictionary of options for pyplot.plot, or None for
            no extra options; an entry for 'markersize' overrides the
            default of 3
        options: dictionary of options for myplot.Save
    """
    # TODO: when n is small, generate a larger sample and desample

    # Build the plot options in a fresh dict: this avoids a shared mutable
    # default argument and lets the caller override markersize without a
    # duplicate-keyword TypeError.
    plot_options = dict(markersize=3)
    if line_options:
        plot_options.update(line_options)

    n = len(ys)
    xs = [random.normalvariate(0.0, 1.0) for _ in range(n)]

    pyplot.clf()
    # sorted random normal draws against the sorted data
    pyplot.plot(sorted(xs), sorted(ys), 'b.', **plot_options)

    myplot.Save(root,
                xlabel='Standard normal values',
                legend=False,
                **options)
def MakeFigures(pmf, biased_pmf):
    """Plots the CDFs of the unbiased and biased PMFs on a log x axis.

    pmf: the unbiased Pmf
    biased_pmf: the biased Pmf

    Also prints the median of each CDF and the unbiased CDF's Prob at
    100 and at 1000.
    """
    unbiased_cdf = Cdf.MakeCdfFromPmf(pmf, 'unbiased')
    print('unbiased median', unbiased_cdf.Percentile(50))
    for threshold, caption in ((100, 'percent < 100'),
                               (1000, 'percent < 1000')):
        print(caption, unbiased_cdf.Prob(threshold))

    biased_cdf = Cdf.MakeCdfFromPmf(biased_pmf, 'biased')
    print('biased median', biased_cdf.Percentile(50))

    myplot.Clf()
    myplot.Cdfs([unbiased_cdf, biased_cdf])
    myplot.Save(root='slashdot.logx',
                xlabel='Number of friends/foes',
                ylabel='CDF',
                xscale='log')
def main():
    """Plots Prob(week) for weeks 35..45 for the two groups.

    For each week, BornAtButNotBefore(week) supplies one distribution per
    group; the probability each assigns to that week is plotted and the
    figure is saved as 'first_conditional_pmf'.
    """
    weeks = range(35, 46)
    pyplot.clf()

    first_probs = []
    other_probs = []
    for week in weeks:
        first_dist, other_dist = BornAtButNotBefore(week)
        first_probs.append(first_dist.Prob(week))
        other_probs.append(other_dist.Prob(week))

    pyplot.plot(weeks, first_probs, label="First babies")
    pyplot.plot(weeks, other_probs, label="Others babies")

    myplot.Save(root='first_conditional_pmf',
                title='My conditional',
                xlabel='weeks',
                ylabel='probability')
def Main(script):
    """Plots the distribution of intervals between sorted birthdays.

    script: not used by this function
    """
    # sort in place so Diff sees the values in ascending order
    birthdays = ReadBirthdays()
    birthdays.sort()

    # each element returned by Diff exposes a `days` attribute
    days = [delta.days for delta in Diff(birthdays)]

    # myplot.Cdf returns a dict of options that is forwarded to Save
    cdf = Cdf.MakeCdfFromList(days, name='intervals')
    scale_options = myplot.Cdf(cdf, transform='exponential')

    myplot.Save(root='intervals',
                xlabel='days',
                ylabel='ccdf',
                **scale_options)
def MakePercentiles(shelf, n=50):
    """Plots percentiles of marathon time against half-marathon time.

    shelf: passed to ReadShelf, which supplies the (half, full) pairs
    n: number of consecutive sorted pairs grouped into each plotted point

    NOTE(review): as laid out here the function prints every pair and
    then returns, so the binning and plotting code below the bare
    `return` never runs.  Presumably a debugging leftover; the original
    indentation was lost, so confirm where the `return` belongs.
    """
    pairs = ReadShelf(shelf)
    pairs.sort()

    # dump the sorted pairs
    for x, y in pairs:
        print x, y
    return

    xs = []
    plists = []

    # walk the sorted pairs in chunks of n
    for i in range(0, len(pairs), n):
        subset = pairs[i:i + n]
        print i, len(subset)

        halfs, fulls = zip(*subset)

        # percentiles of the second element within this chunk
        cdf = Cdf.MakeCdfFromList(fulls)
        ys = [cdf.Percentile(x) for x in [5, 25, 50, 75, 95]]

        # x position of the chunk: mean of the first elements
        x = thinkstats.Mean(halfs)
        print x, ys

        xs.append(x)
        plists.append(ys)

    # drop the last point
    xs.pop()
    plists.pop()

    # transpose: one list of y values per percentile
    ylists = zip(*plists)

    # one style per percentile, in the same order as the percentile list
    plot_options = [
        dict(color='red', label='5%ile', linestyle='dotted'),
        dict(color='orange', label='25%ile', linestyle='dashed'),
        dict(color='yellow', label='50%ile', linestyle='solid'),
        dict(color='green', label='75%ile', linestyle='dashed'),
        dict(color='cyan', label='95%ile', linestyle='dotted'),
    ]

    # vertical reference line at x = 94
    pyplot.plot([94, 94], [100, 350])

    for ys, d in zip(ylists, plot_options):
        pyplot.plot(xs, ys, linewidth=3, **d)

    myplot.Save(root='race_predictor4',
                xlabel='Half marathon (min)',
                ylabel='Marathon (min)',
                show=True)
def MakeDiffFigure(firsts, others):
    """Draws a bar chart of the difference between the two PMFs.

    firsts: object whose `pmf` attribute provides Prob(week)
    others: object whose `pmf` attribute provides Prob(week)

    Differences are computed for weeks 35..45 and scaled by 100.
    """
    weeks = range(35, 46)
    diffs = [100 * (firsts.pmf.Prob(week) - others.pmf.Prob(week))
             for week in weeks]

    pyplot.clf()
    pyplot.bar(weeks, diffs, align='center')

    save_options = dict(title='Difference in PMFs',
                        xlabel='weeks',
                        ylabel='100 (PMF$_{first}$ - PMF$_{other}$)',
                        legend=False)
    myplot.Save(root='nsfg_diffs', **save_options)
def plot_data(self, root='caws.accident'):
    """Plots one accident-count series per entry of self.subsets.

    root: string prefix of the output files.

    For each subset the total from its histogram is printed as well.
    """
    pyplot.clf()

    for name, av_dict in self.subsets.iteritems():
        hist = self.count_accidents(av_dict)
        print('%s %s %s' % (name, 'Total accidents', hist.Total()))

        # sort the histogram items, then split them into x and y values
        ordered_items = sorted(hist.Items())
        xs, ys = zip(*ordered_items)
        pyplot.plot(xs, ys, label=name)

    myplot.Save(root=root,
                title='Monthly Accident Counts',
                xlabel='Year',
                ylabel='Number of accidents',
                axis=[1991.5, 2002.5, 0, 40])
def MakeParetoCdf():
    """Plots the CDF of height in Pareto World and prints the median.

    The curve is evaluated at 50 evenly spaced points from 0 up to (but
    not including) 1000, with xmin=100 and alpha=1.7, and saved as
    'pareto_world1'.
    """
    xmin = 100
    alpha = 1.7

    num_points = 50
    upper = 1000.0
    xs = [upper * k / num_points for k in range(num_points)]
    ps = [ParetoCdf(x, alpha, xmin) for x in xs]

    print('%s %s' % ('Median', ParetoMedian(xmin, alpha)))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    myplot.Save('pareto_world1',
                title='Pareto CDF',
                xlabel='height (cm)',
                ylabel='CDF',
                legend=False)
def MakeParetoCdf():
    """Generates a plot of the Pareto CDF.

    The curve is evaluated at 50 evenly spaced points from 0 up to (but
    not including) 10, with xmin=0.5 and alpha=1.0, and saved as
    'pareto_cdf'.  The value of the CDF at x=10 is printed as a check.
    """
    n = 50
    upper = 10.0  # avoids shadowing the builtin max
    xs = [upper * i / n for i in range(n)]

    xmin = 0.5
    alpha = 1.0
    ps = [ParetoCdf(x, alpha, xmin) for x in xs]

    # Argument order is (x, alpha, xmin), matching the call above; the
    # original passed (xmin, alpha, 10), i.e. x and xmin were swapped.
    print('Fraction <= 10', ParetoCdf(10, alpha, xmin))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    myplot.Save('pareto_cdf',
                title='Pareto CDF',
                xlabel='x',
                ylabel='CDF',
                legend=False)
def PlotReligiousSubset(years, cols, labels, i, j):
    """Plots columns i..j-1 against years and saves the figure.

    years: sequence of years
    cols: list of columns to plot
    labels: list of labels (corresponding to cols)
    i,j: slice indices of the columns to plot

    The root passed to myplot.Save is 'heri.religious.<i>.<j>'.
    """
    pyplot.clf()

    selected = zip(cols[i:j], labels[i:j])
    for col, label in selected:
        pyplot.plot(years, col,
                    label=label,
                    linewidth=3,
                    markersize=0,
                    alpha=0.7)

    myplot.Save(root='heri.religious.%d.%d' % (i, j),
                formats=FORMATS,
                xlabel='Year',
                ylabel='% None',
                title='Religious preference')
def MakeExpoCdf():
    """Generates a plot of the exponential CDF.

    The curve is evaluated at 40 evenly spaced points from 0 up to (but
    not including) 2.5, with lam=2.0, and saved as 'expo_cdf'.  The value
    x = -log(0.05) / lam and the CDF at that x are printed as a check.
    """
    n = 40
    upper = 2.5  # avoids shadowing the builtin max
    xs = [upper * i / n for i in range(n)]

    lam = 2.0
    ps = [ExpoCdf(x, lam) for x in xs]

    percentile = -math.log(0.05) / lam
    # Argument order is (x, lam), matching the call above; the original
    # passed (lam, percentile).
    print('Fraction <= ', percentile, ExpoCdf(percentile, lam))

    pyplot.clf()
    pyplot.plot(xs, ps, linewidth=2)
    myplot.Save('expo_cdf',
                title='Exponential CDF',
                xlabel='x',
                ylabel='CDF',
                legend=False)
def PlotCurves(curves, root=None, clf=False):
    """Plots every curve in translucent blue and saves the figure.

    curves: list of curves; each curve is a list of (x, y) pairs
    root: passed to myplot.Save; if it is truthy the current figure is
          cleared before plotting
    clf: passed through to myplot.Save
    """
    if root:
        pyplot.clf()

    total = len(curves)
    for index, curve in enumerate(curves):
        # OffsetCurve receives the curve's position and the curve count
        shifted = OffsetCurve(curve, index, total)
        xs, ys = zip(*shifted)
        pyplot.plot(xs, ys, color='blue', alpha=0.2)

    myplot.Save(root=root,
                clf=clf,
                xlabel='# samples',
                ylabel='# taxa',
                legend=False)
def plot_data(self, root='caws.traffic'):
    """Plots one AADT series per location name.

    root: root passed to myplot.Save

    Values come from self.lookup(year, loc) divided by 1000 and are
    grouped by the name that self.locs maps each location to.
    """
    pyplot.clf()

    # map from location name to the list of scaled values, one per year
    series_by_name = {}
    for loc, name in self.locs.iteritems():
        for year in self.years:
            scaled = self.lookup(year, loc) / 1000
            series_by_name.setdefault(name, []).append(scaled)

    # TODO: fix the year labels
    for name, values in series_by_name.iteritems():
        pyplot.plot(self.years, values, label=name)

    myplot.Save(root=root,
                title='Traffic volume',
                xlabel='Year',
                ylabel='AADT',
                axis=[1991.5, 2002.5, 0, 160])
def main():
    """Updates two uniform suites with evidence and plots the posteriors.

    Prints a hand-computed value labelled 'pae' and the result of
    TotalProbability for the two updated suites, then saves the plot as
    'blinky' in pdf and png.
    """
    print('%s %s' % ('pae', 0.3 / (0.3 + 3.0 / 13)))

    # each suite starts uniform over 101 values in [0, 1] and is updated
    # with its own evidence tuple
    doorA = MakeUniformSuite(0.0, 1.0, 101, name='Door A')
    Update(doorA, (3, 2))

    doorC = MakeUniformSuite(0.0, 1.0, 101, name='Door C')
    Update(doorC, (3, 10))

    print(TotalProbability(doorA, doorC, ProbWinning))

    # plot the posterior distributions
    myplot.Pmfs([doorA, doorC])
    myplot.Save(root='blinky',
                formats=['pdf', 'png'],
                title='Probability of blinking',
                xlabel='P(blink)',
                ylabel='Posterior probability')
def PlotMarginals(suite):
    """Plots the CDFs of both marginals of a 2-D joint distribution.

    suite: joint distribution passed to ComputeMarginals; its `name`
           attribute is embedded in the root passed to myplot.Save
    """
    pmf_m, pmf_s = ComputeMarginals(suite)

    pyplot.clf()
    pyplot.figure(1, figsize=(7, 4))

    # (marginal pmf, cdf name, x-axis label) for the left and right panels
    panels = [
        (pmf_m, 'mu', 'Mean height (cm)'),
        (pmf_s, 'sigma', 'Std Dev height (cm)'),
    ]
    for position, (marginal, name, xlabel) in enumerate(panels, 1):
        pyplot.subplot(1, 2, position)
        myplot.Cdf(Cdf.MakeCdfFromPmf(marginal, name))
        pyplot.xlabel(xlabel)
        pyplot.ylabel('CDF')

    myplot.Save(root='bayes_height_marginals_%s' % suite.name)
def MakeDiffFigure(firsts, others):
    """Plot the differences between the two PMFs.

    firsts: object whose `pmf` attribute provides Prob(week)
    others: object whose `pmf` attribute provides Prob(week)
    """
    # only this range of weeks is plotted; it is the range of interest
    weeks = range(35, 46)

    def scaled_difference(week):
        # difference between the two probabilities, scaled by 100
        return 100 * (firsts.pmf.Prob(week) - others.pmf.Prob(week))

    diffs = [scaled_difference(week) for week in weeks]

    # start plotting
    pyplot.clf()
    # arguments: x values, the differences, bar alignment
    pyplot.bar(weeks, diffs, align='center')
    myplot.Save(root='nsfg_diffs',
                title='Difference in PMFs',
                xlabel='weeks',
                ylabel='100 (PMF$_{first}$ - PMF$_{other}$)',
                legend=False)
def main():
    """Computes and plots the posterior for the locomotive problem.

    Starts from a uniform suite over 1..200, updates a copy with the
    evidence 60, prints the 90 credible interval and saves the plot as
    'locomotive'.
    """
    upper_bound = 200

    prior = MakeUniformSuite(1, upper_bound, upper_bound)
    prior.name = 'prior'

    # update a copy so the prior itself is left untouched
    posterior = prior.Copy()
    Update(posterior, 60)
    posterior.name = 'posterior'

    print(CredibleInterval(posterior, 90))

    # plot the posterior distribution
    pyplot.subplots_adjust(wspace=0.4, left=0.15)
    myplot.Pmf(posterior, linewidth=2)
    myplot.Save(root='locomotive',
                title='Locomotive problem',
                xlabel='Number of trains',
                ylabel='Posterior probability')
def PlotSimulations():
    """Plots four simulations in a 2x2 grid with log-scaled x axes.

    Temporarily changes matplotlib rc settings (figure size, font size,
    tick sizes) and saves the figure as 'world_record_sim2'.  The rc
    settings are reset with pyplot.rcdefaults() on the way out, even if
    plotting or saving raises, so the changes do not leak into later
    figures.
    """
    try:
        pyplot.rc('figure', figsize=(4, 4.5))
        pyplot.rc('font', size=9.0)
        pyplot.rc('xtick.major', size=0)
        pyplot.rc('ytick.major', size=0)

        pyplot.subplots_adjust(wspace=0.4, hspace=0.4,
                               right=0.95, left=0.1,
                               top=0.95, bottom=0.05)

        pyplot.title('Simulated world records')

        # one independent simulation per panel
        for i in range(1, 5):
            pyplot.subplot(2, 2, i)
            pyplot.xscale('log')
            PlotSimulation(100000)

        myplot.Save(root='world_record_sim2')
    finally:
        pyplot.rcdefaults()
def PlotPosterior(xs, ys, suite, pcolor=False, contour=True):
    """Plots a joint distribution over a grid of (x, y) values.

    xs: sequence of values
    ys: sequence of values
    suite: Pmf that maps (x, y) to z; its `name` attribute is embedded
           in the root passed to myplot.Save
    pcolor: if true, draw the grid with pyplot.pcolor
    contour: if true, draw the grid with pyplot.contour
    """
    X, Y = numpy.meshgrid(xs, ys)

    def joint_prob(x, y):
        # probability the suite assigns to the pair (x, y)
        return suite.Prob((x, y))

    # evaluate the suite at every grid point
    Z = numpy.vectorize(joint_prob)(X, Y)

    pyplot.clf()
    if pcolor:
        pyplot.pcolor(X, Y, Z)
    if contour:
        pyplot.contour(X, Y, Z)

    root = 'bayes_height_posterior_%s' % suite.name
    myplot.Save(root=root,
                title='Posterior joint distribution',
                xlabel='Mean height (cm)',
                ylabel='Stddev (cm)')
def PlotCoefVariation(suites):
    """Plots the CDF of the coefficient of variation for each suite.

    suites: map from label to suite; must contain the labels 'female'
            and 'male', which are compared with ProbBigger after the
            figure is saved as 'bayes_height_cv'
    """
    pyplot.clf()

    cv_pmfs = {}
    for label, suite in suites.iteritems():
        cv_pmf = ComputeCoefVariation(suite)
        myplot.Cdf(Cdf.MakeCdfFromPmf(cv_pmf, label))
        cv_pmfs[label] = cv_pmf

    myplot.Save(root='bayes_height_cv',
                title='Coefficient of variation',
                xlabel='cv',
                ylabel='CDF')

    female = cv_pmfs['female']
    male = cv_pmfs['male']
    print('%s %s' % ('female bigger', ProbBigger(female, male)))
    print('%s %s' % ('male bigger', ProbBigger(male, female)))
def MakeNormalModel(values):
    """Plots the CDF of the values over a normal model fitted to them.

    values: sequence of values

    The model parameters come from thinkstats.TrimmedMeanVar with p=0.01
    and are printed along with sigma.
    """
    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats.TrimmedMeanVar(values, p=0.01)
    print('%s %s %s' % ('Mean, Var', mu, var))

    sigma = math.sqrt(var)
    print('%s %s' % ('Sigma', sigma))

    pyplot.clf()

    # the model: thick light-gray curve drawn first
    model_xs, model_ps = RenderNormalCdf(mu, sigma, 200)
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.8')

    # the data: thinner red curve drawn on top
    data_xs, data_ps = Cdf.MakeCdfFromList(values).Render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='red')

    myplot.Save(show=True, ylabel='CDF')