def PlotOutliers(samples):
    """Plot one CDF per sample, restricted to the values below 150.

    Builds a Cdf named after each label from the values of that sample
    that are less than 150, draws all of them on a cleared figure, and
    hands the figure to thinkplot.Save with root 'variability_cdfs'.

    samples: map from label to sequence of values
    """
    cdfs = [
        thinkbayes.MakeCdfFromList([x for x in sample if x < 150], label)
        for label, sample in samples.iteritems()
    ]

    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
def PlotCdfs(d, labels):
    """Show the mean-centered CDF of every sequence in a dictionary.

    Each sequence is jittered and then shifted by the mean of the
    original (un-jittered) values before its Cdf is drawn.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.Clf()

    for key, values in d.iteritems():
        # the mean is taken before jittering, so the shift does not
        # depend on the random noise
        center = thinkstats.Mean(values)
        jittered = thinkstats.Jitter(values, 1.3)
        centered = [v - center for v in jittered]

        thinkplot.Cdf(thinkbayes.MakeCdfFromList(centered),
                      label=labels[key])

    thinkplot.Show()
def TestCorrelation(cdf):
    """Exercise CorrelatedGenerator and compare its output to cdf.

    Draws 10000 values from the generator, prints the requested
    correlation next to the serial correlation measured on the draws,
    and plots cdf together with the Cdf of the draws.

    cdf: Cdf handed to CorrelatedGenerator
    """
    target_rho = 0.4
    num_draws = 10000

    generator = CorrelatedGenerator(cdf, target_rho)
    draws = [generator.next() for _ in range(num_draws)]

    measured_rho = correlation.SerialCorr(draws)
    print(target_rho, measured_rho)

    thinkplot.Cdfs([cdf, thinkbayes.MakeCdfFromList(draws)])
    thinkplot.Show()
def main():
    """Run the simulations and save the plot of SampleDistPval's result.

    Stores the result of SampleDistOfDiff in the module-level global
    sample_diff, plots the result of SampleDistPval, saves that figure
    with root 'abtest3', and returns.

    NOTE(review): every statement after the first bare return is
    unreachable.  The later sections look like alternative experiments
    that are switched on by moving or deleting the returns -- confirm
    before removing them.
    """
    global sample_diff

    ctr1 = 0.05
    ctr2 = 0.05

    # kept in a global, presumably so it can be inspected after main()
    # returns; it is not plotted here
    sample_diff = SampleDistOfDiff(ctr1, ctr2, n=2000)

    pval_cdf = SampleDistPval(ctr1, ctr2)
    thinkplot.Cdf(pval_cdf)
    thinkplot.Save(root='abtest3',
                   xlabel='p-value',
                   ylabel='CDF',
                   formats=FORMATS)
    return

    # --- unreachable from here on (see docstring) ---
    dist_cdf = SampleDist(ctr1, ctr2)
    thinkplot.Cdf(dist_cdf)
    thinkplot.Save(root='abtest1',
                   xlabel='prob A > B',
                   ylabel='CDF',
                   formats=FORMATS)
    return

    pred_cdf = SamplePredDist(ctr1, ctr2)
    thinkplot.Cdf(dist_cdf)
    thinkplot.Cdf(pred_cdf)
    thinkplot.Save(root='abtest2',
                   xlabel='prob A > B',
                   ylabel='CDF',
                   formats=FORMATS)
    return

    # plot the prior distribution of CTR
    prior_ps = SampleCtr(100)
    prior_cdf = thinkbayes.MakeCdfFromList(prior_ps)
    thinkplot.Cdf(prior_cdf)
    thinkplot.Show()
def SamplePredDist(ctr1, ctr2, n=30):
    """Computes the sample distribution of the mean of PredDist.

    Calls PredDist n times and collects the mean of each result.

    ctr1: CTR of A
    ctr2: CTR of B
    n: number of iterations

    returns: Cdf of the collected means, named 'pred means'
    """
    means = [PredDist(ctr1, ctr2).Mean() for _ in range(n)]
    return thinkbayes.MakeCdfFromList(means, name='pred means')
def PredDist(ctr1, ctr2, n=100):
    """Builds a Cdf of RunSimulation results for sampled CTR pairs.

    Generates fake data for A and B (100 is passed to FakeData for
    each), turns each data set into a posterior Cdf, draws n values
    from each Cdf, and runs one simulation per pair of draws.

    ctr1: float CTR for A
    ctr2: float CTR for B
    n: number of simulations to run

    returns: Cdf of the simulation results
    """
    # both fake data sets are generated before anything is sampled, so
    # the sequence of random draws matches the order A, B, A, B
    fake_a = FakeData(100, ctr1)
    fake_b = FakeData(100, ctr2)

    posterior_a = MakePosterior(fake_a).MakeCdf()
    posterior_b = MakePosterior(fake_b).MakeCdf()

    draws_a = posterior_a.Sample(n)
    draws_b = posterior_b.Sample(n)

    results = [RunSimulation(qa, qb) for qa, qb in zip(draws_a, draws_b)]
    return thinkbayes.MakeCdfFromList(results)
def __init__(self, prices, bids, diffs):
    """Builds the price, diff and error distributions for this object.

    prices: sequence of values passed to EstimatedPdf
    bids: accepted but not used by this constructor
    diffs: sequence of values used for the diff Cdf and for the
           standard deviation of the error Pdf
    """
    self.pdf_price = EstimatedPdf(prices)
    self.cdf_diff = thinkbayes.MakeCdfFromList(diffs)

    # the error Pdf is centered at zero and takes its spread from diffs
    self.pdf_error = GaussianPdf(0, numpy.std(diffs))
351.0, 286.0, 373.0, 232.0, 393.0, 745.0, 636.0, 758.0, ] #print(OBSERVED_GAP_TIMES) print "cumulated data number :", len(OBSERVED_GAP_TIMES) #OBSERVED_GAP_TIMES = OBSERVED_GAP_TIMES/60 # this is not working... for i in xrange(0, len(OBSERVED_GAP_TIMES)): OBSERVED_GAP_TIMES[i] = OBSERVED_GAP_TIMES[i] / 60 #print(OBSERVED_GAP_TIMES) cdf_z = thinkbayes.MakeCdfFromList(OBSERVED_GAP_TIMES) sample_z = cdf_z.Sample(220) pmf_z = thinkbayes.MakePmfFromList(sample_z) #pmf_z = scipy.stats.gaussian_kde(sample_z) thinkplot.Clf() thinkplot.preplot(2) thinkplot.Clf() thinkplot.Pmf(pmf_z) thinkplot.Save(root='chapter8_self1', xlabel='', ylabel='Probability', formats=['pdf'])