Beispiel #1
0
def PlotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    For every (label, sample) pair, keeps only the values below 150,
    builds a Cdf named after the label, then draws all of the Cdfs on one
    figure and saves it under the root name 'variability_cdfs'.

    samples: map from label to sequence of values
    """
    # items() rather than iteritems(): the latter exists only on Python 2
    # dicts, while items() iterates the same pairs on Python 2 and 3.
    cdfs = [
        thinkbayes.MakeCdfFromList([x for x in sample if x < 150], label)
        for label, sample in samples.items()
    ]

    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
Beispiel #2
0
def PlotCdfs(d, labels):
    """Plot CDFs for each sequence in a dictionary.

    Jitters the data and subtracts away the mean.  The mean is computed
    from the original values, before any jitter is applied.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.Clf()
    # items() rather than iteritems(): the latter exists only on Python 2
    # dicts, while items() iterates the same pairs on Python 2 and 3.
    for key, xs in d.items():
        mu = thinkstats.Mean(xs)
        jittered = thinkstats.Jitter(xs, 1.3)
        centered = [x - mu for x in jittered]
        cdf = thinkbayes.MakeCdfFromList(centered)
        thinkplot.Cdf(cdf, label=labels[key])
    thinkplot.Show()
Beispiel #3
0
def TestCorrelation(cdf):
    """Tests the correlated generator.

    Makes sure that the sequence has the right distribution and correlation:
    prints the target correlation next to the measured serial correlation,
    then plots the input Cdf together with the Cdf of the generated values.

    cdf: Cdf passed to CorrelatedGenerator as the target distribution
    """
    n = 10000
    rho = 0.4

    rdt_seq = CorrelatedGenerator(cdf, rho)
    # The builtin next() works on Python 2.6+ and Python 3 iterators;
    # the .next() method only exists on Python 2 iterators.
    xs = [next(rdt_seq) for _ in range(n)]

    rho2 = correlation.SerialCorr(xs)
    print(rho, rho2)
    cdf2 = thinkbayes.MakeCdfFromList(xs)

    thinkplot.Cdfs([cdf, cdf2])
    thinkplot.Show()
Beispiel #4
0
def main():
    """Compute the sample distributions and save the p-value CDF plot.

    Stores the sample distribution of the difference in the module-level
    global `sample_diff`, then plots the sample distribution of p-values
    and saves it under the root name 'abtest3'.

    The other plotting stages used to sit after a bare `return` and were
    therefore unreachable; they now live in the private helpers below and
    are not called from here, so the runtime behaviour is unchanged.
    """
    ctr1 = 0.05
    ctr2 = 0.05

    # Published as a module-level global so it stays available after main().
    global sample_diff
    sample_diff = SampleDistOfDiff(ctr1, ctr2, n=2000)

    _PlotSamplePval(ctr1, ctr2)


def _PlotSamplePval(ctr1, ctr2):
    """Plot the sample distribution of p-values and save it as 'abtest3'."""
    sample_pval = SampleDistPval(ctr1, ctr2)

    thinkplot.Cdf(sample_pval)
    thinkplot.Save(root='abtest3',
                   xlabel='p-value',
                   ylabel='CDF',
                   formats=FORMATS)


def _PlotSampleDist(ctr1, ctr2):
    """Plot the result of SampleDist and save it as 'abtest1'.

    Not called by main(); kept so the experiment can be run by hand.
    """
    sample_dist = SampleDist(ctr1, ctr2)

    thinkplot.Cdf(sample_dist)
    thinkplot.Save(root='abtest1',
                   xlabel='prob A > B',
                   ylabel='CDF',
                   formats=FORMATS)


def _PlotSamplePredDist(ctr1, ctr2):
    """Plot SampleDist and SamplePredDist together and save as 'abtest2'.

    Not called by main(); kept so the experiment can be run by hand.
    """
    sample_dist = SampleDist(ctr1, ctr2)
    sample_pred = SamplePredDist(ctr1, ctr2)

    thinkplot.Cdf(sample_dist)
    thinkplot.Cdf(sample_pred)
    thinkplot.Save(root='abtest2',
                   xlabel='prob A > B',
                   ylabel='CDF',
                   formats=FORMATS)


def _PlotPriorCtr():
    """Show the CDF of 100 values drawn with SampleCtr (the prior CTR).

    Not called by main(); kept so the experiment can be run by hand.
    """
    ps = SampleCtr(100)
    cdf = thinkbayes.MakeCdfFromList(ps)
    thinkplot.Cdf(cdf)
    thinkplot.Show()
Beispiel #5
0
def SamplePredDist(ctr1, ctr2, n=30):
    """Builds the sample distribution of the mean predictive probability.

    Calls PredDist n times and records the mean of each result; each mean
    is the predictive posterior probability of A>B for that run.

    ctr1: CTR of A
    ctr2: CTR of B
    n: number of iterations

    returns: Cdf of the collected means, named 'pred means'
    """
    means = [PredDist(ctr1, ctr2).Mean() for _ in range(n)]
    return thinkbayes.MakeCdfFromList(means, name='pred means')
Beispiel #6
0
def PredDist(ctr1, ctr2, n=100):
    """Predictive posterior distribution of prob A>B.

    Generates fake data for A and B, turns each data set into a posterior
    Cdf, draws n values from each posterior and runs one simulation per
    pair of drawn values.

    ctr1: float CTR for A
    ctr2: float CTR for B
    n: number of simulations to run

    returns: Cdf of the simulation results
    """
    fake_a = FakeData(100, ctr1)
    fake_b = FakeData(100, ctr2)

    posterior_a = MakePosterior(fake_a).MakeCdf()
    posterior_b = MakePosterior(fake_b).MakeCdf()

    draws_a = posterior_a.Sample(n)
    draws_b = posterior_b.Sample(n)

    # One simulation per pair of sampled CTRs, in sample order.
    probs = []
    for rate_a, rate_b in zip(draws_a, draws_b):
        probs.append(RunSimulation(rate_a, rate_b))

    return thinkbayes.MakeCdfFromList(probs)
Beispiel #7
0
 def __init__(self, prices, bids, diffs):
     """Builds the distributions this object works with.

     prices: sequence of values used to build the estimated price PDF
     bids: accepted for interface compatibility; not used here
     diffs: sequence of values used for the diff Cdf and the error PDF
     """
     self.pdf_price = EstimatedPdf(prices)
     self.cdf_diff = thinkbayes.MakeCdfFromList(diffs)
     # Error model: Gaussian centred on zero with the standard deviation
     # of the observed diffs.
     self.pdf_error = GaussianPdf(0, numpy.std(diffs))
    351.0,
    286.0,
    373.0,
    232.0,
    393.0,
    745.0,
    636.0,
    758.0,
]
# Report how many observations were collected.  Formatting the message
# first gives the same output whether print is a statement (Python 2) or
# a function (Python 3).
print("cumulated data number : %s" % len(OBSERVED_GAP_TIMES))

# A plain list cannot be divided by a scalar, so each entry is rescaled
# individually.  Slice assignment updates the existing list in place.
# NOTE(review): dividing by 60 presumably converts seconds to minutes -- confirm.
OBSERVED_GAP_TIMES[:] = [gap / 60.0 for gap in OBSERVED_GAP_TIMES]

# Resample 220 values from the empirical Cdf and summarise them as a Pmf.
cdf_z = thinkbayes.MakeCdfFromList(OBSERVED_GAP_TIMES)
sample_z = cdf_z.Sample(220)
pmf_z = thinkbayes.MakePmfFromList(sample_z)

# NOTE(review): the Clf() after preplot(2) may undo what preplot set up;
# the call sequence is kept exactly as it was -- confirm whether it is needed.
thinkplot.Clf()
thinkplot.preplot(2)
thinkplot.Clf()
thinkplot.Pmf(pmf_z)
thinkplot.Save(root='chapter8_self1',
               xlabel='',
               ylabel='Probability',
               formats=['pdf'])