Example #1
0
def ProcessScoresTeamwise(pairs):
    """Plot and summarize the average number of goals for each team.

    Pools the goals by team, computes the mean for each team, shows the
    CDF of those per-team means and prints their mean and standard
    deviation.

    pairs: map from (team1, team2) to a sequence of (score1, score2)
           entries
    """
    # map from team to list of goals scored
    goals_scored = {}
    # items()/values() instead of iteritems(): the latter exists only on
    # Python 2 dicts and raises AttributeError on Python 3.
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault(t1, []).append(g1)
            goals_scored.setdefault(t2, []).append(g2)

    # make a list of average goals scored (one value per team)
    lams = [thinkbayes2.Mean(goals) for goals in goals_scored.values()]

    # make the distribution of average goals scored
    cdf = thinkbayes2.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkbayes2.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))
Example #2
0
    def __init__(self, prices, bids, diffs):
        """Build the distributions that describe this Player.

        prices: sequence of prices
        bids: sequence of bids (not used by this constructor)
        diffs: sequence of underness (negative means over)
        """
        # estimated PDF of the prices and CDF of the diffs
        self.pdf_price = thinkbayes2.EstimatedPdf(prices)
        self.cdf_diff = thinkbayes2.MakeCdfFromList(diffs)

        # error model: normal PDF centered at zero whose sigma is the
        # standard deviation of diffs
        error_sigma = numpy.std(diffs)
        self.pdf_error = thinkbayes2.NormalPdf(0, error_sigma)
Example #3
0
def Summarize(xs):
    """Prints summary statistics from a sequence of values.

    Prints the ten smallest values, the ten largest values, and the
    25th, 50th and 75th percentiles.  The caller's sequence is not
    modified.

    xs: sequence of values
    """
    # Sort a copy: xs.sort() reordered the caller's list in place and
    # failed for sequences that have no sort method (e.g. tuples).
    ordered = sorted(xs)

    # print smallest and largest
    print('smallest', ordered[:10])
    print('largest', ordered[-10:])

    # print median and interquartile range
    cdf = thinkbayes2.MakeCdfFromList(ordered)
    print(cdf.Percentile(25), cdf.Percentile(50), cdf.Percentile(75))
Example #4
0
def GenerateCdf(n=1000, pc=0.35, lam1=0.79, lam2=5.0):
    """Draws a sample of RDTs and wraps it in a Cdf.

    n: sample size
    pc: probability of negative growth
    lam1: exponential parameter of positive growth
    lam2: exponential parameter of negative growth

    Returns: Cdf of generated sample
    """
    sample = GenerateSample(n, pc, lam1, lam2)
    return thinkbayes2.MakeCdfFromList(sample)
Example #5
0
def MedianIPR(xs, p):
    """Returns the median and an interpercentile range of a sample.

    xs: sequence of values
    p: width of the central range as a probability (0-1);
       0.5 yields the interquartile range

    returns: tuple of float (median, IPR)
    """
    cdf = thinkbayes2.MakeCdfFromList(xs)
    middle = cdf.Percentile(50)

    # probability left in each tail outside the central range
    tail = (1 - p) / 2
    upper = cdf.Value(1 - tail)
    lower = cdf.Value(tail)
    return middle, upper - lower
Example #6
0
def PlotOutliers(samples):
    """Plots and saves one CDF per sample, built from its low values.

    samples: map from label to sequence of values; only the values
             below 150 in each sequence are kept
    """
    cdfs = [
        thinkbayes2.MakeCdfFromList([x for x in sample if x < 150], label)
        for label, sample in samples.items()
    ]

    # start from a clean figure, draw all CDFs, then write the plot out
    save_options = dict(root='variability_cdfs',
                        title='CDF of height',
                        xlabel='Reported height (cm)',
                        ylabel='CDF')
    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(**save_options)
0
def PlotCdfs(d, labels):
    """Draws one mean-centered CDF per sequence in a dictionary.

    Each sequence is jittered and then shifted by the mean of the
    original (pre-jitter) values.

    d: map from key to sequence of values
    labels: map from key to string label
    """
    thinkplot.Clf()
    for key, values in d.items():
        # the mean is taken before jittering
        center = thinkbayes2.Mean(values)
        jittered = thinkbayes2.Jitter(values, 1.3)
        centered_cdf = thinkbayes2.MakeCdfFromList(
            [x - center for x in jittered])
        thinkplot.Cdf(centered_cdf, label=labels[key])
    thinkplot.Show()
Example #8
0
def TestCorrelation(cdf):
    """Tests the correlated generator.

    Draws n values from CorrelatedGenerator, prints the target serial
    correlation next to the measured one, and plots the input cdf
    together with the cdf of the generated values for comparison.

    cdf: Cdf passed to CorrelatedGenerator
    """
    n = 10000
    rho = 0.4

    rdt_seq = CorrelatedGenerator(cdf, rho)
    # Built-in next() works on Python 2.6+ and 3; the .next() method
    # does not exist on Python 3 iterators.
    xs = [next(rdt_seq) for _ in range(n)]

    rho2 = correlation.SerialCorr(xs)
    print((rho, rho2))
    cdf2 = thinkbayes2.MakeCdfFromList(xs)

    thinkplot.Cdfs([cdf, cdf2])
    thinkplot.Show()