Ejemplo n.º 1
0
    def testKaplanMeier(self):
        """Check Pmf arithmetic and a hand-built Kaplan-Meier style hazard.

        Unnormalized Pmfs of completed and ongoing durations are combined
        with +, -, * and /; their survival functions are then evaluated on
        the merged index to derive the at-risk counts and the hazard.
        """
        finished = [1, 3, 6]
        unfinished = [2, 3, 5, 7]

        # normalize=False keeps raw counts rather than probabilities.
        pmf_finished = Pmf.from_seq(finished, normalize=False)
        pmf_unfinished = Pmf.from_seq(unfinished, normalize=False)

        added = pmf_finished + pmf_unfinished
        self.assertListEqual(list(added), [1, 1, 2, 1, 1, 1])

        subtracted = pmf_finished - pmf_unfinished
        self.assertListEqual(
            list(subtracted), [1.0, -1.0, 0.0, -1.0, 1.0, -1.0]
        )

        multiplied = pmf_finished * pmf_unfinished
        self.assertListEqual(
            list(multiplied), [0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
        )

        divided = pmf_finished / pmf_unfinished
        self.assertListEqual(
            list(divided), [np.inf, 0.0, 1.0, 0.0, np.inf, 0.0]
        )

        surv_finished = pmf_finished.make_surv()
        surv_unfinished = pmf_unfinished.make_surv()

        # Counts of either kind at every observed time.
        events = pmf_finished + pmf_unfinished

        after_finished = surv_finished(events.index)
        self.assertListEqual(list(after_finished), [2., 2., 1., 1., 0., 0.])

        after_unfinished = surv_unfinished(events.index)
        self.assertListEqual(list(after_unfinished), [4., 3., 2., 1., 1., 0.])

        at_risk = events + after_finished + after_unfinished
        self.assertListEqual(list(at_risk), [7.0, 6.0, 5.0, 3.0, 2.0, 1.0])

        hazard = pmf_finished / at_risk
        self.assertListEqual(
            list(hazard),
            [0.14285714285714285, 0.0, 0.2, 0.0, 0.5, 0.0],
        )
    def testComparison(self):
        """Check the Pmf comparison methods against a scalar and another Pmf.

        Both spellings of each comparison (``<op>_dist`` and ``prob_<op>``)
        are called and must produce the same expected values.
        """
        pmf1 = Pmf.from_seq([1, 2, 3, 4, 5, 6])
        pmf2 = Pmf.from_seq([1, 2, 3, 4])

        # (operator suffix, expected result), listed in call order.
        against_scalar = [
            ("eq", 1 / 6),
            ("ne", 5 / 6),
            ("gt", 3 / 6),
            ("ge", 4 / 6),
            ("lt", 2 / 6),
            ("le", 3 / 6),
        ]
        against_pmf = [
            ("eq", 1 / 6),
            ("ne", 5 / 6),
            ("gt", 0.5833333),
            ("ge", 3 / 4),
            ("lt", 1 / 4),
            ("le", 0.41666666),
        ]

        for pattern in ("{}_dist", "prob_{}"):
            for other, cases in ((3, against_scalar), (pmf2, against_pmf)):
                for op, expected in cases:
                    compare = getattr(pmf1, pattern.format(op))
                    self.assertAlmostEqual(compare(other), expected)
 def testMinMax(self):
     """Check ``max_dist(2)`` and ``min_dist(2)`` for a Pmf of [1, 2, 3]."""
     base = Pmf.from_seq([1, 2, 3])

     largest = base.max_dist(2)
     expected_largest = Pmf([1 / 9, 3 / 9, 5 / 9], base.index)
     self.almost_equal_dist(largest, expected_largest)

     smallest = base.min_dist(2)
     expected_smallest = Pmf([5 / 9, 3 / 9, 1 / 9], base.index)
     self.almost_equal_dist(smallest, expected_smallest)
Ejemplo n.º 4
0
def distribution_of_income(brfss):
    """Show a bar chart of the PMF of the "INCOME2" column.

    brfss: table indexable by column name (presumably a DataFrame)
    """
    # Build the PMF straight from the income column and draw it as bars.
    income_pmf = Pmf.from_seq(brfss["INCOME2"])
    income_pmf.bar()

    # Axis labels, then display.
    plt.xlabel('Income level')
    plt.ylabel('PMF')
    plt.show()
Ejemplo n.º 5
0
    def testAdd(self):
        """Check that ``+`` and ``Pmf.add`` agree on the normalized mean."""
        pmf1 = Pmf.from_seq([1, 2, 3, 4, 5, 6])
        pmf2 = Pmf.from_seq([1, 2, 3, 4])

        # Each combiner runs only when its turn comes, so the operator is
        # fully checked before the method is called.
        combiners = (
            lambda: pmf1 + pmf2,
            lambda: pmf1.add(pmf2),
        )
        for combine in combiners:
            combined = combine()
            combined.normalize()
            self.assertAlmostEqual(combined.mean(), 3)
Ejemplo n.º 6
0
def pmf_from_dist(dist, qs):
    """Approximate a distribution with a normalized Pmf.

    dist: object with a `pdf` method (e.g. a SciPy distribution)
    qs: quantities at which the density is evaluated

    returns: Pmf over `qs`, normalized
    """
    densities = dist.pdf(qs)
    approximation = Pmf(densities, qs)
    # Densities do not sum to 1 on a grid, so rescale them.
    approximation.normalize()
    return approximation
Ejemplo n.º 7
0
def make_poisson_pmf(lam, qs):
    """Build a normalized Pmf from Poisson probabilities.

    lam: event rate
    qs: sequence of values for `k`

    returns: Pmf over `qs`, normalized
    """
    frozen = poisson(lam)
    probs = frozen.pmf(qs)
    result = Pmf(probs, qs)
    # Renormalize over the quantities actually supplied.
    result.normalize()
    return result
Ejemplo n.º 8
0
def kde_from_sample(sample, qs):
    """Estimate a density from a sample and return it as a Pmf.

    sample: sequence of values
    qs: quantities where the KDE is evaluated

    returns: normalized Pmf
    """
    estimator = gaussian_kde(sample)
    densities = estimator(qs)
    result = Pmf(densities, qs)
    result.normalize()
    return result
Ejemplo n.º 9
0
def plot_pmf(T, S):
    """Plot the PMFs of avalanche duration and size side by side.

    T: sequence of avalanche durations (left panel)
    S: sequence of avalanche sizes (right panel)

    The figure is written to "pmf_plot.png" and then displayed.
    """
    pmfT = Pmf.from_seq(T)
    pmfS = Pmf.from_seq(S)

    fig = plt.figure(figsize=(10, 5))
    # This text used to be passed to plt.show(), which only accepts the
    # keyword argument `block`; it is a title, so attach it to the figure.
    fig.suptitle('PMF size and duration')

    plt.subplot(1, 2, 1)
    pmfT.plot(xlim=(0, 50), xlabel="Avalanche duration", ylabel="PMF")

    plt.subplot(1, 2, 2)
    pmfS.plot(xlim=(0, 50), xlabel="Avalanche size", ylabel="PMF")

    # Save before showing: once the window is closed the figure may
    # already have been torn down by the backend.
    fig.savefig("pmf_plot.png")
    plt.show()
Ejemplo n.º 10
0
def make_uniform(qs, name=None, **options):
    """Build a Pmf giving equal probability to every quantity.

    qs: quantities
    name: optional string stored as the name of the index
    options: forwarded to the Pmf constructor

    returns: normalized Pmf
    """
    uniform = Pmf(1.0, qs, **options)
    uniform.normalize()
    if not name:
        return uniform
    uniform.index.name = name
    return uniform
Ejemplo n.º 11
0
def kde_from_pmf(pmf, n=101):
    """Smooth a Pmf with a Gaussian kernel density estimate.

    pmf: Pmf object; its quantities are the data, its probabilities
         the weights
    n: number of equally spaced points between the smallest and
       largest quantity

    returns: normalized Pmf evaluated at those points
    """
    estimator = gaussian_kde(pmf.qs, weights=pmf.ps)
    low, high = pmf.qs.min(), pmf.qs.max()
    grid = np.linspace(low, high, n)
    smoothed = Pmf(estimator.evaluate(grid), grid)
    smoothed.normalize()
    return smoothed
Ejemplo n.º 12
0
def pmf_from_dist(dist, low, high, n=101):
    """Make a discrete approximation of a continuous distribution.

    dist: object with a `pdf` method (e.g. a SciPy distribution)
    low: low end of range
    high: high end of range
    n: number of equally spaced points in [low, high]; defaults to 101,
       the value that used to be hard-coded

    returns: normalized Pmf
    """
    qs = np.linspace(low, high, n)
    ps = dist.pdf(qs)
    pmf = Pmf(ps, qs)
    # Densities on a grid do not sum to 1, so rescale them.
    pmf.normalize()
    return pmf
Ejemplo n.º 13
0
    def testSort(self):
        """Check index ordering of a Pmf and of the Cdf made from it."""
        letters = list('allen')
        pmf = Pmf.from_seq(letters, sort=False)
        pmf.sort_index(inplace=True)
        first, last = pmf.qs[0], pmf.qs[-1]
        self.assertEqual(first, 'a')
        self.assertEqual(last, 'n')

        cdf = pmf.make_cdf()
        first, last = cdf.qs[0], cdf.qs[-1]
        self.assertEqual(first, 'a')
        self.assertEqual(last, 'n')

        # Currently Pmf.from_seq sorts numerical quantities
        # regardless of the sort keyword.
        numeric = Pmf.from_seq([3, 6, 1, 7, 2], sort=False)
        self.assertEqual(numeric.qs[0], 1)
Ejemplo n.º 14
0
    def test_joint(self):
        """Check marginals and conditionals of a joint built from two Pmfs.

        The means of the marginal and conditional distributions must match
        the means of the Pmfs the joint was made from.
        """
        pmf1 = Pmf.from_seq([1, 2, 2])
        pmf2 = Pmf.from_seq([1, 2, 3])
        joint = Pmf.make_joint(pmf1, pmf2)

        marginal_first = joint.marginal(0)
        marginal_second = joint.marginal(1)
        self.assertAlmostEqual(marginal_first.mean(), pmf1.mean())
        self.assertAlmostEqual(marginal_second.mean(), pmf2.mean())

        conditional_first = joint.conditional(0, 1, 1)
        conditional_second = joint.conditional(1, 0, 1)
        self.assertAlmostEqual(conditional_first.mean(), pmf1.mean())
        self.assertAlmostEqual(conditional_second.mean(), pmf2.mean())
Ejemplo n.º 15
0
def kde_from_pmf(pmf, n=101, **options):
    """Smooth a Pmf with a Gaussian kernel density estimate.

    pmf: Pmf object; its quantities are the data, its probabilities
         the weights
    n: number of equally spaced points between the smallest and
       largest quantity
    options: forwarded to the Pmf constructor of the result

    returns: normalized Pmf evaluated at those points
    """
    # TODO: should this take qs rather than use min-max?
    estimator = gaussian_kde(pmf.qs, weights=pmf.ps)
    low, high = pmf.qs.min(), pmf.qs.max()
    grid = np.linspace(low, high, n)
    smoothed = Pmf(estimator.evaluate(grid), grid, **options)
    smoothed.normalize()
    return smoothed
Ejemplo n.º 16
0
    def testStats(self):
        """Check summary statistics across every distribution type.

        For the values 1 through 6, the same mean, variance, standard
        deviation, median and 0.8 quantile are expected from the Pmf, its
        Cdf, its survival function, and the hazard functions made from the
        Pmf and from the Cdf.
        """
        def check_summary(dist):
            # Identical expectations for every representation.
            self.assertAlmostEqual(dist.mean(), 3.5)
            self.assertAlmostEqual(dist.var(), 2.91666666)
            self.assertAlmostEqual(dist.std(), 1.70782512)
            self.assertAlmostEqual(dist.median(), 3)
            self.assertAlmostEqual(dist.quantile(0.8), 5)

        pmf = Pmf.from_seq([1, 2, 3, 4, 5, 6])
        check_summary(pmf)

        cdf = pmf.make_cdf()
        check_summary(cdf)

        check_summary(pmf.make_surv())
        check_summary(pmf.make_hazard())
        check_summary(cdf.make_hazard())
Ejemplo n.º 17
0
    def testHead(self):
        """Check that ``head()`` returns the same type it was called on."""
        pmf1 = Pmf.from_seq([1, 2, 3, 4, 5, 6])
        pmf_head = pmf1.head()
        self.assertEqual(type(pmf_head), type(pmf1))

        cdf1 = pmf1.make_cdf()
        cdf_head = cdf1.head()
        self.assertEqual(type(cdf_head), type(cdf1))
 def testCredible(self):
     """Check the 90% credible interval of a Pmf over 0..100 and its Cdf."""
     pmf = Pmf.from_seq(np.arange(101))
     cdf = pmf.make_cdf()

     # Both representations must report the same interval.
     for dist in (pmf, cdf):
         interval = dist.credible_interval(0.9)
         self.assertListEqual(list(interval), [5, 95])
Ejemplo n.º 19
0
def make_uniform(start, stop, num=51, name=None, **options):
    """Build a uniform Pmf over equally spaced quantities.

    start: lower bound
    stop: upper bound
    num: number of points
    name: optional string stored as the name of the index
    options: forwarded to the Pmf constructor

    returns: normalized Pmf
    """
    grid = np.linspace(start, stop, num)
    uniform = Pmf(1.0, grid, **options)
    uniform.normalize()
    if not name:
        return uniform
    uniform.index.name = name
    return uniform
Ejemplo n.º 20
0
def pmf_marginal(joint_pmf, level):
    """Compute a marginal distribution.

    joint_pmf: Pmf representing a joint distribution (multi-level index)
    level: int, index level to keep; probabilities are summed over
           every other level

    returns: Pmf
    """
    # `Series.sum(level=...)` was removed in pandas 2.0. Grouping by the
    # level is the documented replacement; sort=False keeps the order in
    # which the removed keyword returned the groups.
    return Pmf(joint_pmf.groupby(level=level, sort=False).sum())
Ejemplo n.º 21
0
def make_die(sides):
    """Build a Pmf for a die whose faces are 1 through `sides`.

    sides: int, number of faces

    returns: Pmf in which every face has probability 1 / sides
    """
    faces = np.arange(1, sides + 1)
    return Pmf(1 / sides, faces)
Ejemplo n.º 22
0
def make_binomial(n, p):
    """Build the Pmf of a binomial distribution.

    n: number of trials
    p: probability of success

    returns: Pmf over k = 0..n
    """
    successes = np.arange(n + 1)
    probs = binom.pmf(successes, n, p)
    result = Pmf(probs, successes)
    return result
Ejemplo n.º 23
0
def marginal(joint, axis):
    """Sum a joint distribution along one axis and wrap it in a Pmf.

    axis=1 returns the marginal distribution of the first variable
    axis=0 returns the marginal distribution of the second variable

    joint: DataFrame representing a joint distribution
    axis: int axis to sum along

    returns: Pmf
    """
    totals = joint.sum(axis=axis)
    return Pmf(totals)
Ejemplo n.º 24
0
def pmf_of_age(brfss):
    """Show a bar chart of the PMF of the "AGE" column.

    brfss: table indexable by column name (presumably a DataFrame)
    """
    # Build the PMF from the age column and draw it as bars.
    ages = brfss["AGE"]
    Pmf.from_seq(ages).bar()

    # Axis labels, then display.
    plt.xlabel('Age in years')
    plt.ylabel('PMF')
    plt.show()
Ejemplo n.º 25
0
    def testPmfSampling(self):
        """Check that ``choice`` and ``sample`` give the expected draws."""
        pmf = Pmf.from_seq([1, 2, 3, 4, 5, 6])
        expected = [2, 4, 2, 1, 5, 4, 4, 4, 1, 3]

        # choice: the global NumPy seed is set just before the call.
        np.random.seed(17)
        drawn = pmf.choice(10)
        self.assertTrue(np.all((drawn == expected)))

        # sample: the seed is supplied through random_state.
        drawn = pmf.sample(10, replace=True, random_state=17)
        self.assertTrue(np.all((drawn == expected)))
Ejemplo n.º 26
0
    def testCopy(self):
        """Check that ``copy()`` of a Pmf and of a Cdf keeps every value."""
        def assert_same_values(original, duplicate):
            # Compare the two objects quantity by quantity.
            for q in original.qs:
                self.assertAlmostEqual(original[q], duplicate[q])

        values = [1, 2, 2, 3, 5]
        pmf = Pmf.from_seq(values)
        assert_same_values(pmf, pmf.copy())

        cdf = pmf.make_cdf()
        assert_same_values(cdf, cdf.copy())
Ejemplo n.º 27
0
    def testNormalize(self):
        """Check ``normalize()`` on an unnormalized Pmf and Cdf.

        In both cases the call must return the prior total (9 here) and
        leave normalized values behind.
        """
        values = [0, 1, 2, 3, 3, 4, 4, 4, 5]

        pmf = Pmf.from_seq(values, normalize=False)
        pmf_total = pmf.normalize()
        self.assertAlmostEqual(pmf_total, 9)
        self.assertAlmostEqual(pmf[3], 0.22222222)

        cdf = Cdf.from_seq(values, normalize=False)
        cdf_total = cdf.normalize()
        self.assertAlmostEqual(cdf_total, 9)
        self.assertAlmostEqual(cdf(3), 0.55555555)
Ejemplo n.º 28
0
def make_mixture(pmf, pmf_seq):
    """Combine conditional distributions into a weighted mixture.

    pmf: mapping from each hypothesis to its probability
    pmf_seq: sequence of Pmfs, each representing
             a conditional distribution for one hypothesis

    returns: Pmf representing the mixture
    """
    # After the transpose each column holds one component; quantities a
    # component lacks are filled with 0.
    table = pd.DataFrame(pmf_seq).fillna(0).transpose()
    # Weight every component by the probability of its hypothesis.
    table *= pmf.ps
    return Pmf(table.sum(axis=1))
Ejemplo n.º 29
0
def make_a_pmf(gss):
    """Show a bar chart of the PMF of the 'age' column.

    gss: table indexable by column name whose columns expose `.values`
         (presumably a DataFrame)
    """
    # Build the PMF from the raw values of the age column.
    ages = gss['age'].values
    age_pmf = Pmf.from_seq(ages)
    age_pmf.bar()

    # Axis labels, then display.
    plt.xlabel('Age')
    plt.ylabel('PMF')
    plt.show()
Ejemplo n.º 30
0
def compare_fb_to_ws(dirname=None):
    """Plot the Facebook degree PMF next to that of a Watts-Strogatz graph.

    dirname: directory containing 'facebook_combined.txt.gz'. When omitted,
             the location that used to be hard-coded here is used, so
             existing zero-argument calls behave as before.

    The Watts-Strogatz graph is built from the `n` and `k` that
    analyze_graph reports for the Facebook graph. The figure is saved via
    savefig('myfigs/chap04-1') and then shown.
    """
    import os

    if dirname is None:
        dirname = '/Users/bensmith/Documents/ThinkSeries/ThinkComplexity2/data/'
    fin = os.path.join(dirname, 'facebook_combined.txt.gz')
    fb = read_graph(fin)

    print('Facebook')
    n, m, k, degs = analyze_graph(fb)
    pmf_fb = Pmf.from_seq(degs)

    x = 25
    print('fewer than %i friends: %.3f' %(x, cumulative_prob(pmf_fb, x)))

    ws = nx.watts_strogatz_graph(n, k, 0.05, seed=15)
    print('Watts-Strogatz')
    n, m, k, degs = analyze_graph(ws)
    pmf_ws = Pmf.from_seq(degs)

    plt.figure(figsize=(8, 4))
    # Markers only, no connecting line.
    options = dict(ls='', marker='.')

    # Left panel: Facebook degrees on log-log axes with a dashed gray
    # reference line.
    plt.subplot(1, 2, 1)
    plt.plot([20, 1000], [5e-2, 2e-4], color='gray', linestyle='dashed')
    pmf_fb.plot(label='Facebook', color='C0', **options)
    decorate(xlabel='Degree', ylabel='PMF',
             xscale='log', yscale='log')

    # Right panel: Watts-Strogatz degrees on the same kind of axes.
    plt.subplot(1, 2, 2)
    pmf_ws.plot(label='WS graph', color='C1', **options)
    decorate(xlabel='Degree',
             xscale='log', yscale='log')

    savefig('myfigs/chap04-1')
    plt.show()