Exemple #1
0
def main():
    """Print gestation-length spread statistics, then show the plots.

    Steps, in order:
      1. Load the pregnancies dataset and keep the records whose
         ``outcome`` equals 1.
      2. Split those records with ``birthord=1`` and print the variance and
         standard deviation of ``prglength`` for each of the two groups.
      3. Print the mean, variance and standard deviation of the pumpkin
         weights.
      4. Draw the bar chart, the PMF plot and the PMF-difference plot.
    """
    dataset = first.get_pregnancies_dataset()
    live_births = [rec for rec in dataset.records if rec.outcome == 1]
    first_babies, later_babies = first.split_one_vs_all(live_births,
                                                        birthord=1)

    lengths_first = first.get_values_for_field(first_babies, 'prglength')
    lengths_later = first.get_values_for_field(later_babies, 'prglength')
    variance_first = thinkstats.Var(lengths_first)
    variance_later = thinkstats.Var(lengths_later)

    # First babies: variance, then its square root as the standard deviation.
    print(
        'Variance of the first children gestation periods: {} weeks^2'.format(
            variance_first))
    sd_first = math.sqrt(variance_first)
    print('\tAnd the sd is {} weeks.'.format(sd_first))

    # Subsequent babies: same two figures.
    print(
        'Variance of the subsequent children gestation periods: {} weeks^2'
        .format(variance_later))
    sd_later = math.sqrt(variance_later)
    print('\tAnd the sd is {} weeks.'.format(sd_later))

    # Pumpkin exercise: pumpkins() yields a (mean, variance) pair.
    pumpkin_weights = get_pumpkin_weights()
    print('Finding the variance and mean for these pumpkins: {}'.format(
        pumpkin_weights))
    mean, variance = pumpkins(pumpkin_weights)
    std_dev = math.sqrt(variance)
    print('\tmean = {}\tvariance = {}\tsd = {}'.format(
        mean, variance, std_dev))

    # Each plotting helper receives the full, unfiltered dataset.
    bar_chart(dataset)
    plot_pregnancy_pmf(dataset)
    plot_pregnancies_pmf_differences(dataset)
Exemple #2
0
def plot_pregnancies_pmf_differences(prg, week_lo=35, week_hi=46):
    """Plot the per-week PMF difference between first and subsequent babies.

    Live pregnancies are split on ``birthord=1``; a PMF of ``prglength`` is
    built for each group.  For every week in ``range(week_lo, week_hi)`` a
    bar of height ``100 * (P_first(week) - P_subsequent(week))`` is drawn.

    Args:
        prg: Pregnancies dataset, passed to ``first.get_live_pregnancies``.
        week_lo (int): First week plotted (inclusive).
        week_hi (int): End of the plotted range (exclusive).
    """
    live_prg = first.get_live_pregnancies(prg)
    firsts, subsequents = first.split_one_vs_all(live_prg, birthord=1)
    first_prglen = [record.prglength for record in firsts]
    subsequent_prglen = [record.prglength for record in subsequents]

    p1 = pmf.MakePmfFromList(first_prglen)
    p2 = pmf.MakePmfFromList(subsequent_prglen)

    # ``range`` (not ``xrange``) keeps this working on Python 3 as well as
    # Python 2.  Explicit lists give matplotlib real sequences in a defined
    # order rather than dict views.
    weeks = list(range(week_lo, week_hi))
    differences = [100 * (p1.Prob(week) - p2.Prob(week)) for week in weeks]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.bar(weeks, differences, width=1., color='blue')
    ax.set_title('Percent differences in probabilities of gestation periods')
    ax.set_xlabel('Gestation period(weeks)')
    ax.set_ylabel('100(Pfirst - Psubsequent)')
    plt.show()
Exemple #3
0
def main():
    """Print birth-timing probabilities per group, then the relative risks.

    Builds a PMF of ``prglength`` for first babies, other babies and all
    live births.  For each PMF it prints the result of ``prob_early``,
    ``prob_on_time`` and ``prob_late`` as a percentage.  It then prints the
    entries of ``get_relative_risks(pmf_firsts, pmf_others)`` for the same
    three timing categories.
    """
    live = first.get_live_pregnancies(first.get_pregnancies_dataset())
    firsts, others = first.split_one_vs_all(live, birthord=1)

    pmf_firsts = Pmf.MakePmfFromList(rec.prglength for rec in firsts)
    pmf_others = Pmf.MakePmfFromList(rec.prglength for rec in others)
    pmf_all = Pmf.MakePmfFromList(rec.prglength for rec in live)

    # Labels are paired positionally with the PMFs / probability functions.
    birth_types = ('first babies', 'subsequent babies', 'all live births')
    prob_types = ('early', 'on time', 'late')
    group_pmfs = (pmf_firsts, pmf_others, pmf_all)
    prob_funcs = (prob_early, prob_on_time, prob_late)

    line = 'The probability of {} being born {} is {} %'
    for birth_type, group_pmf in zip(birth_types, group_pmfs):
        for prob_type, prob_func in zip(prob_types, prob_funcs):
            percent = prob_func(group_pmf) * 100
            print(line.format(birth_type, prob_type, percent))

    risks = get_relative_risks(pmf_firsts, pmf_others)
    print()
    for prob_type in prob_types:
        risk = risks[prob_type]
        print('The relative risk of first babies to others arriving {} is {}'
              .format(prob_type, risk))
Exemple #4
0
def bar_chart(pregnancies):
    """Show side-by-side histograms of gestation length for two groups.

    Live pregnancies are split on ``birthord=1``; a histogram of
    ``prglength`` is built for each group and the two are drawn as adjacent
    bars (blue for the first group, grey for the other).

    Args:
        pregnancies: Dataset passed to ``first.get_live_pregnancies``.
    """
    live = first.get_live_pregnancies(pregnancies)
    first_group, other_group = first.split_one_vs_all(live, birthord=1)

    hist_first = pmf.MakeHistFromList(
        first.get_values_for_field(first_group, 'prglength'))
    hist_other = pmf.MakeHistFromList(
        first.get_values_for_field(other_group, 'prglength'))

    axes = plt.figure().add_subplot(111)
    bar_width = 0.35

    weeks_first, counts_first = hist_first.Render()
    weeks_other, counts_other = hist_other.Render()

    bars_first = axes.bar(weeks_first, counts_first, bar_width, color='blue')
    # Shift the second series right by one bar width so the bars sit side by
    # side instead of overlapping.
    shifted_weeks = np.array(weeks_other) + bar_width
    bars_other = axes.bar(shifted_weeks, counts_other, bar_width,
                          color='grey')

    axes.set_xlabel('Gestation period(weeks)')
    axes.set_ylabel('Frequency')
    axes.set_title('Comparison of the gestation periods of first and subsequent'
                   ' babies')
    axes.legend((bars_first[0], bars_other[0]),
                ('First Babies', 'Subsequent Babies'))
    plt.show()
Exemple #5
0
def plot_pregnancy_pmf(prg):
    """Show side-by-side PMFs of gestation length for two groups.

    Live pregnancies are split on ``birthord=1``; a PMF of ``prglength`` is
    built for each group and the two are drawn as adjacent bars (blue for
    the first group, grey for the other).

    Args:
        prg: Dataset passed to ``first.get_live_pregnancies``.
    """
    live = first.get_live_pregnancies(prg)
    first_group, other_group = first.split_one_vs_all(live, birthord=1)
    lengths_first = [rec.prglength for rec in first_group]
    lengths_other = [rec.prglength for rec in other_group]

    axes = plt.figure().add_subplot(111)
    bar_width = 0.35

    pmf_first = pmf.MakePmfFromList(lengths_first)
    pmf_other = pmf.MakePmfFromList(lengths_other)
    weeks_first, probs_first = pmf_first.Render()
    weeks_other, probs_other = pmf_other.Render()

    bars_first = axes.bar(weeks_first, probs_first, bar_width, color='blue')
    # Shift the second series right by one bar width so the bars sit side by
    # side instead of overlapping.
    shifted_weeks = np.array(weeks_other) + bar_width
    bars_other = axes.bar(shifted_weeks, probs_other, bar_width, color='grey')

    axes.set_xlabel('Gestation period(weeks)')
    axes.set_ylabel('Probability')
    axes.set_title('Comparison of the gestation periods of first and subsequent'
                   ' babies')
    axes.legend((bars_first[0], bars_other[0]),
                ('First Babies', 'Subsequent Babies'))
    plt.show()
    it has not happened before it.
    Args:
        pmf(Pmf.Pmf): The PMF of pregnancies.

    '''
    my_pmf = pmf.Copy()
    for value in my_pmf.Values():
        if value < week:
            my_pmf.Remove(value)
    my_pmf.Normalize()
    return my_pmf.Prob(week)


if __name__ == '__main__':
    # Script entry point: for first babies and for all others, evaluate
    # probability_of_birth_in_week at each week of a fixed range and draw the
    # two resulting curves on one set of axes.
    live_pregnancies = get_live_pregnancies(get_pregnancies_dataset())
    firsts, others = split_one_vs_all(live_pregnancies,
                                      birthord=1)
    # Gestation lengths (prglength) of each group, used to build the PMFs.
    first_durations = [record.prglength for record in firsts]
    others_durations = [record.prglength for record in others]
    first_pmf = Pmf.MakePmfFromList(first_durations)
    others_pmf = Pmf.MakePmfFromList(others_durations)
    fig = plt.figure()
    sp = fig.add_subplot(111)
    # Alternative kept for reference: derive each range from the observed
    # minimum and maximum durations instead of the fixed bounds below.
    # weeks1, weeks2 = (range(min(first_durations), max(first_durations)),
    #                   range(min(others_durations), max(others_durations)))
    # Both groups use the same fixed range(36, 45), i.e. weeks 36 through 44.
    weeks1, weeks2 = map(lambda t: range(*t), ((36, 45), (36, 45)))
    probs_first = [probability_of_birth_in_week(first_pmf, week)
                   for week in weeks1]
    probs_others = [probability_of_birth_in_week(others_pmf, week)
                    for week in weeks2]
    # First babies are drawn in blue ('b'), the others in red ('r').
    r1 = sp.plot(weeks1, probs_first, color='b')
    r2 = sp.plot(weeks2, probs_others, color='r')