        # NOTE(review): the enclosing def (and whatever binds `k` and `v`)
        # starts above this view; the nesting of the next two calls is
        # reconstructed from the collapsed source -- confirm against the
        # full file.
        pyplot.plot(weeks, v, label=k)
    # Pass the root name, axis labels and title for the figure to myplot.Save.
    myplot.Save(root='2_7',
                xlabel='weeks',
                ylabel=r'Prob{x $=$ weeks | x $\geq$ weeks}',
                title='Conditional Probability')


# On first blush, this is really just another survival analysis problem.  So
# we build a Pmf.Pmf for babies' births and use the same kind of analysis
# as we did for Exercise 2-4 using utils.remaining_lifetime.

if __name__ == '__main__':
    # The data directory is taken from the first command-line argument.
    data_dir = sys.argv[1]
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)
    # Split the table into two groups, labelled 'firsts' and 'others' below.
    firsts, others = partition_births(table)
    descriptive.Process(firsts, 'firsts')
    descriptive.Process(others, 'others')

    # Part 1 - conditional probability that a baby will be born in week 39
    # Relevant output from official code:
    # 39 0.633693045564 first babies
    # 39 0.715792395226 others
    _print_survival_analysis(39, firsts)
    _print_survival_analysis(39, others)

    # Part 2 isn't really a meaningful task if we're right to just use
    # the survival analysis approach.  In that case, Part 1 was just a
    # special use case.  Let's demonstrate this.
def prob_in_weeks(pmf, start, end):
    """Return the total probability of the values lying in [start, end].

    Args:
        pmf: object whose ``Items()`` method yields ``(value, probability)``
            pairs.
        start: lower bound of the range, inclusive.
        end: upper bound of the range, inclusive.

    Returns:
        The sum of the probabilities of every value ``v`` with
        ``start <= v <= end``; ``0`` when no value falls in the range.
    """
    return sum(prob for val, prob in pmf.Items() if start <= val <= end)


# Convenience wrappers that fix the week range: up to week 37 is "early",
# weeks 38-40 are "on time", week 41 onwards is "late".  The 1000000 upper
# bound is a sentinel standing in for "no upper limit".
prob_early = functools.partial(prob_in_weeks, start=0, end=37)
prob_on_time = functools.partial(prob_in_weeks, start=38, end=40)
prob_late = functools.partial(prob_in_weeks, start=41, end=1000000)


if __name__ == '__main__':
    # The data directory is taken from the first command-line argument.
    data_dir = sys.argv[1]
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)
    first_births, other_births = my_first.partition_births(table)

    # NOTE(review): this PMF is built from every record in the table, yet it
    # is reported as "all live" below -- confirm the table holds only live
    # births at this point.
    live_pmf = Pmf.MakePmfFromList([prg.prglength for prg in table.records])
    first_pmf = Pmf.MakePmfFromList(
        [prg.prglength for prg in first_births.records])
    other_pmf = Pmf.MakePmfFromList(
        [prg.prglength for prg in other_births.records])

    prob_early_first = prob_early(first_pmf)
    prob_early_other = prob_early(other_pmf)

    # Single-argument print(...) with %s formatting writes the same text as
    # the Python 2 statement form `print label, value` and also parses under
    # Python 3.  Probabilities are scaled by 100 to read as percentages.
    print('Early probability (all live): %s' % (prob_early(live_pmf) * 100))
    print('Early probability (firsts): %s' % (prob_early_first * 100))
    print('Early probability (others): %s' % (prob_early_other * 100))
    # Relative risk is the ratio of the two early probabilities; it raises
    # ZeroDivisionError if the "others" group has no early births.
    print('Relative risk of being early (first vs. other): %s'
          % (prob_early_first / prob_early_other))
    print('')
    prob_on_time_first = prob_on_time(first_pmf)
See README.md for more info.
"""
import sys

from my_first import partition_births
import survey
from thinkstats import Mean, Var
from utils import std_dev


if __name__ == '__main__':
    # The data directory is taken from the first command-line argument.
    data_dir = sys.argv[1]
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)
    firsts, others = partition_births(table)

    # NOTE(review): these two lists are not referenced in the visible part of
    # the script; the loop below rebuilds the same list for each group.
    # Confirm whether later code still needs them.
    firsts_gestation_lengths = list((p.prglength for p in firsts.records))
    others_gestation_lengths = list((p.prglength for p in others.records))

    # Compute the statistics of prglength for each group and attach them to
    # the group object itself as .mean, .variance and .std_dev.
    for births in (firsts, others):
        births_gestation_lengths = list((p.prglength for p in births.records))
        births.mean = Mean(births_gestation_lengths)
        # The mean computed above is passed on to Var, and both the mean and
        # the variance are passed on to std_dev.
        births.variance = Var(births_gestation_lengths, births.mean)
        births.std_dev = std_dev(births_gestation_lengths,
                                 births.mean,
                                 births.variance)

    # Report the values stored on the two group objects by the loop above.
    print 'The mean gestation time for firstborns is:', firsts.mean
    print 'The mean gestation time for others is:', others.mean
    print 'The gestation time variance for firstborns is:', firsts.variance
    # NOTE(review): the `def` line that owns this body is above this view;
    # the partials below show it is prob_in_weeks with `start`/`end` keywords.
    # Add up the probability of every value inside [start, end], both bounds
    # inclusive; the result stays 0 when no value falls in the range.
    prob_sum = 0
    for val, prob in pmf.Items():
        if start <= val <= end:
            prob_sum += prob
    return prob_sum


# Convenience wrappers that fix the week range: up to week 37 is "early",
# weeks 38-40 are "on time", week 41 onwards is "late".  The 1000000 upper
# bound is a sentinel standing in for "no upper limit".
prob_early = functools.partial(prob_in_weeks, start=0, end=37)
prob_on_time = functools.partial(prob_in_weeks, start=38, end=40)
prob_late = functools.partial(prob_in_weeks, start=41, end=1000000)


if __name__ == '__main__':
    # The data directory is taken from the first command-line argument.
    data_dir = sys.argv[1]
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)
    first_births, other_births = my_first.partition_births(table)

    # NOTE(review): this PMF is built from every record in the table, yet it
    # is reported as "all live" below -- confirm the table holds only live
    # births at this point.
    live_pmf = Pmf.MakePmfFromList([prg.prglength for prg in table.records])
    first_pmf = Pmf.MakePmfFromList(
        [prg.prglength for prg in first_births.records])
    other_pmf = Pmf.MakePmfFromList(
        [prg.prglength for prg in other_births.records])

    prob_early_first = prob_early(first_pmf)
    prob_early_other = prob_early(other_pmf)

    # Probabilities are scaled by 100 so they read as percentages.
    print 'Early probability (all live):', prob_early(live_pmf) * 100
    print 'Early probability (firsts):', prob_early_first * 100
    print 'Early probability (others):', prob_early_other * 100
    # Relative risk is the ratio of the two early probabilities.
    print 'Relative risk of being early (first vs. other):', prob_early_first / prob_early_other
    # A bare print emits a blank line to separate this group of output.
    print