예제 #1
0
def PartitionRecords(table):
    """Splits live-birth records into first babies and all other babies.

    Records whose outcome is not 1 (non-live births) are left out.

    Args:
        table: pregnancy Table whose records are partitioned

    Returns:
        tuple (firsts, others) of survey.Pregnancies tables
    """
    firsts, others = survey.Pregnancies(), survey.Pregnancies()

    for rec in table.records:
        # only live births (outcome 1) are assigned to a group
        if rec.outcome == 1:
            target = firsts if rec.birthord == 1 else others
            target.AddRecord(rec)

    return firsts, others
예제 #2
0
def PoolRecords(*tables):
    """Builds one Pregnancies table from the records of every input table.

    Args:
        tables: any number of tables, each exposing a `records` attribute

    Returns:
        new survey.Pregnancies object extended with each table's records
    """
    merged = survey.Pregnancies()
    for source in tables:
        merged.ExtendRecords(source.records)
    return merged
예제 #3
0
def main():
    """Prints the quartiles and interquartile range of live birth weights.

    Each weight is combined as pounds * 16 + ounces. Only live births
    (outcome 1) whose pound and ounce fields are both ints, and whose
    combined weight is at most 200, are used.
    """
    # Exercise 3.11
    pregnancies = survey.Pregnancies()
    pregnancies.ReadRecords()

    weights = []
    for rec in pregnancies.records:
        if rec.outcome != 1:
            continue
        lbs, oz = rec.birthwgt_lb, rec.birthwgt_oz
        # non-int fields and implausibly large totals are discarded
        if type(lbs) == int and type(oz) == int and lbs * 16 + oz <= 200:
            weights.append(lbs * 16 + oz)

    cdf = Cdf.MakeCdfFromList(weights, name="live birth weights")

    lower_quartile = cdf.Value(.25)
    middle = median(cdf)
    upper_quartile = cdf.Value(.75)
    spread = interquartile(cdf)
    print('25th: %d 50th: %d 75th: %d interquartile range: %d' %
          (lower_quartile, middle, upper_quartile, spread))
예제 #4
0
def Summarize(data_dir):
    """Prints summary statistics for first babies and others.
    
    Returns:
        tuple of Tables
    """
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)

    # make a map from caseid to list of pairs
    d = {}
    for record in table.records:
        # skip non-live births
        if record.outcome != 1:
            continue

        # skip multiple births
        if record.nbrnaliv > 1:
            continue

        pair = record.birthord, record.prglength
        d.setdefault(record.caseid, []).append(pair)

    print len(d)

    # find all caseids with more than one live birth
    pmf = Pmf()
    for caseid, t in d.iteritems():
        if len(t) <= 1:
            continue

        t.sort()
        _, prglength1 = t[0]
        _, prglength2 = t[1]

        if prglength1 < 15 or prglength2 < 15:
            continue

        diff = prglength2 - prglength1
        if abs(diff) > 15:
            print caseid, prglength1, prglength2

        pmf.Incr(diff)

    pmf.Normalize()
    return pmf
예제 #5
0
def PartitionBabies(data_dir='res'):
    """Collects (prglength, totalwgt_oz) pairs for live births.

    Args:
        data_dir: directory passed to ReadRecords; defaults to 'res', the
            location that was previously hard-coded

    Returns:
        tuple (firsts, others, babies) of lists of pairs: births with
        birthord == 1, the remaining births, and both groups combined
    """
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)

    firsts, others, babies = [], [], []
    for baby in table.records:
        # only records with outcome 1 (live births) are kept
        if baby.outcome != 1:
            continue
        data = (baby.prglength, baby.totalwgt_oz)
        babies.append(data)
        if baby.birthord == 1:
            firsts.append(data)
        else:
            others.append(data)

    return firsts, others, babies
예제 #6
0
def main(name, data_dir='.'):
    table = survey.Pregnancies()
    table.ReadRecords(data_dir)
    print "Number of Pregnancies: ", len(table.records)
    live_births = 0
    tot_births = 0
    first_babies = []
    other_bablies = []
    for record in table.records:
        tot_births += 1
        if record.outcome == 1:
            live_births += 1
            if record.birthord == 1:
                first_babies.append(record)
            else:
                other_bablies.append(record)

    print "live_births: ", live_births, " out of: ", tot_births, " records."
    print "first_babies: ", len(first_babies)
    print "other_bablies: ", len(other_bablies)

    print thinkstats.Mean([1, 1, 1, 3, 3, 591])
예제 #7
0
def GetDurations(data_dir, keep_codes):
    """Reads pregnancy durations from NSFG data.

    data_dir: location of the data file
    """
    preg = survey.Pregnancies()
    preg.ReadRecords(data_dir)
    print 'Number of pregnancies', len(preg.records)

    pmf = thinkstats2.Pmf()
    for record in preg.records:
        pmf.Incr(record.outcome)

    pmf.Print()

    durations = [
        record.prglength for record in preg.records
        if record.outcome in keep_codes
    ]

    print 'Number of relevant pregnancies', len(durations)
    return durations
예제 #8
0
def main():
    """Plots a resampled CDF of live birth weights and a random PMF/CDF.

    Each weight is combined as pounds * 16 + ounces. Only live births
    (outcome 1) whose pound and ounce fields are both ints, and whose
    combined weight is at most 200, are used.
    """
    # Exercise 3.9
    pregnancies = survey.Pregnancies()
    pregnancies.ReadRecords()

    weights = []
    for rec in pregnancies.records:
        if rec.outcome != 1:
            continue
        lbs, oz = rec.birthwgt_lb, rec.birthwgt_oz
        if type(lbs) == int and type(oz) == int and lbs * 16 + oz <= 200:
            weights.append(lbs * 16 + oz)

    weights_cdf = Cdf.MakeCdfFromList(weights, name="live birth weights")
    resampled = sample(weights_cdf, 1000)
    resampled_cdf = Cdf.MakeCdfFromList(resampled)
    myplot.Cdf(resampled_cdf)
    myplot.show(title="CDF of live births resampled")

    # Exercise 3.10
    draws = [random.random() for _ in range(1000)]
    draws_pmf = Pmf.MakePmfFromList(draws)
    myplot.Pmf(draws_pmf)
    myplot.show(title="random pmf")
    draws_cdf = Cdf.MakeCdfFromList(draws)
    myplot.Cdf(draws_cdf)
    # NOTE(review): this call uses myplot.Show while the two above use
    # myplot.show -- confirm both spellings exist in myplot.
    myplot.Show(title="random cdf")
예제 #9
0
def main():
    table = survey.Pregnancies()
    table.read_records()

    # Calculate the first babies and other babies averages.
    firsts, others = first.collect_live_births(table)
    firsts_average, others_average = first.averages(firsts, others)

    # Get the pregnancy lengths for first babies and others.
    firsts_lengths = [r.prglength for r in firsts]
    others_lengths = [r.prglength for r in others]

    # Compute the variance for first babies.
    firsts_var = thinkstats.variance(firsts_lengths, firsts_average)

    # Compute the variance for other babies.
    others_var = thinkstats.variance(others_lengths, others_average)

    # Show standard deviations.
    print 'Standard deviation for first gestations: {0}'.format(
        math.sqrt(firsts_var))
    print 'Standard deviation for other gestations: {0}'.format(
        math.sqrt(others_var))
예제 #10
0
    def testPregnancies(self):
        """Checks the record count and selected field frequencies of the
        data read by survey.Pregnancies.

        Uses assertEqual/assertAlmostEqual; the assertEquals and
        assertAlmostEquals spellings are deprecated aliases that no longer
        exist in Python 3.12+.
        """
        preg = survey.Pregnancies()
        preg.ReadRecords()
        self.assertEqual(len(preg.records), 13593)

        hist = MakeHist(preg, 'nbrnaliv')
        self.assertEqual(hist.Freq(1), 8981)

        hist = MakeHist(preg, 'babysex')
        self.assertEqual(hist.Freq(1), 4641)
        self.assertEqual(hist.Freq(2), 4500)

        hist = MakeHist(preg, 'outcome')
        self.assertEqual(hist.Freq(1), 9148)

        hist = MakeHist(preg, 'birthord')
        self.assertEqual(hist.Freq(1), 4413)

        hist = MakeHist(preg, 'birthwgt_lb')
        self.assertEqual(hist.Freq(6), 2223)

        hist = MakeHist(preg, 'birthwgt_oz')
        self.assertEqual(hist.Freq(6), 709)

        # missing values show up under the string key 'NA'
        hist = MakeHist(preg, 'agepreg')
        self.assertEqual(hist.Freq('NA'), 352)
        self.assertEqual(hist.Freq(25.0), 58)

        hist = MakeHist(preg, 'totalwgt_oz')
        self.assertEqual(hist.Freq('NA'), 4509)

        # finalwgt is checked by the range of its values, not by counts
        hist = MakeHist(preg, 'finalwgt')
        t = hist.Values()
        low, high = min(t), max(t)
        self.assertAlmostEqual(low, 118.656789706)
        self.assertAlmostEqual(high, 261879.9538641)
예제 #11
0
def main():
    table = survey.Pregnancies()
    table.read_records()

    print 'Number of pregnancies: {0}'.format(len(table.records))

    # 2nd exercise
    live = live_births(table)

    print 'Live birhs: {0}'.format(live)

    # 3rd exercise
    firsts, others = collect_live_births(table)

    print 'First babies: {0}'.format(len(firsts))
    print 'Other babies: {0}'.format(len(others))

    # 4th exercise
    firsts_average, others_average = averages(firsts, others)

    difference_days = (firsts_average - others_average) * 7.0

    print 'Difference in days: {0}'.format(difference_days)
    print 'Difference in hours: {0}'.format(difference_days * 24.0)
예제 #12
0
import survey
# Create the pregnancy table, read its records, and report how many there are.
table = survey.Pregnancies()
table.ReadRecords()
# NOTE(review): len(table) requires the table object itself to support
# len(); other code counts len(table.records) instead -- confirm.
print 'Num of pregnancies', len(table)
예제 #13
0
def main():
    """Prints the first usable live birth weight.

    Each weight is combined as pounds * 16 + ounces. Only live births
    (outcome 1) whose pound and ounce fields are both ints, and whose
    combined weight is at most 200, are used.
    """
    pregnancies = survey.Pregnancies()
    pregnancies.ReadRecords()

    weights = []
    for rec in pregnancies.records:
        if rec.outcome != 1:
            continue
        lbs, oz = rec.birthwgt_lb, rec.birthwgt_oz
        if type(lbs) == int and type(oz) == int and lbs * 16 + oz <= 200:
            weights.append(lbs * 16 + oz)

    print(weights[0])
예제 #14
0
def MakeTables(data_dir='.'):
    """Loads the survey data and partitions it with PartitionRecords.

    Args:
        data_dir: directory passed to ReadRecords

    Returns:
        tuple (table, firsts, others): the full table plus the two
        tables returned by PartitionRecords
    """
    everything = survey.Pregnancies()
    everything.ReadRecords(data_dir)

    partitioned = PartitionRecords(everything)
    firsts, others = partitioned
    return everything, firsts, others
예제 #15
0
import survey
import Pmf
import matplotlib.pyplot as plt
import numpy as np

# Load every pregnancy record from the 'data' directory.
table = survey.Pregnancies()
table.readRecords(data_dir='data')

# Partition live births (outcome 1) into first babies and the rest.
firsts_babies, others_babies = survey.Pregnancies(), survey.Pregnancies()

for r in table.records:
    if r.outcome == 1:
        if r.birthord == 1:
            firsts_babies.addRecord(r)
        else:
            others_babies.addRecord(r)

# Histogram the pregnancy lengths of each group.
firsts_prglengths = [r.prglength for r in firsts_babies.records]
others_prglengths = [r.prglength for r in others_babies.records]

firts_hist = Pmf.Hist(firsts_prglengths)
others_hist = Pmf.Hist(others_prglengths)

times1, values1 = firts_hist.render()
times2, values2 = others_hist.render()
# array form so the second series can be shifted by a scalar below
times2 = np.array(times2)

# Draw the two series side by side: the second is offset by one bar width.
plt.bar(times1, values1, width=0.45)
plt.bar(times2 + 0.45, values2, width=0.45)
예제 #16
0
import survey

table = survey.Pregnancies()
table.ReadRecords()
firsts = survey.Pregnancies()
others = survey.Pregnancies()

for p in table.records:
    if p.outcome != 1: continue
    else:
        if p.birthord == 1:
            firsts.AddRecord(p)
        else:
            others.AddRecord(p)

print 'Number of first babies', len(firsts.records)
print 'Number of other babies', len(others.records)
예제 #17
0
def PoolRecords(*tables):
    """Merge the records of the given tables into one Pregnancies table.

    Args:
        tables: any number of tables, each exposing a `records` attribute

    Returns:
        new survey.Pregnancies object extended with each table's records
    """
    pool = survey.Pregnancies()
    for records in (tbl.records for tbl in tables):
        pool.ExtendRecords(records)
    return pool
예제 #18
0
def ReadPregnancyRecords():
    """Loads the pregnancy survey data.

    Returns:
        survey.Pregnancies table after ReadRecords() has been called on it
    """
    pregnancies = survey.Pregnancies()
    pregnancies.ReadRecords()
    return pregnancies
예제 #19
0
#!/usr/bin/env python

import survey
import thinkstats

# Module-level pregnancy table; its records are read once when this module
# is loaded and are then used by the exercise functions that follow.
TABLE = survey.Pregnancies()
TABLE.ReadRecords()


def ex1_3_2():
    """Count records in TABLE by outcome code.

    Returns:
        dict with 'live' (records with outcome == 1) and 'death'
        (records with outcome == 2); other outcomes are not counted
    """
    live_count = sum(1 for rec in TABLE.records if rec.outcome == 1)
    death_count = sum(1 for rec in TABLE.records if rec.outcome == 2)
    return {'live': live_count, 'death': death_count}


def ex1_3_3():
    """Number of live, first births vs. non-first.

    Returns:
        dict with 'first' (live births with birthord == 1) and 'other'
        (all remaining live births) counts
    """
    d = {'first': 0, 'other': 0}

    for r in TABLE.records:
        # only live births (outcome 1) are counted
        if r.outcome != 1:
            continue
        if r.birthord == 1:
            d['first'] += 1
        else:
            d['other'] += 1
    return d
예제 #20
0
def makeTables(data_dir):
    """Load the survey data and partition it with partitionRecords.

    Args:
        data_dir: directory passed to readRecords

    Returns:
        tuple (table, firsts, others): the full table plus the two
        values returned by partitionRecords
    """
    everything = survey.Pregnancies()
    everything.readRecords(data_dir)

    partitioned = partitionRecords(everything)
    firsts, others = partitioned
    return everything, firsts, others
예제 #21
0
import survey
import thinkstats
import math

# Load the pregnancy records and report how many were read.
pregnancies = survey.Pregnancies()
pregnancies.ReadRecords()
print 'Number of pregnancies', len(pregnancies.records)
print

# Pregnancy lengths of live births, split by birth order.
preg_lengths_first = []
preg_lengths_others = []

for preg in pregnancies.records:
    # skip records whose outcome is not 1 (non-live births)
    if preg.outcome != 1:
        continue
    if preg.birthord == 1:
        preg_lengths_first.append(preg.prglength)
    else:
        preg_lengths_others.append(preg.prglength)

# Count, mean, variance and standard deviation for first children.
pregs_first = len(preg_lengths_first)
mean_length_first, var_length_first = thinkstats.MeanVar(preg_lengths_first)
std_length_first = math.sqrt(var_length_first)

# The same statistics for all other children.
pregs_others = len(preg_lengths_others)
mean_length_others, var_length_others = thinkstats.MeanVar(preg_lengths_others)
std_length_others = math.sqrt(var_length_others)

# Report the first-child statistics.
print 'Number of live births, first child', pregs_first
print 'Mean pregnancy length (weeks), first child', mean_length_first
print 'Variance of gestation time, first child', var_length_first