Ejemplo n.º 1
0
def main(script):
    """Tests the functions in this module.

    Checks the pregnum value counts against hard-coded expected counts,
    then cross-validates pregnum against the pregnancy records.

    script: string script name
    """
    resp = ReadFemResp()
    pregnum = resp.pregnum

    # compare value counts with codebook
    # https://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=FEM&section=R&subSec=7869&srtLabel=606835
    counts = pregnum.value_counts().sort_index()
    codebook_counts = [2610, 1267, 1432, 1110, 611, 305, 150]
    for value, expected in enumerate(codebook_counts):
        assert counts[value] == expected
    # everything from the eighth entry onward is pooled into one count
    assert counts[7:].sum() == 158

    # cross-validation against the pregnancy file
    preg = nsfg.ReadFemPreg()

    # the pregnum total must equal the number of pregnancy records
    assert pregnum.sum() == len(preg)

    # each respondent's pregnum must equal the number of records
    # listed for that caseid in the pregnancy map
    preg_map = nsfg.MakePregMap(preg)
    for index, reported in resp.pregnum.items():
        caseid = resp.caseid[index]
        assert reported == len(preg_map[caseid])

    print('%s: All tests passed.' % script)
Ejemplo n.º 2
0
def ValidatePregnum(resp):
    """Validate pregnum in the respondent file.

    resp: respondent DataFrame

    returns: True if every respondent's pregnum equals the number of
             pregnancy records for that caseid; False at the first
             mismatch, which is printed as (caseid, record count, pregnum)
    """
    # read the pregnancy DataFrame; it must not overwrite `resp`
    preg = nsfg.ReadFemPreg()

    # make the map from caseid to list of pregnancy indices
    preg_map = nsfg.MakePregMap(preg)

    # iterate through the respondent pregnum series
    # (Series.items() replaces iteritems(), which pandas 2.0 removed)
    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        indices = preg_map[caseid]

        # check that pregnum from the respondent file equals
        # the number of records in the pregnancy file
        if len(indices) != pregnum:
            print(caseid, len(indices), pregnum)
            return False

    return True
Ejemplo n.º 3
0
def ValidatePregnum(resp):
    """Validate pregnum in the respondent file.

    resp: respondent DataFrame

    returns: True if, for every caseid in the pregnancy map, the number of
             pregnancy records equals the respondent's pregnum; False at
             the first mismatch, which is printed as
             (caseid, record count, pregnum)

    raises: KeyError if a caseid in the pregnancy map has no respondent row
    """
    # map each caseid to the pregnum of its first respondent row, so the
    # loop below is a dictionary lookup rather than a scan of the frame
    pregnum_by_caseid = {}
    for caseid, pregnum in zip(resp.caseid, resp.pregnum):
        pregnum_by_caseid.setdefault(caseid, pregnum)

    # read the pregnancy frame
    preg = nsfg.ReadFemPreg()

    # make the map from caseid to list of pregnancy indices
    preg_map = nsfg.MakePregMap(preg)

    # iterate through the preg_map
    for caseid, indices in preg_map.items():
        pregnum = pregnum_by_caseid[caseid]

        # check that pregnum from the respondent file equals
        # the number of records in the pregnancy file
        if len(indices) != pregnum:
            # report the pregnum that was actually compared
            print(caseid, len(indices), pregnum)
            return False

    return True
Ejemplo n.º 4
0
def ValidatePregnum(resp):
    """Cross-validate pregnum against the pregnancy file.

    resp: respondent DataFrame

    returns: True when every pregnum equals the number of pregnancy
             records for the same caseid; False at the first mismatch
    """
    # map from caseid to list of pregnancy indices
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())

    # walk the respondent pregnum series, one entry per respondent row
    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        n_records = len(preg_map[caseid])

        if n_records == pregnum:
            continue

        # Only the first discrepancy is reported: a single mismatch is
        # enough to say the data did not validate.
        print(caseid, n_records, pregnum)
        return False

    return True
Ejemplo n.º 5
0
def main():
    """Assert that pregnum matches the pregnancy record count per respondent.

    Prints 'Success' when every respondent passes; an AssertionError is
    raised at the first respondent that does not.
    """
    resp = nsfg.ReadFemResp()
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())

    for row_label, reported in resp.pregnum.items():
        records = preg_map[resp.caseid[row_label]]
        assert reported == len(records)

    print('Success')
Ejemplo n.º 6
0
def cross_validate_with_preg(preg_df, resp_df):
    """Cross-validate pregnum against the pregnancy records.

    preg_df: pregnancy DataFrame
    resp_df: respondent DataFrame

    returns: True if, for every caseid in the pregnancy map, the first
             matching respondent row's pregnum equals the number of
             pregnancy records; False at the first mismatch, which is
             printed as (caseid, record count, pregnum)
    """
    preg_map = nsfg.MakePregMap(preg_df)
    for preg_caseid, preg_idxs in preg_map.items():
        resp_pregnum = resp_df[resp_df.caseid == preg_caseid].pregnum.iloc[0]
        if resp_pregnum != len(preg_idxs):
            # preg_idxs is a list of row indices, so it cannot be indexed
            # by caseid; report the two counts that disagree instead
            print(preg_caseid, len(preg_idxs), resp_pregnum)
            return False
    return True
Ejemplo n.º 7
0
def PairWiseDifference(live):
    """Collect Diffs() results over pregnancy lengths grouped by caseid.

    Only records with prglngth >= 37 are considered, and only caseids
    with at least two such records contribute.

    live: DataFrame of pregnancy records

    returns: list of the values produced by Diffs for each group
    """
    kept = live[live.prglngth >= 37]

    diffs = []
    for indices in nsfg.MakePregMap(kept).values():
        lengths = kept.loc[indices].prglngth.values
        if len(lengths) < 2:
            continue
        diffs.extend(Diffs(lengths))
    return diffs
Ejemplo n.º 8
0
def validate(resp, preg):
    """Cross-validate pregnum against the pregnancy records.

    resp: respondent DataFrame
    preg: pregnancy DataFrame

    returns: True if every respondent's pregnum equals the number of
             pregnancy records for that caseid; False at the first
             mismatch, after printing the offending respondent row
    """
    preg_map = nsfg.MakePregMap(preg)
    for index, pregnum in resp.pregnum.items():
        caseidresp = resp.caseid[index]
        indices = preg_map[caseidresp]
        if len(indices) != pregnum:
            # resp[index] would look up a column; .loc selects the row
            print(resp.loc[index])
            return False

    return True
Ejemplo n.º 9
0
def ValidatePregnum(resp, preg):
    """Validate pregnum in the respondent file.

    resp: respondent DataFrame
    preg: pregnancy DataFrame

    returns: True if every respondent's pregnum equals the number of
             pregnancy records for that caseid; False at the first
             mismatch, which is printed as (caseid, record count, pregnum)
    """
    respToPregMap = nsfg.MakePregMap(preg)
    # Series.items() replaces iteritems(), which pandas 2.0 removed
    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        pregCount = len(respToPregMap[caseid])
        if pregCount != pregnum:
            print(caseid, pregCount, pregnum)
            return False

    return True
Ejemplo n.º 10
0
def CrossValidate(resp):
    """Cross-validate pregnum against the pregnancy file.

    resp: respondent DataFrame

    returns: True if every respondent's pregnum equals the number of
             pregnancy records for that caseid; False at the first
             mismatch, after printing a message describing it
    """
    preg = nsfg.ReadFemPreg()
    pregMap = nsfg.MakePregMap(preg)

    # Walk caseid and pregnum in lockstep. This avoids masking the whole
    # frame once per row, and avoids looking pregnum up by caseid as if it
    # were a row label.
    for caseid, pregnum in zip(resp['caseid'], resp['pregnum']):
        pregMapNum = len(pregMap[caseid])
        if pregMapNum != int(pregnum):
            print("Test failed on caseid " + str(caseid) + ", pregMapNum: " + str(pregMapNum) + ", respNum: " + str(pregnum))
            return False

    return True
Ejemplo n.º 11
0
def PairwiseDiff(live):
    """Differences in prglngth between a caseid's first record and the rest.

    live: DataFrame of pregnancy records

    returns: list with one entry (first length minus other length) for
             every record after the first, over caseids with at least
             two records
    """
    diffs = []
    for children in nsfg.MakePregMap(live).values():
        if len(children) < 2:
            continue
        lengths = live.loc[children].prglngth.values
        base = lengths[0]
        for other in lengths[1:]:
            diffs.append(base - other)
    return diffs
Ejemplo n.º 12
0
def ValidatePregnum(resp):
    """Validate pregnum in the respondent file.

    resp: respondent DataFrame

    returns: True if every respondent's pregnum equals the number of
             pregnancy records for that caseid; False at the first
             mismatch, which is printed as (caseid, record count, pregnum)
    """
    preg = nsfg.ReadFemPreg()
    preg_map = nsfg.MakePregMap(preg)
    # Series.items() replaces iteritems(), which pandas 2.0 removed
    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        indices = preg_map[caseid]

        if len(indices) != pregnum:
            print(caseid, len(indices), pregnum)
            return False
    return True
Ejemplo n.º 13
0
def ValidatePregnum(dctpreg, dctresp, pregfile, respfile):
    """Assert that preg and resp agree on the pregnancy count per caseid.

    dctpreg, pregfile: arguments passed to ReadFemFile for the pregnancy frame
    dctresp, respfile: arguments passed to ReadFemFile for the respondent frame

    An AssertionError is raised at the first caseid whose record count
    differs from the pregnum of its first matching respondent row.
    """
    preg = ReadFemFile(dctpreg, pregfile)
    resp = ReadFemFile(dctresp, respfile)

    # caseid -> list of pregnancy indices
    preg_map = nsfg.MakePregMap(preg)

    for caseid, indices in preg_map.items():
        matching_rows = resp[resp.caseid == caseid]
        reported = list(matching_rows.pregnum)[0]
        counted = len(indices)
        assert reported == counted
Ejemplo n.º 14
0
def main(script):
    """Print the outcome values recorded for one hard-coded caseid.

    script: string script name (not used in the body)
    """
    preg = nsfg.ReadFemPreg()
    preg_map = nsfg.MakePregMap(preg)

    # look up the pregnancy rows of a single respondent
    caseid = 10229
    outcomes = preg.outcome[preg_map[caseid]].values
    print(caseid, outcomes)
Ejemplo n.º 15
0
def main(script):
    """Tests the functions in this module.

    For every caseid in the pregnancy map, asserts that the number of
    pregnancy records equals the pregnum of the matching respondent row.

    script: string script name
    """
    respdata = read_fem_resp()
    pregdata = nsfg.ReadFemPreg()
    preg_index_dict = nsfg.MakePregMap(pregdata)
    for caseid, indexes in preg_index_dict.items():
        reported = respdata.pregnum[respdata.caseid == caseid].values
        # Compare scalars, so the assert never depends on the truth value
        # of an array. That is ambiguous for several matches and
        # unreliable for none.
        assert len(reported) == 1
        assert len(indexes) == reported[0]

    print('%s: All tests passed.' % script)
Ejemplo n.º 16
0
def ValidatePregnum(resp, preg):
    """Validate pregnum in the respondent file.

    resp: respondent DataFrame
    preg: pregnancy DataFrame

    returns: True if every respondent's pregnum equals the number of
             pregnancy records for that caseid; False at the first
             mismatch, which is printed as (caseid, record count, pregnum)
    """
    # make the map from caseid to list of pregnancy indices
    preg_map = nsfg.MakePregMap(preg)
    # iterate over the pregnum series as (row label, value) pairs;
    # Series.items() replaces iteritems(), which pandas 2.0 removed
    for k, v in resp.pregnum.items():
        caseid = resp.caseid[k]  # caseid of the current respondent row
        indices = preg_map[caseid]  # pregnancy rows listed for that caseid
        # check if the number of entries is equal to
        # the resp.pregnum value
        if len(indices) != v:
            print(caseid, len(indices), v)
            return False
    return True
Ejemplo n.º 17
0
def validate(resp):
    """Assert basic consistency properties of the frame.

    resp: DataFrame with caseid and pregnum columns
          (NOTE(review): the expected row count of 13593 and the call to
          MakePregMap suggest this is the pregnancy frame -- confirm)

    An AssertionError is raised at the first check that fails.
    """
    assert len(resp.index) == 13593
    # Number of rows whose pregnum is exactly 1. value_counts(1) would set
    # normalize and return a whole Series, which cannot be asserted on.
    assert resp.pregnum.value_counts()[1] == 1267

    preg_map = nsfg.MakePregMap(resp)
    preg_times_map = preg_times(preg_map)

    for caseid, indicies in preg_map.items():
        pregnum_resp = resp.loc[indicies, 'pregnum']
        assert len(pregnum_resp) == preg_times_map[caseid]

        # Every row listed for this caseid must carry this caseid. The
        # caseids cannot be compared to the list of row indices itself.
        # NOTE(review): intent inferred from the original assert -- confirm.
        caseid_resp = resp.caseid[indicies]
        assert caseid_resp.tolist() == [caseid] * len(indicies)
Ejemplo n.º 18
0
def ValidatePregnum(femResp):
    """Check pregnum against the number of pregnancy records per caseid.

    femResp: respondent DataFrame

    returns: True when all respondents agree with the pregnancy file;
             False at the first mismatch, which is printed as
             (caseid, record count, pregnum)
    """
    # caseid -> list of indices into the pregnancy DataFrame
    pregMap = nsfg.MakePregMap(nsfg.ReadFemPreg())

    for rowLabel, reported in femResp.pregnum.items():
        caseid = femResp.caseid[rowLabel]
        recordCount = len(pregMap[caseid])

        if recordCount == reported:
            continue

        print(caseid, recordCount, reported)
        return False

    return True
Ejemplo n.º 19
0
def validatePregnum(resp):
    """Check pregnum against the pregnancy file read from the data/ directory.

    resp: respondent DataFrame

    returns: True when all respondents agree with the pregnancy file;
             False at the first mismatch, which is printed as
             (caseid, record count, pregnum)
    """
    preg = nsfg.ReadFemPreg(
        dct_file='data/2002FemPreg.dct',
        dat_file='data/2002FemPreg.dat.gz',
    )
    preg_map = nsfg.MakePregMap(preg)

    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        found = len(preg_map[caseid])

        # pregnum must equal the number of records in the pregnancy file
        if found == pregnum:
            continue

        print(caseid, found, pregnum)
        return False

    return True
Ejemplo n.º 20
0
def main(script):
    """Tests the functions in this module.

    Prints the pregnum value counts (values below 7 individually, the rest
    pooled) together with their total, then cross-validates pregnum
    against the pregnancy file. Every mismatch is printed.

    script: string script name
    """
    respdf = nsfg.ReadFemResp()
    pregnum = respdf['pregnum']

    # The variable pregnum is a recode that indicates how many times each
    # respondent has been pregnant. Print the value counts for this
    # variable and compare them to the published results in the NSFG
    # codebook:
    # https://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=FEM&section=R&subSec=7869&srtLabel=606835
    preg_stat = pregnum.value_counts().sort_index()
    list_of_Npregs = pregnum.unique()
    list_of_Npregs.sort()
    preg_stat_nsfg = []
    print("list_of_Npregs", list_of_Npregs)
    Npregs_7_95 = 0
    Npregs_tot = 0
    for i in list_of_Npregs:
        count = preg_stat[i]
        Npregs_tot += count
        if i < 7:
            preg_stat_nsfg.append((i, count))
        else:
            # values of 7 and above are pooled into one line of output
            Npregs_7_95 += count
    print("pregnums:")
    for value, count in preg_stat_nsfg:
        print(value, " ", count)
    print("7-95 ", Npregs_7_95, "\nTotal = ", Npregs_tot)

    # Cross-validate the respondent and pregnancy files by comparing
    # pregnum for each respondent with the number of records in the
    # pregnancy file.
    pregdf = nsfg.ReadFemPreg()
    map_ResptoPreg = nsfg.MakePregMap(pregdf)
    fail = 0
    # Series.items() replaces iteritems(), which pandas 2.0 removed. The
    # loop variable gets its own name so it does not rebind the pregnum
    # Series above.
    for index, resp_pregnum in respdf.pregnum.items():
        caseid = respdf.caseid[index]
        indices = map_ResptoPreg[caseid]
        if resp_pregnum != len(indices):
            print("caseid in resp:", caseid, ", pregnum=", resp_pregnum, " entries in preg= ", indices)
            fail += 1
    if fail == 0:
        print('%s: All tests passed.' % script)
Ejemplo n.º 21
0
def CrossValPythonically(resp, preg):
    """Print how many caseids disagree between resp and preg.

    A caseid disagrees when the pregnum of its first matching respondent
    row, minus the number of its pregnancy records, is not zero.

    resp: dataframe with nsfg respondents
    preg: dataframe with nsfg pregnancies
    """
    dict_preg = nsfg.MakePregMap(preg)

    mismatched = []
    for caseid, indices in dict_preg.items():
        reported = resp.loc[resp.caseid == caseid, 'pregnum'].values[0]
        surplus = reported - len(indices)
        if surplus != 0:
            mismatched.append(caseid)

    print(len(mismatched))
Ejemplo n.º 22
0
def main(script):
    """Tests the functions in this module.

    Prints one line per entry of the mapping returned by preg_times, then
    runs validate() on the respondent frame.

    script: string script name
    """
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())

    # presumably caseid -> number of pregnancies; see preg_times
    times_by_subject = preg_times(preg_map)
    for subject, times in times_by_subject.items():
        print("subject %s was pregnant %s times" % (subject, times))

    validate(ReadFemResp())
    print('%s: All tests passed.' % script)
Ejemplo n.º 23
0
def PairWiseDifferences(live):
    """Print and plot Diffs() results over pregnancy lengths per caseid.

    Only records with prglngth >= 37 are used, and only caseids with at
    least two such records contribute.

    live: DataFrame of pregnancy records
    """
    live = live[live.prglngth >= 37]

    diffs = []
    for indices in nsfg.MakePregMap(live).values():
        lengths = live.loc[indices].prglngth.values
        if len(lengths) < 2:
            continue
        diffs.extend(Diffs(lengths))

    print('Mean difference between pairs', thinkstats2.Mean(diffs))

    # histogram of the collected differences
    thinkplot.Hist(thinkstats2.Pmf(diffs), align='center')
    thinkplot.Show(xlabel='Difference in weeks', ylabel='PMF')
Ejemplo n.º 24
0
def main(script):
    """Tests the functions in this module.

    Prints the sorted pregnum value counts, then compares pregnum with
    the pregnancy record count and prints every mismatch found.

    script: string script name (not used in the body)
    """
    print('Running')
    resp = ReadFemResp()
    print(resp.pregnum.value_counts().sort_index())

    preg = nsfg.ReadFemPreg()
    pm = nsfg.MakePregMap(preg)

    print('checking counts')
    # NOTE: head() limits the check to the first rows of the respondent
    # frame. Series.items() replaces iteritems(), which pandas 2.0 removed.
    for i, r in resp.caseid.head().items():
        if resp.pregnum[i] != len(pm[r]):
            print([r, resp[resp.caseid == r].pregnum.iloc[0], len(pm[r])])
Ejemplo n.º 25
0
def ValidateData(resp):
    """Compare pregnum with the pregnancy record count for each respondent.

    resp: respondent DataFrame

    returns: True when every respondent matches; False at the first
             mismatch, which is printed as (caseid, record count, pregnum)
    """
    # caseid -> list of indices into the pregnancy DataFrame
    records_by_case = nsfg.MakePregMap(nsfg.ReadFemPreg())

    for row, reported in resp.pregnum.items():
        caseid = resp.caseid[row]
        counted = len(records_by_case[caseid])

        # the two sources must agree on the count
        if counted != reported:
            print(caseid, counted, reported)
            return False

    return True
Ejemplo n.º 26
0
def ValidatePregnum(resp):
    """Validate pregnum in the respondent file.

    resp: respondent DataFrame

    returns: True when every pregnum equals the number of pregnancy
             records for the same caseid; False at the first mismatch,
             which is printed as (caseid, record count, pregnum)
    """
    preg = nsfg.ReadFemPreg()

    # dictionary from each caseid to a list of indices into the
    # pregnancy DataFrame
    preg_map = nsfg.MakePregMap(preg)

    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        record_count = len(preg_map[caseid])

        matches = record_count == pregnum
        if not matches:
            print(caseid, record_count, pregnum)
            return False

    return True
Ejemplo n.º 27
0
def ValidatePregnum(respo):
    """Validate pregnum in the respondent file.

    respo: respondent DataFrame

    returns: True when every pregnum equals the number of pregnancy
             records for the same caseid; False at the first mismatch,
             which is printed as (caseid, record count, pregnum)
    """
    # map from caseid to list of pregnancy indices
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())

    for label, expected in respo.pregnum.items():
        caseid = respo.caseid[label]
        actual = len(preg_map[caseid])

        if actual == expected:
            continue

        # first disagreement between the two files ends the check
        print(caseid, actual, expected)
        return False

    return True
Ejemplo n.º 28
0
def main(script):
    """Tests the functions in this module.

    For every caseid in the pregnancy map, compares the number of
    pregnancy records with the respondent's numpregs and prints each
    mismatch as (caseid, record count, numpregs). The success message is
    printed only after the whole comparison found no mismatch.

    script: string script name
    """
    df = nsfg.ReadFemPreg()
    preg_map = nsfg.MakePregMap(df)
    dfr = ReadFemResp()
    # presumably caseid -> respondent row index; see MakeRespMap
    resp_map = MakeRespMap(dfr)

    mismatches = 0
    for caseid in sorted(preg_map):
        preg_count = len(preg_map[caseid])

        index = resp_map[caseid]
        numpregs = dfr.numpregs[index]
        if preg_count != numpregs:
            print(caseid, preg_count, numpregs)
            mismatches += 1

    # report success only once the checks have actually run and passed
    if mismatches == 0:
        print('%s: All tests passed.' % script)
Ejemplo n.º 29
0
def validatePregnum(resp):
    """Cross-validate pregnum by the number of records in the preg file.

    resp: respondent DataFrame; when None, the frame is read with
          nsfg.ReadFemResp()

    returns: True if every respondent's pregnum equals the number of
             pregnancy records for that caseid; False at the first
             mismatch, which is printed as (caseid, record count, pregnum)
    """
    # validate the frame the caller passed in; read one only if none given
    if resp is None:
        resp = nsfg.ReadFemResp()

    # caseid to list of pregnancy indices
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())

    # iterate through the respondent pregnum series
    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        indices = preg_map[caseid]

        # check that pregnum from respondent file equals
        # number of records in preg file
        if len(indices) != pregnum:
            print(caseid, len(indices), pregnum)
            return False

    return True
Ejemplo n.º 30
0
def PairWiseDifferences(live):
    """Summarize pairwise differences for children of the same mother.

    Records with prglngth below 37 are dropped first; caseids left with
    fewer than two records are skipped. The mean of the collected
    differences is printed and their PMF is shown as a histogram.

    live: DataFrame of pregnancy records for live births
    """
    live = live[live.prglngth >= 37]
    preg_map = nsfg.MakePregMap(live)

    diffs = []
    for indices in preg_map.values():
        lengths = live.loc[indices].prglngth.values
        enough_records = len(lengths) >= 2
        if enough_records:
            diffs.extend(Diffs(lengths))

    mean_diff = thinkstats2.Mean(diffs)
    print('Mean difference between pairs', mean_diff)

    diff_pmf = thinkstats2.Pmf(diffs)
    thinkplot.Hist(diff_pmf, align='center')
    thinkplot.Show(xlabel='Difference in weeks', ylabel='PMF')