Exemple #1
0
def main():
    """Check each respondent's pregnum against the pregnancy file.

    Reads the respondent and pregnancy files, builds the pregnancy map
    with nsfg.MakePregMap, and asserts for every respondent row that
    pregnum equals the number of entries stored under that row's caseid.
    Prints 'Success' when no assertion fails.
    """
    resp = nsfg.ReadFemResp()
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())
    for row_label, expected_count in resp.pregnum.items():
        records = preg_map[resp.caseid[row_label]]
        assert expected_count == len(records)
    print('Success')
def main(script):
    """Validate the pregnum column and report the outcome.

    script: string script name (not used by this function)
    """
    preg = nsfg.ReadFemPreg()  # DataFrame 13593 rows
    resp = nsfg.ReadFemResp()  # DataFrame 7643 rows
    if ValidatePregnum(resp, preg):
        message = "Pregnum column validated."
    else:
        message = "Problems identified with pregnum column."
    print(message)
Exemple #3
0
def main(script):
    """Tests the functions in this module.

    Prints the pregnum value counts ordered by value, followed by the
    success line.

    script: string script name
    """
    respondents = nsfg.ReadFemResp()
    pregnum_counts = respondents.pregnum.value_counts()
    print(pregnum_counts.sort_index())
    print('%s: All tests passed.' % script)
Exemple #4
0
def main(script):
    """Tests the functions in this module.

    Loads the respondent file from the data/ directory, checks the row
    count and the number of respondents with pregnum == 1, then runs
    validatePregnum on the frame.

    script: string script name
    """
    respondents = nsfg.ReadFemResp(
        dct_file='data/2002FemResp.dct',
        dat_file='data/2002FemResp.dat.gz',
    )
    assert len(respondents) == 7643

    pregnum_counts = respondents.pregnum.value_counts()
    assert pregnum_counts[1] == 1267

    assert validatePregnum(respondents)

    print('%s: All tests passed.' % script)
Exemple #5
0
def main(script):
    """Tests the functions in this module.

    Checks that the pregnum value counts add up to 7643 and that
    validate() accepts the respondent/pregnancy pair.

    script: string script name
    """
    df_resp = nsfg.ReadFemResp()
    df_preg = nsfg.ReadFemPreg()

    counted_respondents = df_resp.pregnum.value_counts().sum()
    assert counted_respondents == 7643

    assert validate(df_resp, df_preg)

    print('%s: All tests passed.' % script)
Exemple #6
0
def main(script):
    """Tests the functions in this module.

    First prints the pregnum value counts grouped the way the loop below
    groups them: values 0-6 individually, everything above 6 summed into
    a single "7-95" bucket, plus the overall total. Then cross-validates
    the respondent and pregnancy files by comparing each respondent's
    pregnum with the number of records held for that caseid in the
    pregnancy map, printing every mismatch.

    script: string script name
    """
    respdf = nsfg.ReadFemResp()
    pregnum = respdf['pregnum']

    # The variable pregnum is a recode that indicates how many times each
    # respondent has been pregnant. Print the value counts for this
    # variable and compare them to the published results in the NSFG
    # codebook:
    # https://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=FEM&section=R&subSec=7869&srtLabel=606835
    preg_stat = pregnum.value_counts().sort_index()
    list_of_Npregs = pregnum.unique()
    list_of_Npregs.sort()
    print("list_of_Npregs", list_of_Npregs)

    preg_stat_nsfg = []
    Npregs_7_95 = 0
    Npregs_tot = 0
    for i in list_of_Npregs:
        count = preg_stat[i]
        Npregs_tot += count
        if i < 7:
            preg_stat_nsfg.append((i, count))
        else:
            # Everything above 6 goes into the single "7-95" bucket.
            Npregs_7_95 += count

    print("pregnums:")
    for value, count in preg_stat_nsfg:
        print(value, " ", count)
    print("7-95 ", Npregs_7_95, "\nTotal = ", Npregs_tot)

    # Cross-validate the respondent and pregnancy files by comparing
    # pregnum for each respondent with the number of records in the
    # pregnancy file.
    pregdf = nsfg.ReadFemPreg()
    map_ResptoPreg = nsfg.MakePregMap(pregdf)
    fail = 0
    # Series.items() replaces iteritems(), which pandas 2.0 removed. The
    # loop variable has its own name so it no longer overwrites the
    # `pregnum` Series bound above.
    for index, num_pregs in respdf.pregnum.items():
        caseid = respdf.caseid[index]
        indices = map_ResptoPreg[caseid]
        if num_pregs != len(indices):
            print("caseid in resp:", caseid, ", pregnum=", num_pregs,
                  " entries in preg= ", indices)
            fail += 1

    if fail == 0:
        print('%s: All tests passed.' % script)
Exemple #7
0
def main(script):
    """Tests the functions in this module.

    Prints the pregnum value counts of the pregnancy frame, then for a
    handful of caseids prints the element-wise comparison between the
    respondent's pregnum and the number of pregnancy rows with that
    caseid.

    script: string script name
    """
    # NOTE(review): the success line is emitted before any check runs.
    print('%s: All tests passed.' % script)

    preg = nsfg.ReadFemPreg()
    pregnum_counts = preg.pregnum.value_counts()
    print(pregnum_counts.sort_index())

    sample_caseids = [1, 82, 900, 1896, 5676]
    resp = nsfg.ReadFemResp()
    for i in sample_caseids:
        try:
            reported = resp[resp.caseid == i].pregnum
            record_count = len(preg[preg.caseid == i])
            print(i, ':', reported == record_count)
        except IndexError:
            print(f'caseid {i} out of index')
Exemple #8
0
def validatePregnum(resp):
    """Cross-validate pregnum against the records in the pregnancy file.

    resp: respondent DataFrame; its caseid and pregnum columns are read

    returns: True when every respondent's pregnum equals the number of
             entries stored for that caseid in the pregnancy map; False
             as soon as one mismatch is found (the mismatch is printed
             as: caseid, number of records, pregnum)
    """
    # The frame passed by the caller is validated as-is; it is no longer
    # replaced by a freshly read copy of the respondent file.

    # caseid to list of pregnancy indices
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())

    # iterate through the respondent pregnum series
    for index, pregnum in resp.pregnum.items():
        caseid = resp.caseid[index]
        indices = preg_map[caseid]

        # check that pregnum from respondent file equals
        # number of records in preg file
        if len(indices) != pregnum:
            print(caseid, len(indices), pregnum)
            return False

    return True
Exemple #9
0
def ReadFemResp():
    """Return the result of nsfg.ReadFemResp() called with no arguments."""
    respondents = nsfg.ReadFemResp()
    return respondents
Exemple #10
0
def readFile():
    """Read the respondent file via nsfg.ReadFemResp() and return it.

    returns: whatever nsfg.ReadFemResp() returns. The result used to be
             dropped, so callers always received None.
    """
    resp = nsfg.ReadFemResp()
    return resp
Exemple #11
0
def main(script):
    """Print how many caseids have pregnancy records and how many respondents exist.

    script: string script name (not used by this function)
    """
    preg = nsfg.ReadFemPreg()  # DataFrame
    resp = nsfg.ReadFemResp()  # DataFrame
    preg_by_caseid = MakePregMap(preg)  # dictionary

    # Number of pregnancy records per caseid. len() replaces the manual
    # counting loop; this assumes each map value is a sized collection
    # such as a list -- TODO confirm against MakePregMap.
    total_pregnancies_by_caseid = {
        key: len(preg_list) for key, preg_list in preg_by_caseid.items()
    }

    print(len(total_pregnancies_by_caseid))
    print(len(resp))

    """Tests the functions in this module.
Exemple #12
0
import nsfg
import thinkstats2
import thinkplot


#from thinkstats2
def BiasPmf(pmf, label):
    """Return a relabelled copy of pmf with each value weighted by itself.

    pmf: Pmf-like object providing Copy, Items, Mult and Normalize
    label: label passed to Copy for the returned object

    returns: the copy, after multiplying each value x by x and normalizing
    """
    biased = pmf.Copy(label=label)

    # Only the value of each (value, probability) pair is needed.
    for value, _ in pmf.Items():
        biased.Mult(value, value)

    biased.Normalize()
    return biased


# Build the PMF of the numkdhh column and its biased counterpart.
nk = nsfg.ReadFemResp().numkdhh
nkpmf = thinkstats2.Pmf(nk, label='actual')
nkpmfbias = BiasPmf(nkpmf, label='biased')

# Plot both distributions on the same axes.
thinkplot.Pmfs([nkpmf, nkpmfbias])
thinkplot.show(xlabel='num kids', ylabel='Probability')

# Report the two means; print() applies str() to each argument.
print("the mean of the actual pmf is ", nkpmf.Mean(), sep="")
print("the mean of the biased pmf is ", nkpmfbias.Mean(), sep="")
Exemple #13
0
    thinkplot.Plot(sf)
    thinkplot.Cdf(cdf, alpha=0.2)
    thinkplot.Show(loc='center left')

    ## calculate hazard function
    hf = sf.MakeHazardFunction(label='hazard')
    thinkplot.Plot(hf)
    thinkplot.Show(ylim=[0, 0.75], loc='upper left')

    #########################################
    ## Age at first marriage
    #########################################

    # clean dataframe and extract sub-groups we need
    resp6 = nsfg.ReadFemResp()
    resp6.cmmarrhx.replace([9997, 9998, 9999], np.nan, inplace=True)
    resp6['agemarry'] = (resp6.cmmarrhx - resp6.cmbirth) / 12.0
    resp6['age'] = (resp6.cmintvw - resp6.cmbirth) / 12.0

    complete = resp6[resp6.evrmarry == 1].agemarry.dropna()
    ongoing = resp6[resp6.evrmarry == 0].age

    ## estimate hazard function
    hf = survival.EstimateHazardFunction(complete, ongoing)
    thinkplot.Plot(hf)
    thinkplot.Show(xlabel='Age (years)', ylabel='Hazard')

    ## make survival function from hazard function
    sf = hf.MakeSurvival()
    thinkplot.Plot(sf)
Exemple #14
0
    var1 = group1.var()
    var2 = group2.var()
    n1, n2 = len(group1), len(group2)
    pooled_var = (n1 * var1 + n2 * var2) / (n1 + n2)

    d = diff / math.sqrt(pooled_var)

    return d


# Q1
q1_ans = CohenEffectSize(firsts, others)
# The Cohen effect size is -0.089, which is larger (abs val) than 0.02, the
# effect size of pregnancy length, but is still quite small.

# Q2
# Both respondent files live under `directory`; gather the keyword
# arguments first, then read the frame.
_resp_paths = {
    'dct_file': directory + '2002FemResp.dct',
    'dat_file': directory + '2002FemResp.dat.gz',
}
resp = nsfg.ReadFemResp(**_resp_paths)


# The book defines this function, but ThinkStats2 does not ship it, so it is
# reproduced here.
def BiasPmf(pmf, label):
    """Build the biased version of pmf.

    pmf: Pmf-like object providing Copy, Items, Mult and Normalize
    label: label handed to Copy for the new object

    returns: copy of pmf in which every value x was scaled by x, then
             normalized
    """
    result = pmf.Copy(label=label)

    for entry in pmf.Items():
        x = entry[0]  # the probability in entry[1] is not needed
        result.Mult(x, x)

    result.Normalize()

    return result

Exemple #15
0
import  nsfg



def BiasPmf(pmf, label):
    """Return a normalized copy of pmf whose entries are scaled by their value.

    pmf: Pmf-like object providing Copy, Items, Mult and Normalize
    label: label for the returned copy
    """
    weighted = pmf.Copy(label=label)

    for outcome, _unused_prob in pmf.Items():
        # Scale the entry for `outcome` by the outcome itself.
        weighted.Mult(outcome, outcome)

    weighted.Normalize()
    return weighted


df = nsfg.ReadFemResp()

# PMF of the numkdhh column as observed in the respondent file.
pmf = thinkstats2.Pmf(df.numkdhh, label='numkdhh')
thinkplot.Pmf(pmf)

thinkplot.Config(xlabel='Number of children', ylabel='PMF')

biased = BiasPmf(pmf, label='biased')

# Plot the observed and biased distributions together.
thinkplot.PrePlot(2)
thinkplot.Pmfs([pmf, biased])
thinkplot.Config(xlabel='Number of children', ylabel='PMF')

# A bare `pmf.Mean()` only displays a value in an interactive session; run
# as a script the result was silently dropped, so print both means.
print(pmf.Mean())

print(biased.Mean())
from __future__ import print_function, division

import nsfg

pres = nsfg.ReadFemResp()

# Bare expressions show nothing when run as a script, so print the
# inspection output explicitly.
print(pres.columns)
print(pres.head(20))
print(pres.tail(30))

# DataFrame.shape is (number of rows, number of columns). The previous
# loop header evaluated the truth value of the whole DataFrame, which
# raises ValueError.
rows, columns = pres.shape

# Apply the % operator to the format string; it used to sit inside the
# string literal, so the placeholders were printed verbatim.
print("There are %d rows and %d columns." % (rows, columns))

# Smallest and largest agescrn values. The earlier loops never updated
# their running min/max (so they printed every value) and shadowed the
# min/max builtins.
min_agescrn = pres.agescrn.min()
max_agescrn = pres.agescrn.max()
print(min_agescrn)
print(max_agescrn)

search = 0
Exemple #17
0
    """Use the dict returned by MakePregMap to validate

    resp: dataframe with nsfg respondents
    preg: dataframe with nsfg pregnancies
    """
    dict_preg = nsfg.MakePregMap(preg)
    validatecases = []
    for key, value in dict_preg.items():
        preg_val = resp.loc[resp.caseid == key,
                            'pregnum'].values[0] - len(value)
        if preg_val != 0:
            validatecases.append(key)

    print(len(validatecases))


if __name__ == '__main__':
    # Run main() with the command-line arguments first.
    main(*sys.argv)

    # Respondent file is read before the pregnancy file.
    resp, preg = nsfg.ReadFemResp(), nsfg.ReadFemPreg()

    # Part one (disabled): pregnum value counts.
    # print(resp.pregnum.value_counts().sort_index())

    # Part two (disabled): first cross-validation attempt.
    # CrossValidatePregnum(resp, preg)

    # Part three: cross-validation using the dict.
    CrossValPythonically(resp, preg)
Exemple #18
0
def main():
    """Print the sorted pregnum value counts, then the validatePregnum result."""
    resp = nsfg.ReadFemResp()

    value_counts = resp['pregnum'].value_counts()
    pregnum = value_counts.sort_index()
    print(pregnum)

    # cross validate by number of records in preg file
    is_valid = validatePregnum(resp)
    print(is_valid)