def main():
    """Cross-check respondent ``pregnum`` against the pregnancy file.

    Loads the respondent and pregnancy files through ``nsfg``, builds the
    caseid map with ``nsfg.MakePregMap`` and asserts, for every respondent,
    that ``pregnum`` equals the number of entries mapped to that
    respondent's caseid.  Prints 'Success' once every respondent passed.

    raises: AssertionError for the first respondent whose counts disagree
    """
    resp = nsfg.ReadFemResp()
    preg = nsfg.ReadFemPreg()
    preg_map = nsfg.MakePregMap(preg)

    # Named `index` rather than `id` so the builtin is not shadowed.
    for index, pregnum_value in resp.pregnum.items():
        caseid = resp.caseid[index]
        n_records = len(preg_map[caseid])
        # The message identifies the offending respondent on failure.
        assert pregnum_value == n_records, (
            'caseid %s: pregnum=%s but %s pregnancy records'
            % (caseid, pregnum_value, n_records))

    print('Success')
def main(script):
    """Validate the respondents' pregnum column and report the outcome.

    script: string script name (not used by this function)
    """
    # Original notes: the pregnancy frame has 13593 rows and the respondent
    # frame has 7643 rows.
    pregnancies = nsfg.ReadFemPreg()
    respondents = nsfg.ReadFemResp()

    if ValidatePregnum(respondents, pregnancies):
        message = "Pregnum column validated."
    else:
        message = "Problems identified with pregnum column."
    print(message)
def main(script):
    """Print the pregnum value counts, then the success line.

    script: string script name
    """
    respondents = nsfg.ReadFemResp()

    # Counts per pregnum value, ordered by the pregnum value itself.
    pregnum_counts = respondents.pregnum.value_counts()
    print(pregnum_counts.sort_index())

    print('%s: All tests passed.' % script)
def main(script):
    """Run the respondent-file checks and print the success line.

    Asserts the respondent row count, the count of respondents whose
    pregnum is 1, and the result of validatePregnum.

    script: string script name
    """
    respondents = nsfg.ReadFemResp(
        dct_file='data/2002FemResp.dct',
        dat_file='data/2002FemResp.dat.gz',
    )

    n_respondents = len(respondents)
    assert n_respondents == 7643

    pregnum_counts = respondents.pregnum.value_counts()
    assert pregnum_counts[1] == 1267

    is_valid = validatePregnum(respondents)
    assert is_valid

    print('%s: All tests passed.' % script)
def main(script):
    """Run the respondent/pregnancy checks and print the success line.

    Asserts that the pregnum value counts sum to 7643 and that
    validate(respondents, pregnancies) is truthy.

    script: string script name
    """
    respondents = nsfg.ReadFemResp()
    pregnancies = nsfg.ReadFemPreg()

    total_counted = respondents.pregnum.value_counts().sum()
    assert total_counted == 7643

    is_valid = validate(respondents, pregnancies)
    assert is_valid

    print('%s: All tests passed.' % script)
def main(script):
    """Tests the functions in this module.

    Prints the pregnum value counts (values 0-6 individually, values above 6
    pooled as "7-95") and then cross-validates pregnum against the pregnancy
    file.  The success line is printed only when no respondent mismatches.

    script: string script name
    """
    respdf = nsfg.ReadFemResp()
    pregnum_series = respdf['pregnum']

    # The variable pregnum is a recode that indicates how many times each
    # respondent has been pregnant.  Print the value counts for this variable
    # and compare them to the published results in the NSFG codebook:
    # https://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=FEM&section=R&subSec=7869&srtLabel=606835
    preg_stat = pregnum_series.value_counts().sort_index()
    list_of_Npregs = pregnum_series.unique()
    list_of_Npregs.sort()
    print("list_of_Npregs", list_of_Npregs)

    preg_stat_nsfg = []   # (pregnum value, count) for values below 7
    Npregs_7_95 = 0       # pooled count for values above 6
    Npregs_tot = 0
    for value in list_of_Npregs:
        count = preg_stat[value]
        Npregs_tot += count
        if value < 7:
            preg_stat_nsfg.append((value, count))
        if value > 6:
            Npregs_7_95 += count

    print("pregnums:")
    for value, count in preg_stat_nsfg:
        print(value, " ", count)
    print("7-95 ", Npregs_7_95, "\nTotal = ", Npregs_tot)

    # Cross-validate the respondent and pregnancy files by comparing pregnum
    # for each respondent with the number of records in the pregnancy file.
    pregdf = nsfg.ReadFemPreg()
    map_ResptoPreg = nsfg.MakePregMap(pregdf)

    fail = 0
    # Series.iteritems() was removed in pandas 2.0; items() is the supported
    # spelling and yields the same (index, value) pairs.
    for index, reported in respdf.pregnum.items():
        caseid = respdf.caseid[index]
        indices = map_ResptoPreg[caseid]
        if reported != len(indices):
            print("caseid in resp:", caseid, ", pregnum=", reported,
                  " entries in preg= ", indices)
            fail += 1

    if fail == 0:
        print('%s: All tests passed.' % script)
def main(script):
    """Print pregnum counts and spot-check a few caseids.

    The success line is printed first, before any data is loaded.  For each
    sampled caseid the respondent's pregnum selection is compared with the
    number of pregnancy rows for that caseid, and the comparison result is
    printed.

    script: string script name
    """
    print('%s: All tests passed.' % script)

    pregnancies = nsfg.ReadFemPreg()
    pregnum_counts = pregnancies.pregnum.value_counts()
    print(pregnum_counts.sort_index())

    sample_caseids = [1, 82, 900, 1896, 5676]
    respondents = nsfg.ReadFemResp()

    for cid in sample_caseids:
        try:
            reported = respondents[respondents.caseid == cid].pregnum
            n_records = len(pregnancies[pregnancies.caseid == cid])
            print(cid, ':', reported == n_records)
        except IndexError:
            print(f'caseid {cid} out of index')
def validatePregnum(resp):
    """Cross-validate pregnum against the record counts in the preg file.

    resp: respondent DataFrame with ``caseid`` and ``pregnum`` columns; when
          None, the respondent file is loaded with nsfg.ReadFemResp()

    returns: True when every respondent's pregnum equals the number of
             pregnancy records mapped to their caseid; False at the first
             mismatch, after printing caseid, record count and pregnum
    """
    # Validate the frame the caller passed in.  The previous version reloaded
    # the respondent file unconditionally, which silently ignored `resp`.
    if resp is None:
        resp = nsfg.ReadFemResp()

    # caseid -> list of pregnancy indices
    preg_map = nsfg.MakePregMap(nsfg.ReadFemPreg())

    # Walk caseid and pregnum in lockstep; both columns come from `resp`.
    for caseid, pregnum in zip(resp.caseid, resp.pregnum):
        n_records = len(preg_map[caseid])
        # pregnum from the respondent file must equal the number of records
        # in the preg file
        if n_records != pregnum:
            print(caseid, n_records, pregnum)
            return False

    return True
def ReadFemResp():
    """Load the respondent file by delegating to ``nsfg.ReadFemResp``.

    returns: whatever ``nsfg.ReadFemResp()`` returns when called with no
             arguments
    """
    respondents = nsfg.ReadFemResp()
    return respondents
def readFile():
    """Load the respondent file through ``nsfg.ReadFemResp``.

    returns: the object returned by ``nsfg.ReadFemResp()``.  Previously the
             result was bound to a local and discarded, so the function
             always returned None; callers that ignore the return value are
             unaffected.
    """
    resp = nsfg.ReadFemResp()
    return resp
def main(script): preg = nsfg.ReadFemPreg() # DataFrame resp = nsfg.ReadFemResp() # DataFrame preg_by_caseid = MakePregMap(preg) # dictionary total_pregnancies_by_caseid = {} for key, preg_list in preg_by_caseid.items(): total_preg = 0 for p in preg_list: total_preg += 1 total_pregnancies_by_caseid[key] = total_preg print(len(total_pregnancies_by_caseid)) print(len(resp)) # for k, v in total_pregnancies_by_caseid.items(): # print(k,v) # # iterate through the respondent pregnum series # for index, pregnum in resp.pregnum.iteritems(): # caseid = resp.caseid[index] # indices = total_pregnancies_by_caseid[caseid] # # check that pregnum from the respondent file equals # # the number of records in the pregnancy file # if indices != pregnum: # print(caseid, indices, pregnum) # preg_pregnum = pd.DataFrame([total_pregnancies_by_caseid], columns=['caseid', 'pregnum']) # result = ValidatePregnum(resp, preg_pregnum) # print(result) # df = ReadFemResp() # print(df.pregnum.head()) # print(df.pregnum.value_counts().sort_index()) # bins = [0,1,2,3,4,5,6,100] # print(pd.cut(df.pregnum, bins).value_counts().sort_index()) # preg = nsfg.ReadFemPreg() # resp = nsfg.ReadFemResp() # print(ValidatePregnum(resp, preg)) # print(preg.head()) # pregnum_map = nsfg.MakePregMap(preg) # # pprint(pregnum_map) # print(len(pregnum_map)) # print(len(resp)) # for key, value in pregnum_map.items(): # pass # print(key, len(value)) # print(type(resp.pregnum[key])) # if resp.pregnum[key] == len(value): # print("MATCH") # elif resp.pregnum[key] != len(value): # print("NO MATCH") # else: # print("ERROR") # print(pregnum_map) # print(resp.pregnum) # caseid = 12556 # pregnum_map = nsfg.MakePregMap(preg) # indices = pregnum_map[caseid] # # resp.pregnum[indices].values # result = preg.pregnum # print(result) # print(resp.head()) # print(result) """Tests the functions in this module.
import nsfg
import thinkstats2
import thinkplot


# from thinkstats2
def BiasPmf(pmf, label):
    """Return a relabelled, renormalized copy of pmf after Mult(x, x) per value.

    pmf: object providing Copy(label=...) and Items()
    label: label given to the copy

    returns: the copy, after ``Mult(x, x)`` was called on it for every value
             x in pmf and ``Normalize()`` was called once
             (presumably this weights each value by itself -- confirm
             against thinkstats2)
    """
    biased = pmf.Copy(label=label)
    for value, _ in pmf.Items():
        biased.Mult(value, value)
    biased.Normalize()
    return biased


# Build the actual and biased PMFs of the numkdhh column.
nk = nsfg.ReadFemResp().numkdhh
nkpmf = thinkstats2.Pmf(nk, label='actual')
nkpmfbias = BiasPmf(nkpmf, label='biased')

# Plot both distributions on the same axes.
thinkplot.Pmfs([nkpmf, nkpmfbias])
thinkplot.show(xlabel='num kids', ylabel='Probability')

# Report the mean of each distribution.
print("the mean of the actual pmf is " + str(nkpmf.Mean()))
print("the mean of the biased pmf is " + str(nkpmfbias.Mean()))
# NOTE(review): `sf` and `cdf` are defined earlier in the script, outside
# this excerpt.
thinkplot.Plot(sf)
thinkplot.Cdf(cdf, alpha=0.2)
thinkplot.Show(loc='center left')

## calculate hazard function
hf = sf.MakeHazardFunction(label='hazard')
thinkplot.Plot(hf)
thinkplot.Show(ylim=[0, 0.75], loc='upper left')

#########################################
## Age at first marriage
#########################################

# clean dataframe and extract sub-groups we need
resp6 = nsfg.ReadFemResp()

# Assign the cleaned column back explicitly.  Calling
# `resp6.cmmarrhx.replace(..., inplace=True)` acts on a column accessor and
# does not update resp6 under pandas Copy-on-Write, so the 9997/9998/9999
# codes would leak into agemarry.
resp6['cmmarrhx'] = resp6.cmmarrhx.replace([9997, 9998, 9999], np.nan)

# Both differences are divided by 12.0 (presumably month codes converted to
# years -- confirm against the codebook).
resp6['agemarry'] = (resp6.cmmarrhx - resp6.cmbirth) / 12.0
resp6['age'] = (resp6.cmintvw - resp6.cmbirth) / 12.0

# complete: agemarry for rows with evrmarry == 1 (missing values dropped)
# ongoing: age for rows with evrmarry == 0
complete = resp6[resp6.evrmarry == 1].agemarry.dropna()
ongoing = resp6[resp6.evrmarry == 0].age

## estimate hazard function
hf = survival.EstimateHazardFunction(complete, ongoing)
thinkplot.Plot(hf)
thinkplot.Show(xlabel='Age (years)', ylabel='Hazard')

## make survival function from hazard function
sf = hf.MakeSurvival()
thinkplot.Plot(sf)
var1 = group1.var() var2 = group2.var() n1, n2 = len(group1), len(group2) pooled_var = (n1 * var1 + n2 * var2) / (n1 + n2) d = diff / math.sqrt(pooled_var) return d q1_ans = CohenEffectSize(firsts, others) # The Cohen effect size is -0.089, which is larger (abs val) than 0.02, the # effect size of pregnancy length, but is still quite small. #Q2 resp = nsfg.ReadFemResp(dct_file=directory + '2002FemResp.dct', dat_file=directory + '2002FemResp.dat.gz') # This was defined in the book, but it's not in ThinkStats2, so I copied and pasted the # code here. I can understand its contents. def BiasPmf(pmf, label): new_pmf = pmf.Copy(label=label) for x, p in pmf.Items(): new_pmf.Mult(x, x) new_pmf.Normalize() return new_pmf
import nsfg
import thinkplot
import thinkstats2


def BiasPmf(pmf, label):
    """Return a relabelled, renormalized copy of pmf after Mult(x, x) per value.

    pmf: object providing Copy(label=...) and Items()
    label: label given to the copy

    returns: the copy, after ``Mult(x, x)`` was called on it for every value
             x in pmf and ``Normalize()`` was called once
             (presumably this weights each value by itself -- confirm
             against thinkstats2)
    """
    new_pmf = pmf.Copy(label=label)
    for x, _ in pmf.Items():
        new_pmf.Mult(x, x)
    new_pmf.Normalize()
    return new_pmf


df = nsfg.ReadFemResp()

# PMF of the numkdhh column.
pmf = thinkstats2.Pmf(df.numkdhh, label='numkdhh')
thinkplot.Pmf(pmf)
thinkplot.Config(xlabel='Number of children', ylabel='PMF')

# Actual and biased PMFs plotted together.
biased = BiasPmf(pmf, label='biased')
thinkplot.PrePlot(2)
thinkplot.Pmfs([pmf, biased])
thinkplot.Config(xlabel='Number of children', ylabel='PMF')

# A bare `pmf.Mean()` only displays in an interactive session; print the
# means so the script actually reports them.
print(pmf.Mean())
print(biased.Mean())
from __future__ import print_function, division

import nsfg

pres = nsfg.ReadFemResp()

# Bare expressions only display in an interactive session; print them so the
# script shows the inspection output.
print(pres.columns)
print(pres.head(20))
print(pres.tail(30))

# DataFrame.shape is (row count, column count).  The previous
# `for columns in pres and rows in pres.agescrn` took the truth value of a
# DataFrame, which raises ValueError.
rows, columns = pres.shape

# The format arguments belong outside the string literal; before, the text
# "%(rows,columns)" was printed verbatim.
print("There are %d rows and %d columns." % (rows, columns))

# Smallest and largest agescrn values.  The earlier loops never updated their
# running `min` / `max` (which also shadowed the builtins), so they printed
# every value instead.
min_age = pres.agescrn.min()
max_age = pres.agescrn.max()
print(min_age)
print(max_age)

search = 0
"""Use the dict returned by MakePregMap to validate resp: dataframe with nsfg respondents preg: dataframe with nsfg pregnancies """ dict_preg = nsfg.MakePregMap(preg) validatecases = [] for key, value in dict_preg.items(): preg_val = resp.loc[resp.caseid == key, 'pregnum'].values[0] - len(value) if preg_val != 0: validatecases.append(key) print(len(validatecases)) if __name__ == '__main__': main(*sys.argv) resp = nsfg.ReadFemResp() preg = nsfg.ReadFemPreg() #part one #print(resp.pregnum.value_counts().sort_index()) #part two first attempt #CrossValidatePregnum(resp, preg) #part three with dict CrossValPythonically(resp, preg)
def main():
    """Print the sorted pregnum value counts and the validation result."""
    respondents = nsfg.ReadFemResp()

    counts = respondents['pregnum'].value_counts()
    print(counts.sort_index())

    # cross validate by numbers of records in preg file
    is_valid = validatePregnum(respondents)
    print(is_valid)