예제 #1
0
def main():
    """Run the by-decade divorce-curve resampling on married respondents.

    For each of the two respondent readers (2002 and 2010) the data is
    loaded, passed through ``CleanData``, and narrowed to the rows where
    ``evrmarry == 1``.  The two filtered frames are then handed, in
    2002-then-2010 order, to ``ResampleDivorceCurveByDecade``.
    """
    readers = (survival.ReadFemResp2002, survival.ReadFemResp2010)

    married_groups = []
    for read_cycle in readers:
        respondents = read_cycle()
        # CleanData's return value is not used; it is called before filtering.
        CleanData(respondents)
        ever_married = respondents.evrmarry == 1
        married_groups.append(respondents[ever_married])

    ResampleDivorceCurveByDecade(married_groups)
# ## Chapter 13

# %% [markdown]
# ### Exercise 13.1
# In NSFG Cycles 6 and 7, the variable `cmdivorcx` contains the date of divorce for the respondent’s first marriage, if applicable, encoded in century-months.
#
# Compute the duration of marriages that have ended in divorce, and the duration, so far, of marriages that are ongoing. Estimate the hazard and survival curve for the duration of marriage.
#
# Use resampling to take into account sampling weights, and plot data from several resamples to visualize sampling error.
#
# Consider dividing the respondents into groups by decade of birth, and possibly by age at first marriage.

#%%
# Load the NSFG female-respondent data: Cycle 6 via the 2002 reader and
# Cycle 7 via the 2010 reader (evaluated left to right, 2002 first).
df_6, df_7 = survival.ReadFemResp2002(), survival.ReadFemResp2010()

# Quick look at the Cycle 6 data; as the last expression of the cell, its
# result is what gets displayed when run interactively.
df_6.head()


#%%
# function to clean up / add data to dataframes
def cleandf(df):
    """

    @param: df (dataframe) - df of NSFG cycle
    """
    df.cmdivorcx.replace([998, 9999], np.nan, inplace=True)

    # set the columns for not divorced, duration, and duration so far
    df['notdivorced'] = df.cmdivorcx.isnull().astype(int)