def ReadVariables():
    """Reads Stata dictionary files for NSFG data.

    Loads the variable tables of the 2002 pregnancy and respondent
    dictionaries, stacks them (pregnancy rows first), and indexes the
    combined table by its `name` column.

    returns: DataFrame that maps variables names to descriptions
    """
    # Imported locally so the block does not rely on a module-level alias.
    import pandas as pd

    vars1 = thinkstats2.ReadStataDct('2002FemPreg.dct').variables
    vars2 = thinkstats2.ReadStataDct('2002FemResp.dct').variables

    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat performs the same row-wise stacking.
    all_vars = pd.concat([vars1, vars2])
    all_vars.index = all_vars.name
    return all_vars
Example #2
0
def read_resp_file(dct_file="2002FemResp.dct", dat_file="2002FemResp.dat.gz"):
    """Read the respondent file laid out by a Stata dictionary.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data file

    returns: the result of ReadFixedWidth (presumably a DataFrame)
    """
    # Modeled on ReadFemPreg from the textbook (page 5 of the PDF version).
    # The CleanFemPreg(df) step is deliberately omitted: the original author
    # observed that it did not appear to do anything, and that the reference
    # solution's cleaning function was just `pass`.
    layout = thinkstats2.ReadStataDct(dct_file)
    return layout.ReadFixedWidth(dat_file, compression='gzip')
Example #3
0
def read_2002_fem_resp(stata_file='2002FemResp.dct',
                       data_file='2002FemResp.dat.gz'):
    """Read the 2002 female respondent file.

    stata_file: string file name of the Stata dictionary
    data_file: string file name of the gzip-compressed data file

    returns: the result of ReadFixedWidth (presumably a DataFrame)
    """
    # nrows=None is passed explicitly, exactly as the reader is always
    # invoked by this function.
    read_options = {'compression': 'gzip', 'nrows': None}
    layout = thinkstats2.ReadStataDct(stata_file)
    return layout.ReadFixedWidth(data_file, **read_options)
Example #4
0
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """Reads the respondent file and runs CleanFemResp on it.

    dct_file: string file name
    dat_file: string file name
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: the object produced by ReadFixedWidth, after cleaning
    """
    resp = thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    # The return value of CleanFemResp is ignored, so it is expected to
    # modify `resp` in place.
    CleanFemResp(resp)
    return resp
Example #5
0
def readFemaleResponses(dictionaryFile='./data/2002FemResp.dct',
                        dataFile='./data/2002FemResp.dat.gz',
                        nrows=None):
    """Read the female respondent file from the ./data directory.

    dictionaryFile: string file name of the Stata dictionary
    dataFile: string file name of the gzip-compressed data file
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: the result of ReadFixedWidth (presumably a DataFrame)
    """
    layout = thinkstats2.ReadStataDct(dictionaryFile)
    return layout.ReadFixedWidth(dataFile, compression='gzip', nrows=nrows)
Example #6
0
def ReadFemResp(
        dct_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dct',
        dat_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dat.gz',
        nrows=None):
    """Reads the respondent file and cleans it with nsfg.CleanFemResp.

    NOTE: the default paths are absolute and specific to one machine;
    callers elsewhere need to pass their own file names.

    dct_file: string file name
    dat_file: string file name
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: the object produced by ReadFixedWidth, after cleaning
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    resp = layout.ReadFixedWidth(dat_file, nrows=nrows, compression='gzip')
    # The cleaner's return value is discarded; it is expected to work in place.
    nsfg.CleanFemResp(resp)
    return resp
Example #7
0
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """Reads the respondent file without any cleaning step.

    dct_file: string file name
    dat_file: string file name
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: the data frame built by ReadFixedWidth
    """
    # Parse the dictionary, then use it to load the gzip-compressed data.
    return thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
def readfunc():
    """Load the 2002 respondent file and print two summaries.

    Prints the value counts of the 'pregnum' column ordered by index,
    followed by the length of the loaded data. Returns nothing.
    """
    layout = thinkstats2.ReadStataDct('2002FemResp.dct')
    resp = layout.ReadFixedWidth('2002FemResp.dat.gz',
                                 compression='gzip',
                                 nrows=None)

    pregnum_counts = resp['pregnum'].value_counts().sort_index()
    print(pregnum_counts)
    print(len(resp))
Example #9
0
def ReadMale2002(dct_file='2002Male.dct',
                 dat_file='2002Male.dat.gz'):
    """Reads male data from NSFG Cycle 6.

    dct_file: string file name (parsed with iso-8859-1 encoding)
    dat_file: string file name of the gzip-compressed data

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    respondents = layout.ReadFixedWidth(dat_file, compression='gzip')
    # CleanResp's return value is ignored; it is expected to work in place.
    CleanResp(respondents)
    return respondents
Example #10
0
def ReadGss(dirname):
    """Reads GSS files from the given directory.

    Expects `GSS.dct` and `GSS.dat` inside `dirname`. The data file is
    read without a compression argument.

    dirname: string (or path-like) directory name

    returns: DataFrame
    """
    # Imported locally so the block does not rely on a module-level import.
    import os

    # os.path.join accepts path-like objects and avoids producing a
    # root-anchored '/GSS.dct' when dirname is empty, unlike `dirname + '/...'`.
    dct = thinkstats2.ReadStataDct(os.path.join(dirname, 'GSS.dct'))
    gss = dct.ReadFixedWidth(os.path.join(dirname, 'GSS.dat'))
    return gss
Example #11
0
def ReadFemPreg(dct_file='data/2011_2013_FemPregSetup.dct',
                dat_file='data/2011_2013_FemPregData.dat.gz'):
    """Reads the NSFG pregnancy data and cleans it.

    dct_file: string file name (parsed with iso-8859-1 encoding)
    dat_file: string file name of the gzip-compressed data

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    preg = layout.ReadFixedWidth(dat_file, compression='gzip')
    # CleanFemPreg's return value is ignored; it is expected to work in place.
    CleanFemPreg(preg)
    return preg
Example #12
0
def ReadResp(dct_file, dat_file, **options):
    """Reads the NSFG respondent data.

    dct_file: string file name (parsed with iso-8859-1 encoding)
    dat_file: string file name of the gzip-compressed data
    options: extra keyword arguments forwarded to ReadFixedWidth

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    return layout.ReadFixedWidth(dat_file, compression='gzip', **options)
Example #13
0
def ReadFemResp2002(dct_file='2002FemResp.dct',
                    dat_file='2002FemResp.dat.gz'):
    """Reads respondent data from NSFG Cycle 6.

    Only a fixed subset of columns is requested from the reader, and the
    result is passed through CleanResp before being returned.

    dct_file: string file name (parsed with iso-8859-1 encoding)
    dat_file: string file name of the gzip-compressed data

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    wanted_columns = ['caseid', 'cmbirth', 'cmintvw', 'finalwgt', 'age_r']
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', usecols=wanted_columns)
    # CleanResp's return value is ignored; it is expected to work in place.
    CleanResp(respondents)
    return respondents
Example #14
0
def ReadFemResp(dct_file='2002FemResp.dct', dat_file='2002FemResp.dat.gz'):
    """
    Read the female respondent file and run CleanFemResp on it.

    :param dct_file: Stata dictionary describing the layout of the data file
    :param dat_file: a gzip-compressed data file in plain text, with fixed width columns
    :return: a dataframe
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    resp = layout.ReadFixedWidth(dat_file, compression='gzip')
    # CleanFemResp's return value is ignored; it is expected to work in place.
    CleanFemResp(resp)
    return resp
Example #15
0
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    ''' (str file name, str file name, nrows) -> DataFrame

    Load the NSFG respondents data file, apply CleanFemResp to it, and
    return the resulting DataFrame. `nrows` is forwarded to ReadFixedWidth.
    '''
    resp = thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file,
        compression='gzip',
        nrows=nrows,
    )
    # CleanFemResp's return value is ignored; it is expected to work in place.
    CleanFemResp(resp)
    return resp
Example #16
0
def ReadFemPreg(dct_file, dat_file):
    """Reads the NSFG 2006-2010 pregnancy data and cleans it.

    dct_file: string file name (parsed with iso-8859-1 encoding)
    dat_file: string file name of the gzip-compressed data

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    pregnancies = layout.ReadFixedWidth(dat_file, compression='gzip')
    # CleanFemPreg's return value is ignored; it is expected to work in place.
    CleanFemPreg(pregnancies)
    return pregnancies
Example #17
0
def read_fem_resp(datfile='2002FemResp.dat.gz', dctfile='2002FemResp.dct'):
    """
    Read the respondent file, run clean_fem_resp on it, and return it.

    Note the parameter order: the data file comes before the dictionary.

    :type datfile: str
    :type dctfile: str
    :rtype: pd.DataFrame
    """
    layout = thinkstats2.ReadStataDct(dctfile)
    resp = layout.ReadFixedWidth(datfile, compression='gzip')
    # clean_fem_resp's return value is ignored; it is expected to work in place.
    clean_fem_resp(resp)
    return resp
Example #18
0
def ReadMale2010(dct_file='2006_2010_MaleSetup.dct',
                 dat_file='2006_2010_Male.dat.gz'):
    """Reads male data from NSFG Cycle 7.

    dct_file: string file name (parsed with iso-8859-1 encoding)
    dat_file: string file name of the gzip-compressed data

    returns: DataFrame with an added `finalwgt` column
    """
    layout = thinkstats2.ReadStataDct(dct_file, encoding='iso-8859-1')
    respondents = layout.ReadFixedWidth(dat_file, compression='gzip')

    # Copy the wgtq1q16 values into a column named finalwgt.
    respondents['finalwgt'] = respondents.wgtq1q16
    return respondents
Example #19
0
def ReadFemResp(dct_file, dat_file, nrows=None):
    """Reads the NSFG respondent data and cleans it.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    resp = layout.ReadFixedWidth(dat_file, nrows=nrows, compression='gzip')
    # CleanFemResp's return value is ignored; it is expected to work in place.
    CleanFemResp(resp)
    return resp
Example #20
0
def ReadFemPreg(dct_file='2002FemPreg.dct', dat_file='2002FemPreg.dat.gz'):
    """Reads the NSFG pregnancy data and cleans it.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data

    returns: DataFrame
    """
    preg = thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file, compression='gzip')
    # CleanFemPreg's return value is ignored; it is expected to work in place.
    CleanFemPreg(preg)
    return preg
Example #21
0
def ReadFemPreg(dct_file='2002FemPreg.dct',
                dat_file='2002FemPreg.dat.gz',
                nrows=None):
    """Reads the NSFG pregnancy data without any cleaning step.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    return layout.ReadFixedWidth(dat_file, compression='gzip', nrows=nrows)
Example #22
0
def ReadFemPreg(dct_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemPreg.dct',
                dat_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemPreg.dat.gz'):
    """Reads the NSFG pregnancy data and cleans it.

    NOTE: the default paths are absolute and specific to one machine;
    callers elsewhere need to pass their own file names.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    pregnancies = layout.ReadFixedWidth(dat_file, compression='gzip')
    # CleanFemPreg's return value is ignored; it is expected to work in place.
    CleanFemPreg(pregnancies)
    return pregnancies
def ReadFemResp(file1, file2, nrows=None):
    """ Load the NSFG respondent data described by a dct/dat file pair.

    No cleaning step is applied.

    :param file1: (str) dct filename
    :param file2: (str) dat filename (gzip-compressed)
    :param nrows: forwarded to ReadFixedWidth as `nrows`
    :return: df - dataframe of NSFG respondent data
    """
    layout = thinkstats2.ReadStataDct(file1)
    return layout.ReadFixedWidth(file2, compression="gzip", nrows=nrows)
Example #24
0
def readNplotNUMKDHH(dctfile='2002FemResp.dct',
                     datfile='2002FemResp.dat.gz',
                     compression='gzip'):
    """Read the respondent data and plot the NUMKDHH PMF, plain and biased.

    dctfile: string file name
    datfile: string file name
    compression: forwarded to ReadFixedWidth as `compression`

    Plots the PMF of the `numkdhh` column labelled 'numkdhh', then the
    result of BiasedPmf applied to it labelled 'biased', and shows the
    figure. Returns nothing.
    """
    resp = thinkstats2.ReadStataDct(dctfile).ReadFixedWidth(
        datfile, compression=compression)

    observed = thinkstats2.Pmf(resp.numkdhh)
    thinkplot.Pmf(observed, label='numkdhh')

    biased = BiasedPmf(observed)
    thinkplot.Pmf(biased, label='biased')

    thinkplot.show()
Example #25
0
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                nrows=None):
    """Read the FemResp data file and clean it.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    respondents = layout.ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    # CleanFemResp's return value is ignored; it is expected to work in place.
    CleanFemResp(respondents)
    return respondents
Example #26
0
def ReadFemResp(dct_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dct',
                dat_file='/Users/hedyeherfani/Desktop/ThinkStats2-master/code/2002FemResp.dat.gz',
                nrows=None):
    """Reads the NSFG respondent data and cleans it.

    NOTE: the default paths are absolute and specific to one machine;
    callers elsewhere need to pass their own file names.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data
    nrows: forwarded to ReadFixedWidth as `nrows`

    returns: DataFrame
    """
    resp = thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file, compression='gzip', nrows=nrows)
    # CleanFemResp's return value is ignored; it is expected to work in place.
    CleanFemResp(resp)
    return resp
Example #27
0
def ReadFemResp(dct_file='2002FemResp.dct',
                dat_file='2002FemResp.dat.gz',
                **options):
    """Reads the NSFG respondent data and runs CleanData on it.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data
    options: extra keyword arguments forwarded to ReadFixedWidth

    returns: DataFrame
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    resp = layout.ReadFixedWidth(dat_file, compression='gzip', **options)
    # CleanData's return value is ignored; it is expected to work in place.
    CleanData(resp)
    return resp
Example #28
0
def ReadFemResp(dct_file='2002FemResp.dct', dat_file='2002FemResp.dat.gz'):
    """Reads the respondent file without any cleaning step.

    dct_file: string file name
    dat_file: string file name of the gzip-compressed data

    returns: the result of ReadFixedWidth (presumably a DataFrame)
    """
    return thinkstats2.ReadStataDct(dct_file).ReadFixedWidth(
        dat_file, compression='gzip')
 def testReadStataDct(self):
     # Every per-field table parsed from the 2002 pregnancy dictionary is
     # checked against the same expected length.
     expected_fields = 243
     dct = thinkstats2.ReadStataDct('2002FemPreg.dct')
     for attr in ('variables', 'colspecs', 'names'):
         self.assertEqual(len(getattr(dct, attr)), expected_fields)
     # The second element of the final column spec is expected to be -1.
     last_spec = dct.colspecs[-1]
     self.assertEqual(last_spec[1], -1)
Example #30
0
def readResp(dct_file='2002FemResp.dct', dat_file='2002FemResp.dat.gz'):
    """Read the respondent file without any cleaning step.

    dct_file: string file name of the Stata dictionary
    dat_file: string file name of the gzip-compressed data

    returns: the result of ReadFixedWidth (presumably a DataFrame)
    """
    layout = thinkstats2.ReadStataDct(dct_file)
    return layout.ReadFixedWidth(dat_file, compression='gzip')