def ReadBrfss(filename='CDBRFS08.ASC.gz', compression='gzip', nrows=None):
    """Loads a handful of BRFSS variables from a fixed-width data file.

    filename: string name of the data file
    compression: string, forwarded unchanged to the fixed-width reader
    nrows: int number of rows to read, or None for all

    returns: DataFrame
    """
    # One (name, start, end, type) record per variable to extract.
    field_specs = [
        ('age', 101, 102, int),
        ('sex', 143, 143, int),
        ('wtyrago', 127, 130, int),
        ('finalwt', 799, 808, int),
        ('wtkg2', 1254, 1258, int),
        ('htm3', 1251, 1253, int),
    ]
    layout = pandas.DataFrame(
        field_specs, columns=['name', 'start', 'end', 'type'])

    # Every end position is bumped by one before the layout is handed over.
    # NOTE(review): presumably this turns an inclusive end column into the
    # exclusive bound FixedWidthVariables expects -- confirm against
    # thinkstats2.
    layout['end'] += 1

    reader = thinkstats2.FixedWidthVariables(layout, index_base=1)
    frame = reader.ReadFixedWidth(
        filename, compression=compression, nrows=nrows)

    # The return value is ignored, so CleanBrfssFrame is presumably relied on
    # to modify the frame in place.
    CleanBrfssFrame(frame)
    return frame
def ReadBabyBoom(filename='babyboom.dat'):
    """Loads the babyboom variables from a fixed-width data file.

    filename: string name of the data file

    returns: DataFrame
    """
    # One (name, start, end, type) record per variable to extract.
    field_specs = [
        ('time', 1, 8, int),
        ('sex', 9, 16, int),
        ('weight_g', 17, 24, int),
        ('minutes', 25, 32, int),
    ]
    layout = pandas.DataFrame(
        field_specs, columns=['name', 'start', 'end', 'type'])

    # Every end position is bumped by one before the layout is handed over.
    # NOTE(review): presumably this turns an inclusive end column into the
    # exclusive bound FixedWidthVariables expects -- confirm against
    # thinkstats2.
    layout['end'] += 1

    reader = thinkstats2.FixedWidthVariables(layout, index_base=1)

    # skiprows=59 is forwarded to the reader; presumably the first 59 lines
    # of the file are a non-data header -- verify against the data file.
    return reader.ReadFixedWidth(filename, skiprows=59)