def preprocess(studfile, schoolfile, recode_table_f, recode_vars_f):
    """Load the student/school data, strip the recodable columns and recode them.

    The student and school files are handed to ``manage`` (defined elsewhere)
    together with the column selections built below. Every column named in
    the ``"variable"`` field of ``recode_vars_f`` is passed cell-by-cell
    through ``rec.strip`` and then ``rec.recode`` is called on the frame.

    Parameters:
        studfile: student-level input, forwarded unchanged to ``manage``.
        schoolfile: school-level input, forwarded unchanged to ``manage``.
        recode_table_f: recode table source, forwarded to ``rec.recode``.
        recode_vars_f: CSV readable by ``pd.read_csv`` with a ``"variable"``
            column listing the columns to recode.

    Returns:
        The object returned by ``manage`` after stripping and recoding.
        NOTE(review): ``rec.recode``'s return value is discarded, so it
        presumably modifies the frame in place -- confirm against ``rec``.
    """
    # Plausible-value columns: PV1..PV5 for each subject, in this subject order.
    plausible_values = [
        "PV%d%s" % (index, subject)
        for subject in ("READ", "SCIE", "MATH")
        for index in range(1, 6)
    ]

    student_columns = [
        "STIDSTD",
        "CNT",
        "STRATUM",
        "SCHOOLID",
        "ST04Q01",
        "ESCS",
        "IMMIG",
        "LANGN",
        "TESTLANG",
        "FAMSTRUC",
        "ST05Q01",
    ]
    student_columns.extend(plausible_values)
    student_columns.append("W_FSTUWT")

    school_columns = ["CNT", "SCHOOLID", "SC01Q01", "SC03Q01"]

    pisadat = manage(
        studfile,
        schoolfile,
        studcols=student_columns,
        schoolcols=school_columns,
    )

    # Only the columns listed in the recode-variables file are cleaned.
    variable_table = pd.read_csv(recode_vars_f)
    recode_vars = variable_table["variable"].unique()
    stripped = pisadat[recode_vars].applymap(rec.strip)
    pisadat[recode_vars] = stripped

    rec.recode(pisadat, recode_table_f, recode_vars_f)
    return pisadat
def get_formatter(format):
    """Return the recode formatter for ``format``, creating it on first use.

    The format name is lower-cased and used as the key into the module-level
    ``formatter_cache``. On a cache miss a new formatter is built with
    ``recode.recode('latin1..' + format)``, stored in the cache and returned.

    Parameters:
        format: name of the target format (case-insensitive string).
            The name shadows the ``format`` builtin but is kept so existing
            keyword callers keep working.

    Returns:
        The cached or newly created formatter object.
    """
    # str.lower() and the ``in`` operator replace string.lower() and
    # dict.has_key(), which no longer exist on Python 3; both forms behave
    # the same on Python 2.
    format = format.lower()

    if format in formatter_cache:
        return formatter_cache[format]

    ft = recode.recode('latin1..' + format)
    formatter_cache[format] = ft
    return ft
def check_recode():
    """Sanity-check the ``recode`` extension module.

    Imports ``recode`` and converts the plain string ``'abc'`` with a
    ``'UTF-8..latex'`` request; the result is expected to be unchanged.

    Returns:
        0 when the check passes.

    Raises:
        RuntimeError: if importing ``recode`` raises ``SystemError``, or if
            the conversion does not return ``'abc'``.
    """
    try:
        import recode
    except SystemError:
        raise RuntimeError('the recode library is probably broken.')

    # First, check if the recode version has the famous 3.6 bug
    request = recode.request('UTF-8..latex')
    converted = recode.recode(request, 'abc')

    if converted != 'abc':
        raise RuntimeError('the _recode module is broken.')

    return 0