def filter_table_for_cohort(cohort_id_file, table_file):
    """Return the path of the cohort-filtered copy of ``table_file``.

    The filtered copy is expected at ``table_file + "_select.csv"``.  When
    that file is not on disk yet, the external ``csv_select`` command is run
    with the cohort file, the literal column name ``studyid`` and the table
    file as its arguments.  An output file that already exists is reused
    without re-running the command.

    Args:
        cohort_id_file: path passed to ``csv_select`` as its first argument.
        table_file: path of the table to be filtered.

    Returns:
        The string ``table_file + "_select.csv"``.
    """
    select_file = table_file + "_select.csv"

    if not os.path.exists(select_file):
        # The exit status of csv_select is deliberately not inspected here;
        # the assert_exists() call below is what checks for the output file.
        command = " ".join(("csv_select", cohort_id_file, "studyid", table_file))
        os.system(command)

    assert_exists(select_file)
    return select_file
except: pass if len(sys.argv) < 3: print "Important: delete the *.p files before running, if you change the cohort file!!!" err("usage:\n\tgrad [version 1 or 2: start with 1] [cohort file.csv]\nhuman should enter 1 for first parameter." ) if not os.path.exists("dd") or not os.path.isdir("dd"): # find, extract and clean data dictionary files run("dd_list") run("dd_clean") # data dictionary file for registry: dd_reg = "dd/2018-09-27_data_dictionary_consolidation-file-january-1-1986-onwards.xlsx_registry.C.csv2" assert_exists(dd_reg) dd_schlstud = "dd/2019-01-24_data_dictionary_education.xlsx_schlstud.csv2" assert_exists(dd_schlstud) dd_studcrd = "dd/2019-01-24_data_dictionary_education.xlsx_studcrd.csv2" assert_exists(dd_studcrd) cohort_file = sys.argv[2] #'youth_cohort.csv' # delete *.p files and other intermediary files if cohort changes schlstud_file = 'idomed1991-2017.ft_schlstud.A.dat_dd_sliceapply.csv' studcrd_file = 'idomed1991-2017.ft_studcrd.A.dat_dd_sliceapply.csv' registry_file = 'registry1991-2016_dd_sliceapply.csv_cat.csv' files = [cohort_file, schlstud_file, studcrd_file, registry_file] # prepare school student file, if not yet if not os.path.exists(schlstud_file): print "School student file not found. Extracting.."
# grad_gr8_cohort youth_cohort.csv 4 # youth_cohort.csv has studyid, dob (yyyy-mm)'''
import os
import sys
import math
import pickle
import datetime
from misc import load_fields, assert_exists, err, run

# No usable "dd" directory yet: find, extract and clean the data
# dictionary files.
if not os.path.exists("dd") or not os.path.isdir("dd"):
    run("dd_list")
    run("dd_clean")

# Data dictionary files: registry, school student, student credit.
dd_reg = "dd/2019-01-09_data_dictionary_consolidation-file-january-1-1986-onwards.xlsx_registry.C.csv2"
dd_schlstud, dd_studcrd = "dd/2019-01-24_data_dictionary_education.xlsx_schlstud.csv2", "dd/2019-01-24_data_dictionary_education.xlsx_studcrd.csv2"
for f in [dd_reg, dd_schlstud, dd_studcrd]:
    assert_exists(f)

# Two command-line arguments are required; otherwise show the usage text.
if len(sys.argv) < 3:
    print("Important: delete the *.p files before running, if you change the cohort file!!!")
    err("Usage: grad_gr8_cohort [cohort_file.csv] [number of years=6?]")

# First argument: cohort file, e.g. 'youth_cohort.csv'.
# Delete *.p files and other intermediary files if the cohort changes.
cohort_file = sys.argv[1]

# Second argument: year interval, parsed as an integer.
number_of_years = None
try:
    number_of_years = int(sys.argv[2])
except ValueError:
    # int() raises ValueError for a non-numeric string. Catching only that
    # keeps Ctrl-C and SystemExit from being reported as a parse failure.
    err("failed to parse second parameter, year interval e.g. 4, 6, or 8")

# Input tables used by the rest of the script.
schlstud_file = 'idomed1991-2017.ft_schlstud.A.dat_dd_sliceapply.csv'
studcrd_file = 'idomed1991-2017.ft_studcrd.A.dat_dd_sliceapply.csv'
registry_file = 'registry1991-2016_dd_sliceapply.csv_cat.csv'