Esempio n. 1
0
def filter_table_for_cohort(cohort_id_file, table_file):
    """Run `csv_select` on a table for a cohort, reusing an earlier result.

    The expected output path is `table_file` + "_select.csv". If that file
    already exists the external command is skipped; otherwise
    `csv_select <cohort_id_file> studyid <table_file>` is run through the
    shell. Either way the output path is checked with `assert_exists`
    before being returned.

    NOTE(review): presumably csv_select keeps the rows of `table_file` whose
    `studyid` appears in `cohort_id_file` and writes them to the
    "_select.csv" path -- confirm against the csv_select tool.

    NOTE(review): both paths are placed into the shell command unquoted, so
    paths containing spaces or shell metacharacters will not work.

    Args:
        cohort_id_file: path handed to csv_select as its first argument.
        table_file: path of the table handed to csv_select.

    Returns:
        The path `table_file + "_select.csv"`.
    """
    select_file = table_file + "_select.csv"
    if not os.path.exists(select_file):
        # The exit status is deliberately not stored: success is judged by
        # the presence of the output file, checked right below.
        os.system(" ".join(["csv_select", cohort_id_file, "studyid",
                            table_file]))
    assert_exists(select_file)
    return select_file
Esempio n. 2
0
except:
    pass

# two positional parameters are required: [version] [cohort file.csv]
if len(sys.argv) < 3:
    print("Important: delete the *.p files before running, if you change the cohort file!!!")
    err("usage:\n\tgrad [version 1 or 2: start with 1] [cohort file.csv]\nhuman should enter 1 for first parameter.")

# no "dd" directory yet: find, extract and clean the data dictionary files
if not os.path.isdir("dd"):
    run("dd_list")
    run("dd_clean")

# data dictionary files: registry, school student, student credit
dd_reg = "dd/2018-09-27_data_dictionary_consolidation-file-january-1-1986-onwards.xlsx_registry.C.csv2"
dd_schlstud = "dd/2019-01-24_data_dictionary_education.xlsx_schlstud.csv2"
dd_studcrd = "dd/2019-01-24_data_dictionary_education.xlsx_studcrd.csv2"
assert_exists(dd_reg)
assert_exists(dd_schlstud)
assert_exists(dd_studcrd)

# cohort file comes from the second parameter (e.g. 'youth_cohort.csv');
# delete *.p files and other intermediary files if the cohort changes
cohort_file = sys.argv[2]

# data files used alongside the cohort file
schlstud_file = 'idomed1991-2017.ft_schlstud.A.dat_dd_sliceapply.csv'
studcrd_file = 'idomed1991-2017.ft_studcrd.A.dat_dd_sliceapply.csv'
registry_file = 'registry1991-2016_dd_sliceapply.csv_cat.csv'
files = [cohort_file, schlstud_file, studcrd_file, registry_file]

# prepare school student file, if not yet
if not os.path.exists(schlstud_file):
    print("School student file not found. Extracting..")
Esempio n. 3
0
# grad_gr8_cohort youth_cohort.csv 4
#  youth_cohort.csv has studyid, dob (yyyy-mm)'''
import os, sys, math, pickle, datetime
from misc import load_fields, assert_exists, err, run

# Validate the command line before anything else, so that a bad invocation
# reports its usage right away instead of after the data dictionary
# extraction and checks further down.
if len(sys.argv) < 3:
    print("Important: delete the *.p files before running, if you change the cohort file!!!")
    err("Usage: grad_gr8_cohort [cohort_file.csv] [number of years=6?]")

# e.g. 'youth_cohort.csv'; delete *.p files and other intermediary files if
# the cohort changes
cohort_file = sys.argv[1]

# year interval, second parameter
number_of_years = None
try:
    number_of_years = int(sys.argv[2])
except (ValueError, IndexError):
    # ValueError: the parameter is not an integer.
    # IndexError: the parameter is missing (only reachable if err() above
    # returned instead of terminating -- presumably it does not).
    err("failed to parse second parameter, year interval e.g. 4, 6, or 8")

# no "dd" directory yet: find, extract and clean the data dictionary files
if not os.path.isdir("dd"):
    run("dd_list")
    run("dd_clean")

# data dictionary files
dd_reg = "dd/2019-01-09_data_dictionary_consolidation-file-january-1-1986-onwards.xlsx_registry.C.csv2"
dd_schlstud = "dd/2019-01-24_data_dictionary_education.xlsx_schlstud.csv2"
dd_studcrd = "dd/2019-01-24_data_dictionary_education.xlsx_studcrd.csv2"
for f in [dd_reg, dd_schlstud, dd_studcrd]:
    assert_exists(f)

# data files
schlstud_file = 'idomed1991-2017.ft_schlstud.A.dat_dd_sliceapply.csv'
studcrd_file = 'idomed1991-2017.ft_studcrd.A.dat_dd_sliceapply.csv'
registry_file = 'registry1991-2016_dd_sliceapply.csv_cat.csv'