# NOTE(review): the statements in this span also appear, in the same order, in
# the code that follows it; confirm the repetition is intentional.

# Log how many distinct ICU stays, admissions and subjects `stays` holds.
if args.verbose:
    print('REMOVE PATIENTS AGE < 18:', stays.ICUSTAY_ID.unique().shape[0], stays.HADM_ID.unique().shape[0],
          stays.SUBJECT_ID.unique().shape[0])

# Write the stays table, then the diagnoses restricted to those stays and the
# per-code counts, into the output directory.
stays.to_csv(os.path.join(args.output_path, 'all_stays.csv'), index=False)
diagnoses = read_icd_diagnoses_table(args.mimic3_path)
diagnoses = filter_diagnoses_on_stays(diagnoses, stays)
diagnoses.to_csv(os.path.join(args.output_path, 'all_diagnoses.csv'), index=False)
count_icd_codes(diagnoses, output_path=os.path.join(args.output_path, 'diagnosis_counts.csv'))

# Load the phenotype definitions with the safe loader and close the file
# deterministically. A bare `yaml.load(stream)` raises TypeError on
# PyYAML >= 6 and may construct arbitrary objects on older releases.
# NOTE(review): assumes the definitions file is plain YAML data without
# custom Python tags -- confirm.
with open(args.phenotype_definitions, 'r') as definitions_file:
    phenotype_definitions = yaml.safe_load(definitions_file)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_definitions)
make_phenotype_label_matrix(phenotypes, stays).to_csv(
    os.path.join(args.output_path, 'phenotype_labels.csv'),
    index=False, quoting=csv.QUOTE_NONNUMERIC)

# Test mode: keep only a random sample of patients and the first event table.
if args.test:
    # Sample without replacement so no patient is drawn twice; a repeated
    # patient row would duplicate that patient's stays in the merge below.
    # The size is capped because replace=False requires size <= population.
    sample_size = min(1000, patients.shape[0])
    pat_idx = np.random.choice(patients.shape[0], size=sample_size, replace=False)
    patients = patients.iloc[pat_idx]
    stays = stays.merge(patients[['SUBJECT_ID']], left_on='SUBJECT_ID', right_on='SUBJECT_ID')
    args.event_tables = [args.event_tables[0]]
    print('Using only', stays.shape[0], 'stays and only', args.event_tables[0], 'table')
# Run `stays` through the age / mortality helpers and then the age filter.
# NOTE(review): judging by the log label below, filter_icustays_on_age
# presumably drops patients younger than 18 -- confirm against the helper.
stays = add_age_to_icustays(stays)
stays = add_inunit_mortality_to_icustays(stays)
stays = add_inhospital_mortality_to_icustays(stays)
stays = filter_icustays_on_age(stays)

# Log how many distinct ICU stays, admissions and subjects remain.
if args.verbose:
    print('REMOVE PATIENTS AGE < 18:', stays.ICUSTAY_ID.unique().shape[0], stays.HADM_ID.unique().shape[0],
          stays.SUBJECT_ID.unique().shape[0])

# Write the stays table, then the diagnoses restricted to those stays and the
# per-code counts, into the output directory.
stays.to_csv(os.path.join(args.output_path, 'all_stays.csv'), index=False)
diagnoses = read_icd_diagnoses_table(args.mimic3_path)
diagnoses = filter_diagnoses_on_stays(diagnoses, stays)
diagnoses.to_csv(os.path.join(args.output_path, 'all_diagnoses.csv'), index=False)
count_icd_codes(diagnoses, output_path=os.path.join(args.output_path, 'diagnosis_counts.csv'))

# Load the phenotype definitions with the safe loader and close the file
# deterministically. A bare `yaml.load(stream)` raises TypeError on
# PyYAML >= 6 and may construct arbitrary objects on older releases.
# NOTE(review): assumes the definitions file is plain YAML data without
# custom Python tags -- confirm.
with open(args.phenotype_definitions, 'r') as definitions_file:
    phenotype_definitions = yaml.safe_load(definitions_file)
phenotypes = add_hcup_ccs_2015_groups(diagnoses, phenotype_definitions)
make_phenotype_label_matrix(phenotypes, stays).to_csv(
    os.path.join(args.output_path, 'phenotype_labels.csv'),
    index=False, quoting=csv.QUOTE_NONNUMERIC)

# Test mode: keep only a random sample of patients and the first event table.
if args.test:
    # Sample without replacement so no patient is drawn twice; a repeated
    # patient row would duplicate that patient's stays in the merge below.
    # The size is capped because replace=False requires size <= population.
    sample_size = min(1000, patients.shape[0])
    pat_idx = np.random.choice(patients.shape[0], size=sample_size, replace=False)
    patients = patients.iloc[pat_idx]
    stays = stays.merge(patients[['SUBJECT_ID']], left_on='SUBJECT_ID', right_on='SUBJECT_ID')
    args.event_tables = [args.event_tables[0]]
    print('Using only', stays.shape[0], 'stays and only', args.event_tables[0], 'table')

# Split the stays and the phenotype-annotated diagnoses per subject.
subjects = stays.SUBJECT_ID.unique()
break_up_stays_by_subject(stays, args.output_path, subjects=subjects, verbose=args.verbose)
break_up_diagnoses_by_subject(phenotypes, args.output_path, subjects=subjects, verbose=args.verbose)

# Optional whitelist of ITEMIDs read from args.itemids_file; None when no
# file was given.
if args.itemids_file:
    items_to_keep = {int(itemid) for itemid in dataframe_from_csv(args.itemids_file)['ITEMID'].unique()}
else:
    items_to_keep = None