def data_extraction_mortality(args):
    """Return the mortality-task rows that fall inside the time window.

    Args:
        args: Object exposing ``time_window`` (upper bound, inclusive, on
            ``itemoffset``) and ``root_dir`` (passed to ``utils.embedding``).

    Returns:
        The output of ``utils.filter_mortality_data`` restricted to rows
        whose ``itemoffset`` is less than or equal to ``args.time_window``.
    """
    cutoff = args.time_window
    embedded = utils.embedding(args.root_dir)
    mortality = utils.filter_mortality_data(embedded)
    within_window = mortality['itemoffset'] <= cutoff
    return mortality[within_window]
# Example no. 2
# 0
def data_extraction_phenotyping(args):
    """Build the phenotyping feature table and its per-stay label table.

    Only stays present in both the embedded feature table and the diagnosis
    table (with at least one non-null phenotype label) are kept.

    Args:
        args: Object exposing ``root_dir`` (passed to ``utils.embedding``)
            and ``eicu_dir`` (passed to ``utils.read_diagnosis_table``).

    Returns:
        A ``(all_pheno, all_pheno_label)`` tuple:

        * ``all_pheno`` -- rows of the filtered feature table for the
          retained stays with ``itemoffset > 0`` and ``RLOS >= 0``.
        * ``all_pheno_label`` -- one row per stay that still has feature
          rows, with columns ordered as ``diag_ord_col`` and every
          phenotype column capped at 1.
    """
    id_columns = ['patientunitstayid', 'itemoffset']

    label_pheno = [
        'Respiratory failure', 'Essential hypertension',
        'Cardiac dysrhythmias', 'Fluid disorders', 'Septicemia',
        'Acute and unspecified renal failure', 'Pneumonia',
        'Acute cerebrovascular disease', 'CHF', 'CKD', 'COPD',
        'Acute myocardial infarction', "Gastrointestinal hem", 'Shock',
        'lipid disorder', 'DM with complications', 'Coronary athe', 'Pleurisy',
        'Other liver diseases', 'lower respiratory',
        'Hypertension with complications', 'Conduction disorders',
        'Complications of surgical', 'upper respiratory',
        'DM without complication'
    ]

    # Column order of the returned label table; callers may rely on it
    # positionally, so it is spelled out rather than derived.
    diag_ord_col = [
        "patientunitstayid", "itemoffset", "Respiratory failure",
        "Fluid disorders", "Septicemia", "Acute and unspecified renal failure",
        "Pneumonia", "Acute cerebrovascular disease",
        "Acute myocardial infarction", "Gastrointestinal hem", "Shock",
        "Pleurisy", "lower respiratory", "Complications of surgical",
        "upper respiratory", "Hypertension with complications",
        "Essential hypertension", "CKD", "COPD", "lipid disorder",
        "Coronary athe", "DM without complication", "Cardiac dysrhythmias",
        "CHF", "DM with complications", "Other liver diseases",
        "Conduction disorders"
    ]

    # Columns kept from the diagnosis table: the ids followed by the labels.
    diag_columns = id_columns + label_pheno

    all_df = utils.embedding(args.root_dir)
    all_df = utils.filter_phenotyping_data(all_df)

    diag = utils.read_diagnosis_table(args.eicu_dir)
    diag = utils.diag_labels(diag)
    # Drop diagnosis rows that carry no phenotype label at all.
    diag = diag.dropna(how='all', subset=label_pheno)

    # Stays that appear in both the feature table and the diagnosis table.
    stay_diag = set(diag['patientunitstayid'].unique())
    stay_all = set(all_df['patientunitstayid'].unique())
    stay_pheno = list(stay_all.intersection(stay_diag))

    diag = diag[diag['patientunitstayid'].isin(stay_pheno)]
    # Non-inplace rename: ``diag`` is a filtered selection here, and mutating
    # it in place is the pattern pandas warns about (SettingWithCopy).
    diag = diag.rename(columns={"diagnosisoffset": "itemoffset"})
    diag = diag[diag_columns]

    # Collapse to one row per stay, then cap each phenotype count at 1.
    # NOTE(review): ``itemoffset`` is summed along with the labels, so in the
    # label table it is the per-stay sum of diagnosis offsets -- confirm
    # whether anything downstream actually uses that column.
    label = diag.groupby('patientunitstayid').sum().reset_index()
    label[label_pheno] = np.where(label[label_pheno] >= 1, 1,
                                  label[label_pheno])

    keep_rows = (
        all_df["patientunitstayid"].isin(stay_pheno)
        & (all_df["itemoffset"] > 0)  # remove records before unit admission
        & (all_df["RLOS"] >= 0)  # remove records after unit discharge
    )
    all_pheno = all_df[keep_rows]

    label = label[diag_ord_col]
    # Keep labels only for stays that still have feature rows after filtering.
    remaining_stays = all_pheno["patientunitstayid"].unique()
    all_pheno_label = label[label["patientunitstayid"].isin(remaining_stays)]
    return all_pheno, all_pheno_label
# Example no. 3
# 0
def data_extraction_rlos(args):
    """Return the data set for the RLOS task.

    Args:
        args: Object exposing ``root_dir``, which is passed to
            ``utils.embedding``.

    Returns:
        Whatever ``utils.filter_rlos_data`` produces from the embedded table.
    """
    return utils.filter_rlos_data(utils.embedding(args.root_dir))