def plot_val_ROCs(RESULT_DIR, models, label,
                  baseline_probs=None,
                  baseline_str='baseline',
                  baseline_type='probability',
                  title='ROC for discharge prediction on test data'):
    """Plot results for ``models`` with train and test plotting switched off.

    An ``Experiment`` is created without a data file (``datafile=None``) and
    pointed at ``RESULT_DIR``; its ``save_and_plot_results`` method is then
    called with ``cv=5``, ``train=False`` and ``test=False``.

    Args:
        RESULT_DIR: Handed to ``Experiment`` as ``result_dir``.
        models: Forwarded as the first argument of ``save_and_plot_results``.
        label: Handed to ``Experiment`` as ``label``.
        baseline_probs: Forwarded as ``baseline_prob_file``.
        baseline_str: Forwarded as ``baseline_str``.
        baseline_type: Accepted for interface compatibility only; it is not
            forwarded to ``save_and_plot_results``.
        title: Forwarded as ``title``.
    """
    experiment = Experiment(datafile=None, result_dir=RESULT_DIR, label=label)

    # NOTE: baseline_type is deliberately left out of the call below.
    experiment.save_and_plot_results(
        models,
        cv=5,
        train=False,
        test=False,
        baseline_str=baseline_str,
        baseline_prob_file=baseline_probs,
        title=title,
    )
def plot_ROCs(RESULT_DIR, models, label, test_models,
              title='ROC for discharge prediction on test data',
              pr_title="Precision-recall curve on test data"):
    """Plot test results for ``test_models`` from ``RESULT_DIR``.

    An ``Experiment`` is created without a data file (``datafile=None``) and
    its ``save_and_plot_test_results`` method is called with ``cv=5``.

    Args:
        RESULT_DIR: Handed to ``Experiment`` as ``result_dir``.
        models: Accepted for interface compatibility only; not used in the
            body.
        label: Handed to ``Experiment`` as ``label``.
        test_models: Forwarded as the first argument of
            ``save_and_plot_test_results``.
        title: Forwarded as ``title``.
        pr_title: Forwarded as ``pr_title``.
    """
    experiment = Experiment(datafile=None, result_dir=RESULT_DIR, label=label)
    experiment.save_and_plot_test_results(
        test_models,
        cv=5,
        title=title,
        pr_title=pr_title,
    )
def train_val(RESULT_DIR, alldata, models, label='Label', cv=5,
              score_name="AUC", to_exclude=None, test_ind_col=None,
              oversample_rate=1, imputer='iterative',
              add_missing_flags=True):
    """Fit ``models`` via ``Experiment.predict_models_from_groups`` and plot.

    Prints a banner naming ``RESULT_DIR``, builds an ``Experiment`` on
    ``alldata`` (with ``drop='all'``), runs ``predict_models_from_groups`` in
    ``'classification'`` mode, then calls ``save_and_plot_results`` with
    ``test=False``.

    Args:
        RESULT_DIR: Result directory; also concatenated into the banner, so
            it must be a ``str``.
        alldata: First positional argument of ``Experiment``.
        models: Models passed to both the fitting and the plotting call.
        label: Handed to ``Experiment`` as ``label``.
        cv: Forwarded as ``cv`` to both the fitting and the plotting call.
        score_name: Forwarded to ``predict_models_from_groups``.
        to_exclude: Handed to ``Experiment`` as ``to_exclude``.
        test_ind_col: Handed to ``Experiment`` as ``test_ind_col``.
        oversample_rate: Forwarded to ``predict_models_from_groups``.
        imputer: Forwarded to ``predict_models_from_groups``.
        add_missing_flags: Forwarded to ``predict_models_from_groups``.

    Returns:
        The ``Experiment`` instance that was created and run.
    """
    # Imported at call time, inside the function body.
    from medical_ML import Experiment

    print('\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')

    experiment = Experiment(
        alldata,
        label=label,
        to_exclude=to_exclude,
        test_ind_col=test_ind_col,
        drop='all',
        result_dir=RESULT_DIR,
    )
    # The leading 0 is passed through as-is; its meaning is defined by
    # Experiment.predict_models_from_groups -- TODO confirm.
    experiment.predict_models_from_groups(
        0,
        models,
        cv=cv,
        score_name=score_name,
        mode='classification',
        oversample_rate=oversample_rate,
        imputer=imputer,
        add_missing_flags=add_missing_flags,
    )
    experiment.save_and_plot_results(models, cv=cv, test=False)
    return experiment
def train_val(RESULT_DIR, alldata, models, label='Label', cv=5,
              score_name="AUC", to_exclude=None, test_ind_col=None,
              oversample_rate=1, imputer='iterative',
              add_missing_flags=True, baseline_str=None,
              baseline_prob_file=None,
              title='ROC for discharge prediction on validation data'):
    """Fit ``models`` via ``Experiment.predict_models_from_groups``.

    Prints a banner naming ``RESULT_DIR``, builds an ``Experiment`` on
    ``alldata`` (with ``drop='all'``) and runs ``predict_models_from_groups``
    in ``'classification'`` mode. No plotting is performed here.

    Args:
        RESULT_DIR: Result directory; also concatenated into the banner, so
            it must be a ``str``.
        alldata: First positional argument of ``Experiment``.
        models: Models passed to ``predict_models_from_groups``.
        label: Handed to ``Experiment`` as ``label``.
        cv: Forwarded to ``predict_models_from_groups``.
        score_name: Forwarded to ``predict_models_from_groups``.
        to_exclude: Handed to ``Experiment`` as ``to_exclude``.
        test_ind_col: Handed to ``Experiment`` as ``test_ind_col``.
        oversample_rate: Forwarded to ``predict_models_from_groups``.
        imputer: Forwarded to ``predict_models_from_groups``.
        add_missing_flags: Forwarded to ``predict_models_from_groups``.
        baseline_str: Currently unused (see note in the body).
        baseline_prob_file: Currently unused (see note in the body).
        title: Currently unused (see note in the body).

    Returns:
        The ``Experiment`` instance that was created and run.
    """
    print('\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')

    experiment = Experiment(
        alldata,
        label=label,
        to_exclude=to_exclude,
        test_ind_col=test_ind_col,
        drop='all',
        result_dir=RESULT_DIR,
    )
    # The leading 0 is passed through as-is; its meaning is defined by
    # Experiment.predict_models_from_groups -- TODO confirm.
    experiment.predict_models_from_groups(
        0,
        models,
        cv=cv,
        score_name=score_name,
        mode='classification',
        oversample_rate=oversample_rate,
        imputer=imputer,
        add_missing_flags=add_missing_flags,
    )

    # NOTE: the follow-up save_and_plot_results(models, cv=cv, test=False,
    # baseline_prob_file=..., baseline_str=..., title=...) call is disabled,
    # which is why baseline_str, baseline_prob_file and title go unused.
    return experiment
def train_val_test(RESULT_DIR, alldata, to_exclude, test_ind_col, models,
                   ascvd_est, label, oversample_rate=1, imputer='iterative',
                   add_missing_flags=True):
    """Fit every model, predict on the test set and plot against 'PCE'.

    Steps, in order:
      1. Print a banner naming ``RESULT_DIR``.
      2. Build an ``Experiment`` on ``alldata`` (with ``drop='all'``).
      3. Call ``classification_ascvd`` once per entry of ``models``.
      4. Call ``predict_on_test`` with ``out_dir=RESULT_DIR``.
      5. Set ``to_exclude['pce_invalid_vars'] = True`` and split
         ``ascvd_est`` with ``split_cohort``.
      6. Call ``save_and_plot_results`` for ``models + ['PCE']`` with
         ``test=True`` and the two split results as PCE files.

    Args:
        RESULT_DIR: Result directory; also concatenated into the banner, so
            it must be a ``str``.
        alldata: First positional argument of ``Experiment``.
        to_exclude: Dict handed to ``Experiment`` and ``split_cohort``.
            It is MUTATED in place (key ``'pce_invalid_vars'``).
        test_ind_col: Handed to ``Experiment`` and ``split_cohort``.
        models: List of models; must support ``+ ['PCE']``.
        ascvd_est: First argument of ``split_cohort``.
        label: Handed to ``Experiment`` as ``label``.
        oversample_rate: Forwarded to ``classification_ascvd``.
        imputer: Forwarded to ``classification_ascvd``.
        add_missing_flags: Forwarded to ``classification_ascvd``.
    """
    print('\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')

    experiment = Experiment(
        alldata,
        label=label,
        to_exclude=to_exclude,
        test_ind_col=test_ind_col,
        drop='all',
        result_dir=RESULT_DIR,
    )

    fit_options = dict(
        oversample_rate=oversample_rate,
        imputer=imputer,
        add_missing_flags=add_missing_flags,
    )
    for model_name in models:
        experiment.classification_ascvd(model_name, **fit_options)

    experiment.predict_on_test(models, out_dir=RESULT_DIR)

    # NOTE(review): this writes into the caller's dict, which was also given
    # to Experiment above -- confirm whether Experiment relies on seeing it.
    to_exclude['pce_invalid_vars'] = True
    pce_train, pce_test = split_cohort(
        ascvd_est, to_exclude, test_ind_col, drop='all'
    )

    experiment.save_and_plot_results(
        models + ['PCE'],
        cv=5,
        pce_file=pce_train,
        test=True,
        test_pce_file=pce_test,
    )
def plot_ROCs(RESULT_DIR, to_exclude, test_ind_col, models, ascvd_est, label,
              test_models):
    """Plot training/validation and test results with PCE files attached.

    ``ascvd_est`` is split once with ``split_cohort`` and the two results are
    reused for both plotting calls. An ``Experiment`` is created without a
    data file (``datafile=None``) and pointed at ``RESULT_DIR``.

    Args:
        RESULT_DIR: Handed to ``Experiment`` as ``result_dir``.
        to_exclude: Second argument of ``split_cohort``.
        test_ind_col: Third argument of ``split_cohort``.
        models: Forwarded to ``save_and_plot_results`` (called with
            ``train=True, test=False``).
        ascvd_est: First argument of ``split_cohort``.
        label: Handed to ``Experiment`` as ``label``.
        test_models: Forwarded to ``save_and_plot_test_results``.
    """
    # Split a single time; the same pair feeds both plotting calls below.
    pce_train, pce_test = split_cohort(
        ascvd_est, to_exclude, test_ind_col, drop='all'
    )

    experiment = Experiment(datafile=None, result_dir=RESULT_DIR, label=label)

    experiment.save_and_plot_results(
        models,
        cv=5,
        pce_file=pce_train,
        test=False,
        test_pce_file=pce_test,
        train=True,
    )
    experiment.save_and_plot_test_results(
        test_models,
        cv=5,
        pce_file=pce_train,
        test_pce_file=pce_test,
    )
def train_val_test(RESULT_DIR, alldata, to_exclude, test_ind_col, models,
                   ascvd_est, label, oversample_rate=1, imputer='iterative',
                   add_missing_flags=True):
    """Fit every model, evaluate on the test set, then on extra cohorts.

    Steps, in order:
      1. Print a banner naming ``RESULT_DIR``.
      2. Build an ``Experiment`` on ``alldata`` (with ``drop='all'``).
      3. Call ``classification_ascvd`` once per entry of ``models``.
      4. Call ``predict_on_test`` with ``out_dir=RESULT_DIR``.
      5. Set ``to_exclude['pce_invalid_vars'] = True``, split ``ascvd_est``
         with ``split_cohort`` and call ``save_and_plot_results`` with
         ``test=True``.
      6. For each named cohort filter defined below, call
         ``test_on_new_cohort`` with the output directory
         ``RESULT_DIR + '/' + <cohort name>``.

    The cohorts whose filter has ``'pce_invalid_vars': False`` (the
    ``*_missing`` group) are only evaluated when ``imputer is not None``.

    Args:
        RESULT_DIR: Result directory; concatenated into strings, so it must
            be a ``str``.
        alldata: First positional argument of ``Experiment``; also passed to
            ``test_on_new_cohort``.
        to_exclude: Dict handed to ``Experiment`` and ``split_cohort``.
            It is MUTATED in place (key ``'pce_invalid_vars'``).
        test_ind_col: Handed to ``Experiment``, ``split_cohort`` and
            ``test_on_new_cohort``.
        models: Models to fit, predict with and plot.
        ascvd_est: First argument of ``split_cohort``; also passed to
            ``test_on_new_cohort``.
        label: Handed to ``Experiment`` as ``label``.
        oversample_rate: Forwarded to ``classification_ascvd``.
        imputer: Forwarded to ``classification_ascvd``; ``None`` disables the
            ``*_missing`` cohorts.
        add_missing_flags: Forwarded to ``classification_ascvd``.
    """
    print('\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')

    experiment = Experiment(
        alldata,
        label=label,
        to_exclude=to_exclude,
        test_ind_col=test_ind_col,
        drop='all',
        result_dir=RESULT_DIR,
    )

    fit_options = dict(
        oversample_rate=oversample_rate,
        imputer=imputer,
        add_missing_flags=add_missing_flags,
    )
    for model_name in models:
        experiment.classification_ascvd(model_name, **fit_options)

    experiment.predict_on_test(models, out_dir=RESULT_DIR)

    # NOTE(review): this writes into the caller's dict, which was also given
    # to Experiment above -- confirm whether Experiment relies on seeing it.
    to_exclude['pce_invalid_vars'] = True
    pce_train, pce_test = split_cohort(
        ascvd_est, to_exclude, test_ind_col, drop='all'
    )
    experiment.save_and_plot_results(
        models,
        cv=5,
        pce_file=pce_train,
        test=True,
        test_pce_file=pce_test,
    )

    # Cohort name -> exclusion filter passed to test_on_new_cohort.
    # Disabled variants that used to live here: pce_statin_missing,
    # pce_cvd_missing, cvd_missing, oldyoung_missing.
    cohort_filters = {
        'pce_nhwblack': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'race': ['Non-Hispanic_white', 'African_American'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_hispanic': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'race': ['Hispanic'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_asian': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'race': ['Asian'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_pts': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'over80': {
            'pce_cohort': True,
            'pce_invalid_vars': True,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': False,
            'agebl': 80,
        },
        'over40': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': False,
            'agebl': 40,
        },
    }

    # Same cohorts but with 'pce_invalid_vars' set to False.
    missing_var_filters = {
        'pce_missing_hispanic': {
            'pce_cohort': False,
            'pce_invalid_vars': False,
            'race': ['Hispanic'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_missing_asian': {
            'pce_cohort': False,
            'pce_invalid_vars': False,
            'race': ['Asian'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_missing': {
            'pce_cohort': False,
            'pce_invalid_vars': False,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_missing_oldyoung': {
            'pce_cohort': False,
            'pce_invalid_vars': False,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': False,
        },
        'over80_missing': {
            'pce_cohort': True,
            'pce_invalid_vars': False,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': False,
            'agebl': 80,
        },
        'over40_missing': {
            'pce_cohort': False,
            'pce_invalid_vars': False,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': False,
            'agebl': 40,
        },
    }

    if imputer is not None:
        cohort_filters.update(missing_var_filters)

    for cohort_name, cohort_filter in cohort_filters.items():
        test_on_new_cohort(
            RESULT_DIR + '/' + cohort_name,
            experiment,
            alldata,
            cohort_filter,
            test_ind_col,
            models,
            ascvd_est,
        )