def test_on_new_cohort(R2, expt, alldata, to_exclude, test_ind_col, models, ascvd_est):
    """Evaluate the experiment's models on a differently filtered test cohort.

    The test split of ``alldata`` selected by ``to_exclude`` replaces
    ``expt.test_data``, predictions are written under ``R2``, and the test
    results are saved/plotted using the test split of ``ascvd_est`` as the
    baseline probabilities.

    Args:
        R2: Output directory for this cohort; created when missing.
        expt: Experiment object; its ``test_data`` attribute is overwritten.
        alldata: Full data set handed to ``split_cohort``.
        to_exclude: Cohort filter options handed to ``split_cohort``. This
            function does not modify the dict.
        test_ind_col: Test-indicator column name handed to ``split_cohort``.
        models: Models forwarded to ``expt.predict_on_test`` and
            ``expt.save_and_plot_test_results``.
        ascvd_est: Data set that is split to obtain the baseline test file.
    """
    # makedirs also creates missing parent directories and avoids the
    # check-then-create race of isdir() followed by mkdir().
    os.makedirs(R2, exist_ok=True)

    _, test_data = split_cohort(alldata, to_exclude, test_ind_col, drop='all')
    expt.test_data = test_data
    expt.predict_on_test(models, test_file=None, out_dir=R2)

    # The baseline split is always taken with pce_invalid_vars=True. Work on a
    # copy so the caller's filter dict keeps the value it was given.
    baseline_exclude = dict(to_exclude)
    baseline_exclude['pce_invalid_vars'] = True
    _, ascvd_test_est2 = split_cohort(ascvd_est, baseline_exclude,
                                      test_ind_col, drop='all')

    expt.save_and_plot_test_results(models,
                                    cv=5,
                                    test_baseline_prob_file=ascvd_test_est2,
                                    out_dir=R2)
def plot_ROCs(RESULT_DIR, to_exclude, test_ind_col, models, ascvd_est, label, test_models):
    """Save and plot results for an existing result directory.

    Builds an ``Experiment`` without a data file on ``RESULT_DIR`` and calls
    its result-plotting methods, passing the train/test splits of
    ``ascvd_est`` as the PCE files.

    Args:
        RESULT_DIR: Result directory given to ``Experiment``.
        to_exclude: Cohort filter options handed to ``split_cohort``.
        test_ind_col: Test-indicator column name handed to ``split_cohort``.
        models: Models passed to ``expt.save_and_plot_results``.
        ascvd_est: Data set that is split into PCE train/test files.
        label: Label name given to ``Experiment``.
        test_models: Models passed to ``expt.save_and_plot_test_results``.
    """
    # The split is computed once; the original computed the identical split
    # twice and discarded the first result.
    pce_train_est2, pce_test_est2 = split_cohort(ascvd_est, to_exclude,
                                                 test_ind_col, drop='all')
    expt = Experiment(datafile=None, result_dir=RESULT_DIR, label=label)

    expt.save_and_plot_results(models,
                               cv=5,
                               pce_file=pce_train_est2,
                               test=False,
                               test_pce_file=pce_test_est2,
                               train=True)
    expt.save_and_plot_test_results(test_models,
                                    cv=5,
                                    pce_file=pce_train_est2,
                                    test_pce_file=pce_test_est2)
def train_val_test(RESULT_DIR, alldata, to_exclude, test_ind_col, models,
                   ascvd_est, label, oversample_rate=1, imputer='iterative',
                   add_missing_flags=True):
    """Fit every model, predict on the test split, then save and plot results.

    Args:
        RESULT_DIR: Result directory; also used in the start-up banner.
        alldata: Data given to ``Experiment``.
        to_exclude: Cohort filter options. NOTE: this dict is modified in
            place (``'pce_invalid_vars'`` is set to ``True``) before the
            PCE split is taken.
        test_ind_col: Test-indicator column name.
        models: List of models to fit; ``'PCE'`` is appended for the final
            result plots.
        ascvd_est: Data set that is split into PCE train/test files.
        label: Label name given to ``Experiment``.
        oversample_rate: Forwarded to ``expt.classification_ascvd``.
        imputer: Forwarded to ``expt.classification_ascvd``.
        add_missing_flags: Forwarded to ``expt.classification_ascvd``.
    """
    banner = '\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n'
    print(banner)

    expt = Experiment(
        alldata,
        label=label,
        to_exclude=to_exclude,
        test_ind_col=test_ind_col,
        drop='all',
        result_dir=RESULT_DIR,
    )

    # Same fitting options for every model.
    fit_options = {
        'oversample_rate': oversample_rate,
        'imputer': imputer,
        'add_missing_flags': add_missing_flags,
    }
    for model_name in models:
        expt.classification_ascvd(model_name, **fit_options)

    expt.predict_on_test(models, out_dir=RESULT_DIR)

    # The PCE split is always taken with pce_invalid_vars switched on.
    to_exclude['pce_invalid_vars'] = True
    pce_train, pce_test = split_cohort(ascvd_est, to_exclude, test_ind_col,
                                       drop='all')

    models_with_pce = models + ['PCE']
    expt.save_and_plot_results(
        models_with_pce,
        cv=5,
        pce_file=pce_train,
        test=True,
        test_pce_file=pce_test,
    )
from medical_ML import split_cohort
from datetime import datetime

# Cohort configuration: test-indicator column, outcome label, and the filter
# options handed to split_cohort.
test_ind_col = 'test_ind'
label = 'ascvdany5y'
to_exclude = {
    'pce_cohort': False,
    'pce_invalid_vars': True,
    'cvd_bl': True,
    'antilpd': True,
    'oldyoung': True,
}

datafile = 'allvars.csv'
ascvd_est = pd.read_csv('../Data/cohort/' + datafile)

#%%
train_est2, test_est2 = split_cohort(ascvd_est, to_exclude, test_ind_col,
                                     drop='all')

# One-hot encode the object-dtype columns of each split. The splits are
# encoded independently of each other.
train_set_data, test_set_data = (
    pd.get_dummies(
        split,
        columns=[c for c in split.columns if split[c].dtype == 'O'])
    for split in (train_est2, test_est2)
)

# Features are every encoded column except the label column.
train_set_features, test_set_features = (
    encoded[[f for f in encoded.columns if f != label]]
    for encoded in (train_set_data, test_set_data)
)

# Labels come from the un-encoded splits.
train_set_labels, test_set_labels = train_est2[label], test_est2[label]
def train_val_test(RESULT_DIR, alldata, to_exclude, test_ind_col, models,
                   ascvd_est, label, oversample_rate=1, imputer='iterative',
                   add_missing_flags=True):
    """Fit the models, evaluate on the test split, then on extra cohorts.

    After the main train/test evaluation, ``test_on_new_cohort`` is called
    once per additional cohort definition, writing into a sub-directory of
    ``RESULT_DIR`` named after the cohort. The "missing" cohorts
    (``pce_invalid_vars`` False) are only evaluated when ``imputer`` is not
    ``None``.

    Args:
        RESULT_DIR: Result directory; also used in the start-up banner.
        alldata: Data given to ``Experiment`` and to ``test_on_new_cohort``.
        to_exclude: Cohort filter options. NOTE: this dict is modified in
            place (``'pce_invalid_vars'`` is set to ``True``) before the
            PCE split is taken.
        test_ind_col: Test-indicator column name.
        models: Models to fit and evaluate.
        ascvd_est: Data set that is split into PCE train/test files.
        label: Label name given to ``Experiment``.
        oversample_rate: Forwarded to ``expt.classification_ascvd``.
        imputer: Forwarded to ``expt.classification_ascvd``; ``None`` skips
            the "missing" cohorts.
        add_missing_flags: Forwarded to ``expt.classification_ascvd``.
    """
    banner = '\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n'
    print(banner)

    expt = Experiment(
        alldata,
        label=label,
        to_exclude=to_exclude,
        test_ind_col=test_ind_col,
        drop='all',
        result_dir=RESULT_DIR,
    )

    # Same fitting options for every model.
    fit_options = {
        'oversample_rate': oversample_rate,
        'imputer': imputer,
        'add_missing_flags': add_missing_flags,
    }
    for model_name in models:
        expt.classification_ascvd(model_name, **fit_options)

    expt.predict_on_test(models, out_dir=RESULT_DIR)

    # The PCE split is always taken with pce_invalid_vars switched on.
    to_exclude['pce_invalid_vars'] = True
    pce_train, pce_test = split_cohort(ascvd_est, to_exclude, test_ind_col,
                                       drop='all')
    expt.save_and_plot_results(
        models,
        cv=5,
        pce_file=pce_train,
        test=True,
        test_pce_file=pce_test,
    )

    # Additional cohorts, keyed by the sub-directory their results go to.
    extra_cohorts = {
        'pce_nhwblack': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'race': ['Non-Hispanic_white', 'African_American'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_hispanic': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'race': ['Hispanic'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_asian': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'race': ['Asian'],
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'pce_pts': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': True,
        },
        'over80': {
            'pce_cohort': True,
            'pce_invalid_vars': True,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': False,
            'agebl': 80,
        },
        'over40': {
            'pce_cohort': False,
            'pce_invalid_vars': True,
            'cvd_bl': True,
            'antilpd': True,
            'oldyoung': False,
            'agebl': 40,
        },
    }

    if imputer is not None:
        # Cohorts with pce_invalid_vars switched off are only added when an
        # imputer is configured.
        extra_cohorts.update({
            'pce_missing_hispanic': {
                'pce_cohort': False,
                'pce_invalid_vars': False,
                'race': ['Hispanic'],
                'cvd_bl': True,
                'antilpd': True,
                'oldyoung': True,
            },
            'pce_missing_asian': {
                'pce_cohort': False,
                'pce_invalid_vars': False,
                'race': ['Asian'],
                'cvd_bl': True,
                'antilpd': True,
                'oldyoung': True,
            },
            'pce_missing': {
                'pce_cohort': False,
                'pce_invalid_vars': False,
                'cvd_bl': True,
                'antilpd': True,
                'oldyoung': True,
            },
            'pce_missing_oldyoung': {
                'pce_cohort': False,
                'pce_invalid_vars': False,
                'cvd_bl': True,
                'antilpd': True,
                'oldyoung': False,
            },
            'over80_missing': {
                'pce_cohort': True,
                'pce_invalid_vars': False,
                'cvd_bl': True,
                'antilpd': True,
                'oldyoung': False,
                'agebl': 80,
            },
            'over40_missing': {
                'pce_cohort': False,
                'pce_invalid_vars': False,
                'cvd_bl': True,
                'antilpd': True,
                'oldyoung': False,
                'agebl': 40,
            },
        })

    for cohort_name, cohort_filter in extra_cohorts.items():
        test_on_new_cohort(RESULT_DIR + '/' + cohort_name, expt, alldata,
                           cohort_filter, test_ind_col, models, ascvd_est)