Beispiel #1
0
def test_on_new_cohort(R2, expt, alldata, to_exclude, test_ind_col, models,
                       ascvd_est):
    """Score already-fitted models on a differently filtered cohort.

    Args:
        R2: Output directory for this cohort's results. It is created
            (including missing parents) when it does not exist yet.
        expt: Experiment-like object. Its ``test_data`` attribute is
            overwritten, then ``predict_on_test`` and
            ``save_and_plot_test_results`` are called on it.
        alldata: Data handed to ``split_cohort`` to obtain the test split
            the models are scored on.
        to_exclude: Dict of cohort-filter settings forwarded to
            ``split_cohort``. It is NOT modified by this function.
        test_ind_col: Forwarded to ``split_cohort`` unchanged.
        models: Model identifiers forwarded to ``expt``.
        ascvd_est: Data handed to ``split_cohort`` to obtain the baseline
            estimates for the same cohort.
    """
    # exist_ok avoids the check-then-create race, and makedirs also copes
    # with a result root that has not been created yet.
    os.makedirs(R2, exist_ok=True)

    # Only the second element of the split is needed here.
    _, test_data = split_cohort(alldata, to_exclude, test_ind_col,
                                drop='all')
    expt.test_data = test_data
    expt.predict_on_test(models, test_file=None, out_dir=R2)

    # The baseline split always uses pce_invalid_vars=True. Work on a copy so
    # the caller's filter dict keeps the value it was passed in with.
    baseline_exclude = {**to_exclude, 'pce_invalid_vars': True}
    _, ascvd_test_est2 = split_cohort(ascvd_est, baseline_exclude,
                                      test_ind_col, drop='all')
    expt.save_and_plot_test_results(models,
                                    cv=5,
                                    test_baseline_prob_file=ascvd_test_est2,
                                    out_dir=R2)
Beispiel #2
0
def plot_ROCs(RESULT_DIR,
              to_exclude,
              test_ind_col, models, ascvd_est, label,
              test_models):
    """Re-create the result plots for an existing result directory.

    Args:
        RESULT_DIR: Directory passed to ``Experiment`` as ``result_dir``.
        to_exclude: Cohort-filter settings forwarded to ``split_cohort``.
        test_ind_col: Forwarded to ``split_cohort`` unchanged.
        models: Model identifiers passed to ``save_and_plot_results``.
        ascvd_est: Data that ``split_cohort`` divides into the two PCE
            frames used as ``pce_file`` / ``test_pce_file``.
        label: Passed to ``Experiment`` as ``label``.
        test_models: Model identifiers passed to
            ``save_and_plot_test_results``.
    """
    # The split was previously computed twice with identical arguments and
    # the first result thrown away; once is enough.
    pce_train_est2, pce_test_est2 = split_cohort(ascvd_est, to_exclude,
                                                 test_ind_col, drop='all')

    # No datafile: the experiment is only used for saving/plotting here.
    expt = Experiment(datafile=None,
                      result_dir=RESULT_DIR,
                      label=label)
    expt.save_and_plot_results(models,
                               cv=5,
                               pce_file=pce_train_est2,
                               test=False,
                               test_pce_file=pce_test_est2,
                               train=True)
    expt.save_and_plot_test_results(test_models,
                                    cv=5,
                                    pce_file=pce_train_est2,
                                    test_pce_file=pce_test_est2)
Beispiel #3
0
def train_val_test(RESULT_DIR,
                   alldata,
                   to_exclude,
                   test_ind_col,
                   models,
                   ascvd_est,
                   label,
                   oversample_rate=1,
                   imputer='iterative',
                   add_missing_flags=True):
    """Fit each model, predict on the test split, then save and plot results.

    Args:
        RESULT_DIR: Result directory (a string; it is concatenated into the
            start-up banner) passed to ``Experiment`` and ``predict_on_test``.
        alldata: First positional argument of ``Experiment``.
        to_exclude: Cohort-filter dict given to ``Experiment`` and to
            ``split_cohort``. NOTE: its ``'pce_invalid_vars'`` entry is set
            to ``True`` in place before the PCE split.
        test_ind_col: Forwarded to ``Experiment`` and ``split_cohort``.
        models: List of model identifiers; ``['PCE']`` is appended for the
            final results call.
        ascvd_est: Data that ``split_cohort`` divides into the PCE frames.
        label: Passed to ``Experiment`` as ``label``.
        oversample_rate: Forwarded to ``classification_ascvd``.
        imputer: Forwarded to ``classification_ascvd``.
        add_missing_flags: Forwarded to ``classification_ascvd``.
    """
    banner = '\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n'
    print(banner)

    experiment_options = dict(label=label,
                              to_exclude=to_exclude,
                              test_ind_col=test_ind_col,
                              drop='all',
                              result_dir=RESULT_DIR)
    expt = Experiment(alldata, **experiment_options)

    # The same fitting options apply to every model.
    fit_options = dict(oversample_rate=oversample_rate,
                       imputer=imputer,
                       add_missing_flags=add_missing_flags)
    for model_name in models:
        expt.classification_ascvd(model_name, **fit_options)

    expt.predict_on_test(models, out_dir=RESULT_DIR)

    # In-place update of the caller's dict (see the docstring note).
    to_exclude['pce_invalid_vars'] = True
    pce_splits = split_cohort(ascvd_est, to_exclude, test_ind_col, drop='all')
    pce_train, pce_test = pce_splits

    models_with_pce = models + ['PCE']
    expt.save_and_plot_results(models_with_pce,
                               cv=5,
                               pce_file=pce_train,
                               test=True,
                               test_pce_file=pce_test)
Beispiel #4
0
from medical_ML import split_cohort
from datetime import datetime
# Column names used throughout this script.
test_ind_col = 'test_ind'
label = 'ascvdany5y'

# Cohort-filter settings forwarded to split_cohort; the meaning of each flag
# is defined there.
to_exclude = {
    'pce_cohort': False,
    'pce_invalid_vars': True,
    'cvd_bl': True,
    'antilpd': True,
    'oldyoung': True
}
datafile = 'allvars.csv'
ascvd_est = pd.read_csv('../Data/cohort/' + datafile)
#%%
train_est2, test_est2 = split_cohort(ascvd_est,
                                     to_exclude,
                                     test_ind_col,
                                     drop='all')


def _one_hot_encode(frame):
    """Return *frame* with every object-dtype column one-hot encoded."""
    object_columns = [c for c in frame.columns if frame[c].dtype == 'O']
    return pd.get_dummies(frame, columns=object_columns)


def _drop_label(frame):
    """Return *frame* without the ``label`` column."""
    return frame[[f for f in frame.columns if f != label]]


test_set_data = _one_hot_encode(test_est2)
train_set_data = _one_hot_encode(train_est2)

train_set_features = _drop_label(train_set_data)
# The two splits are encoded independently, so a category that occurs in only
# one of them would give the feature matrices different columns. Align the
# test features to the training columns: indicator columns missing from the
# test split are filled with 0, columns unknown to training are dropped.
test_set_features = _drop_label(test_set_data).reindex(
    columns=train_set_features.columns, fill_value=0)

# Labels come from the un-encoded frames.
train_set_labels = train_est2[label]
test_set_labels = test_est2[label]
Beispiel #5
0
def train_val_test(RESULT_DIR, alldata, to_exclude, test_ind_col, models, ascvd_est, label, oversample_rate = 1,
                  imputer = 'iterative', add_missing_flags = True):
    """Fit each model, evaluate it, then re-test on a set of filtered cohorts.

    Args:
        RESULT_DIR: Result directory (a string; sub-cohort results go to
            ``RESULT_DIR + '/' + <cohort name>``).
        alldata: First positional argument of ``Experiment``; also forwarded
            to ``test_on_new_cohort``.
        to_exclude: Cohort-filter dict given to ``Experiment`` and to
            ``split_cohort``. NOTE: its ``'pce_invalid_vars'`` entry is set
            to ``True`` in place before the PCE split.
        test_ind_col: Forwarded to ``Experiment``, ``split_cohort`` and
            ``test_on_new_cohort``.
        models: Model identifiers to fit and evaluate.
        ascvd_est: Data that ``split_cohort`` divides into the PCE frames;
            also forwarded to ``test_on_new_cohort``.
        label: Passed to ``Experiment`` as ``label``.
        oversample_rate: Forwarded to ``classification_ascvd``.
        imputer: Forwarded to ``classification_ascvd``. When it is not
            ``None`` the additional "missing" cohorts are tested as well.
        add_missing_flags: Forwarded to ``classification_ascvd``.
    """
    banner = '\n\n' + 'STARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n'
    print(banner)
    expt = Experiment(alldata,
                      label = label,
                      to_exclude = to_exclude,
                      test_ind_col = test_ind_col,
                      drop = 'all',
                      result_dir = RESULT_DIR)

    fit_options = dict(oversample_rate = oversample_rate,
                       imputer = imputer,
                       add_missing_flags = add_missing_flags)
    for model_name in models:
        expt.classification_ascvd(model_name, **fit_options)

    expt.predict_on_test(models, out_dir = RESULT_DIR)

    # In-place update of the caller's dict (see the docstring note).
    to_exclude['pce_invalid_vars'] = True
    pce_train, pce_test = split_cohort(ascvd_est, to_exclude, test_ind_col, drop = 'all')
    expt.save_and_plot_results(models,
                               cv = 5,
                               pce_file = pce_train,
                               test = True,
                               test_pce_file = pce_test)

    def _cohort(pce_cohort, pce_invalid_vars, oldyoung, race = None, agebl = None):
        # Build one filter dict. 'cvd_bl' and 'antilpd' are True for every
        # cohort; 'race' and 'agebl' are only present when given. Keys are
        # inserted in a fixed order: race (if any) third, agebl (if any) last.
        spec = {'pce_cohort': pce_cohort,
                'pce_invalid_vars': pce_invalid_vars}
        if race is not None:
            spec['race'] = race
        spec['cvd_bl'] = True
        spec['antilpd'] = True
        spec['oldyoung'] = oldyoung
        if agebl is not None:
            spec['agebl'] = agebl
        return spec

    # Cohorts with pce_invalid_vars switched on.
    test_others = {
        'pce_nhwblack': _cohort(False, True, True,
                                race = ['Non-Hispanic_white', 'African_American']),
        'pce_hispanic': _cohort(False, True, True, race = ['Hispanic']),
        'pce_asian': _cohort(False, True, True, race = ['Asian']),
        'pce_pts': _cohort(False, True, True),
        'over80': _cohort(True, True, False, agebl = 80),
        'over40': _cohort(False, True, False, agebl = 40),
    }

    # Counterparts with pce_invalid_vars switched off; only tested when an
    # imputer is configured.
    test_o_missing = {
        'pce_missing_hispanic': _cohort(False, False, True, race = ['Hispanic']),
        'pce_missing_asian': _cohort(False, False, True, race = ['Asian']),
        'pce_missing': _cohort(False, False, True),
        'pce_missing_oldyoung': _cohort(False, False, False),
        'over80_missing': _cohort(True, False, False, agebl = 80),
        'over40_missing': _cohort(False, False, False, agebl = 40),
    }
    if imputer is not None:
        test_others.update(test_o_missing)

    for cohort_name, cohort_filter in test_others.items():
        cohort_dir = RESULT_DIR + '/' + cohort_name
        test_on_new_cohort(cohort_dir, expt, alldata,
                           cohort_filter,
                           test_ind_col, models,
                           ascvd_est)