예제 #1
0
def plot_val_ROCs(RESULT_DIR,
                  models, label,
                  baseline_probs = None,
                  baseline_str = 'baseline',
                  baseline_type = 'probability',
                  title = 'ROC for discharge prediction on test data'
                  ):
    """Save and plot results for ``models`` from an existing result directory.

    An ``Experiment`` is created without a data file and asked to save and
    plot results with both the ``train`` and ``test`` flags switched off.

    Args:
        RESULT_DIR: passed to ``Experiment`` as ``result_dir``.
        models: first positional argument of ``save_and_plot_results``.
        label: passed to ``Experiment`` as ``label``.
        baseline_probs: forwarded as ``baseline_prob_file``.
        baseline_str: forwarded as ``baseline_str``.
        baseline_type: accepted for interface compatibility; it is not
            forwarded to any call in this function.
        title: forwarded as ``title``.

    Returns:
        None.
    """
    # Options for the plotting call, kept in the order they are passed.
    plot_options = {
        'cv': 5,
        'train': False,
        'test': False,
        'baseline_str': baseline_str,
        'baseline_prob_file': baseline_probs,
        'title': title,
    }
    experiment = Experiment(datafile=None, result_dir=RESULT_DIR, label=label)
    experiment.save_and_plot_results(models, **plot_options)
예제 #2
0
def plot_ROCs(RESULT_DIR,
              models, label,
              test_models,
              title = 'ROC for discharge prediction on test data',
              pr_title = "Precision-recall curve on test data"):
    """Save and plot test-set results for ``test_models``.

    An ``Experiment`` is created without a data file, pointing at
    ``RESULT_DIR``, and ``save_and_plot_test_results`` is invoked on it.

    Args:
        RESULT_DIR: passed to ``Experiment`` as ``result_dir``.
        models: accepted for interface compatibility; unused in this body.
        label: passed to ``Experiment`` as ``label``.
        test_models: first positional argument of
            ``save_and_plot_test_results``.
        title: forwarded as ``title``.
        pr_title: forwarded as ``pr_title``.

    Returns:
        None.
    """
    experiment = Experiment(datafile=None, result_dir=RESULT_DIR, label=label)
    experiment.save_and_plot_test_results(
        test_models,
        cv=5,
        title=title,
        pr_title=pr_title,
    )
예제 #3
0
파일: utils.py 프로젝트: atward424/ASCVD_ML
def train_val(RESULT_DIR,
              alldata,
              models,
              label='Label',
              cv=5,
              score_name="AUC",
              to_exclude=None,
              test_ind_col=None,
              oversample_rate=1,
              imputer='iterative',
              add_missing_flags=True):
    """Run the classification experiment for ``models`` and plot the results.

    Prints a banner, builds an ``Experiment`` over ``alldata``, calls
    ``predict_models_from_groups`` in ``'classification'`` mode, then calls
    ``save_and_plot_results`` with ``test=False``.

    Args:
        RESULT_DIR: result directory; also concatenated into the banner, so
            it must be a ``str``.
        alldata: first positional argument of ``Experiment``.
        models: models handed to both experiment calls.
        label: passed to ``Experiment`` as ``label``.
        cv: forwarded as ``cv`` to both experiment calls.
        score_name: forwarded to ``predict_models_from_groups``.
        to_exclude: passed to ``Experiment`` as ``to_exclude``.
        test_ind_col: passed to ``Experiment`` as ``test_ind_col``.
        oversample_rate: forwarded to ``predict_models_from_groups``.
        imputer: forwarded to ``predict_models_from_groups``.
        add_missing_flags: forwarded to ``predict_models_from_groups``.

    Returns:
        The ``Experiment`` instance that was used.
    """
    # Imported lazily, inside the function, as in the surrounding module.
    from medical_ML import Experiment

    print('\n\nSTARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')

    experiment = Experiment(alldata,
                            label=label,
                            to_exclude=to_exclude,
                            test_ind_col=test_ind_col,
                            drop='all',
                            result_dir=RESULT_DIR)

    fit_options = {
        'cv': cv,
        'score_name': score_name,
        'mode': 'classification',
        'oversample_rate': oversample_rate,
        'imputer': imputer,
        'add_missing_flags': add_missing_flags,
    }
    experiment.predict_models_from_groups(0, models, **fit_options)
    experiment.save_and_plot_results(models, cv=cv, test=False)
    return experiment
예제 #4
0
def train_val(RESULT_DIR, alldata, models, label = 'Label',
              cv = 5,
              score_name = "AUC",
              to_exclude = None,
              test_ind_col = None, oversample_rate = 1,
              imputer = 'iterative', add_missing_flags = True,
              baseline_str = None,
              baseline_prob_file = None,
              title = 'ROC for discharge prediction on validation data'):
    """Run the classification experiment for ``models`` without plotting.

    Prints a banner, builds an ``Experiment`` over ``alldata`` and calls
    ``predict_models_from_groups`` in ``'classification'`` mode. No plotting
    call is made here.

    Args:
        RESULT_DIR: result directory; also concatenated into the banner, so
            it must be a ``str``.
        alldata: first positional argument of ``Experiment``.
        models: models handed to ``predict_models_from_groups``.
        label: passed to ``Experiment`` as ``label``.
        cv: forwarded to ``predict_models_from_groups``.
        score_name: forwarded to ``predict_models_from_groups``.
        to_exclude: passed to ``Experiment`` as ``to_exclude``.
        test_ind_col: passed to ``Experiment`` as ``test_ind_col``.
        oversample_rate: forwarded to ``predict_models_from_groups``.
        imputer: forwarded to ``predict_models_from_groups``.
        add_missing_flags: forwarded to ``predict_models_from_groups``.
        baseline_str: accepted but unused in this body.
        baseline_prob_file: accepted but unused in this body.
        title: accepted but unused in this body.

    Returns:
        The ``Experiment`` instance that was used.
    """
    print('\n\nSTARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')

    experiment = Experiment(
        alldata,
        label=label,
        to_exclude=to_exclude,
        test_ind_col=test_ind_col,
        drop='all',
        result_dir=RESULT_DIR,
    )
    experiment.predict_models_from_groups(
        0,
        models,
        cv=cv,
        score_name=score_name,
        mode='classification',
        oversample_rate=oversample_rate,
        imputer=imputer,
        add_missing_flags=add_missing_flags,
    )
    return experiment
예제 #5
0
def train_val_test(RESULT_DIR,
                   alldata,
                   to_exclude,
                   test_ind_col,
                   models,
                   ascvd_est,
                   label,
                   oversample_rate=1,
                   imputer='iterative',
                   add_missing_flags=True):
    """Fit each model, predict on the test set, and plot against ``'PCE'``.

    Steps, in order:
      1. print a banner and build an ``Experiment`` over ``alldata``;
      2. call ``classification_ascvd`` once per entry of ``models``;
      3. call ``predict_on_test`` writing to ``RESULT_DIR``;
      4. set ``to_exclude['pce_invalid_vars'] = True`` and split
         ``ascvd_est`` with ``split_cohort``;
      5. call ``save_and_plot_results`` for ``models + ['PCE']`` with the
         two split results as ``pce_file`` / ``test_pce_file``.

    Args:
        RESULT_DIR: result directory; also concatenated into the banner, so
            it must be a ``str``.
        alldata: first positional argument of ``Experiment``.
        to_exclude: mapping passed to ``Experiment`` and ``split_cohort``.
            It is modified in place (see step 4).
        test_ind_col: passed to ``Experiment`` and ``split_cohort``.
        models: list of models; ``['PCE']`` is appended to a copy for
            plotting.
        ascvd_est: first positional argument of ``split_cohort``.
        label: passed to ``Experiment`` as ``label``.
        oversample_rate: forwarded to ``classification_ascvd``.
        imputer: forwarded to ``classification_ascvd``.
        add_missing_flags: forwarded to ``classification_ascvd``.

    Returns:
        None.
    """
    print('\n\nSTARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')

    experiment = Experiment(alldata,
                            label=label,
                            to_exclude=to_exclude,
                            test_ind_col=test_ind_col,
                            drop='all',
                            result_dir=RESULT_DIR)

    fit_options = {
        'oversample_rate': oversample_rate,
        'imputer': imputer,
        'add_missing_flags': add_missing_flags,
    }
    for candidate in models:
        experiment.classification_ascvd(candidate, **fit_options)

    experiment.predict_on_test(models, out_dir=RESULT_DIR)

    # NOTE(review): this writes into the caller's dict, which was also handed
    # to Experiment above. Confirm whether that shared mutation is relied on
    # before replacing it with a copy.
    to_exclude['pce_invalid_vars'] = True
    pce_split = split_cohort(ascvd_est, to_exclude, test_ind_col, drop='all')
    train_pce, test_pce = pce_split

    experiment.save_and_plot_results(models + ['PCE'],
                                     cv=5,
                                     pce_file=train_pce,
                                     test=True,
                                     test_pce_file=test_pce)
예제 #6
0
def plot_ROCs(RESULT_DIR,
              to_exclude,
              test_ind_col, models, ascvd_est, label,
              test_models):
    """Plot training/validation and test results with PCE split files.

    ``ascvd_est`` is split once with ``split_cohort``; the two results are
    then passed as ``pce_file`` / ``test_pce_file`` to both plotting calls
    on an ``Experiment`` created without a data file.

    Args:
        RESULT_DIR: passed to ``Experiment`` as ``result_dir``.
        to_exclude: second positional argument of ``split_cohort``.
        test_ind_col: third positional argument of ``split_cohort``.
        models: first positional argument of ``save_and_plot_results``.
        ascvd_est: first positional argument of ``split_cohort``.
        label: passed to ``Experiment`` as ``label``.
        test_models: first positional argument of
            ``save_and_plot_test_results``.

    Returns:
        None.
    """
    # Split once and reuse the result. Calling split_cohort a second time
    # with the same arguments would only repeat the work and discard the
    # first result.
    # NOTE(review): this assumes split_cohort is deterministic for identical
    # arguments - confirm.
    pce_train_est2, pce_test_est2 = split_cohort(ascvd_est, to_exclude,
                                                 test_ind_col, drop='all')

    expt = Experiment(datafile=None,
                      result_dir=RESULT_DIR,
                      label=label)
    expt.save_and_plot_results(models,
                               cv=5,
                               pce_file=pce_train_est2,
                               test=False,
                               test_pce_file=pce_test_est2,
                               train=True)
    expt.save_and_plot_test_results(test_models,
                                    cv=5,
                                    pce_file=pce_train_est2,
                                    test_pce_file=pce_test_est2)
예제 #7
0
def train_val_test(RESULT_DIR, alldata, to_exclude, test_ind_col, models, ascvd_est, label, oversample_rate = 1,
                  imputer = 'iterative', add_missing_flags = True):
    """Fit each model, evaluate on the test set, then on extra cohorts.

    Steps, in order:
      1. print a banner and build an ``Experiment`` over ``alldata``;
      2. call ``classification_ascvd`` once per entry of ``models``;
      3. call ``predict_on_test`` writing to ``RESULT_DIR``;
      4. set ``to_exclude['pce_invalid_vars'] = True``, split ``ascvd_est``
         with ``split_cohort`` and call ``save_and_plot_results``;
      5. call ``test_on_new_cohort`` once per extra cohort definition, each
         writing to ``RESULT_DIR + '/' + <cohort name>``. The cohorts with
         ``pce_invalid_vars`` set to ``False`` are included only when
         ``imputer`` is not ``None``.

    Args:
        RESULT_DIR: result directory; used in string concatenation, so it
            must be a ``str``.
        alldata: passed to ``Experiment`` and ``test_on_new_cohort``.
        to_exclude: mapping passed to ``Experiment`` and ``split_cohort``.
            It is modified in place (see step 4).
        test_ind_col: passed to ``Experiment``, ``split_cohort`` and
            ``test_on_new_cohort``.
        models: models to fit and evaluate.
        ascvd_est: passed to ``split_cohort`` and ``test_on_new_cohort``.
        label: passed to ``Experiment`` as ``label``.
        oversample_rate: forwarded to ``classification_ascvd``.
        imputer: forwarded to ``classification_ascvd``; also gates the
            second cohort group (step 5).
        add_missing_flags: forwarded to ``classification_ascvd``.

    Returns:
        None.
    """
    def _cohort(pce_cohort, pce_invalid_vars, oldyoung, race=None, agebl=None):
        # Build one cohort definition. Keys are inserted in a fixed order:
        # pce_cohort, pce_invalid_vars, race (only if given), cvd_bl,
        # antilpd, oldyoung, agebl (only if given).
        spec = {'pce_cohort': pce_cohort,
                'pce_invalid_vars': pce_invalid_vars}
        if race is not None:
            spec['race'] = race
        spec['cvd_bl'] = True
        spec['antilpd'] = True
        spec['oldyoung'] = oldyoung
        if agebl is not None:
            spec['agebl'] = agebl
        return spec

    print('\n\nSTARTING EXPERIMENT FOR ' + RESULT_DIR + '\n\n')
    experiment = Experiment(alldata,
                            label=label,
                            to_exclude=to_exclude,
                            test_ind_col=test_ind_col,
                            drop='all',
                            result_dir=RESULT_DIR)

    for candidate in models:
        experiment.classification_ascvd(candidate,
                                        oversample_rate=oversample_rate,
                                        imputer=imputer,
                                        add_missing_flags=add_missing_flags)

    experiment.predict_on_test(models, out_dir=RESULT_DIR)

    # NOTE(review): this writes into the caller's dict, which was also handed
    # to Experiment above. Confirm whether that shared mutation is relied on
    # before replacing it with a copy.
    to_exclude['pce_invalid_vars'] = True
    train_pce, test_pce = split_cohort(ascvd_est, to_exclude, test_ind_col,
                                       drop='all')
    experiment.save_and_plot_results(models,
                                     cv=5,
                                     pce_file=train_pce,
                                     test=True,
                                     test_pce_file=test_pce)

    # Cohort definitions with 'pce_invalid_vars' set to True.
    cohorts = {
        'pce_nhwblack': _cohort(
            pce_cohort=False, pce_invalid_vars=True, oldyoung=True,
            race=['Non-Hispanic_white', 'African_American']),
        'pce_hispanic': _cohort(
            pce_cohort=False, pce_invalid_vars=True, oldyoung=True,
            race=['Hispanic']),
        'pce_asian': _cohort(
            pce_cohort=False, pce_invalid_vars=True, oldyoung=True,
            race=['Asian']),
        'pce_pts': _cohort(
            pce_cohort=False, pce_invalid_vars=True, oldyoung=True),
        'over80': _cohort(
            pce_cohort=True, pce_invalid_vars=True, oldyoung=False,
            agebl=80),
        'over40': _cohort(
            pce_cohort=False, pce_invalid_vars=True, oldyoung=False,
            agebl=40),
    }

    # Cohort definitions with 'pce_invalid_vars' set to False; these are
    # evaluated only when an imputer is configured.
    if imputer is not None:
        cohorts.update({
            'pce_missing_hispanic': _cohort(
                pce_cohort=False, pce_invalid_vars=False, oldyoung=True,
                race=['Hispanic']),
            'pce_missing_asian': _cohort(
                pce_cohort=False, pce_invalid_vars=False, oldyoung=True,
                race=['Asian']),
            'pce_missing': _cohort(
                pce_cohort=False, pce_invalid_vars=False, oldyoung=True),
            'pce_missing_oldyoung': _cohort(
                pce_cohort=False, pce_invalid_vars=False, oldyoung=False),
            'over80_missing': _cohort(
                pce_cohort=True, pce_invalid_vars=False, oldyoung=False,
                agebl=80),
            'over40_missing': _cohort(
                pce_cohort=False, pce_invalid_vars=False, oldyoung=False,
                agebl=40),
        })

    for cohort_name, cohort_spec in cohorts.items():
        test_on_new_cohort(RESULT_DIR + '/' + cohort_name, experiment, alldata,
                           cohort_spec,
                           test_ind_col, models,
                           ascvd_est)