Esempio n. 1
0
import analyzer.dataset as ds
import evaluation.treatment_utils as u

#%% Choose variant and specify paths

# Treatment / outcome pair handled by this section.
treatment = 'ACEI_ARBS'
outcome = 'COMORB_DEATH'

# Locations (relative to the working directory) of the stored results and
# of the website assets for this outcome.
path = '../../covid19_treatments_results/matched_single_treatments_hypertension/' + treatment
website_path = '../../website/assets/treatment_calculators/' + outcome

training_set_name = treatment + '_hope_hm_cremona_matched_all_treatments_train.csv'

# Load the 'train' split through the project helper.
X, Z, y = u.load_data(
    '../../covid19_treatments_data/matched_single_treatments_hypertension/',
    training_set_name,
    split='train', matched=True, prediction=outcome,
    other_tx=False, med_hx=False,
)

# Remove the HYPERTENSION column from the feature frame, in place.
X.drop(columns='HYPERTENSION', inplace=True)

# Pickled result file stored under the 'cart' sub-folder of this outcome.
load_file_path_yes = os.path.join(
    path, outcome, 'cart', 'ACEI_ARBS_matched_comorb_death_seed1')

with open(load_file_path_yes, 'rb') as file:
    load_file_yes = pickle.load(file)

# Features of the stored 'train' entry, i.e. everything but the outcome column.
X_test_yes = load_file_yes['train'].drop(columns=outcome)

load_file_path_no = os.path.join(path, outcome, 'lr',
Esempio n. 2
0
# Name of the training file for the selected treatment.
training_set_name = treatment + train_file

# Summary folder for this version / treatment / outcome combination.
results_path = '../../covid19_treatments_results/'
version_folder = version + str(treatment) + '/' + str(outcome) + '/'
save_path = results_path + version_folder + 'summary/'

data_version = 'train'
threshold = 0.05

# Load the selected split through the project helper.
train_X, Z, y = u.load_data(
    data_path,
    training_set_name,
    split=data_version,
    matched=matched,
    prediction=outcome,
    other_tx=False,
    replace_na='NO_' + treatment,
)

# Per-patient benefit: mean of the 'Benefit' column over all rows sharing an ID.
df_result = pd.read_csv(
    save_path + data_version + '_' + match_status
    + '_bypatient_allmethods_benefit.csv')
benefit = df_result.groupby('ID')['Benefit'].mean()

### Run Model
# Grid search over tree depths 3..7 with a fixed random seed.
grid = iai.GridSearch(
    iai.OptimalTreeRegressor(random_seed=1),
    max_depth=range(3, 8),
)

# NOTE(review): the features loaded above are bound to `train_X`, while the
# fit below uses `X`, which is not assigned anywhere in this section --
# confirm that `X` is the intended feature matrix.
grid.fit(X, benefit)
# Empty frame that collects one row of aggregate metrics per configuration.
metrics_agg = pd.DataFrame(columns=[
    'data_version', 'weighted_status', 'threshold', 'match_rate',
    'presc_count', 'average_auc',
    'PE_0', 'CPE_0', 'PE', 'CPE', 'pr_low', 'pr_high',
])

for outcome in prediction_list:
    version_folder = str(treatment) + '/' + str(outcome) + '/'
    save_path = results_path + version_folder + 'summary/'
    # Make sure the summary folder exists before it is read from.
    Path(save_path).mkdir(parents=True, exist_ok=True)
    for data_version in data_list:
        print(data_version)
        for threshold in (0, 0.01, 0.02, 0.05, 0.1):
            print('Threshold = ' + str(threshold))
            # Load the requested split through the project helper.
            X, Z, y = u.load_data(
                data_path, training_set_name,
                split=data_version, matched=matched, prediction=outcome,
                replace_na='NO_' + treatment)

            # Per-patient results, restricted to the algorithms in
            # algorithm_list.
            result = pd.read_csv(
                save_path + data_version + '_' + match_status
                + '_bypatient_allmethods.csv')
            result = result[result['Algorithm'].isin(algorithm_list)]

            # Performance table of the current split, indexed by algorithm.
            pred_results = pd.read_csv(
                save_path + data_version + '_' + match_status
                + '_performance_allmethods.csv').set_index('Algorithm')

            # Performance table of the 'train' split, indexed by algorithm;
            # this is the table decisions are based on.
            pred_perf_results = pd.read_csv(
                save_path + 'train' + '_' + match_status
                + '_performance_allmethods.csv').set_index('Algorithm')

            #Compare different schemes
# Configuration for the CORTICOSTEROIDS run: labels of the two options
# (treatment given / not given) and the folders the results live in.
treatment = 'CORTICOSTEROIDS'
treatment_list = [treatment, 'NO_'+treatment]

results_path = '../../covid19_treatments_results/'
version_folder = 'matched_single_treatments_der_val_addl_outcomes/'+str(treatment)+'/'+str(outcome)+'/'
save_path = results_path + version_folder + 'summary/'

training_set_name = treatment+'_hope_hm_cremona_matched_all_treatments_train.csv'

#%% Run results
# Unless results are preloaded, rebuild the per-patient prediction file for
# every data split and store it in the summary folder.
if not preload:
    # create summary folder if it does not exist
    Path(save_path).mkdir(parents=True, exist_ok=True)
    for data_version in ['train','test','validation','validation_cremona','validation_hope','validation_hope_italy']:
        print(data_version)
        X, Z, y = u.load_data(data_path,training_set_name,
                            split=data_version, matched=matched, prediction = outcome)
        print("X observations: "
              , str(X.shape[0]))
        # Stack the predictions of every algorithm on top of each other.
        # Presumably each frame has one predicted-outcome column per entry
        # of treatment_list -- confirm against u.algorithm_predictions.
        result = pd.concat([u.algorithm_predictions(X, treatment_list = treatment_list,
                                                    algorithm = alg,  matched = matched,
                                                    prediction = outcome,
                                                    result_path = results_path+version_folder)
                            for alg in algorithm_list], axis = 0)
        # Find optimal prescription across methods.
        # Both reductions are taken on the prediction columns BEFORE any new
        # column is appended: once the string column 'Prescribe' is part of
        # the frame, a row-wise min over mixed str/float values raises
        # TypeError on pandas >= 2.0 (older versions silently dropped it).
        best_treatment = result.idxmin(axis=1)
        best_prediction = result.min(axis=1)
        result['Prescribe'] = best_treatment
        result['Prescribe_Prediction'] = best_prediction
        #  Save result file
        result.to_csv(save_path+data_version+'_'+match_status+'_bypatient_allmethods.csv')
        # =============================================================================
        # Predictive Performance evaluation:
        # - Given a combination of treatment and method calculate the AUC 
Esempio n. 5
0
algorithm_list = ['rf', 'cart', 'qda', 'gb', 'xgboost']

treatment = 'ACEI_ARBS'
treatment_list = [treatment, 'NO_' + treatment]

# Summary folder for this treatment / outcome combination.
version_folder = str(treatment) + '/' + str(outcome) + '/'
save_path = results_path + version_folder + 'summary/'

## Load data for comparison
training_set_name = treatment + '_hope_hm_cremona_matched_all_treatments_train.csv'
X_train, Z_train, y_train = u.load_data(
    data_path + version, training_set_name,
    split='train', matched=matched, prediction=outcome,
    med_hx=False, other_tx=False)

#%% Match new data to training format
## Impute missing data

# One-hot encode the new data, then align its columns with the training
# features. reindex keeps only the training columns and fills any training
# column that is absent from the new data with NaN.
data_oh = pd.get_dummies(data, prefix_sep='_', drop_first=True)
X = data_oh.reindex(columns=X_train.columns)

# Turn the treatment column into a REGIMEN label: the treatment name where
# the value equals 1, 'NO_<treatment>' everywhere else.
Z = data[treatment].apply(
    lambda flag: 'NO_' + treatment if flag != 1 else treatment
).rename('REGIMEN')
y = data[outcome]

# Per-feature summary statistics of the new data and of the training data,
# with prefixed column names so the two tables can be told apart.
ft_val = X.describe().T.add_prefix('val_')
ft_train = X_train.describe().T.add_prefix('train_')