from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import chi2
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV

# Root output folder for this feature-selection script.
graphsDir = './Results/FeatureSelection/'
# exist_ok=True creates the folder only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(graphsDir, exist_ok=True)

# Text file opened for writing next to the graphs.
# NOTE(review): opened without a context manager and not closed in this part
# of the script -- confirm it is closed once the last write has happened.
features_file = open(graphsDir + 'HFCR Feature Selection - Features', 'w')

data: pd.DataFrame = pd.read_csv('../Dataset/heart_failure_clinical_records_dataset.csv')
# `datas` is iterated with .items() below, so it is a mapping from a variant
# name to a dataset (presumably a DataFrame per variant -- confirm against
# prepfunctions.prepare_dataset).
datas = prepfunctions.prepare_dataset(data, 'DEATH_EVENT', False, False)

for key, value in datas.items():
    print("Key: ", key)
    # Pristine copy of this variant; each step below restarts from it.
    dataframe_rec = value.copy()

    # One output sub-folder per dataset variant.
    subDir = graphsDir + key + '/'
    os.makedirs(subDir, exist_ok=True)

    # Baseline: split off the target column and record how many feature
    # columns remain.
    data = dataframe_rec.copy()
    y = data.pop('DEATH_EVENT')

    print('Original')
    labels = ['Original']
    values = [data.shape[1]]

    # Next step starts again from the untouched copy (target column included).
    print('VarianceThreshold')
    data = dataframe_rec.copy()
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from datetime import datetime

# Output folder for the gradient-boosting results.
graphsDir = './Results/GradientBoosting/'
if not os.path.exists(graphsDir):
    os.makedirs(graphsDir)

# The CSV is ';'-separated and has no header row, so columns are addressed by
# integer position; column 1024 is used as the target throughout.
data: pd.DataFrame = pd.read_csv('../Dataset/qsar_oral_toxicity.csv', sep=';', header=None)

# 70% of the rows go to `train`, stratified on column 1024.
# NOTE(review): no random_state is passed, so the split is not fixed between
# runs unless the global NumPy seed is set elsewhere -- confirm this is
# intended given the hard-coded numbers in `best_accuracies` below.
train, test = train_test_split(data, train_size=0.7, stratify=data[1024].values)

testDatas = {}
# Only the training split goes through prepare_dataset.
datas = prepfunctions.prepare_dataset(train, 1024, False, False)
# Every variant key is paired with its own copy of the same test split.
for key in datas:
    testDatas[key] = test.copy()

# The train and test variants are passed through mask_feature_selection with
# identical arguments and the same features file.
featured_datas = prepfunctions.mask_feature_selection(
    datas, 1024, True,
    './Results/FeatureSelection/QOT Feature Selection - Features')
featured_test_datas = prepfunctions.mask_feature_selection(
    testDatas, 1024, True,
    './Results/FeatureSelection/QOT Feature Selection - Features')

# Hard-coded pair of floats per dataset variant.
# NOTE(review): presumably [train accuracy, test accuracy] from an earlier
# run -- confirm where these numbers come from.
best_accuracies = {
    "Original": [0.9992055926278995, 0.9458858413639734],
    "Original with FS": [0.9817286304416905, 0.9414381022979985],
    "UnderSample": [1.0, 0.9347664936990363],
    "UnderSample with FS": [1.0, 0.933283914010378],
# Console banner announcing the experiment.
for _banner_row in (
    '---------------------------',
    '- -',
    '- HFCR Clustering -',
    '- -',
    '---------------------------',
):
    print(_banner_row)

data: pd.DataFrame = pd.read_csv(
    '../Dataset/heart_failure_clinical_records_dataset.csv')

# Original: the raw records without the target column.
original_data = data.copy()
original_data.pop('DEATH_EVENT')

# Scaled: the 'Original' entry of the dataset prepared with both flags
# enabled, again without the target column.
_prepared = prepfunctions.prepare_dataset(data, 'DEATH_EVENT', True, True)
data_scaled = _prepared['Original']
data_scaled.pop('DEATH_EVENT')

# The four (bool, bool) combinations, in this fixed order.
scaling_pca = [(False, False), (True, False), (False, True), (True, True)]

# Parameter grids: 2 followed by the odd numbers 3..29, and 2.5, 5, 10
# followed by the multiples of ten 20..100.
N_CLUSTERS = [2] + list(range(3, 30, 2))
EPS = [2.5, 5, 10] + list(range(20, 101, 10))

# 2 x 3 grid of axes; squeeze=False keeps `ax` two-dimensional.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(9, 8), squeeze=False)

# Six independent, initially empty accumulators.
(fig_values_1, fig_values_2, fig_values_3,
 fig_values_4, fig_values_5, fig_values_6) = ({} for _ in range(6))
import ds_functions as ds
import os

# Output folder for the random-forest results.
graphsDir = './Results/Random Forests/'
# exist_ok=True creates the folder only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(graphsDir, exist_ok=True)

print('-------------------------------')
print('- -')
print('- HFCR Random Forests -')
print('- -')
print('-------------------------------')

data: pd.DataFrame = pd.read_csv(
    '../Dataset/heart_failure_clinical_records_dataset.csv')
# `datas` is iterated by key and indexed below, so it maps a variant name to
# a dataset; `featured_datas` is indexed with the same keys.
datas = prepfunctions.prepare_dataset(data, 'DEATH_EVENT', True, True)
featured_datas = prepfunctions.mask_feature_selection(
    datas, 'DEATH_EVENT', False,
    './Results/FeatureSelection/HFCR Feature Selection - Features')
best_accuracies = {}

# Each variant is visited twice: first as prepared, then in its
# feature-selected form.
for key in datas:
    for do_feature_eng in [False, True]:
        if do_feature_eng:
            data = featured_datas[key]
            # Feature-selected runs write under a separate 'FeatureEng/' tree.
            subDir = graphsDir + 'FeatureEng/' + key + '/'
            os.makedirs(subDir, exist_ok=True)
        else:
            data = datas[key]
            subDir = graphsDir + key + '/'
# Output folder for the random-forest results.
graphsDir = './Results/Random Forests/'
# exist_ok=True creates the folder only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(graphsDir, exist_ok=True)

print('-------------------------------')
print('- -')
print('- HFCR Random Forests -')
print('- -')
print('-------------------------------')

# Features file shared by every mask_feature_selection call below.
_HFCR_FEATURES_PATH = './Results/FeatureSelection/HFCR Feature Selection - Features'

data: pd.DataFrame = pd.read_csv('../../Dataset/heart_failure_clinical_records_dataset.csv')

# Every variant is built from its own copy of the raw frame.
# NOTE(review): judging by the variable names, the third prepare_dataset
# argument toggles scaling and the fourth outlier removal -- confirm against
# prepfunctions.
datas = prepfunctions.prepare_dataset(data.copy(), 'DEATH_EVENT', False, False)
datas_outliers = prepfunctions.prepare_dataset(data.copy(), 'DEATH_EVENT', False, True)
datas_outliers_scaling = prepfunctions.prepare_dataset(data.copy(), 'DEATH_EVENT', True, True)
datas_outliers_featureselection = prepfunctions.mask_feature_selection(
    datas_outliers.copy(), 'DEATH_EVENT', False, _HFCR_FEATURES_PATH)
datas_outliers_scaling_featureselection = prepfunctions.mask_feature_selection(
    datas_outliers_scaling.copy(), 'DEATH_EVENT', False, _HFCR_FEATURES_PATH)
datas_scaling = prepfunctions.prepare_dataset(data.copy(), 'DEATH_EVENT', True, False)
datas_scaling_featureselection = prepfunctions.mask_feature_selection(
    datas_scaling.copy(), 'DEATH_EVENT', False, _HFCR_FEATURES_PATH)
datas_featureselection = prepfunctions.mask_feature_selection(
    datas.copy(), 'DEATH_EVENT', False, _HFCR_FEATURES_PATH)

# Parallel lists: all_datas_names[i] is the label suffix for all_datas[i].
# datas_scaling_featureselection is not part of all_datas; its label is kept
# separately in provisorio_data_scaling.
all_datas = [
    datas,
    datas_outliers,
    datas_scaling,
    datas_featureselection,
    datas_outliers_scaling,
    datas_outliers_featureselection,
    datas_outliers_scaling_featureselection,
]
all_datas_names = [
    '',
    ' - No Outliers',
    ' - Scaling',
    ' - Feature Selection',
    ' - No Outliers & Scaling',
    ' - No Outliers & Feature Selection',
    ' - No Outliers, Scaling & Feature Selection',
]
provisorio_data_scaling = ' - Scaling & Feature Selection'
# Output folder for the logistic-regression results.
graphsDir = './Results/Log Regression/'
# exist_ok=True creates the folder only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(graphsDir, exist_ok=True)

print('--------------------------------------')
print('- -')
print('- QOT Log Regression - Treated -')
print('- -')
print('--------------------------------------')

# Seed constant; nothing in this part of the script reads it.
RANDOM_STATE = 42

# The CSV is ';'-separated and has no header row, so columns are addressed by
# integer position; column 1024 is used as the target.
data: pd.DataFrame = pd.read_csv('../Dataset/qsar_oral_toxicity.csv', sep=';', header=None)
# `datas` is iterated by key and indexed below, so it maps a variant name to
# a dataset; `featured_datas` is indexed with the same keys.
datas = prepfunctions.prepare_dataset(data, 1024, False, False)
featured_datas = prepfunctions.mask_feature_selection(
    datas, 1024, True,
    './Results/FeatureSelection/QOT Feature Selection - Features')
best_accuracies = {}

# Each variant is visited twice: first as prepared, then in its
# feature-selected form.
for key in datas:
    for do_feature_eng in [False, True]:
        if do_feature_eng:
            data = featured_datas[key]
            # Feature-selected runs write under a separate 'FeatureEng/' tree.
            subDir = graphsDir + 'FeatureEng/' + key + '/'
            os.makedirs(subDir, exist_ok=True)
        else:
            data = datas[key]
            subDir = graphsDir + key + '/'