Code Example #1
    trn_x_b_lst = {  # per-balancing-strategy lists of training feature matrices
        'Original': [],
        'UnderSample': [],
        'OverSample': [],
        'SMOTE': []
    }
    labels_values = {}
    labels_values_fs = {}
    # Stratified folds: rebalance only the training split, keep the test split as-is
    for train_i, test_i in skf.split(X, y):
        trn_data = prepfunctions.data_balancing(dt.iloc[train_i].copy(),
                                                'DEATH_EVENT')
        tst_data = {
            'Original': dt.iloc[test_i].copy(),
            'UnderSample': dt.iloc[test_i].copy(),
            'OverSample': dt.iloc[test_i].copy(),
            'SMOTE': dt.iloc[test_i].copy()
        }

        trn_data_fs = prepfunctions.mask_feature_selection(
            trn_data.copy(), 'DEATH_EVENT', False,
            './Results/FeatureSelection/HFCR Feature Selection - Features')
        tst_data_fs = prepfunctions.mask_feature_selection(
            tst_data.copy(), 'DEATH_EVENT', False,
            './Results/FeatureSelection/HFCR Feature Selection - Features')

        for category in ['Original', 'UnderSample', 'OverSample', 'SMOTE']:
            # Train
            trn_y_b: np.ndarray = trn_data[category].pop('DEATH_EVENT').values
            trn_X_b: np.ndarray = trn_data[category].values
            labels_values[category] = trn_data[category].columns.values
            # Test
            tst_y_b: np.ndarray = tst_data[category].pop('DEATH_EVENT').values
            tst_X_b: np.ndarray = tst_data[category].values

            trn_x_b_lst[category].append(trn_X_b.copy())
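
This fragment depends on set-up code that is not shown: the loaded DataFrame dt, the feature matrix X, the labels y, and the fold generator skf. Below is a minimal sketch of that missing context, assuming the heart-failure CSV path used in the other examples and a scikit-learn StratifiedKFold; the fold count, shuffle flag, and random seed are illustrative assumptions, not taken from the original script.

# Hedged sketch of the context this fragment relies on (not part of the original).
# n_splits, shuffle and random_state are illustrative assumptions.
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold

import prepfunctions  # project-local preprocessing helpers

dt: pd.DataFrame = pd.read_csv(
    '../Dataset/heart_failure_clinical_records_dataset.csv')
y: np.ndarray = dt['DEATH_EVENT'].values
X: np.ndarray = dt.drop(columns='DEATH_EVENT').values
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)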
Code Example #2
import os

import pandas as pd

import prepfunctions  # project-local preprocessing helpers

graphsDir = './Results/Random Forests/'
if not os.path.exists(graphsDir):
    os.makedirs(graphsDir)

print('-------------------------------')
print('-                             -')
print('-     HFCR Random Forests     -')
print('-                             -')
print('-------------------------------')

data: pd.DataFrame = pd.read_csv(
    '../Dataset/heart_failure_clinical_records_dataset.csv')
datas = prepfunctions.prepare_dataset(data, 'DEATH_EVENT', True, True)
featured_datas = prepfunctions.mask_feature_selection(
    datas, 'DEATH_EVENT', False,
    './Results/FeatureSelection/HFCR Feature Selection - Features')
best_accuracies = {}

for key in datas:
    for do_feature_eng in [False, True]:
        if do_feature_eng:
            data = featured_datas[key]
            subDir = graphsDir + 'FeatureEng/' + key + '/'
            if not os.path.exists(subDir):
                os.makedirs(subDir)
        else:
            data = datas[key]
            subDir = graphsDir + key + '/'
            if not os.path.exists(subDir):
                os.makedirs(subDir)
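
The body of this loop is cut off here. As a rough sketch of what one per-variant evaluation step could look like, the helper below trains a small random-forest grid and returns the best test accuracy; the 70/30 split, the grid values, and the helper name evaluate_random_forest are assumptions for illustration, not taken from the original script.

# Hedged sketch only: the grid values and the 70/30 split are assumptions.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


def evaluate_random_forest(frame, target='DEATH_EVENT'):
    frame = frame.copy()
    y = frame.pop(target).values
    X = frame.values
    trn_X, tst_X, trn_y, tst_y = train_test_split(X, y, train_size=0.7,
                                                  stratify=y)
    best_acc, best_params = 0.0, None
    for n_estimators in [10, 50, 100]:
        for max_depth in [5, 10, 25]:
            rf = RandomForestClassifier(n_estimators=n_estimators,
                                        max_depth=max_depth)
            rf.fit(trn_X, trn_y)
            acc = accuracy_score(tst_y, rf.predict(tst_X))
            if acc > best_acc:
                best_acc, best_params = acc, (n_estimators, max_depth)
    return best_acc, best_params

Inside the loop above, such a helper could be called on data and its result recorded in best_accuracies.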
Code Example #3
import os

import pandas as pd
from sklearn.model_selection import train_test_split

import prepfunctions  # project-local preprocessing helpers

if not os.path.exists(graphsDir):
    os.makedirs(graphsDir)

data: pd.DataFrame = pd.read_csv('../Dataset/qsar_oral_toxicity.csv',
                                 sep=';',
                                 header=None)
train, test = train_test_split(data,
                               train_size=0.7,
                               stratify=data[1024].values)
testDatas = {}
datas = prepfunctions.prepare_dataset(train, 1024, False, False)
for key in datas:
    testDatas[key] = test.copy()

featured_datas = prepfunctions.mask_feature_selection(
    datas, 1024, True,
    './Results/FeatureSelection/QOT Feature Selection - Features')
featured_test_datas = prepfunctions.mask_feature_selection(
    testDatas, 1024, True,
    './Results/FeatureSelection/QOT Feature Selection - Features')

best_accuracies = {
    "Original": [0.9992055926278995, 0.9458858413639734],
    "Original with FS": [0.9817286304416905, 0.9414381022979985],
    "UnderSample": [1.0, 0.9347664936990363],
    "UnderSample with FS": [1.0, 0.933283914010378],
    "OverSample": [0.9992207792207792, 0.9432913269088213],
    "OverSample with FS": [0.9992207792207792, 0.9429206819866568],
    "SMOTE": [0.9993939393939394, 0.9403261675315048],
    "SMOTE with FS": [0.9993939393939394, 0.9425500370644923]
}
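
The snippet stops after recording these accuracy pairs. Assuming each list holds a [training accuracy, test accuracy] pair (an inference from the values, not stated in the snippet), a quick side-by-side comparison could be drawn as below; matplotlib and the figure layout are assumptions.

# Hedged sketch: plot the recorded pairs side by side.
# Assumption: each value is [train accuracy, test accuracy].
import matplotlib.pyplot as plt
import numpy as np

labels = list(best_accuracies.keys())
train_acc = [v[0] for v in best_accuracies.values()]
test_acc = [v[1] for v in best_accuracies.values()]

positions = np.arange(len(labels))
plt.figure(figsize=(12, 4))
plt.bar(positions - 0.2, train_acc, width=0.4, label='train')
plt.bar(positions + 0.2, test_acc, width=0.4, label='test')
plt.xticks(positions, labels, rotation=45, ha='right')
plt.ylim(0.9, 1.0)
plt.ylabel('accuracy')
plt.legend()
plt.tight_layout()
plt.show()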
Code Example #4

import pandas as pd

import prepfunctions  # project-local preprocessing helpers

print('-------------------------------')
print('-                             -')
print('-     HFCR Random Forests     -')
print('-                             -')
print('-------------------------------')



data: pd.DataFrame = pd.read_csv(
    '../../Dataset/heart_failure_clinical_records_dataset.csv')
datas = prepfunctions.prepare_dataset(data.copy(), 'DEATH_EVENT', False, False)

datas_outliers = prepfunctions.prepare_dataset(
    data.copy(), 'DEATH_EVENT', False, True)
datas_outliers_scaling = prepfunctions.prepare_dataset(
    data.copy(), 'DEATH_EVENT', True, True)
datas_outliers_featureselection = prepfunctions.mask_feature_selection(
    datas_outliers.copy(), 'DEATH_EVENT', False,
    './Results/FeatureSelection/HFCR Feature Selection - Features')
datas_outliers_scaling_featureselection = prepfunctions.mask_feature_selection(
    datas_outliers_scaling.copy(), 'DEATH_EVENT', False,
    './Results/FeatureSelection/HFCR Feature Selection - Features')

datas_scaling = prepfunctions.prepare_dataset(
    data.copy(), 'DEATH_EVENT', True, False)
datas_scaling_featureselection = prepfunctions.mask_feature_selection(
    datas_scaling.copy(), 'DEATH_EVENT', False,
    './Results/FeatureSelection/HFCR Feature Selection - Features')

datas_featureselection = prepfunctions.mask_feature_selection(
    datas.copy(), 'DEATH_EVENT', False,
    './Results/FeatureSelection/HFCR Feature Selection - Features')

all_datas = [datas, datas_outliers, datas_scaling, datas_featureselection,
             datas_outliers_scaling, datas_outliers_featureselection,
             datas_outliers_scaling_featureselection]
all_datas_names = ['', ' - No Outliers', ' - Scaling', ' - Feature Selection',
                   ' - No Outliers & Scaling',
                   ' - No Outliers & Feature Selection',
                   ' - No Outliers, Scaling & Feature Selection']
provisorio_data_scaling = ' - Scaling & Feature Selection'

accuracies = {}

for key in datas:
    last_name = 'None'
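
The comparison loop is truncated at this point. One way it could continue, purely as a hedged sketch (the classifier settings, the split, and the key naming in accuracies are assumptions; the original script's actual loop body is not shown):

# Hedged sketch: evaluate each preprocessing variant and record its accuracy.
# The classifier, split and accuracy bookkeeping are illustrative assumptions.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

for variant, name in zip(all_datas, all_datas_names):
    for key in variant:
        frame = variant[key].copy()
        y = frame.pop('DEATH_EVENT').values
        X = frame.values
        trn_X, tst_X, trn_y, tst_y = train_test_split(X, y, train_size=0.7,
                                                      stratify=y)
        rf = RandomForestClassifier(n_estimators=100)
        rf.fit(trn_X, trn_y)
        accuracies[key + name] = accuracy_score(tst_y, rf.predict(tst_X))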