# Example no. 1  (snippet header left over from extraction; not part of the program)
# 0
import warnings
warnings.filterwarnings('ignore')
from preprocessing import Preprocessing
Preprocessing = Preprocessing()
from models import models
models = models()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, auc, roc_curve, normalized_mutual_info_score


# Load the raw dataset; the first CSV column is used as the row index.
data = pd.read_csv('./data.csv', index_col=0)
# DataFrame.drop returns a new frame unless inplace=True, so the result has
# to be assigned back -- otherwise these columns are never actually removed.
data = data.drop(columns=['patkey', 'index_date', 'MATCHID'])
# Shift every age down by 5.
# NOTE(review): the reason for this offset is not visible here -- confirm.
data['age_at_index'] = data['age_at_index'] - 5
# Run the project's preprocessing steps in order: feature encoding first,
# then missing-data handling (presumably imputation -- verify in preprocessing).
data = Preprocessing.FeatureEncoding(data)
data = Preprocessing.MissingData(data)
# Persist the processed frame, then reload it from disk so the rest of the
# script works on exactly what was written to 'data_complete.csv'.
data.to_csv('data_complete.csv')
data = pd.read_csv('./data_complete.csv', index_col=0)
#==========================================================================================
# After using KNN to deal with the missing data, count the values of each feature and plot their histograms
'''
print(data.loc[:,'Smoking_status'].value_counts())
print(data.loc[:,'BMI_group'].value_counts())
print(data.loc[:,'Alcohol_status'].value_counts())
    
def autolabel(rects):
    for rect in rects:
        height = rect.get_height()
        plt.text(rect.get_x()+rect.get_width()/2.-0.2, 1.03*height, '%s' % int(height))

name_list = ['non-smoker','current somker','ex-smoker']