def Ada(method, norm, selection):
    """Evaluate an AdaBoost classifier over five rounds and print the metrics.

    Every round calls one of the project's ``pre_select_data`` helpers and
    then reads the training and test sets from ``B_Trainset_data.csv``,
    ``B_Trainset_label.csv``, ``Test_data.csv`` and ``Test_label.csv`` in the
    working directory (presumably rewritten by the helper on each call --
    TODO confirm). A 100-estimator ``AdaBoostClassifier`` is fitted, train
    and test metrics are printed, and the test ROC curve is shown with
    ``plt.show()``. After the last round the mean test accuracy, weighted F1
    and AUC are printed.

    Args:
        method: ``1`` uses the ``noise`` / ``proba_noise`` helper and fits
            without sample weights; ``2`` uses the ``weight`` /
            ``proba_weight`` helper and passes the sample weights it returns
            to ``fit``.
        norm: ``3`` selects the ``proba_*`` helpers; any other value selects
            the plain ``noise`` / ``weight`` helpers.
        selection: Forwarded unchanged to ``pre_select_data``.

    Raises:
        ValueError: If ``method`` is neither 1 nor 2. Previously such a call
            ran the whole data preparation and then failed inside
            ``predict`` because the classifier had never been fitted.
    """
    if method not in (1, 2):
        raise ValueError('method must be 1 or 2, got {!r}'.format(method))

    print(
        '---------------------------------------------------------------------------------------------------'
    )
    print('AdaBoost')
    acc_save, f1_save, auc_save = [], [], []
    target = ['class 1', 'class 2']

    dataset = np.loadtxt('new.csv', delimiter=',')
    _, n_columns = dataset.shape
    # Last column holds the labels, everything before it the features.
    dataset_label = dataset[:, n_columns - 1]
    dataset_train = dataset[:, 0:n_columns - 1]

    skf = StratifiedKFold(n_splits=5, shuffle=True)
    # The fold indices are deliberately ignored: the splitter only drives the
    # five rounds, while the data of each round comes from the CSV files
    # loaded below.
    for _ in skf.split(dataset_train, dataset_label):
        sample_weight = None
        if norm == 3:
            if method == 1:
                proba_noise.pre_select_data(selection, 1)
            else:
                _, sample_weight = proba_weight.pre_select_data(selection, 1)
        elif method == 1:
            noise.pre_select_data(dataset, 1, selection)
        else:
            _, sample_weight = weight.pre_select_data(dataset, 1, selection)

        train = np.loadtxt('B_Trainset_data.csv', delimiter=',')
        label_train = np.loadtxt('B_Trainset_label.csv', delimiter=',')
        test = np.loadtxt('Test_data.csv', delimiter=',')
        label_test = np.loadtxt('Test_label.csv', delimiter=',')

        AdaB = AdaBoostClassifier(n_estimators=100, learning_rate=0.7)
        # sample_weight stays None for method 1, which is the same as
        # calling fit() without weights.
        AdaB.fit(train, label_train, sample_weight=sample_weight)

        # --- training-set metrics -------------------------------------
        y_pred_train = AdaB.predict(train)
        acc_train = accuracy_score(label_train, y_pred_train)
        print('acc_train = ', acc_train)
        print(
            classification_report(label_train,
                                  y_pred_train,
                                  target_names=target))
        # NOTE: the training AUC is computed from hard predictions, unlike
        # the test AUC below which uses class probabilities.
        print('auc = ',
              roc_auc_score(label_train, y_pred_train, average='weighted'))

        # --- test-set metrics -----------------------------------------
        y_pred_test = AdaB.predict(test)
        y_prob_pred_test = AdaB.predict_proba(test)
        acc_test = accuracy_score(label_test, y_pred_test)
        print('acc_test = ', acc_test)
        acc_save.append(acc_test)
        print(
            classification_report(label_test,
                                  y_pred_test,
                                  target_names=target))

        f1 = f1_score(label_test, y_pred_test, average='weighted')
        print('f1 score =', f1)
        f1_save.append(f1)

        # Column 1 of predict_proba is the score of the second class.
        auc = roc_auc_score(label_test,
                            y_prob_pred_test[:, 1],
                            average='weighted')
        print('auc = ', auc)
        auc_save.append(auc)

        fpr, tpr, thresholds = roc_curve(label_test, y_prob_pred_test[:, 1])
        plt.plot(fpr, tpr)
        plt.xlabel('fpr')
        plt.ylabel('tpr')
        plt.show()

    print(
        '----------------------------------------------------------------------------------------------------'
    )
    print('5 iteration average results')
    print('test_acc = ', np.mean(acc_save))
    print('f1_score = ', np.mean(f1_save))
    print('auc = ', np.mean(auc_save))
def KClassifier(method, norm, selection):
    """Evaluate a k-nearest-neighbours classifier over five rounds.

    Every round calls a ``pre_select_data`` helper and then reads the
    training and test sets from ``B_Trainset_data.csv``,
    ``B_Trainset_label.csv``, ``Test_data.csv`` and ``Test_label.csv`` in the
    working directory (presumably rewritten by the helper on each call --
    TODO confirm). A ``KNeighborsClassifier`` with 100 neighbours is fitted,
    train and test metrics are printed, and the test ROC curve is shown with
    ``plt.show()``. After the last round the mean test accuracy, weighted F1
    and AUC are printed.

    The test AUC and ROC curve are computed from ``predict_proba`` scores.
    Using hard class predictions, as before, reduces the ROC curve to a
    single operating point.

    Args:
        method: Not used by this function.
        norm: ``3`` selects ``proba_noise.pre_select_data``; any other value
            selects ``noise.pre_select_data``.
        selection: Forwarded unchanged to ``pre_select_data``.
    """
    print(
        '---------------------------------------------------------------------------------------------------'
    )
    print('KNN')
    acc_save, f1_save, auc_save = [], [], []
    target = ['class 1', 'class 2']

    dataset = np.loadtxt('new.csv', delimiter=',')
    _, n_columns = dataset.shape
    # Last column holds the labels, everything before it the features.
    dataset_label = dataset[:, n_columns - 1]
    dataset_train = dataset[:, 0:n_columns - 1]

    skf = StratifiedKFold(n_splits=5, shuffle=True)
    # The fold indices are deliberately ignored: the splitter only drives the
    # five rounds, while the data of each round comes from the CSV files
    # loaded below.
    for _ in skf.split(dataset_train, dataset_label):
        if norm == 3:
            proba_noise.pre_select_data(selection, 1)
        else:
            noise.pre_select_data(dataset, 1, selection)

        train = np.loadtxt('B_Trainset_data.csv', delimiter=',')
        label_train = np.loadtxt('B_Trainset_label.csv', delimiter=',')
        test = np.loadtxt('Test_data.csv', delimiter=',')
        label_test = np.loadtxt('Test_label.csv', delimiter=',')

        KNN = KNeighborsClassifier(n_neighbors=100)
        KNN.fit(train, label_train)

        # --- training-set metrics -------------------------------------
        y_pred_train = KNN.predict(train)
        acc_train = accuracy_score(label_train, y_pred_train)
        print('acc_train = ', acc_train)
        print(
            classification_report(label_train,
                                  y_pred_train,
                                  target_names=target))
        # NOTE: the training AUC is computed from hard predictions, unlike
        # the test AUC below which uses class probabilities.
        print('auc = ',
              roc_auc_score(label_train, y_pred_train, average='weighted'))

        # --- test-set metrics -----------------------------------------
        y_pred_test = KNN.predict(test)
        y_prob_pred_test = KNN.predict_proba(test)
        acc_test = accuracy_score(label_test, y_pred_test)
        print('acc_test = ', acc_test)
        acc_save.append(acc_test)
        print(
            classification_report(label_test,
                                  y_pred_test,
                                  target_names=target))

        f1 = f1_score(label_test, y_pred_test, average='weighted')
        print('f1 score =', f1)
        f1_save.append(f1)

        # Column 1 of predict_proba is the score of the second class.
        auc = roc_auc_score(label_test,
                            y_prob_pred_test[:, 1],
                            average='weighted')
        print('auc = ', auc)
        auc_save.append(auc)

        fpr, tpr, thresholds = roc_curve(label_test, y_prob_pred_test[:, 1])
        plt.plot(fpr, tpr)
        plt.xlabel('fpr')
        plt.ylabel('tpr')
        plt.show()

    print(
        '----------------------------------------------------------------------------------------------------'
    )
    print('5 iteration average results')
    print('test_acc = ', np.mean(acc_save))
    print('f1_score = ', np.mean(f1_save))
    print('auc = ', np.mean(auc_save))
def support(method, norm, selection):
    """Evaluate an RBF-kernel SVM over five rounds and print the metrics.

    Every round calls one of the project's ``pre_select_data`` helpers and
    then reads the training and test sets from ``B_Trainset_data.csv``,
    ``B_Trainset_label.csv``, ``Test_data.csv`` and ``Test_label.csv`` in the
    working directory (presumably rewritten by the helper on each call --
    TODO confirm). An ``SVC(kernel='rbf', probability=True)`` is fitted with
    ``class_weight={0: 1, 1: int(ratio)}``, train and test metrics are
    printed, and the test ROC curve is shown with ``plt.show()``. After the
    last round the mean test accuracy, weighted F1 and AUC are printed.

    The test AUC is computed from the same ``predict_proba`` scores that feed
    the plotted ROC curve, so the printed number and the plot agree.

    Args:
        method: ``1`` uses the ``noise`` / ``proba_noise`` helper with a
            class-weight ratio of 1; ``2`` uses the ``weight`` /
            ``proba_weight`` helper and takes the ratio it returns.
        norm: ``3`` selects the ``proba_*`` helpers; any other value selects
            the plain ``noise`` / ``weight`` helpers.
        selection: Forwarded unchanged to ``pre_select_data``.

    Raises:
        ValueError: If ``method`` is neither 1 nor 2, because no class-weight
            ratio would be defined for the classifier.
    """
    if method not in (1, 2):
        raise ValueError('method must be 1 or 2, got {!r}'.format(method))

    print(
        '---------------------------------------------------------------------------------------------------'
    )
    print('SVM')
    acc_save, f1_save, auc_save = [], [], []
    target = ['class 1', 'class 2']

    dataset = np.loadtxt('new.csv', delimiter=',')
    _, n_columns = dataset.shape
    # Last column holds the labels, everything before it the features.
    dataset_label = dataset[:, n_columns - 1]
    dataset_train = dataset[:, 0:n_columns - 1]

    skf = StratifiedKFold(n_splits=5, shuffle=True)
    # The fold indices are deliberately ignored: the splitter only drives the
    # five rounds, while the data of each round comes from the CSV files
    # loaded below.
    for _ in skf.split(dataset_train, dataset_label):
        ratio = 1
        if norm == 3:
            if method == 1:
                proba_noise.pre_select_data(selection, 1)
            else:
                # The returned sample weights are not used by the SVM; only
                # the ratio feeds class_weight.
                ratio, _unused_weights = proba_weight.pre_select_data(
                    selection, 1)
        elif method == 1:
            noise.pre_select_data(dataset, 1, selection)
        else:
            ratio, _unused_weights = weight.pre_select_data(
                dataset, 1, selection)

        train = np.loadtxt('B_Trainset_data.csv', delimiter=',')
        label_train = np.loadtxt('B_Trainset_label.csv', delimiter=',')
        test = np.loadtxt('Test_data.csv', delimiter=',')
        label_test = np.loadtxt('Test_label.csv', delimiter=',')

        suppvec = SVC(kernel='rbf',
                      probability=True,
                      class_weight={
                          0: 1,
                          1: int(ratio)
                      })
        suppvec.fit(train, label_train)

        # --- training-set metrics -------------------------------------
        y_pred_train = suppvec.predict(train)
        acc_train = accuracy_score(label_train, y_pred_train)
        print('acc_train = ', acc_train)
        print(
            classification_report(label_train,
                                  y_pred_train,
                                  target_names=target))
        # NOTE: the training AUC is computed from hard predictions, unlike
        # the test AUC below which uses class probabilities.
        print('auc = ',
              roc_auc_score(label_train, y_pred_train, average='weighted'))

        # --- test-set metrics -----------------------------------------
        y_pred_test = suppvec.predict(test)
        y_prob_pred_test = suppvec.predict_proba(test)
        acc_test = accuracy_score(label_test, y_pred_test)
        print('acc_test = ', acc_test)
        acc_save.append(acc_test)
        print(
            classification_report(label_test,
                                  y_pred_test,
                                  target_names=target))

        f1 = f1_score(label_test, y_pred_test, average='weighted')
        print('f1 score =', f1)
        f1_save.append(f1)

        # Column 1 of predict_proba is the score of class 1, the class that
        # receives the non-unit class weight above.
        auc = roc_auc_score(label_test, y_prob_pred_test[:, 1])
        print('auc = ', auc)
        auc_save.append(auc)

        fpr, tpr, thresholds = roc_curve(label_test, y_prob_pred_test[:, 1])
        plt.plot(fpr, tpr)
        plt.xlabel('fpr')
        plt.ylabel('tpr')
        plt.show()

    print(
        '----------------------------------------------------------------------------------------------------'
    )
    print('5 iteration average results')
    print('test_acc = ', np.mean(acc_save))
    print('f1_score = ', np.mean(f1_save))
    print('auc = ', np.mean(auc_save))