def baeysian_clas(train, test, val_trai, val_test, auto_calibration=False, calibration_func=None, clf=None, CostMatrix=None, CostMatrixTrain=None):
    """Fit an (optionally calibrated) classifier and apply Bayes minimum-risk decisions.

    NOTE(review): the function name keeps its original (misspelled) form so
    existing callers do not break.

    Parameters
    ----------
    train, test : array-like
        Training features and training labels (``test`` holds the *labels*
        for ``train`` in this API — confirm against callers).
    val_trai, val_test : array-like
        Validation features and validation labels.
    auto_calibration : bool
        Passed through to ``BayesMinimumRiskClassifier(calibration=...)``.
    calibration_func : str or None
        If given (e.g. "sigmoid"/"isotonic"), ``clf`` is wrapped in
        ``CalibratedClassifierCV``; otherwise ``clf`` is fitted directly.
    clf : estimator
        Base classifier implementing ``fit`` / ``predict_proba``.
    CostMatrix, CostMatrixTrain : ndarray
        costcla cost matrices for the validation and training predictions.

    Returns
    -------
    (pred_train, pred_test) : tuple of ndarray
        Minimum-risk predictions on the training and validation sets.
        A classification report, cost loss and confusion matrix for the
        validation set are printed as a side effect.
    """
    scaler = MinMaxScaler()
    train = scaler.fit_transform(train)
    # FIX: use transform(), not fit_transform() — the validation set must be
    # scaled with the statistics learned on the training set; refitting the
    # scaler puts the two sets on inconsistent scales (data leakage).
    val_trai = scaler.transform(val_trai)

    if calibration_func is None:
        model = clf.fit(train, test)
    else:
        cc = CalibratedClassifierCV(clf, method=calibration_func, cv=3)
        model = cc.fit(train, test)

    # Minimum-risk decisions on the validation set.
    prob_test = model.predict_proba(val_trai)
    bmr = BayesMinimumRiskClassifier(calibration=auto_calibration)
    pred_test = bmr.predict(prob_test, CostMatrix)

    # Minimum-risk decisions on the (scaled) training set.
    prob_test_train = model.predict_proba(train)
    bmr_train = BayesMinimumRiskClassifier(calibration=auto_calibration)
    pred_train = bmr_train.predict(prob_test_train, CostMatrixTrain)

    print(classification_report(val_test, pred_test))
    loss = cost_loss(val_test, pred_test, CostMatrix)
    print("%d\n" % loss)
    print(confusion_matrix(val_test, pred_test).T)
    return pred_train, pred_test
def cost_sensitive_classification(model, X_train, X_test, y_train, y_test, cost_mat_test):
    """Wrap a fitted probabilistic ``model`` with a Bayes minimum-risk rule.

    Parameters
    ----------
    model : estimator
        Already-fitted classifier implementing ``predict_proba``.
    X_train, y_train : array-like
        Unused here; kept to preserve the existing call signature.
    X_test, y_test : array-like
        Evaluation features and labels.
    cost_mat_test : ndarray
        costcla per-example cost matrix for the test set.

    Returns
    -------
    (c_accuracy, y_pred_test_c_model) : tuple
        Accuracy of the cost-sensitive predictions and the predictions
        themselves.
    """
    c_model = BayesMinimumRiskClassifier()
    y_prob_test = model.predict_proba(X_test)
    # FIX: removed an unused `model.predict(X_test)` call — it was dead work
    # (a full extra prediction pass whose result was never read).
    c_model.fit(y_test, y_prob_test)
    y_pred_test_c_model = c_model.predict(y_prob_test, cost_mat_test)
    c_accuracy = accuracy_score(y_test, y_pred_test_c_model)
    return c_accuracy, y_pred_test_c_model
# --- Experiment 1: plain classifier, no cost minimization -------------------
print("no cost minimization")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
pred_test = model.predict(X_test)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T)  # transpose to align with slides

# --- Experiment 2: Bayes minimum-risk on raw probabilities, no calibration --
print("no calibration")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_test = model.predict_proba(X_test)
bmr = BayesMinimumRiskClassifier(calibration=False)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T)  # transpose to align with slides

# --- Experiment 3: costcla's built-in calibration, fit on the training set --
print("costcla calibration on training set")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_train = model.predict_proba(X_train)
bmr = BayesMinimumRiskClassifier(calibration=True)
bmr.fit(y_train, prob_train)  # calibrate on training-set probabilities
prob_test = model.predict_proba(X_test)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
# NOTE(review): unlike the first two experiments, this chunk ends without
# printing `loss` — presumably the reporting continues past this excerpt.
data.cost_mat, test_size=0.33, random_state=10) X_train, X_test, y_train, y_test, cost_mat_train, cost_mat_test = sets y_pred_test_rf = RandomForestClassifier(random_state=0).fit( X_train, y_train).predict(X_test) fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_rf) print('The auc_score of RandomForest is {:.2f}'.format(metrics.auc(fpr, tpr))) print('*' * 90) y_prob_test = RandomForestClassifier(random_state=0).fit( X_train, y_train).predict_proba(X_test) f_bmr = BayesMinimumRiskClassifier(calibration=True) f_bmr.fit(y_test, y_prob_test) y_pred_test_bmr = f_bmr.predict(y_prob_test, cost_mat_test) fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_bmr) print( 'The auc_score of using RandomForest and BayesMinimumRiskClassifieris{:.2f}' .format(metrics.auc(fpr, tpr))) print('*' * 90) f = CostSensitiveLogisticRegression(solver='ga') f.fit(X_train, y_train, cost_mat_train) y_pred_test_cslr = f.predict(X_test) fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_lr) print('The auc_score of CostSensitiveLogisticRegression is {:.2f}'.format( metrics.auc(fpr, tpr))) print('*' * 90) f = CostSensitiveDecisionTreeClassifier()
# Per-example cost-matrix columns in costcla order: [fp, fn, tp, tn].
# NOTE(review): `fp`, `cost`, `cim`, `cm`, `e`, `clf`, `data`, `target`,
# `X_test`, `y_test`, `pred_BR`, `cost_BR` and `classimbalance` are defined
# outside this excerpt — this looks like the body of an outer loop over `e`.
fn = np.full((y_test.shape[0], 1), cost)  # false-negative cost, one per sample
tp = np.zeros((y_test.shape[0], 1))       # true-positive cost: zero
tn = np.zeros((y_test.shape[0], 1))       # true-negative cost: zero
cost_matrix = np.hstack((fp, fn, tp, tn))
# Optional class-imbalance correction on the training data.
if cim == 2:
    data, target = classimbalance.random_undersampler(data, target)
elif cim == 3:
    data, target = classimbalance.smote(data, target)
if cm == 1:
    # Probability calibration using Isotonic Method
    cc = CalibratedClassifierCV(clf, method="isotonic", cv=3)
    model = cc.fit(data, target)
    prob_test = model.predict_proba(X_test)
    # Probabilities already calibrated, so costcla calibration is off here.
    bmr = BayesMinimumRiskClassifier(calibration=False)
    prediction = bmr.predict(prob_test, cost_matrix)
    loss = cost_loss(y_test[:, e], prediction, cost_matrix)
    pred_BR.append(prediction)
    cost_BR.append(loss)
elif cm == 2:
    # Probability calibration using CostCla calibration
    model = clf.fit(data, target)
    prob_train = model.predict_proba(data)
    bmr = BayesMinimumRiskClassifier(calibration=True)
    bmr.fit(target, prob_train)  # calibrate on training-set probabilities
    prob_test = model.predict_proba(X_test)
    prediction = bmr.predict(prob_test, cost_matrix)
    loss = cost_loss(y_test[:, e], prediction, cost_matrix)
    pred_BR.append(prediction)
    cost_BR.append(loss)