from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from costcla.metrics import cost_loss
from costcla.models import BayesMinimumRiskClassifier


def bayesian_class(train,
                   test,
                   val_train,
                   val_test,
                   auto_calibration=False,
                   calibration_func=None,
                   clf=None,
                   CostMatrix=None,
                   CostMatrixTrain=None):
    """Fit `clf` on the training data (`train` = features, `test` = labels),
    then make cost-sensitive predictions on the validation features
    `val_train` (labels `val_test`) with a Bayes-minimum-risk rule and the
    given example-dependent cost matrices."""
    # Scale features; the validation set must reuse the scaler fitted on the
    # training data rather than being re-fitted.
    scaler = MinMaxScaler()
    train = scaler.fit_transform(train)
    val_train = scaler.transform(val_train)

    if calibration_func is None:
        model = clf.fit(train, test)
    else:
        # Calibrate the classifier's probabilities ("sigmoid" or "isotonic").
        cc = CalibratedClassifierCV(clf, method=calibration_func, cv=3)
        model = cc.fit(train, test)

    # Cost-sensitive predictions on the validation set.
    prob_test = model.predict_proba(val_train)
    bmr = BayesMinimumRiskClassifier(calibration=auto_calibration)
    pred_test = bmr.predict(prob_test, CostMatrix)

    # Cost-sensitive predictions on the training set.
    prob_train = model.predict_proba(train)
    bmr_train = BayesMinimumRiskClassifier(calibration=auto_calibration)
    pred_train = bmr_train.predict(prob_train, CostMatrixTrain)

    print(classification_report(val_test, pred_test))
    loss = cost_loss(val_test, pred_test, CostMatrix)
    print("%d\n" % loss)
    print(confusion_matrix(val_test, pred_test).T)
    return pred_train, pred_test
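A minimal usage sketch for bayesian_class; everything below is illustrative (toy data from make_classification, a RandomForest base learner, and a hypothetical [fp, fn, tp, tn] cost matrix in which false negatives cost five times as much as false positives):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.3, random_state=0)

def toy_cost_matrix(n):
    # Columns [fp, fn, tp, tn]: fp costs 1, fn costs 5, correct predictions cost 0.
    return np.hstack([np.ones((n, 1)), np.full((n, 1), 5.0),
                      np.zeros((n, 1)), np.zeros((n, 1))])

pred_train, pred_val = bayesian_class(
    X_tr, y_tr, X_val, y_val,
    calibration_func="sigmoid",
    clf=RandomForestClassifier(n_estimators=100, random_state=0),
    CostMatrix=toy_cost_matrix(len(y_val)),
    CostMatrixTrain=toy_cost_matrix(len(y_tr)))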
Example No. 2
from sklearn.metrics import accuracy_score
from costcla.models import BayesMinimumRiskClassifier


def cost_sensitive_classification(model, X_train, X_test, y_train, y_test, cost_mat_test):
    """Wrap an already fitted `model` with a Bayes-minimum-risk decision rule
    and report its accuracy on the test set."""
    c_model = BayesMinimumRiskClassifier()
    y_prob_test = model.predict_proba(X_test)
    y_pred_test_model = model.predict(X_test)  # plain predictions of the base model
    # Calibrate on the test labels, then pick the class with minimum expected cost.
    c_model.fit(y_test, y_prob_test)
    y_pred_test_c_model = c_model.predict(y_prob_test, cost_mat_test)
    c_accuracy = accuracy_score(y_test, y_pred_test_c_model)

    return c_accuracy, y_pred_test_c_model
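A minimal usage sketch for cost_sensitive_classification; the toy dataset, the RandomForest model and the cost values are assumptions for illustration:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
rf = RandomForestClassifier(random_state=0).fit(X_train, y_train)

n = len(y_test)
cost_mat_test = np.hstack([np.ones((n, 1)), np.full((n, 1), 5.0),
                           np.zeros((n, 1)), np.zeros((n, 1))])  # [fp, fn, tp, tn]

acc, preds = cost_sensitive_classification(rf, X_train, X_test,
                                            y_train, y_test, cost_mat_test)
print("cost-sensitive accuracy: %.3f" % acc)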
Example No. 3
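This example assumes a preamble that is not part of the listing; a sketch of one possible setup is given below (the breast-cancer dataset and the 1/10 cost values are placeholders, chosen only so that data.target_names and cost_matrix exist):

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from costcla.metrics import cost_loss
from costcla.models import BayesMinimumRiskClassifier

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.33, random_state=0)

# Example-dependent cost matrix, columns [fp, fn, tp, tn]:
# a false negative is ten times as costly as a false positive.
n = y_test.shape[0]
cost_matrix = np.hstack([np.ones((n, 1)), np.full((n, 1), 10.0),
                         np.zeros((n, 1)), np.zeros((n, 1))])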
print("no cost minimization")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
pred_test = model.predict(X_test)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T)  # transpose to align with slides

print("no calibration")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_test = model.predict_proba(X_test)
bmr = BayesMinimumRiskClassifier(calibration=False)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T)  # transpose to align with slides

print("costcla calibration on training set")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_train = model.predict_proba(X_train)
bmr = BayesMinimumRiskClassifier(calibration=True)
bmr.fit(y_train, prob_train)
prob_test = model.predict_proba(X_test)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T)  # transpose to align with slides
Example No. 4
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from costcla.models import (BayesMinimumRiskClassifier,
                            CostSensitiveDecisionTreeClassifier,
                            CostSensitiveLogisticRegression)

# `data` is assumed to be a costcla-style Bunch exposing .data, .target and
# .cost_mat (for instance one of the costcla.datasets loaders); the first two
# arguments of the split are inferred from the six-way unpacking below.
sets = train_test_split(data.data,
                        data.target,
                        data.cost_mat,
                        test_size=0.33,
                        random_state=10)
X_train, X_test, y_train, y_test, cost_mat_train, cost_mat_test = sets
y_pred_test_rf = RandomForestClassifier(random_state=0).fit(
    X_train, y_train).predict(X_test)
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_rf)
print('The auc_score of RandomForest is {:.2f}'.format(metrics.auc(fpr, tpr)))
print('*' * 90)

y_prob_test = RandomForestClassifier(random_state=0).fit(
    X_train, y_train).predict_proba(X_test)

f_bmr = BayesMinimumRiskClassifier(calibration=True)
f_bmr.fit(y_test, y_prob_test)
y_pred_test_bmr = f_bmr.predict(y_prob_test, cost_mat_test)
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_bmr)
print('The auc_score of using RandomForest and BayesMinimumRiskClassifier is {:.2f}'
      .format(metrics.auc(fpr, tpr)))
print('*' * 90)

f = CostSensitiveLogisticRegression(solver='ga')
f.fit(X_train, y_train, cost_mat_train)
y_pred_test_cslr = f.predict(X_test)
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_cslr)
print('The auc_score of CostSensitiveLogisticRegression is {:.2f}'.format(
    metrics.auc(fpr, tpr)))
print('*' * 90)

f = CostSensitiveDecisionTreeClassifier()
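The listing stops after constructing the decision tree; a continuation mirroring the CostSensitiveLogisticRegression block above might look like this (an assumed sketch, not taken from the source):

f.fit(X_train, y_train, cost_mat_train)
y_pred_test_csdt = f.predict(X_test)
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_csdt)
print('The auc_score of CostSensitiveDecisionTreeClassifier is {:.2f}'.format(
    metrics.auc(fpr, tpr)))
print('*' * 90)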
Example No. 5
import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from costcla.metrics import cost_loss
from costcla.models import BayesMinimumRiskClassifier

# This fragment runs inside a larger experiment loop: `fp`, `cost`, `e`, `clf`,
# `cim`, `cm`, `classimbalance`, `data`, `target`, `X_test`, `y_test`,
# `pred_BR` and `cost_BR` are defined by code that is not part of the excerpt.

# Example-dependent cost matrix, columns [fp, fn, tp, tn]; the fp column is
# built earlier, outside this excerpt, and each false negative costs `cost`.
fn = np.full((y_test.shape[0], 1), cost)
tp = np.zeros((y_test.shape[0], 1))
tn = np.zeros((y_test.shape[0], 1))
cost_matrix = np.hstack((fp, fn, tp, tn))

# Optional class-imbalance treatment before fitting.
if cim == 2:
    data, target = classimbalance.random_undersampler(data, target)
elif cim == 3:
    data, target = classimbalance.smote(data, target)

if cm == 1:
    # Probability calibration using the isotonic method.
    cc = CalibratedClassifierCV(clf, method="isotonic", cv=3)
    model = cc.fit(data, target)
    prob_test = model.predict_proba(X_test)
    bmr = BayesMinimumRiskClassifier(calibration=False)
    prediction = bmr.predict(prob_test, cost_matrix)
    loss = cost_loss(y_test[:, e], prediction, cost_matrix)
    pred_BR.append(prediction)
    cost_BR.append(loss)

elif cm == 2:
    # Probability calibration using costcla's built-in calibration.
    model = clf.fit(data, target)
    prob_train = model.predict_proba(data)
    bmr = BayesMinimumRiskClassifier(calibration=True)
    bmr.fit(target, prob_train)
    prob_test = model.predict_proba(X_test)
    prediction = bmr.predict(prob_test, cost_matrix)
    loss = cost_loss(y_test[:, e], prediction, cost_matrix)
    pred_BR.append(prediction)
    cost_BR.append(loss)
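For reference, a self-contained toy comparison of the two calibration strategies used in this fragment; all data, costs and parameters below are illustrative assumptions:

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from costcla.metrics import cost_loss
from costcla.models import BayesMinimumRiskClassifier

X, y = make_classification(n_samples=600, weights=[0.8, 0.2], random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)

n = len(y_te)
cost_mat = np.hstack([np.ones((n, 1)), np.full((n, 1), 8.0),
                      np.zeros((n, 1)), np.zeros((n, 1))])  # [fp, fn, tp, tn]

# Strategy 1 (cm == 1): sklearn isotonic calibration, then Bayes-minimum-risk decisions.
iso = CalibratedClassifierCV(RandomForestClassifier(random_state=0),
                             method="isotonic", cv=3).fit(X_tr, y_tr)
bmr1 = BayesMinimumRiskClassifier(calibration=False)
pred1 = bmr1.predict(iso.predict_proba(X_te), cost_mat)

# Strategy 2 (cm == 2): raw model plus costcla's own calibration.
model = RandomForestClassifier(random_state=0).fit(X_tr, y_tr)
bmr2 = BayesMinimumRiskClassifier(calibration=True)
bmr2.fit(y_tr, model.predict_proba(X_tr))
pred2 = bmr2.predict(model.predict_proba(X_te), cost_mat)

print("isotonic + BMR cost loss:", cost_loss(y_te, pred1, cost_mat))
print("costcla calibration cost loss:", cost_loss(y_te, pred2, cost_mat))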