Esempio n. 1
0
# Split the cost matrix at index `ratio` into a training part and a test part.
# NOTE(review): presumably the features/labels were split at the same index
# earlier in the file -- confirm.
cost_mat_train = cost_mat[:ratio]
cost_mat_test = cost_mat[ratio:]

# Reduce every label row to the index of its largest entry.
# Assumes y_train / y_test arrive as 2-D arrays (e.g. one-hot) -- TODO confirm.
y_train = np.argmax(y_train, axis=1)
y_test = np.argmax(y_test, axis=1)

# Pre-format the two shapes into one string so the output is the same
# "<shape> <shape>" text under the Python 2 print statement and the
# Python 3 print function.
print('{} {}'.format(y_train.shape, y_test.shape))

# Random forest baseline: fit on the training split with a fixed seed, then
# predict labels for the test split.
rfc = RandomForestClassifier(random_state=0).fit(x_train, y_train)
y_pred_test_rf = rfc.predict(x_test)

# Call form of print: required on Python 3 and, with a single argument,
# produces the same output on Python 2.
print(evaluate(y_pred_test_rf, y_test, cost_mat_test))

# Logistic regression baseline: fit on the training split with a fixed seed,
# then predict labels for the test split.
lr = LogisticRegression(random_state=0).fit(x_train, y_train)
y_pred_test_lr = lr.predict(x_test)

# Call form of print: required on Python 3 and, with a single argument,
# produces the same output on Python 2.
print(evaluate(y_pred_test_lr, y_test, cost_mat_test))

# Cost-sensitive decision tree: unlike the baselines above, fitting also
# receives the training cost matrix.
# Construction and fitting are separate statements so CSDT is bound to the
# estimator itself rather than to whatever fit() returns.
CSDT = CostSensitiveDecisionTreeClassifier()
CSDT.fit(x_train, y_train, cost_mat_train)
y_pred_test_csdt = CSDT.predict(x_test)

# Call form of print: required on Python 3 and, with a single argument,
# produces the same output on Python 2.
print(evaluate(y_pred_test_csdt, y_test, cost_mat_test))

# Cost-sensitive logistic regression: fitted with the training cost matrix,
# then used to predict labels for the test split.
CSLR = CostSensitiveLogisticRegression()
CSLR.fit(x_train, y_train, cost_mat_train)
y_pred_test_cslr = CSLR.predict(x_test)

# Call form of print: required on Python 3 and, with a single argument,
# produces the same output on Python 2.
print(evaluate(y_pred_test_cslr, y_test, cost_mat_test))
# Random forest + Bayes minimum risk: obtain class probabilities for the test
# split from a seeded random forest, then let the Bayes-minimum-risk
# classifier turn them into predictions using the test cost matrix.
# NOTE(review): this section uses X_train / X_test while earlier code in this
# file uses x_train / x_test -- confirm both spellings are defined.
y_prob_test = RandomForestClassifier(random_state=0).fit(
    X_train, y_train).predict_proba(X_test)

f_bmr = BayesMinimumRiskClassifier(calibration=True)
# NOTE(review): the classifier is fitted on the test labels and test
# probabilities and then evaluated on that same split -- confirm this is
# intended rather than a held-out calibration set.
f_bmr.fit(y_test, y_prob_test)
y_pred_test_bmr = f_bmr.predict(y_prob_test, cost_mat_test)

# ROC curve / AUC computed from the predictions returned by predict().
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_bmr)
# Message fixed: the original literal ran "Classifier", "is" and the number
# together with no separating spaces.
print(
    'The auc_score of using RandomForest and BayesMinimumRiskClassifier is {:.2f}'
    .format(metrics.auc(fpr, tpr)))
print('*' * 90)

# Cost-sensitive logistic regression with solver='ga'
# (presumably the library's genetic-algorithm solver -- confirm against the
# costcla documentation), fitted with the training cost matrix.
f = CostSensitiveLogisticRegression(solver='ga')
f.fit(X_train, y_train, cost_mat_train)
y_pred_test_cslr = f.predict(X_test)

# Score the predictions of THIS model. The original passed y_pred_test_lr,
# so the AUC printed under the CostSensitiveLogisticRegression label was
# computed from a different model's predictions.
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_cslr)
print('The auc_score of CostSensitiveLogisticRegression is {:.2f}'.format(
    metrics.auc(fpr, tpr)))
print('*' * 90)

# Cost-sensitive decision tree, fitted with the training cost matrix.
# The model is fitted exactly once; the original called fit() and then
# immediately called fit() again with identical arguments before predicting,
# repeating the training for no benefit.
f = CostSensitiveDecisionTreeClassifier()
f.fit(X_train, y_train, cost_mat_train)
y_pred_test_csdt = f.predict(X_test)

# ROC curve / AUC computed from the predictions returned by predict().
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred_test_csdt)
print('The auc_score of using CostSensitiveDecisionTreeClassifier is {:.2f}'.
      format(metrics.auc(fpr, tpr)))
print('*' * 90)