def baeysian_clas(train,
                  test,
                  val_trai,
                  val_test,
                  auto_calibration=False,
                  calibration_func=None,
                  clf=None,
                  CostMatrix=None,
                  CostMatrixTrain=None):
    """Fit ``clf`` and turn its probabilities into Bayes-minimum-risk predictions.

    Despite their names, the positional arguments are used as follows:

    * ``train``     -- training features (passed as ``X`` to ``fit``).
    * ``test``      -- training labels (passed as ``y`` to ``fit``).
    * ``val_trai``  -- validation features.
    * ``val_test``  -- validation labels (ground truth for the printed report).

    Args:
        auto_calibration: forwarded as ``calibration=`` to
            ``BayesMinimumRiskClassifier``. When true, the BMR calibrator is
            fitted on the training labels/probabilities before predicting.
        calibration_func: when not ``None``, ``clf`` is wrapped in
            ``CalibratedClassifierCV(clf, method=calibration_func, cv=3)``.
        clf: estimator exposing ``fit`` and ``predict_proba``.
        CostMatrix: cost matrix used for the validation predictions and for
            the reported cost loss.
        CostMatrixTrain: cost matrix used for the training predictions.

    Returns:
        ``(pred_train, pred_test)``: BMR predictions for the training set and
        for the validation set, in that order.

    Side effects:
        Prints the classification report, the cost loss and the transposed
        confusion matrix for the validation set.
    """
    scaler = MinMaxScaler()
    train = scaler.fit_transform(train)
    # Reuse the statistics learned on the training data. Re-fitting the
    # scaler here would map the validation features onto a different range
    # than the one the model was trained on.
    val_trai = scaler.transform(val_trai)

    if calibration_func is None:
        model = clf.fit(train, test)
    else:
        cc = CalibratedClassifierCV(clf, method=calibration_func, cv=3)
        model = cc.fit(train, test)

    prob_test = model.predict_proba(val_trai)
    prob_test_train = model.predict_proba(train)

    bmr = BayesMinimumRiskClassifier(calibration=auto_calibration)
    if auto_calibration:
        # The BMR calibrator must be fitted before predict() can use it.
        # The training split is the only data here that is not also used
        # for evaluation, so calibrate on it.
        bmr.fit(test, prob_test_train)

    pred_test = bmr.predict(prob_test, CostMatrix)
    pred_train = bmr.predict(prob_test_train, CostMatrixTrain)

    print(classification_report(val_test, pred_test))
    loss = cost_loss(val_test, pred_test, CostMatrix)
    print("%d\n" % loss)
    # Transposed so that rows are predictions and columns are true labels.
    print(confusion_matrix(val_test, pred_test).T)
    return pred_train, pred_test
Ejemplo n.º 2
0
def main():
    """Fit RF/DT base classifiers and attach a Bayes-minimum-risk variant of each.

    For every base model the fitted estimator is stored under ``"f"``, its
    test predictions under ``"c"``, and its test/train probabilities under
    ``"p"`` / ``"p_train"``. A ``"<name>-BMR"`` entry is then added for each
    name in ``ci_models``.

    NOTE(review): each BMR model is calibrated on ``y_test`` and the test-set
    probabilities and then predicts on those same probabilities; the inline
    comment below says calibration belongs on a validation set.
    """
    X_train, X_test, y_train, y_test = load_data(train=True, test_size=0.4)
    classifiers = {
        "RF": {"f": RandomForestClassifier()},
        "DT": {"f": DecisionTreeClassifier()},
    }
    ci_models = ['DT', 'RF']
    # Fit the classifiers using the training dataset
    for entry in classifiers.values():
        estimator = entry["f"]
        estimator.fit(X_train, y_train)
        entry["c"] = estimator.predict(X_test)
        entry["p"] = estimator.predict_proba(X_test)
        entry["p_train"] = estimator.predict_proba(X_train)
    from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

    measures = {
        "F1Score": f1_score,
        "Precision": precision_score,
        "Recall": recall_score,
        "Accuracy": accuracy_score,
    }
    results = pd.DataFrame(columns=__labels__)

    from costcla.models import BayesMinimumRiskClassifier

    for model in ci_models:
        bmr_key = model + "-BMR"
        base_proba = classifiers[model]["p"]
        bmr = BayesMinimumRiskClassifier()
        classifiers[bmr_key] = {"f": bmr}
        # Fit
        bmr.fit(y_test, base_proba)
        # Calibration must be made in a validation set
        # Predict
        classifiers[bmr_key]["c"] = bmr.predict(base_proba, cost_mat_test)
Ejemplo n.º 3
0
def _create_bmr_model(model, X_val, y_val, calibration=True):
    """Fit a Bayes-minimum-risk classifier on ``model``'s validation output.

    Args:
        model: fitted estimator exposing ``predict_proba``.
        X_val: validation features fed to ``model.predict_proba``.
        y_val: validation labels used as the BMR calibration target.
        calibration: forwarded to ``BayesMinimumRiskClassifier``.

    Returns:
        ``(model, bmr)``: the untouched input model and the fitted BMR
        classifier.
    """
    val_probabilities = model.predict_proba(X_val)
    risk_classifier = BayesMinimumRiskClassifier(calibration=calibration)
    risk_classifier.fit(y_val, val_probabilities)
    return model, risk_classifier
Ejemplo n.º 4
0
def cost_sensitive_classification(model, X_train, X_test, y_train, y_test, cost_mat_test):
    """Score Bayes-minimum-risk predictions built on top of a fitted ``model``.

    Args:
        model: fitted estimator exposing ``predict_proba``.
        X_train: unused; kept so existing callers keep working.
        X_test: test features.
        y_train: unused; kept so existing callers keep working.
        y_test: test labels, used both to fit the BMR classifier and to
            compute the accuracy.
        cost_mat_test: cost matrix passed to the BMR ``predict`` call.

    Returns:
        ``(c_accuracy, y_pred_test_c_model)``: accuracy of the cost-sensitive
        predictions on the test set, and the predictions themselves.

    NOTE(review): the BMR classifier is fitted on the same test split it is
    evaluated on, so the reported accuracy is likely optimistic -- confirm
    whether a separate calibration split should be used.
    """
    c_model = BayesMinimumRiskClassifier()
    y_prob_test = model.predict_proba(X_test)
    # The plain ``model.predict(X_test)`` call was dropped: its result was
    # never read, so it only cost an extra pass over the test set.
    c_model.fit(y_test, y_prob_test)
    y_pred_test_c_model = c_model.predict(y_prob_test, cost_mat_test)
    c_accuracy = accuracy_score(y_test, y_pred_test_c_model)

    return c_accuracy, y_pred_test_c_model
Ejemplo n.º 5
0
# Script comparing a plain random forest with Bayes-minimum-risk (BMR)
# post-processing, with and without costcla calibration.
#
# Concatenate the four cost arrays horizontally into one cost matrix.
# NOTE(review): fp/fn/tp/tn are defined outside this excerpt; presumably each
# is a per-sample column of costs -- confirm against where they are built.
cost_matrix = np.hstack((fp, fn, tp, tn))

# Baseline: use the forest's own class predictions; cost_matrix is only used
# to score the result.
print("no cost minimization")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
pred_test = model.predict(X_test)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T)  # transpose to align with slides

# BMR on the forest's test probabilities with calibration switched off, so
# no bmr.fit() call is made before predict().
print("no calibration")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_test = model.predict_proba(X_test)
bmr = BayesMinimumRiskClassifier(calibration=False)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T)  # transpose to align with slides

# BMR with calibration enabled: the calibrator is fitted on the training
# labels and the forest's training-set probabilities, then applied to the
# test probabilities.
# NOTE(review): this section stops after the classification report; the
# cost-loss and confusion-matrix prints of the earlier sections do not
# appear here -- the excerpt may be cut off.
print("costcla calibration on training set")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_train = model.predict_proba(X_train)
bmr = BayesMinimumRiskClassifier(calibration=True)
bmr.fit(y_train, prob_train)
prob_test = model.predict_proba(X_test)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
 def _fit_bmr_model(self, X, y):
     """Fit the Bayes-minimum-risk model kept in ``self.f_bmr``.

     A fresh ``BayesMinimumRiskClassifier`` is stored on the instance and
     fitted with ``y`` as the target and ``self.predict_proba(X)`` as the
     probabilities. Returns ``self``.
     """
     self.f_bmr = BayesMinimumRiskClassifier()
     own_probabilities = self.predict_proba(X)
     self.f_bmr.fit(y, own_probabilities)
     return self