def baeysian_clas(train, test, val_trai, val_test, auto_calibration=False,
                  calibration_func=None, clf=None, CostMatrix=None,
                  CostMatrixTrain=None):
    """Fit a classifier and turn its probabilities into cost-sensitive labels.

    The features are min-max scaled, ``clf`` is fitted (optionally wrapped in
    ``CalibratedClassifierCV``), and the predicted probabilities for both the
    validation and the training features are passed through a
    ``BayesMinimumRiskClassifier`` together with the matching cost matrix.
    A classification report, the cost loss and the transposed confusion
    matrix for the validation predictions are printed.

    Note that the parameter names do not match their roles:

    Args:
        train: Training features.
        test: Training labels (passed as ``y`` when fitting on ``train``).
        val_trai: Validation features.
        val_test: Validation labels (used for the printed metrics).
        auto_calibration: Forwarded as ``calibration=`` to both
            ``BayesMinimumRiskClassifier`` instances.
        calibration_func: When not ``None``, ``clf`` is wrapped in
            ``CalibratedClassifierCV(clf, method=calibration_func, cv=3)``.
        clf: Estimator to fit. Required when ``calibration_func`` is ``None``.
        CostMatrix: Cost matrix used for the validation predictions and loss.
        CostMatrixTrain: Cost matrix used for the training predictions.

    Returns:
        Tuple ``(pred_train, pred_test)``: the minimum-risk predictions for
        the training features and for the validation features.

    Raises:
        ValueError: If ``clf`` is ``None`` and no ``calibration_func`` is given.
    """
    scaler = MinMaxScaler()
    train = scaler.fit_transform(train)
    # Reuse the ranges learned on the training features. Re-fitting the scaler
    # here would put validation data on a different scale than the data the
    # model was trained on.
    val_trai = scaler.transform(val_trai)

    if calibration_func is None:
        if clf is None:
            raise ValueError("clf must be provided when calibration_func is None")
        model = clf.fit(train, test)
    else:
        cc = CalibratedClassifierCV(clf, method=calibration_func, cv=3)
        model = cc.fit(train, test)

    # NOTE(review): neither BMR instance is fitted before predict(); confirm
    # that costcla supports this when auto_calibration=True.
    prob_test = model.predict_proba(val_trai)
    bmr = BayesMinimumRiskClassifier(calibration=auto_calibration)
    pred_test = bmr.predict(prob_test, CostMatrix)

    prob_test_train = model.predict_proba(train)
    bmr_train = BayesMinimumRiskClassifier(calibration=auto_calibration)
    pred_train = bmr_train.predict(prob_test_train, CostMatrixTrain)

    print(classification_report(val_test, pred_test))
    loss = cost_loss(val_test, pred_test, CostMatrix)
    print("%d\n" % loss)
    print(confusion_matrix(val_test, pred_test).T)
    return pred_train, pred_test
def main():
    """Fit the base classifiers and derive Bayes-minimum-risk predictions.

    Loads a train/test split, fits a random forest and a decision tree on the
    training part, stores their test predictions and probabilities, and then
    adds a ``"<name>-BMR"`` entry per model in ``ci_models`` whose predictions
    come from a ``BayesMinimumRiskClassifier`` fed with the base model's test
    probabilities and ``cost_mat_test``.
    """
    X_train, X_test, y_train, y_test = load_data(train=True, test_size=0.4)

    classifiers = {
        "RF": {"f": RandomForestClassifier()},
        "DT": {"f": DecisionTreeClassifier()},
    }
    ci_models = ['DT', 'RF']

    # Fit every base estimator on the training data and cache its outputs.
    for entry in classifiers.values():
        estimator = entry["f"]
        estimator.fit(X_train, y_train)
        entry["c"] = estimator.predict(X_test)
        entry["p"] = estimator.predict_proba(X_test)
        entry["p_train"] = estimator.predict_proba(X_train)

    from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
    measures = {
        "F1Score": f1_score,
        "Precision": precision_score,
        "Recall": recall_score,
        "Accuracy": accuracy_score,
    }
    results = pd.DataFrame(columns=__labels__)

    from costcla.models import BayesMinimumRiskClassifier
    for base_name in ci_models:
        bmr_key = base_name + "-BMR"
        test_proba = classifiers[base_name]["p"]

        bmr = BayesMinimumRiskClassifier()
        classifiers[bmr_key] = {"f": bmr}

        # Fit. Original author's note: calibration must be made in a
        # validation set (this code fits on y_test and the test probabilities).
        bmr.fit(y_test, test_proba)

        # Predict
        classifiers[bmr_key]["c"] = bmr.predict(test_proba, cost_mat_test)
def _create_bmr_model(model, X_val, y_val, calibration=True):
    """Build a BayesMinimumRiskClassifier fitted on ``model``'s validation output.

    Args:
        model: Already-fitted estimator exposing ``predict_proba``.
        X_val: Validation features scored by ``model``.
        y_val: Validation labels passed to the BMR ``fit`` call.
        calibration: Forwarded as ``calibration=`` to the BMR classifier.

    Returns:
        Tuple ``(model, bmr)``: the unchanged input model and the fitted
        ``BayesMinimumRiskClassifier``.
    """
    val_proba = model.predict_proba(X_val)
    bmr_classifier = BayesMinimumRiskClassifier(calibration=calibration)
    bmr_classifier.fit(y_val, val_proba)
    return model, bmr_classifier
def cost_sensitive_classification(model, X_train, X_test, y_train, y_test, cost_mat_test):
    """Apply Bayes-minimum-risk classification on top of a fitted model.

    The model's test-set probabilities are used both to fit the
    ``BayesMinimumRiskClassifier`` and to produce the cost-sensitive
    predictions, so the BMR step is fitted and scored on the same data.

    Args:
        model: Already-fitted estimator exposing ``predict_proba``.
        X_train: Unused; kept so existing callers keep working.
        X_test: Test features scored by ``model``.
        y_train: Unused; kept so existing callers keep working.
        y_test: Test labels, used to fit the BMR model and compute accuracy.
        cost_mat_test: Cost matrix passed to the BMR ``predict`` call.

    Returns:
        Tuple ``(c_accuracy, y_pred_test_c_model)``: accuracy of the
        cost-sensitive predictions against ``y_test`` and the predictions.
    """
    c_model = BayesMinimumRiskClassifier()
    y_prob_test = model.predict_proba(X_test)
    # The plain model.predict(X_test) result was never used, so that extra
    # prediction pass has been dropped.
    c_model.fit(y_test, y_prob_test)
    y_pred_test_c_model = c_model.predict(y_prob_test, cost_mat_test)
    c_accuracy = accuracy_score(y_test, y_pred_test_c_model)
    return c_accuracy, y_pred_test_c_model
# Cost matrix handed to cost_loss() and to the BMR predict() calls below:
# fp, fn, tp and tn stacked horizontally, in that order.
# NOTE(review): presumably each of the four is a per-sample column — confirm
# where they are defined.
cost_matrix = np.hstack((fp, fn, tp, tn))

# Experiment 1: plain random forest, labels taken directly from predict().
print("no cost minimization")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
pred_test = model.predict(X_test)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T) # transpose to align with slides

# Experiment 2: same forest configuration, but the labels come from an
# unfitted BayesMinimumRiskClassifier (calibration=False) applied to the
# forest's test probabilities and the cost matrix.
print("no calibration")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_test = model.predict_proba(X_test)
bmr = BayesMinimumRiskClassifier(calibration=False)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
loss = cost_loss(y_test, pred_test, cost_matrix)
print("%d\n" % loss)
print(confusion_matrix(y_test, pred_test).T) # transpose to align with slides

# Experiment 3: BMR with calibration=True, fitted on the forest's
# probabilities for X_train, i.e. the same data the forest was trained on.
print("costcla calibration on training set")
clf = RandomForestClassifier(random_state=0, n_estimators=100)
model = clf.fit(X_train, y_train)
prob_train = model.predict_proba(X_train)
bmr = BayesMinimumRiskClassifier(calibration=True)
bmr.fit(y_train, prob_train)
prob_test = model.predict_proba(X_test)
pred_test = bmr.predict(prob_test, cost_matrix)
print(classification_report(y_test, pred_test, target_names=data.target_names))
def _fit_bmr_model(self, X, y):
    """Fit the internal BayesMinimumRisk model on this estimator's probabilities.

    A new ``BayesMinimumRiskClassifier`` is stored on ``self.f_bmr`` and
    fitted with ``y`` and the output of ``self.predict_proba(X)``.

    Returns:
        ``self``, to allow call chaining.
    """
    self.f_bmr = BayesMinimumRiskClassifier()
    predicted_proba = self.predict_proba(X)
    self.f_bmr.fit(y, predicted_proba)
    return self