def make_voter(estimators, y, voting='hard'):
    """Build a pre-fitted ``VotingClassifier`` from already-trained estimators.

    Bypasses ``VotingClassifier.fit`` by manually installing the fitted
    sub-estimators and the label encoder, so the returned classifier can
    ``predict`` immediately.

    Parameters
    ----------
    estimators : dict
        Mapping of name -> fitted estimator.
    y : array-like
        Labels used only to fit the internal ``LabelEncoder`` (defines
        ``classes_``); the estimators themselves are assumed trained.
    voting : str, default 'hard'
        Voting strategy, forwarded to ``VotingClassifier``.

    Returns
    -------
    VotingClassifier
        A voting classifier ready for prediction without calling ``fit``.
    """
    estimators = list(estimators.items())
    # BUG FIX: `voting` is keyword-only in scikit-learn >= 0.23
    # (signature is VotingClassifier(estimators, *, voting='hard', ...)),
    # so passing it positionally raised TypeError on modern sklearn.
    clf = VotingClassifier(estimators, voting=voting)
    # Pre-load the fitted state that VotingClassifier.fit would normally set.
    clf.estimators_ = [estim for name, estim in estimators]
    clf.le_ = LabelEncoder()
    clf.le_.fit(y)
    clf.classes_ = clf.le_.classes_
    return clf
def _oos_eval(self, clfs, func, meta=False, *args, **kwargs):
    """Evaluate an ensemble of fold-classifiers on held-out (out-of-sample) data.

    In the non-meta case, builds a soft-voting classifier from `clfs`
    (pre-loading its fitted state rather than calling `fit`), predicts on the
    held-out split prepared by `self._prep_data`, and returns the classifier
    plus a dict of metrics. In the meta case, repeats the non-meta evaluation
    `ceil(n_jack * n_oos)` times and collects the per-run metric dicts.

    Parameters: `clfs` is a list of fitted classifiers (one per fold); `func`
    plus `*args`/`**kwargs` are forwarded to `self._prep_data` for feature
    preparation; `meta=True` triggers the repeated-evaluation branch.
    Returns `(clf, oos)` where `oos` is a metrics dict (or, when meta, a list
    of such dicts).
    """
    # If we're in the meta case, just call this several times regularly
    if meta:
        oos = []
        # Jackknife for proportionally fewer cases in meta eval
        for _ in range(int(np.ceil(self.n_jack*self.n_oos))):
            tmpclf, tmpoos = self._oos_eval(clfs, func, meta=False, *args, **kwargs)
            # Only the classifier from the final iteration is kept;
            # presumably all iterations build an equivalent voter — verify.
            clf = tmpclf
            oos += [tmpoos]
            del tmpoos
        return clf, oos
    # Generate test / oos data
    oos = {}
    # assumes self.dat_t / self.tar_t / self.sam_t hold the held-out
    # data, targets, and sample/group labels — TODO confirm against __init__
    Xo, yo, grpo = self._prep_data(self.dat_t, self.tar_t, self.sam_t, func, *args, **kwargs)
    # Aggregate classifiers across folds and pre-load training
    # NOTE(review): estimator names are ints here; sklearn's fit() validates
    # names as strings, but fit is deliberately bypassed below, so this works.
    clf = VotingClassifier(voting='soft', estimators=[(i, c) for i, c in enumerate(clfs)])
    clf.estimators_ = clfs
    clf.le_ = LabelEncoder().fit(yo)
    clf.classes_ = clf.le_.classes_
    # Evaluate voting classifier on test data
    pred = clf.predict(Xo)
    oos['true'] = yo
    oos['pred'] = pred
    oos['acc'] = accuracy_score(yo, pred)
    # f1_score with default average — assumes binary labels; TODO confirm
    oos['f1'] = f1_score(yo, pred)
    # Compare to mean oos-performance of component classifiers
    comp_preds = [c.predict(Xo) for c in clfs]
    oos['comp_acc'] = np.mean([accuracy_score(yo, cp) for cp in comp_preds])
    oos['comp_f1'] = np.mean([f1_score(yo, cp) for cp in comp_preds])
    # Permutation-style significance of the ensemble's scores (see performanceP)
    f1p, accp = self.performanceP(yo, oos['f1'], oos['acc'])
    oos['p_f1'] = f1p
    oos['p_acc'] = accp
    # Print performance
    if self.verbose:
        print("Y: ", pred, "->", yo)
        print("G: ", grpo)
        print("Test Accuracy: {0} (p <= {1})".format(oos['acc'], accp))
        print("Test F1: {0} (p<= {1})".format(oos['f1'], f1p))
    return clf, oos
def train_ensemble_classifier(training_data, forest, dtree, adaboost, extra_random, gnb, regression):
    """Cross-validate a hard-voting ensemble and print its ROC-AUC scores.

    Builds a hard-voting ensemble from five of the supplied estimators
    (adaboost is not a member), runs 5-fold cross-validation on the columns
    selected by the module-level `selected_features`, prints the fold scores
    and their mean, and returns the `adaboost` argument unchanged.
    """
    members = [
        ('rf', forest),
        ('dt', dtree),
        ('et', extra_random),
        ('gnb', gnb),
        ('lr', regression),
    ]
    ensemble = VotingClassifier(estimators=members, voting='hard')
    # NOTE(review): cross_val_score clones the estimator, so this manual
    # classes_ assignment is presumably discarded — confirm it is needed.
    ensemble.classes_ = [0, 1]
    features = training_data[:, selected_features].astype('float')
    labels = training_data[:, -1].astype('float')
    scores = cross_val_score(ensemble, features, labels, cv=5, scoring='roc_auc')
    print("Scores gotten using Ensemble classifier")
    print(str(scores))
    print(np.mean(scores))
    # NOTE(review): returns `adaboost` (never fitted or evaluated here) rather
    # than the ensemble — looks intentional but verify against the caller.
    return adaboost