def __init__(self, stats, ensemble_method: str, ensemble_size: int, task_type: int,
             metric: _BaseScorer, output_dir=None):
    """Create the ensemble model selected by ``ensemble_method``.

    Supported values are ``'bagging'``, ``'blending'``, ``'stacking'`` and
    ``'ensemble_selection'``. All remaining arguments are forwarded
    unchanged, by keyword, to the constructor of the chosen ensemble class;
    the resulting object is stored in ``self.model``.

    Raises:
        ValueError: if ``ensemble_method`` is not one of the supported names.
    """
    self.model = None

    # Pick the class first so the (identical) constructor call is written once.
    # Names are resolved lazily: only the selected class is ever looked up.
    if ensemble_method == 'bagging':
        ensemble_cls = Bagging
    elif ensemble_method == 'blending':
        ensemble_cls = Blending
    elif ensemble_method == 'stacking':
        ensemble_cls = Stacking
    elif ensemble_method == 'ensemble_selection':
        ensemble_cls = EnsembleSelection
    else:
        raise ValueError("%s is not supported for ensemble!" % ensemble_method)

    shared_kwargs = dict(
        stats=stats,
        ensemble_size=ensemble_size,
        task_type=task_type,
        metric=metric,
        output_dir=output_dir,
    )
    self.model = ensemble_cls(**shared_kwargs)
class EnsembleBuilder:
    """Thin facade that instantiates one ensemble strategy and delegates to it.

    The concrete strategy (``Bagging``, ``Blending``, ``Stacking`` or
    ``EnsembleSelection``) is chosen by name at construction time and kept in
    ``self.model``; every public method simply forwards to that object.
    """

    def __init__(self, stats, data_node, ensemble_method: str, ensemble_size: int, task_type: int,
                 metric: _BaseScorer, output_dir=None):
        """Build the ensemble model named by ``ensemble_method``.

        Accepted names are ``'bagging'``, ``'blending'``, ``'stacking'`` and
        ``'ensemble_selection'``. The other arguments are passed through
        unchanged, by keyword, to the selected class.

        Raises:
            ValueError: if ``ensemble_method`` is not a supported name.
        """
        self.model = None

        # Resolve the strategy class lazily; only the selected name is looked up.
        if ensemble_method == 'bagging':
            strategy_cls = Bagging
        elif ensemble_method == 'blending':
            strategy_cls = Blending
        elif ensemble_method == 'stacking':
            strategy_cls = Stacking
        elif ensemble_method == 'ensemble_selection':
            strategy_cls = EnsembleSelection
        else:
            raise ValueError("%s is not supported for ensemble!" % ensemble_method)

        # All strategies receive exactly the same keyword arguments.
        common_kwargs = dict(
            stats=stats,
            data_node=data_node,
            ensemble_size=ensemble_size,
            task_type=task_type,
            metric=metric,
            output_dir=output_dir,
        )
        self.model = strategy_cls(**common_kwargs)

    def fit(self, data):
        """Forward ``data`` to the wrapped model's ``fit`` and return its result."""
        wrapped = self.model
        return wrapped.fit(data)

    def predict(self, data):
        """Forward ``data`` to the wrapped model's ``predict`` and return its result."""
        wrapped = self.model
        return wrapped.predict(data)

    def refit(self):
        """Call the wrapped model's ``refit`` and return its result."""
        wrapped = self.model
        return wrapped.refit()

    def get_ens_model_info(self):
        """Return whatever the wrapped model's ``get_ens_model_info`` returns."""
        wrapped = self.model
        return wrapped.get_ens_model_info()
def ensemble_implementation_examples(bandit: FirstLayerBandit, test_data: DataNode, *,
                                     n_best: int = 20, test_size: float = 0.2,
                                     ensemble_size: int = 50):
    """Refit the top configurations per algorithm and score an ensemble selection.

    For every algorithm id in ``bandit.nbest_algo_ids`` the algorithm's
    training data is split once into train/validation parts, the ``n_best``
    configurations with the largest recorded performance are refit on the
    train part, and their ``predict_proba`` outputs on the validation and test
    sets are collected. An ``EnsembleSelection`` is then fit on the validation
    predictions and applied to the test predictions.

    Args:
        bandit: object providing ``fetch_ensemble_members`` and
            ``nbest_algo_ids``.
        test_data: passed to ``bandit.fetch_ensemble_members``.
        n_best: how many of the best configurations to refit per algorithm.
        test_size: ``test_size`` given to ``StratifiedShuffleSplit`` for the
            train/validation split.
        ensemble_size: ``ensemble_size`` given to ``EnsembleSelection``.

    Returns:
        The ``accuracy_score`` of the arg-max ensemble prediction against the
        test labels.

    Raises:
        ValueError: if no member prediction could be collected (no algorithm
            ids, or every estimator failed), since no ensemble can be built.
    """
    from sklearn.model_selection import StratifiedShuffleSplit
    from sklearn.metrics import accuracy_score
    from autosklearn.metrics import accuracy

    stats = bandit.fetch_ensemble_members(test_data)
    seed = stats['split_seed']

    train_predictions = []
    test_predictions = []
    y_valid = y_test = None

    for algo_id in bandit.nbest_algo_ids:
        algo_stats = stats[algo_id]
        X, y = algo_stats['train_dataset'].data

        # n_splits=1, so the first split produced is the only one.
        # random_state is fixed (not ``seed``), as in the original code.
        splitter = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=1)
        train_index, valid_index = next(splitter.split(X, y))
        X_train, X_valid = X[train_index], X[valid_index]
        # NOTE(review): y_valid / y_test from the *last* algorithm are used for
        # the ensemble below; this presumes every algorithm's dataset holds the
        # same rows in the same order — confirm against the caller.
        y_train, y_valid = y[train_index], y[valid_index]

        X_test, y_test = algo_stats['test_dataset'].data
        configs = algo_stats['configurations']
        performance = algo_stats['performance']

        # Negate so argsort yields indices from best (largest) to worst.
        best_index = np.argsort(-np.array(performance))[:n_best]
        best_configs = [configs[i] for i in best_index]

        for config in best_configs:
            try:
                # Build the ML estimator.
                _, estimator = get_estimator(config)
                estimator.fit(X_train, y_train)
                y_valid_pred = estimator.predict_proba(X_valid)
                y_test_pred = estimator.predict_proba(X_test)
            except Exception as e:
                # Best effort: a failing configuration is reported and skipped.
                print(str(e))
            else:
                # Appended together, so both lists always have equal length.
                train_predictions.append(y_valid_pred)
                test_predictions.append(y_test_pred)

    if not train_predictions:
        # Previously this surfaced as an UnboundLocalError on y_valid (no
        # algorithm ids) or as an empty prediction list handed to the ensemble.
        raise ValueError("No ensemble member predictions were produced; cannot build an ensemble.")

    # NOTE(review): task_type=1 is kept from the original; its meaning is
    # defined by EnsembleSelection — confirm.
    es = EnsembleSelection(ensemble_size=ensemble_size, task_type=1, metric=accuracy,
                           random_state=np.random.RandomState(seed))
    es.fit(train_predictions, y_valid, identifiers=None)

    y_pred = es.predict(test_predictions)
    y_pred = np.argmax(y_pred, axis=-1)
    return accuracy_score(y_test, y_pred)