Esempio n. 1
0
 def __init__(self, stats, ensemble_method: str,
              ensemble_size: int,
              task_type: int,
              metric: _BaseScorer,
              output_dir=None):
     """Create the ensemble model named by ``ensemble_method``.

     The selected ensemble class is instantiated with the remaining
     arguments (all passed by keyword) and stored in ``self.model``.

     :param stats: forwarded unchanged to the ensemble class.
     :param ensemble_method: one of ``'bagging'``, ``'blending'``,
         ``'stacking'`` or ``'ensemble_selection'``.
     :param ensemble_size: forwarded unchanged to the ensemble class.
     :param task_type: forwarded unchanged to the ensemble class.
     :param metric: forwarded unchanged to the ensemble class.
     :param output_dir: forwarded unchanged to the ensemble class.
     :raises ValueError: if ``ensemble_method`` is not one of the names
         listed above.
     """
     self.model = None

     # Resolve the method name to its class first; the (identical)
     # constructor call is then written only once below.
     if ensemble_method == 'bagging':
         ensemble_cls = Bagging
     elif ensemble_method == 'blending':
         ensemble_cls = Blending
     elif ensemble_method == 'stacking':
         ensemble_cls = Stacking
     elif ensemble_method == 'ensemble_selection':
         ensemble_cls = EnsembleSelection
     else:
         raise ValueError("%s is not supported for ensemble!" % ensemble_method)

     self.model = ensemble_cls(stats=stats,
                               ensemble_size=ensemble_size,
                               task_type=task_type,
                               metric=metric,
                               output_dir=output_dir)
Esempio n. 2
0
class EnsembleBuilder:
    """Build one ensemble model by name and delegate all calls to it.

    The concrete model is kept in ``self.model``; ``fit``, ``predict``,
    ``refit`` and ``get_ens_model_info`` simply forward to it.
    """

    def __init__(self,
                 stats,
                 data_node,
                 ensemble_method: str,
                 ensemble_size: int,
                 task_type: int,
                 metric: _BaseScorer,
                 output_dir=None):
        """Instantiate the ensemble model selected by ``ensemble_method``.

        :param stats: forwarded unchanged to the ensemble class.
        :param data_node: forwarded unchanged to the ensemble class.
        :param ensemble_method: one of ``'bagging'``, ``'blending'``,
            ``'stacking'`` or ``'ensemble_selection'``.
        :param ensemble_size: forwarded unchanged to the ensemble class.
        :param task_type: forwarded unchanged to the ensemble class.
        :param metric: forwarded unchanged to the ensemble class.
        :param output_dir: forwarded unchanged to the ensemble class.
        :raises ValueError: if ``ensemble_method`` is not one of the names
            listed above.
        """
        self.model = None

        # Every supported ensemble takes exactly the same keyword arguments.
        shared_kwargs = dict(stats=stats,
                             data_node=data_node,
                             ensemble_size=ensemble_size,
                             task_type=task_type,
                             metric=metric,
                             output_dir=output_dir)

        if ensemble_method == 'bagging':
            self.model = Bagging(**shared_kwargs)
        elif ensemble_method == 'blending':
            self.model = Blending(**shared_kwargs)
        elif ensemble_method == 'stacking':
            self.model = Stacking(**shared_kwargs)
        elif ensemble_method == 'ensemble_selection':
            self.model = EnsembleSelection(**shared_kwargs)
        else:
            raise ValueError("%s is not supported for ensemble!" %
                             ensemble_method)

    def fit(self, data):
        """Forward ``data`` to the wrapped model's ``fit`` and return its result."""
        wrapped = self.model
        return wrapped.fit(data)

    def predict(self, data):
        """Forward ``data`` to the wrapped model's ``predict`` and return its result."""
        wrapped = self.model
        return wrapped.predict(data)

    def refit(self):
        """Call the wrapped model's ``refit`` and return its result."""
        wrapped = self.model
        return wrapped.refit()

    def get_ens_model_info(self):
        """Return whatever the wrapped model's ``get_ens_model_info`` returns."""
        wrapped = self.model
        return wrapped.get_ens_model_info()
def ensemble_implementation_examples(bandit: FirstLayerBandit, test_data: DataNode):
    """Refit the best configurations per algorithm and score their ensemble.

    For every algorithm id in ``bandit.nbest_algo_ids`` the ``n_best``
    configurations with the highest recorded performance are re-trained on a
    stratified 80% split of that algorithm's training data. Their
    ``predict_proba`` outputs on the held-out 20% and on the test data are
    collected, an ``EnsembleSelection`` model is fitted on the held-out
    predictions, and the accuracy of its argmax prediction on the test
    labels is returned.

    Args:
        bandit: provides ``fetch_ensemble_members`` and ``nbest_algo_ids``.
        test_data: passed to ``bandit.fetch_ensemble_members``.

    Returns:
        The value of ``accuracy_score`` for the ensemble on the test labels.

    Raises:
        ValueError: if no configuration could be fitted (for example when
            ``bandit.nbest_algo_ids`` is empty or every estimator failed),
            because there is then nothing to build an ensemble from.
    """
    from sklearn.model_selection import StratifiedShuffleSplit
    from sklearn.metrics import accuracy_score
    from autosklearn.metrics import accuracy

    n_best = 20
    test_size = 0.2

    stats = bandit.fetch_ensemble_members(test_data)
    seed = stats['split_seed']

    # Loop-invariant: with an integer random_state every split() call is
    # reproducible, so a single splitter can serve all algorithms.
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=1)

    train_predictions = []
    test_predictions = []
    # The labels of the last processed algorithm are used after the loop.
    # NOTE(review): this assumes all algorithms share the same labels in the
    # same order - confirm against fetch_ensemble_members.
    y_valid = None
    y_test = None

    for algo_id in bandit.nbest_algo_ids:
        algo_stats = stats[algo_id]

        X, y = algo_stats['train_dataset'].data
        # n_splits=1, so the generator yields exactly one (train, valid) pair.
        train_index, valid_index = next(splitter.split(X, y))
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]

        X_test, y_test = algo_stats['test_dataset'].data

        configs = algo_stats['configurations']
        performance = algo_stats['performance']
        # Negating sorts descending: highest performance first.
        ranking = np.argsort(-np.array(performance))
        best_configs = [configs[i] for i in ranking[:n_best]]

        for config in best_configs:
            try:
                # Build the ML estimator.
                _, estimator = get_estimator(config)
                estimator.fit(X_train, y_train)
                y_valid_pred = estimator.predict_proba(X_valid)
                y_test_pred = estimator.predict_proba(X_test)
            except Exception as e:
                # Best effort: a failing configuration is reported and skipped.
                print(str(e))
            else:
                # Append both together so the two lists stay aligned.
                train_predictions.append(y_valid_pred)
                test_predictions.append(y_test_pred)

    if not train_predictions:
        # Fail with a clear message instead of an UnboundLocalError on
        # y_valid or an obscure error inside the ensemble code.
        raise ValueError("No ensemble member could be fitted; "
                         "cannot build an ensemble.")

    es = EnsembleSelection(ensemble_size=50, task_type=1,
                           metric=accuracy, random_state=np.random.RandomState(seed))
    assert len(train_predictions) == len(test_predictions)
    es.fit(train_predictions, y_valid, identifiers=None)
    y_pred = es.predict(test_predictions)
    y_pred = np.argmax(y_pred, axis=-1)
    test_score = accuracy_score(y_test, y_pred)
    return test_score