Example #1
0
    def fit(self, scenario: ASlibScenario, fold: int,
            amount_of_training_instances: int):
        """Run the boosting rounds, training one new base learner per round.

        Every round appends a base learner chosen by ``self.algorithm_name``,
        trains it on a sample produced by ``self.generate_weighted_sample``
        and then calls ``self.update_weights``. The loop ends after
        ``self.max_iterations`` rounds or as soon as ``update_weights``
        returns a falsy value.

        Args:
            scenario: Scenario to train on.
            fold: Fold id, forwarded to the sampler and the base learners.
            amount_of_training_instances: Number of training instances;
                ``-1`` means "use ``len(scenario.instances)``".

        Raises:
            SystemExit: If ``self.algorithm_name`` names no known learner
                (only detected once the first round starts).
        """
        # -1 is the sentinel for "all instances of the scenario"
        if amount_of_training_instances == -1:
            sample_size = len(scenario.instances)
        else:
            sample_size = amount_of_training_instances
        self.data_weights = np.ones(sample_size)

        # lambdas keep the class names unresolved until a learner is needed
        learner_factories = {
            'per_algorithm_regressor':
            lambda: PerAlgorithmRegressor(),
            'par10':
            lambda: SurrogateSurvivalForest(criterion='PAR10'),
        }

        for round_index in range(self.max_iterations):
            self.current_iteration += 1

            make_learner = learner_factories.get(self.algorithm_name)
            if make_learner is None:
                sys.exit('Wrong base learner for boosting')
            self.base_learners.append(make_learner())

            # NOTE(review): indexing by round only addresses the learner that
            # was just appended if self.base_learners was empty on entry --
            # confirm fit() is never called twice on the same object.
            weighted_scenario = self.generate_weighted_sample(
                scenario, fold, sample_size)
            self.base_learners[round_index].fit(weighted_scenario, fold,
                                                amount_of_training_instances)

            # a falsy result from update_weights ends boosting early
            keep_boosting = self.update_weights(
                scenario, self.base_learners[round_index], sample_size)
            if not keep_boosting:
                break
Example #2
0
    def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
        """Train the boosted ensemble and store intermediate results.

        Each round appends a base learner selected by ``self.algorithm_name``,
        fits it on a sample from ``self.generate_weighted_sample`` and calls
        ``self.update_weights``; a falsy return value stops the loop before
        anything is written for that round. Otherwise the current state is
        passed to ``write_to_database`` (the plain call is skipped when
        ``self.current_iteration`` equals ``self.num_iterations``; the
        ``on_training=True`` call always happens).

        Args:
            scenario: Scenario to train on.
            fold: Fold id, forwarded to sampler, learners and database writer.
            amount_of_training_instances: Number of training instances;
                ``-1`` means "use ``len(scenario.instances)``".

        Raises:
            SystemExit: If ``self.algorithm_name`` names no known learner
                (only detected once the first round starts).
        """
        # -1 is the sentinel for "all instances of the scenario"
        if amount_of_training_instances == -1:
            sample_size = len(scenario.instances)
        else:
            sample_size = amount_of_training_instances
        self.num_algorithms = len(scenario.algorithms)
        # every instance starts with the same weight; the weights sum to one
        self.data_weights = np.ones(sample_size) / sample_size

        # lambdas keep the class names unresolved until a learner is needed
        learner_factories = {
            'per_algorithm_regressor': lambda: PerAlgorithmRegressor(),
            'multiclass_algorithm_selector': lambda: MultiClassAlgorithmSelector(),
            'satzilla': lambda: SATzilla11(),
            'sunny': lambda: SUNNY(),
            'isac': lambda: ISAC(),
        }

        for round_index in range(self.num_iterations):
            self.current_iteration += 1

            make_learner = learner_factories.get(self.algorithm_name)
            if make_learner is None:
                sys.exit('Wrong base learner for boosting')
            self.base_learners.append(make_learner())

            # NOTE(review): indexing by round only addresses the learner that
            # was just appended if self.base_learners was empty on entry --
            # confirm fit() is never called twice on the same object.
            weighted_scenario = self.generate_weighted_sample(scenario, fold, sample_size)
            self.base_learners[round_index].fit(weighted_scenario, fold, amount_of_training_instances)

            keep_boosting = self.update_weights(scenario, self.base_learners[round_index], sample_size)
            if not keep_boosting:
                break

            # the plain write is skipped once the configured number of rounds is reached
            reached_last_round = self.current_iteration == self.num_iterations
            if not reached_last_round:
                write_to_database(scenario, self, fold)
            write_to_database(scenario, self, fold, on_training=True)
def create_approach(approach_names):
    """Build one approach object for every recognised name.

    Names are processed in the given order; a name that is not part of the
    table below is skipped without any message. A fresh object is created for
    every occurrence of a name.

    Args:
        approach_names: Iterable of approach name strings.

    Returns:
        list: The constructed approach objects, in input order.
    """

    def survival_forest(criterion):
        # all survival-forest variants differ only in their criterion
        return SurrogateSurvivalForest(criterion=criterion)

    # Factories instead of instances/classes: nothing is constructed (or even
    # looked up) unless the corresponding name is actually requested.
    builders = {
        'sbs':
        lambda: SingleBestSolver(),
        'sbs_with_feature_costs':
        lambda: SingleBestSolverWithFeatureCosts(),
        'virtual_sbs_with_feature_costs':
        lambda: VirtualSingleBestSolverWithFeatureCosts(),
        'oracle':
        lambda: Oracle(),
        'ExpectationSurvivalForest':
        lambda: survival_forest('Expectation'),
        'PolynomialSurvivalForest':
        lambda: survival_forest('Polynomial'),
        'GridSearchSurvivalForest':
        lambda: survival_forest('GridSearch'),
        'ExponentialSurvivalForest':
        lambda: survival_forest('Exponential'),
        'SurrogateAutoSurvivalForest':
        lambda: SurrogateAutoSurvivalForest(),
        'PAR10SurvivalForest':
        lambda: survival_forest('PAR10'),
        'per_algorithm_regressor':
        lambda: PerAlgorithmRegressor(),
        'imputed_per_algorithm_rf_regressor':
        lambda: PerAlgorithmRegressor(impute_censored=True),
        'imputed_per_algorithm_ridge_regressor':
        lambda: PerAlgorithmRegressor(scikit_regressor=Ridge(alpha=1.0),
                                      impute_censored=True),
        'multiclass_algorithm_selector':
        lambda: MultiClassAlgorithmSelector(),
        'sunny':
        lambda: SUNNY(),
        'snnap':
        lambda: SNNAP(),
        'satzilla-11':
        lambda: SATzilla11(),
        'satzilla-07':
        lambda: SATzilla07(),
        'isac':
        lambda: ISAC(),
    }

    return [
        builders[requested]() for requested in approach_names
        if requested in builders
    ]
 def create_base_learner(self):
     """Rebuild ``self.base_learners`` from the ids in ``self.base_learner_type``.

     The list is reset first. Learners are then appended in ascending id
     order (1..7), independent of the order inside
     ``self.base_learner_type``; ids outside that range are ignored.
     """
     # (id, factory) pairs; the lambdas delay construction until the id is
     # actually requested
     catalogue = (
         (1, lambda: PerAlgorithmRegressor()),
         (2, lambda: SUNNY()),
         (3, lambda: ISAC()),
         (4, lambda: SATzilla11()),
         (5, lambda: SurrogateSurvivalForest(criterion='Expectation')),
         (6, lambda: SurrogateSurvivalForest(criterion='PAR10')),
         (7, lambda: MultiClassAlgorithmSelector()),
     )

     self.base_learners = list()
     for learner_id, build_learner in catalogue:
         if learner_id in self.base_learner_type:
             self.base_learners.append(build_learner())
Example #5
0
    def create_base_learner(self):
        """Rebuild ``self.trained_models`` from the ids in ``self.base_learner``.

        The list is reset first. Learners are then appended in ascending id
        order (1..7), independent of the order inside ``self.base_learner``;
        ids outside that range are ignored.
        """
        # (id, factory) pairs; the lambdas delay construction until the id is
        # actually requested
        catalogue = (
            (1, lambda: PerAlgorithmRegressor()),
            (2, lambda: SUNNY()),
            (3, lambda: ISAC()),
            (4, lambda: SATzilla11()),
            (5, lambda: SurrogateSurvivalForest(criterion='Expectation')),
            (6, lambda: SurrogateSurvivalForest(criterion='PAR10')),
            (7, lambda: MultiClassAlgorithmSelector()),
        )

        # drop whatever models a previous call left behind
        self.trained_models = list()

        for learner_id, build_learner in catalogue:
            if learner_id in self.base_learner:
                self.trained_models.append(build_learner())
Example #6
0
    def __init__(self,
                 num_base_learner: int,
                 base_learner=None,
                 use_ranking=False,
                 performance_ranking=False,
                 weighting=False,
                 weight_type=None):
        """Store the ensemble configuration and reset the training state.

        Args:
            num_base_learner: Stored as ``self.num_base_learner``.
            base_learner: Learner object stored as ``self.base_learner``.
                When omitted (``None``) a new ``PerAlgorithmRegressor`` is
                created for this instance.
            use_ranking: Stored as ``self.use_ranking``.
            performance_ranking: Stored as ``self.performance_ranking``.
            weighting: Stored as ``self.weighting``.
            weight_type: Stored as ``self.weight_type``.
        """
        self.logger = logging.getLogger("bagging")
        # logging.getLogger hands out the same logger object for the same
        # name, so attaching a handler unconditionally would add one more
        # StreamHandler per created instance and repeat every message.
        if not self.logger.handlers:
            self.logger.addHandler(logging.StreamHandler())

        # attributes
        self.num_algorithms = 0
        self.base_learners = list()
        self.current_iteration = 0
        self.weights = None

        # parameters
        # A default written as ``PerAlgorithmRegressor()`` in the signature
        # is evaluated once and then shared by every instance; build a
        # private one instead when the caller did not supply a learner.
        if base_learner is None:
            base_learner = PerAlgorithmRegressor()
        self.base_learner = base_learner
        self.num_base_learner = num_base_learner
        self.use_ranking = use_ranking
        self.performance_ranking = performance_ranking
        self.weighting = weighting
        self.weight_type = weight_type
    def fit(self, scenario: ASlibScenario, fold: int,
            amount_of_training_instances: int):
        """Fit the stacking ensemble: base learners first, then the meta learner.

        The predictions of every base learner for the scenario's instances
        become additional features: all per-algorithm predictions when
        ``self.new_feature_type == 'full'``, or only the index of the smallest
        prediction when it is ``'small'``. Depending on
        ``self.meta_learner_input`` these features are used alone or appended
        to the original features, optionally reduced by a feature selector,
        and finally used to train the meta learner.

        ``scenario.feature_data`` is overwritten with the new feature frame,
        so the scenario object passed in by the caller is modified.

        Args:
            scenario: Scenario providing instances, algorithms, feature data
                and performance data.
            fold: Fold id; forwarded to the learners, used in the names of the
                prediction files and as ``random_state`` of the scikit-learn
                meta learners.
            amount_of_training_instances: Forwarded unchanged to every
                ``fit`` call of the project's learners.

        Raises:
            ValueError: If ``self.new_feature_type`` is neither ``'full'``
                nor ``'small'``.
            SystemExit: If ``self.meta_learner_input`` is neither ``'full'``
                nor ``'predictions_only'``.
        """
        # setup the ensemble
        self.create_base_learner()
        self.scenario_name = scenario.scenario
        self.fold = fold
        self.num_algorithms = len(scenario.algorithms)

        num_instances = len(scenario.instances)
        feature_data = scenario.feature_data.to_numpy()
        performance_data = scenario.performance_data.to_numpy()

        # width of the new feature block; rejecting an unknown type here
        # avoids an obscure unbound-variable error after all the training
        if self.new_feature_type == 'full':
            num_new_features = self.num_algorithms * len(self.base_learners)
        elif self.new_feature_type == 'small':
            num_new_features = len(self.base_learners)
        else:
            raise ValueError('Unknown new feature type: ' +
                             repr(self.new_feature_type))

        # new features in matrix [instances x predictions]
        new_feature_data = np.zeros((num_instances, num_new_features))

        # if predictions are precomputed
        # NOTE(review): load_pickle presumably unpickles the file -- only
        # point it at prediction files produced by this project.
        if self.pre_computed:
            for base_learner in self.base_learners:
                self.predictions.append(
                    load_pickle(filename='predictions/' +
                                base_learner.get_name() + '_' +
                                scenario.scenario + '_' + str(fold)))

        # create new features for every base learner on each instance
        for learner_index, base_learner in enumerate(self.base_learners):

            # load pre computed predictions
            if self.pre_computed:
                if self.cross_validation:
                    predictions = load_pickle(
                        filename='predictions/cross_validation_' +
                        base_learner.get_name() + '_' + scenario.scenario +
                        '_' + str(fold))
                else:
                    predictions = load_pickle(
                        filename='predictions/full_trainingdata_' +
                        base_learner.get_name() + '_' + scenario.scenario +
                        '_' + str(fold))

            # create predictions, if they are not pre computed
            else:
                # Fresh matrix per learner. The cross-validation branch used
                # to write into `predictions` without ever creating it.
                predictions = np.zeros((num_instances, self.num_algorithms))

                # if cross validation is used
                if self.cross_validation:
                    instance_counter = 0

                    for sub_fold in range(1, 11):
                        test_scenario, training_scenario = split_scenario(
                            scenario, sub_fold, num_instances)

                        # train base learner
                        base_learner.fit(training_scenario, fold,
                                         amount_of_training_instances)

                        # predict the instances held out by this sub fold
                        # NOTE(review): assumes the held-out instances of
                        # consecutive sub folds are consecutive rows of
                        # feature_data -- confirm against split_scenario.
                        num_held_out = len(test_scenario.instances)
                        for instance_number in range(
                                instance_counter,
                                instance_counter + num_held_out):
                            prediction = base_learner.predict(
                                feature_data[instance_number], instance_number)
                            predictions[instance_number] = prediction.flatten()

                        instance_counter = instance_counter + num_held_out

                # if no cross validation is used
                else:
                    base_learner.fit(scenario, fold,
                                     amount_of_training_instances)

                    for instance_id, instance_feature in enumerate(
                            feature_data):
                        predictions[instance_id] = base_learner.predict(
                            instance_feature, instance_id)

            # insert predictions to new feature data matrix
            for i in range(num_instances):
                if self.new_feature_type == 'full':
                    for alo_num in range(self.num_algorithms):
                        new_feature_data[i][
                            alo_num + self.num_algorithms *
                            learner_index] = predictions[i][alo_num]

                elif self.new_feature_type == 'small':
                    new_feature_data[i][learner_index] = np.argmin(
                        predictions[i])

        # After cross validation the learners above are only trained on the
        # last sub fold, so replace them by fresh ones fitted on the original
        # training data. Doing this once here (instead of once per learner
        # inside the loop) leaves the same final learners with far fewer fits.
        if self.cross_validation and not self.pre_computed:
            self.create_base_learner()
            for fresh_learner in self.base_learners:
                fresh_learner.fit(scenario, fold,
                                  amount_of_training_instances)

        # add predictions to the features of the instances
        new_columns = np.arange(num_new_features)
        new_feature_data = pd.DataFrame(new_feature_data,
                                        index=scenario.feature_data.index,
                                        columns=new_columns)

        if self.meta_learner_input == 'full':
            new_feature_data = pd.concat(
                [scenario.feature_data, new_feature_data], axis=1, sort=False)

        elif self.meta_learner_input == 'predictions_only':
            pass

        else:
            sys.exit('Wrong meta learner input type option')

        scenario.feature_data = new_feature_data

        # meta learner selection
        # NOTE(review): algorithm_selection_algorithm is only ever set to
        # True here; it is assumed to default to False elsewhere (needed for
        # 'RandomForest' and 'SVM') -- TODO confirm.
        if self.meta_learner_type == 'per_algorithm_regressor':
            self.meta_learner = PerAlgorithmRegressor(
                feature_importances=self.feature_importance)
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SUNNY':
            self.meta_learner = SUNNY()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'ISAC':
            self.meta_learner = ISAC()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SATzilla-11':
            self.meta_learner = SATzilla11()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'multiclass':
            self.meta_learner = MultiClassAlgorithmSelector(
                feature_importance=self.feature_importance)
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'Expectation':
            self.meta_learner = SurrogateSurvivalForest(
                criterion='Expectation')
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'PAR10':
            self.meta_learner = SurrogateSurvivalForest(criterion='PAR10')
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'RandomForest':
            self.meta_learner = RandomForestClassifier(random_state=fold)
        elif self.meta_learner_type == 'SVM':
            self.meta_learner = LinearSVC(random_state=fold, max_iter=10000)

        # feature selection
        # NOTE(review): both selectors rebuild the frame without index or
        # column labels, so it gets default integer labels -- confirm the
        # meta learner does not rely on the scenario's instance index.
        if self.feature_selection == 'variance_threshold':
            self.feature_selector = VarianceThreshold(threshold=.8 * (1 - .8))
            self.feature_selector.fit(scenario.feature_data)
            scenario.feature_data = pd.DataFrame(
                data=self.feature_selector.transform(scenario.feature_data))
        elif self.feature_selection == 'select_k_best':
            self.feature_selector = SelectKBest(f_classif,
                                                k=self.num_algorithms)
            # class label of an instance = index of the smallest performance value
            label_performance_data = [np.argmin(x) for x in performance_data]
            self.imputer = SimpleImputer()
            scenario.feature_data = self.imputer.fit_transform(
                scenario.feature_data)
            self.feature_selector.fit(scenario.feature_data,
                                      label_performance_data)
            scenario.feature_data = pd.DataFrame(
                data=self.feature_selector.transform(scenario.feature_data))

        # fit meta learner
        if self.algorithm_selection_algorithm:
            self.meta_learner.fit(scenario, fold, amount_of_training_instances)
        else:
            # plain scikit-learn classifier: train it on the index of the
            # smallest performance value of every instance
            label_performance_data = [np.argmin(x) for x in performance_data]

            self.pipe = Pipeline([('imputer', SimpleImputer()),
                                  ('standard_scaler', StandardScaler())])
            x_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                              label_performance_data)

            self.meta_learner.fit(x_train, label_performance_data)
Example #8
0
    def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
        """Train the configured base learner and pickle its predictions.

        Depending on the flags of this object the method

        * ``self.for_cross_validation``: trains the learner on the training
          part of sub folds 1..10 (from ``split_scenario``), predicts the
          held-out instances of each sub fold and saves the result as
          ``predictions/cross_validation_<name>_<scenario>_<fold>``;
          otherwise the learner is trained once on the whole scenario;
        * ``self.predict_full_training_set``: predicts the training instances
          with the current learner and saves the result as
          ``predictions/full_trainingdata_<name>_<scenario>_<fold>``.

        Args:
            scenario: Scenario providing instances, algorithms and features.
            fold: Fold id; forwarded to the learner and used in file names.
            amount_of_training_instances: Forwarded to the learner; for the
                full-training-set predictions ``-1`` means all instances,
                any other value limits how many instances are predicted.

        Raises:
            SystemExit: If ``self.algorithm`` names no known base learner.
        """
        self.num_algorithms = len(scenario.algorithms)
        self.scenario_name = scenario.scenario
        self.fold = fold

        if self.algorithm == 'per_algorithm_regressor':
            self.base_learner = PerAlgorithmRegressor()
        elif self.algorithm == 'sunny':
            self.base_learner = SUNNY()
        elif self.algorithm == 'isac':
            self.base_learner = ISAC()
        elif self.algorithm == 'satzilla':
            self.base_learner = SATzilla11()
        elif self.algorithm == 'expectation':
            self.base_learner = SurrogateSurvivalForest(criterion='Expectation')
        elif self.algorithm == 'par10':
            self.base_learner = SurrogateSurvivalForest(criterion='PAR10')
        elif self.algorithm == 'multiclass':
            self.base_learner = MultiClassAlgorithmSelector()
        else:
            sys.exit('Wrong base learner')

        if self.for_cross_validation:
            num_instances = len(scenario.instances)
            feature_data = scenario.feature_data.to_numpy()
            predictions = np.zeros((num_instances, self.num_algorithms))

            # first row of feature_data that belongs to the current sub fold
            # NOTE(review): assumes the held-out instances of consecutive
            # sub folds are consecutive rows of feature_data -- confirm
            # against split_scenario.
            instance_counter = 0

            for sub_fold in range(1, 11):
                test_scenario, training_scenario = split_scenario(scenario, sub_fold, num_instances)

                # train base learner
                self.base_learner.fit(training_scenario, fold, amount_of_training_instances)

                # predict the instances held out by this sub fold
                num_held_out = len(test_scenario.instances)
                for instance_number in range(instance_counter, instance_counter + num_held_out):
                    predictions[instance_number] = self.base_learner.predict(
                        feature_data[instance_number], instance_number).flatten()

                instance_counter = instance_counter + num_held_out

            save_pickle(filename='predictions/cross_validation_' + self.base_learner.get_name() + '_' + self.scenario_name + '_' + str(self.fold), data=predictions)
        else:
            self.base_learner.fit(scenario, fold, amount_of_training_instances)

        if self.predict_full_training_set:
            # Only the features are needed for predicting. The performance
            # and feature-cost values that used to be extracted here were
            # never used and have been dropped.
            # NOTE(review): when for_cross_validation is set as well, the
            # learner used here was last trained on the final sub fold only.
            feature_data = scenario.feature_data.to_numpy()

            num_iterations = len(
                scenario.instances) if amount_of_training_instances == -1 else amount_of_training_instances

            # rows beyond num_iterations stay zero
            predictions = np.zeros((len(scenario.instances), self.num_algorithms))
            for instance_id in range(num_iterations):
                predictions[instance_id] = self.base_learner.predict(
                    feature_data[instance_id], instance_id).flatten()
            save_pickle(filename='predictions/full_trainingdata_' + self.base_learner.get_name() + '_' + self.scenario_name + '_' + str(self.fold), data=predictions)
def create_approach(approach_names):
    """Build the approach objects that belong to the requested names.

    Names are processed in the given order. Some names stand for a single
    approach, others for a whole family (e.g. all bagging variants). A name
    that is not part of the table below is skipped without any message.
    Fresh objects are created for every occurrence of a name.

    Args:
        approach_names: Iterable of approach name strings.

    Returns:
        list: All constructed approach objects, in input order.
    """

    # ---- builders for families of approaches -------------------------------
    # Every builder returns a list and resolves class names only when called.

    def survival_forest(criterion):
        return [SurrogateSurvivalForest(criterion=criterion)]

    def precomputed_base_learners():
        learner_names = (
            'per_algorithm_RandomForestRegressor_regressor',
            'sunny',
            'isac',
            'satzilla-11',
            'Expectation_algorithm_survival_forest',
            'PAR10_algorithm_survival_forest',
            'multiclass_algorithm_selector',
        )
        return [
            RunPreComputedBaseLearner(learner_name)
            for learner_name in learner_names
        ]

    def voting_over_combinations(**options):
        # one Voting ensemble per combination of the seven base learner ids
        return [
            Voting(base_learner=combination, pre_computed=True, **options)
            for combination in get_combinations([1, 2, 3, 4, 5, 6, 7])
        ]

    def optimized_voting():
        # plain, ranking and weighting variant; each gets its own id list
        variants = ({}, {'ranking': True}, {'weighting': True})
        return [
            Voting(base_learner=[1, 2, 3, 4, 5, 6, 7],
                   pre_computed=True,
                   optimze_base_learner=True,
                   **variant) for variant in variants
        ]

    def bagging_family(**options):
        learner_classes = (PerAlgorithmRegressor, SUNNY, ISAC, SATzilla11,
                           MultiClassAlgorithmSelector)
        return [
            Bagging(num_base_learner=10,
                    base_learner=learner_class(),
                    **options) for learner_class in learner_classes
        ]

    def samme_family():
        learner_names = ('per_algorithm_regressor',
                         'multiclass_algorithm_selector', 'sunny', 'isac')
        return [
            SAMME(learner_name, num_iterations=20)
            for learner_name in learner_names
        ]

    def stacking_family(**options):
        # the id list is shared by all members of one family
        learner_ids = [1, 2, 3, 4, 5, 6, 7]
        meta_learner_types = ('per_algorithm_regressor', 'SUNNY', 'ISAC',
                              'SATzilla-11', 'PAR10', 'multiclass')
        return [
            Stacking(base_learner=learner_ids,
                     meta_learner_type=meta_learner_type,
                     pre_computed=True,
                     **options) for meta_learner_type in meta_learner_types
        ]

    def prediction_jobs():
        # per algorithm: first the job that also predicts the full training
        # set, then the plain one
        algorithms = ('per_algorithm_regressor', 'sunny', 'isac', 'satzilla',
                      'expectation', 'par10', 'multiclass')
        jobs = []
        for algorithm in algorithms:
            jobs.append(
                CreateBaseLearnerPrediction(algorithm=algorithm,
                                            for_cross_validation=False,
                                            predict_full_training_set=True))
            jobs.append(
                CreateBaseLearnerPrediction(algorithm=algorithm,
                                            for_cross_validation=False))
        return jobs

    # ---- name -> builder ---------------------------------------------------
    builders = {
        # SBS and VBS
        'sbs':
        lambda: [SingleBestSolver()],
        'oracle':
        lambda: [Oracle()],

        # baselines
        'ExpectationSurvivalForest':
        lambda: survival_forest('Expectation'),
        'PolynomialSurvivalForest':
        lambda: survival_forest('Polynomial'),
        'GridSearchSurvivalForest':
        lambda: survival_forest('GridSearch'),
        'ExponentialSurvivalForest':
        lambda: survival_forest('Exponential'),
        'SurrogateAutoSurvivalForest':
        lambda: [SurrogateAutoSurvivalForest()],
        'PAR10SurvivalForest':
        lambda: survival_forest('PAR10'),
        'per_algorithm_regressor':
        lambda: [PerAlgorithmRegressor()],
        'imputed_per_algorithm_rf_regressor':
        lambda: [PerAlgorithmRegressor(impute_censored=True)],
        'imputed_per_algorithm_ridge_regressor':
        lambda: [
            PerAlgorithmRegressor(scikit_regressor=Ridge(alpha=1.0),
                                  impute_censored=True)
        ],
        'multiclass_algorithm_selector':
        lambda: [MultiClassAlgorithmSelector()],
        'sunny':
        lambda: [SUNNY()],
        'snnap':
        lambda: [SNNAP()],
        'satzilla-11':
        lambda: [SATzilla11()],
        'satzilla-07':
        lambda: [SATzilla07()],
        'isac':
        lambda: [ISAC()],
        'base_learner':
        precomputed_base_learners,

        # voting
        'voting':
        lambda: voting_over_combinations(),
        'voting_borda':
        lambda: voting_over_combinations(ranking=True),
        'voting_weighting':
        lambda: voting_over_combinations(weighting=True),
        'voting_optimize':
        optimized_voting,

        # bagging
        'bagging-base_learner':
        lambda: bagging_family(),
        'bagging_weighting-base_learner':
        lambda: bagging_family(weighting=True),
        'bagging_borda-base_learner':
        lambda: bagging_family(use_ranking=True),

        # boosting
        'samme':
        samme_family,

        # stacking
        'stacking_meta_learner':
        lambda: stacking_family(),
        'stacking_feature_selection':
        lambda: stacking_family(feature_selection='variance_threshold'),

        # precompute baseline predictions
        'create_base_learner_prediction':
        prediction_jobs,
    }

    approaches = list()
    for requested in approach_names:
        build = builders.get(requested)
        if build is not None:
            approaches.extend(build())
    return approaches
Example #10
0
    def fit(self, scenario: ASlibScenario, fold: int,
            amount_of_training_instances: int):
        """Train the base learners and the meta learner on ``scenario``.

        The predictions of every base learner are appended as additional
        feature columns to ``scenario.feature_data`` (the passed scenario is
        modified in place). The meta learner selected by
        ``self.meta_learner_type`` is then trained on the extended features.

        Args:
            scenario: Training scenario. Its ``feature_data`` attribute is
                replaced by the frame extended with base learner predictions.
            fold: Fold identifier; forwarded to the learners and used in the
                file names of pre-computed predictions.
            amount_of_training_instances: Forwarded unchanged to the ``fit``
                methods of the base learners and of algorithm-selection
                meta learners.
        """
        self.create_base_learner()
        self.scenario_name = scenario.scenario
        self.fold = fold
        self.num_algorithms = len(scenario.algorithms)
        num_instances = len(scenario.instances)
        feature_data = scenario.feature_data.to_numpy()
        performance_data = scenario.performance_data.to_numpy()

        # One block of `num_algorithms` columns per base learner.
        new_feature_data = np.zeros(
            (num_instances, self.num_algorithms * len(self.base_learners)))

        for learner_index, base_learner in enumerate(self.base_learners):

            instance_counter = 0

            predictions = np.zeros((num_instances, self.num_algorithms))

            if self.pre_computed:
                # Cross-validated predictions stored by an earlier run.
                predictions = load_pickle(
                    filename='predictions/cross_validation_' +
                    base_learner.get_name() + '_' + scenario.scenario + '_' +
                    str(fold))
            else:
                # Inner 10-fold cross-validation: each instance is predicted
                # by a base learner trained on the remaining sub-folds.
                for sub_fold in range(1, 11):
                    test_scenario, training_scenario = split_scenario(
                        scenario, sub_fold, num_instances)

                    # train base learner
                    base_learner.fit(training_scenario, fold,
                                     amount_of_training_instances)

                    # create new feature data
                    # NOTE(review): indexes `feature_data` by a running
                    # counter, which assumes the sub-fold test instances are
                    # contiguous and in scenario order - confirm against
                    # `split_scenario`.
                    num_test_instances = len(test_scenario.instances)
                    for instance_number in range(
                            instance_counter,
                            instance_counter + num_test_instances):
                        prediction = base_learner.predict(
                            feature_data[instance_number], instance_number)
                        predictions[instance_number] = prediction.flatten()

                    instance_counter += num_test_instances

            # Copy this learner's predictions into its column block.
            column_offset = self.num_algorithms * learner_index
            for i in range(num_instances):
                for alo_num in range(self.num_algorithms):
                    new_feature_data[i][column_offset +
                                        alo_num] = predictions[i][alo_num]

        if self.pre_computed:
            for base_learner in self.base_learners:
                self.predictions.append(
                    load_pickle(filename='predictions/' +
                                base_learner.get_name() + '_' +
                                scenario.scenario + '_' + str(fold)))
        else:
            # Presumably re-creates the base learners so that the sub-fold
            # training state is discarded before fitting on the full
            # scenario - verify against `create_base_learner`.
            self.create_base_learner()
            for base_learner in self.base_learners:
                base_learner.fit(scenario, fold, amount_of_training_instances)

        # add predictions to the features of the instances
        new_feature_data = pd.DataFrame(
            new_feature_data,
            index=scenario.feature_data.index,
            columns=np.arange(self.num_algorithms * len(self.base_learners)))
        new_feature_data = pd.concat([scenario.feature_data, new_feature_data],
                                     axis=1,
                                     sort=False)
        scenario.feature_data = new_feature_data

        # meta learner training with or without feature selection
        if self.meta_learner_type == 'per_algorithm_regressor':
            self.meta_learner = PerAlgorithmRegressor()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SUNNY':
            self.meta_learner = SUNNY()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'ISAC':
            self.meta_learner = ISAC()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SATzilla-11':
            self.meta_learner = SATzilla11()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'multiclass':
            self.meta_learner = MultiClassAlgorithmSelector()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'Expectation':
            self.meta_learner = SurrogateSurvivalForest(
                criterion='Expectation')
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'DecisionTree':
            # This branch was previously keyed on 'RandomForest' as well,
            # which made the random forest branch below unreachable.
            self.meta_learner = DecisionTreeClassifier()
        elif self.meta_learner_type == 'RandomForest':
            self.meta_learner = RandomForestClassifier()

        if self.algorithm_selection_algorithm:
            self.meta_learner.fit(scenario, fold, amount_of_training_instances)
        else:
            # Plain classifiers are trained to predict the index of the
            # algorithm with the smallest performance value per instance.
            label_performance_data = [np.argmin(x) for x in performance_data]

            self.pipe = Pipeline([('imputer', SimpleImputer()),
                                  ('standard_scaler', StandardScaler())])
            X_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                              label_performance_data)

            self.meta_learner.fit(X_train, label_performance_data)