def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
    """Train the boosting ensemble, adding one base learner per round.

    In every round a new base learner (chosen by ``self.algorithm_name``) is
    appended to ``self.base_learners``, fitted on the scenario returned by
    ``self.generate_weighted_sample`` and then passed to
    ``self.update_weights``. Boosting stops early as soon as
    ``update_weights`` returns a falsy value.

    Args:
        scenario: Scenario whose ``instances`` and ``algorithms`` are read.
        fold: Fold identifier, forwarded unchanged to the sampler, the base
            learners and ``write_to_database``.
        amount_of_training_instances: Number of training instances to use;
            ``-1`` selects all instances of ``scenario``.
    """
    if amount_of_training_instances == -1:
        sample_size = len(scenario.instances)
    else:
        sample_size = amount_of_training_instances

    self.num_algorithms = len(scenario.algorithms)
    # every training instance starts with the same weight
    self.data_weights = np.ones(sample_size) / sample_size

    # supported base learners, matched against self.algorithm_name in order
    learner_factories = (
        ('per_algorithm_regressor', lambda: PerAlgorithmRegressor()),
        ('multiclass_algorithm_selector',
         lambda: MultiClassAlgorithmSelector()),
        ('satzilla', lambda: SATzilla11()),
        ('sunny', lambda: SUNNY()),
        ('isac', lambda: ISAC()),
    )

    for round_index in range(self.num_iterations):
        self.current_iteration += 1

        make_learner = next(
            (factory for name, factory in learner_factories
             if self.algorithm_name == name), None)
        if make_learner is None:
            sys.exit('Wrong base learner for boosting')
        self.base_learners.append(make_learner())

        # fit the learner of this round on a weighted sample
        weighted_scenario = self.generate_weighted_sample(
            scenario, fold, sample_size)
        self.base_learners[round_index].fit(
            weighted_scenario, fold, amount_of_training_instances)

        weights_updated = self.update_weights(
            scenario, self.base_learners[round_index], sample_size)
        if not weights_updated:
            break

        # the plain write is skipped once the configured number of rounds
        # has been reached; the on_training write happens in every round
        if self.current_iteration != self.num_iterations:
            write_to_database(scenario, self, fold)
        write_to_database(scenario, self, fold, on_training=True)
def create_base_learner(self):
    """Rebuild ``self.base_learners`` from the ids in ``self.base_learner_type``.

    The list is always replaced by a fresh one. A learner is added for each
    id from 1 to 7 contained in ``self.base_learner_type``; the resulting
    order follows the id order, not the order inside ``base_learner_type``.
    Ids outside 1..7 are ignored.
    """
    # id -> constructor; wrapped in lambdas so a learner is only built
    # when its id was actually requested
    learner_factories = (
        (1, lambda: PerAlgorithmRegressor()),
        (2, lambda: SUNNY()),
        (3, lambda: ISAC()),
        (4, lambda: SATzilla11()),
        (5, lambda: SurrogateSurvivalForest(criterion='Expectation')),
        (6, lambda: SurrogateSurvivalForest(criterion='PAR10')),
        (7, lambda: MultiClassAlgorithmSelector()),
    )

    self.base_learners = []
    for learner_id, build_learner in learner_factories:
        if learner_id in self.base_learner_type:
            self.base_learners.append(build_learner())
def create_approach(approach_names):
    """Instantiate one approach object per recognised approach name.

    Args:
        approach_names: Iterable of approach identifiers.

    Returns:
        A list with a freshly constructed approach for every recognised
        name, in the order the names were given. Names that are not
        recognised contribute nothing.
    """
    # name -> constructor; lambdas defer construction until a name matches
    approach_factories = (
        ('sbs', lambda: SingleBestSolver()),
        ('sbs_with_feature_costs',
         lambda: SingleBestSolverWithFeatureCosts()),
        ('virtual_sbs_with_feature_costs',
         lambda: VirtualSingleBestSolverWithFeatureCosts()),
        ('oracle', lambda: Oracle()),
        ('ExpectationSurvivalForest',
         lambda: SurrogateSurvivalForest(criterion='Expectation')),
        ('PolynomialSurvivalForest',
         lambda: SurrogateSurvivalForest(criterion='Polynomial')),
        ('GridSearchSurvivalForest',
         lambda: SurrogateSurvivalForest(criterion='GridSearch')),
        ('ExponentialSurvivalForest',
         lambda: SurrogateSurvivalForest(criterion='Exponential')),
        ('SurrogateAutoSurvivalForest',
         lambda: SurrogateAutoSurvivalForest()),
        ('PAR10SurvivalForest',
         lambda: SurrogateSurvivalForest(criterion='PAR10')),
        ('per_algorithm_regressor', lambda: PerAlgorithmRegressor()),
        ('imputed_per_algorithm_rf_regressor',
         lambda: PerAlgorithmRegressor(impute_censored=True)),
        ('imputed_per_algorithm_ridge_regressor',
         lambda: PerAlgorithmRegressor(scikit_regressor=Ridge(alpha=1.0),
                                       impute_censored=True)),
        ('multiclass_algorithm_selector',
         lambda: MultiClassAlgorithmSelector()),
        ('sunny', lambda: SUNNY()),
        ('snnap', lambda: SNNAP()),
        ('satzilla-11', lambda: SATzilla11()),
        ('satzilla-07', lambda: SATzilla07()),
        ('isac', lambda: ISAC()),
    )

    approaches = []
    for approach_name in approach_names:
        approaches.extend(
            build_approach()
            for known_name, build_approach in approach_factories
            if approach_name == known_name)
    return approaches
def create_base_learner(self):
    """Reset ``self.trained_models`` and fill it with fresh base learners.

    ``self.base_learner`` is treated as a collection of integer ids. For
    each id from 1 to 7 that it contains, the matching learner is
    constructed and appended, always in ascending id order. Other ids are
    ignored.
    """
    # constructors are wrapped so only the requested learners get built
    factory_by_id = (
        (1, lambda: PerAlgorithmRegressor()),
        (2, lambda: SUNNY()),
        (3, lambda: ISAC()),
        (4, lambda: SATzilla11()),
        (5, lambda: SurrogateSurvivalForest(criterion='Expectation')),
        (6, lambda: SurrogateSurvivalForest(criterion='PAR10')),
        (7, lambda: MultiClassAlgorithmSelector()),
    )

    # clean up list and init base learners
    self.trained_models = []
    for model_id, make_model in factory_by_id:
        if model_id not in self.base_learner:
            continue
        self.trained_models.append(make_model())
def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
    """Fit the base learners and the meta learner of the stacking ensemble.

    The base-learner predictions on the training instances are turned into
    additional features (all predicted values for ``new_feature_type ==
    'full'``, only the arg-min index per learner for ``'small'``). Depending
    on ``self.meta_learner_input`` they are appended to, or replace, the
    original features. The meta learner selected by
    ``self.meta_learner_type`` is then trained on the result, optionally
    after feature selection.

    Note that ``scenario.feature_data`` is overwritten with the new feature
    frame as a side effect.

    Args:
        scenario: Training scenario; ``feature_data`` and
            ``performance_data`` are read via ``to_numpy()``.
        fold: Fold identifier; forwarded to the learners, used in the
            prediction file names and as ``random_state`` of the
            scikit-learn meta learners.
        amount_of_training_instances: Forwarded unchanged to every
            ``fit`` call of base and meta learners.
    """
    # setup the ensemble
    self.create_base_learner()
    self.scenario_name = scenario.scenario
    self.fold = fold
    self.num_algorithms = len(scenario.algorithms)
    num_instances = len(scenario.instances)
    feature_data = scenario.feature_data.to_numpy()
    performance_data = scenario.performance_data.to_numpy()

    # new features in matrix [instances x predictions]
    if self.new_feature_type == 'full':
        new_feature_data = np.zeros(
            (num_instances, self.num_algorithms * len(self.base_learners)))
    elif self.new_feature_type == 'small':
        new_feature_data = np.zeros(
            (num_instances, len(self.base_learners)))

    # if predictions are precomputed
    # NOTE(review): load_pickle presumably unpickles local files - only
    # point it at prediction files from a trusted source.
    if self.pre_computed:
        for base_learner in self.base_learners:
            self.predictions.append(
                load_pickle(filename='predictions/' +
                            base_learner.get_name() + '_' +
                            scenario.scenario + '_' + str(fold)))

    # create new features for every base learner on each instance
    for learner_index, base_learner in enumerate(self.base_learners):
        if self.pre_computed:
            # load pre computed predictions
            if self.cross_validation:
                predictions = load_pickle(
                    filename='predictions/cross_validation_' +
                    base_learner.get_name() + '_' + scenario.scenario +
                    '_' + str(fold))
            else:
                predictions = load_pickle(
                    filename='predictions/full_trainingdata_' +
                    base_learner.get_name() + '_' + scenario.scenario +
                    '_' + str(fold))
        elif self.cross_validation:
            # Start from a fresh matrix for every learner: previously the
            # array was indexed before it had ever been assigned in this
            # branch.
            predictions = np.zeros((num_instances, self.num_algorithms))
            instance_counter = 0
            for sub_fold in range(1, 11):
                test_scenario, training_scenario = split_scenario(
                    scenario, sub_fold, num_instances)

                # train base learner
                base_learner.fit(training_scenario, fold,
                                 amount_of_training_instances)

                # create new feature data
                # assumes the test instances of consecutive sub-folds are
                # consecutive rows of feature_data - TODO confirm against
                # split_scenario
                for instance_number in range(
                        instance_counter,
                        instance_counter + len(test_scenario.instances)):
                    prediction = base_learner.predict(
                        feature_data[instance_number], instance_number)
                    predictions[instance_number] = prediction.flatten()

                instance_counter = instance_counter + len(
                    test_scenario.instances)
        else:
            # no cross validation: fit on, and predict, the training data
            base_learner.fit(scenario, fold, amount_of_training_instances)
            predictions = np.zeros(
                (len(scenario.instances), self.num_algorithms))
            for instance_id, instance_feature in enumerate(feature_data):
                predictions[instance_id] = base_learner.predict(
                    instance_feature, instance_id)

        # insert predictions to new feature data matrix
        for i in range(num_instances):
            if self.new_feature_type == 'full':
                for alo_num in range(self.num_algorithms):
                    new_feature_data[i][
                        alo_num + self.num_algorithms *
                        learner_index] = predictions[i][alo_num]
            elif self.new_feature_type == 'small':
                new_feature_data[i][learner_index] = np.argmin(
                    predictions[i])

    # fit base learner on the original training data; done once after all
    # cross-validated predictions exist instead of once per learner
    if self.cross_validation and not self.pre_computed:
        self.create_base_learner()
        for base_learner in self.base_learners:
            base_learner.fit(scenario, fold, amount_of_training_instances)

    # add predictions to the features of the instances
    if self.new_feature_type == 'full':
        new_columns = np.arange(self.num_algorithms *
                                len(self.base_learners))
    elif self.new_feature_type == 'small':
        new_columns = np.arange(len(self.base_learners))
    new_feature_data = pd.DataFrame(new_feature_data,
                                    index=scenario.feature_data.index,
                                    columns=new_columns)

    if self.meta_learner_input == 'full':
        new_feature_data = pd.concat(
            [scenario.feature_data, new_feature_data], axis=1, sort=False)
    elif self.meta_learner_input == 'predictions_only':
        pass
    else:
        sys.exit('Wrong meta learner input type option')

    scenario.feature_data = new_feature_data

    # meta learner selection
    if self.meta_learner_type == 'per_algorithm_regressor':
        self.meta_learner = PerAlgorithmRegressor(
            feature_importances=self.feature_importance)
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SUNNY':
        self.meta_learner = SUNNY()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'ISAC':
        self.meta_learner = ISAC()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SATzilla-11':
        self.meta_learner = SATzilla11()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'multiclass':
        self.meta_learner = MultiClassAlgorithmSelector(
            feature_importance=self.feature_importance)
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'Expectation':
        self.meta_learner = SurrogateSurvivalForest(
            criterion='Expectation')
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'PAR10':
        self.meta_learner = SurrogateSurvivalForest(criterion='PAR10')
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'RandomForest':
        self.meta_learner = RandomForestClassifier(random_state=fold)
    elif self.meta_learner_type == 'SVM':
        self.meta_learner = LinearSVC(random_state=fold, max_iter=10000)

    # feature selection
    if self.feature_selection == 'variance_threshold':
        self.feature_selector = VarianceThreshold(threshold=.8 * (1 - .8))
        self.feature_selector.fit(scenario.feature_data)
        scenario.feature_data = pd.DataFrame(
            data=self.feature_selector.transform(scenario.feature_data))
    elif self.feature_selection == 'select_k_best':
        self.feature_selector = SelectKBest(f_classif,
                                            k=self.num_algorithms)
        # class label of an instance = index of its minimal performance value
        label_performance_data = [np.argmin(x) for x in performance_data]
        self.imputer = SimpleImputer()
        scenario.feature_data = self.imputer.fit_transform(
            scenario.feature_data)
        self.feature_selector.fit(scenario.feature_data,
                                  label_performance_data)
        scenario.feature_data = pd.DataFrame(
            data=self.feature_selector.transform(scenario.feature_data))

    # fit meta learner
    if self.algorithm_selection_algorithm:
        self.meta_learner.fit(scenario, fold, amount_of_training_instances)
    else:
        # scikit-learn classifiers are trained on imputed, standardised
        # features with the arg-min algorithm index as label
        label_performance_data = [np.argmin(x) for x in performance_data]
        self.pipe = Pipeline([('imputer', SimpleImputer()),
                              ('standard_scaler', StandardScaler())])
        x_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                          label_performance_data)
        self.meta_learner.fit(x_train, label_performance_data)
def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
    """Fit one base learner and pickle its predictions on the training data.

    The learner is chosen by ``self.algorithm``. With
    ``self.for_cross_validation`` set, predictions are collected over the
    sub-folds 1..10 produced by ``split_scenario`` and saved as
    ``predictions/cross_validation_<name>_<scenario>_<fold>``; otherwise the
    learner is fitted once on ``scenario``. With
    ``self.predict_full_training_set`` set, the learner additionally
    predicts the training instances and the result is saved as
    ``predictions/full_trainingdata_<name>_<scenario>_<fold>``.

    Args:
        scenario: Training scenario; ``feature_data`` is read via
            ``to_numpy()``.
        fold: Fold identifier, forwarded to the learner and used in the
            file names.
        amount_of_training_instances: Forwarded to the learner's ``fit``;
            also the number of instances predicted for the full training
            set (``-1`` means all instances).
    """
    self.num_algorithms = len(scenario.algorithms)
    self.scenario_name = scenario.scenario
    self.fold = fold

    # supported learners, matched against self.algorithm in order
    learner_factories = (
        ('per_algorithm_regressor', lambda: PerAlgorithmRegressor()),
        ('sunny', lambda: SUNNY()),
        ('isac', lambda: ISAC()),
        ('satzilla', lambda: SATzilla11()),
        ('expectation',
         lambda: SurrogateSurvivalForest(criterion='Expectation')),
        ('par10', lambda: SurrogateSurvivalForest(criterion='PAR10')),
        ('multiclass', lambda: MultiClassAlgorithmSelector()),
    )
    make_learner = next(
        (factory for name, factory in learner_factories
         if self.algorithm == name), None)
    if make_learner is None:
        sys.exit('Wrong base learner')
    self.base_learner = make_learner()

    if self.for_cross_validation:
        num_instances = len(scenario.instances)
        feature_data = scenario.feature_data.to_numpy()
        instance_counter = 0
        predictions = np.zeros((num_instances, self.num_algorithms))

        for sub_fold in range(1, 11):
            test_scenario, training_scenario = split_scenario(
                scenario, sub_fold, num_instances)

            # train base learner
            self.base_learner.fit(training_scenario, fold,
                                  amount_of_training_instances)

            # predict the held-out instances of this sub-fold
            # assumes the test instances of consecutive sub-folds are
            # consecutive rows of feature_data - TODO confirm against
            # split_scenario
            num_test_instances = len(test_scenario.instances)
            for instance_number in range(
                    instance_counter,
                    instance_counter + num_test_instances):
                predictions[instance_number] = self.base_learner.predict(
                    feature_data[instance_number],
                    instance_number).flatten()

            instance_counter = instance_counter + num_test_instances

        save_pickle(filename='predictions/cross_validation_' +
                    self.base_learner.get_name() + '_' +
                    self.scenario_name + '_' + str(self.fold),
                    data=predictions)
    else:
        self.base_learner.fit(scenario, fold, amount_of_training_instances)

    # NOTE(review): when for_cross_validation is also set, the learner used
    # below was last fitted on the final sub-fold's training split -
    # confirm that combination is intended.
    if self.predict_full_training_set:
        feature_data = scenario.feature_data.to_numpy()

        if amount_of_training_instances == -1:
            num_predicted = len(scenario.instances)
        else:
            num_predicted = amount_of_training_instances

        # rows beyond num_predicted stay zero
        predictions = np.zeros(
            (len(scenario.instances), self.num_algorithms))
        for instance_id in range(num_predicted):
            predictions[instance_id] = self.base_learner.predict(
                feature_data[instance_id], instance_id).flatten()

        save_pickle(filename='predictions/full_trainingdata_' +
                    self.base_learner.get_name() + '_' +
                    self.scenario_name + '_' + str(self.fold),
                    data=predictions)
def create_approach(approach_names):
    """Instantiate the approaches registered under the given names.

    A name may stand for a single approach (e.g. ``'sunny'``) or for a whole
    family of configurations (e.g. ``'voting'``, ``'samme'``).

    Args:
        approach_names: Iterable of approach identifiers.

    Returns:
        A list with the constructed approaches, grouped in the order the
        names were given. Names that are not registered contribute nothing.
    """

    def precomputed_base_learners():
        # one runner per stored base-learner prediction set
        learner_names = (
            'per_algorithm_RandomForestRegressor_regressor',
            'sunny',
            'isac',
            'satzilla-11',
            'Expectation_algorithm_survival_forest',
            'PAR10_algorithm_survival_forest',
            'multiclass_algorithm_selector',
        )
        return [RunPreComputedBaseLearner(learner_name)
                for learner_name in learner_names]

    def voting_over_combinations(**options):
        # one Voting ensemble per combination returned by get_combinations
        return [Voting(base_learner=combination, **options)
                for combination in get_combinations([1, 2, 3, 4, 5, 6, 7])]

    def optimized_voting():
        # plain, ranking and weighting variant; each gets its own id list
        variants = ({}, {'ranking': True}, {'weighting': True})
        return [Voting(base_learner=[1, 2, 3, 4, 5, 6, 7],
                       pre_computed=True,
                       optimze_base_learner=True,
                       **variant)
                for variant in variants]

    def bagging_family(**options):
        # every Bagging ensemble receives its own fresh base learner
        learner_classes = (PerAlgorithmRegressor, SUNNY, ISAC, SATzilla11,
                           MultiClassAlgorithmSelector)
        return [Bagging(num_base_learner=10,
                        base_learner=learner_class(),
                        **options)
                for learner_class in learner_classes]

    def samme_family():
        learner_names = ('per_algorithm_regressor',
                         'multiclass_algorithm_selector', 'sunny', 'isac')
        return [SAMME(learner_name, num_iterations=20)
                for learner_name in learner_names]

    def stacking_family(**options):
        # the id list is shared by all Stacking instances of one family
        base_learner = [1, 2, 3, 4, 5, 6, 7]
        meta_learner_types = ('per_algorithm_regressor', 'SUNNY', 'ISAC',
                              'SATzilla-11', 'PAR10', 'multiclass')
        return [Stacking(base_learner=base_learner,
                         meta_learner_type=meta_learner_type,
                         pre_computed=True,
                         **options)
                for meta_learner_type in meta_learner_types]

    def prediction_writers():
        # per algorithm: first with, then without predict_full_training_set
        writers = []
        for algorithm in ('per_algorithm_regressor', 'sunny', 'isac',
                          'satzilla', 'expectation', 'par10', 'multiclass'):
            writers.append(
                CreateBaseLearnerPrediction(algorithm=algorithm,
                                            for_cross_validation=False,
                                            predict_full_training_set=True))
            writers.append(
                CreateBaseLearnerPrediction(algorithm=algorithm,
                                            for_cross_validation=False))
        return writers

    # name -> builder returning a list of approaches; every builder is
    # lazy, so nothing is constructed unless its name was requested
    builders = (
        # SBS and VBS
        ('sbs', lambda: [SingleBestSolver()]),
        ('oracle', lambda: [Oracle()]),
        # baselines
        ('ExpectationSurvivalForest',
         lambda: [SurrogateSurvivalForest(criterion='Expectation')]),
        ('PolynomialSurvivalForest',
         lambda: [SurrogateSurvivalForest(criterion='Polynomial')]),
        ('GridSearchSurvivalForest',
         lambda: [SurrogateSurvivalForest(criterion='GridSearch')]),
        ('ExponentialSurvivalForest',
         lambda: [SurrogateSurvivalForest(criterion='Exponential')]),
        ('SurrogateAutoSurvivalForest',
         lambda: [SurrogateAutoSurvivalForest()]),
        ('PAR10SurvivalForest',
         lambda: [SurrogateSurvivalForest(criterion='PAR10')]),
        ('per_algorithm_regressor', lambda: [PerAlgorithmRegressor()]),
        ('imputed_per_algorithm_rf_regressor',
         lambda: [PerAlgorithmRegressor(impute_censored=True)]),
        ('imputed_per_algorithm_ridge_regressor',
         lambda: [PerAlgorithmRegressor(scikit_regressor=Ridge(alpha=1.0),
                                        impute_censored=True)]),
        ('multiclass_algorithm_selector',
         lambda: [MultiClassAlgorithmSelector()]),
        ('sunny', lambda: [SUNNY()]),
        ('snnap', lambda: [SNNAP()]),
        ('satzilla-11', lambda: [SATzilla11()]),
        ('satzilla-07', lambda: [SATzilla07()]),
        ('isac', lambda: [ISAC()]),
        ('base_learner', precomputed_base_learners),
        # voting
        ('voting', lambda: voting_over_combinations(pre_computed=True)),
        ('voting_borda',
         lambda: voting_over_combinations(ranking=True, pre_computed=True)),
        ('voting_weighting',
         lambda: voting_over_combinations(pre_computed=True,
                                          weighting=True)),
        ('voting_optimize', optimized_voting),
        # bagging
        ('bagging-base_learner', lambda: bagging_family()),
        ('bagging_weighting-base_learner',
         lambda: bagging_family(weighting=True)),
        ('bagging_borda-base_learner',
         lambda: bagging_family(use_ranking=True)),
        # boosting
        ('samme', samme_family),
        # stacking
        ('stacking_meta_learner', lambda: stacking_family()),
        ('stacking_feature_selection',
         lambda: stacking_family(feature_selection='variance_threshold')),
        # precompute baseline predictions
        ('create_base_learner_prediction', prediction_writers),
    )

    approaches = []
    for approach_name in approach_names:
        for known_name, build in builders:
            if approach_name == known_name:
                approaches.extend(build())
    return approaches
def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
    """Fit the base learners and the meta learner of the stacking ensemble.

    For every base learner, predictions on the training instances are
    either loaded from ``predictions/cross_validation_*`` files
    (``self.pre_computed``) or produced by cross-validation over the
    sub-folds 1..10 from ``split_scenario``. The predictions are appended
    as extra columns to ``scenario.feature_data`` (which is overwritten as a
    side effect), and the meta learner selected by
    ``self.meta_learner_type`` is trained on the extended features.

    Args:
        scenario: Training scenario; ``feature_data`` and
            ``performance_data`` are read via ``to_numpy()``.
        fold: Fold identifier; forwarded to the learners and used in the
            prediction file names.
        amount_of_training_instances: Forwarded unchanged to every
            ``fit`` call of base and meta learners.
    """
    self.create_base_learner()
    self.scenario_name = scenario.scenario
    self.fold = fold
    self.num_algorithms = len(scenario.algorithms)
    num_instances = len(scenario.instances)
    feature_data = scenario.feature_data.to_numpy()
    performance_data = scenario.performance_data.to_numpy()

    # one block of num_algorithms columns per base learner
    new_feature_data = np.zeros(
        (num_instances, self.num_algorithms * len(self.base_learners)))

    for learner_index, base_learner in enumerate(self.base_learners):
        instance_counter = 0
        predictions = np.zeros((num_instances, self.num_algorithms))

        if self.pre_computed:
            # NOTE(review): load_pickle presumably unpickles local files -
            # only point it at prediction files from a trusted source.
            predictions = load_pickle(
                filename='predictions/cross_validation_' +
                base_learner.get_name() + '_' + scenario.scenario + '_' +
                str(fold))
        else:
            for sub_fold in range(1, 11):
                test_scenario, training_scenario = split_scenario(
                    scenario, sub_fold, num_instances)

                # train base learner
                base_learner.fit(training_scenario, fold,
                                 amount_of_training_instances)

                # create new feature data
                # assumes the test instances of consecutive sub-folds are
                # consecutive rows of feature_data - TODO confirm against
                # split_scenario
                for instance_number in range(
                        instance_counter,
                        instance_counter + len(test_scenario.instances)):
                    prediction = base_learner.predict(
                        feature_data[instance_number], instance_number)
                    predictions[instance_number] = prediction.flatten()

                instance_counter = instance_counter + len(
                    test_scenario.instances)

        # copy this learner's predictions into its column block
        for i in range(num_instances):
            for alo_num in range(self.num_algorithms):
                new_feature_data[i][
                    alo_num + self.num_algorithms *
                    learner_index] = predictions[i][alo_num]

    if self.pre_computed:
        for base_learner in self.base_learners:
            self.predictions.append(
                load_pickle(filename='predictions/' +
                            base_learner.get_name() + '_' +
                            scenario.scenario + '_' + str(fold)))
    else:
        # rebuild the learners and fit them on the complete training data
        self.create_base_learner()
        for base_learner in self.base_learners:
            base_learner.fit(scenario, fold, amount_of_training_instances)

    # add predictions to the features of the instances
    new_feature_data = pd.DataFrame(
        new_feature_data,
        index=scenario.feature_data.index,
        columns=np.arange(self.num_algorithms * len(self.base_learners)))
    new_feature_data = pd.concat([scenario.feature_data, new_feature_data],
                                 axis=1,
                                 sort=False)
    scenario.feature_data = new_feature_data

    # meta learner training with or without feature selection
    if self.meta_learner_type == 'per_algorithm_regressor':
        self.meta_learner = PerAlgorithmRegressor()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SUNNY':
        self.meta_learner = SUNNY()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'ISAC':
        self.meta_learner = ISAC()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SATzilla-11':
        self.meta_learner = SATzilla11()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'multiclass':
        self.meta_learner = MultiClassAlgorithmSelector()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'Expectation':
        self.meta_learner = SurrogateSurvivalForest(
            criterion='Expectation')
        self.algorithm_selection_algorithm = True
    # The two branches below used to test the same 'RandomForest' key, so
    # the random forest was unreachable and 'RandomForest' silently produced
    # a single decision tree. The tree now has its own key.
    elif self.meta_learner_type == 'DecisionTree':
        self.meta_learner = DecisionTreeClassifier()
    elif self.meta_learner_type == 'RandomForest':
        self.meta_learner = RandomForestClassifier()

    if self.algorithm_selection_algorithm:
        self.meta_learner.fit(scenario, fold, amount_of_training_instances)
    else:
        # scikit-learn classifiers are trained on imputed, standardised
        # features with the arg-min algorithm index as label
        label_performance_data = [np.argmin(x) for x in performance_data]
        self.pipe = Pipeline([('imputer', SimpleImputer()),
                              ('standard_scaler', StandardScaler())])
        X_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                          label_performance_data)
        self.meta_learner.fit(X_train, label_performance_data)