def transform(self, scenario: ASlibScenario):
    '''transform ASLib scenario data

    Scales the scenario's feature data with the previously fitted
    StandardScaler (if one was fitted); otherwise the scenario is
    returned untouched.

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas

    Returns
    -------
    data.aslib_scenario.ASlibScenario
    '''
    # Nothing to do when no scaler was fitted.
    if not self.scaler:
        return scenario

    self.logger.debug("Applying StandardScaler")
    scaled = self.scaler.transform(np.array(scenario.feature_data.values))
    # Rebuild the DataFrame so the original index/column labels survive scaling.
    scenario.feature_data = pd.DataFrame(data=scaled,
                                         index=scenario.feature_data.index,
                                         columns=scenario.feature_data.columns)
    return scenario
def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
    '''Fit the bagging-style ensemble: train each base learner on its own
    bootstrap sample and (optionally) derive a normalized weight per learner.

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas. NOTE(review): this method
        mutates scenario's data attributes while training — callers should
        not rely on the scenario being unchanged afterwards.
    fold: int
        cross-validation fold identifier
    amount_of_training_instances: int
        number of training instances passed through to each base learner
    '''
    print("Run fit on " + self.get_name() + " for fold " + str(fold))
    self.num_algorithms = len(scenario.algorithms)

    # create all bootstrap samples (one in-sample / out-of-sample pair per learner)
    bootstrap_samples, out_of_sample_samples = self.generate_bootstrap_sample(
        scenario, fold, self.num_base_learner)

    weights_denorm = list()

    # train each base learner on a different sample
    for index in range(self.num_base_learner):
        self.current_iteration = index + 1
        self.base_learners.append(copy.deepcopy(self.base_learner))
        # keep a pristine copy so 'original_set' weighting can evaluate on it
        original_scenario = copy.deepcopy(scenario)
        scenario.feature_data, scenario.performance_data, scenario.runstatus_data, scenario.feature_runstatus_data, scenario.feature_cost_data = bootstrap_samples[
            index]

        self.base_learners[index].fit(scenario, fold, amount_of_training_instances)

        if self.weighting:
            # choose which data the learner's weight is computed on
            if self.weight_type == 'oos':
                scenario.feature_data, scenario.performance_data, scenario.runstatus_data, scenario.feature_runstatus_data, scenario.feature_cost_data = out_of_sample_samples[
                    index]
            elif self.weight_type == 'original_set':
                scenario = original_scenario

            weights_denorm.append(
                base_learner_performance(scenario, len(scenario.feature_data),
                                         self.base_learners[index]))

        #if self.current_iteration != self.num_base_learner:
        #    write_to_database(scenario, self, fold)

    # Turn around values (lowest (best) gets highest weight) and normalize.
    # BUGFIX(review): previously max() was called on an empty list when
    # self.weighting was disabled, raising ValueError — guard against that.
    if weights_denorm:
        worst = max(weights_denorm)
        weights_denorm = [worst / float(i + 1) for i in weights_denorm]
        best = max(weights_denorm)
        self.weights = [float(i) / best for i in weights_denorm]
def transform(self, scenario: ASlibScenario):
    '''transform ASLib scenario data

    Replaces missing feature values using the previously fitted imputer.

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas

    Returns
    -------
    data.aslib_scenario.ASlibScenario
    '''
    self.logger.debug("Impute Missing Feature Values")

    original = scenario.feature_data
    imputed = self.imputer.transform(np.array(original.values))
    # Re-wrap as a DataFrame so index and column labels are preserved.
    scenario.feature_data = pd.DataFrame(data=imputed,
                                         index=original.index,
                                         columns=original.columns)
    return scenario
def transform(self, scenario: ASlibScenario):
    '''transform ASLib scenario data

    Projects the scenario's feature data with the previously fitted PCA
    (if any). Since PCA changes dimensionality, the resulting columns are
    renamed generically as f0, f1, ...

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas

    Returns
    -------
    data.aslib_scenario.ASlibScenario
    '''
    if self.pca:
        self.logger.debug("Applying PCA")
        projected = self.pca.transform(np.array(scenario.feature_data.values))
        # Column count may differ after projection — use synthetic names.
        new_names = ["f%d" % (i) for i in range(projected.shape[1])]
        scenario.feature_data = pd.DataFrame(data=projected,
                                             index=scenario.feature_data.index,
                                             columns=new_names)
    return scenario
def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
    '''Fit the stacking ensemble: build meta-features from base-learner
    predictions, optionally select features, then fit the meta learner.

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas. NOTE(review): this method
        replaces scenario.feature_data with the stacked meta-features.
    fold: int
        cross-validation fold identifier (also used as random_state)
    amount_of_training_instances: int
        number of training instances passed through to base learners
    '''
    # setup the ensemble
    self.create_base_learner()
    self.scenario_name = scenario.scenario
    self.fold = fold
    self.num_algorithms = len(scenario.algorithms)
    num_instances = len(scenario.instances)
    feature_data = scenario.feature_data.to_numpy()
    performance_data = scenario.performance_data.to_numpy()

    # new features in matrix [instances x predictions]
    if self.new_feature_type == 'full':
        new_feature_data = np.zeros(
            (num_instances, self.num_algorithms * len(self.base_learners)))
    elif self.new_feature_type == 'small':
        new_feature_data = np.zeros(
            (num_instances, len(self.base_learners)))

    # if predictions are precomputed
    if self.pre_computed:
        for base_learner in self.base_learners:
            self.predictions.append(
                load_pickle(filename='predictions/' + base_learner.get_name() +
                            '_' + scenario.scenario + '_' + str(fold)))

    # create new features for every base learner on each instance
    for learner_index, base_learner in enumerate(self.base_learners):

        # load pre computed predictions
        if self.pre_computed:
            if self.cross_validation:
                predictions = load_pickle(
                    filename='predictions/cross_validation_' +
                    base_learner.get_name() + '_' + scenario.scenario + '_' +
                    str(fold))
            else:
                predictions = load_pickle(
                    filename='predictions/full_trainingdata_' +
                    base_learner.get_name() + '_' + scenario.scenario + '_' +
                    str(fold))

        # create predictions, if they are not pre computed
        else:
            # if cross validation is used (h2o)
            if self.cross_validation:
                # BUGFIX(review): predictions was filled by index below but
                # never initialized in this branch (NameError); initialize it
                # the same way the non-CV branch does.
                predictions = np.zeros((num_instances, self.num_algorithms))
                instance_counter = 0

                for sub_fold in range(1, 11):
                    test_scenario, training_scenario = split_scenario(
                        scenario, sub_fold, num_instances)

                    # train base learner
                    base_learner.fit(training_scenario, fold,
                                     amount_of_training_instances)

                    # create new feature data
                    for instance_number in range(
                            instance_counter,
                            instance_counter + len(test_scenario.instances)):
                        prediction = base_learner.predict(
                            feature_data[instance_number], instance_number)
                        predictions[instance_number] = prediction.flatten()

                    instance_counter = instance_counter + len(
                        test_scenario.instances)

                # fit base learner on the original training data
                # NOTE(review): this re-creates and re-fits ALL base learners
                # inside the per-learner loop — preserved as-is, but verify
                # this is intended rather than a single fit after the loop.
                self.create_base_learner()
                for base_learner in self.base_learners:
                    base_learner.fit(scenario, fold,
                                     amount_of_training_instances)

            # if no cross validation is used
            else:
                base_learner.fit(scenario, fold, amount_of_training_instances)
                predictions = np.zeros(
                    (len(scenario.instances), self.num_algorithms))
                for instance_id, instance_feature in enumerate(feature_data):
                    predictions[instance_id] = base_learner.predict(
                        instance_feature, instance_id)

        # insert predictions to new feature data matrix
        for i in range(num_instances):
            if self.new_feature_type == 'full':
                # one column per (learner, algorithm) pair
                for alo_num in range(self.num_algorithms):
                    new_feature_data[i][
                        alo_num + self.num_algorithms *
                        learner_index] = predictions[i][alo_num]
            elif self.new_feature_type == 'small':
                # one column per learner: index of its predicted best algorithm
                new_feature_data[i][learner_index] = np.argmin(predictions[i])

    # add predictions to the features of the instances
    if self.new_feature_type == 'full':
        new_columns = np.arange(self.num_algorithms * len(self.base_learners))
    elif self.new_feature_type == 'small':
        new_columns = np.arange(len(self.base_learners))
    new_feature_data = pd.DataFrame(new_feature_data,
                                    index=scenario.feature_data.index,
                                    columns=new_columns)

    if self.meta_learner_input == 'full':
        new_feature_data = pd.concat([scenario.feature_data, new_feature_data],
                                     axis=1,
                                     sort=False)
    elif self.meta_learner_input == 'predictions_only':
        pass
    else:
        sys.exit('Wrong meta learner input type option')
    scenario.feature_data = new_feature_data

    # meta learner selection
    if self.meta_learner_type == 'per_algorithm_regressor':
        self.meta_learner = PerAlgorithmRegressor(
            feature_importances=self.feature_importance)
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SUNNY':
        self.meta_learner = SUNNY()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'ISAC':
        self.meta_learner = ISAC()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SATzilla-11':
        self.meta_learner = SATzilla11()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'multiclass':
        self.meta_learner = MultiClassAlgorithmSelector(
            feature_importance=self.feature_importance)
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'Expectation':
        self.meta_learner = SurrogateSurvivalForest(criterion='Expectation')
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'PAR10':
        self.meta_learner = SurrogateSurvivalForest(criterion='PAR10')
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'RandomForest':
        self.meta_learner = RandomForestClassifier(random_state=fold)
    elif self.meta_learner_type == 'SVM':
        self.meta_learner = LinearSVC(random_state=fold, max_iter=10000)

    # feature selection
    if self.feature_selection == 'variance_threshold':
        self.feature_selector = VarianceThreshold(threshold=.8 * (1 - .8))
        self.feature_selector.fit(scenario.feature_data)
        scenario.feature_data = pd.DataFrame(
            data=self.feature_selector.transform(scenario.feature_data))
    elif self.feature_selection == 'select_k_best':
        self.feature_selector = SelectKBest(f_classif, k=self.num_algorithms)
        label_performance_data = [np.argmin(x) for x in performance_data]
        self.imputer = SimpleImputer()
        scenario.feature_data = self.imputer.fit_transform(
            scenario.feature_data)
        self.feature_selector.fit(scenario.feature_data,
                                  label_performance_data)
        scenario.feature_data = pd.DataFrame(
            data=self.feature_selector.transform(scenario.feature_data))

    # fit meta learner
    if self.algorithm_selection_algorithm:
        self.meta_learner.fit(scenario, fold, amount_of_training_instances)
    else:
        # sklearn-style meta learner: impute + scale, label = best algorithm
        label_performance_data = [np.argmin(x) for x in performance_data]
        self.pipe = Pipeline([('imputer', SimpleImputer()),
                              ('standard_scaler', StandardScaler())])
        x_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                          label_performance_data)
        self.meta_learner.fit(x_train, label_performance_data)
def fit(self, scenario: ASlibScenario, fold: int, amount_of_training_instances: int):
    '''Fit the cross-validation stacking ensemble: each base learner's
    out-of-fold predictions become extra features for the meta learner.

    Arguments
    ---------
    scenario: data.aslib_scenario.ASlibScenario
        ASlib Scenario with all data in pandas. NOTE(review): this method
        replaces scenario.feature_data with original + stacked features.
    fold: int
        cross-validation fold identifier
    amount_of_training_instances: int
        number of training instances passed through to base learners
    '''
    self.create_base_learner()
    self.scenario_name = scenario.scenario
    self.fold = fold
    self.num_algorithms = len(scenario.algorithms)
    num_instances = len(scenario.instances)
    feature_data = scenario.feature_data.to_numpy()
    performance_data = scenario.performance_data.to_numpy()

    # meta-feature matrix: one column per (learner, algorithm) pair
    new_feature_data = np.zeros(
        (num_instances, self.num_algorithms * len(self.base_learners)))

    for learner_index, base_learner in enumerate(self.base_learners):
        instance_counter = 0
        predictions = np.zeros((num_instances, self.num_algorithms))

        if self.pre_computed:
            predictions = load_pickle(
                filename='predictions/cross_validation_' +
                base_learner.get_name() + '_' + scenario.scenario + '_' +
                str(fold))
        else:
            # 10-fold inner cross validation: predict each sub-fold with a
            # learner trained on the remaining data
            for sub_fold in range(1, 11):
                test_scenario, training_scenario = split_scenario(
                    scenario, sub_fold, num_instances)

                # train base learner
                base_learner.fit(training_scenario, fold,
                                 amount_of_training_instances)

                # create new feature data
                for instance_number in range(
                        instance_counter,
                        instance_counter + len(test_scenario.instances)):
                    prediction = base_learner.predict(
                        feature_data[instance_number], instance_number)
                    predictions[instance_number] = prediction.flatten()

                instance_counter = instance_counter + len(
                    test_scenario.instances)

        for i in range(num_instances):
            for alo_num in range(self.num_algorithms):
                new_feature_data[i][alo_num + self.num_algorithms *
                                    learner_index] = predictions[i][alo_num]

    if self.pre_computed:
        for base_learner in self.base_learners:
            self.predictions.append(
                load_pickle(filename='predictions/' + base_learner.get_name() +
                            '_' + scenario.scenario + '_' + str(fold)))
    else:
        # refit the base learners on the full training data for prediction time
        self.create_base_learner()
        for base_learner in self.base_learners:
            base_learner.fit(scenario, fold, amount_of_training_instances)

    # add predictions to the features of the instances
    new_feature_data = pd.DataFrame(
        new_feature_data,
        index=scenario.feature_data.index,
        columns=np.arange(self.num_algorithms * len(self.base_learners)))
    new_feature_data = pd.concat([scenario.feature_data, new_feature_data],
                                 axis=1,
                                 sort=False)
    scenario.feature_data = new_feature_data

    # meta learner selection
    if self.meta_learner_type == 'per_algorithm_regressor':
        self.meta_learner = PerAlgorithmRegressor()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SUNNY':
        self.meta_learner = SUNNY()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'ISAC':
        self.meta_learner = ISAC()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'SATzilla-11':
        self.meta_learner = SATzilla11()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'multiclass':
        self.meta_learner = MultiClassAlgorithmSelector()
        self.algorithm_selection_algorithm = True
    elif self.meta_learner_type == 'Expectation':
        self.meta_learner = SurrogateSurvivalForest(criterion='Expectation')
        self.algorithm_selection_algorithm = True
    # BUGFIX(review): 'RandomForest' was tested twice; the first (duplicate)
    # branch assigned DecisionTreeClassifier and shadowed the real
    # RandomForest branch. Restore the intended 'DecisionTree' option.
    elif self.meta_learner_type == 'DecisionTree':
        self.meta_learner = DecisionTreeClassifier()
    elif self.meta_learner_type == 'RandomForest':
        self.meta_learner = RandomForestClassifier()

    if self.algorithm_selection_algorithm:
        self.meta_learner.fit(scenario, fold, amount_of_training_instances)
    else:
        # sklearn-style meta learner: impute + scale, label = best algorithm
        label_performance_data = [np.argmin(x) for x in performance_data]
        self.pipe = Pipeline([('imputer', SimpleImputer()),
                              ('standard_scaler', StandardScaler())])
        X_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                          label_performance_data)
        self.meta_learner.fit(X_train, label_performance_data)