def fit(self, datanode, solvers=None):
    model_cnt = 0
    for algo_id in self.stats["include_algorithms"]:
        model_to_eval = self.stats[algo_id]['model_to_eval']
        for idx, (node, config) in enumerate(model_to_eval):
            X, y = node.data
            if self.base_model_mask[model_cnt] == 1:
                estimator = fetch_predict_estimator(self.task_type, config, X, y,
                                                    weight_balance=node.enable_balance,
                                                    data_balance=node.data_balance)
                with open(os.path.join(self.output_dir,
                                       '%s-bagging-model%d' % (self.timestamp, model_cnt)), 'wb') as f:
                    pkl.dump(estimator, f)
                if solvers is not None:
                    fe_savepath = os.path.join(self.output_dir,
                                               '%s-bagging-fe%d' % (self.timestamp, model_cnt))
                    solvers[algo_id].optimizer['fe'].save(node, fe_savepath)
            model_cnt += 1
    return self

def refit(self, solvers=None):
    # Refit models on the whole training data.
    model_cnt = 0
    for algo_id in self.stats["include_algorithms"]:
        model_to_eval = self.stats[algo_id]['model_to_eval']
        for idx, (node, config) in enumerate(model_to_eval):
            X, y = node.data
            if self.weights_[model_cnt] != 0:
                self.logger.info("Refit model %d" % model_cnt)
                estimator = fetch_predict_estimator(self.task_type, config, X, y,
                                                    weight_balance=node.enable_balance,
                                                    data_balance=node.data_balance)
                with open(os.path.join(self.output_dir,
                                       '%s-model%d' % (self.timestamp, model_cnt)), 'wb') as f:
                    pkl.dump(estimator, f)
                if solvers is not None:
                    self.logger.info('Saving FE transformations for model %d' % model_cnt)
                    fe_savepath = os.path.join(self.output_dir,
                                               '%s-fe%d' % (self.timestamp, model_cnt))
                    solvers[algo_id].optimizer['fe'].save(node, fe_savepath)
            model_cnt += 1

def refit(self):
    # Refit models on the whole training data.
    model_cnt = 0
    for algo_id in self.stats["include_algorithms"]:
        model_to_eval = self.stats[algo_id]['model_to_eval']
        for idx, (node, config) in enumerate(model_to_eval):
            X, y = node.data
            if self.weights_[model_cnt] != 0:
                self.logger.info("Refit model %d" % model_cnt)
                estimator = fetch_predict_estimator(self.task_type, config, X, y,
                                                    weight_balance=node.enable_balance,
                                                    data_balance=node.data_balance,
                                                    combined=True)
                with open(os.path.join(self.output_dir,
                                       '%s-model%d' % (self.timestamp, model_cnt)), 'wb') as f:
                    pkl.dump(estimator, f)
            model_cnt += 1

def fit(self, data):
    # Split the training data into phase 1 (base models) and phase 2 (meta-learner).
    test_size = 0.2

    # Train base models on the phase-1 split.
    model_cnt = 0
    suc_cnt = 0
    feature_p2 = None
    for algo_id in self.stats.keys():
        model_to_eval = self.stats[algo_id]
        for idx, (config, _, path) in enumerate(model_to_eval):
            with open(path, 'rb') as f:
                op_list, model = pkl.load(f)
            _node = data.copy_()
            _node = construct_node(_node, op_list, mode='train')
            X, y = _node.data
            if self.task_type in CLS_TASKS:
                x_p1, x_p2, y_p1, y_p2 = train_test_split(X, y, test_size=test_size,
                                                          stratify=data.data[1],
                                                          random_state=1)
            else:
                x_p1, x_p2, y_p1, y_p2 = train_test_split(X, y, test_size=test_size,
                                                          random_state=1)
            if self.base_model_mask[model_cnt] == 1:
                estimator = fetch_predict_estimator(self.task_type, algo_id, config[0],
                                                    x_p1, y_p1,
                                                    weight_balance=_node.enable_balance,
                                                    data_balance=_node.data_balance)
                with open(os.path.join(self.output_dir,
                                       '%s-blending-model%d' % (self.timestamp, model_cnt)),
                          'wb') as f:
                    pkl.dump(estimator, f)
                if self.task_type in CLS_TASKS:
                    pred = estimator.predict_proba(x_p2)
                    n_dim = np.array(pred).shape[1]
                    if n_dim == 2:
                        # Binary classification: keep only the positive-class column.
                        n_dim = 1
                    # Initialize the phase-2 training matrix.
                    if feature_p2 is None:
                        num_samples = len(x_p2)
                        feature_p2 = np.zeros((num_samples, self.ensemble_size * n_dim))
                    if n_dim == 1:
                        feature_p2[:, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred[:, 1:2]
                    else:
                        feature_p2[:, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred
                else:
                    pred = estimator.predict(x_p2).reshape(-1, 1)
                    n_dim = 1
                    # Initialize the phase-2 training matrix.
                    if feature_p2 is None:
                        num_samples = len(x_p2)
                        feature_p2 = np.zeros((num_samples, self.ensemble_size * n_dim))
                    feature_p2[:, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred
                suc_cnt += 1
            model_cnt += 1
    # Train the meta-learner on the phase-2 predictions.
    self.meta_learner.fit(feature_p2, y_p2)
    return self

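# A minimal, self-contained sketch (illustrative, not part of solnml) of how the
# phase-2 feature matrix above is laid out: each successfully trained base model
# occupies a contiguous block of n_dim columns, and n_dim collapses to 1 for binary
# classification because only the positive-class probability is kept.
# `build_meta_features` is a hypothetical helper name.
import numpy as np

def build_meta_features(base_preds, ensemble_size):
    """base_preds: list of (n_samples, n_classes) probability arrays, one per base model."""
    n_dim = base_preds[0].shape[1]
    if n_dim == 2:  # binary: keep only the positive-class column
        n_dim = 1
    num_samples = base_preds[0].shape[0]
    feature_p2 = np.zeros((num_samples, ensemble_size * n_dim))
    for suc_cnt, pred in enumerate(base_preds):
        block = pred[:, 1:2] if n_dim == 1 else pred
        feature_p2[:, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = block
    return feature_p2

# Example: two binary classifiers over 4 samples yield a (4, 2) meta-feature matrix.
p1 = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6], [0.7, 0.3]])
p2 = np.array([[0.8, 0.2], [0.3, 0.7], [0.5, 0.5], [0.6, 0.4]])
print(build_meta_features([p1, p2], ensemble_size=2))
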
def evaluate_bo_optimizer(dataset, time_limit, run_id, seed):
    from solnml.components.fe_optimizers.bo_optimizer import BayesianOptimizationOptimizer

    # Prepare the configuration for random forest.
    from ConfigSpace.hyperparameters import UnParametrizedHyperparameter
    from autosklearn.pipeline.components.classification.random_forest import RandomForest
    cs = RandomForest.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", 'random_forest')
    cs.add_hyperparameter(clf_hp)
    print(cs.get_default_configuration())

    evaluator = ClassificationEvaluator(cs.get_default_configuration(), name='fe',
                                        seed=seed, resampling_strategy='holdout')
    train_data, test_data = load_train_test_data(dataset)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    optimizer = BayesianOptimizationOptimizer(cls_task_type, train_data, evaluator,
                                              'random_forest', 300, 10000, seed,
                                              time_budget=time_limit)
    optimizer.optimize()
    inc = optimizer.incumbent_config
    val_score = 1 - optimizer.evaluate_function(inc)
    print(val_score)
    print(optimizer.incumbent_score)
    optimizer.fetch_nodes(n=10)
    print("Refit finished!")

    final_train_data = optimizer.apply(train_data, optimizer.incumbent, phase='train')
    X_train, y_train = final_train_data.data
    final_test_data = optimizer.apply(test_data, optimizer.incumbent)
    X_test, y_test = final_test_data.data
    clf = fetch_predict_estimator(cls_task_type, cs.get_default_configuration(),
                                  X_train, y_train,
                                  weight_balance=final_train_data.enable_balance,
                                  data_balance=final_train_data.data_balance)
    y_pred = clf.predict(X_test)

    from solnml.components.metrics.cls_metrics import balanced_accuracy
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    save_path = save_dir + 'bo_fe_%s_%d_%d.pkl' % (dataset, time_limit, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score], f)

def fit(self, data):
    # Generate out-of-fold predictions for the phase-2 meta-learner.
    if self.task_type in CLS_TASKS:
        kf = StratifiedKFold(n_splits=self.kfold)
    else:
        kf = KFold(n_splits=self.kfold)

    # Train base models on the k-fold splits of the training data.
    model_cnt = 0
    suc_cnt = 0
    feature_p2 = None
    for algo_id in self.stats["include_algorithms"]:
        model_to_eval = self.stats[algo_id]['model_to_eval']
        for idx, (node, config) in enumerate(model_to_eval):
            X, y = node.data
            if self.base_model_mask[model_cnt] == 1:
                for j, (train, test) in enumerate(kf.split(X, y)):
                    x_p1, x_p2, y_p1, _ = X[train], X[test], y[train], y[test]
                    estimator = fetch_predict_estimator(self.task_type, config, x_p1, y_p1,
                                                        weight_balance=data.enable_balance,
                                                        data_balance=data.data_balance)
                    with open(os.path.join(self.output_dir,
                                           '%s-model%d_part%d' % (self.timestamp, model_cnt, j)),
                              'wb') as f:
                        pkl.dump(estimator, f)
                    if self.task_type in CLS_TASKS:
                        pred = estimator.predict_proba(x_p2)
                        n_dim = np.array(pred).shape[1]
                        if n_dim == 2:
                            # Binary classification: keep only the positive-class column.
                            n_dim = 1
                        # Initialize the phase-2 training matrix.
                        if feature_p2 is None:
                            num_samples = len(train) + len(test)
                            feature_p2 = np.zeros((num_samples, self.ensemble_size * n_dim))
                        if n_dim == 1:
                            feature_p2[test, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred[:, 1:2]
                        else:
                            feature_p2[test, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred
                    else:
                        pred = estimator.predict(x_p2).reshape(-1, 1)
                        n_dim = 1
                        # Initialize the phase-2 training matrix.
                        if feature_p2 is None:
                            num_samples = len(train) + len(test)
                            feature_p2 = np.zeros((num_samples, self.ensemble_size * n_dim))
                        feature_p2[test, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred
                suc_cnt += 1
            model_cnt += 1
    # Train the stacking meta-learner on the out-of-fold predictions.
    self.meta_learner.fit(feature_p2, y)
    return self

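# A minimal, self-contained sketch (illustrative only) of the out-of-fold scheme used
# above: each sample receives its meta-feature from the fold in which it was held out,
# so after all k folds the phase-2 matrix is fully populated without training-data
# leakage. All names and the toy data below are illustrative.
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
y = np.array([0, 1] * 10)
oof = np.zeros((len(X), 1))
for train, test in StratifiedKFold(n_splits=5).split(X, y):
    clf = LogisticRegression().fit(X[train], y[train])
    oof[test, 0] = clf.predict_proba(X[test])[:, 1]  # out-of-fold positive-class proba
# `oof` now plays the role of one base model's column block in feature_p2.
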
def evaluate_ml_algorithm(dataset, algo, run_id, obj_metric, total_resource=20, seed=1,
                          task_type=None):
    print('EVALUATE-%s-%s-%s: run_id=%d' % (dataset, algo, obj_metric, run_id))
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)
    if task_type in CLS_TASKS:
        task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
        print(set(train_data.data[1]))
    metric = get_metric(obj_metric)
    bandit = SecondLayerBandit(task_type, algo, train_data, metric,
                               per_run_time_limit=300, seed=seed,
                               eval_type='holdout', fe_algo='bo',
                               total_resource=total_resource)
    bandit.optimize_fixed_pipeline()

    val_score = bandit.incumbent_perf
    best_config = bandit.inc['hpo']

    fe_optimizer = bandit.optimizer['fe']
    fe_optimizer.fetch_nodes(10)
    best_data_node = fe_optimizer.incumbent
    test_data_node = fe_optimizer.apply(test_data, best_data_node)

    estimator = fetch_predict_estimator(task_type, best_config,
                                        best_data_node.data[0], best_data_node.data[1],
                                        weight_balance=best_data_node.enable_balance,
                                        data_balance=best_data_node.data_balance)
    score = metric(estimator, test_data_node.data[0], test_data_node.data[1]) * metric._sign
    print('Test score', score)

    save_path = save_dir + '%s-%s-%s-%d-%d.pkl' % (dataset, algo, obj_metric, run_id, total_resource)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, algo, score, val_score, task_type], f)

def fit(self, datanode):
    model_cnt = 0
    for algo_id in self.stats["include_algorithms"]:
        model_to_eval = self.stats[algo_id]['model_to_eval']
        for idx, (node, config) in enumerate(model_to_eval):
            X, y = node.data
            if self.base_model_mask[model_cnt] == 1:
                estimator = fetch_predict_estimator(self.task_type, config, X, y,
                                                    weight_balance=node.enable_balance,
                                                    data_balance=node.data_balance,
                                                    combined=True)
                with open(os.path.join(self.output_dir,
                                       '%s-bagging-model%d' % (self.timestamp, model_cnt)), 'wb') as f:
                    pkl.dump(estimator, f)
            model_cnt += 1
    return self

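# A minimal sketch (assumed naming, mirroring the save path used in fit above) of how
# the persisted bagging members could be reloaded at prediction time and averaged.
# `load_bagging_models` and `bagging_predict_proba` are hypothetical helpers.
import os
import pickle as pkl
import numpy as np

def load_bagging_models(output_dir, timestamp, n_models):
    models = []
    for i in range(n_models):
        path = os.path.join(output_dir, '%s-bagging-model%d' % (timestamp, i))
        if os.path.exists(path):  # only masked-in base models were saved
            with open(path, 'rb') as f:
                models.append(pkl.load(f))
    return models

def bagging_predict_proba(models, X):
    # Uniformly average class probabilities over the loaded members.
    return np.mean([m.predict_proba(X) for m in models], axis=0)
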
def optimize(self):
    if self.inner_opt_algorithm in ['rb_hpo', 'fixed']:
        self.optimize_explore_first()
    elif self.inner_opt_algorithm == 'equal':
        self.optimize_equal_resource()
    else:
        raise ValueError('Unsupported optimization method: %s!' % self.inner_opt_algorithm)

    scores = list()
    for _arm in self.arms:
        scores.append(self.sub_bandits[_arm].incumbent_perf)
    scores = np.array(scores)
    algo_idx = np.argmax(scores)
    self.optimal_algo_id = self.arms[algo_idx]
    _best_perf = scores[algo_idx]

    # Keep up to 5 arms whose score is within 10% of the best one.
    # NOTE: multiplying by the threshold assumes non-negative scores; the later
    # variant of this method handles negative incumbents explicitly.
    _threshold, _ensemble_size = 0.90, 5
    idxs = np.argsort(-scores)[:_ensemble_size]
    _algo_ids = [self.arms[idx] for idx in idxs]
    self.nbest_algo_ids = list()
    for _idx, _arm in zip(idxs, _algo_ids):
        if scores[_idx] >= _threshold * _best_perf:
            self.nbest_algo_ids.append(_arm)
    assert len(self.nbest_algo_ids) > 0

    self.logger.info('=' * 50)
    self.logger.info('Best_algo_perf: %s' % str(_best_perf))
    self.logger.info('Best_algo_id: %s' % str(self.optimal_algo_id))
    self.logger.info('Nbest_algo_ids: %s' % str(self.nbest_algo_ids))
    self.logger.info('Arm candidates: %s' % str(self.arms))
    self.logger.info('Best val scores: %s' % str(list(scores)))
    self.logger.info('=' * 50)

    # Fit the best model.
    self.fe_optimizer = self.sub_bandits[self.optimal_algo_id].optimizer['fe']
    if self.fe_algo == 'bo':
        self.fe_optimizer.fetch_nodes(1)

    best_config = self.sub_bandits[self.optimal_algo_id].inc['hpo']
    best_estimator = fetch_predict_estimator(self.task_type, best_config,
                                             self.best_data_node.data[0],
                                             self.best_data_node.data[1],
                                             weight_balance=self.best_data_node.enable_balance,
                                             data_balance=self.best_data_node.data_balance)
    with open(os.path.join(self.output_dir, '%s-best_model' % self.timestamp), 'wb') as f:
        pkl.dump(best_estimator, f)

    if self.ensemble_method is not None:
        # stats = self.fetch_ensemble_members()
        stats = self.fetch_ensemble_members_ano()
        # Ensemble all the intermediate and final models found during the optimization above.
        self.es = EnsembleBuilder(stats=stats,
                                  ensemble_method=self.ensemble_method,
                                  ensemble_size=self.ensemble_size,
                                  task_type=self.task_type,
                                  metric=self.metric,
                                  output_dir=self.output_dir)
        self.es.fit(data=self.original_data)

def __init__(self, stats, ensemble_method: str, ensemble_size: int, task_type: int,
             metric: _BaseScorer, base_save=False, output_dir=None):
    self.stats = stats
    self.ensemble_method = ensemble_method
    self.ensemble_size = ensemble_size
    self.task_type = task_type
    self.metric = metric
    self.output_dir = output_dir

    self.train_predictions = []
    self.config_list = []
    self.train_data_dict = {}
    self.train_labels = None
    self.seed = self.stats['split_seed']
    self.timestamp = str(time.time())
    logger_name = 'EnsembleBuilder'
    self.logger = get_logger(logger_name)

    model_cnt = 0
    for algo_id in self.stats["include_algorithms"]:
        model_to_eval = self.stats[algo_id]['model_to_eval']
        for idx, (node, config) in enumerate(model_to_eval):
            X, y = node.data
            # TODO: expose the validation split ratio as a hyperparameter.
            test_size = 0.33

            if self.task_type in CLS_TASKS:
                ss = StratifiedShuffleSplit(n_splits=1, test_size=test_size,
                                            random_state=self.seed)
            else:
                ss = ShuffleSplit(n_splits=1, test_size=test_size,
                                  random_state=self.seed)

            for train_index, test_index in ss.split(X, y):
                X_train, X_valid = X[train_index], X[test_index]
                y_train, y_valid = y[train_index], y[test_index]

            if self.train_labels is not None:
                assert (self.train_labels == y_valid).all()
            else:
                self.train_labels = y_valid

            estimator = fetch_predict_estimator(self.task_type, config, X_train, y_train,
                                                weight_balance=node.enable_balance,
                                                data_balance=node.data_balance)
            if base_save:
                # Persist the base model for ensemble selection.
                with open(os.path.join(self.output_dir,
                                       '%s-model%d' % (self.timestamp, model_cnt)), 'wb') as f:
                    pkl.dump(estimator, f)

            if self.task_type in CLS_TASKS:
                y_valid_pred = estimator.predict_proba(X_valid)
            else:
                y_valid_pred = estimator.predict(X_valid)
            self.train_predictions.append(y_valid_pred)
            model_cnt += 1

    if len(self.train_predictions) < self.ensemble_size:
        self.ensemble_size = len(self.train_predictions)

    if ensemble_method == 'ensemble_selection':
        return

    if task_type in CLS_TASKS:
        self.base_model_mask = choose_base_models_classification(
            np.array(self.train_predictions), self.ensemble_size)
    else:
        self.base_model_mask = choose_base_models_regression(
            np.array(self.train_predictions), np.array(y_valid), self.ensemble_size)
    self.ensemble_size = sum(self.base_model_mask)

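# Illustrative skeleton of the `stats` argument, reconstructed from the accesses in
# __init__ above. The node/config objects are produced by the search phase; the
# values shown here are placeholders, not a real run.
example_stats = {
    'split_seed': 1,
    'include_algorithms': ['random_forest'],
    'random_forest': {
        'model_to_eval': [
            # (data_node, config) pairs, one per candidate base model
        ],
    },
}
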
def refit(self):
    if self.ensemble_method is not None:
        self.logger.info('Start to refit all the well-performed models!')
        config_path = os.path.join(self.output_dir, '%s_topk_config.pkl' % self.timestamp)
        if not os.path.exists(config_path):
            warnings.warn("Config path %s not found! Please check whether all the evaluations failed!"
                          % config_path)
            return
        with open(config_path, 'rb') as f:
            stats = pkl.load(f)
        for algo_id in stats.keys():
            model_to_eval = stats[algo_id]
            for idx, (config, perf, path) in enumerate(model_to_eval):
                data_node, op_list = parse_config(self.original_data.copy_(), config,
                                                  record=True, if_imbal=self.if_imbal)
                algo_id = config['algorithm']
                estimator = fetch_predict_estimator(self.task_type, algo_id, config,
                                                    data_node.data[0], data_node.data[1],
                                                    weight_balance=data_node.enable_balance,
                                                    data_balance=data_node.data_balance)
                with open(path, 'wb') as f:
                    pkl.dump([op_list, estimator, None], f)
        self.fit_ensemble()
    else:
        self.logger.info('Start to refit the best model!')
        if self.incumbent is None:
            warnings.warn("The best config is None! Please check whether all the evaluations failed!")
            return
        model_path = os.path.join(self.output_dir, '%s_%s.pkl' % (
            self.timestamp, CombinedTopKModelSaver.get_configuration_id(self.incumbent)))
        config = self.incumbent.copy()
        data_node, op_list = parse_config(self.original_data.copy_(), config,
                                          record=True, if_imbal=self.if_imbal)
        algo_id = config['algorithm']
        estimator = fetch_predict_estimator(self.task_type, algo_id, config,
                                            data_node.data[0], data_node.data[1],
                                            weight_balance=data_node.enable_balance,
                                            data_balance=data_node.data_balance)
        with open(model_path, 'wb') as f:
            pkl.dump([op_list, estimator, None], f)

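# A minimal sketch (assumed helpers) of the prediction path for a refitted model:
# reload the [op_list, estimator, _] triple saved above, replay the recorded
# feature-engineering ops on the test node, then predict. `construct_node` is the
# same helper used in the blending fit above; its 'test' mode and the
# `predict_with_refitted` name are assumptions here, not confirmed API.
import pickle as pkl

def predict_with_refitted(path, test_node):
    with open(path, 'rb') as f:
        op_list, estimator, _ = pkl.load(f)
    node = construct_node(test_node.copy_(), op_list, mode='test')
    return estimator.predict(node.data[0])
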
def optimize(self):
    if self.inner_opt_algorithm in ['rb_hpo', 'fixed', 'alter_hpo', 'alter', 'combined']:
        self.optimize_explore_first()
    elif self.inner_opt_algorithm == 'equal':
        self.optimize_equal_resource()
    else:
        raise ValueError('Unsupported optimization method: %s!' % self.inner_opt_algorithm)

    scores = list()
    for _arm in self.arms:
        scores.append(self.sub_bandits[_arm].incumbent_perf)
    scores = np.array(scores)
    algo_idx = np.argmax(scores)
    self.optimal_algo_id = self.arms[algo_idx]
    self.incumbent_perf = scores[algo_idx]

    # Keep up to 5 arms whose score is within 10% of the incumbent; for negative
    # incumbents the band is widened by division instead of multiplication.
    _threshold, _ensemble_size = self.incumbent_perf * 0.90, 5
    if self.incumbent_perf < 0.:
        _threshold = self.incumbent_perf / 0.9

    idxs = np.argsort(-scores)[:_ensemble_size]
    _algo_ids = [self.arms[idx] for idx in idxs]
    self.nbest_algo_ids = list()
    for _idx, _arm in zip(idxs, _algo_ids):
        if scores[_idx] >= _threshold:
            self.nbest_algo_ids.append(_arm)
    assert len(self.nbest_algo_ids) > 0

    self.logger.info('=' * 50)
    self.logger.info('Best_algo_perf: %s' % str(self.incumbent_perf))
    self.logger.info('Best_algo_id: %s' % str(self.optimal_algo_id))
    self.logger.info('Nbest_algo_ids: %s' % str(self.nbest_algo_ids))
    self.logger.info('Arm candidates: %s' % str(self.arms))
    self.logger.info('Best val scores: %s' % str(list(scores)))
    self.logger.info('=' * 50)

    if self.inner_opt_algorithm == 'combined':
        tmp_evaluator = ClassificationEvaluator(None)
        # A temporary optimizer used only for recording FE transformations.
        self.tmp_bo = AnotherBayesianOptimizationOptimizer(0, self.original_data, tmp_evaluator,
                                                           'adaboost', 1, 1, 1)
        # Fit the best model.
        best_config = self.sub_bandits[self.optimal_algo_id].incumbent_config
        self.best_node = self.tmp_bo.fetch_nodes_by_config([best_config])[0]
        best_estimator = fetch_predict_estimator(self.task_type, best_config,
                                                 self.best_node.data[0],
                                                 self.best_node.data[1],
                                                 weight_balance=self.best_node.enable_balance,
                                                 data_balance=self.best_node.data_balance,
                                                 combined=True)
    else:
        # Fit the best model.
        self.fe_optimizer = self.sub_bandits[self.optimal_algo_id].optimizer['fe']
        if self.fe_algo == 'bo':
            self.fe_optimizer.fetch_nodes(1)
        best_config = self.sub_bandits[self.optimal_algo_id].inc['hpo']
        best_estimator = fetch_predict_estimator(self.task_type, best_config,
                                                 self.best_data_node.data[0],
                                                 self.best_data_node.data[1],
                                                 weight_balance=self.best_data_node.enable_balance,
                                                 data_balance=self.best_data_node.data_balance)

    with open(os.path.join(self.output_dir, '%s-best_model' % self.timestamp), 'wb') as f:
        pkl.dump(best_estimator, f)

    if self.ensemble_method is not None:
        if self.inner_opt_algorithm == 'combined':
            eval_dict = {key: self.sub_bandits[key].eval_dict for key in self.include_algorithms}
            stats = fetch_ensemble_members(self.nbest_algo_ids, self.seed, eval_dict, self.tmp_bo)
            from solnml.components.ensemble.combined_ensemble.ensemble_bulider import EnsembleBuilder
        else:
            # stats = self.fetch_ensemble_members_ano()
            stats = self.fetch_ensemble_members()
            from solnml.components.ensemble import EnsembleBuilder

        # Ensemble all the intermediate and final models found during the optimization above.
        self.es = EnsembleBuilder(stats=stats,
                                  ensemble_method=self.ensemble_method,
                                  ensemble_size=self.ensemble_size,
                                  task_type=self.task_type,
                                  metric=self.metric,
                                  output_dir=self.output_dir)
        self.es.fit(data=self.original_data)

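# Worked example of the n-best threshold above (illustrative numbers): scores are
# "higher is better", so an arm qualifies if it is within 10% of the incumbent.
# For a positive incumbent of 0.8, the cutoff is 0.8 * 0.9 = 0.72. For a negative
# incumbent of -0.8 (e.g. a negated error metric), multiplying would give -0.72,
# which lies *above* the incumbent itself and would exclude every arm, tripping the
# assertion; dividing gives -0.8 / 0.9 ~= -0.889, which keeps the 10% band.
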
def execute_func(params):
    # Module-level wrapper so that model fitting can be dispatched to worker processes.
    estimator = fetch_predict_estimator(*params)
    return estimator

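# A minimal sketch (illustrative, not part of the library) of why `execute_func` is a
# module-level function: bound methods and closures do not pickle cleanly, so parallel
# training dispatches through it. `train_in_parallel` and `n_jobs` are hypothetical
# names; each tuple in `param_list` must match the positional signature of
# fetch_predict_estimator.
from multiprocessing import Pool

def train_in_parallel(param_list, n_jobs=4):
    with Pool(processes=n_jobs) as pool:
        return pool.map(execute_func, param_list)
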
def evaluate_evaluation_based_fe(dataset, time_limit, run_id, seed):
    from solnml.components.fe_optimizers.evaluation_based_optimizer import EvaluationBasedOptimizer

    # Prepare the configuration for random forest.
    from ConfigSpace.hyperparameters import UnParametrizedHyperparameter
    from autosklearn.pipeline.components.classification.random_forest import RandomForest
    cs = RandomForest.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", 'random_forest')
    cs.add_hyperparameter(clf_hp)
    print(cs.get_default_configuration())
    """
    Configuration:
      bootstrap, Value: 'True'
      criterion, Value: 'gini'
      estimator, Constant: 'random_forest'
      max_depth, Constant: 'None'
      max_features, Value: 0.5
      max_leaf_nodes, Constant: 'None'
      min_impurity_decrease, Constant: 0.0
      min_samples_leaf, Value: 1
      min_samples_split, Value: 2
      min_weight_fraction_leaf, Constant: 0.0
      n_estimators, Constant: 100
    """
    evaluator = ClassificationEvaluator(cs.get_default_configuration(), name='fe',
                                        seed=seed, resampling_strategy='holdout')
    train_data, test_data = load_train_test_data(dataset)
    optimizer = EvaluationBasedOptimizer(MULTICLASS_CLS, train_data, evaluator,
                                         'random_forest', 300, 10000, seed,
                                         trans_set=None)

    _start_time = time.time()
    _iter_id = 0
    while True:
        if time.time() > _start_time + time_limit or optimizer.early_stopped_flag:
            break
        score, iteration_cost, inc = optimizer.iterate()
        print('%d - %.4f' % (_iter_id, score))
        _iter_id += 1

    final_train_data = optimizer.apply(train_data, optimizer.incumbent)
    val_score = evaluator(None, data_node=final_train_data)
    print('==> Best validation score', val_score, score)

    final_test_data = optimizer.apply(test_data, optimizer.incumbent)
    X_train, y_train = final_train_data.data
    clf = fetch_predict_estimator(MULTICLASS_CLS, cs.get_default_configuration(),
                                  X_train, y_train)
    X_test, y_test = final_test_data.data
    y_pred = clf.predict(X_test)

    from solnml.components.metrics.cls_metrics import balanced_accuracy
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    save_path = save_dir + 'hmab_fe_%s_%d_%d.pkl' % (dataset, time_limit, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score], f)

def fit(self, data, solvers=None):
    # Split the training data into phase 1 (base models) and phase 2 (meta-learner).
    test_size = 0.2

    # Train base models on the phase-1 split.
    model_cnt = 0
    suc_cnt = 0
    feature_p2 = None
    for algo_id in self.stats["include_algorithms"]:
        model_to_eval = self.stats[algo_id]['model_to_eval']
        for idx, (node, config) in enumerate(model_to_eval):
            X, y = node.data
            if self.task_type in CLS_TASKS:
                x_p1, x_p2, y_p1, y_p2 = train_test_split(X, y, test_size=test_size,
                                                          stratify=data.data[1],
                                                          random_state=self.seed)
            else:
                x_p1, x_p2, y_p1, y_p2 = train_test_split(X, y, test_size=test_size,
                                                          random_state=self.seed)
            if self.base_model_mask[model_cnt] == 1:
                estimator = fetch_predict_estimator(self.task_type, config, x_p1, y_p1,
                                                    weight_balance=node.enable_balance,
                                                    data_balance=node.data_balance)
                with open(os.path.join(self.output_dir,
                                       '%s-blending-model%d' % (self.timestamp, model_cnt)),
                          'wb') as f:
                    pkl.dump(estimator, f)
                if solvers is not None:
                    fe_savepath = os.path.join(self.output_dir,
                                               '%s-blending-fe%d' % (self.timestamp, model_cnt))
                    solvers[algo_id].optimizer['fe'].save(node, fe_savepath)
                if self.task_type in CLS_TASKS:
                    pred = estimator.predict_proba(x_p2)
                    n_dim = np.array(pred).shape[1]
                    if n_dim == 2:
                        # Binary classification: keep only the positive-class column.
                        n_dim = 1
                    # Initialize the phase-2 training matrix.
                    if feature_p2 is None:
                        num_samples = len(x_p2)
                        feature_p2 = np.zeros((num_samples, self.ensemble_size * n_dim))
                    if n_dim == 1:
                        feature_p2[:, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred[:, 1:2]
                    else:
                        feature_p2[:, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred
                else:
                    pred = estimator.predict(x_p2).reshape(-1, 1)
                    n_dim = 1
                    # Initialize the phase-2 training matrix.
                    if feature_p2 is None:
                        num_samples = len(x_p2)
                        feature_p2 = np.zeros((num_samples, self.ensemble_size * n_dim))
                    feature_p2[:, suc_cnt * n_dim:(suc_cnt + 1) * n_dim] = pred
                suc_cnt += 1
            model_cnt += 1
    # Train the meta-learner on the phase-2 predictions.
    self.meta_learner.fit(feature_p2, y_p2)
    return self