def predict_and_loss(self, train=False):
    if train:
        Y_pred = self.predict_function(self.X_train, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_train, prediction=Y_pred,
            task_type=self.task_type, metric=self.metric,
            num_classes=self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)
    else:
        Y_pred = self.predict_function(self.X_test, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_test, prediction=Y_pred,
            task_type=self.task_type, metric=self.metric,
            num_classes=self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    return err, Y_pred, Y_pred, Y_pred
def predict(self, train=False):
    if train:
        Y_pred = self.predict_function(self.X_train, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_train, prediction=Y_pred,
            task_type=self.task_type, metric=self.metric,
            num_classes=self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)
    else:
        Y_pred = self.predict_function(self.X_test, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_test, prediction=Y_pred,
            task_type=self.task_type, metric=self.metric,
            num_classes=self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    if self.with_predictions:
        return err, Y_pred, Y_pred, Y_pred
    return err
def original_ensemble_selection(predictions, labels, ensemble_size, task_type,
                                metric, do_pruning=False):
    """Rich Caruana's ensemble selection method."""
    ensemble = []
    trajectory = []
    order = []

    if do_pruning:
        n_best = 20
        indices = pruning(predictions, labels, n_best, task_type, metric)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = calculate_score(
                labels, ensemble_, task_type, metric, ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size -= n_best

    for i in range(ensemble_size):
        scores = np.zeros([predictions.shape[0]])
        for j, pred in enumerate(predictions):
            ensemble.append(pred)
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            scores[j] = calculate_score(labels, ensemble_prediction,
                                        task_type, metric,
                                        ensemble_prediction.shape[1])
            ensemble.pop()
        best = np.nanargmax(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

    return np.array(order), np.array(trajectory)
def _fast(self, predictions, labels):
    """Fast version of Rich Caruana's ensemble selection method."""
    self.num_input_models_ = len(predictions)

    ensemble = []
    trajectory = []
    order = []

    ensemble_size = self.ensemble_size

    if self.sorted_initialization:
        n_best = 20
        indices = self._sorted_initialization(predictions, labels, n_best)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = calculate_score(
                labels, ensemble_, self.task_type, self.metric,
                ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size -= n_best

    for i in range(ensemble_size):
        scores = np.zeros((len(predictions)))
        s = len(ensemble)
        if s == 0:
            weighted_ensemble_prediction = np.zeros(predictions[0].shape)
        else:
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            weighted_ensemble_prediction = (s / float(s + 1)) * \
                ensemble_prediction
        for j, pred in enumerate(predictions):
            fant_ensemble_prediction = weighted_ensemble_prediction + \
                (1. / float(s + 1)) * pred
            scores[j] = calculate_score(labels, fant_ensemble_prediction,
                                        self.task_type, self.metric,
                                        fant_ensemble_prediction.shape[1])
        best = np.nanargmax(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

        # Handle special case
        if len(predictions) == 1:
            break

    self.indices_ = order
    self.trajectory_ = trajectory
    self.train_score_ = trajectory[-1]
def ensemble_selection(predictions, labels, ensemble_size, task_type, metric,
                       do_pruning=False):
    """Fast version of Rich Caruana's ensemble selection method."""
    ensemble = []
    trajectory = []
    order = []

    if do_pruning:
        n_best = 20
        indices = pruning(predictions, labels, n_best, task_type, metric)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = calculate_score(labels, ensemble_,
                                                   task_type, metric,
                                                   ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size -= n_best

    for i in range(ensemble_size):
        scores = np.zeros([predictions.shape[0]])
        s = len(ensemble)
        if s == 0:
            weighted_ensemble_prediction = np.zeros(predictions[0].shape)
        else:
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            weighted_ensemble_prediction = (s / float(s + 1)) * \
                ensemble_prediction
        for j, pred in enumerate(predictions):
            # ensemble.append(pred)
            # ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            fant_ensemble_prediction = weighted_ensemble_prediction + \
                (1. / float(s + 1)) * pred
            scores[j] = calculate_score(labels, fant_ensemble_prediction,
                                        task_type, metric,
                                        fant_ensemble_prediction.shape[1])
            # ensemble.pop()
        best = np.nanargmax(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

    return np.array(order), np.array(trajectory)
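# Illustrative sketch (not part of auto-sklearn): the "fast" variant above relies on
# the identity mean(ensemble + [pred]) == s/(s+1) * mean(ensemble) + 1/(s+1) * pred,
# so every candidate can be scored without re-averaging the whole ensemble. The toy
# shapes and values below are assumptions made only for this example.
if __name__ == '__main__':
    import numpy as np
    rng = np.random.RandomState(0)
    ensemble = [rng.rand(5, 3) for _ in range(4)]  # 4 models, 5 samples, 3 classes
    pred = rng.rand(5, 3)                          # candidate model
    s = len(ensemble)
    full_mean = np.mean(np.array(ensemble + [pred]), axis=0)
    fast_mean = (s / float(s + 1)) * np.mean(np.array(ensemble), axis=0) + \
        (1. / float(s + 1)) * pred
    assert np.allclose(full_mean, fast_mean)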
def pruning(predictions, labels, n_best, task_type, metric):
    perf = np.zeros([predictions.shape[0]])
    for i, p in enumerate(predictions):
        # Score each model's own predictions, not the stacked array.
        perf[i] = calculate_score(labels, p, task_type, metric,
                                  predictions.shape[1])
    indices = np.argsort(perf)[perf.shape[0] - n_best:]
    return indices
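# Illustrative sketch (not part of auto-sklearn): np.argsort sorts ascending, so
# keeping the last n_best entries selects the n_best highest-scoring models. The
# scores below are made up for this example.
if __name__ == '__main__':
    import numpy as np
    perf = np.array([0.61, 0.85, 0.42, 0.90, 0.73])
    n_best = 2
    print(np.argsort(perf)[perf.shape[0] - n_best:])  # -> [1 3], the two best models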
def predict(self):
    Y_optimization_pred = self.predict_function(self.X_optimization,
                                                self.model, self.task_type)
    if self.X_valid is not None:
        Y_valid_pred = self.predict_function(self.X_valid, self.model,
                                             self.task_type)
    else:
        Y_valid_pred = None
    if self.X_test is not None:
        Y_test_pred = self.predict_function(self.X_test, self.model,
                                            self.task_type)
    else:
        Y_test_pred = None

    score = calculate_score(
        self.Y_optimization, Y_optimization_pred, self.task_type,
        self.metric, self.D.info['label_num'],
        all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    if self.with_predictions:
        return err, Y_optimization_pred, Y_valid_pred, Y_test_pred
    return err
def _slow(self, predictions, labels):
    """Rich Caruana's ensemble selection method."""
    self.num_input_models_ = len(predictions)

    ensemble = []
    trajectory = []
    order = []

    ensemble_size = self.ensemble_size

    if self.sorted_initialization:
        n_best = 20
        indices = self._sorted_initialization(predictions, labels, n_best)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = calculate_score(
                labels, ensemble_, self.task_type, self.metric,
                ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size -= n_best

    for i in range(ensemble_size):
        scores = np.zeros([predictions.shape[0]])
        for j, pred in enumerate(predictions):
            ensemble.append(pred)
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            scores[j] = calculate_score(labels, ensemble_prediction,
                                        self.task_type, self.metric,
                                        ensemble_prediction.shape[1])
            ensemble.pop()
        best = np.nanargmax(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

        # Handle special case
        if len(predictions) == 1:
            break

    self.indices_ = np.array(order)
    self.trajectory_ = np.array(trajectory)
    self.train_score_ = trajectory[-1]
def _sorted_initialization(self, predictions, labels, n_best):
    perf = np.zeros([predictions.shape[0]])
    for idx, prediction in enumerate(predictions):
        perf[idx] = calculate_score(labels, prediction, self.task_type,
                                    self.metric, predictions.shape[1])
    indices = np.argsort(perf)[perf.shape[0] - n_best:]
    return indices
def _sorted_initialization(self, predictions, labels, n_best):
    perf = np.zeros([predictions.shape[0]])
    for i, p in enumerate(predictions):
        # Score each model's own predictions, not the stacked array.
        perf[i] = calculate_score(labels, p, self.task_type, self.metric,
                                  predictions.shape[1])
    indices = np.argsort(perf)[perf.shape[0] - n_best:]
    return indices
def _loss(self, Y_optimization_pred, Y_valid_pred, Y_test_pred):
    inner_scores = defaultdict(list)

    for outer_fold in range(self.outer_cv_folds):
        for inner_fold in range(self.inner_cv_folds):
            inner_train_indices, inner_test_indices = \
                self.inner_indices[outer_fold][inner_fold]
            Y_test = self.Y_train[inner_test_indices]
            X_test = self.X_train[inner_test_indices]
            model = self.inner_models[outer_fold][inner_fold]
            Y_hat = self.predict_function(
                X_test, model, self.task_type,
                Y_train=self.Y_train[inner_train_indices])
            scores = calculate_score(
                Y_test, Y_hat, self.task_type, self.metric,
                self.D.info['label_num'],
                all_scoring_functions=self.all_scoring_functions)

            if self.all_scoring_functions:
                for score_name in scores:
                    inner_scores[score_name].append(scores[score_name])
            else:
                inner_scores[self.metric].append(scores)

    # Average the scores!
    if self.all_scoring_functions:
        inner_err = {key: 1 - np.mean(inner_scores[key])
                     for key in inner_scores}
        outer_err = {'outer:%s' % METRIC_TO_STRING[key]:
                     1 - np.mean(self.outer_scores_[key])
                     for key in self.outer_scores_}
        inner_err.update(outer_err)
    else:
        inner_err = 1 - np.mean(inner_scores[self.metric])

    return inner_err, Y_optimization_pred, Y_valid_pred, Y_test_pred
def _loss(self, y_true, y_hat):
    if self.configuration is None:
        if self.all_scoring_functions:
            return {self.metric: 1.0}
        else:
            return 1.0

    score = calculate_score(
        y_true, y_hat, self.task_type, self.metric,
        self.D.info['label_num'],
        all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    return err
def predict(self):
    Y_optimization_pred = [None] * self.cv_folds
    Y_targets = [None] * self.cv_folds
    Y_valid_pred = [None] * self.cv_folds
    Y_test_pred = [None] * self.cv_folds

    for i in range(self.cv_folds):
        # To support prediction when only partial_fit was called
        if self.models[i] is None:
            if self.partial:
                continue
            else:
                raise ValueError('Did not fit all models for the CV fold. '
                                 'Try increasing the time for the ML '
                                 'algorithm or decrease the number of folds'
                                 ' if this happens too often.')

        train_indices, test_indices = self.indices[i]
        opt_pred = self.predict_function(self.X_train[test_indices],
                                         self.models[i], self.task_type,
                                         self.Y_train[train_indices])
        Y_optimization_pred[i] = opt_pred
        Y_targets[i] = self.Y_train[test_indices]

        if self.X_valid is not None:
            X_valid = self.X_valid.copy()
            valid_pred = self.predict_function(X_valid, self.models[i],
                                               self.task_type,
                                               self.Y_train[train_indices])
            Y_valid_pred[i] = valid_pred

        if self.X_test is not None:
            X_test = self.X_test.copy()
            test_pred = self.predict_function(X_test, self.models[i],
                                              self.task_type,
                                              self.Y_train[train_indices])
            Y_test_pred[i] = test_pred

    Y_optimization_pred = np.concatenate(
        [Y_optimization_pred[i] for i in range(self.cv_folds)
         if Y_optimization_pred[i] is not None])
    Y_targets = np.concatenate([Y_targets[i] for i in range(self.cv_folds)
                                if Y_targets[i] is not None])

    if self.X_valid is not None:
        Y_valid_pred = np.array([Y_valid_pred[i]
                                 for i in range(self.cv_folds)
                                 if Y_valid_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_valid_pred.shape) == 3:
            Y_valid_pred = np.nanmean(Y_valid_pred, axis=0)

    if self.X_test is not None:
        Y_test_pred = np.array([Y_test_pred[i]
                                for i in range(self.cv_folds)
                                if Y_test_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_test_pred.shape) == 3:
            Y_test_pred = np.nanmean(Y_test_pred, axis=0)

    self.Y_optimization = Y_targets

    score = calculate_score(Y_targets, Y_optimization_pred, self.task_type,
                            self.metric, self.D.info['label_num'],
                            all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    if self.with_predictions:
        return err, Y_optimization_pred, Y_valid_pred, Y_test_pred
    return err
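# Illustrative sketch (not part of auto-sklearn): when several CV folds produce
# validation/test predictions, stacking them gives a (n_folds, n_samples, n_classes)
# array, and np.nanmean over axis 0 averages the fold models into a single prediction.
# Shapes and values are assumptions made only for this example.
if __name__ == '__main__':
    import numpy as np
    fold_preds = np.array([np.full((4, 2), 0.2), np.full((4, 2), 0.6)])  # 2 folds
    assert len(fold_preds.shape) == 3
    averaged = np.nanmean(fold_preds, axis=0)
    assert np.allclose(averaged, 0.4)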
def main(self):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0
    last_hash = None
    current_hash = None

    backend = Backend(self.output_dir, self.autosklearn_tmp_dir)
    dir_ensemble = os.path.join(self.autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(self.autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(self.autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    dir_ensemble_list_mtimes = []

    self.logger.debug('Starting main loop with %f seconds and %d iterations '
                      'left.' % (self.limit - used_time, num_iteration))
    while used_time < self.limit or (self.max_iterations > 0 and
                                     self.max_iterations >= num_iteration):
        num_iteration += 1
        self.logger.debug('Time left: %f', self.limit - used_time)
        self.logger.debug('Time last ensemble building: %f', time_iter)

        # Reload the ensemble targets every iteration, important, because cv may
        # update the ensemble targets in the cause of running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            self.logger.debug('Prediction directory %s does not exist!' %
                              dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if self.shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % self.seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % self.seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []

        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            self.logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            self.logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('index_run' + str(index_run))
        watch.start_task('ensemble_iter_' + str(num_iteration))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        backup_num_runs = []
        model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
        if self.ensemble_nbest is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the model that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            if model_name.endswith("/"):
                model_name = model_name[:-1]
            basename = os.path.basename(model_name)

            try:
                if self.precision == "16":
                    predictions = np.load(os.path.join(
                        dir_ensemble, basename)).astype(dtype=np.float16)
                elif self.precision == "32":
                    predictions = np.load(os.path.join(
                        dir_ensemble, basename)).astype(dtype=np.float32)
                elif self.precision == "64":
                    predictions = np.load(os.path.join(
                        dir_ensemble, basename)).astype(dtype=np.float64)
                else:
                    predictions = np.load(os.path.join(dir_ensemble, basename))
                score = calculate_score(targets_ensemble, predictions,
                                        self.task_type, self.metric,
                                        predictions.shape[1])
            except Exception as e:
                self.logger.warning('Error loading %s: %s', basename, e)
                score = -1

            model_names_to_scores[model_name] = score
            match = model_and_automl_re.search(model_name)
            automl_seed = int(match.group(1))
            num_run = int(match.group(2))

            if self.ensemble_nbest is not None:
                if score <= 0.001:
                    self.logger.error('Model only predicts at random: ' +
                                      model_name + ' has score: ' + str(score))
                    backup_num_runs.append((automl_seed, num_run))
                # If we have less models in our ensemble than ensemble_nbest add
                # the current model if it is better than random
                elif len(scores_nbest) < self.ensemble_nbest:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append((automl_seed, num_run))
                    model_names.append(model_name)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))

                    # If the current model is better than the worst model in
                    # our ensemble replace it by the current model
                    if scores_nbest[idx] < score:
                        self.logger.debug(
                            'Worst model in our ensemble: %s with score %f '
                            'will be replaced by model %s with score %f',
                            model_names[idx], scores_nbest[idx], model_name,
                            score)
                        # Exclude the old model
                        del scores_nbest[idx]
                        scores_nbest.append(score)
                        del include_num_runs[idx]
                        del indices_nbest[idx]
                        indices_nbest.append(model_idx)
                        include_num_runs.append((automl_seed, num_run))
                        del model_names[idx]
                        model_names.append(model_name)

                    # Otherwise exclude the current model from the ensemble
                    else:
                        # include_num_runs.append(True)
                        pass

            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    # include_num_runs.append(True)
                    self.logger.error('Model only predicts at random: ' +
                                      model_name + ' has score: ' + str(score))
                    backup_num_runs.append((automl_seed, num_run))
                else:
                    include_num_runs.append((automl_seed, num_run))

            model_idx += 1

        # If there is no model better than random guessing, we have to use
        # all models which do random guessing
        if len(include_num_runs) == 0:
            include_num_runs = backup_num_runs

        indices_to_model_names = dict()
        indices_to_run_num = dict()
        for i, model_name in enumerate(dir_ensemble_list):
            match = model_and_automl_re.search(model_name)
            automl_seed = int(match.group(1))
            num_run = int(match.group(2))
            if (automl_seed, num_run) in include_num_runs:
                num_indices = len(indices_to_model_names)
                indices_to_model_names[num_indices] = model_name
                indices_to_run_num[num_indices] = (automl_seed, num_run)

        try:
            all_predictions_train, all_predictions_valid, all_predictions_test = \
                self.get_all_predictions(dir_ensemble, dir_ensemble_list,
                                         dir_valid, dir_valid_list,
                                         dir_test, dir_test_list,
                                         include_num_runs,
                                         model_and_automl_re,
                                         self.precision)
        except IOError:
            self.logger.error('Could not load the predictions.')
            continue

        if len(include_num_runs) == 0:
            self.logger.error('All models do just random guessing')
            time.sleep(2)
            continue

        else:
            ensemble = EnsembleSelection(ensemble_size=self.ensemble_size,
                                         task_type=self.task_type,
                                         metric=self.metric)

            try:
                ensemble.fit(all_predictions_train, targets_ensemble,
                             include_num_runs)
                self.logger.info(ensemble)
            except ValueError as e:
                self.logger.error('Caught ValueError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except IndexError as e:
                self.logger.error('Caught IndexError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except Exception as e:
                self.logger.error('Caught error! %s', str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue

            # Output the score
            self.logger.info('Training performance: %f' %
                             ensemble.train_score_)
            self.logger.info('Building the ensemble took %f seconds' %
                             watch.wall_elapsed('ensemble_iter_' +
                                                str(num_iteration)))

        # Set this variable here to avoid re-running the ensemble builder
        # every two seconds in case the ensemble did not change
        current_num_models = len(dir_ensemble_list)

        ensemble_predictions = ensemble.predict(all_predictions_train)
        if sys.version_info[0] == 2:
            ensemble_predictions.flags.writeable = False
            current_hash = hash(ensemble_predictions.data)
        else:
            current_hash = hash(ensemble_predictions.data.tobytes())

        # Only output a new ensemble and new predictions if the output of the
        # ensemble would actually change!
        # TODO this is neither safe (collisions, tests only with the ensemble
        # prediction, but not the ensemble), implement a hash function for
        # each possible ensemble builder.
        if last_hash is not None:
            if current_hash == last_hash:
                self.logger.info('Ensemble output did not change.')
                time.sleep(2)
                continue
            else:
                last_hash = current_hash
        else:
            last_hash = current_hash

        # Save the ensemble for later use in the main auto-sklearn module!
        backend.save_ensemble(ensemble, index_run, self.seed)

        # Save predictions for valid and test data set
        if len(dir_valid_list) == len(dir_ensemble_list):
            all_predictions_valid = np.array(all_predictions_valid)
            ensemble_predictions_valid = ensemble.predict(
                all_predictions_valid)
            if self.task_type == BINARY_CLASSIFICATION:
                ensemble_predictions_valid = ensemble_predictions_valid[:, 1]
            if self.low_precision:
                if self.task_type in [BINARY_CLASSIFICATION,
                                      MULTICLASS_CLASSIFICATION,
                                      MULTILABEL_CLASSIFICATION]:
                    ensemble_predictions_valid[
                        ensemble_predictions_valid < 1e-4] = 0.
                if self.metric in [BAC_METRIC, F1_METRIC]:
                    bin_array = np.zeros(ensemble_predictions_valid.shape,
                                         dtype=np.int32)
                    if (self.task_type != MULTICLASS_CLASSIFICATION) or (
                            ensemble_predictions_valid.shape[1] == 1):
                        bin_array[ensemble_predictions_valid >= 0.5] = 1
                    else:
                        sample_num = ensemble_predictions_valid.shape[0]
                        for i in range(sample_num):
                            j = np.argmax(ensemble_predictions_valid[i, :])
                            bin_array[i, j] = 1
                    ensemble_predictions_valid = bin_array
            if self.task_type in CLASSIFICATION_TASKS:
                if ensemble_predictions_valid.size < (20000 * 20):
                    precision = 3
                else:
                    precision = 2
            else:
                if ensemble_predictions_valid.size > 1000000:
                    precision = 4
                else:
                    # File size maximally 2.1MB
                    precision = 6

            backend.save_predictions_as_txt(ensemble_predictions_valid,
                                            'valid', index_run,
                                            prefix=self.dataset_name,
                                            precision=precision)
        else:
            self.logger.info('Could not find as many validation set '
                             'predictions (%d) as ensemble predictions (%d)!.',
                             len(dir_valid_list), len(dir_ensemble_list))

        del all_predictions_valid

        if len(dir_test_list) == len(dir_ensemble_list):
            all_predictions_test = np.array(all_predictions_test)
            ensemble_predictions_test = ensemble.predict(all_predictions_test)
            if self.task_type == BINARY_CLASSIFICATION:
                ensemble_predictions_test = ensemble_predictions_test[:, 1]
            if self.low_precision:
                if self.task_type in [BINARY_CLASSIFICATION,
                                      MULTICLASS_CLASSIFICATION,
                                      MULTILABEL_CLASSIFICATION]:
                    ensemble_predictions_test[
                        ensemble_predictions_test < 1e-4] = 0.
                if self.metric in [BAC_METRIC, F1_METRIC]:
                    bin_array = np.zeros(ensemble_predictions_test.shape,
                                         dtype=np.int32)
                    if (self.task_type != MULTICLASS_CLASSIFICATION) or (
                            ensemble_predictions_test.shape[1] == 1):
                        bin_array[ensemble_predictions_test >= 0.5] = 1
                    else:
                        sample_num = ensemble_predictions_test.shape[0]
                        for i in range(sample_num):
                            j = np.argmax(ensemble_predictions_test[i, :])
                            bin_array[i, j] = 1
                    ensemble_predictions_test = bin_array
            if self.task_type in CLASSIFICATION_TASKS:
                if ensemble_predictions_test.size < (20000 * 20):
                    precision = 3
                else:
                    precision = 2
            else:
                if ensemble_predictions_test.size > 1000000:
                    precision = 4
                else:
                    precision = 6

            backend.save_predictions_as_txt(ensemble_predictions_test,
                                            'test', index_run,
                                            prefix=self.dataset_name,
                                            precision=precision)
        else:
            self.logger.info('Could not find as many test set predictions '
                             '(%d) as ensemble predictions (%d)!',
                             len(dir_test_list), len(dir_ensemble_list))

        del all_predictions_test

        current_num_models = len(dir_ensemble_list)
        watch.stop_task('index_run' + str(index_run))
        time_iter = watch.get_wall_dur('index_run' + str(index_run))
        used_time = watch.wall_elapsed('ensemble_builder')
        index_run += 1
    return
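# Illustrative sketch (not part of auto-sklearn): the regex used above pulls the
# auto-sklearn seed and the run number out of a prediction file name. The file name
# below is made up for this example.
if __name__ == '__main__':
    import re
    model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
    match = model_and_automl_re.search('predictions_ensemble_1_0003.npy')
    print(int(match.group(1)), int(match.group(2)))  # -> 1 3 (seed, num_run)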
def predict(self):
    # First, obtain the predictions for the ensembles, the validation and
    # the test set!
    outer_scores = defaultdict(list)
    inner_scores = defaultdict(list)

    Y_optimization_pred = [None] * self.outer_cv_folds
    Y_targets = [None] * self.outer_cv_folds
    Y_valid_pred = [None] * self.outer_cv_folds
    Y_test_pred = [None] * self.outer_cv_folds

    for i in range(self.outer_cv_folds):
        train_indices, test_indices = self.outer_indices[i]
        opt_pred = self.predict_function(
            self.X_train[test_indices], self.outer_models[i],
            self.task_type, Y_train=self.Y_train[train_indices])
        Y_optimization_pred[i] = opt_pred
        Y_targets[i] = self.Y_train[test_indices]

        if self.X_valid is not None:
            X_valid = self.X_valid.copy()
            valid_pred = self.predict_function(
                X_valid, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_valid_pred[i] = valid_pred

        if self.X_test is not None:
            X_test = self.X_test.copy()
            test_pred = self.predict_function(
                X_test, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_test_pred[i] = test_pred

    # Calculate the outer scores
    for i in range(self.outer_cv_folds):
        scores = calculate_score(
            Y_targets[i], Y_optimization_pred[i], self.task_type,
            self.metric, self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)
        if self.all_scoring_functions:
            for score_name in scores:
                outer_scores[score_name].append(scores[score_name])
        else:
            outer_scores[self.metric].append(scores)

    Y_optimization_pred = np.concatenate(
        [Y_optimization_pred[i] for i in range(self.outer_cv_folds)
         if Y_optimization_pred[i] is not None])
    Y_targets = np.concatenate(
        [Y_targets[i] for i in range(self.outer_cv_folds)
         if Y_targets[i] is not None])

    if self.X_valid is not None:
        Y_valid_pred = np.array(
            [Y_valid_pred[i] for i in range(self.outer_cv_folds)
             if Y_valid_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_valid_pred.shape) == 3:
            Y_valid_pred = np.nanmean(Y_valid_pred, axis=0)

    if self.X_test is not None:
        Y_test_pred = np.array(
            [Y_test_pred[i] for i in range(self.outer_cv_folds)
             if Y_test_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_test_pred.shape) == 3:
            Y_test_pred = np.nanmean(Y_test_pred, axis=0)

    self.Y_optimization = Y_targets

    # Second, calculate the inner score
    for outer_fold in range(self.outer_cv_folds):
        for inner_fold in range(self.inner_cv_folds):
            inner_train_indices, inner_test_indices = \
                self.inner_indices[outer_fold][inner_fold]
            Y_test = self.Y_train[inner_test_indices]
            X_test = self.X_train[inner_test_indices]
            model = self.inner_models[outer_fold][inner_fold]
            Y_hat = self.predict_function(
                X_test, model, self.task_type,
                Y_train=self.Y_train[inner_train_indices])
            scores = calculate_score(
                Y_test, Y_hat, self.task_type, self.metric,
                self.D.info['label_num'],
                all_scoring_functions=self.all_scoring_functions)
            if self.all_scoring_functions:
                for score_name in scores:
                    inner_scores[score_name].append(scores[score_name])
            else:
                inner_scores[self.metric].append(scores)

    # Average the scores!
    if self.all_scoring_functions:
        inner_err = {key: 1 - np.mean(inner_scores[key])
                     for key in inner_scores}
        outer_err = {'outer:%s' % METRIC_TO_STRING[key]:
                     1 - np.mean(outer_scores[key])
                     for key in outer_scores}
        inner_err.update(outer_err)
    else:
        inner_err = 1 - np.mean(inner_scores[self.metric])

    if self.with_predictions:
        return inner_err, Y_optimization_pred, Y_valid_pred, Y_test_pred
    return inner_err
def _predict(self):
    # First, obtain the predictions for the ensembles, the validation and
    # the test set!
    self.outer_scores_ = defaultdict(list)

    Y_optimization_pred = [None] * self.outer_cv_folds
    Y_targets = [None] * self.outer_cv_folds
    Y_valid_pred = [None] * self.outer_cv_folds
    Y_test_pred = [None] * self.outer_cv_folds

    for i in range(self.outer_cv_folds):
        train_indices, test_indices = self.outer_indices[i]
        opt_pred = self.predict_function(
            self.X_train[test_indices], self.outer_models[i],
            self.task_type, Y_train=self.Y_train[train_indices])
        Y_optimization_pred[i] = opt_pred
        Y_targets[i] = self.Y_train[test_indices]

        if self.X_valid is not None:
            X_valid = self.X_valid.copy()
            valid_pred = self.predict_function(
                X_valid, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_valid_pred[i] = valid_pred

        if self.X_test is not None:
            X_test = self.X_test.copy()
            test_pred = self.predict_function(
                X_test, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_test_pred[i] = test_pred

    # Calculate the outer scores
    for i in range(self.outer_cv_folds):
        scores = calculate_score(
            Y_targets[i], Y_optimization_pred[i], self.task_type,
            self.metric, self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)
        if self.all_scoring_functions:
            for score_name in scores:
                self.outer_scores_[score_name].append(scores[score_name])
        else:
            self.outer_scores_[self.metric].append(scores)

    Y_optimization_pred = np.concatenate(
        [Y_optimization_pred[i] for i in range(self.outer_cv_folds)
         if Y_optimization_pred[i] is not None])
    Y_targets = np.concatenate(
        [Y_targets[i] for i in range(self.outer_cv_folds)
         if Y_targets[i] is not None])

    if self.X_valid is not None:
        Y_valid_pred = np.array(
            [Y_valid_pred[i] for i in range(self.outer_cv_folds)
             if Y_valid_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_valid_pred.shape) == 3:
            Y_valid_pred = np.nanmean(Y_valid_pred, axis=0)

    if self.X_test is not None:
        Y_test_pred = np.array(
            [Y_test_pred[i] for i in range(self.outer_cv_folds)
             if Y_test_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_test_pred.shape) == 3:
            Y_test_pred = np.nanmean(Y_test_pred, axis=0)

    self.Y_optimization = Y_targets

    return Y_optimization_pred, Y_valid_pred, Y_test_pred
def main(autosklearn_tmp_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None, ensemble_nbest=None, seed=1, shared_mode=False,
         max_iterations=-1, precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    targets_ensemble = backend.load_targets_ensemble()

    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and
                                max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        for dir_ensemble_file in dir_ensemble_list:
            dir_ensemble_file = os.path.join(dir_ensemble, dir_ensemble_file)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        backup_num_runs = []
        model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
        if ensemble_nbest is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            if precision == "16":
                predictions = np.load(os.path.join(
                    dir_ensemble, model_name)).astype(dtype=np.float16)
            elif precision == "32":
                predictions = np.load(os.path.join(
                    dir_ensemble, model_name)).astype(dtype=np.float32)
            elif precision == "64":
                predictions = np.load(os.path.join(
                    dir_ensemble, model_name)).astype(dtype=np.float64)
            else:
                predictions = np.load(os.path.join(dir_ensemble, model_name))

            score = calculate_score(targets_ensemble, predictions, task_type,
                                    metric, predictions.shape[1])
            model_names_to_scores[model_name] = score
            match = model_and_automl_re.search(model_name)
            automl_seed = int(match.group(1))
            num_run = int(match.group(2))

            if ensemble_nbest is not None:
                if score <= 0.001:
                    logger.error('Model only predicts at random: ' +
                                 model_name + ' has score: ' + str(score))
                    backup_num_runs.append((automl_seed, num_run))
                # If we have less models in our ensemble than ensemble_nbest,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_nbest:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append((automl_seed, num_run))
                    model_names.append(model_name)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))

                    # If the current model is better than the worst model in
                    # our ensemble, replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug('Worst model in our ensemble: %s with '
                                     'score %f will be replaced by model %s '
                                     'with score %f',
                                     model_names[idx], scores_nbest[idx],
                                     model_name, score)
                        # Exclude the old model
                        del scores_nbest[idx]
                        scores_nbest.append(score)
                        del include_num_runs[idx]
                        del indices_nbest[idx]
                        indices_nbest.append(model_idx)
                        include_num_runs.append((automl_seed, num_run))
                        del model_names[idx]
                        model_names.append(model_name)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        pass
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    logger.error('Model only predicts at random: ' +
                                 model_name + ' has score: ' + str(score))
                    backup_num_runs.append((automl_seed, num_run))
                else:
                    include_num_runs.append((automl_seed, num_run))

            model_idx += 1

        # If there is no model better than random guessing, we have to use
        # all models which do random guessing
        if len(include_num_runs) == 0:
            include_num_runs = backup_num_runs

        indices_to_model_names = dict()
        indices_to_run_num = dict()
        for i, model_name in enumerate(dir_ensemble_list):
            match = model_and_automl_re.search(model_name)
            automl_seed = int(match.group(1))
            num_run = int(match.group(2))
            if (automl_seed, num_run) in include_num_runs:
                num_indices = len(indices_to_model_names)
                indices_to_model_names[num_indices] = model_name
                indices_to_run_num[num_indices] = (automl_seed, num_run)

        include_num_runs = set(include_num_runs)

        all_predictions_train = get_predictions(dir_ensemble,
                                                dir_ensemble_list,
                                                include_num_runs,
                                                model_and_automl_re,
                                                precision)

        if len(include_num_runs) == 0:
            logger.error('All models do just random guessing')
            time.sleep(2)
            continue
        else:
            try:
                indices, trajectory = ensemble_selection(
                    np.array(all_predictions_train), targets_ensemble,
                    ensemble_size, task_type, metric)

                logger.info('Trajectory and indices!')
                logger.info(trajectory)
                logger.info(indices)
            except ValueError as e:
                logger.error('Caught ValueError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except Exception as e:
                logger.error('Caught error! %s', str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue

            # Output the score
            logger.info('Training performance: %f' % trajectory[-1])

            # Print the ensemble members
            ensemble_members_run_numbers = dict()
            ensemble_members = Counter(indices).most_common()
            ensemble_members_string = 'Ensemble members:\n'
            logger.info(ensemble_members)
            for ensemble_member in ensemble_members:
                weight = float(ensemble_member[1]) / len(indices)
                ensemble_members_string += \
                    ('    %s; weight: %10f; performance: %10f\n' %
                     (indices_to_model_names[ensemble_member[0]],
                      weight,
                      model_names_to_scores[
                          indices_to_model_names[ensemble_member[0]]]))
                ensemble_members_run_numbers[
                    indices_to_run_num[ensemble_member[0]]] = weight
            logger.info(ensemble_members_string)

        # Save the ensemble indices for later use!
        backend.save_ensemble_indices_weights(ensemble_members_run_numbers,
                                              index_run, seed)

        all_predictions_valid = get_predictions(dir_valid,
                                                dir_valid_list,
                                                include_num_runs,
                                                model_and_automl_re,
                                                precision)

        # Save predictions for valid and test data set
        if len(dir_valid_list) == len(dir_ensemble_list):
            all_predictions_valid = np.array(all_predictions_valid)
            ensemble_predictions_valid = np.mean(
                all_predictions_valid[indices.astype(int)], axis=0)
            backend.save_predictions_as_txt(ensemble_predictions_valid,
                                            'valid', index_run,
                                            prefix=basename)
        else:
            logger.info('Could not find as many validation set predictions '
                        '(%d) as ensemble predictions (%d)!',
                        len(dir_valid_list), len(dir_ensemble_list))

        del all_predictions_valid

        all_predictions_test = get_predictions(dir_test,
                                               dir_test_list,
                                               include_num_runs,
                                               model_and_automl_re,
                                               precision)

        if len(dir_test_list) == len(dir_ensemble_list):
            all_predictions_test = np.array(all_predictions_test)
            ensemble_predictions_test = np.mean(
                all_predictions_test[indices.astype(int)], axis=0)
            backend.save_predictions_as_txt(ensemble_predictions_test,
                                            'test', index_run,
                                            prefix=basename)
        else:
            logger.info('Could not find as many test set predictions (%d) as '
                        'ensemble predictions (%d)!',
                        len(dir_test_list), len(dir_ensemble_list))

        del all_predictions_test

        current_num_models = len(dir_ensemble_list)
        watch.stop_task('ensemble_iter_' + str(index_run))
        time_iter = watch.get_wall_dur('ensemble_iter_' + str(index_run))
        used_time = watch.wall_elapsed('ensemble_builder')
        index_run += 1
    return
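# Minimal sketch (not part of the module above) of how the selection indices
# returned by ensemble_selection translate into the per-model weights saved by
# backend.save_ensemble_indices_weights: each model's weight is the number of
# times it was picked, divided by the ensemble size. The index values below are
# made up for illustration.
from collections import Counter

import numpy as np

indices = np.array([0, 2, 2, 1, 2])        # e.g. the `order` from ensemble_selection
members = Counter(indices).most_common()   # [(2, 3), (0, 1), (1, 1)]
weights = {idx: count / len(indices) for idx, count in members}
assert abs(sum(weights.values()) - 1.0) < 1e-9
# weights == {2: 0.6, 0: 0.2, 1: 0.2}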
def _predict(self):
    # First, obtain the predictions for the ensembles, the validation and
    # the test set!
    self.outer_scores_ = defaultdict(list)
    Y_optimization_pred = [None] * self.outer_cv_folds
    Y_targets = [None] * self.outer_cv_folds
    Y_valid_pred = [None] * self.outer_cv_folds
    Y_test_pred = [None] * self.outer_cv_folds

    for i in range(self.outer_cv_folds):
        train_indices, test_indices = self.outer_indices[i]
        opt_pred = self.predict_function(
            self.X_train[test_indices], self.outer_models[i],
            self.task_type, Y_train=self.Y_train[train_indices])

        Y_optimization_pred[i] = opt_pred
        Y_targets[i] = self.Y_train[test_indices]

        if self.X_valid is not None:
            X_valid = self.X_valid.copy()
            valid_pred = self.predict_function(
                X_valid, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_valid_pred[i] = valid_pred

        if self.X_test is not None:
            X_test = self.X_test.copy()
            test_pred = self.predict_function(
                X_test, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_test_pred[i] = test_pred

    # Calculate the outer scores
    for i in range(self.outer_cv_folds):
        scores = calculate_score(
            Y_targets[i], Y_optimization_pred[i], self.task_type,
            self.metric, self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)
        if self.all_scoring_functions:
            for score_name in scores:
                self.outer_scores_[score_name].append(scores[score_name])
        else:
            self.outer_scores_[self.metric].append(scores)

    Y_optimization_pred = np.concatenate(
        [Y_optimization_pred[i] for i in range(self.outer_cv_folds)
         if Y_optimization_pred[i] is not None])
    Y_targets = np.concatenate(
        [Y_targets[i] for i in range(self.outer_cv_folds)
         if Y_targets[i] is not None])

    if self.X_valid is not None:
        Y_valid_pred = np.array(
            [Y_valid_pred[i] for i in range(self.outer_cv_folds)
             if Y_valid_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_valid_pred.shape) == 3:
            Y_valid_pred = np.nanmean(Y_valid_pred, axis=0)

    if self.X_test is not None:
        Y_test_pred = np.array(
            [Y_test_pred[i] for i in range(self.outer_cv_folds)
             if Y_test_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_test_pred.shape) == 3:
            Y_test_pred = np.nanmean(Y_test_pred, axis=0)

    self.Y_optimization = Y_targets

    return Y_optimization_pred, Y_valid_pred, Y_test_pred
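# Minimal sketch of the fold-averaging step performed at the end of _predict:
# per-fold validation/test predictions are stacked into a
# (folds, samples, classes) array and averaged over the fold axis. The numbers
# below are made up for illustration only.
import numpy as np

fold_preds = [np.array([[0.8, 0.2], [0.4, 0.6]]),
              np.array([[0.6, 0.4], [0.2, 0.8]])]
stacked = np.array(fold_preds)              # shape (2, 2, 2)
if len(stacked.shape) == 3:
    averaged = np.nanmean(stacked, axis=0)  # shape (2, 2)
# averaged == [[0.7, 0.3], [0.3, 0.7]]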
def ensemble_loop(iterations, starttime, info, ensemble_size, valid_labels,
                  test_labels):
    all_predictions = []
    all_test_predictions = []
    identifiers = []
    csv_writer_list = []

    # Assign the time of the "current" model
    time_function_evaluation = os.path.getmtime(iterations[-1]) - starttime
    ids = os.path.basename(iterations[-1]).split(".")[0].split('_')[-1]
    print(ids)

    for index, iters in enumerate(iterations):
        test_fname = iters.replace('ensemble', 'valid')
        if not os.path.isfile(test_fname):
            continue

        predictions = np.load(iters)
        all_predictions.append(predictions)
        identifiers.append(index)

        test_predictions = np.load(test_fname)
        all_test_predictions.append(test_predictions)

    # Build the ensemble
    start = time.time()
    es_cls = EnsembleSelection(ensemble_size, info["task"], info["metric"])
    es_cls.fit(np.array(all_predictions), valid_labels, identifiers)
    order = es_cls.indices_

    # Compute validation error
    ensemble_error = 1 - calculate_score(
        valid_labels,
        np.nanmean(np.array(all_predictions)[order], axis=0),
        info["task"], info["metric"], info['label_num'])

    # Compute test error
    ensemble_test_error = 1 - calculate_score(
        test_labels,
        np.nanmean(np.array(all_test_predictions)[order], axis=0),
        info["task"], info["metric"], info['label_num'])

    ensemble_time = time.time() - start

    # We have to add an additional row for the first iteration
    if len(iterations) == 1:
        csv_writer_list.append(
            {'Time': 0,
             'Training (Empirical) Performance': ensemble_error,
             'Test Set Performance': ensemble_test_error,
             'AC Overhead Time': 0,
             'Validation Configuration ID': 0})

    csv_writer_list.append(
        {'Time': ensemble_time + time_function_evaluation,
         'Training (Empirical) Performance': ensemble_error,
         'Test Set Performance': ensemble_test_error,
         'AC Overhead Time': 0,
         'Validation Configuration ID': ids})

    return csv_writer_list
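# One possible way to persist the rows returned by ensemble_loop, assuming the
# arguments (iterations, starttime, info, ensemble_size, valid_labels,
# test_labels) are already defined as above; the output file name is
# hypothetical and csv.DictWriter is only one option.
import csv

fieldnames = ['Time', 'Training (Empirical) Performance',
              'Test Set Performance', 'AC Overhead Time',
              'Validation Configuration ID']
rows = ensemble_loop(iterations, starttime, info, ensemble_size,
                     valid_labels, test_labels)
with open('ensemble_trajectory.csv', 'w', newline='') as fh:
    writer = csv.DictWriter(fh, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)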
def main(autosklearn_tmp_dir, dataset_name, task_type, metric, limit,
         output_dir, ensemble_size=None, ensemble_nbest=None, seed=1,
         shared_mode=False, max_iterations=-1, precision="32"):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0

    backend = Backend(output_dir, autosklearn_tmp_dir)
    dir_ensemble = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(autosklearn_tmp_dir, '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    dir_ensemble_list_mtimes = []

    while used_time < limit or (max_iterations > 0 and
                                max_iterations >= num_iteration):
        num_iteration += 1
        logger.debug('Time left: %f', limit - used_time)
        logger.debug('Time last iteration: %f', time_iter)

        # Reload the ensemble targets every iteration. This is important
        # because cv may update the ensemble targets in the course of running
        # auto-sklearn.
        # TODO update cv in order to not need this any more!
        targets_ensemble = backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logger.debug('Prediction directory %s does not exist!' %
                         dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_list) == 0:
            logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_list) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        watch.start_task('ensemble_iter_' + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        backup_num_runs = []
        model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
        if ensemble_nbest is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            if model_name.endswith("/"):
                model_name = model_name[:-1]
            basename = os.path.basename(model_name)

            if precision == "16":
                predictions = np.load(os.path.join(
                    dir_ensemble, basename)).astype(dtype=np.float16)
            elif precision == "32":
                predictions = np.load(os.path.join(
                    dir_ensemble, basename)).astype(dtype=np.float32)
            elif precision == "64":
                predictions = np.load(os.path.join(
                    dir_ensemble, basename)).astype(dtype=np.float64)
            else:
                predictions = np.load(os.path.join(dir_ensemble, basename))

            try:
                score = calculate_score(targets_ensemble, predictions,
                                        task_type, metric,
                                        predictions.shape[1])
            except Exception:
                score = -1

            model_names_to_scores[model_name] = score
            match = model_and_automl_re.search(model_name)
            automl_seed = int(match.group(1))
            num_run = int(match.group(2))

            if ensemble_nbest is not None:
                if score <= 0.001:
                    logger.error('Model only predicts at random: ' +
                                 model_name + ' has score: ' + str(score))
                    backup_num_runs.append((automl_seed, num_run))
                # If we have less models in our ensemble than ensemble_nbest,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_nbest:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append((automl_seed, num_run))
                    model_names.append(model_name)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))

                    # If the current model is better than the worst model in
                    # our ensemble, replace it by the current model
                    if scores_nbest[idx] < score:
                        logger.debug('Worst model in our ensemble: %s with '
                                     'score %f will be replaced by model %s '
                                     'with score %f',
                                     model_names[idx], scores_nbest[idx],
                                     model_name, score)
                        # Exclude the old model
                        del scores_nbest[idx]
                        scores_nbest.append(score)
                        del include_num_runs[idx]
                        del indices_nbest[idx]
                        indices_nbest.append(model_idx)
                        include_num_runs.append((automl_seed, num_run))
                        del model_names[idx]
                        model_names.append(model_name)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        pass
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    logger.error('Model only predicts at random: ' +
                                 model_name + ' has score: ' + str(score))
                    backup_num_runs.append((automl_seed, num_run))
                else:
                    include_num_runs.append((automl_seed, num_run))

            model_idx += 1

        # If there is no model better than random guessing, we have to use
        # all models which do random guessing
        if len(include_num_runs) == 0:
            include_num_runs = backup_num_runs

        indices_to_model_names = dict()
        indices_to_run_num = dict()
        for i, model_name in enumerate(dir_ensemble_list):
            match = model_and_automl_re.search(model_name)
            automl_seed = int(match.group(1))
            num_run = int(match.group(2))
            if (automl_seed, num_run) in include_num_runs:
                num_indices = len(indices_to_model_names)
                indices_to_model_names[num_indices] = model_name
                indices_to_run_num[num_indices] = (automl_seed, num_run)

        try:
            all_predictions_train, all_predictions_valid, all_predictions_test = \
                get_all_predictions(dir_ensemble, dir_ensemble_list,
                                    dir_valid, dir_valid_list,
                                    dir_test, dir_test_list,
                                    include_num_runs,
                                    model_and_automl_re,
                                    precision)
        except IOError:
            logger.error('Could not load the predictions.')
            continue

        if len(include_num_runs) == 0:
            logger.error('All models do just random guessing')
            time.sleep(2)
            continue
        else:
            ensemble = EnsembleSelection(ensemble_size=ensemble_size,
                                         task_type=task_type,
                                         metric=metric)
            try:
                ensemble.fit(all_predictions_train, targets_ensemble,
                             include_num_runs)
                logger.info(ensemble)
            except ValueError as e:
                logger.error('Caught ValueError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except IndexError as e:
                logger.error('Caught IndexError: ' + str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue
            except Exception as e:
                logger.error('Caught error! %s', str(e))
                used_time = watch.wall_elapsed('ensemble_builder')
                time.sleep(2)
                continue

            # Output the score
            logger.info('Training performance: %f' % ensemble.train_score_)

        # Save the ensemble for later use in the main auto-sklearn module!
        backend.save_ensemble(ensemble, index_run, seed)

        # Save predictions for valid and test data set
        if len(dir_valid_list) == len(dir_ensemble_list):
            all_predictions_valid = np.array(all_predictions_valid)
            ensemble_predictions_valid = ensemble.predict(
                all_predictions_valid)
            backend.save_predictions_as_txt(ensemble_predictions_valid,
                                            'valid', index_run,
                                            prefix=dataset_name)
        else:
            logger.info('Could not find as many validation set predictions '
                        '(%d) as ensemble predictions (%d)!',
                        len(dir_valid_list), len(dir_ensemble_list))

        del all_predictions_valid

        if len(dir_test_list) == len(dir_ensemble_list):
            all_predictions_test = np.array(all_predictions_test)
            ensemble_predictions_test = ensemble.predict(all_predictions_test)
            backend.save_predictions_as_txt(ensemble_predictions_test,
                                            'test', index_run,
                                            prefix=dataset_name)
        else:
            logger.info('Could not find as many test set predictions (%d) as '
                        'ensemble predictions (%d)!',
                        len(dir_test_list), len(dir_ensemble_list))

        del all_predictions_test

        current_num_models = len(dir_ensemble_list)
        watch.stop_task('ensemble_iter_' + str(index_run))
        time_iter = watch.get_wall_dur('ensemble_iter_' + str(index_run))
        used_time = watch.wall_elapsed('ensemble_builder')
        index_run += 1
    return
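# Quick illustration of how the (automl_seed, num_run) pair is recovered from a
# prediction file name by the regular expression used in main; the file name
# below is made up but follows the 'predictions_ensemble_%s_*.npy' pattern.
import re

model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy$')
match = model_and_automl_re.search('predictions_ensemble_1_00042.npy')
automl_seed, num_run = int(match.group(1)), int(match.group(2))
# automl_seed == 1, num_run == 42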