def predict_and_loss(self, train=False):
    if train:
        Y_pred = self.predict_function(self.X_train, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_train,
            prediction=Y_pred,
            task_type=self.task_type,
            metric=self.metric,
            all_scoring_functions=self.all_scoring_functions)
    else:
        Y_pred = self.predict_function(self.X_test, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_test,
            prediction=Y_pred,
            task_type=self.task_type,
            metric=self.metric,
            all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    return err, Y_pred, None, None

def predict_and_loss(self, train=False):
    if train:
        Y_pred = self.predict_function(self.X_train, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_train,
            prediction=Y_pred,
            task_type=self.task_type,
            metric=self.metric,
            all_scoring_functions=self.all_scoring_functions)
    else:
        Y_pred = self.predict_function(self.X_test, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_test,
            prediction=Y_pred,
            task_type=self.task_type,
            metric=self.metric,
            all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    return err, Y_pred, Y_pred, Y_pred

def predict_and_loss(self, train=False):
    if train:
        Y_pred = self.predict_function(self.X_train, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_train,
            prediction=Y_pred,
            task_type=self.task_type,
            metric=self.metric,
            scoring_functions=self.scoring_functions)
    else:
        Y_pred = self.predict_function(self.X_test, self.model,
                                       self.task_type, self.Y_train)
        score = calculate_score(
            solution=self.Y_test,
            prediction=Y_pred,
            task_type=self.task_type,
            metric=self.metric,
            scoring_functions=self.scoring_functions)

    if hasattr(score, '__len__'):
        if self.task_type in CLASSIFICATION_TASKS:
            err = {key: metric._optimum - score[key]
                   for key, metric in CLASSIFICATION_METRICS.items()
                   if key in score}
        else:
            err = {key: metric._optimum - score[key]
                   for key, metric in REGRESSION_METRICS.items()
                   if key in score}
    else:
        err = self.metric._optimum - score

    return err, Y_pred, None, None

def _fast(self, predictions, labels):
    """Fast version of Rich Caruana's ensemble selection method."""
    self.num_input_models_ = len(predictions)

    ensemble = []
    trajectory = []
    order = []

    ensemble_size = self.ensemble_size

    if self.sorted_initialization:
        n_best = 20
        indices = self._sorted_initialization(predictions, labels, n_best)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = calculate_score(
                labels, ensemble_, self.task_type, self.metric,
                ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size -= n_best

    for i in range(ensemble_size):
        scores = np.zeros((len(predictions)))
        s = len(ensemble)
        if s == 0:
            weighted_ensemble_prediction = np.zeros(predictions[0].shape)
        else:
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            weighted_ensemble_prediction = (s / float(s + 1)) * \
                ensemble_prediction
        fant_ensemble_prediction = np.zeros(weighted_ensemble_prediction.shape)
        for j, pred in enumerate(predictions):
            # TODO: this could potentially be vectorized! - let's profile
            # the script first!
            fant_ensemble_prediction[:, :] = weighted_ensemble_prediction + \
                (1. / float(s + 1)) * pred
            scores[j] = 1 - calculate_score(
                solution=labels,
                prediction=fant_ensemble_prediction,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=False)
        all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
        best = np.random.choice(all_best)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

        # Handle special case
        if len(predictions) == 1:
            break

    self.indices_ = order
    self.trajectory_ = trajectory
    self.train_score_ = trajectory[-1]

def test_unsupported_task_type(self):
    y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
    y_pred = np.array(
        [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0],
         [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
    scorer = autosklearn.metrics.accuracy

    raised = False
    try:
        calculate_score(y_true, y_pred, 6, scorer)
    except NotImplementedError:
        raised = True
    self.assertTrue(raised)

def _slow(self, predictions, labels):
    """Rich Caruana's ensemble selection method."""
    self.num_input_models_ = len(predictions)

    ensemble = []
    trajectory = []
    order = []

    ensemble_size = self.ensemble_size

    if self.sorted_initialization:
        n_best = 20
        indices = self._sorted_initialization(predictions, labels, n_best)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = calculate_score(
                solution=labels,
                prediction=ensemble_,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=False)
            trajectory.append(ensemble_performance)
        ensemble_size -= n_best

    for i in range(ensemble_size):
        scores = np.zeros([predictions.shape[0]])
        for j, pred in enumerate(predictions):
            ensemble.append(pred)
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            scores[j] = 1 - calculate_score(
                solution=labels,
                prediction=ensemble_prediction,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=False)
            ensemble.pop()
        best = np.nanargmin(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

        # Handle special case
        if len(predictions) == 1:
            break

    self.indices_ = np.array(order)
    self.trajectory_ = np.array(trajectory)
    self.train_score_ = trajectory[-1]

def test_classification_scoring_functions(self):
    scoring_functions = list(
        autosklearn.metrics.CLASSIFICATION_METRICS.values())
    scoring_functions.remove(autosklearn.metrics.accuracy)

    fail_metrics = ['precision_samples', 'recall_samples', 'f1_samples']
    success_metrics = list(autosklearn.metrics.CLASSIFICATION_METRICS.keys())
    for metric in fail_metrics:
        success_metrics.remove(metric)

    y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
    y_pred = np.array(
        [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0],
         [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
    score_dict = calculate_score(y_true, y_pred, BINARY_CLASSIFICATION,
                                 autosklearn.metrics.accuracy,
                                 scoring_functions)

    self.assertIsInstance(score_dict, dict)
    # assertTrue(a, b) only checks that `a` is truthy; compare the lengths.
    self.assertEqual(len(success_metrics), len(score_dict))
    for metric in fail_metrics:
        self.assertNotIn(metric, score_dict.keys())
    for metric in success_metrics:
        self.assertIn(metric, score_dict.keys())
        self.assertAlmostEqual(
            autosklearn.metrics.CLASSIFICATION_METRICS[metric]._optimum,
            score_dict[metric])

def _loss(self, y_true, y_hat, all_scoring_functions=None):
    all_scoring_functions = (
        self.all_scoring_functions
        if all_scoring_functions is None
        else all_scoring_functions
    )

    if not isinstance(self.configuration, Configuration):
        if all_scoring_functions:
            return {self.metric: 1.0}
        else:
            return 1.0

    score = calculate_score(
        y_true, y_hat, self.task_type, self.metric,
        all_scoring_functions=all_scoring_functions)

    if hasattr(score, '__len__'):
        # TODO: instead of using self.metric, it should use all metrics given by key.
        # But now this throws error...
        err = {key: metric._optimum - score[key]
               for key, metric in CLASSIFICATION_METRICS.items()
               if key in score}
    else:
        err = self.metric._optimum - score

    return err

def test_regression_only_metric(self):
    y_true = np.array([1, 2, 3, 4])
    y_pred = y_true.copy()
    scorer = autosklearn.metrics.root_mean_squared_error

    score = calculate_score(y_true, y_pred, REGRESSION, scorer)
    previous_score = scorer._optimum
    self.assertAlmostEqual(score, previous_score)

def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
    """
    Records a snapshot of how the performance looks at a given training time.

    Parameters
    ----------
    train_pred: np.ndarray
        The predictions on the ensemble (training) set using the ensemble
    valid_pred: np.ndarray
        The predictions on the validation set using the ensemble
    test_pred: np.ndarray
        The predictions on the test set using the ensemble
    """
    performance_stamp = {
        'Timestamp': pd.Timestamp.now(),
        'ensemble_optimization_score': calculate_score(
            solution=self.y_true_ensemble,
            prediction=train_pred,
            task_type=self.task_type,
            metric=self.metric,
            all_scoring_functions=False)
    }
    if valid_pred is not None:
        # TODO: valid_pred are a legacy from competition manager
        # and this if never happens. Re-evaluate Y_valid support
        performance_stamp['ensemble_val_score'] = calculate_score(
            solution=self.y_valid,
            prediction=valid_pred,
            task_type=self.task_type,
            metric=self.metric,
            all_scoring_functions=False)

    # In case test_pred was provided
    if test_pred is not None:
        performance_stamp['ensemble_test_score'] = calculate_score(
            solution=self.y_test,
            prediction=test_pred,
            task_type=self.task_type,
            metric=self.metric,
            all_scoring_functions=False)

    self.queue.put(performance_stamp)

def score(self, X, y):
    # fix: Consider only index 1 of second dimension
    # Don't know if the reshaping should be done there or in calculate_score
    prediction = self.predict(X)
    return calculate_score(solution=y, prediction=prediction,
                           task_type=self._task, metric=self._metric,
                           all_scoring_functions=False)

def _sorted_initialization(self, predictions, labels, n_best):
    perf = np.zeros([predictions.shape[0]])

    for idx, prediction in enumerate(predictions):
        perf[idx] = calculate_score(labels, prediction, self.task_type,
                                    self.metric, predictions.shape[1])

    indices = np.argsort(perf)[perf.shape[0] - n_best:]
    return indices

def test_classification_only_metric(self):
    y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
    y_pred = np.array(
        [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0],
         [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
    scorer = autosklearn.metrics.accuracy

    score = calculate_score(y_true, y_pred, BINARY_CLASSIFICATION, scorer)
    previous_score = scorer._optimum
    self.assertAlmostEqual(score, previous_score)

def _slow(self, predictions: List[np.ndarray], labels: np.ndarray) -> None:
    """Rich Caruana's ensemble selection method."""
    self.num_input_models_ = len(predictions)

    ensemble = []
    trajectory = []
    order = []

    ensemble_size = self.ensemble_size

    for i in range(ensemble_size):
        scores = np.zeros(
            [np.shape(predictions)[0]],
            dtype=np.float64,
        )
        for j, pred in enumerate(predictions):
            ensemble.append(pred)
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            # Calculate score is versatile and can return a dict of score
            # when scoring_functions=None, we know it will be a float
            calculated_score = cast(
                float,
                calculate_score(
                    solution=labels,
                    prediction=ensemble_prediction,
                    task_type=self.task_type,
                    metric=self.metric,
                    scoring_functions=None)
            )
            scores[j] = self.metric._optimum - calculated_score
            ensemble.pop()
        best = np.nanargmin(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

        # Handle special case
        if len(predictions) == 1:
            break

    self.indices_ = np.array(
        order,
        dtype=np.int64,
    )
    self.trajectory_ = np.array(
        trajectory,
        dtype=np.float64,
    )
    self.train_score_ = trajectory[-1]

def _loss(self, y_true, y_hat, all_scoring_functions=None):
    """Auto-sklearn follows a minimization goal, so the make_scorer
    sign is used as a guide to obtain the value to reduce.

    In this regard, to optimize a metric:
        1- The score is calculated with calculate_score, with the caveat
           that, if greater is not better for the metric, a negative
           score is returned.
        2- The err (the optimization goal) is then:
           optimum - (metric.sign * actual_score)
           For accuracy, for example: optimum(1) - (+1 * actual score)
           For logloss, for example:  optimum(0) - (-1 * actual score)
    """
    all_scoring_functions = (
        self.all_scoring_functions
        if all_scoring_functions is None
        else all_scoring_functions
    )

    if not isinstance(self.configuration, Configuration):
        if all_scoring_functions:
            return {self.metric: 1.0}
        else:
            return 1.0

    score = calculate_score(
        y_true, y_hat, self.task_type, self.metric,
        all_scoring_functions=all_scoring_functions)

    if hasattr(score, '__len__'):
        # TODO: instead of using self.metric, it should use all metrics
        # given by key, but this currently throws an error.
        if self.task_type in CLASSIFICATION_TASKS:
            err = {key: metric._optimum - score[key]
                   for key, metric in CLASSIFICATION_METRICS.items()
                   if key in score}
        else:
            err = {key: metric._optimum - score[key]
                   for key, metric in REGRESSION_METRICS.items()
                   if key in score}
    else:
        err = self.metric._optimum - score

    return err

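# A hedged, illustrative sketch (not auto-sklearn code) of the loss convention
# described in the docstring above: the score is assumed to be reported as
# sign * raw_value, so "optimum - reported_score" is always a quantity to
# minimize, both for maximized metrics (accuracy: optimum 1, sign +1) and
# minimized ones (log loss: optimum 0, sign -1). The helper name
# `loss_from_score` and the toy data are made up for illustration only.
import numpy as np
import sklearn.metrics


def loss_from_score(optimum, sign, raw_metric_value):
    # The reported score is sign * raw_metric_value, so the optimization
    # goal becomes optimum - (sign * raw_metric_value).
    return optimum - sign * raw_metric_value


y_true = np.array([0, 1, 1, 0])
y_prob = np.array([0.1, 0.8, 0.3, 0.4])      # predicted P(class 1)
y_pred = (y_prob > 0.5).astype(int)

acc = sklearn.metrics.accuracy_score(y_true, y_pred)
ll = sklearn.metrics.log_loss(y_true, y_prob)

print(loss_from_score(1.0, +1, acc))   # accuracy: 1 - acc, 0 when perfect
print(loss_from_score(0.0, -1, ll))    # log loss: 0 - (-ll) = ll, 0 when perfect
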
def _loss(self, y_true, y_hat):
    if not isinstance(self.configuration, Configuration):
        if self.all_scoring_functions:
            return {self.metric: 1.0}
        else:
            return 1.0

    score = calculate_score(
        y_true, y_hat, self.task_type, self.metric,
        all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    return err

def _loss(self, y_true, y_hat, all_scoring_functions=None):
    all_scoring_functions = (
        self.all_scoring_functions
        if all_scoring_functions is None
        else all_scoring_functions
    )

    if not isinstance(self.configuration, Configuration):
        if all_scoring_functions:
            return {self.metric: 1.0}
        else:
            return 1.0

    score = calculate_score(
        y_true, y_hat, self.task_type, self.metric,
        all_scoring_functions=all_scoring_functions)

    if hasattr(score, '__len__'):
        err = {key: self.metric._optimum - score[key] for key in score}
    else:
        err = self.metric._optimum - score

    return err

def test_regression_scoring_functions(self):
    scoring_functions = list(autosklearn.metrics.REGRESSION_METRICS.values())
    scoring_functions.remove(autosklearn.metrics.root_mean_squared_error)

    metrics = list(autosklearn.metrics.REGRESSION_METRICS.keys())
    metrics.remove('mean_squared_log_error')

    y_true = np.array([1, 2, 3, -4])
    y_pred = y_true.copy()
    score_dict = calculate_score(
        y_true, y_pred, REGRESSION,
        autosklearn.metrics.root_mean_squared_error,
        scoring_functions)

    self.assertIsInstance(score_dict, dict)
    # assertTrue(a, b) only checks that `a` is truthy; compare the lengths.
    self.assertEqual(len(metrics), len(score_dict))
    for metric in metrics:
        self.assertIn(metric, score_dict.keys())
        self.assertAlmostEqual(
            autosklearn.metrics.REGRESSION_METRICS[metric]._optimum,
            score_dict[metric])

def read_ensemble_preds(self):
    """
    reading predictions on ensemble building data set;
    populates self.read_preds
    """
    self.logger.debug("Read ensemble data set predictions")

    if self.y_true_ensemble is None:
        try:
            self.y_true_ensemble = self.backend.load_targets_ensemble()
        except FileNotFoundError:
            self.logger.debug(
                "Could not find true targets on ensemble data set: %s",
                traceback.format_exc(),
            )
            return False

    # no validation predictions so far -- no dir
    if not os.path.isdir(self.dir_ensemble):
        self.logger.debug("No ensemble dataset prediction directory found")
        return False

    if self.shared_mode is False:
        pred_path = os.path.join(
            self.dir_ensemble,
            'predictions_ensemble_%s_*.npy' % self.seed)
    # pSMAC
    else:
        pred_path = os.path.join(
            self.dir_ensemble,
            'predictions_ensemble_*_*.npy')

    y_ens_files = glob.glob(pred_path)

    # no validation predictions so far -- no files
    if len(y_ens_files) == 0:
        self.logger.debug("Found no prediction files on ensemble data set:"
                          " %s" % pred_path)
        return False

    n_read_files = 0
    for y_ens_fn in y_ens_files:

        if self.read_at_most and n_read_files >= self.read_at_most:
            # limit the number of files that will be read
            # to limit memory consumption
            break

        if not y_ens_fn.endswith(".npy"):
            self.logger.info('Error loading file (not .npy): %s', y_ens_fn)
            continue

        match = self.model_fn_re.search(y_ens_fn)
        _seed = int(match.group(1))
        _num_run = int(match.group(2))

        if not self.read_preds.get(y_ens_fn):
            self.read_preds[y_ens_fn] = {
                "ens_score": -1,
                "mtime_ens": 0,
                "mtime_valid": 0,
                "mtime_test": 0,
                "seed": _seed,
                "num_run": _num_run,
                Y_ENSEMBLE: None,
                Y_VALID: None,
                Y_TEST: None,
                # Lazy keys so far:
                # 0 - not loaded
                # 1 - loaded and in memory
                # 2 - loaded but dropped again
                "loaded": 0
            }

        if self.read_preds[y_ens_fn]["mtime_ens"] == os.path.getmtime(y_ens_fn):
            # same time stamp; nothing changed;
            continue

        # actually read the predictions and score them
        try:
            with open(y_ens_fn, 'rb') as fp:
                y_ensemble = self._read_np_fn(fp=fp)
                score = calculate_score(
                    solution=self.y_true_ensemble,  # y_ensemble = y_true for ensemble set
                    prediction=y_ensemble,
                    task_type=self.task_type,
                    metric=self.metric,
                    all_scoring_functions=False)

                if self.read_preds[y_ens_fn]["ens_score"] > -1:
                    self.logger.critical(
                        'Changing ensemble score for file %s from %f to %f '
                        'because file modification time changed? %f - %f',
                        y_ens_fn,
                        self.read_preds[y_ens_fn]["ens_score"],
                        score,
                        self.read_preds[y_ens_fn]["mtime_ens"],
                        os.path.getmtime(y_ens_fn),
                    )

                self.read_preds[y_ens_fn]["ens_score"] = score
                self.read_preds[y_ens_fn][Y_ENSEMBLE] = y_ensemble
                self.read_preds[y_ens_fn]["mtime_ens"] = os.path.getmtime(y_ens_fn)
                self.read_preds[y_ens_fn]["loaded"] = 1

            n_read_files += 1

        except Exception:
            self.logger.warning(
                'Error loading %s: %s',
                y_ens_fn,
                traceback.format_exc(),
            )
            self.read_preds[y_ens_fn]["ens_score"] = -1

    self.logger.debug(
        'Done reading %d new prediction files. Loaded %d predictions in '
        'total.',
        n_read_files,
        np.sum([pred["loaded"] > 0 for pred in self.read_preds.values()])
    )
    return True

def _fast(
    self,
    predictions: List[np.ndarray],
    labels: np.ndarray,
) -> None:
    """Fast version of Rich Caruana's ensemble selection method."""
    self.num_input_models_ = len(predictions)

    ensemble = []  # type: List[np.ndarray]
    trajectory = []
    order = []

    ensemble_size = self.ensemble_size

    weighted_ensemble_prediction = np.zeros(
        predictions[0].shape,
        dtype=np.float64,
    )
    fant_ensemble_prediction = np.zeros(
        weighted_ensemble_prediction.shape,
        dtype=np.float64,
    )
    for i in range(ensemble_size):
        scores = np.zeros(
            (len(predictions)),
            dtype=np.float64,
        )
        s = len(ensemble)
        if s == 0:
            weighted_ensemble_prediction.fill(0.0)
        else:
            weighted_ensemble_prediction.fill(0.0)
            for pred in ensemble:
                np.add(
                    weighted_ensemble_prediction,
                    pred,
                    out=weighted_ensemble_prediction,
                )
            np.multiply(
                weighted_ensemble_prediction,
                1 / s,
                out=weighted_ensemble_prediction,
            )
            np.multiply(
                weighted_ensemble_prediction,
                (s / float(s + 1)),
                out=weighted_ensemble_prediction,
            )

        # Memory-efficient averaging!
        for j, pred in enumerate(predictions):
            # TODO: this could potentially be vectorized! - let's profile
            # the script first!
            fant_ensemble_prediction.fill(0.0)
            np.add(
                fant_ensemble_prediction,
                weighted_ensemble_prediction,
                out=fant_ensemble_prediction,
            )
            np.add(
                fant_ensemble_prediction,
                (1. / float(s + 1)) * pred,
                out=fant_ensemble_prediction,
            )

            # Calculate score is versatile and can return a dict of score
            # when scoring_functions=None, we know it will be a float
            calculated_score = cast(
                float,
                calculate_score(
                    solution=labels,
                    prediction=fant_ensemble_prediction,
                    task_type=self.task_type,
                    metric=self.metric,
                    scoring_functions=None)
            )
            scores[j] = self.metric._optimum - calculated_score

        all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
        best = self.random_state.choice(all_best)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

        # Handle special case
        if len(predictions) == 1:
            break

    self.indices_ = order
    self.trajectory_ = trajectory
    self.train_score_ = trajectory[-1]

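# A small, self-contained check (with made-up arrays, not auto-sklearn code) of
# the weighting trick used by the fast ensemble selection loop above: scaling
# the current ensemble mean by s / (s + 1) and adding 1 / (s + 1) times a
# candidate prediction equals the plain mean over all s + 1 predictions, so
# each candidate can be scored without re-averaging the whole ensemble.
import numpy as np

rng = np.random.default_rng(0)
ensemble = [rng.random((5, 3)) for _ in range(4)]   # s = 4 member predictions
candidate = rng.random((5, 3))                      # prediction being evaluated

s = len(ensemble)
running_mean = np.mean(np.array(ensemble), axis=0)

# Incremental form used inside _fast
fant = (s / float(s + 1)) * running_mean + (1.0 / float(s + 1)) * candidate

# Direct form: average over all s + 1 predictions
direct = np.mean(np.array(ensemble + [candidate]), axis=0)

assert np.allclose(fant, direct)
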
def main(self):
    watch = StopWatch()
    watch.start_task('ensemble_builder')

    used_time = 0
    time_iter = 0
    index_run = 0
    num_iteration = 0
    current_num_models = 0
    last_hash = None
    current_hash = None

    dir_ensemble = os.path.join(self.backend.temporary_directory,
                                '.auto-sklearn',
                                'predictions_ensemble')
    dir_valid = os.path.join(self.backend.temporary_directory,
                             '.auto-sklearn',
                             'predictions_valid')
    dir_test = os.path.join(self.backend.temporary_directory,
                            '.auto-sklearn',
                            'predictions_test')
    paths_ = [dir_ensemble, dir_valid, dir_test]

    dir_ensemble_list_mtimes = []

    self.logger.debug('Starting main loop with %f seconds and %d iterations '
                      'left.' % (self.limit - used_time, num_iteration))
    while used_time < self.limit or (self.max_iterations > 0 and
                                     self.max_iterations >= num_iteration):
        num_iteration += 1
        self.logger.debug('Time left: %f', self.limit - used_time)
        self.logger.debug('Time last ensemble building: %f', time_iter)

        # Reload the ensemble targets every iteration, important, because cv may
        # update the ensemble targets in the course of running auto-sklearn
        # TODO update cv in order to not need this any more!
        targets_ensemble = self.backend.load_targets_ensemble()

        # Load the predictions from the models
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            self.logger.debug('Prediction directory %s does not exist!' %
                              dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if self.shared_mode is False:
            dir_ensemble_list = sorted(glob.glob(os.path.join(
                dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)))
            if exists[1]:
                dir_valid_list = sorted(glob.glob(os.path.join(
                    dir_valid, 'predictions_valid_%s_*.npy' % self.seed)))
            else:
                dir_valid_list = []
            if exists[2]:
                dir_test_list = sorted(glob.glob(os.path.join(
                    dir_test, 'predictions_test_%s_*.npy' % self.seed)))
            else:
                dir_test_list = []
        else:
            dir_ensemble_list = sorted(os.listdir(dir_ensemble))
            dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
            dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        # Check the modification times because predictions can be updated
        # over time!
        old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
        dir_ensemble_list_mtimes = []
        # The ensemble dir can contain non-model files. We filter them and
        # use the following list instead
        dir_ensemble_model_files = []

        for dir_ensemble_file in dir_ensemble_list:
            if dir_ensemble_file.endswith("/"):
                dir_ensemble_file = dir_ensemble_file[:-1]
            if not dir_ensemble_file.endswith(".npy"):
                self.logger.info('Error loading file (not .npy): %s',
                                 dir_ensemble_file)
                continue

            dir_ensemble_model_files.append(dir_ensemble_file)
            basename = os.path.basename(dir_ensemble_file)
            dir_ensemble_file = os.path.join(dir_ensemble, basename)
            mtime = os.path.getmtime(dir_ensemble_file)
            dir_ensemble_list_mtimes.append(mtime)

        if len(dir_ensemble_model_files) == 0:
            self.logger.debug('Directories are empty')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        if len(dir_ensemble_model_files) <= current_num_models and \
                old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
            self.logger.debug('Nothing has changed since the last time')
            time.sleep(2)
            used_time = watch.wall_elapsed('ensemble_builder')
            continue

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            # TODO restructure time management in the ensemble builder,
            # what is the time of index_run actually needed for?
            watch.start_task('index_run' + str(index_run))
            watch.start_task('ensemble_iter_' + str(num_iteration))

            # List of num_runs (which are in the filename) which will be
            # included later
            include_num_runs = []
            backup_num_runs = []
            model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy')
            if self.ensemble_nbest is not None:
                # Keeps track of the single scores of each model in our ensemble
                scores_nbest = []
                # The indices of the model that are currently in our ensemble
                indices_nbest = []
                # The names of the models
                model_names = []

            model_names_to_scores = dict()

            model_idx = 0
            for model_name in dir_ensemble_model_files:
                if model_name.endswith("/"):
                    model_name = model_name[:-1]
                basename = os.path.basename(model_name)

                try:
                    with open(os.path.join(dir_ensemble, basename), 'rb') as fh:
                        # string comparison must use ==, not identity
                        if self.precision == "16":
                            predictions = np.load(fh).astype(dtype=np.float16)
                        elif self.precision == "32":
                            predictions = np.load(fh).astype(dtype=np.float32)
                        elif self.precision == "64":
                            predictions = np.load(fh).astype(dtype=np.float64)
                        else:
                            predictions = np.load(fh)

                    score = calculate_score(solution=targets_ensemble,
                                            prediction=predictions,
                                            task_type=self.task_type,
                                            metric=self.metric,
                                            all_scoring_functions=False)

                except Exception as e:
                    self.logger.warning('Error loading %s: %s - %s',
                                        basename, type(e), e)
                    score = -1

                model_names_to_scores[model_name] = score
                match = model_and_automl_re.search(model_name)
                automl_seed = int(match.group(1))
                num_run = int(match.group(2))

                if self.ensemble_nbest is not None:
                    if score <= 0.001:
                        self.logger.info('Model only predicts at random: ' +
                                         model_name + ' has score: ' +
                                         str(score))
                        backup_num_runs.append((automl_seed, num_run))
                    # If we have less models in our ensemble than ensemble_nbest
                    # add the current model if it is better than random
                    elif len(scores_nbest) < self.ensemble_nbest:
                        scores_nbest.append(score)
                        indices_nbest.append(model_idx)
                        include_num_runs.append((automl_seed, num_run))
                        model_names.append(model_name)
                    else:
                        # Take the worst performing model in our ensemble so far
                        idx = np.argmin(np.array([scores_nbest]))

                        # If the current model is better than the worst model in
                        # our ensemble replace it by the current model
                        if scores_nbest[idx] < score:
                            self.logger.info(
                                'Worst model in our ensemble: %s with score %f '
                                'will be replaced by model %s with score %f',
                                model_names[idx], scores_nbest[idx],
                                model_name, score)
                            # Exclude the old model
                            del scores_nbest[idx]
                            scores_nbest.append(score)
                            del include_num_runs[idx]
                            del indices_nbest[idx]
                            indices_nbest.append(model_idx)
                            include_num_runs.append((automl_seed, num_run))
                            del model_names[idx]
                            model_names.append(model_name)

                        # Otherwise exclude the current model from the ensemble
                        else:
                            # include_num_runs.append(True)
                            pass

                else:
                    # Load all predictions that are better than random
                    if score <= 0.001:
                        # include_num_runs.append(True)
                        self.logger.info('Model only predicts at random: ' +
                                         model_name + ' has score: ' +
                                         str(score))
                        backup_num_runs.append((automl_seed, num_run))
                    else:
                        include_num_runs.append((automl_seed, num_run))

                model_idx += 1

            # If there is no model better than random guessing, we have to use
            # all models which do random guessing
            if len(include_num_runs) == 0:
                include_num_runs = backup_num_runs

            indices_to_model_names = dict()
            indices_to_run_num = dict()
            for i, model_name in enumerate(dir_ensemble_model_files):
                match = model_and_automl_re.search(model_name)
                automl_seed = int(match.group(1))
                num_run = int(match.group(2))
                if (automl_seed, num_run) in include_num_runs:
                    num_indices = len(indices_to_model_names)
                    indices_to_model_names[num_indices] = model_name
                    indices_to_run_num[num_indices] = (automl_seed, num_run)

            try:
                all_predictions_train, all_predictions_valid, all_predictions_test = \
                    self.get_all_predictions(dir_ensemble,
                                             dir_ensemble_model_files,
                                             dir_valid, dir_valid_list,
                                             dir_test, dir_test_list,
                                             include_num_runs,
                                             model_and_automl_re,
                                             self.precision)
            except IOError as e:
                print(e)
                self.logger.error('Could not load the predictions.')
                continue

            if len(include_num_runs) == 0:
                self.logger.error('All models do just random guessing')
                time.sleep(2)
                continue

            else:
                ensemble = EnsembleSelection(ensemble_size=self.ensemble_size,
                                             task_type=self.task_type,
                                             metric=self.metric)

                try:
                    ensemble.fit(all_predictions_train, targets_ensemble,
                                 include_num_runs)
                    self.logger.info(ensemble)

                except ValueError as e:
                    self.logger.error('Caught ValueError: ' + str(e))
                    used_time = watch.wall_elapsed('ensemble_builder')
                    time.sleep(2)
                    continue
                except IndexError as e:
                    self.logger.error('Caught IndexError: ' + str(e))
                    used_time = watch.wall_elapsed('ensemble_builder')
                    time.sleep(2)
                    continue
                # except Exception as e:
                #     self.logger.error('Caught error! %s', str(e))
                #     used_time = watch.wall_elapsed('ensemble_builder')
                #     time.sleep(2)
                #     continue

                # Output the score
                self.logger.info('Training performance: %f' %
                                 ensemble.train_score_)

                self.logger.info(
                    'Building the ensemble took %f seconds' %
                    watch.wall_elapsed('ensemble_iter_' + str(num_iteration)))

            # Set this variable here to avoid re-running the ensemble builder
            # every two seconds in case the ensemble did not change
            current_num_models = len(dir_ensemble_model_files)

            ensemble_predictions = ensemble.predict(all_predictions_train)
            if sys.version_info[0] == 2:
                ensemble_predictions.flags.writeable = False
                current_hash = hash(ensemble_predictions.data)
            else:
                current_hash = hash(ensemble_predictions.data.tobytes())

            # Only output a new ensemble and new predictions if the output of the
            # ensemble would actually change!
            # TODO this is neither safe (collisions, tests only with the ensemble
            # prediction, but not the ensemble), implement a hash function for
            # each possible ensemble builder.
            if last_hash is not None:
                if current_hash == last_hash:
                    self.logger.info('Ensemble output did not change.')
                    time.sleep(2)
                    continue
                else:
                    last_hash = current_hash
            else:
                last_hash = current_hash

            # Save the ensemble for later use in the main auto-sklearn module!
            self.backend.save_ensemble(ensemble, index_run, self.seed)

            # Save predictions for valid and test data set
            if len(dir_valid_list) == len(dir_ensemble_model_files):
                all_predictions_valid = np.array(all_predictions_valid)
                ensemble_predictions_valid = ensemble.predict(
                    all_predictions_valid)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_valid = ensemble_predictions_valid[:, 1]
                self.backend.save_predictions_as_txt(
                    ensemble_predictions_valid, 'valid', index_run,
                    prefix=self.dataset_name)
            else:
                self.logger.info(
                    'Could not find as many validation set predictions (%d) '
                    'as ensemble predictions (%d)!',
                    len(dir_valid_list), len(dir_ensemble_model_files))

            del all_predictions_valid

            if len(dir_test_list) == len(dir_ensemble_model_files):
                all_predictions_test = np.array(all_predictions_test)
                ensemble_predictions_test = ensemble.predict(
                    all_predictions_test)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_test = ensemble_predictions_test[:, 1]
                self.backend.save_predictions_as_txt(
                    ensemble_predictions_test, 'test', index_run,
                    prefix=self.dataset_name)
            else:
                self.logger.info(
                    'Could not find as many test set predictions (%d) as '
                    'ensemble predictions (%d)!',
                    len(dir_test_list), len(dir_ensemble_model_files))

            del all_predictions_test

            current_num_models = len(dir_ensemble_model_files)
            watch.stop_task('index_run' + str(index_run))
            time_iter = watch.get_wall_dur('index_run' + str(index_run))
            used_time = watch.wall_elapsed('ensemble_builder')
            index_run += 1
    return

def score_ensemble_preds(self):
    """
    score predictions on ensemble building data set;
    populates self.read_preds
    """

    self.logger.debug("Read ensemble data set predictions")

    if self.y_true_ensemble is None:
        try:
            self.y_true_ensemble = self.backend.load_targets_ensemble()
        except FileNotFoundError:
            self.logger.debug(
                "Could not find true targets on ensemble data set: %s",
                traceback.format_exc(),
            )
            return False

    # no validation predictions so far -- no dir
    if not os.path.isdir(self.dir_ensemble):
        self.logger.debug("No ensemble dataset prediction directory found")
        return False

    if self.shared_mode is False:
        pred_path = os.path.join(
            glob.escape(self.dir_ensemble),
            'predictions_ensemble_%s_*_*.npy*' % self.seed,
        )
    # pSMAC
    else:
        pred_path = os.path.join(
            glob.escape(self.dir_ensemble),
            'predictions_ensemble_*_*_*.npy*',
        )
    y_ens_files = glob.glob(pred_path)
    y_ens_files = [y_ens_file for y_ens_file in y_ens_files
                   if y_ens_file.endswith('.npy') or
                   y_ens_file.endswith('.npy.gz')]
    self.y_ens_files = y_ens_files
    # no validation predictions so far -- no files
    if len(self.y_ens_files) == 0:
        self.logger.debug("Found no prediction files on ensemble data set:"
                          " %s" % pred_path)
        return False

    # First sort files chronologically
    to_read = []
    for y_ens_fn in self.y_ens_files:
        match = self.model_fn_re.search(y_ens_fn)
        _seed = int(match.group(1))
        _num_run = int(match.group(2))
        _budget = float(match.group(3))
        to_read.append([y_ens_fn, match, _seed, _num_run, _budget])

    n_read_files = 0
    # Now read file wrt to num_run
    for y_ens_fn, match, _seed, _num_run, _budget in \
            sorted(to_read, key=lambda x: x[3]):
        if self.read_at_most and n_read_files >= self.read_at_most:
            # limit the number of files that will be read
            # to limit memory consumption
            break

        if not y_ens_fn.endswith(".npy") and not y_ens_fn.endswith(".npy.gz"):
            self.logger.info('Error loading file (not .npy or .npy.gz): %s',
                             y_ens_fn)
            continue

        if not self.read_preds.get(y_ens_fn):
            self.read_preds[y_ens_fn] = {
                "ens_score": -1,
                "mtime_ens": 0,
                "mtime_valid": 0,
                "mtime_test": 0,
                "seed": _seed,
                "num_run": _num_run,
                "budget": _budget,
                "disc_space_cost_mb": None,
                Y_ENSEMBLE: None,
                Y_VALID: None,
                Y_TEST: None,
                # Lazy keys so far:
                # 0 - not loaded
                # 1 - loaded and in memory
                # 2 - loaded but dropped again
                "loaded": 0
            }

        if self.read_preds[y_ens_fn]["mtime_ens"] == os.path.getmtime(y_ens_fn):
            # same time stamp; nothing changed;
            continue

        # actually read the predictions and score them
        try:
            y_ensemble = self._read_np_fn(y_ens_fn)
            score = calculate_score(solution=self.y_true_ensemble,
                                    prediction=y_ensemble,
                                    task_type=self.task_type,
                                    metric=self.metric,
                                    all_scoring_functions=False)

            if self.read_preds[y_ens_fn]["ens_score"] > -1:
                self.logger.debug(
                    'Changing ensemble score for file %s from %f to %f '
                    'because file modification time changed? %f - %f',
                    y_ens_fn,
                    self.read_preds[y_ens_fn]["ens_score"],
                    score,
                    self.read_preds[y_ens_fn]["mtime_ens"],
                    os.path.getmtime(y_ens_fn),
                )

            self.read_preds[y_ens_fn]["ens_score"] = score

            # It is not needed to create the object here
            # To save memory, we just score the object.
            # self.read_preds[y_ens_fn][Y_ENSEMBLE] = y_ensemble
            self.read_preds[y_ens_fn]["mtime_ens"] = os.path.getmtime(y_ens_fn)
            self.read_preds[y_ens_fn]["loaded"] = 2
            self.read_preds[y_ens_fn]["disc_space_cost_mb"] = \
                self.get_disk_consumption(y_ens_fn)

            n_read_files += 1

        except Exception:
            self.logger.warning(
                'Error loading %s: %s',
                y_ens_fn,
                traceback.format_exc(),
            )
            self.read_preds[y_ens_fn]["ens_score"] = -1

    self.logger.debug(
        'Done reading %d new prediction files. Loaded %d predictions in '
        'total.',
        n_read_files,
        np.sum([pred["loaded"] > 0 for pred in self.read_preds.values()])
    )
    return True

def test_calculate_loss():
    # In a 0-1 ranged scorer, make sure that the loss
    # has the expected positive value
    y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
    y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
    )

    loss = 1.0 - score
    assert pytest.approx(loss) == calculate_loss(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
    )

    # Test the dictionary case
    score_dict = calculate_score(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
        scoring_functions=[autosklearn.metrics.accuracy,
                           autosklearn.metrics.balanced_accuracy],
    )
    expected_score_dict = {
        'accuracy': 0.9,
        'balanced_accuracy': 0.9285714285714286,
    }
    loss_dict = calculate_loss(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
        scoring_functions=[autosklearn.metrics.accuracy,
                           autosklearn.metrics.balanced_accuracy],
    )
    for expected_metric, expected_score in expected_score_dict.items():
        assert pytest.approx(expected_score) == score_dict[expected_metric]
        assert pytest.approx(1 - expected_score) == loss_dict[expected_metric]

    # Lastly make sure that metrics whose optimum is zero
    # are also properly working
    y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
    y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
    score = sklearn.metrics.mean_squared_error(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        solution=y_true,
        prediction=y_pred,
        task_type=REGRESSION,
        metric=autosklearn.metrics.mean_squared_error,
    )
    loss = score
    assert pytest.approx(loss) == calculate_loss(
        solution=y_true,
        prediction=y_pred,
        task_type=REGRESSION,
        metric=autosklearn.metrics.mean_squared_error,
    )
