Example no. 1
    def predict_and_loss(self, train=False):

        if train:
            Y_pred = self.predict_function(self.X_train, self.model,
                                           self.task_type, self.Y_train)
            score = calculate_score(
                solution=self.Y_train,
                prediction=Y_pred,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=self.all_scoring_functions)
        else:
            Y_pred = self.predict_function(self.X_test, self.model,
                                           self.task_type, self.Y_train)
            score = calculate_score(
                solution=self.Y_test,
                prediction=Y_pred,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=self.all_scoring_functions)

        if hasattr(score, '__len__'):
            err = {key: 1 - score[key] for key in score}
        else:
            err = 1 - score

        return err, Y_pred, None, None
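A minimal standalone sketch of the single-metric path used above (assumption: an auto-sklearn release whose calculate_score accepts these keyword names; older versions use all_scoring_functions, newer ones scoring_functions). With a single metric the call returns a float, and the evaluator turns it into a loss as 1 - score:

    import numpy as np

    import autosklearn.metrics
    from autosklearn.constants import BINARY_CLASSIFICATION
    from autosklearn.metrics import calculate_score

    y_true = np.array([1.0, 1.0, 0.0, 0.0])
    # probability-like predictions, one column per class
    y_pred = np.array([[0.1, 0.9], [0.2, 0.8], [0.7, 0.3], [0.6, 0.4]])

    score = calculate_score(solution=y_true,
                            prediction=y_pred,
                            task_type=BINARY_CLASSIFICATION,
                            metric=autosklearn.metrics.accuracy)
    err = 1 - score  # 0.0 here, because every sample is classified correctly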
Example no. 2
    def predict_and_loss(self, train=False):

        if train:
            Y_pred = self.predict_function(self.X_train, self.model,
                                           self.task_type, self.Y_train)
            score = calculate_score(
                solution=self.Y_train,
                prediction=Y_pred,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=self.all_scoring_functions)
        else:
            Y_pred = self.predict_function(self.X_test, self.model,
                                           self.task_type, self.Y_train)
            score = calculate_score(
                solution=self.Y_test,
                prediction=Y_pred,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=self.all_scoring_functions)

        if hasattr(score, '__len__'):
            err = {key: 1 - score[key] for key in score}
        else:
            err = 1 - score

        return err, Y_pred, Y_pred, Y_pred
Example no. 3
    def predict_and_loss(self, train=False):

        if train:
            Y_pred = self.predict_function(self.X_train, self.model,
                                           self.task_type, self.Y_train)
            score = calculate_score(
                solution=self.Y_train,
                prediction=Y_pred,
                task_type=self.task_type,
                metric=self.metric,
                scoring_functions=self.scoring_functions)
        else:
            Y_pred = self.predict_function(self.X_test, self.model,
                                           self.task_type, self.Y_train)
            score = calculate_score(
                solution=self.Y_test,
                prediction=Y_pred,
                task_type=self.task_type,
                metric=self.metric,
                scoring_functions=self.scoring_functions)

        if hasattr(score, '__len__'):
            if self.task_type in CLASSIFICATION_TASKS:
                err = {key: metric._optimum - score[key] for key, metric in
                       CLASSIFICATION_METRICS.items() if key in score}
            else:
                err = {key: metric._optimum - score[key] for key, metric in
                       REGRESSION_METRICS.items() if key in score}
        else:
            err = self.metric._optimum - score

        return err, Y_pred, None, None
Example no. 4
    def _fast(self, predictions, labels):
        """Fast version of Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        if self.sorted_initialization:
            n_best = 20
            indices = self._sorted_initialization(predictions, labels, n_best)
            for idx in indices:
                ensemble.append(predictions[idx])
                order.append(idx)
                ensemble_ = np.array(ensemble).mean(axis=0)
                ensemble_performance = calculate_score(labels, ensemble_,
                                                       self.task_type,
                                                       self.metric,
                                                       ensemble_.shape[1])
                trajectory.append(ensemble_performance)
            ensemble_size -= n_best

        for i in range(ensemble_size):
            scores = np.zeros((len(predictions)))
            s = len(ensemble)
            if s == 0:
                weighted_ensemble_prediction = np.zeros(predictions[0].shape)
            else:
                ensemble_prediction = np.mean(np.array(ensemble), axis=0)
                weighted_ensemble_prediction = (s / float(s + 1)) * \
                                               ensemble_prediction
            fant_ensemble_prediction = np.zeros(
                weighted_ensemble_prediction.shape)
            for j, pred in enumerate(predictions):
                # TODO: this could potentially be vectorized! - let's profile
                # the script first!
                fant_ensemble_prediction[:, :] = weighted_ensemble_prediction + \
                                                 (1. / float(s + 1)) * pred
                scores[j] = 1 - calculate_score(
                    solution=labels,
                    prediction=fant_ensemble_prediction,
                    task_type=self.task_type,
                    metric=self.metric,
                    all_scoring_functions=False)

            all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
            best = np.random.choice(all_best)
            ensemble.append(predictions[best])
            trajectory.append(scores[best])
            order.append(best)

            # Handle special case
            if len(predictions) == 1:
                break

        self.indices_ = order
        self.trajectory_ = trajectory
        self.train_score_ = trajectory[-1]
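The greedy loop above can be illustrated outside auto-sklearn with plain NumPy: at every step, add (with replacement) the base model whose inclusion minimizes the loss of the averaged prediction. The sketch below only demonstrates the technique; the toy predictions and the accuracy-based loss are made up for the example and are not the library code:

    import numpy as np

    def greedy_ensemble_selection(predictions, labels, ensemble_size):
        """Pick ensemble_size members (with replacement) that greedily
        minimize 1 - accuracy of the averaged prediction."""
        ensemble, order = [], []
        for _ in range(ensemble_size):
            losses = np.zeros(len(predictions))
            for j, pred in enumerate(predictions):
                candidate = np.mean(np.array(ensemble + [pred]), axis=0)
                accuracy = np.mean(candidate.argmax(axis=1) == labels)
                losses[j] = 1 - accuracy
            best = int(np.argmin(losses))
            ensemble.append(predictions[best])
            order.append(best)
        return order

    # three toy base models predicting class probabilities for four samples
    labels = np.array([0, 1, 1, 0])
    predictions = [
        np.array([[0.9, 0.1], [0.4, 0.6], [0.6, 0.4], [0.8, 0.2]]),
        np.array([[0.2, 0.8], [0.1, 0.9], [0.3, 0.7], [0.9, 0.1]]),
        np.array([[0.6, 0.4], [0.5, 0.5], [0.2, 0.8], [0.3, 0.7]]),
    ]
    print(greedy_ensemble_selection(predictions, labels, ensemble_size=3))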
Example no. 5
    def test_unsupported_task_type(self):
        y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
        y_pred = \
            np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
        scorer = autosklearn.metrics.accuracy

        raised = False
        try:
            calculate_score(y_true, y_pred, 6, scorer)
        except NotImplementedError:
            raised = True
        self.assertTrue(raised)
Example no. 6
    def _slow(self, predictions, labels):
        """Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        if self.sorted_initialization:
            n_best = 20
            indices = self._sorted_initialization(predictions, labels, n_best)
            for idx in indices:
                ensemble.append(predictions[idx])
                order.append(idx)
                ensemble_ = np.array(ensemble).mean(axis=0)
                ensemble_performance = calculate_score(
                    solution=labels,
                    prediction=ensemble_,
                    task_type=self.task_type,
                    metric=self.metric,
                    all_scoring_functions=False)
                trajectory.append(ensemble_performance)
            ensemble_size -= n_best

        for i in range(ensemble_size):
            scores = np.zeros([predictions.shape[0]])
            for j, pred in enumerate(predictions):
                ensemble.append(pred)
                ensemble_prediction = np.mean(np.array(ensemble), axis=0)
                scores[j] = 1 - calculate_score(
                    solution=labels,
                    prediction=ensemble_prediction,
                    task_type=self.task_type,
                    metric=self.metric,
                    all_scoring_functions=False)
                ensemble.pop()
            best = np.nanargmin(scores)
            ensemble.append(predictions[best])
            trajectory.append(scores[best])
            order.append(best)

            # Handle special case
            if len(predictions) == 1:
                break

        self.indices_ = np.array(order)
        self.trajectory_ = np.array(trajectory)
        self.train_score_ = trajectory[-1]
Example no. 7
    def test_classification_scoring_functions(self):

        scoring_functions = list(
            autosklearn.metrics.CLASSIFICATION_METRICS.values())
        scoring_functions.remove(autosklearn.metrics.accuracy)
        fail_metrics = ['precision_samples', 'recall_samples', 'f1_samples']
        success_metrics = list(
            autosklearn.metrics.CLASSIFICATION_METRICS.keys())
        for metric in fail_metrics:
            success_metrics.remove(metric)

        y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
        y_pred = \
            np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
        score_dict = calculate_score(y_true, y_pred, BINARY_CLASSIFICATION,
                                     autosklearn.metrics.accuracy,
                                     scoring_functions)

        self.assertIsInstance(score_dict, dict)
        self.assertEqual(len(success_metrics), len(score_dict))
        for metric in fail_metrics:
            self.assertNotIn(metric, score_dict.keys())
        for metric in success_metrics:
            self.assertIn(metric, score_dict.keys())
            self.assertAlmostEqual(
                autosklearn.metrics.CLASSIFICATION_METRICS[metric]._optimum,
                score_dict[metric])
Example no. 8
    def _loss(self, y_true, y_hat, all_scoring_functions=None):
        all_scoring_functions = (self.all_scoring_functions
                                 if all_scoring_functions is None else
                                 all_scoring_functions)
        if not isinstance(self.configuration, Configuration):
            if all_scoring_functions:
                return {self.metric: 1.0}
            else:
                return 1.0

        score = calculate_score(y_true,
                                y_hat,
                                self.task_type,
                                self.metric,
                                all_scoring_functions=all_scoring_functions)

        if hasattr(score, '__len__'):
            # TODO: instead of using self.metric, it should use all metrics given by key.
            # But now this throws error...

            err = {
                key: metric._optimum - score[key]
                for key, metric in CLASSIFICATION_METRICS.items()
                if key in score
            }
        else:
            err = self.metric._optimum - score

        return err
Example no. 9
    def test_regression_only_metric(self):
        y_true = np.array([1, 2, 3, 4])
        y_pred = y_true.copy()
        scorer = autosklearn.metrics.root_mean_squared_error

        score = calculate_score(y_true, y_pred, REGRESSION, scorer)
        previous_score = scorer._optimum
        self.assertAlmostEqual(score, previous_score)
Example no. 10
    def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
        """
        Records a snapshot of how the performance looks at a given training
        time.

        Parameters
        ----------
        train_pred: np.ndarray
            The predictions on the ensemble building (train) set using ensemble
        valid_pred: np.ndarray
            The predictions on the validation set using ensemble
        test_pred: np.ndarray
            The predictions on the test set using ensemble

        """
        performance_stamp = {
            'Timestamp':
            pd.Timestamp.now(),
            'ensemble_optimization_score':
            calculate_score(solution=self.y_true_ensemble,
                            prediction=train_pred,
                            task_type=self.task_type,
                            metric=self.metric,
                            all_scoring_functions=False)
        }
        if valid_pred is not None:
            # TODO: valid_pred are a legacy from competition manager
            # and this if never happens. Re-evaluate Y_valid support
            performance_stamp['ensemble_val_score'] = calculate_score(
                solution=self.y_valid,
                prediction=valid_pred,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=False)

        # In case test_pred was provided
        if test_pred is not None:
            performance_stamp['ensemble_test_score'] = calculate_score(
                solution=self.y_test,
                prediction=test_pred,
                task_type=self.task_type,
                metric=self.metric,
                all_scoring_functions=False)

        self.queue.put(performance_stamp)
Example no. 11
 def score(self, X, y):
     # fix: Consider only index 1 of second dimension
     # Don't know if the reshaping should be done there or in calculate_score
     prediction = self.predict(X)
     return calculate_score(solution=y,
                            prediction=prediction,
                            task_type=self._task,
                            metric=self._metric,
                            all_scoring_functions=False)
Example no. 12
 def score(self, X, y):
     # fix: Consider only index 1 of second dimension
     # Don't know if the reshaping should be done there or in calculate_score
     prediction = self.predict(X)
     return calculate_score(solution=y,
                            prediction=prediction,
                            task_type=self._task,
                            metric=self._metric,
                            all_scoring_functions=False)
Example no. 13
    def _sorted_initialization(self, predictions, labels, n_best):
        perf = np.zeros([predictions.shape[0]])

        for idx, prediction in enumerate(predictions):
            perf[idx] = calculate_score(labels, prediction, self.task_type,
                                        self.metric, predictions.shape[1])

        indices = np.argsort(perf)[perf.shape[0] - n_best:]
        return indices
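The slice at the end keeps the indices of the n_best highest scores: np.argsort sorts in ascending order, so the last n_best positions belong to the best-performing models. A quick standalone check with made-up numbers:

    import numpy as np

    perf = np.array([0.71, 0.93, 0.10, 0.88, 0.65])
    n_best = 2
    indices = np.argsort(perf)[perf.shape[0] - n_best:]
    print(indices)  # [3 1] -> the two highest scores, 0.88 and 0.93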
Example no. 14
    def test_classification_only_metric(self):
        y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0])
        y_pred = \
            np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]])
        scorer = autosklearn.metrics.accuracy

        score = calculate_score(y_true, y_pred, BINARY_CLASSIFICATION, scorer)

        previous_score = scorer._optimum
        self.assertAlmostEqual(score, previous_score)
Example no. 15
    def _slow(self, predictions: List[np.ndarray], labels: np.ndarray) -> None:
        """Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        for i in range(ensemble_size):
            scores = np.zeros(
                [np.shape(predictions)[0]],
                dtype=np.float64,
            )
            for j, pred in enumerate(predictions):
                ensemble.append(pred)
                ensemble_prediction = np.mean(np.array(ensemble), axis=0)
                # calculate_score is versatile and can return a dict of scores;
                # when scoring_functions=None, we know it will be a float
                calculated_score = cast(
                    float,
                    calculate_score(solution=labels,
                                    prediction=ensemble_prediction,
                                    task_type=self.task_type,
                                    metric=self.metric,
                                    scoring_functions=None))
                scores[j] = self.metric._optimum - calculated_score
                ensemble.pop()
            best = np.nanargmin(scores)
            ensemble.append(predictions[best])
            trajectory.append(scores[best])
            order.append(best)

            # Handle special case
            if len(predictions) == 1:
                break

        self.indices_ = np.array(
            order,
            dtype=np.int64,
        )
        self.trajectory_ = np.array(
            trajectory,
            dtype=np.float64,
        )
        self.train_score_ = trajectory[-1]
Example no. 16
    def _loss(self, y_true, y_hat, all_scoring_functions=None):
        """Auto-sklearn follows a minimization goal, so the make_scorer
        sign is used as a guide to obtain the value to reduce.

        On this regard, to optimize a metric:
            1- the score is calculated with calculate_score, with the caveat that,
            if greater is not better for the metric, a negative score is returned.
            2- the err (the optimization goal) is then:
                optimum - (metric.sign * actual_score)
                For accuracy for example: optimum(1) - (+1 * actual score)
                For logloss for example: optimum(0) - (-1 * actual score)
        """
        all_scoring_functions = (self.all_scoring_functions
                                 if all_scoring_functions is None else
                                 all_scoring_functions)
        if not isinstance(self.configuration, Configuration):
            if all_scoring_functions:
                return {self.metric: 1.0}
            else:
                return 1.0

        score = calculate_score(y_true,
                                y_hat,
                                self.task_type,
                                self.metric,
                                all_scoring_functions=all_scoring_functions)

        if hasattr(score, '__len__'):
            # TODO: instead of using self.metric, it should use all metrics given by key.
            # But now this throws error...
            if self.task_type in CLASSIFICATION_TASKS:
                err = {
                    key: metric._optimum - score[key]
                    for key, metric in CLASSIFICATION_METRICS.items()
                    if key in score
                }
            else:
                err = {
                    key: metric._optimum - score[key]
                    for key, metric in REGRESSION_METRICS.items()
                    if key in score
                }
        else:
            err = self.metric._optimum - score

        return err
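The rule in the docstring can be checked with two toy numbers. The constants below (optimum 1 and sign +1 for accuracy, optimum 0 and sign -1 for log loss) follow the convention described above and are hard-coded here purely for illustration:

    # accuracy: greater is better, so calculate_score returns the raw value
    raw_accuracy = 0.9
    accuracy_err = 1.0 - (+1 * raw_accuracy)   # 0.1, smaller is better

    # log loss: greater is worse, so calculate_score returns the negated value
    raw_log_loss = 0.35
    log_loss_err = 0.0 - (-1 * raw_log_loss)   # 0.35, again smaller is better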
Example no. 17
    def _loss(self, y_true, y_hat):
        if not isinstance(self.configuration, Configuration):
            if self.all_scoring_functions:
                return {self.metric: 1.0}
            else:
                return 1.0

        score = calculate_score(
            y_true,
            y_hat,
            self.task_type,
            self.metric,
            all_scoring_functions=self.all_scoring_functions)

        if hasattr(score, '__len__'):
            err = {key: 1 - score[key] for key in score}
        else:
            err = 1 - score

        return err
Example no. 18
    def _loss(self, y_true, y_hat, all_scoring_functions=None):
        all_scoring_functions = (
            self.all_scoring_functions
            if all_scoring_functions is None
            else all_scoring_functions
        )
        if not isinstance(self.configuration, Configuration):
            if all_scoring_functions:
                return {self.metric: 1.0}
            else:
                return 1.0

        score = calculate_score(
            y_true, y_hat, self.task_type, self.metric,
            all_scoring_functions=all_scoring_functions)

        if hasattr(score, '__len__'):
            err = {key: self.metric._optimum - score[key] for key in score}
        else:
            err = self.metric._optimum - score

        return err
Example no. 19
    def test_regression_scoring_functions(self):

        scoring_functions = list(
            autosklearn.metrics.REGRESSION_METRICS.values())
        scoring_functions.remove(autosklearn.metrics.root_mean_squared_error)

        metrics = list(autosklearn.metrics.REGRESSION_METRICS.keys())
        metrics.remove('mean_squared_log_error')

        y_true = np.array([1, 2, 3, -4])
        y_pred = y_true.copy()

        score_dict = calculate_score(
            y_true, y_pred, REGRESSION,
            autosklearn.metrics.root_mean_squared_error, scoring_functions)

        self.assertIsInstance(score_dict, dict)
        self.assertEqual(len(metrics), len(score_dict))
        for metric in metrics:
            self.assertIn(metric, score_dict.keys())
            self.assertAlmostEqual(
                autosklearn.metrics.REGRESSION_METRICS[metric]._optimum,
                score_dict[metric])
Example no. 20
    def read_ensemble_preds(self):
        """
            reading predictions on ensemble building data set; 
            populates self.read_preds
        """
        self.logger.debug("Read ensemble data set predictions")

        if self.y_true_ensemble is None:
            try:
                self.y_true_ensemble = self.backend.load_targets_ensemble()
            except FileNotFoundError:
                self.logger.debug(
                    "Could not find true targets on ensemble data set: %s",
                    traceback.format_exc(),
                )
                return False

        # no validation predictions so far -- no dir
        if not os.path.isdir(self.dir_ensemble):
            self.logger.debug("No ensemble dataset prediction directory found")
            return False

        if self.shared_mode is False:
            pred_path = os.path.join(
                self.dir_ensemble, 'predictions_ensemble_%s_*.npy' % self.seed)
        # pSMAC
        else:
            pred_path = os.path.join(self.dir_ensemble,
                                     'predictions_ensemble_*_*.npy')

        y_ens_files = glob.glob(pred_path)
        # no validation predictions so far -- no files
        if len(y_ens_files) == 0:
            self.logger.debug("Found no prediction files on ensemble data set:"
                              " %s" % pred_path)
            return False

        n_read_files = 0
        for y_ens_fn in y_ens_files:

            if self.read_at_most and n_read_files >= self.read_at_most:
                # limit the number of files that will be read
                # to limit memory consumption
                break

            if not y_ens_fn.endswith(".npy"):
                self.logger.info('Error loading file (not .npy): %s', y_ens_fn)
                continue

            match = self.model_fn_re.search(y_ens_fn)
            _seed = int(match.group(1))
            _num_run = int(match.group(2))

            if not self.read_preds.get(y_ens_fn):
                self.read_preds[y_ens_fn] = {
                    "ens_score": -1,
                    "mtime_ens": 0,
                    "mtime_valid": 0,
                    "mtime_test": 0,
                    "seed": _seed,
                    "num_run": _num_run,
                    Y_ENSEMBLE: None,
                    Y_VALID: None,
                    Y_TEST: None,
                    # Lazy keys so far:
                    # 0 - not loaded
                    # 1 - loaded and in memory
                    # 2 - loaded but dropped again
                    "loaded": 0
                }

            if self.read_preds[y_ens_fn]["mtime_ens"] == os.path.getmtime(
                    y_ens_fn):
                # same time stamp; nothing changed;
                continue

            # actually read the predictions and score them
            try:
                with open(y_ens_fn, 'rb') as fp:
                    y_ensemble = self._read_np_fn(fp=fp)
                    score = calculate_score(
                        solution=self.y_true_ensemble,  # y_ensemble = y_true for ensemble set
                        prediction=y_ensemble,
                        task_type=self.task_type,
                        metric=self.metric,
                        all_scoring_functions=False)

                    if self.read_preds[y_ens_fn]["ens_score"] > -1:
                        self.logger.critical(
                            'Changing ensemble score for file %s from %f to %f '
                            'because file modification time changed? %f - %f',
                            y_ens_fn,
                            self.read_preds[y_ens_fn]["ens_score"],
                            score,
                            self.read_preds[y_ens_fn]["mtime_ens"],
                            os.path.getmtime(y_ens_fn),
                        )

                    self.read_preds[y_ens_fn]["ens_score"] = score
                    self.read_preds[y_ens_fn][Y_ENSEMBLE] = y_ensemble
                    self.read_preds[y_ens_fn]["mtime_ens"] = os.path.getmtime(
                        y_ens_fn)
                    self.read_preds[y_ens_fn]["loaded"] = 1

                    n_read_files += 1

            except Exception:
                self.logger.warning(
                    'Error loading %s: %s',
                    y_ens_fn,
                    traceback.format_exc(),
                )
                self.read_preds[y_ens_fn]["ens_score"] = -1

        self.logger.debug(
            'Done reading %d new prediction files. Loaded %d predictions in '
            'total.', n_read_files,
            np.sum([pred["loaded"] > 0 for pred in self.read_preds.values()]))
        return True
Example no. 21
    def read_ensemble_preds(self):
        """
            reading predictions on ensemble building data set; 
            populates self.read_preds
        """
        self.logger.debug("Read ensemble data set predictions")
        
        if self.y_true_ensemble is None:
            try:
                self.y_true_ensemble = self.backend.load_targets_ensemble()
            except FileNotFoundError:
                self.logger.debug(
                    "Could not find true targets on ensemble data set: %s",
                    traceback.format_exc(),
                )
                return False
            
        # no validation predictions so far -- no dir
        if not os.path.isdir(self.dir_ensemble):
            self.logger.debug("No ensemble dataset prediction directory found")
            return False
        
        if self.shared_mode is False:
            pred_path = os.path.join(
                    self.dir_ensemble,
                    'predictions_ensemble_%s_*.npy' % self.seed)
        # pSMAC
        else:
            pred_path = os.path.join(
                    self.dir_ensemble,
                    'predictions_ensemble_*_*.npy')

        y_ens_files = glob.glob(pred_path)
        # no validation predictions so far -- no files
        if len(y_ens_files) == 0:
            self.logger.debug("Found no prediction files on ensemble data set:"
                              " %s" % pred_path)
            return False
        
        n_read_files = 0
        for y_ens_fn in y_ens_files:
            
            if self.read_at_most and n_read_files >= self.read_at_most:
                # limit the number of files that will be read 
                # to limit memory consumption
                break
            
            if not y_ens_fn.endswith(".npy"):
                self.logger.info('Error loading file (not .npy): %s', y_ens_fn)
                continue
            
            match = self.model_fn_re.search(y_ens_fn)
            _seed = int(match.group(1))
            _num_run = int(match.group(2))
            
            if not self.read_preds.get(y_ens_fn):
                self.read_preds[y_ens_fn] = {
                    "ens_score": -1,
                    "mtime_ens": 0,
                    "mtime_valid": 0,
                    "mtime_test": 0,
                    "seed": _seed,
                    "num_run": _num_run,
                    Y_ENSEMBLE: None,
                    Y_VALID: None,
                    Y_TEST: None,
                    # Lazy keys so far:
                    # 0 - not loaded
                    # 1 - loaded and in memory
                    # 2 - loaded but dropped again
                    "loaded": 0
                }
                
            if self.read_preds[y_ens_fn]["mtime_ens"] == os.path.getmtime(y_ens_fn):
                # same time stamp; nothing changed;
                continue

            # actually read the predictions and score them
            try:
                with open(y_ens_fn, 'rb') as fp:
                    y_ensemble = self._read_np_fn(fp=fp)
                    score = calculate_score(solution=self.y_true_ensemble,  # y_ensemble = y_true for ensemble set
                                            prediction=y_ensemble,
                                            task_type=self.task_type,
                                            metric=self.metric,
                                            all_scoring_functions=False)

                    if self.read_preds[y_ens_fn]["ens_score"] > -1:
                        self.logger.critical(
                            'Changing ensemble score for file %s from %f to %f '
                            'because file modification time changed? %f - %f',
                            y_ens_fn,
                            self.read_preds[y_ens_fn]["ens_score"],
                            score,
                            self.read_preds[y_ens_fn]["mtime_ens"],
                            os.path.getmtime(y_ens_fn),
                        )

                    self.read_preds[y_ens_fn]["ens_score"] = score
                    self.read_preds[y_ens_fn][Y_ENSEMBLE] = y_ensemble
                    self.read_preds[y_ens_fn]["mtime_ens"] = os.path.getmtime(
                        y_ens_fn
                    )
                    self.read_preds[y_ens_fn]["loaded"] = 1

                    n_read_files += 1

            except Exception:
                self.logger.warning(
                    'Error loading %s: %s',
                    y_ens_fn,
                    traceback.format_exc(),
                )
                self.read_preds[y_ens_fn]["ens_score"] = -1

        self.logger.debug(
            'Done reading %d new prediction files. Loaded %d predictions in '
            'total.',
            n_read_files,
            np.sum([pred["loaded"] > 0 for pred in self.read_preds.values()])
        )
        return True
Example no. 22
    def _fast(
        self,
        predictions: List[np.ndarray],
        labels: np.ndarray,
    ) -> None:
        """Fast version of Rich Caruana's ensemble selection method."""
        self.num_input_models_ = len(predictions)

        ensemble = []  # type: List[np.ndarray]
        trajectory = []
        order = []

        ensemble_size = self.ensemble_size

        weighted_ensemble_prediction = np.zeros(
            predictions[0].shape,
            dtype=np.float64,
        )
        fant_ensemble_prediction = np.zeros(
            weighted_ensemble_prediction.shape,
            dtype=np.float64,
        )
        for i in range(ensemble_size):
            scores = np.zeros(
                (len(predictions)),
                dtype=np.float64,
            )
            s = len(ensemble)
            if s == 0:
                weighted_ensemble_prediction.fill(0.0)
            else:
                weighted_ensemble_prediction.fill(0.0)
                for pred in ensemble:
                    np.add(
                        weighted_ensemble_prediction,
                        pred,
                        out=weighted_ensemble_prediction,
                    )
                np.multiply(
                    weighted_ensemble_prediction,
                    1 / s,
                    out=weighted_ensemble_prediction,
                )
                np.multiply(
                    weighted_ensemble_prediction,
                    (s / float(s + 1)),
                    out=weighted_ensemble_prediction,
                )

            # Memory-efficient averaging!
            for j, pred in enumerate(predictions):
                # TODO: this could potentially be vectorized! - let's profile
                # the script first!
                fant_ensemble_prediction.fill(0.0)
                np.add(fant_ensemble_prediction,
                       weighted_ensemble_prediction,
                       out=fant_ensemble_prediction)
                np.add(fant_ensemble_prediction, (1. / float(s + 1)) * pred,
                       out=fant_ensemble_prediction)

                # calculate_score is versatile and can return a dict of scores;
                # when scoring_functions=None, we know it will be a float
                calculated_score = cast(
                    float,
                    calculate_score(solution=labels,
                                    prediction=fant_ensemble_prediction,
                                    task_type=self.task_type,
                                    metric=self.metric,
                                    scoring_functions=None))
                scores[j] = self.metric._optimum - calculated_score

            all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
            best = self.random_state.choice(all_best)
            ensemble.append(predictions[best])
            trajectory.append(scores[best])
            order.append(best)

            # Handle special case
            if len(predictions) == 1:
                break

        self.indices_ = order
        self.trajectory_ = trajectory
        self.train_score_ = trajectory[-1]
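The fill()/np.add/np.multiply calls with out= above reuse two preallocated buffers instead of allocating a fresh array for every candidate model. A stripped-down illustration of the same pattern (the array sizes are arbitrary):

    import numpy as np

    preds = [np.random.rand(1000, 2) for _ in range(5)]
    buffer = np.zeros_like(preds[0])

    # average the predictions without creating intermediate arrays
    buffer.fill(0.0)
    for p in preds:
        np.add(buffer, p, out=buffer)
    np.multiply(buffer, 1.0 / len(preds), out=buffer)

    assert np.allclose(buffer, np.mean(preds, axis=0))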
Example no. 23
    def main(self):

        watch = StopWatch()
        watch.start_task('ensemble_builder')

        used_time = 0
        time_iter = 0
        index_run = 0
        num_iteration = 0
        current_num_models = 0
        last_hash = None
        current_hash = None

        dir_ensemble = os.path.join(self.backend.temporary_directory,
                                    '.auto-sklearn', 'predictions_ensemble')
        dir_valid = os.path.join(self.backend.temporary_directory,
                                 '.auto-sklearn', 'predictions_valid')
        dir_test = os.path.join(self.backend.temporary_directory,
                                '.auto-sklearn', 'predictions_test')
        paths_ = [dir_ensemble, dir_valid, dir_test]

        dir_ensemble_list_mtimes = []

        self.logger.debug(
            'Starting main loop with %f seconds and %d iterations '
            'left.' % (self.limit - used_time, num_iteration))
        while used_time < self.limit or (self.max_iterations > 0 and
                                         self.max_iterations >= num_iteration):
            num_iteration += 1
            self.logger.debug('Time left: %f', self.limit - used_time)
            self.logger.debug('Time last ensemble building: %f', time_iter)

            # Reload the ensemble targets every iteration; important, because cv may
            # update the ensemble targets in the course of running auto-sklearn
            # TODO update cv in order to not need this any more!
            targets_ensemble = self.backend.load_targets_ensemble()

            # Load the predictions from the models
            exists = [os.path.isdir(dir_) for dir_ in paths_]
            if not exists[0]:  # all(exists):
                self.logger.debug('Prediction directory %s does not exist!' %
                                  dir_ensemble)
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            if self.shared_mode is False:
                dir_ensemble_list = sorted(
                    glob.glob(
                        os.path.join(
                            dir_ensemble,
                            'predictions_ensemble_%s_*.npy' % self.seed)))
                if exists[1]:
                    dir_valid_list = sorted(
                        glob.glob(
                            os.path.join(
                                dir_valid,
                                'predictions_valid_%s_*.npy' % self.seed)))
                else:
                    dir_valid_list = []
                if exists[2]:
                    dir_test_list = sorted(
                        glob.glob(
                            os.path.join(
                                dir_test,
                                'predictions_test_%s_*.npy' % self.seed)))
                else:
                    dir_test_list = []
            else:
                dir_ensemble_list = sorted(os.listdir(dir_ensemble))
                dir_valid_list = sorted(
                    os.listdir(dir_valid)) if exists[1] else []
                dir_test_list = sorted(
                    os.listdir(dir_test)) if exists[2] else []

            # Check the modification times because predictions can be updated
            # over time!
            old_dir_ensemble_list_mtimes = dir_ensemble_list_mtimes
            dir_ensemble_list_mtimes = []
            # The ensemble dir can contain non-model files. We filter them and
            # use the following list instead
            dir_ensemble_model_files = []

            for dir_ensemble_file in dir_ensemble_list:
                if dir_ensemble_file.endswith("/"):
                    dir_ensemble_file = dir_ensemble_file[:-1]
                if not dir_ensemble_file.endswith(".npy"):
                    self.logger.info('Error loading file (not .npy): %s',
                                     dir_ensemble_file)
                    continue

                dir_ensemble_model_files.append(dir_ensemble_file)
                basename = os.path.basename(dir_ensemble_file)
                dir_ensemble_file = os.path.join(dir_ensemble, basename)
                mtime = os.path.getmtime(dir_ensemble_file)
                dir_ensemble_list_mtimes.append(mtime)

            if len(dir_ensemble_model_files) == 0:
                self.logger.debug('Directories are empty')
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            if len(dir_ensemble_model_files) <= current_num_models and \
                    old_dir_ensemble_list_mtimes == dir_ensemble_list_mtimes:
                self.logger.debug('Nothing has changed since the last time')
                time.sleep(2)
                used_time = watch.wall_elapsed('ensemble_builder')
                continue

            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                # TODO restructure time management in the ensemble builder,
                # what is the time of index_run actually needed for?
                watch.start_task('index_run' + str(index_run))
            watch.start_task('ensemble_iter_' + str(num_iteration))

            # List of num_runs (which are in the filename) which will be included
            #  later
            include_num_runs = []
            backup_num_runs = []
            model_and_automl_re = re.compile(r'_([0-9]*)_([0-9]*)\.npy')
            if self.ensemble_nbest is not None:
                # Keeps track of the single scores of each model in our ensemble
                scores_nbest = []
                # The indices of the model that are currently in our ensemble
                indices_nbest = []
                # The names of the models
                model_names = []

            model_names_to_scores = dict()

            model_idx = 0
            for model_name in dir_ensemble_model_files:
                if model_name.endswith("/"):
                    model_name = model_name[:-1]
                basename = os.path.basename(model_name)

                try:
                    with open(os.path.join(dir_ensemble, basename),
                              'rb') as fh:
                        if self.precision == "16":
                            predictions = np.load(fh).astype(dtype=np.float16)
                        elif self.precision == "32":
                            predictions = np.load(fh).astype(dtype=np.float32)
                        elif self.precision == "64":
                            predictions = np.load(fh).astype(dtype=np.float64)
                        else:
                            predictions = np.load(fh)

                    score = calculate_score(solution=targets_ensemble,
                                            prediction=predictions,
                                            task_type=self.task_type,
                                            metric=self.metric,
                                            all_scoring_functions=False)

                except Exception as e:
                    self.logger.warning('Error loading %s: %s - %s', basename,
                                        type(e), e)
                    score = -1

                model_names_to_scores[model_name] = score
                match = model_and_automl_re.search(model_name)
                automl_seed = int(match.group(1))
                num_run = int(match.group(2))

                if self.ensemble_nbest is not None:
                    if score <= 0.001:
                        self.logger.info('Model only predicts at random: ' +
                                         model_name + ' has score: ' +
                                         str(score))
                        backup_num_runs.append((automl_seed, num_run))
                    # If we have less models in our ensemble than ensemble_nbest add
                    # the current model if it is better than random
                    elif len(scores_nbest) < self.ensemble_nbest:
                        scores_nbest.append(score)
                        indices_nbest.append(model_idx)
                        include_num_runs.append((automl_seed, num_run))
                        model_names.append(model_name)
                    else:
                        # Take the worst performing model in our ensemble so far
                        idx = np.argmin(np.array([scores_nbest]))

                        # If the current model is better than the worst model in
                        # our ensemble replace it by the current model
                        if scores_nbest[idx] < score:
                            self.logger.info(
                                'Worst model in our ensemble: %s with score %f '
                                'will be replaced by model %s with score %f',
                                model_names[idx], scores_nbest[idx],
                                model_name, score)
                            # Exclude the old model
                            del scores_nbest[idx]
                            scores_nbest.append(score)
                            del include_num_runs[idx]
                            del indices_nbest[idx]
                            indices_nbest.append(model_idx)
                            include_num_runs.append((automl_seed, num_run))
                            del model_names[idx]
                            model_names.append(model_name)

                        # Otherwise exclude the current model from the ensemble
                        else:
                            # include_num_runs.append(True)
                            pass

                else:
                    # Load all predictions that are better than random
                    if score <= 0.001:
                        # include_num_runs.append(True)
                        self.logger.info('Model only predicts at random: ' +
                                         model_name + ' has score: ' +
                                         str(score))
                        backup_num_runs.append((automl_seed, num_run))
                    else:
                        include_num_runs.append((automl_seed, num_run))

                model_idx += 1

            # If there is no model better than random guessing, we have to use
            # all models which do random guessing
            if len(include_num_runs) == 0:
                include_num_runs = backup_num_runs

            indices_to_model_names = dict()
            indices_to_run_num = dict()
            for i, model_name in enumerate(dir_ensemble_model_files):
                match = model_and_automl_re.search(model_name)
                automl_seed = int(match.group(1))
                num_run = int(match.group(2))
                if (automl_seed, num_run) in include_num_runs:
                    num_indices = len(indices_to_model_names)
                    indices_to_model_names[num_indices] = model_name
                    indices_to_run_num[num_indices] = (automl_seed, num_run)

            try:
                all_predictions_train, all_predictions_valid, all_predictions_test =\
                    self.get_all_predictions(dir_ensemble,
                                             dir_ensemble_model_files,
                                             dir_valid, dir_valid_list,
                                             dir_test, dir_test_list,
                                             include_num_runs,
                                             model_and_automl_re,
                                             self.precision)
            except IOError as e:
                print(e)
                self.logger.error('Could not load the predictions.')
                continue

            if len(include_num_runs) == 0:
                self.logger.error('All models do just random guessing')
                time.sleep(2)
                continue

            else:
                ensemble = EnsembleSelection(ensemble_size=self.ensemble_size,
                                             task_type=self.task_type,
                                             metric=self.metric)

                try:
                    ensemble.fit(all_predictions_train, targets_ensemble,
                                 include_num_runs)
                    self.logger.info(ensemble)

                except ValueError as e:
                    self.logger.error('Caught ValueError: ' + str(e))
                    used_time = watch.wall_elapsed('ensemble_builder')
                    time.sleep(2)
                    continue
                except IndexError as e:
                    self.logger.error('Caught IndexError: ' + str(e))
                    used_time = watch.wall_elapsed('ensemble_builder')
                    time.sleep(2)
                    continue
                #except Exception as e:
                #    self.logger.error('Caught error! %s', str(e))
                #    used_time = watch.wall_elapsed('ensemble_builder')
                #    time.sleep(2)
                #    continue

                # Output the score
                self.logger.info('Training performance: %f' %
                                 ensemble.train_score_)

                self.logger.info(
                    'Building the ensemble took %f seconds' %
                    watch.wall_elapsed('ensemble_iter_' + str(num_iteration)))

            # Set this variable here to avoid re-running the ensemble builder
            # every two seconds in case the ensemble did not change
            current_num_models = len(dir_ensemble_model_files)

            ensemble_predictions = ensemble.predict(all_predictions_train)
            if sys.version_info[0] == 2:
                ensemble_predictions.flags.writeable = False
                current_hash = hash(ensemble_predictions.data)
            else:
                current_hash = hash(ensemble_predictions.data.tobytes())

            # Only output a new ensemble and new predictions if the output of the
            # ensemble would actually change!
            # TODO this is neither safe (collisions, tests only with the ensemble
            #  prediction, but not the ensemble), implement a hash function for
            # each possible ensemble builder.
            if last_hash is not None:
                if current_hash == last_hash:
                    self.logger.info('Ensemble output did not change.')
                    time.sleep(2)
                    continue
                else:
                    last_hash = current_hash
            else:
                last_hash = current_hash

            # Save the ensemble for later use in the main auto-sklearn module!
            self.backend.save_ensemble(ensemble, index_run, self.seed)

            # Save predictions for valid and test data set
            if len(dir_valid_list) == len(dir_ensemble_model_files):
                all_predictions_valid = np.array(all_predictions_valid)
                ensemble_predictions_valid = ensemble.predict(
                    all_predictions_valid)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_valid = ensemble_predictions_valid[:, 1]

                self.backend.save_predictions_as_txt(
                    ensemble_predictions_valid,
                    'valid',
                    index_run,
                    prefix=self.dataset_name)
            else:
                self.logger.info(
                    'Could not find as many validation set predictions (%d) '
                    'as ensemble predictions (%d)!', len(dir_valid_list),
                    len(dir_ensemble_model_files))

            del all_predictions_valid

            if len(dir_test_list) == len(dir_ensemble_model_files):
                all_predictions_test = np.array(all_predictions_test)
                ensemble_predictions_test = ensemble.predict(
                    all_predictions_test)
                if self.task_type == BINARY_CLASSIFICATION:
                    ensemble_predictions_test = ensemble_predictions_test[:, 1]

                self.backend.save_predictions_as_txt(ensemble_predictions_test,
                                                     'test',
                                                     index_run,
                                                     prefix=self.dataset_name)
            else:
                self.logger.info(
                    'Could not find as many test set predictions (%d) as '
                    'ensemble predictions (%d)!', len(dir_test_list),
                    len(dir_ensemble_model_files))

            del all_predictions_test

            current_num_models = len(dir_ensemble_model_files)
            watch.stop_task('index_run' + str(index_run))
            time_iter = watch.get_wall_dur('index_run' + str(index_run))
            used_time = watch.wall_elapsed('ensemble_builder')
            index_run += 1
        return
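Near the end of the loop the builder hashes the raw bytes of the ensemble predictions so that a new ensemble is only persisted when its output actually changes; as the TODO in the code notes, this is a heuristic and can in principle collide. In isolation the idea looks roughly like this:

    import numpy as np

    ensemble_predictions = np.array([[0.2, 0.8], [0.9, 0.1]])

    last_hash = None
    current_hash = hash(ensemble_predictions.data.tobytes())
    if last_hash is None or current_hash != last_hash:
        # first ensemble, or the output changed: save it and remember the hash
        last_hash = current_hash
    else:
        # identical output: skip saving a redundant ensemble
        pass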
Example no. 24
    def score_ensemble_preds(self):
        """
            score predictions on ensemble building data set;
            populates self.read_preds
        """

        self.logger.debug("Read ensemble data set predictions")

        if self.y_true_ensemble is None:
            try:
                self.y_true_ensemble = self.backend.load_targets_ensemble()
            except FileNotFoundError:
                self.logger.debug(
                    "Could not find true targets on ensemble data set: %s",
                    traceback.format_exc(),
                )
                return False

        # no validation predictions so far -- no dir
        if not os.path.isdir(self.dir_ensemble):
            self.logger.debug("No ensemble dataset prediction directory found")
            return False

        if self.shared_mode is False:
            pred_path = os.path.join(
                glob.escape(self.dir_ensemble),
                'predictions_ensemble_%s_*_*.npy*' % self.seed,
            )
        # pSMAC
        else:
            pred_path = os.path.join(
                glob.escape(self.dir_ensemble),
                'predictions_ensemble_*_*_*.npy*',
            )

        y_ens_files = glob.glob(pred_path)
        y_ens_files = [
            y_ens_file for y_ens_file in y_ens_files
            if y_ens_file.endswith('.npy') or y_ens_file.endswith('.npy.gz')
        ]
        self.y_ens_files = y_ens_files
        # no validation predictions so far -- no files
        if len(self.y_ens_files) == 0:
            self.logger.debug("Found no prediction files on ensemble data set:"
                              " %s" % pred_path)
            return False

        # First sort files chronologically
        to_read = []
        for y_ens_fn in self.y_ens_files:
            match = self.model_fn_re.search(y_ens_fn)
            _seed = int(match.group(1))
            _num_run = int(match.group(2))
            _budget = float(match.group(3))
            to_read.append([y_ens_fn, match, _seed, _num_run, _budget])

        n_read_files = 0
        # Now read file wrt to num_run
        for y_ens_fn, match, _seed, _num_run, _budget in \
                sorted(to_read, key=lambda x: x[3]):
            if self.read_at_most and n_read_files >= self.read_at_most:
                # limit the number of files that will be read
                # to limit memory consumption
                break

            if not y_ens_fn.endswith(".npy") and not y_ens_fn.endswith(
                    ".npy.gz"):
                self.logger.info(
                    'Error loading file (not .npy or .npy.gz): %s', y_ens_fn)
                continue

            if not self.read_preds.get(y_ens_fn):
                self.read_preds[y_ens_fn] = {
                    "ens_score": -1,
                    "mtime_ens": 0,
                    "mtime_valid": 0,
                    "mtime_test": 0,
                    "seed": _seed,
                    "num_run": _num_run,
                    "budget": _budget,
                    "disc_space_cost_mb": None,
                    Y_ENSEMBLE: None,
                    Y_VALID: None,
                    Y_TEST: None,
                    # Lazy keys so far:
                    # 0 - not loaded
                    # 1 - loaded and in memory
                    # 2 - loaded but dropped again
                    "loaded": 0
                }

            if self.read_preds[y_ens_fn]["mtime_ens"] == os.path.getmtime(
                    y_ens_fn):
                # same time stamp; nothing changed;
                continue

            # actually read the predictions and score them
            try:
                y_ensemble = self._read_np_fn(y_ens_fn)
                score = calculate_score(solution=self.y_true_ensemble,
                                        prediction=y_ensemble,
                                        task_type=self.task_type,
                                        metric=self.metric,
                                        all_scoring_functions=False)

                if self.read_preds[y_ens_fn]["ens_score"] > -1:
                    self.logger.debug(
                        'Changing ensemble score for file %s from %f to %f '
                        'because file modification time changed? %f - %f',
                        y_ens_fn,
                        self.read_preds[y_ens_fn]["ens_score"],
                        score,
                        self.read_preds[y_ens_fn]["mtime_ens"],
                        os.path.getmtime(y_ens_fn),
                    )

                self.read_preds[y_ens_fn]["ens_score"] = score

                # It is not needed to create the object here
                # To save memory, we just score the object.
                # self.read_preds[y_ens_fn][Y_ENSEMBLE] = y_ensemble
                self.read_preds[y_ens_fn]["mtime_ens"] = os.path.getmtime(
                    y_ens_fn)
                self.read_preds[y_ens_fn]["loaded"] = 2
                self.read_preds[y_ens_fn][
                    "disc_space_cost_mb"] = self.get_disk_consumption(y_ens_fn)

                n_read_files += 1

            except Exception:
                self.logger.warning(
                    'Error loading %s: %s',
                    y_ens_fn,
                    traceback.format_exc(),
                )
                self.read_preds[y_ens_fn]["ens_score"] = -1

        self.logger.debug(
            'Done reading %d new prediction files. Loaded %d predictions in '
            'total.', n_read_files,
            np.sum([pred["loaded"] > 0 for pred in self.read_preds.values()]))
        return True
Example no. 25
def test_calculate_loss():
    # For a scorer ranged in [0, 1], make sure that the loss
    # has an expected positive value
    y_pred = np.array([0, 1, 0, 1, 1, 1, 0, 0, 0, 0])
    y_true = np.array([0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
    )
    loss = 1.0 - score
    assert pytest.approx(loss) == calculate_loss(
        solution=y_true,
        prediction=y_pred,
        task_type=BINARY_CLASSIFICATION,
        metric=autosklearn.metrics.accuracy,
    )

    # Test the dictionary case
    score_dict = calculate_score(solution=y_true,
                                 prediction=y_pred,
                                 task_type=BINARY_CLASSIFICATION,
                                 metric=autosklearn.metrics.accuracy,
                                 scoring_functions=[
                                     autosklearn.metrics.accuracy,
                                     autosklearn.metrics.balanced_accuracy
                                 ])
    expected_score_dict = {
        'accuracy': 0.9,
        'balanced_accuracy': 0.9285714285714286,
    }
    loss_dict = calculate_loss(solution=y_true,
                               prediction=y_pred,
                               task_type=BINARY_CLASSIFICATION,
                               metric=autosklearn.metrics.accuracy,
                               scoring_functions=[
                                   autosklearn.metrics.accuracy,
                                   autosklearn.metrics.balanced_accuracy
                               ])
    for expected_metric, expected_score in expected_score_dict.items():
        assert pytest.approx(expected_score) == score_dict[expected_metric]
        assert pytest.approx(1 - expected_score) == loss_dict[expected_metric]

    # Lastly make sure that metrics whose optimum is zero
    # are also properly working
    y_true = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
    y_pred = np.array([0.11, 0.22, 0.33, 0.44, 0.55, 0.66])
    score = sklearn.metrics.mean_squared_error(y_true, y_pred)
    assert pytest.approx(score) == calculate_score(
        solution=y_true,
        prediction=y_pred,
        task_type=REGRESSION,
        metric=autosklearn.metrics.mean_squared_error,
    )
    loss = score
    assert pytest.approx(loss) == calculate_loss(
        solution=y_true,
        prediction=y_pred,
        task_type=REGRESSION,
        metric=autosklearn.metrics.mean_squared_error,
    )