Example #1
def make_mode_holdout_iterative_fit(data, seed, configuration, num_run):
    global evaluator
    evaluator = HoldoutEvaluator(data, configuration, seed=seed, num_run=num_run, **_get_base_dict())
    evaluator.iterative_fit()
    signal.signal(15, empty_signal_handler)
    evaluator.finish_up()

    backend = Backend(None, os.getcwd())
    if os.path.exists(backend.get_model_dir()):
        backend.save_model(evaluator.model, num_run, seed)
Example #2
def make_mode_holdout_iterative_fit(data, seed, configuration, num_run):
    global evaluator
    evaluator = HoldoutEvaluator(data,
                                 configuration,
                                 seed=seed,
                                 num_run=num_run,
                                 **_get_base_dict())
    evaluator.iterative_fit()
    signal.signal(15, empty_signal_handler)
    evaluator.finish_up()

    backend = Backend(None, os.getcwd())
    if os.path.exists(backend.get_model_dir()):
        backend.save_model(evaluator.model, num_run, seed)
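
In both variants, SIGTERM (signal 15) is re-bound to empty_signal_handler right after the iterative fit, so a late kill from the runsolver can no longer interrupt the bookkeeping in finish_up(). Neither empty_signal_handler nor _get_base_dict is shown above; the snippet below is only a minimal sketch of what such helpers could look like, with both names and the keyword arguments treated as hypothetical stand-ins.

import signal


def empty_signal_handler(signum, frame):
    # Hypothetical stand-in: swallow the signal so that finish_up() can run
    # to completion once fitting has stopped.
    pass


def _get_base_dict():
    # Hypothetical stand-in for the shared keyword arguments every evaluator
    # receives; the real helper lives elsewhere in the evaluation module.
    return {'with_predictions': True,
            'all_scoring_functions': False,
            'output_y_test': True}


if __name__ == '__main__':
    # Mirrors the pattern above: once fitting is done, SIGTERM is ignored so
    # that saving the model and predictions is not interrupted.
    signal.signal(signal.SIGTERM, empty_signal_handler)
    print(_get_base_dict())
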
Example #3
class AbstractEvaluator(object):
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def __init__(self,
                 Datamanager,
                 output_dir,
                 configuration=None,
                 with_predictions=False,
                 all_scoring_functions=False,
                 seed=1,
                 output_y_test=False,
                 num_run=None):

        self.starttime = time.time()

        self.output_dir = output_dir
        self.configuration = configuration
        self.D = Datamanager

        self.X_valid = Datamanager.data.get('X_valid')
        self.X_test = Datamanager.data.get('X_test')

        self.metric = Datamanager.info['metric']
        self.task_type = Datamanager.info['task']
        self.seed = seed

        self.output_y_test = output_y_test
        self.with_predictions = with_predictions
        self.all_scoring_functions = all_scoring_functions

        if self.task_type in REGRESSION_TASKS:
            if self.configuration is None:
                self.model_class = MyDummyRegressor
            else:
                self.model_class = \
                    autosklearn.pipeline.regression.SimpleRegressionPipeline
            self.predict_function = self._predict_regression
        else:
            if self.configuration is None:
                self.model_class = MyDummyClassifier
            else:
                self.model_class = \
                    autosklearn.pipeline.classification.SimpleClassificationPipeline
            self.predict_function = self._predict_proba

        if num_run is None:
            num_run = get_new_run_num()
        self.num_run = num_run

        self.backend = Backend(None, self.output_dir)
        self.model = self.model_class(self.configuration, self.seed)

    def fit_predict_and_loss(self):
        """Fit model(s) according to resampling strategy, predict for the
        validation set and return the loss and predictions on the validation
        set.

        Provides a closed interface in which all steps of the target
        algorithm are performed without any communication with other
        processes. Useful for cross-validation because it allows training a
        model, predicting for the validation set and then discarding the
        model to save main memory.
        """
        raise NotImplementedError()

    def iterative_fit(self):
        """Fit a model iteratively.

        Fitting can be interrupted in order to use a partially trained model."""
        raise NotImplementedError()

    def predict_and_loss(self):
        """Use current model to predict on the validation set and calculate
        loss.

         Should be used when using iterative fitting."""
        raise NotImplementedError()

    def predict(self):
        """Use the current model to predict on the validation set.

        Should only be used to create dummy predictions."""
        raise NotImplementedError()

    def _loss(self, y_true, y_hat):
        if self.configuration is None:
            if self.all_scoring_functions:
                return {self.metric: 1.0}
            else:
                return 1.0

        score = calculate_score(
            y_true,
            y_hat,
            self.task_type,
            self.metric,
            self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)

        if hasattr(score, '__len__'):
            err = {key: 1 - score[key] for key in score}
        else:
            err = 1 - score

        return err

    def finish_up(self,
                  loss=None,
                  opt_pred=None,
                  valid_pred=None,
                  test_pred=None):
        """This function does everything necessary after the fitting is done:

        * predicting
        * saving the files for the ensembles_statistics
        * generating output for SMAC

        We use it as the signal handler so we can reuse the code for the
        normal use case and for when the runsolver kills us here :)"""

        try:
            self.duration = time.time() - self.starttime
            if loss is None:
                loss, opt_pred, valid_pred, test_pred = self.predict_and_loss()
            self.file_output(loss, opt_pred, valid_pred, test_pred)
            self.duration = time.time() - self.starttime

            num_run = str(self.num_run).zfill(5)
            if isinstance(loss, dict):
                loss_ = loss
                loss = loss_[self.D.info['metric']]
            else:
                loss_ = {}
            additional_run_info = ';'.join([
                '%s: %s' % (METRIC_TO_STRING[metric]
                            if metric in METRIC_TO_STRING else metric, value)
                for metric, value in loss_.items()
            ])
            additional_run_info += ';' + 'duration: ' + str(self.duration)
            additional_run_info += ';' + 'num_run:' + num_run

            if self.configuration is not None:
                self._output_SMAC_string(self.duration, loss, self.seed,
                                         additional_run_info)
        except Exception as e:
            self.duration = time.time() - self.starttime
            print(traceback.format_exc())
            self._output_SMAC_string(
                self.duration, 2.0, self.seed,
                'No results were produced! Error is %s' % str(e))

    def _output_SMAC_string(self, duration, loss, seed, additional_run_info):
        print(
            'Result for ParamILS: %s, %f, 1, %f, %d, %s' %
            ('SAT', abs(duration), loss, seed, additional_run_info))

    def file_output(self, loss, Y_optimization_pred, Y_valid_pred,
                    Y_test_pred):
        seed = os.environ.get('AUTOSKLEARN_SEED')

        if self.Y_optimization.shape[0] != Y_optimization_pred.shape[0]:
            return 2, "Targets %s and prediction %s don't have the same " \
            "length. Probably training didn't finish" % (
                self.Y_optimization.shape, Y_optimization_pred.shape)

        num_run = str(self.num_run).zfill(5)
        if os.path.exists(self.backend.get_model_dir()):
            self.backend.save_model(self.model, self.num_run, seed)

        if self.output_y_test:
            try:
                os.makedirs(self.output_dir)
            except OSError:
                pass
            self.backend.save_targets_ensemble(self.Y_optimization)

        self.backend.save_predictions_as_npy(Y_optimization_pred, 'ensemble',
                                             seed, num_run)

        if Y_valid_pred is not None:
            self.backend.save_predictions_as_npy(Y_valid_pred, 'valid', seed,
                                                 num_run)

        if Y_test_pred is not None:
            self.backend.save_predictions_as_npy(Y_test_pred, 'test', seed,
                                                 num_run)

    def _predict_proba(self, X, model, task_type, Y_train):
        Y_pred = model.predict_proba(X, batch_size=1000)
        Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train)
        return Y_pred

    def _predict_regression(self, X, model, task_type, Y_train=None):
        Y_pred = model.predict(X)

        if len(Y_pred.shape) == 1:
            Y_pred = Y_pred.reshape((-1, 1))

        return Y_pred

    def _ensure_prediction_array_sizes(self, prediction, Y_train):
        num_classes = self.D.info['label_num']

        if self.task_type == MULTICLASS_CLASSIFICATION and \
                prediction.shape[1] < num_classes:
            if Y_train is None:
                raise ValueError('Y_train must not be None!')
            classes = list(np.unique(Y_train))

            mapping = dict()
            for class_number in range(num_classes):
                if class_number in classes:
                    index = classes.index(class_number)
                    mapping[index] = class_number
            new_predictions = np.zeros((prediction.shape[0], num_classes),
                                       dtype=np.float32)

            for index in mapping:
                class_index = mapping[index]
                new_predictions[:, class_index] = prediction[:, index]

            return new_predictions

        return prediction
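
The docstrings above spell out the contract a concrete evaluator has to implement. The class below is a hypothetical, heavily simplified holdout-style sketch (not the real HoldoutEvaluator); the attribute names self.X_train, self.Y_train, self.X_optimization and self.Y_optimization are assumptions introduced only for illustration, and the "split" in __init__ just reuses the training data.

class SketchHoldoutEvaluator(AbstractEvaluator):
    """Hypothetical illustration of the AbstractEvaluator contract."""

    def __init__(self, Datamanager, output_dir, configuration=None, **kwargs):
        super(SketchHoldoutEvaluator, self).__init__(
            Datamanager, output_dir, configuration=configuration, **kwargs)
        # Purely illustrative: a real evaluator would hold out part of the
        # training data instead of evaluating on all of it.
        self.X_train = Datamanager.data['X_train']
        self.Y_train = Datamanager.data['Y_train']
        self.X_optimization = self.X_train
        self.Y_optimization = self.Y_train

    def fit_predict_and_loss(self):
        # Train and evaluate in one closed step.
        self.model.fit(self.X_train, self.Y_train)
        return self.predict_and_loss()

    def iterative_fit(self):
        # A real implementation would fit in small increments so a partially
        # trained model is usable after an interruption; the plain fit here
        # is only a placeholder.
        self.model.fit(self.X_train, self.Y_train)

    def predict_and_loss(self):
        opt_pred = self.predict_function(self.X_optimization, self.model,
                                         self.task_type, self.Y_train)
        loss = self._loss(self.Y_optimization, opt_pred)
        # finish_up() expects (loss, opt_pred, valid_pred, test_pred).
        return loss, opt_pred, None, None

    def predict(self):
        return self.predict_function(self.X_optimization, self.model,
                                     self.task_type, self.Y_train)
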
Example #4
class AbstractEvaluator(object):
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def __init__(self, Datamanager, output_dir, configuration=None,
                 with_predictions=False,
                 all_scoring_functions=False,
                 seed=1,
                 output_y_test=False,
                 num_run=None):

        self.starttime = time.time()

        self.output_dir = output_dir
        self.configuration = configuration
        self.D = Datamanager

        self.X_valid = Datamanager.data.get('X_valid')
        self.X_test = Datamanager.data.get('X_test')

        self.metric = Datamanager.info['metric']
        self.task_type = Datamanager.info['task']
        self.seed = seed

        self.output_y_test = output_y_test
        self.with_predictions = with_predictions
        self.all_scoring_functions = all_scoring_functions

        if self.task_type in REGRESSION_TASKS:
            if self.configuration is None:
                self.model_class = MyDummyRegressor
            else:
                self.model_class = \
                    autosklearn.pipeline.regression.SimpleRegressionPipeline
            self.predict_function = self._predict_regression
        else:
            if self.configuration is None:
                self.model_class = MyDummyClassifier
            else:
                self.model_class = \
                    autosklearn.pipeline.classification.SimpleClassificationPipeline
            self.predict_function = self._predict_proba

        if num_run is None:
            num_run = get_new_run_num()
        self.num_run = num_run

        self.backend = Backend(None, self.output_dir)
        self.model = self.model_class(self.configuration, self.seed)

    def fit_predict_and_loss(self):
        """Fit model(s) according to resampling strategy, predict for the
        validation set and return the loss and predictions on the validation
        set.

        Provides a closed interface in which all steps of the target
        algorithm are performed without any communication with other
        processes. Useful for cross-validation because it allows training a
        model, predicting for the validation set and then discarding the
        model to save main memory.
        """
        raise NotImplementedError()

    def iterative_fit(self):
        """Fit a model iteratively.

        Fitting can be interrupted in order to use a partially trained model."""
        raise NotImplementedError()

    def predict_and_loss(self):
        """Use current model to predict on the validation set and calculate
        loss.

         Should be used when using iterative fitting."""
        raise NotImplementedError()

    def predict(self):
        """Use the current model to predict on the validation set.

        Should only be used to create dummy predictions."""
        raise NotImplementedError()

    def _loss(self, y_true, y_hat):
        if self.configuration is None:
            if self.all_scoring_functions:
                return {self.metric: 1.0}
            else:
                return 1.0

        score = calculate_score(
            y_true, y_hat, self.task_type,
            self.metric, self.D.info['label_num'],
            all_scoring_functions=self.all_scoring_functions)

        if hasattr(score, '__len__'):
            err = {key: 1 - score[key] for key in score}
        else:
            err = 1 - score

        return err

    def finish_up(self, loss=None, opt_pred=None, valid_pred=None,
                  test_pred=None):
        """This function does everything necessary after the fitting is done:

        * predicting
        * saving the files for the ensembles_statistics
        * generating output for SMAC

        We use it as the signal handler so we can reuse the code for the
        normal use case and for when the runsolver kills us here :)"""

        try:
            self.duration = time.time() - self.starttime
            if loss is None:
                loss, opt_pred, valid_pred, test_pred = self.predict_and_loss()
            self.file_output(loss, opt_pred, valid_pred, test_pred)
            self.duration = time.time() - self.starttime

            num_run = str(self.num_run).zfill(5)
            if isinstance(loss, dict):
                loss_ = loss
                loss = loss_[self.D.info['metric']]
            else:
                loss_ = {}
            additional_run_info = ';'.join(['%s: %s' %
                                    (METRIC_TO_STRING[
                                         metric] if metric in METRIC_TO_STRING else metric,
                                     value)
                                    for metric, value in loss_.items()])
            additional_run_info += ';' + 'duration: ' + str(self.duration)
            additional_run_info += ';' + 'num_run:' + num_run

            if self.configuration is not None:
                self._output_SMAC_string(self.duration, loss, self.seed,
                                         additional_run_info)
        except Exception as e:
            self.duration = time.time() - self.starttime
            print(traceback.format_exc())
            self._output_SMAC_string(self.duration, 2.0, self.seed,
                'No results were produced! Error is %s' % str(e))

    def _output_SMAC_string(self, duration, loss, seed, additional_run_info):
        print('Result for ParamILS: %s, %f, 1, %f, %d, %s' %
              ('SAT', abs(duration), loss, seed,
               additional_run_info))

    def file_output(self, loss, Y_optimization_pred, Y_valid_pred, Y_test_pred):
        seed = os.environ.get('AUTOSKLEARN_SEED')

        if self.Y_optimization.shape[0] != Y_optimization_pred.shape[0]:
            return 2, "Targets %s and prediction %s don't have the same " \
            "length. Probably training didn't finish" % (
                self.Y_optimization.shape, Y_optimization_pred.shape)

        num_run = str(self.num_run).zfill(5)
        if os.path.exists(self.backend.get_model_dir()):
            self.backend.save_model(self.model, self.num_run, seed)

        if self.output_y_test:
            try:
                os.makedirs(self.output_dir)
            except OSError:
                pass
            self.backend.save_targets_ensemble(self.Y_optimization)

        self.backend.save_predictions_as_npy(Y_optimization_pred, 'ensemble',
                                             seed, num_run)

        if Y_valid_pred is not None:
            self.backend.save_predictions_as_npy(Y_valid_pred, 'valid',
                                                 seed, num_run)

        if Y_test_pred is not None:
            self.backend.save_predictions_as_npy(Y_test_pred, 'test',
                                                 seed, num_run)

    def _predict_proba(self, X, model, task_type, Y_train):
        Y_pred = model.predict_proba(X, batch_size=1000)
        Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train)
        return Y_pred

    def _predict_regression(self, X, model, task_type, Y_train=None):
        Y_pred = model.predict(X)

        if len(Y_pred.shape) == 1:
            Y_pred = Y_pred.reshape((-1, 1))

        return Y_pred

    def _ensure_prediction_array_sizes(self, prediction, Y_train):
        num_classes = self.D.info['label_num']

        if self.task_type == MULTICLASS_CLASSIFICATION and \
                prediction.shape[1] < num_classes:
            if Y_train is None:
                raise ValueError('Y_train must not be None!')
            classes = list(np.unique(Y_train))

            mapping = dict()
            for class_number in range(num_classes):
                if class_number in classes:
                    index = classes.index(class_number)
                    mapping[index] = class_number
            new_predictions = np.zeros((prediction.shape[0], num_classes),
                                       dtype=np.float32)

            for index in mapping:
                class_index = mapping[index]
                new_predictions[:, class_index] = prediction[:, index]

            return new_predictions

        return prediction
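
For reference, _ensure_prediction_array_sizes pads the probability matrix back to the full label space when the training fold did not contain every class. The standalone NumPy sketch below replays that mapping with made-up numbers; it is a simplification of the code above, not part of it.

import numpy as np

# Say the dataset has 4 classes but the training fold only contained classes
# 0 and 2, so the model emits a (n_samples, 2) probability matrix.
num_classes = 4
Y_train = np.array([0, 2, 2, 0])
prediction = np.array([[0.9, 0.1],
                       [0.2, 0.8]], dtype=np.float32)

classes = list(np.unique(Y_train))                      # [0, 2]
new_predictions = np.zeros((prediction.shape[0], num_classes),
                           dtype=np.float32)
for index, class_number in enumerate(classes):
    # Column `index` of the model output carries the probability of label
    # `class_number`; copy it into the right place of the padded matrix.
    new_predictions[:, class_number] = prediction[:, index]

print(new_predictions)
# [[0.9 0.  0.1 0. ]
#  [0.2 0.  0.8 0. ]]
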
Example #5
class AbstractEvaluator(object):
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def __init__(self, Datamanager, configuration=None,
                 with_predictions=False,
                 all_scoring_functions=False,
                 seed=1,
                 output_dir=None,
                 output_y_test=False,
                 num_run=None):

        self.starttime = time.time()

        self.configuration = configuration
        self.D = Datamanager

        self.X_valid = Datamanager.data.get('X_valid')
        self.X_test = Datamanager.data.get('X_test')

        self.metric = Datamanager.info['metric']
        self.task_type = Datamanager.info['task']
        self.seed = seed

        if output_dir is None:
            self.output_dir = os.getcwd()
        else:
            self.output_dir = output_dir

        self.output_y_test = output_y_test
        self.with_predictions = with_predictions
        self.all_scoring_functions = all_scoring_functions

        if self.task_type in REGRESSION_TASKS:
            if self.configuration is None:
                self.model_class = MyDummyRegressor
            else:
                self.model_class = ParamSklearnRegressor
            self.predict_function = self.predict_regression
        else:
            if self.configuration is None:
                self.model_class = MyDummyClassifier
            else:
                self.model_class = ParamSklearnClassifier
            self.predict_function = self.predict_proba

        if num_run is None:
            num_run = get_new_run_num()
        self.num_run = num_run

        self.backend = Backend(None, self.output_dir)
        self.model = self.model_class(self.configuration, self.seed)

    @abc.abstractmethod
    def fit(self):
        pass

    @abc.abstractmethod
    def predict(self):
        pass

    # This function does everything necessary after the fitting is done:
    #        predicting
    #        saving the files for the ensembles_statistics
    #        generating output for SMAC
    # We use it as the signal handler so we can reuse the code for the
    # normal use case and for when the runsolver kills us here :)
    def finish_up(self):
        try:
            self.duration = time.time() - self.starttime
            result, additional_run_info = self.file_output()
            if self.configuration is not None:
                print('Result for ParamILS: %s, %f, 1, %f, %d, %s' %
                      ('SAT', abs(self.duration), result, self.seed,
                       additional_run_info))
        except Exception as e:
            self.duration = time.time() - self.starttime

            print(traceback.format_exc())
            print('Result for ParamILS: %s, %f, 1, %f, %d, %s' %
                  ('TIMEOUT', abs(self.duration), 1.0, self.seed,
                   'No results were produced! Error is %s' % str(e)))

    def file_output(self):
        seed = os.environ.get('AUTOSKLEARN_SEED')

        errs, Y_optimization_pred, Y_valid_pred, Y_test_pred = self.predict()

        if self.Y_optimization.shape[0] != Y_optimization_pred.shape[0]:
            return 2, "Targets %s and prediction %s don't have the same " \
            "length. Probably training didn't finish" % (
                self.Y_optimization.shape, Y_optimization_pred.shape)

        num_run = str(self.num_run).zfill(5)

        if os.path.exists(self.backend.get_model_dir()):
            self.backend.save_model(self.model, self.num_run, seed)

        if self.output_y_test:
            try:
                os.makedirs(self.output_dir)
            except OSError:
                pass
            self.backend.save_targets_ensemble(self.Y_optimization)

        self.backend.save_predictions_as_npy(Y_optimization_pred, 'ensemble',
                                             seed, num_run)

        if Y_valid_pred is not None:
            self.backend.save_predictions_as_npy(Y_valid_pred, 'valid',
                                                 seed, num_run)

        if Y_test_pred is not None:
            self.backend.save_predictions_as_npy(Y_test_pred, 'test',
                                                 seed, num_run)

        self.duration = time.time() - self.starttime
        err = errs[self.D.info['metric']]
        additional_run_info = ';'.join(['%s: %s' %
            (METRIC_TO_STRING[metric] if metric in METRIC_TO_STRING else metric,
                                                                     value)
                                        for metric, value in errs.items()])
        additional_run_info += ';' + 'duration: ' + str(self.duration)
        additional_run_info += ';' + 'num_run:' + num_run
        return err, additional_run_info

    def predict_proba(self, X, model, task_type, Y_train=None):
        Y_pred = model.predict_proba(X, batch_size=1000)

        if task_type == MULTILABEL_CLASSIFICATION:
            Y_pred = np.hstack([Y_pred[i][:, -1].reshape((-1, 1))
                                for i in range(len(Y_pred))])

        elif task_type == BINARY_CLASSIFICATION:
            if len(Y_pred.shape) != 1:
                Y_pred = Y_pred[:, 1].reshape(-1, 1)

        elif task_type == MULTICLASS_CLASSIFICATION:
            pass

        Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train)
        return Y_pred

    def predict_regression(self, X, model, task_type, Y_train=None):
        Y_pred = model.predict(X)

        if len(Y_pred.shape) == 1:
            Y_pred = Y_pred.reshape((-1, 1))

        return Y_pred

    def _ensure_prediction_array_sizes(self, prediction, Y_train):
        num_classes = self.D.info['label_num']

        if self.task_type == MULTICLASS_CLASSIFICATION and \
                prediction.shape[1] < num_classes:
            classes = list(np.unique(self.D.data['Y_train']))
            if num_classes == prediction.shape[1]:
                return prediction

            if Y_train is not None:
                classes = list(np.unique(Y_train))

            mapping = dict()
            for class_number in range(num_classes):
                if class_number in classes:
                    index = classes.index(class_number)
                    mapping[index] = class_number
            new_predictions = np.zeros((prediction.shape[0], num_classes))
            for index in mapping:
                class_index = mapping[index]
                new_predictions[:, class_index] = prediction[:, index]

            return new_predictions

        return prediction
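
In these two older variants, finish_up and file_output report back to SMAC through the legacy ParamILS wrapper format: a single 'Result for ParamILS: ...' line, with the per-metric errors, duration and run number packed into a semicolon-separated additional_run_info string. The self-contained sketch below assembles such a line; all values are made up and the METRIC_TO_STRING lookup is skipped.

duration = 12.3
seed = 1
num_run = str(7).zfill(5)                        # '00007'
errs = {'acc_metric': 0.25, 'bac_metric': 0.3}   # hypothetical per-metric errors
err = errs['acc_metric']                         # error of the target metric

additional_run_info = ';'.join('%s: %s' % (metric, value)
                               for metric, value in errs.items())
additional_run_info += ';' + 'duration: ' + str(duration)
additional_run_info += ';' + 'num_run:' + num_run

print('Result for ParamILS: %s, %f, 1, %f, %d, %s' %
      ('SAT', abs(duration), err, seed, additional_run_info))
# Prints (one line):
# Result for ParamILS: SAT, 12.300000, 1, 0.250000, 1, acc_metric: 0.25;bac_metric: 0.3;duration: 12.3;num_run:00007
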
Example #6
class AbstractEvaluator(object):
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def __init__(self,
                 Datamanager,
                 configuration=None,
                 with_predictions=False,
                 all_scoring_functions=False,
                 seed=1,
                 output_dir=None,
                 output_y_test=False,
                 num_run=None):

        self.starttime = time.time()

        self.configuration = configuration
        self.D = Datamanager

        self.X_valid = Datamanager.data.get('X_valid')
        self.X_test = Datamanager.data.get('X_test')

        self.metric = Datamanager.info['metric']
        self.task_type = Datamanager.info['task']
        self.seed = seed

        if output_dir is None:
            self.output_dir = os.getcwd()
        else:
            self.output_dir = output_dir

        self.output_y_test = output_y_test
        self.with_predictions = with_predictions
        self.all_scoring_functions = all_scoring_functions

        if self.task_type in REGRESSION_TASKS:
            if self.configuration is None:
                self.model_class = MyDummyRegressor
            else:
                self.model_class = SimpleRegressionPipeline
            self.predict_function = self.predict_regression
        else:
            if self.configuration is None:
                self.model_class = MyDummyClassifier
            else:
                self.model_class = SimpleClassificationPipeline
            self.predict_function = self.predict_proba

        if num_run is None:
            num_run = get_new_run_num()
        self.num_run = num_run

        self.backend = Backend(None, self.output_dir)
        self.model = self.model_class(self.configuration, self.seed)

    @abc.abstractmethod
    def fit(self):
        pass

    @abc.abstractmethod
    def predict(self):
        pass

    # This function does everything necessary after the fitting is done:
    #        predicting
    #        saving the files for the ensembles_statistics
    #        generating output for SMAC
    # We use it as the signal handler so we can reuse the code for the
    # normal use case and for when the runsolver kills us here :)
    def finish_up(self):
        try:
            self.duration = time.time() - self.starttime
            result, additional_run_info = self.file_output()
            if self.configuration is not None:
                print('Result for ParamILS: %s, %f, 1, %f, %d, %s' %
                      ('SAT', abs(self.duration), result, self.seed,
                       additional_run_info))
        except Exception as e:
            self.duration = time.time() - self.starttime

            print(traceback.format_exc())
            print('Result for ParamILS: %s, %f, 1, %f, %d, %s' %
                  ('TIMEOUT', abs(self.duration), 1.0, self.seed,
                   'No results were produced! Error is %s' % str(e)))

    def file_output(self):
        seed = os.environ.get('AUTOSKLEARN_SEED')

        errs, Y_optimization_pred, Y_valid_pred, Y_test_pred = self.predict()

        if self.Y_optimization.shape[0] != Y_optimization_pred.shape[0]:
            return 2, "Targets %s and prediction %s don't have the same " \
            "length. Probably training didn't finish" % (
                self.Y_optimization.shape, Y_optimization_pred.shape)

        num_run = str(self.num_run).zfill(5)

        if os.path.exists(self.backend.get_model_dir()):
            self.backend.save_model(self.model, self.num_run, seed)

        if self.output_y_test:
            try:
                os.makedirs(self.output_dir)
            except OSError:
                pass
            self.backend.save_targets_ensemble(self.Y_optimization)

        self.backend.save_predictions_as_npy(Y_optimization_pred, 'ensemble',
                                             seed, num_run)

        if Y_valid_pred is not None:
            self.backend.save_predictions_as_npy(Y_valid_pred, 'valid', seed,
                                                 num_run)

        if Y_test_pred is not None:
            self.backend.save_predictions_as_npy(Y_test_pred, 'test', seed,
                                                 num_run)

        self.duration = time.time() - self.starttime
        err = errs[self.D.info['metric']]
        additional_run_info = ';'.join([
            '%s: %s' % (METRIC_TO_STRING[metric]
                        if metric in METRIC_TO_STRING else metric, value)
            for metric, value in errs.items()
        ])
        additional_run_info += ';' + 'duration: ' + str(self.duration)
        additional_run_info += ';' + 'num_run:' + num_run
        return err, additional_run_info

    def predict_proba(self, X, model, task_type, Y_train=None):
        Y_pred = model.predict_proba(X, batch_size=1000)

        if task_type == MULTILABEL_CLASSIFICATION:
            Y_pred = np.hstack([
                Y_pred[i][:, -1].reshape((-1, 1)) for i in range(len(Y_pred))
            ])

        elif task_type == BINARY_CLASSIFICATION:
            if len(Y_pred.shape) != 1:
                Y_pred = Y_pred[:, 1].reshape(-1, 1)

        elif task_type == MULTICLASS_CLASSIFICATION:
            pass

        Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train)
        return Y_pred

    def predict_regression(self, X, model, task_type, Y_train=None):
        Y_pred = model.predict(X)

        if len(Y_pred.shape) == 1:
            Y_pred = Y_pred.reshape((-1, 1))

        return Y_pred

    def _ensure_prediction_array_sizes(self, prediction, Y_train):
        num_classes = self.D.info['label_num']

        if self.task_type == MULTICLASS_CLASSIFICATION and \
                prediction.shape[1] < num_classes:
            classes = list(np.unique(self.D.data['Y_train']))
            if num_classes == prediction.shape[1]:
                return prediction

            if Y_train is not None:
                classes = list(np.unique(Y_train))

            mapping = dict()
            for class_number in range(num_classes):
                if class_number in classes:
                    index = classes.index(class_number)
                    mapping[index] = class_number
            new_predictions = np.zeros((prediction.shape[0], num_classes))
            for index in mapping:
                class_index = mapping[index]
                new_predictions[:, class_index] = prediction[:, index]

            return new_predictions

        return prediction
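
In these two variants, predict_proba also normalizes the shape of the probability output per task type: multilabel estimators that return a list of per-label (n_samples, 2) arrays are reduced to one positive-class column per label, and binary classifiers keep only the positive-class column. The standalone NumPy sketch below shows both reshapes with made-up numbers.

import numpy as np

# Multilabel case: a list with one (n_samples, 2) probability array per
# label, as some multi-output estimators return it; keep each label's
# positive-class column and stack them side by side.
per_label = [np.array([[0.7, 0.3], [0.4, 0.6]]),
             np.array([[0.1, 0.9], [0.8, 0.2]])]
multilabel_pred = np.hstack([p[:, -1].reshape((-1, 1)) for p in per_label])
print(multilabel_pred.shape)   # (2, 2) -- one column per label

# Binary case: keep only the positive-class probabilities as a column vector.
binary = np.array([[0.7, 0.3], [0.4, 0.6]])
binary_pred = binary[:, 1].reshape(-1, 1)
print(binary_pred.shape)       # (2, 1)
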