def _apply_default_pipeline_settings(pipeline):
    """Configure ``pipeline`` with the multilabel classification defaults.

    ``AutoNetFeatureData._apply_default_pipeline_settings`` is applied first.
    Afterwards this registers a ``'sigmoid'`` final activation, the
    ``'bce_with_logits'`` and ``'bce_with_logits_weighted'`` loss modules and
    the ``'multilabel_accuracy'`` metric, and sets ``default_minimize_value``
    on the train node and ``use_stratified_cv_split_default`` on the
    cross-validation node to ``False``.

    Arguments:
        pipeline -- The pipeline to configure; its nodes are looked up by
            name and modified in place.
    """
    from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
    from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
    from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
    from autoPyTorch.pipeline.nodes.train_node import TrainNode
    from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation

    import torch.nn as nn
    from autoPyTorch.components.metrics.standard_metrics import multilabel_accuracy
    from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeightedBinary

    def node(node_class):
        # Pipeline nodes are addressed by the name their class reports.
        return pipeline[node_class.get_name()]

    # Apply the AutoNetFeatureData settings before the task specific ones.
    AutoNetFeatureData._apply_default_pipeline_settings(pipeline)

    node(NetworkSelector).add_final_activation('sigmoid', nn.Sigmoid())

    losses = node(LossModuleSelector)
    losses.add_loss_module('bce_with_logits', nn.BCEWithLogitsLoss, None, False)
    losses.add_loss_module('bce_with_logits_weighted', nn.BCEWithLogitsLoss,
                           LossWeightStrategyWeightedBinary(), False)

    node(MetricSelector).add_metric('multilabel_accuracy', multilabel_accuracy)

    node(TrainNode).default_minimize_value = False
    node(CrossValidation).use_stratified_cv_split_default = False
Example #2
0
    def _apply_default_pipeline_settings(pipeline):
        """Configure ``pipeline`` with the regression defaults.

        ``AutoNetFeatureData._apply_default_pipeline_settings`` is applied
        first. Afterwards this registers an empty ``nn.Sequential`` as the
        ``'none'`` final activation, the ``'l1_loss'`` loss module and the
        ``'mean_distance'`` metric. The train node gets
        ``default_minimize_value = True`` and the cross-validation node gets
        ``use_stratified_cv_split_default = False``.

        Arguments:
            pipeline -- The pipeline to configure; its nodes are looked up by
                name and modified in place.
        """
        from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
        from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.train_node import TrainNode
        from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation

        import torch.nn as nn
        from autoPyTorch.components.metrics.standard_metrics import mean_distance

        def node(node_class):
            # Pipeline nodes are addressed by the name their class reports.
            return pipeline[node_class.get_name()]

        # Apply the AutoNetFeatureData settings before the task specific ones.
        AutoNetFeatureData._apply_default_pipeline_settings(pipeline)

        node(NetworkSelector).add_final_activation('none', nn.Sequential())
        node(LossModuleSelector).add_loss_module('l1_loss', nn.L1Loss)
        node(MetricSelector).add_metric('mean_distance', mean_distance)

        node(TrainNode).default_minimize_value = True
        node(CrossValidation).use_stratified_cv_split_default = False
Example #3
0
    def score(self, X_test, Y_test, return_loss_value=False):
        """Calculate the score on test data using the specified optimize_metric

        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.

        Keyword Arguments:
            return_loss_value {bool} -- If True, return the result of the
                metric's ``get_loss_value`` instead of calling the metric
                directly. (default: {False})

        Returns:
            score -- The score for the test data.
        """
        X_test, Y_test = self.check_data_array_types(X_test, Y_test)

        # Run the predict pipeline; the predictions are read from the
        # optimization node afterwards.
        config = self.autonet_config or self.base_config
        self.pipeline.predict_pipeline(pipeline_config=config, X=X_test)
        optimization_node = self.pipeline[OptimizationAlgorithm.get_name()]
        Y_pred = optimization_node.predict_output['Y']

        # Transform the targets with the encoder stored in the one-hot node's
        # fit output.
        encoding_node = self.pipeline[OneHotEncoding.get_name()]
        Y_test = encoding_node.transform_y(
            Y_test, encoding_node.fit_output['y_one_hot_encoder'])

        metric_node = self.pipeline[MetricSelector.get_name()]
        metric = metric_node.fit_output['optimize_metric']
        if not return_loss_value:
            return metric(Y_pred, Y_test)
        return metric.get_loss_value(Y_pred, Y_test)
Example #4
0
    def test_selector(self):
        """The selector's fit output holds the configured optimize and additional metrics."""
        pipeline = Pipeline([MetricSelector()])
        metric_node = pipeline[MetricSelector.get_name()]

        # Register three metrics; one is optimized, two are additional.
        for metric_name, metric_function in (("auc", auc_metric),
                                             ("accuracy", accuracy),
                                             ("mean", mean_distance)):
            metric_node.add_metric(metric_name, metric_function)

        config = pipeline.get_pipeline_config(
            optimize_metric="accuracy", additional_metrics=['auc', 'mean'])
        pipeline.fit_pipeline(pipeline_config=config)

        optimize_entry = metric_node.fit_output['optimize_metric']
        additional_entries = metric_node.fit_output['additional_metrics']

        self.assertEqual(optimize_entry.metric, accuracy)
        self.assertSetEqual({entry.metric for entry in additional_entries},
                            {auc_metric, mean_distance})
Example #5
0
    def predict(self, X, return_probabilities=False, return_metric=False):
        """Predict targets for ``X`` with the weighted ensemble of stored models.

        Every ensemble member with a non-zero weight is loaded from the
        ``models`` folder below ``result_logger_dir`` and its prediction,
        multiplied by its weight, is added to a running sum. Members whose
        identifier starts with a non-negative value are torch models that are
        run through the trained autonet's predict pipeline; all other members
        are baseline models created via ``BaselineTrainer.identifiers_ens``.

        Arguments:
            X {array} -- The data matrix to predict on.

        Keyword Arguments:
            return_probabilities {bool} -- Also return the weighted sum of the
                raw model predictions. (default: {False})
            return_metric {bool} -- Also return the optimize metric of the
                trained autonet. (default: {False})

        Returns:
            The predictions after reversing the one-hot encoding. If any of
            the flags is set, a tuple starting with these predictions,
            followed by the weighted raw predictions and/or the metric (in
            that order).

        Raises:
            ValueError -- If no ensemble member has a non-zero weight.
        """
        # run predict pipeline
        X, = self.check_data_array_types(X)
        autonet_config = self.get_current_autonet_config()

        ensemble = self.fit_result["ensemble"]
        baseline_id2model = BaselineTrainer.identifiers_ens
        models_dir = os.path.join(self.autonet_config["result_logger_dir"], "models")

        prediction = None
        for ident, weight in zip(ensemble.identifiers_, ensemble.weights_):
            if weight == 0:
                continue

            if ident[0] >= 0:
                model_dir = os.path.join(models_dir, str(ident) + ".torch")
                logging.info("==> Inferring model model " + model_dir + ", adding preds with weight " + str(weight))
                # NOTE(review): torch.load unpickles the file, so the result
                # directory has to be trusted.
                model = torch.load(model_dir)

                autonet_config["model"] = model
                current_prediction = self.trained_autonet.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)['Y']
            else:
                model_dir = os.path.join(models_dir, str(ident) + ".pkl")
                info_dir = os.path.join(models_dir, str(ident) + "_info.pkl")

                logging.info("==> Inferring model model " + model_dir + ", adding preds with weight " + str(weight))

                baseline_model = baseline_id2model[ident[0]]()
                baseline_model.load(model_dir, info_dir)

                current_prediction = baseline_model.predict(X_test=X, predict_proba=True)

            # Every member, including the first one, contributes with its
            # ensemble weight.
            weighted_prediction = weight * current_prediction
            prediction = weighted_prediction if prediction is None else prediction + weighted_prediction

        if prediction is None:
            raise ValueError("The ensemble contains no model with a non-zero weight.")

        # Looked up after the loop so that ensembles consisting only of
        # baseline models can be decoded as well.
        OHE = self.trained_autonet.pipeline[OneHotEncoding.get_name()]
        metric = self.trained_autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']

        # reverse one hot encoding
        result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder'])
        if not return_probabilities and not return_metric:
            return result
        result = [result]
        if return_probabilities:
            result.append(prediction)
        if return_metric:
            result.append(metric)
        return tuple(result)


        """
    def _apply_default_pipeline_settings(pipeline):
        """Configure ``pipeline`` with the classification defaults.

        ``AutoNetFeatureData._apply_default_pipeline_settings`` is applied
        first. Afterwards this registers a ``'softmax'`` final activation, the
        ``'cross_entropy'`` and ``'cross_entropy_weighted'`` loss modules, the
        ``'accuracy'`` metric, and the over-/under-sampling methods and target
        size strategies of the resampling node. The train node gets
        ``default_minimize_value = False``, the cross-validation node gets
        ``use_stratified_cv_split_default = True`` and the one-hot encoding
        node gets ``encode_Y = True``.

        Arguments:
            pipeline -- The pipeline to configure; its nodes are looked up by
                name and modified in place.

        Returns:
            The same ``pipeline`` object that was passed in.
        """
        from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
        from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.train_node import TrainNode
        from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
        from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.components.preprocessing.resampling import RandomOverSamplingWithReplacement, RandomUnderSamplingWithReplacement, SMOTE, \
            TargetSizeStrategyAverageSample, TargetSizeStrategyDownsample, TargetSizeStrategyMedianSample, TargetSizeStrategyUpsample

        import torch.nn as nn
        from autoPyTorch.components.metrics.standard_metrics import accuracy
        from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeighted

        def node(node_class):
            # Pipeline nodes are addressed by the name their class reports.
            return pipeline[node_class.get_name()]

        # Apply the AutoNetFeatureData settings before the task specific ones.
        AutoNetFeatureData._apply_default_pipeline_settings(pipeline)

        node(NetworkSelector).add_final_activation('softmax', nn.Softmax(1))

        losses = node(LossModuleSelector)
        losses.add_loss_module('cross_entropy', nn.CrossEntropyLoss, None, True)
        losses.add_loss_module('cross_entropy_weighted', nn.CrossEntropyLoss,
                               LossWeightStrategyWeighted(), True)

        node(MetricSelector).add_metric('accuracy', accuracy)

        resampling = node(ResamplingStrategySelector)
        resampling.add_over_sampling_method('random', RandomOverSamplingWithReplacement)
        resampling.add_over_sampling_method('smote', SMOTE)
        resampling.add_under_sampling_method('random', RandomUnderSamplingWithReplacement)
        for strategy_name, strategy in (('upsample', TargetSizeStrategyUpsample),
                                        ('downsample', TargetSizeStrategyDownsample),
                                        ('average', TargetSizeStrategyAverageSample),
                                        ('median', TargetSizeStrategyMedianSample)):
            resampling.add_target_size_strategy(strategy_name, strategy)

        node(TrainNode).default_minimize_value = False
        node(CrossValidation).use_stratified_cv_split_default = True
        node(OneHotEncoding).encode_Y = True

        return pipeline
    def fit(self,
            pipeline_config,
            final_metric_score,
            optimized_hyperparameter_config,
            budget,
            refit=None):
        """Build an ensemble from the logged predictions, unless it is skipped.

        The ensemble is skipped when ``refit`` is truthy, when
        ``pipeline_config["ensemble_size"]`` is 0 or when
        ``pipeline_config["task_id"]`` is neither -1 nor 1. In that case only
        the three incoming values are passed through.

        Arguments:
            pipeline_config {dict} -- The pipeline configuration.
            final_metric_score -- Passed through under 'final_metric_score'.
            optimized_hyperparameter_config -- Passed through under
                'optimized_hyperparameter_config'.
            budget -- Passed through under 'budget'.

        Keyword Arguments:
            refit -- Truthy values skip the ensemble building. (default: {None})

        Returns:
            dict -- The passed-through values and, if an ensemble was built,
                additionally 'ensemble', 'ensemble_final_metric_score' and
                'ensemble_configs'.
        """
        outputs = {
            "final_metric_score": final_metric_score,
            "optimized_hyperparameter_config": optimized_hyperparameter_config,
            "budget": budget,
        }

        skip_ensemble = (refit
                         or pipeline_config["ensemble_size"] == 0
                         or pipeline_config["task_id"] not in [-1, 1])
        if skip_ensemble:
            return outputs

        log_dir = pipeline_config["result_logger_dir"]
        predictions_file = os.path.join(log_dir, 'predictions_for_ensemble.npy')
        metric_node = self.pipeline[MetricSelector.get_name()]
        train_metric = metric_node.metrics[pipeline_config["train_metric"]]
        encoding_node = self.pipeline[OneHotEncoding.get_name()]
        y_transform = encoding_node.complete_y_tranformation
        logged_result = logged_results_to_HBS_result(
            pipeline_config["result_logger_dir"])

        # The fourth value returned by the reader is not needed here.
        predictions, targets, identifiers, _ = read_ensemble_prediction_file(
            filename=predictions_file, y_transform=y_transform)

        ensemble_selection, ensemble_configs = build_ensemble(
            result=logged_result,
            train_metric=train_metric,
            minimize=pipeline_config["minimize"],
            ensemble_size=pipeline_config["ensemble_size"],
            all_predictions=predictions,
            labels=targets,
            model_identifiers=identifiers,
            only_consider_n_best=pipeline_config[
                "ensemble_only_consider_n_best"],
            sorted_initialization_n_best=pipeline_config[
                "ensemble_sorted_initialization_n_best"])

        outputs["ensemble"] = ensemble_selection
        outputs["ensemble_final_metric_score"] = (
            ensemble_selection.get_validation_performance())
        outputs["ensemble_configs"] = ensemble_configs
        return outputs
    def _apply_default_pipeline_settings(pipeline):
        """Configure ``pipeline`` with the image classification defaults.

        ``AutoNetImageData._apply_default_pipeline_settings`` is applied
        first. Afterwards this registers a ``'softmax'`` final activation, the
        ``'cross_entropy'`` and ``'cross_entropy_weighted'`` loss modules and
        five metrics (all with ``loss_transform=True``). The train node gets
        ``default_minimize_value = False`` and the cross-validation node gets
        ``use_stratified_cv_split_default = True``.

        Arguments:
            pipeline -- The pipeline to configure; its nodes are looked up by
                name and modified in place.
        """
        import torch.nn as nn
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode
        from autoPyTorch.pipeline.nodes.image.cross_validation_indices import CrossValidationIndices
        from autoPyTorch.pipeline.nodes.image.loss_module_selector_indices import LossModuleSelectorIndices
        from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo
        from autoPyTorch.components.metrics import accuracy, auc_metric, pac_metric, balanced_accuracy, cross_entropy
        from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeighted

        def node(node_class):
            # Pipeline nodes are addressed by the name their class reports.
            return pipeline[node_class.get_name()]

        # Apply the AutoNetImageData settings before the task specific ones.
        AutoNetImageData._apply_default_pipeline_settings(pipeline)

        node(NetworkSelectorDatasetInfo).add_final_activation('softmax', nn.Softmax(1))

        losses = node(LossModuleSelectorIndices)
        losses.add_loss_module('cross_entropy', nn.CrossEntropyLoss, None, True)
        losses.add_loss_module('cross_entropy_weighted', nn.CrossEntropyLoss,
                               LossWeightStrategyWeighted(), True)

        # (name, metric, requires_target_class_labels); only
        # balanced_accuracy is registered as requiring target class labels.
        metric_specs = (
            ('accuracy', accuracy, False),
            ('auc_metric', auc_metric, False),
            ('pac_metric', pac_metric, False),
            ('balanced_accuracy', balanced_accuracy, True),
            ('cross_entropy', cross_entropy, False),
        )
        metrics = node(MetricSelector)
        for metric_name, metric_function, needs_class_labels in metric_specs:
            metrics.add_metric(metric_name,
                               metric_function,
                               loss_transform=True,
                               requires_target_class_labels=needs_class_labels)

        node(SimpleTrainNode).default_minimize_value = False
        node(CrossValidationIndices).use_stratified_cv_split_default = True
Example #9
0
    def get_default_pipeline(cls):
        """Build the default image pipeline and apply the class' default settings.

        The pipeline consists of the settings node followed by the
        optimization algorithm node. The latter wraps a single-dataset node
        holding the dataset reader, the dataset info node and the
        cross-validation node, which in turn holds the training nodes.

        Returns:
            The pipeline after ``cls._apply_default_pipeline_settings`` has
            been called on it.
        """
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes.image.optimization_algorithm_no_timelimit import OptimizationAlgorithmNoTimeLimit
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
        from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector

        from autoPyTorch.pipeline.nodes.image.simple_scheduler_selector import SimpleLearningrateSchedulerSelector
        from autoPyTorch.pipeline.nodes.image.cross_validation_indices import CrossValidationIndices
        from autoPyTorch.pipeline.nodes.image.autonet_settings_no_shuffle import AutoNetSettingsNoShuffle
        from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo
        from autoPyTorch.pipeline.nodes.image.loss_module_selector_indices import LossModuleSelectorIndices
        from autoPyTorch.pipeline.nodes.image.image_augmentation import ImageAugmentation
        from autoPyTorch.pipeline.nodes.image.create_image_dataloader import CreateImageDataLoader
        from autoPyTorch.pipeline.nodes.image.create_dataset_info import CreateDatasetInfo
        from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode
        from autoPyTorch.pipeline.nodes.image.image_dataset_reader import ImageDatasetReader
        from autoPyTorch.pipeline.nodes.image.single_dataset import SingleDataset

        # Nodes are created outermost-first / top-to-bottom and then nested
        # from the inside out.
        settings_node = AutoNetSettingsNoShuffle()
        reader_node = ImageDatasetReader()
        dataset_info_node = CreateDatasetInfo()
        training_nodes = [
            NetworkSelectorDatasetInfo(),
            OptimizerSelector(),
            SimpleLearningrateSchedulerSelector(),
            LogFunctionsSelector(),
            MetricSelector(),
            LossModuleSelectorIndices(),
            ImageAugmentation(),
            CreateImageDataLoader(),
            SimpleTrainNode(),
        ]
        cross_validation_node = CrossValidationIndices(training_nodes)
        dataset_node = SingleDataset(
            [reader_node, dataset_info_node, cross_validation_node])
        optimization_node = OptimizationAlgorithmNoTimeLimit([dataset_node])

        pipeline = Pipeline([settings_node, optimization_node])
        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
    def get_default_pipeline(cls):
        """Build the default feature-data pipeline and apply the class' default settings.

        The pipeline consists of the settings node followed by the
        optimization algorithm node, which wraps a cross-validation node
        holding the preprocessing, selection and training nodes.

        Returns:
            The pipeline after ``cls._apply_default_pipeline_settings`` has
            been called on it.
        """
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings
        from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
        from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
        from autoPyTorch.pipeline.nodes.imputation import Imputation
        from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector
        from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
        from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector
        from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
        from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
        from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector
        from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
        from autoPyTorch.pipeline.nodes.train_node import TrainNode

        # Nodes are created outermost-first / top-to-bottom and then nested
        # from the inside out.
        settings_node = AutoNetSettings()
        cross_validated_nodes = [
            Imputation(),
            NormalizationStrategySelector(),
            OneHotEncoding(),
            PreprocessorSelector(),
            ResamplingStrategySelector(),
            EmbeddingSelector(),
            NetworkSelector(),
            OptimizerSelector(),
            LearningrateSchedulerSelector(),
            LogFunctionsSelector(),
            MetricSelector(),
            LossModuleSelector(),
            TrainNode(),
        ]
        cross_validation_node = CrossValidation(cross_validated_nodes)
        optimization_node = OptimizationAlgorithm([cross_validation_node])

        pipeline = Pipeline([settings_node, optimization_node])
        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
Example #11
0
    def score(self, X_test, Y_test, return_loss_value=False):
        """Calculate the score on test data using the specified optimize_metric

        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.

        Keyword Arguments:
            return_loss_value {bool} -- If True, return the result of the
                metric's ``get_loss_value`` instead of calling the metric
                directly. (default: {False})

        Returns:
            score -- The score for the test data. If the predict pipeline
                already reports a 'score', that value is returned as is.
        """

        # Update config if needed
        X_test, Y_test = self.check_data_array_types(X_test, Y_test)
        autonet_config = self.get_current_autonet_config()

        res = self.pipeline.predict_pipeline(pipeline_config=autonet_config,
                                             X=X_test)
        if 'score' in res:
            # in case of default dataset like CIFAR10 - the pipeline will compute the score of the according pytorch test set
            return res['score']
        Y_pred = res['Y']

        # one hot encode Y
        try:
            OHE = self.pipeline[OneHotEncoding.get_name()]
            Y_test = OHE.transform_y(Y_test,
                                     OHE.fit_output['y_one_hot_encoder'])
        except Exception:
            # Best effort: scoring continues with the untransformed targets.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit are not swallowed here.
            print("No one-hot encodig possible. Continuing without.")

        metric = self.pipeline[
            MetricSelector.get_name()].fit_output['optimize_metric']

        if return_loss_value:
            return metric.get_loss_value(Y_pred, Y_test)
        return metric(torch.from_numpy(Y_pred.astype(np.float32)),
                      torch.from_numpy(Y_test.astype(np.float32)))
Example #12
0
    def score(self, X_test, Y_test):
        """Calculate the score on test data using the specified train_metric

        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.

        Returns:
            score -- The score for the test data.
        """
        nodes = self.pipeline

        # Run the predict pipeline; the predictions are read from the
        # optimization node afterwards.
        nodes.predict_pipeline(pipeline_config=self.autonet_config, X=X_test)
        optimization_node = nodes[OptimizationAlgorithm.get_name()]
        Y_pred = optimization_node.predict_output['Y']

        # Transform the targets with the encoder stored in the one-hot node's
        # fit output.
        encoding_node = nodes[OneHotEncoding.get_name()]
        encoder = encoding_node.fit_output['y_one_hot_encoder']
        Y_test = encoding_node.transform_y(Y_test, encoder)

        train_metric = nodes[MetricSelector.get_name()].fit_output['train_metric']
        # NOTE(review): the targets are passed before the predictions here -
        # confirm that this is the argument order the metric expects.
        targets = torch.from_numpy(Y_test)
        predictions = torch.from_numpy(Y_pred)
        return train_metric(targets, predictions)
Example #13
0
    def predict(self, X, return_probabilities=False, return_metric=False):
        """Predict targets for ``X`` with the weighted ensemble of trained autonets.

        Each model returned by the ensemble's ``get_models_with_weights`` is
        run through its predict pipeline and its prediction, multiplied by its
        weight, is added to a running sum.

        Arguments:
            X {array} -- The data matrix to predict on.

        Keyword Arguments:
            return_probabilities {bool} -- Also return the weighted sum of the
                raw model predictions. (default: {False})
            return_metric {bool} -- Also return the optimize metric taken from
                the last ensemble member. (default: {False})

        Returns:
            The predictions after reversing the one-hot encoding. If any of
            the flags is set, a tuple starting with these predictions,
            followed by the weighted raw predictions and/or the metric (in
            that order).

        Raises:
            ValueError -- If the ensemble provides no models.
        """
        # run predict pipeline
        X, = self.check_data_array_types(X)
        models_with_weights = self.fit_result["ensemble"].get_models_with_weights(self.trained_autonets)
        autonet_config = self.autonet_config or self.base_config

        prediction = None
        last_autonet = None
        for weight, autonet in models_with_weights:
            current_prediction = autonet.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)["Y"]
            # Every member, including the first one, contributes with its
            # ensemble weight.
            weighted_prediction = weight * current_prediction
            prediction = weighted_prediction if prediction is None else prediction + weighted_prediction
            last_autonet = autonet

        if last_autonet is None:
            raise ValueError("The ensemble contains no models to predict with.")

        # As before, the encoder and the metric are taken from the last
        # ensemble member.
        OHE = last_autonet.pipeline[OneHotEncoding.get_name()]
        metric = last_autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']

        # reverse one hot encoding
        result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder'])
        if not return_probabilities and not return_metric:
            return result
        result = [result]
        if return_probabilities:
            result.append(prediction)
        if return_metric:
            result.append(metric)
        return tuple(result)
def save_ensemble_logs(pipeline_config, autonet, result_dir, ensemble_size=None, log_filename=None):
    """Write a log of ensemble performance over time for a finished run.

    The predictions stored in ``result_dir`` are cut into growing subsets
    according to the time the corresponding models finished. For each subset an
    ensemble is built and evaluated, and one JSON line is appended to the log
    file. The log file is emptied at the start of the call.

    Arguments:
        pipeline_config {dict} -- Only "num_ensemble_evaluations" is read here;
            it controls how many time thresholds are generated.
        autonet -- Provides the current autonet config and the pipeline from
            which the metrics and the target transformation are taken.
        result_dir {str} -- Directory containing "predictions_for_ensemble.npy"
            and optionally "test_predictions_for_ensemble.npy"; the log file is
            written here as well.

    Keyword Arguments:
        ensemble_size -- Overrides the "ensemble_size" of the autonet config if
            truthy. (default: {None})
        log_filename -- Name of the log file; "ensemble_log.json" is used if
            falsy. (default: {None})
    """
    # prepare some variables
    autonet_config = autonet.get_current_autonet_config()
    metrics = autonet.pipeline[MetricSelector.get_name()].metrics
    optimize_metric = metrics[autonet_config["optimize_metric"]]
    y_transform = autonet.pipeline[OneHotEncoding.get_name()].complete_y_tranformation
    result = logged_results_to_HBS_result(result_dir)
    filename = os.path.join(result_dir, "predictions_for_ensemble.npy")
    test_filename = os.path.join(result_dir, "test_predictions_for_ensemble.npy")
    ensemble_log_filename = os.path.join(result_dir, log_filename or "ensemble_log.json")
    # opening in "w" mode creates/empties the log file; entries are appended below
    with open(ensemble_log_filename, "w") as f: pass

    # read the predictions
    predictions, labels, model_identifiers, timestamps = read_ensemble_prediction_file(filename=filename, y_transform=y_transform)
    # the entries are expected to be ordered by their "finished" timestamp
    assert(list(map(lambda x: x["finished"], timestamps)) == sorted(list(map(lambda x: x["finished"], timestamps))))
    test_data_available = False
    try:
        test_predictions, test_labels, test_model_identifiers, test_timestamps = read_ensemble_prediction_file(filename=test_filename, y_transform=y_transform)
        # average each test prediction entry over its first axis
        test_predictions = [np.mean(p, axis=0) for p in test_predictions]
        assert test_model_identifiers == model_identifiers and test_timestamps == timestamps, "Different model identifiers or timestamps in test file"
        predictions, model_identifiers, timestamps, test_predictions = \
            filter_nan_predictions(predictions, model_identifiers, timestamps, test_predictions)
        test_data_available = True
    except IOError:
        # the test prediction file could not be read: continue with validation data only
        logging.getLogger("benchmark").info("No test data available when building ensemble logs.")
        predictions, model_identifiers, timestamps = \
            filter_nan_predictions(predictions, model_identifiers, timestamps)

    # compute the prediction subset used to compute performance over time
    start_time = min(map(lambda t: t["submitted"], timestamps))
    end_time = max(map(lambda t: t["finished"], timestamps))
    # thresholds are start_time plus exponentially growing offsets (multiples of step in log space)
    step = math.log(end_time - start_time) / (pipeline_config["num_ensemble_evaluations"] - 1)
    steps = start_time + np.exp(np.arange(step, step * (pipeline_config["num_ensemble_evaluations"] + 1), step))
    # for every threshold: indices of all models that finished before it
    subset_indices = [np.array([i for i, t in enumerate(timestamps) if t["finished"] < s]) for s in steps]

    # iterate over the subset to compute performance over time
    last_finished = 0
    for subset in subset_indices:
        if len(subset) == 0:
            continue

        # skip thresholds whose latest finish time equals the previously handled one
        finished = max(timestamps[s]["finished"] for s in subset)
        if finished == last_finished:
            continue
        last_finished = finished
        subset_predictions = [np.copy(predictions[s]) for s in subset]
        subset_model_identifiers = [model_identifiers[s] for s in subset]

        # build an ensemble with current subset and size
        ensemble_start_time = time.time()
        ensemble, _ = build_ensemble(result=result,
            optimize_metric=optimize_metric, ensemble_size=ensemble_size or autonet_config["ensemble_size"],
            all_predictions=subset_predictions, labels=labels, model_identifiers=subset_model_identifiers,
            only_consider_n_best=autonet_config["ensemble_only_consider_n_best"],
            sorted_initialization_n_best=autonet_config["ensemble_sorted_initialization_n_best"])

        # get the ensemble predictions
        ensemble_prediction = ensemble.predict(subset_predictions)
        if test_data_available:
            subset_test_predictions = [np.copy(test_predictions[s]) for s in subset]
            test_ensemble_prediction = ensemble.predict(subset_test_predictions)

        # evaluate the metrics
        metric_performances = dict()
        for metric_name, metric in metrics.items():
            # only the optimize metric and the configured additional metrics are logged
            if metric_name != autonet_config["optimize_metric"] and metric_name not in autonet_config["additional_metrics"]:
                continue
            metric_performances[metric_name] = metric(ensemble_prediction, labels)
            if test_data_available:
                metric_performances["test_%s" % metric_name] = metric(test_ensemble_prediction, test_labels)

        # measured time covers building the ensemble, predicting and evaluating the metrics
        ensemble_time = time.time() - ensemble_start_time

        # write to log
        with open(ensemble_log_filename, "a") as f:
            # one JSON list per line: [time, metric values, weighted identifiers
            # (descending weight), selected identifiers, ensemble info]
            print(json.dumps([
                finished + ensemble_time,
                metric_performances,
                sorted([(identifier, weight) for identifier, weight in zip(ensemble.identifiers_, ensemble.weights_) if weight > 0],
                        key=lambda x: -x[1]),
                [ensemble.identifiers_[i] for i in ensemble.indices_],
                {
                    "ensemble_size": ensemble.ensemble_size,
                    "metric": autonet_config["optimize_metric"],
                    "sorted_initialization_n_best": ensemble.sorted_initialization_n_best,
                    "only_consider_n_best": ensemble.only_consider_n_best,
                    "bagging": ensemble.bagging,
                    "mode": ensemble.mode,
                    "num_input_models": ensemble.num_input_models_,
                    "trajectory": ensemble.trajectory_,
                    "train_score": ensemble.train_score_
                }
            ]), file=f)