Example #1
    def predict(self, X, return_probabilities=False):
        """Predict the targets for a data matrix X.
        
        Arguments:
            X {array} -- The data matrix.
        
        Keyword Arguments:
            return_probabilities {bool} -- Whether to also return the raw network output as the second entry of a tuple (default: {False})
        
        Returns:
            result -- The predicted targets.
        """

        # run predict pipeline
        X, = self.check_data_array_types(X)
        autonet_config = self.get_current_autonet_config()

        Y_pred = self.pipeline.predict_pipeline(pipeline_config=autonet_config,
                                                X=X)['Y']

        # reverse one hot encoding
        if OneHotEncoding.get_name() in self.pipeline:
            OHE = self.pipeline[OneHotEncoding.get_name()]
            result = OHE.reverse_transform_y(
                Y_pred, OHE.fit_output['y_one_hot_encoder'])
            return result if not return_probabilities else (result, Y_pred)
        else:
            result = dict()
            result['Y'] = Y_pred
            return result if not return_probabilities else (result, Y_pred)
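Example #1 above defines predict on an AutoNet-style API class. A minimal, hedged usage sketch follows; the AutoNetClassification import path, the constructor settings and the random data are assumptions for illustration, not taken from the example:

    # hypothetical usage sketch; import path, budgets and data are assumptions
    import numpy as np
    from autoPyTorch import AutoNetClassification

    X_train, y_train = np.random.rand(100, 5), np.random.randint(0, 2, 100)
    autonet = AutoNetClassification(max_runtime=60, min_budget=5, max_budget=20)
    autonet.fit(X_train, y_train)

    X_new = np.random.rand(10, 5)
    y_hat = autonet.predict(X_new)                                    # predicted targets only
    y_hat, y_raw = autonet.predict(X_new, return_probabilities=True)  # also get the raw network output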
Example #2
    def score(self, X_test, Y_test, return_loss_value=False):
        """Calculate the score on test data using the specified optimize_metric

        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.

        Keyword Arguments:
            return_loss_value {bool} -- Whether to return the metric's loss value instead of the score (default: {False})

        Returns:
            score -- The score for the test data.
        """

        # run predict pipeline
        X_test, Y_test = self.check_data_array_types(X_test, Y_test)
        autonet_config = self.autonet_config or self.base_config
        self.pipeline.predict_pipeline(pipeline_config=autonet_config,
                                       X=X_test)
        Y_pred = self.pipeline[
            OptimizationAlgorithm.get_name()].predict_output['Y']

        # one hot encode Y
        OHE = self.pipeline[OneHotEncoding.get_name()]
        Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder'])

        metric = self.pipeline[
            MetricSelector.get_name()].fit_output['optimize_metric']
        if return_loss_value:
            return metric.get_loss_value(Y_pred, Y_test)
        return metric(Y_pred, Y_test)
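A short sketch of how a score method like this would be called on held-out data, continuing the hypothetical fitted autonet instance (and the numpy import) from the sketch after Example #1; X_test and Y_test are likewise made up:

    # hypothetical usage sketch; `autonet` is the fitted instance from the previous sketch
    X_test, Y_test = np.random.rand(20, 5), np.random.randint(0, 2, 20)
    test_score = autonet.score(X_test, Y_test)                         # value of the optimize_metric
    test_loss = autonet.score(X_test, Y_test, return_loss_value=True)  # metric's loss value instead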
Example #3
    def predict(self, X, return_probabilities=False, return_metric=False):
        # run predict pipeline
        X, = self.check_data_array_types(X)
        prediction = None
        autonet_config = self.get_current_autonet_config()

        identifiers_with_budget, weights = self.fit_result["ensemble"].identifiers_, self.fit_result["ensemble"].weights_

        baseline_id2model = BaselineTrainer.identifiers_ens


        model_dirs = [os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + ".torch") for ident in identifiers_with_budget]
        
        # the one-hot encoder and metric come from the trained pipeline and do not depend on the loop
        OHE = self.trained_autonet.pipeline[OneHotEncoding.get_name()]
        metric = self.trained_autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']

        # accumulate the weighted predictions of the ensemble members
        for ident, weight in zip(identifiers_with_budget, weights):
            
            if weight == 0:
                continue


            if ident[0] >= 0:
                model_dir = os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + ".torch")
                logging.info("==> Inferring model " + model_dir + ", adding preds with weight " + str(weight))
                model = torch.load(model_dir)

                autonet_config["model"] = model
                current_prediction = self.trained_autonet.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)['Y']
                prediction = weight * current_prediction if prediction is None else prediction + weight * current_prediction

            else:
                model_dir = os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + ".pkl")
                info_dir = os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + "_info.pkl")

                logging.info("==> Inferring model " + model_dir + ", adding preds with weight " + str(weight))

                baseline_model = baseline_id2model[ident[0]]()
                baseline_model.load(model_dir, info_dir)

                current_prediction = baseline_model.predict(X_test=X, predict_proba=True)
                prediction = weight * current_prediction if prediction is None else prediction + weight * current_prediction
                
        # reverse one hot encoding
        result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder'])
        if not return_probabilities and not return_metric:
            return result
        result = [result]
        if return_probabilities:
            result.append(prediction)
        if return_metric:
            result.append(metric)
        return tuple(result)
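The loop above accumulates a weighted sum of the member predictions (prediction = sum of weight_i * prediction_i) and skips members with zero weight. A self-contained NumPy sketch of that accumulation pattern, with made-up weights and per-member probabilities:

    import numpy as np

    weights = [0.5, 0.0, 0.5]                    # ensemble weights; zero-weight members are skipped
    member_preds = [np.array([[0.2, 0.8]]),      # per-member class probabilities (illustrative)
                    np.array([[0.9, 0.1]]),
                    np.array([[0.4, 0.6]])]

    prediction = None
    for weight, pred in zip(weights, member_preds):
        if weight == 0:
            continue
        prediction = weight * pred if prediction is None else prediction + weight * pred
    # prediction is now [[0.3, 0.7]]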
Example #4
    def _apply_default_pipeline_settings(pipeline):
        from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
        from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.train_node import TrainNode
        from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
        from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
        from autoPyTorch.components.preprocessing.resampling import RandomOverSamplingWithReplacement, RandomUnderSamplingWithReplacement, SMOTE, \
            TargetSizeStrategyAverageSample, TargetSizeStrategyDownsample, TargetSizeStrategyMedianSample, TargetSizeStrategyUpsample

        import torch.nn as nn
        from autoPyTorch.components.metrics.standard_metrics import accuracy
        from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeighted

        AutoNetFeatureData._apply_default_pipeline_settings(pipeline)

        net_selector = pipeline[NetworkSelector.get_name()]
        net_selector.add_final_activation('softmax', nn.Softmax(1))

        loss_selector = pipeline[LossModuleSelector.get_name()]
        loss_selector.add_loss_module('cross_entropy', nn.CrossEntropyLoss,
                                      None, True)
        loss_selector.add_loss_module('cross_entropy_weighted',
                                      nn.CrossEntropyLoss,
                                      LossWeightStrategyWeighted(), True)

        metric_selector = pipeline[MetricSelector.get_name()]
        metric_selector.add_metric('accuracy', accuracy)

        resample_selector = pipeline[ResamplingStrategySelector.get_name()]
        resample_selector.add_over_sampling_method(
            'random', RandomOverSamplingWithReplacement)
        resample_selector.add_over_sampling_method('smote', SMOTE)
        resample_selector.add_under_sampling_method(
            'random', RandomUnderSamplingWithReplacement)
        resample_selector.add_target_size_strategy('upsample',
                                                   TargetSizeStrategyUpsample)
        resample_selector.add_target_size_strategy(
            'downsample', TargetSizeStrategyDownsample)
        resample_selector.add_target_size_strategy(
            'average', TargetSizeStrategyAverageSample)
        resample_selector.add_target_size_strategy(
            'median', TargetSizeStrategyMedianSample)

        train_node = pipeline[TrainNode.get_name()]
        train_node.default_minimize_value = False

        cv = pipeline[CrossValidation.get_name()]
        cv.use_stratified_cv_split_default = True

        one_hot_encoding_node = pipeline[OneHotEncoding.get_name()]
        one_hot_encoding_node.encode_Y = True

        return pipeline
Example #5
    @classmethod
    def get_default_pipeline(cls):
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings
        from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
        from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
        from autoPyTorch.pipeline.nodes.imputation import Imputation
        from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector
        from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
        from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector
        from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
        from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
        from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector
        from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
        from autoPyTorch.pipeline.nodes.train_node import TrainNode

        # build the pipeline
        pipeline = Pipeline([
            AutoNetSettings(),
            OptimizationAlgorithm([
                CrossValidation([
                    Imputation(),
                    NormalizationStrategySelector(),
                    OneHotEncoding(),
                    PreprocessorSelector(),
                    ResamplingStrategySelector(),
                    EmbeddingSelector(),
                    NetworkSelector(),
                    OptimizerSelector(),
                    LearningrateSchedulerSelector(),
                    LogFunctionsSelector(),
                    MetricSelector(),
                    LossModuleSelector(),
                    TrainNode()
                ])
            ])
        ])

        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
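A hedged sketch of how such a default pipeline would typically be obtained and inspected; the AutoNetClassification import path is an assumption here, and calling get_default_pipeline on the class presumes it is exposed as a classmethod, as its cls parameter suggests:

    # hypothetical usage sketch; the AutoNetClassification import path is an assumption
    from autoPyTorch import AutoNetClassification
    from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector

    pipeline = AutoNetClassification.get_default_pipeline()
    # individual nodes can then be looked up by name, e.g. to inspect or extend the metric selector
    metric_selector = pipeline[MetricSelector.get_name()]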
Example #6
    def score(self, X_test, Y_test, return_loss_value=False):
        """Calculate the score on test data using the specified optimize_metric

        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.

        Keyword Arguments:
            return_loss_value {bool} -- Whether to return the metric's loss value instead of the score (default: {False})

        Returns:
            score -- The score for the test data.
        """

        # check the data and get the current config
        X_test, Y_test = self.check_data_array_types(X_test, Y_test)
        autonet_config = self.get_current_autonet_config()

        res = self.pipeline.predict_pipeline(pipeline_config=autonet_config,
                                             X=X_test)
        if 'score' in res:
            # for a built-in dataset like CIFAR10, the pipeline computes the score on the corresponding PyTorch test set
            return res['score']
        Y_pred = res['Y']

        # one hot encode Y
        try:
            OHE = self.pipeline[OneHotEncoding.get_name()]
            Y_test = OHE.transform_y(Y_test,
                                     OHE.fit_output['y_one_hot_encoder'])
        except Exception:
            print("No one-hot encoding possible. Continuing without.")

        metric = self.pipeline[
            MetricSelector.get_name()].fit_output['optimize_metric']

        if return_loss_value:
            return metric.get_loss_value(Y_pred, Y_test)
        return metric(torch.from_numpy(Y_pred.astype(np.float32)),
                      torch.from_numpy(Y_test.astype(np.float32)))
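The final call converts the NumPy arrays to float32 torch tensors before evaluating the metric; a tiny standalone sketch of that conversion pattern:

    import numpy as np
    import torch

    y = np.array([[0.0, 1.0], [1.0, 0.0]])
    t = torch.from_numpy(y.astype(np.float32))   # float32 tensor, as many torch metrics expect
    assert t.dtype == torch.float32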
Example #7
    def score(self, X_test, Y_test):
        """Calculate the score on test data using the specified train_metric
        
        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.
        
        Returns:
            score -- The score for the test data.
        """

        # run predict pipeline
        self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X_test)
        Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y']
        
        # one hot encode Y
        OHE = self.pipeline[OneHotEncoding.get_name()]
        Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder'])

        metric = self.pipeline[MetricSelector.get_name()].fit_output['train_metric']
        return metric(torch.from_numpy(Y_test), torch.from_numpy(Y_pred))
Example #8
    def predict(self, X, return_probabilities=False):
        """Predict the targets for a data matrix X.
        
        Arguments:
            X {array} -- The data matrix.
        
        Keyword Arguments:
            return_probabilities {bool} -- Whether to also return the raw network output as the second entry of a tuple (default: {False})
        
        Returns:
            result -- The predicted targets.
        """

        # run predict pipeline
        self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X)
        Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y']

        # reverse one hot encoding 
        OHE = self.pipeline[OneHotEncoding.get_name()]
        result = OHE.reverse_transform_y(Y_pred, OHE.fit_output['y_one_hot_encoder'])
        return result if not return_probabilities else (result, Y_pred)
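Conceptually, reversing the one-hot encoding maps the network's per-class outputs back to class labels. A self-contained sketch of that idea, using an argmax over made-up class probabilities (this is an illustration, not the library's actual reverse_transform_y implementation):

    import numpy as np

    classes = np.array(["cat", "dog", "bird"])       # assumed label set
    Y_pred = np.array([[0.1, 0.7, 0.2],              # raw network outputs (softmax probabilities)
                       [0.8, 0.1, 0.1]])
    labels = classes[np.argmax(Y_pred, axis=1)]      # -> ["dog", "cat"]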
Example #9
    def predict(self, X, return_probabilities=False, return_metric=False):
        # run predict pipeline
        X, = self.check_data_array_types(X)
        prediction = None
        models_with_weights = self.fit_result["ensemble"].get_models_with_weights(self.trained_autonets)
        autonet_config = self.autonet_config or self.base_config
        for weight, autonet in models_with_weights:
            current_prediction = autonet.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)["Y"]
            prediction = weight * current_prediction if prediction is None else prediction + weight * current_prediction
            OHE = autonet.pipeline[OneHotEncoding.get_name()]
            metric = autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']

        # reverse one hot encoding 
        result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder'])
        if not return_probabilities and not return_metric:
            return result
        result = [result]
        if return_probabilities:
            result.append(prediction)
        if return_metric:
            result.append(metric)
        return tuple(result)
Example #10
    def score(self, X_test, Y_test):
        # run predict pipeline
        X_test, Y_test = self.check_data_array_types(X_test, Y_test)
        _, Y_pred, metric = self.predict(X_test, return_probabilities=True, return_metric=True)
        Y_test, _ = self.pipeline[OneHotEncoding.get_name()].complete_y_tranformation(Y_test)
        return metric(Y_pred, Y_test)