def test_optimizer(self):
        class ResultNode(PipelineNode):
            def fit(self, X_train, Y_train):
                return {
                    'loss': X_train.shape[1],
                    'info': {
                        'train_a': X_train.shape[1],
                        'train_b': Y_train.shape[1]
                    }
                }

            def get_hyperparameter_search_space(self, **pipeline_config):
                cs = CS.ConfigurationSpace()
                cs.add_hyperparameter(
                    CSH.UniformIntegerHyperparameter('hyper',
                                                     lower=0,
                                                     upper=30))
                return cs

            def get_pipeline_config_options(self):
                return [
                    ConfigOption("result_logger_dir",
                                 default=".",
                                 type="directory"),
                    ConfigOption("optimize_metric", default="a", type=str),
                ]

        logger = logging.getLogger('hpbandster')
        logger.setLevel(logging.ERROR)
        logger = logging.getLogger('autonet')
        logger.setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        pipeline_config = pipeline.get_pipeline_config(num_iterations=1,
                                                       budget_type='epochs',
                                                       result_logger_dir=".")
        pipeline.fit_pipeline(
            pipeline_config=pipeline_config,
            X_train=np.random.rand(15, 10),
            Y_train=np.random.rand(15, 5),
            X_valid=None,
            Y_valid=None,
            result_loggers=[json_result_logger(directory=".", overwrite=True)],
            dataset_info=None,
            shutdownables=[])

        fit_output = pipeline[OptimizationAlgorithm.get_name()].fit_output
        result_of_opt_pipeline = fit_output['optimized_hyperparameter_config']
        print(fit_output)

        self.assertIn(
            result_of_opt_pipeline[ResultNode.get_name() +
                                   ConfigWrapper.delimiter + 'hyper'],
            list(range(0, 31)))
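For reference, the test above relies on roughly the following imports. The paths for Pipeline and OptimizationAlgorithm match the ones shown in the default-pipeline example further down this listing; the paths for PipelineNode, ConfigOption and ConfigWrapper, and the hpbandster logger import, are assumptions and may differ between autoPyTorch versions.

# Imports assumed by the test above; paths marked "assumed" are not confirmed by this listing.
import logging
import numpy as np
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
from hpbandster.core.result import json_result_logger

from autoPyTorch.pipeline.base.pipeline import Pipeline
from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
from autoPyTorch.pipeline.base.pipeline_node import PipelineNode  # assumed path
from autoPyTorch.utils.config.config_option import ConfigOption  # assumed path
from autoPyTorch.utils.configspace_wrapper import ConfigWrapper  # assumed path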
Example #2
    def fit(self, X_train, Y_train, X_valid=None, Y_valid=None, refit=True, **autonet_config):
        """Fit AutoNet to training data.
        
        Arguments:
            X_train {array} -- Training data.
            Y_train {array} -- Targets of training data.
        
        Keyword Arguments:
            X_valid {array} -- Validation data. Will be ignored if cv_splits > 1. (default: {None})
            Y_valid {array} -- Targets of the validation data. Will be ignored if cv_splits > 1. (default: {None})
            refit {bool} -- Whether the final architecture should be trained again after the search. (default: {True})
            **autonet_config -- Configures AutoNet for your needs. You can also configure AutoNet in the constructor. Call print_help() for more info.
        
        Returns:
            optimized_hyperparameter_config -- The best hyperparameter configuration found.
            final_metric_score -- The final score of the specified train metric.
        """
        self.autonet_config = self.pipeline.get_pipeline_config(**dict(self.base_config, **autonet_config))

        self.pipeline.fit_pipeline(pipeline_config=self.autonet_config,
                                   X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid)

        output = self.pipeline[OptimizationAlgorithm.get_name()].fit_output
        self.optimized_hyperparameter_config = output["optimized_hyperparamater_config"]
        self.optimized_hyperparameter_config_budget = output["budget"]
        if refit:
            self.refit(X_train, Y_train, X_valid, Y_valid, self.optimized_hyperparameter_config, self.autonet_config)
        return self.optimized_hyperparameter_config, output['final_metric_score']
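A minimal usage sketch of this fit API follows. The AutoNetClassification entry point, the configuration keys and the synthetic data are assumptions and may vary between releases; only the return value (best configuration plus final metric score) is taken from the method above.

import numpy as np
from autoPyTorch import AutoNetClassification  # assumed entry point

X = np.random.rand(200, 10)
y = np.random.randint(0, 2, size=200)

# Budget/config keys below are assumptions; call print_help() to list the real options.
autonet = AutoNetClassification(budget_type='epochs', min_budget=1, max_budget=9,
                                num_iterations=1)
# fit returns the best hyperparameter configuration and the final metric score
best_config, final_score = autonet.fit(X, y, refit=True)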
Example #3
    def score(self, X_test, Y_test, return_loss_value=False):
        """Calculate the sore on test data using the specified optimize_metric
        
        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.
        
        Returns:
            score -- The score for the test data.
        """

        # run predict pipeline
        X_test, Y_test = self.check_data_array_types(X_test, Y_test)
        autonet_config = self.autonet_config or self.base_config
        self.pipeline.predict_pipeline(pipeline_config=autonet_config,
                                       X=X_test)
        Y_pred = self.pipeline[
            OptimizationAlgorithm.get_name()].predict_output['Y']

        # one hot encode Y
        OHE = self.pipeline[OneHotEncoding.get_name()]
        Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder'])

        metric = self.pipeline[
            MetricSelector.get_name()].fit_output['optimize_metric']
        if return_loss_value:
            return metric.get_loss_value(Y_pred, Y_test)
        return metric(Y_pred, Y_test)
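Continuing the hypothetical autonet object and synthetic data from the fit sketch above, scoring might look like this:

X_test = np.random.rand(50, 10)
y_test = np.random.randint(0, 2, size=50)

test_score = autonet.score(X_test, y_test)                           # value of the optimize metric
test_loss = autonet.score(X_test, y_test, return_loss_value=True)    # the metric's loss value instead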
Example #4
    def test_optimizer(self):
        class ResultNode(PipelineNode):
            def fit(self, X_train, Y_train):
                return {
                    'loss': X_train.shape[1],
                    'info': {
                        'a': X_train.shape[1],
                        'b': Y_train.shape[1]
                    }
                }

            def get_hyperparameter_search_space(self, **pipeline_config):
                cs = CS.ConfigurationSpace()
                cs.add_hyperparameter(
                    CSH.UniformIntegerHyperparameter('hyper',
                                                     lower=0,
                                                     upper=30))
                return cs

        logger = logging.getLogger('hpbandster')
        logger.setLevel(logging.ERROR)
        logger = logging.getLogger('autonet')
        logger.setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        pipeline_config = pipeline.get_pipeline_config(num_iterations=1,
                                                       budget_type='epochs')
        pipeline.fit_pipeline(pipeline_config=pipeline_config,
                              X_train=torch.rand(15, 10),
                              Y_train=torch.rand(15, 5),
                              X_valid=None,
                              Y_valid=None,
                              one_hot_encoder=None)

        fit_output = pipeline[OptimizationAlgorithm.get_name()].fit_output
        result_of_opt_pipeline = fit_output['optimized_hyperparamater_config']

        self.assertIn(
            result_of_opt_pipeline[ResultNode.get_name() +
                                   ConfigWrapper.delimiter + 'hyper'],
            list(range(0, 31)))
Example #5
    def get_default_pipeline(cls):
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings
        from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
        from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
        from autoPyTorch.pipeline.nodes.imputation import Imputation
        from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector
        from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
        from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector
        from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
        from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
        from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector
        from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
        from autoPyTorch.pipeline.nodes.train_node import TrainNode

        # build the pipeline
        pipeline = Pipeline([
            AutoNetSettings(),
            OptimizationAlgorithm([
                CrossValidation([
                    Imputation(),
                    NormalizationStrategySelector(),
                    OneHotEncoding(),
                    PreprocessorSelector(),
                    ResamplingStrategySelector(),
                    EmbeddingSelector(),
                    NetworkSelector(),
                    OptimizerSelector(),
                    LearningrateSchedulerSelector(),
                    LogFunctionsSelector(),
                    MetricSelector(),
                    LossModuleSelector(),
                    TrainNode()
                ])
            ])
        ])

        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
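Once the default pipeline is built, individual nodes can be looked up by name, mirroring the pipeline[...] accesses used in the other examples here. A small sketch, assuming get_default_pipeline is exposed as a classmethod on an AutoNet subclass (AutoNetClassification is the same assumption as in the fit sketch above):

pipeline = AutoNetClassification.get_default_pipeline()   # assumed AutoNet subclass
optimizer_node = pipeline[OptimizationAlgorithm.get_name()]
metric_node = pipeline[MetricSelector.get_name()]
train_node = pipeline[TrainNode.get_name()]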
Example #6
    def score(self, X_test, Y_test):
        """Calculate the sore on test data using the specified train_metric
        
        Arguments:
            X_test {array} -- The test data matrix.
            Y_test {array} -- The test targets.
        
        Returns:
            score -- The score for the test data.
        """

        # run predict pipeline
        self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X_test)
        Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y']
        
        # one hot encode Y
        OHE = self.pipeline[OneHotEncoding.get_name()]
        Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder'])

        metric = self.pipeline[MetricSelector.get_name()].fit_output['train_metric']
        return metric(torch.from_numpy(Y_test), torch.from_numpy(Y_pred))
Example #7
    def predict(self, X, return_probabilities=False):
        """Predict the targets for a data matrix X.
        
        Arguments:
            X {array} -- The data matrix.
        
        Keyword Arguments:
            return_probabilities {bool} -- Whether to return a tuple whose second entry is the raw network output. (default: {False})
        
        Returns:
            result -- The predicted targets.
        """

        # run predict pipeline
        self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X)
        Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y']

        # reverse one hot encoding 
        OHE = self.pipeline[OneHotEncoding.get_name()]
        result = OHE.reverse_transform_y(Y_pred, OHE.fit_output['y_one_hot_encoder'])
        return result if not return_probabilities else (result, Y_pred)
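A short prediction sketch, reusing the hypothetical autonet object and X_test from the sketches above:

y_hat = autonet.predict(X_test)                                      # decoded target labels
y_hat, y_raw = autonet.predict(X_test, return_probabilities=True)    # labels plus raw network output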