Ejemplo n.º 1
0
    def test_optimizer_selector(self):
        """Fit a network + optimizer pipeline and check the optimizer type.

        Two networks and two optimizers are registered, a hyperparameter
        configuration is sampled from the pipeline's search space, and after
        fitting on random tensors the optimizer found in the selector's
        ``fit_output`` must be an ``optim.Adam`` or ``optim.SGD`` instance.
        """
        pipeline = Pipeline([NetworkSelector(), OptimizerSelector()])

        # Register the candidate networks and an empty final activation.
        net_selector = pipeline[NetworkSelector.get_name()]
        for net_name, net_class in (("mlpnet", MlpNet),
                                    ("shapedmlpnet", ShapedMlpNet)):
            net_selector.add_network(net_name, net_class)
        net_selector.add_final_activation('none', nn.Sequential())

        # Register the candidate optimizers.
        opt_selector = pipeline[OptimizerSelector.get_name()]
        for opt_name, opt_class in (("adam", AdamOptimizer),
                                    ("sgd", SgdOptimizer)):
            opt_selector.add_optimizer(opt_name, opt_class)

        pipeline_config = pipeline.get_pipeline_config()
        pipeline_config["random_seed"] = 42
        search_space = pipeline.get_hyperparameter_search_space()
        hyper_config = search_space.sample_configuration()

        features = torch.rand(3, 3)
        targets = torch.rand(3, 2)
        pipeline.fit_pipeline(
            hyperparameter_config=hyper_config,
            pipeline_config=pipeline_config,
            X=features,
            Y=targets,
            embedding=nn.Sequential(),
        )

        sampled_optimizer = opt_selector.fit_output['optimizer']
        accepted_types = [optim.Adam, optim.SGD]
        self.assertIn(type(sampled_optimizer), accepted_types)
    def test_optimizer(self):
        """Run OptimizationAlgorithm around a stub node and check its result.

        The stub node exposes a single integer hyperparameter ``hyper`` with
        bounds 0..30; after one iteration the optimized configuration must
        contain a value for it inside that range.
        """

        class ResultNode(PipelineNode):
            def fit(self, X_train, Y_train):
                # Loss and info are derived only from the second dimension
                # of the inputs.
                n_x = X_train.shape[1]
                n_y = Y_train.shape[1]
                info = {'train_a': n_x, 'train_b': n_y}
                return {'loss': n_x, 'info': info}

            def get_hyperparameter_search_space(self, **pipeline_config):
                hyper = CSH.UniformIntegerHyperparameter('hyper',
                                                         lower=0,
                                                         upper=30)
                cs = CS.ConfigurationSpace()
                cs.add_hyperparameter(hyper)
                return cs

            def get_pipeline_config_options(self):
                logger_dir_option = ConfigOption("result_logger_dir",
                                                 default=".",
                                                 type="directory")
                metric_option = ConfigOption("optimize_metric",
                                             default="a",
                                             type=str)
                return [logger_dir_option, metric_option]

        # Only let errors through from the two loggers configured here.
        for logger_name in ('hpbandster', 'autonet'):
            logging.getLogger(logger_name).setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        pipeline_config = pipeline.get_pipeline_config(
            num_iterations=1,
            budget_type='epochs',
            result_logger_dir=".",
        )

        x_train = np.random.rand(15, 10)
        y_train = np.random.rand(15, 5)
        loggers = [json_result_logger(directory=".", overwrite=True)]
        pipeline.fit_pipeline(
            pipeline_config=pipeline_config,
            X_train=x_train,
            Y_train=y_train,
            X_valid=None,
            Y_valid=None,
            result_loggers=loggers,
            dataset_info=None,
            shutdownables=[],
        )

        optimizer_name = OptimizationAlgorithm.get_name()
        result_of_opt_pipeline = pipeline[optimizer_name].fit_output[
            'optimized_hyperparameter_config']
        print(pipeline[OptimizationAlgorithm.get_name()].fit_output)

        hyper_key = ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'
        allowed_values = list(range(31))
        self.assertIn(result_of_opt_pipeline[hyper_key], allowed_values)
Ejemplo n.º 3
0
    def test_selector(self):
        """Check that MetricSelector resolves the configured metric names.

        Three metrics are registered; ``accuracy`` is requested as the
        optimize metric and ``auc``/``mean`` as additional metrics. The
        selector's ``fit_output`` must wrap exactly those functions.
        """
        pipeline = Pipeline([MetricSelector()])

        selector = pipeline[MetricSelector.get_name()]
        registered = (
            ("auc", auc_metric),
            ("accuracy", accuracy),
            ("mean", mean_distance),
        )
        for metric_name, metric_fn in registered:
            selector.add_metric(metric_name, metric_fn)

        pipeline_config = pipeline.get_pipeline_config(
            optimize_metric="accuracy",
            additional_metrics=['auc', 'mean'],
        )
        pipeline.fit_pipeline(pipeline_config=pipeline_config)

        fit_output = selector.fit_output
        selected_optimize_metric = fit_output['optimize_metric']
        selected_additional_metrics = fit_output['additional_metrics']

        self.assertEqual(selected_optimize_metric.metric, accuracy)
        self.assertSetEqual(
            {wrapped.metric for wrapped in selected_additional_metrics},
            {auc_metric, mean_distance},
        )
Ejemplo n.º 4
0
    def test_selector(self):
        """Check that LogFunctionsSelector returns only the requested logs.

        Two log functions are registered but only ``log2`` is requested via
        ``additional_logs``; the selector's ``fit_output`` must then expose
        exactly the second function.
        """

        def log_fnc1(network, epoch):
            print("a")

        def log_fnc2(network, epoch):
            print("b")

        selector = LogFunctionsSelector()
        pipeline = Pipeline([selector])
        for log_name, log_fnc in (("log1", log_fnc1), ("log2", log_fnc2)):
            selector.add_log_function(log_name, log_fnc)

        requested_logs = ["log2"]
        pipeline_config = pipeline.get_pipeline_config(
            additional_logs=requested_logs)
        pipeline.fit_pipeline(pipeline_config=pipeline_config)

        log_functions = selector.fit_output['log_functions']
        selected_logs = [entry.log for entry in log_functions]

        self.assertListEqual(selected_logs, [log_fnc2])
    def test_network_selector(self):
        """Fit a NetworkSelector pipeline and check the chosen network type.

        After sampling a hyperparameter configuration and fitting on random
        tensors, the network in the selector's ``fit_output`` must be an
        instance of one of the two registered classes.
        """
        pipeline = Pipeline([NetworkSelector()])

        selector = pipeline[NetworkSelector.get_name()]
        candidates = (("mlpnet", MlpNet), ("shapedmlpnet", ShapedMlpNet))
        for net_name, net_class in candidates:
            selector.add_network(net_name, net_class)
        selector.add_final_activation('none', nn.Sequential())

        pipeline_config = pipeline.get_pipeline_config()
        pipeline_config["random_seed"] = 42
        search_space = pipeline.get_hyperparameter_search_space()
        hyper_config = search_space.sample_configuration()

        features = torch.rand(3, 3)
        targets = torch.rand(3, 2)
        pipeline.fit_pipeline(
            hyperparameter_config=hyper_config,
            pipeline_config=pipeline_config,
            X=features,
            Y=targets,
            embedding=nn.Sequential(),
        )

        fitted_node = pipeline[selector.get_name()]
        sampled_network = fitted_node.fit_output['network']

        self.assertIn(type(sampled_network), [MlpNet, ShapedMlpNet])
Ejemplo n.º 6
0
    def test_optimizer(self):
        """Run OptimizationAlgorithm around a stub node and check its result.

        The stub node declares one integer hyperparameter ``hyper`` bounded
        by 0 and 30; after a single iteration the optimized configuration
        must hold a value for it within those bounds.
        """

        class ResultNode(PipelineNode):
            def fit(self, X_train, Y_train):
                # Loss and info depend only on the inputs' second dimension.
                n_x = X_train.shape[1]
                n_y = Y_train.shape[1]
                return {'loss': n_x, 'info': {'a': n_x, 'b': n_y}}

            def get_hyperparameter_search_space(self, **pipeline_config):
                hyper = CSH.UniformIntegerHyperparameter('hyper',
                                                         lower=0,
                                                         upper=30)
                cs = CS.ConfigurationSpace()
                cs.add_hyperparameter(hyper)
                return cs

        # Only let errors through from the two loggers configured here.
        for logger_name in ('hpbandster', 'autonet'):
            logging.getLogger(logger_name).setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        pipeline_config = pipeline.get_pipeline_config(
            num_iterations=1,
            budget_type='epochs',
        )

        x_train = torch.rand(15, 10)
        y_train = torch.rand(15, 5)
        pipeline.fit_pipeline(
            pipeline_config=pipeline_config,
            X_train=x_train,
            Y_train=y_train,
            X_valid=None,
            Y_valid=None,
            one_hot_encoder=None,
        )

        # NOTE(review): the key is spelled 'hyperparamater' here; it is kept
        # unchanged on the assumption that it matches the key produced by the
        # library version under test -- confirm against OptimizationAlgorithm.
        optimizer_node = pipeline[OptimizationAlgorithm.get_name()]
        result_of_opt_pipeline = optimizer_node.fit_output[
            'optimized_hyperparamater_config']

        hyper_key = ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'
        allowed_values = list(range(31))
        self.assertIn(result_of_opt_pipeline[hyper_key], allowed_values)
Ejemplo n.º 7
0
    def test_loss_selector(self):
        """Check that LossModuleSelector builds the configured loss function.

        The pipeline is fitted twice, once with the hyperparameter forced to
        ``L1`` and once to ``cross_entropy``; each time the type of the
        wrapped loss function in ``fit_output`` is verified. For the
        cross-entropy case the selected loss is also evaluated on one sample.
        """
        pipeline = Pipeline([LossModuleSelector()])

        selector = pipeline[LossModuleSelector.get_name()]
        selector.add_loss_module("L1", nn.L1Loss)
        weight_strategy = LossWeightStrategyWeighted()
        selector.add_loss_module("cross_entropy", nn.CrossEntropyLoss,
                                 weight_strategy, True)

        pipeline_config = pipeline.get_pipeline_config(
            loss_modules=["L1", "cross_entropy"])
        search_space = pipeline.get_hyperparameter_search_space(
            **pipeline_config)
        pipeline_hyperparameter_config = search_space.sample_configuration()
        loss_key = "LossModuleSelector:loss_module"

        # First fit: force the L1 loss.
        pipeline_hyperparameter_config[loss_key] = "L1"
        indices = np.array([0, 1, 2])
        features = np.random.rand(3, 3)
        targets = np.random.rand(3, 2)
        pipeline.fit_pipeline(
            hyperparameter_config=pipeline_hyperparameter_config,
            train_indices=indices,
            X=features,
            Y=targets,
            pipeline_config=pipeline_config,
            tmp=None,
        )
        fitted_node = pipeline[selector.get_name()]
        selected_loss = fitted_node.fit_output['loss_function']
        self.assertEqual(type(selected_loss.function), nn.L1Loss)

        # Second fit: force cross entropy, using fixed two-column targets.
        pipeline_hyperparameter_config[loss_key] = "cross_entropy"
        indices = np.array([0, 1, 2])
        features = np.random.rand(3, 3)
        targets = np.array([[1, 0], [0, 1], [1, 0]])
        pipeline.fit_pipeline(
            hyperparameter_config=pipeline_hyperparameter_config,
            train_indices=indices,
            X=features,
            Y=targets,
            pipeline_config=pipeline_config,
            tmp=None,
        )
        fitted_node = pipeline[selector.get_name()]
        selected_loss = fitted_node.fit_output['loss_function']
        self.assertEqual(type(selected_loss.function), nn.CrossEntropyLoss)

        # A very large score on the column marked in the target is expected
        # to yield a loss equal to 0.
        prediction = torch.tensor([[0.0, 10000.0]])
        target = torch.tensor([[0, 1]])
        self.assertEqual(selected_loss(prediction, target), 0)
    def test_lr_scheduler_selector(self):
        """Fit network, optimizer and LR-scheduler selectors together.

        After fitting on random tensors, the ``lr_scheduler`` training
        component of the first training technique produced by the scheduler
        selector must be an ``ExponentialLR`` or ``StepLR`` instance.
        """
        pipeline = Pipeline([
            NetworkSelector(),
            OptimizerSelector(),
            LearningrateSchedulerSelector(),
        ])

        # Register networks.
        net_selector = pipeline[NetworkSelector.get_name()]
        for net_name, net_class in (("mlpnet", MlpNet),
                                    ("shapedmlpnet", ShapedMlpNet)):
            net_selector.add_network(net_name, net_class)
        net_selector.add_final_activation('none', nn.Sequential())

        # Register optimizers.
        opt_selector = pipeline[OptimizerSelector.get_name()]
        for opt_name, opt_class in (("adam", AdamOptimizer),
                                    ("sgd", SgdOptimizer)):
            opt_selector.add_optimizer(opt_name, opt_class)

        # Register learning-rate schedulers.
        scheduler_node_name = LearningrateSchedulerSelector.get_name()
        lr_scheduler_selector = pipeline[scheduler_node_name]
        for sched_name, sched_class in (("step", SchedulerStepLR),
                                        ("exp", SchedulerExponentialLR)):
            lr_scheduler_selector.add_lr_scheduler(sched_name, sched_class)

        pipeline_config = pipeline.get_pipeline_config()
        pipeline_config["random_seed"] = 42
        search_space = pipeline.get_hyperparameter_search_space()
        hyper_config = search_space.sample_configuration()

        features = torch.rand(3, 3)
        targets = torch.rand(3, 2)
        pipeline.fit_pipeline(
            hyperparameter_config=hyper_config,
            pipeline_config=pipeline_config,
            X=features,
            Y=targets,
            embedding=nn.Sequential(),
            training_techniques=[],
            train_indices=np.array([0, 1, 2]),
        )

        fitted_node = pipeline[lr_scheduler_selector.get_name()]
        first_technique = fitted_node.fit_output['training_techniques'][0]
        sampled_lr_scheduler = first_technique.training_components[
            'lr_scheduler']

        accepted_types = [lr_scheduler.ExponentialLR, lr_scheduler.StepLR]
        self.assertIn(type(sampled_lr_scheduler), accepted_types)
    def test_cross_validation(self):
        """Check the loss/info returned by CrossValidation in four setups.

        The setups are: the ``k_fold`` cross validator, a ``validation_split``
        of 0.3, the ``stratified_k_fold`` cross validator, and an explicitly
        supplied validation set. A stub node reports sums over the training
        and validation rows so the results can be compared with fixed numbers.
        """

        class ResultNode(PipelineNode):
            def fit(self, X, Y, train_indices, valid_indices):
                # Loss is the sum over validation rows; info reports both sums.
                valid_total = np.sum(X[valid_indices])
                info = {'a': np.sum(X[train_indices]),
                        'b': np.sum(X[valid_indices])}
                return {'loss': valid_total, 'info': info}

        pipeline = Pipeline([CrossValidation([ResultNode()])])

        def flatten(x):
            # Reshape the labels to one dimension.
            return x.reshape((-1,))

        cv_node = pipeline["CrossValidation"]
        cv_node.add_cross_validator("k_fold", KFold, flatten)
        cv_node.add_cross_validator("stratified_k_fold", StratifiedKFold,
                                    flatten)

        def fit_with(config, features, targets,
                     features_valid=None, targets_valid=None):
            # Shared fit routine: derive the search space, pin the seed,
            # describe the dataset and run the pipeline.
            search_space = pipeline.get_hyperparameter_search_space(**config)
            config["random_seed"] = 42
            info = DataSetInfo()
            info.categorical_features = [None] * 3
            info.x_shape = features.shape
            info.y_shape = targets.shape
            return pipeline.fit_pipeline(
                hyperparameter_config=search_space,
                pipeline_config=config,
                X_train=features,
                Y_train=targets,
                X_valid=features_valid,
                Y_valid=targets_valid,
                budget=5,
                budget_type=BudgetTypeEpochs,
                one_hot_encoder=None,
                optimize_start_time=time.time(),
                refit=False,
                dataset_info=info,
                rescore=False,
            )

        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        y_train = np.array([[1], [0], [1]])

        # test cv_splits
        pipeline_config = pipeline.get_pipeline_config(
            cross_validator="k_fold",
            cross_validator_args={"n_splits": 3},
        )
        cv_result = fit_with(pipeline_config, x_train, y_train)
        self.assertEqual(cv_result['loss'], 15)
        self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15})

        # test validation split
        pipeline_config = pipeline.get_pipeline_config(validation_split=0.3)
        cv_result = fit_with(pipeline_config, x_train, y_train)
        self.assertEqual(cv_result['loss'], 24)
        self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24})

        # test stratified cv split
        # The previous training data is kept for use as a validation set in
        # the last scenario.
        x_valid, y_valid = x_train, y_train
        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9],
                            [10, 11, 12], [13, 14, 15], [16, 17, 18]])
        y_train = np.array([[1], [1], [0], [0], [1], [0]])

        pipeline_config = pipeline.get_pipeline_config(
            cross_validator="stratified_k_fold",
            cross_validator_args={"n_splits": 3},
        )
        cv_result = fit_with(pipeline_config, x_train, y_train)
        self.assertEqual(cv_result['loss'], 57)
        self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57})

        # test explicitly supplied validation set
        pipeline_config = pipeline.get_pipeline_config()
        cv_result = fit_with(pipeline_config, x_train, y_train,
                             x_valid, y_valid)
        self.assertEqual(cv_result['loss'], 45)
        self.assertDictEqual(cv_result['info'], {'a': 171, 'b': 45})
Ejemplo n.º 10
0
    def test_cross_validation(self):
        """Check the loss/info returned by CrossValidation in three setups.

        The setups are ``cv_splits=3``, ``validation_split=0.3`` and
        ``cv_splits=3`` with ``use_stratified_cv_split=True``. A stub node
        reports the sums of the training and validation data it receives so
        the results can be compared with fixed numbers.
        """

        class ResultNode(PipelineNode):
            def fit(self, X_train, X_valid):
                # Loss is the validation sum; info reports both sums.
                valid_total = np.sum(X_valid)
                info = {'a': np.sum(X_train), 'b': np.sum(X_valid)}
                return {'loss': valid_total, 'info': info}

        pipeline = Pipeline([CrossValidation([ResultNode()])])

        def fit_with(config, features, targets):
            # Shared fit routine: derive the search space, clear the
            # categorical feature setting and run the pipeline.
            search_space = pipeline.get_hyperparameter_search_space(**config)
            config['categorical_features'] = None
            return pipeline.fit_pipeline(
                hyperparameter_config=search_space,
                pipeline_config=config,
                X_train=features,
                Y_train=targets,
                X_valid=None,
                Y_valid=None,
                budget=5,
                budget_type=BudgetTypeEpochs,
                one_hot_encoder=None,
                optimize_start_time=time.time(),
            )

        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        y_train = np.array([[1], [0], [1]])

        # test cv_splits
        pipeline_config = pipeline.get_pipeline_config(cv_splits=3)
        cv_result = fit_with(pipeline_config, x_train, y_train)
        self.assertEqual(cv_result['loss'], 15)
        self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15})

        # test validation split
        pipeline_config = pipeline.get_pipeline_config(validation_split=0.3)
        cv_result = fit_with(pipeline_config, x_train, y_train)
        self.assertEqual(cv_result['loss'], 24)
        self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24})

        # test stratified cv split
        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9],
                            [10, 11, 12], [13, 14, 15], [16, 17, 18]])
        y_train = np.array([[1], [1], [0], [0], [1], [0]])

        pipeline_config = pipeline.get_pipeline_config(
            cv_splits=3,
            use_stratified_cv_split=True,
        )
        cv_result = fit_with(pipeline_config, x_train, y_train)
        self.assertEqual(cv_result['loss'], 57)
        self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57})