def test_network_selector(self):
        """Fit a pipeline holding only a NetworkSelector and check the network type.

        Two networks and one final activation are registered, a hyperparameter
        configuration is sampled from the pipeline's search space, and the
        type of the 'network' entry in the selector's fit output must be
        either MlpNet or ShapedMlpNet.
        """
        pipeline = Pipeline([NetworkSelector()])

        net_selector = pipeline[NetworkSelector.get_name()]
        registered_networks = (("mlpnet", MlpNet), ("shapedmlpnet", ShapedMlpNet))
        for net_name, net_class in registered_networks:
            net_selector.add_network(net_name, net_class)
        net_selector.add_final_activation('none', nn.Sequential())

        config = pipeline.get_pipeline_config()
        config["random_seed"] = 42
        search_space = pipeline.get_hyperparameter_search_space()
        sampled_hyperparameters = search_space.sample_configuration()

        pipeline.fit_pipeline(
            hyperparameter_config=sampled_hyperparameters,
            pipeline_config=config,
            X=torch.rand(3, 3),
            Y=torch.rand(3, 2),
            embedding=nn.Sequential(),
        )

        # Look the node up again through the pipeline, as the fit output is
        # read from the node registered under the selector's name.
        fitted_node = pipeline[net_selector.get_name()]
        network = fitted_node.fit_output['network']

        self.assertIn(type(network), [MlpNet, ShapedMlpNet])
Example #2
0
    def test_optimizer(self):
        """Run OptimizationAlgorithm around a stub node and check the 'hyper' value.

        The stub node exposes a single integer hyperparameter 'hyper' with
        bounds 0 and 30; after fitting, the optimized configuration's value
        for that hyperparameter must be one of the integers 0..30.
        """
        class ResultNode(PipelineNode):
            """Stub node whose loss is the second dimension of X_train."""

            def fit(self, X_train, Y_train):
                x_columns = X_train.shape[1]
                y_columns = Y_train.shape[1]
                return {
                    'loss': x_columns,
                    'info': {'a': x_columns, 'b': y_columns},
                }

            def get_hyperparameter_search_space(self, **pipeline_config):
                hyper = CSH.UniformIntegerHyperparameter(
                    'hyper', lower=0, upper=30)
                search_space = CS.ConfigurationSpace()
                search_space.add_hyperparameter(hyper)
                return search_space

        # Only let errors through from these two loggers during the run.
        for logger_name in ('hpbandster', 'autonet'):
            logging.getLogger(logger_name).setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        config = pipeline.get_pipeline_config(
            num_iterations=1, budget_type='epochs')
        pipeline.fit_pipeline(
            pipeline_config=config,
            X_train=torch.rand(15, 10),
            Y_train=torch.rand(15, 5),
            X_valid=None,
            Y_valid=None,
            one_hot_encoder=None,
        )

        optimizer_node = pipeline[OptimizationAlgorithm.get_name()]
        best_config = optimizer_node.fit_output[
            'optimized_hyperparamater_config']
        hyper_key = ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'

        self.assertIn(best_config[hyper_key], list(range(31)))
Example #3
0
    def test_loss_selector(self):
        """Fit LossModuleSelector once per registered loss and check the result.

        An L1 and a cross-entropy loss module are registered. The sampled
        hyperparameter configuration is forced to each loss in turn, and the
        type of the ``function`` attribute of the resulting 'loss_function'
        is compared with the expected torch loss class. With cross-entropy
        selected, the loss value for one fixed input/target pair must be 0.
        """
        pipeline = Pipeline([LossModuleSelector()])

        loss_selector = pipeline[LossModuleSelector.get_name()]
        loss_selector.add_loss_module("L1", nn.L1Loss)
        loss_selector.add_loss_module(
            "cross_entropy",
            nn.CrossEntropyLoss,
            LossWeightStrategyWeighted(),
            True,
        )

        config = pipeline.get_pipeline_config(
            loss_modules=["L1", "cross_entropy"])
        search_space = pipeline.get_hyperparameter_search_space(**config)
        hyperparameters = search_space.sample_configuration()

        # First run: L1 loss with random continuous targets.
        hyperparameters["LossModuleSelector:loss_module"] = "L1"
        pipeline.fit_pipeline(
            hyperparameter_config=hyperparameters,
            train_indices=np.array([0, 1, 2]),
            X=np.random.rand(3, 3),
            Y=np.random.rand(3, 2),
            pipeline_config=config,
            tmp=None,
        )
        fitted_node = pipeline[loss_selector.get_name()]
        chosen_loss = fitted_node.fit_output['loss_function']
        self.assertEqual(type(chosen_loss.function), nn.L1Loss)

        # Second run: cross-entropy with 0/1 target rows.
        hyperparameters["LossModuleSelector:loss_module"] = "cross_entropy"
        pipeline.fit_pipeline(
            hyperparameter_config=hyperparameters,
            train_indices=np.array([0, 1, 2]),
            X=np.random.rand(3, 3),
            Y=np.array([[1, 0], [0, 1], [1, 0]]),
            pipeline_config=config,
            tmp=None,
        )
        fitted_node = pipeline[loss_selector.get_name()]
        chosen_loss = fitted_node.fit_output['loss_function']
        self.assertEqual(type(chosen_loss.function), nn.CrossEntropyLoss)

        prediction = torch.tensor([[0.0, 10000.0]])
        target = torch.tensor([[0, 1]])
        self.assertEqual(chosen_loss(prediction, target), 0)
    def test_lr_scheduler_selector(self):
        """Fit network, optimizer and LR-scheduler selectors and check the scheduler.

        After fitting with a sampled hyperparameter configuration, the first
        entry of 'training_techniques' in the scheduler selector's fit output
        must hold an 'lr_scheduler' training component whose type is
        ExponentialLR or StepLR.
        """
        pipeline = Pipeline([
            NetworkSelector(),
            OptimizerSelector(),
            LearningrateSchedulerSelector(),
        ])

        network_node = pipeline[NetworkSelector.get_name()]
        for net_name, net_class in (("mlpnet", MlpNet),
                                    ("shapedmlpnet", ShapedMlpNet)):
            network_node.add_network(net_name, net_class)
        network_node.add_final_activation('none', nn.Sequential())

        optimizer_node = pipeline[OptimizerSelector.get_name()]
        for opt_name, opt_class in (("adam", AdamOptimizer),
                                    ("sgd", SgdOptimizer)):
            optimizer_node.add_optimizer(opt_name, opt_class)

        scheduler_node = pipeline[LearningrateSchedulerSelector.get_name()]
        for sched_name, sched_class in (("step", SchedulerStepLR),
                                        ("exp", SchedulerExponentialLR)):
            scheduler_node.add_lr_scheduler(sched_name, sched_class)

        config = pipeline.get_pipeline_config()
        config["random_seed"] = 42
        search_space = pipeline.get_hyperparameter_search_space()
        sampled_hyperparameters = search_space.sample_configuration()

        pipeline.fit_pipeline(
            hyperparameter_config=sampled_hyperparameters,
            pipeline_config=config,
            X=torch.rand(3, 3),
            Y=torch.rand(3, 2),
            embedding=nn.Sequential(),
            training_techniques=[],
            train_indices=np.array([0, 1, 2]),
        )

        fitted_node = pipeline[scheduler_node.get_name()]
        first_technique = fitted_node.fit_output['training_techniques'][0]
        scheduler = first_technique.training_components['lr_scheduler']

        self.assertIn(type(scheduler),
                      [lr_scheduler.ExponentialLR, lr_scheduler.StepLR])
    def test_selector(self):
        """Fit network and optimizer selectors and check the optimizer type.

        Two networks and two optimizers are registered; after fitting with a
        sampled hyperparameter configuration, the type of the 'optimizer'
        entry in the optimizer selector's fit output must be optim.Adam or
        optim.SGD.
        """
        pipeline = Pipeline([NetworkSelector(), OptimizerSelector()])

        network_node = pipeline[NetworkSelector.get_name()]
        for net_name, net_class in (("mlpnet", MlpNet),
                                    ("shapedmlpnet", ShapedMlpNet)):
            network_node.add_network(net_name, net_class)
        network_node.add_final_activation('none', nn.Sequential())

        optimizer_node = pipeline[OptimizerSelector.get_name()]
        for opt_name, opt_class in (("adam", AdamOptimizer),
                                    ("sgd", SgdOptimizer)):
            optimizer_node.add_optimizer(opt_name, opt_class)

        config = pipeline.get_pipeline_config()
        search_space = pipeline.get_hyperparameter_search_space()
        sampled_hyperparameters = search_space.sample_configuration()

        pipeline.fit_pipeline(
            hyperparameter_config=sampled_hyperparameters,
            pipeline_config=config,
            X_train=torch.rand(3, 3),
            Y_train=torch.rand(3, 2),
            embedding=nn.Sequential(),
        )

        chosen_optimizer = optimizer_node.fit_output['optimizer']

        self.assertIn(type(chosen_optimizer), [optim.Adam, optim.SGD])
class SubPipelineNode(PipelineNode):
    """Pipeline node that owns a nested ``Pipeline``.

    ``fit`` and ``predict`` are forwarded to the nested pipeline built from
    the nodes passed to the constructor.
    """

    def __init__(self, sub_pipeline_nodes):
        """Initialise the node and build the nested pipeline.

        Args:
            sub_pipeline_nodes: passed unchanged to ``Pipeline(...)``.
        """
        super().__init__()

        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        """Attach this node to ``pipeline`` and make it the nested pipeline's parent."""
        super().set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        """Forward all keyword arguments to the nested pipeline's ``fit_pipeline``."""
        fit_result = self.sub_pipeline.fit_pipeline(**kwargs)
        return fit_result

    def predict(self, **kwargs):
        """Forward all keyword arguments to the nested pipeline's ``predict_pipeline``."""
        prediction = self.sub_pipeline.predict_pipeline(**kwargs)
        return prediction
Example #7
0
class SubPipelineNode(PipelineNode):
    """Pipeline node that owns a nested ``Pipeline``.

    ``fit`` and ``predict`` are forwarded to the nested pipeline, and
    ``clone`` copies the node together with a clone of that pipeline.
    """

    def __init__(self, sub_pipeline_nodes):
        """Initialise the node and build the nested pipeline.

        Args:
            sub_pipeline_nodes: passed unchanged to ``Pipeline(...)``.
        """
        super().__init__()

        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        """Attach this node to ``pipeline`` and make it the nested pipeline's parent."""
        super().set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        """Forward all keyword arguments to the nested pipeline's ``fit_pipeline``."""
        fit_result = self.sub_pipeline.fit_pipeline(**kwargs)
        return fit_result

    def predict(self, **kwargs):
        """Forward all keyword arguments to the nested pipeline's ``predict_pipeline``."""
        prediction = self.sub_pipeline.predict_pipeline(**kwargs)
        return prediction

    def clone(self):
        """Return a copy of this node that carries a clone of the nested pipeline.

        The nested pipeline is cloned first; the base-class ``clone`` is then
        asked to skip the listed attributes (including ``sub_pipeline``), and
        the cloned nested pipeline is assigned to the new node afterwards.
        """
        cloned_sub_pipeline = self.sub_pipeline.clone()
        skipped_attributes = (
            "pipeline",
            "fit_output",
            "predict_output",
            "child_node",
            "sub_pipeline",
        )
        node_copy = super().clone(skip=skipped_attributes)
        node_copy.sub_pipeline = cloned_sub_pipeline
        return node_copy
    def test_cross_validation(self):
        """Check CrossValidation results for four configurations.

        A stub node returns sums over the training and validation rows of X,
        so the 'loss' and 'info' returned by ``fit_pipeline`` can be compared
        with fixed expected values for: k-fold, a validation split,
        stratified k-fold, and an explicitly passed validation set.
        """

        class ResultNode(PipelineNode):
            """Stub node reporting sums of the selected rows of X."""

            def fit(self, X, Y, train_indices, valid_indices):
                train_sum = np.sum(X[train_indices])
                valid_sum = np.sum(X[valid_indices])
                return {
                    'loss': valid_sum,
                    'info': {'a': train_sum, 'b': valid_sum},
                }

        pipeline = Pipeline([CrossValidation([ResultNode()])])

        for cv_name, cv_class in (("k_fold", KFold),
                                  ("stratified_k_fold", StratifiedKFold)):
            pipeline["CrossValidation"].add_cross_validator(
                cv_name, cv_class, lambda x: x.reshape((-1,)))

        def fit_and_score(x, y, x_valid=None, y_valid=None, **config_options):
            """Build config, search space and dataset info, then fit the pipeline."""
            config = pipeline.get_pipeline_config(**config_options)
            search_space = pipeline.get_hyperparameter_search_space(**config)
            config["random_seed"] = 42

            info = DataSetInfo()
            info.categorical_features = [None] * 3
            info.x_shape = x.shape
            info.y_shape = y.shape

            return pipeline.fit_pipeline(
                hyperparameter_config=search_space,
                pipeline_config=config,
                X_train=x,
                Y_train=y,
                X_valid=x_valid,
                Y_valid=y_valid,
                budget=5,
                budget_type=BudgetTypeEpochs,
                one_hot_encoder=None,
                optimize_start_time=time.time(),
                refit=False,
                dataset_info=info,
                rescore=False,
            )

        small_x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        small_y = np.array([[1], [0], [1]])

        # test cv_splits
        result = fit_and_score(
            small_x, small_y,
            cross_validator="k_fold",
            cross_validator_args={"n_splits": 3})
        self.assertEqual(result['loss'], 15)
        self.assertDictEqual(result['info'], {'a': 30, 'b': 15})

        # test validation split
        result = fit_and_score(small_x, small_y, validation_split=0.3)
        self.assertEqual(result['loss'], 24)
        self.assertDictEqual(result['info'], {'a': 21, 'b': 24})

        # test stratified cv split
        large_x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9],
                            [10, 11, 12], [13, 14, 15], [16, 17, 18]])
        large_y = np.array([[1], [1], [0], [0], [1], [0]])

        result = fit_and_score(
            large_x, large_y,
            cross_validator="stratified_k_fold",
            cross_validator_args={"n_splits": 3})
        self.assertEqual(result['loss'], 57)
        self.assertDictEqual(result['info'], {'a': 114, 'b': 57})

        # default config, with the small arrays passed as validation data
        result = fit_and_score(
            large_x, large_y, x_valid=small_x, y_valid=small_y)
        self.assertEqual(result['loss'], 45)
        self.assertDictEqual(result['info'], {'a': 171, 'b': 45})
Example #9
0
    def test_cross_validation(self):
        """Check CrossValidation results for three configurations.

        A stub node returns the sums of X_train and X_valid, so the 'loss'
        and 'info' returned by ``fit_pipeline`` can be compared with fixed
        expected values for: ``cv_splits=3``, ``validation_split=0.3`` and
        ``cv_splits=3`` with ``use_stratified_cv_split=True``.
        """
        class ResultNode(PipelineNode):
            """Stub node reporting the sums of its train and validation inputs."""

            def fit(self, X_train, X_valid):
                train_sum = np.sum(X_train)
                valid_sum = np.sum(X_valid)
                return {
                    'loss': valid_sum,
                    'info': {'a': train_sum, 'b': valid_sum},
                }

        pipeline = Pipeline([CrossValidation([ResultNode()])])

        def fit_and_score(x, y, **config_options):
            """Build config and search space, then fit without validation data."""
            config = pipeline.get_pipeline_config(**config_options)
            search_space = pipeline.get_hyperparameter_search_space(**config)
            config['categorical_features'] = None

            return pipeline.fit_pipeline(
                hyperparameter_config=search_space,
                pipeline_config=config,
                X_train=x,
                Y_train=y,
                X_valid=None,
                Y_valid=None,
                budget=5,
                budget_type=BudgetTypeEpochs,
                one_hot_encoder=None,
                optimize_start_time=time.time(),
            )

        small_x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        small_y = np.array([[1], [0], [1]])

        # test cv_splits
        result = fit_and_score(small_x, small_y, cv_splits=3)
        self.assertEqual(result['loss'], 15)
        self.assertDictEqual(result['info'], {'a': 30, 'b': 15})

        # test validation split
        result = fit_and_score(small_x, small_y, validation_split=0.3)
        self.assertEqual(result['loss'], 24)
        self.assertDictEqual(result['info'], {'a': 21, 'b': 24})

        # test stratified cv split
        large_x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9],
                            [10, 11, 12], [13, 14, 15], [16, 17, 18]])
        large_y = np.array([[1], [1], [0], [0], [1], [0]])

        result = fit_and_score(
            large_x, large_y, cv_splits=3, use_stratified_cv_split=True)
        self.assertEqual(result['loss'], 57)
        self.assertDictEqual(result['info'], {'a': 114, 'b': 57})