class SubPipelineNode(PipelineNode):
    """A pipeline node that wraps an inner (sub-)pipeline."""

    def __init__(self, sub_pipeline_nodes):
        """Build the node together with its sub-pipeline.

        Arguments:
            sub_pipeline_nodes {list} -- A list of nodes of the sub-pipeline
        """
        super(SubPipelineNode, self).__init__()
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        # Register with the outer pipeline, then link it as the inner one's parent.
        super(SubPipelineNode, self).set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        # Fitting this node means fitting the whole inner pipeline.
        return self.sub_pipeline.fit_pipeline(**kwargs)

    def predict(self, **kwargs):
        # Prediction is delegated to the inner pipeline as well.
        return self.sub_pipeline.predict_pipeline(**kwargs)

    def clone(self):
        """Clone this node; the sub-pipeline is cloned separately and re-attached."""
        cloned_sub = self.sub_pipeline.clone()
        skipped = ("pipeline", "fit_output", "predict_output",
                   "child_node", "sub_pipeline")
        copy = super().clone(skip=skipped)
        copy.sub_pipeline = cloned_sub
        return copy
    def __init__(self, sub_pipeline_nodes):
        """Construct the node and wrap the given nodes in a sub-pipeline.

        Arguments:
            sub_pipeline_nodes {list} -- A list of nodes of the sub-pipeline
        """
        super(SubPipelineNode, self).__init__()
        # The inner pipeline owns the given nodes from here on.
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)
    def test_optimizer(self):
        """Run one optimization iteration and check the sampled hyperparameter."""
        class ResultNode(PipelineNode):
            def fit(self, X_train, Y_train):
                # Report the feature counts as loss/info so the result is predictable.
                info = {'train_a': X_train.shape[1], 'train_b': Y_train.shape[1]}
                return {'loss': X_train.shape[1], 'info': info}

            def get_hyperparameter_search_space(self, **pipeline_config):
                cs = CS.ConfigurationSpace()
                hyper = CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30)
                cs.add_hyperparameter(hyper)
                return cs

            def get_pipeline_config_options(self):
                return [
                    ConfigOption("result_logger_dir", default=".", type="directory"),
                    ConfigOption("optimize_metric", default="a", type=str),
                ]

        # Silence the optimizer loggers during the test run.
        for name in ('hpbandster', 'autonet'):
            logging.getLogger(name).setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        pipeline_config = pipeline.get_pipeline_config(num_iterations=1,
                                                       budget_type='epochs',
                                                       result_logger_dir=".")
        pipeline.fit_pipeline(
            pipeline_config=pipeline_config,
            X_train=np.random.rand(15, 10),
            Y_train=np.random.rand(15, 5),
            X_valid=None,
            Y_valid=None,
            result_loggers=[json_result_logger(directory=".", overwrite=True)],
            dataset_info=None,
            shutdownables=[])

        opt_node = pipeline[OptimizationAlgorithm.get_name()]
        result_of_opt_pipeline = opt_node.fit_output['optimized_hyperparameter_config']
        print(opt_node.fit_output)

        sampled = result_of_opt_pipeline[ResultNode.get_name()
                                         + ConfigWrapper.delimiter + 'hyper']
        # The sampled integer must lie inside the declared [0, 30] range.
        self.assertIn(sampled, list(range(0, 31)))
class SubPipelineNode(PipelineNode):
    """Pipeline node that delegates fit/predict to a wrapped sub-pipeline."""

    def __init__(self, sub_pipeline_nodes):
        super(SubPipelineNode, self).__init__()
        # The inner pipeline owns the given nodes.
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        # Attach to the outer pipeline and make it the parent of the inner one.
        super(SubPipelineNode, self).set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        # Fitting the node runs the entire inner pipeline.
        return self.sub_pipeline.fit_pipeline(**kwargs)

    def predict(self, **kwargs):
        # Prediction is forwarded to the inner pipeline.
        return self.sub_pipeline.predict_pipeline(**kwargs)
    def get_default_pipeline(cls):
        """Assemble the default AutoNet pipeline for featurized data."""
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
            CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
            EmbeddingSelector, NetworkSelector, OptimizerSelector, LearningrateSchedulerSelector, LogFunctionsSelector, MetricSelector, \
            LossModuleSelector, TrainNode, CreateDataLoader, CreateDatasetInfo, InitializationSelector

        # Nodes executed for every sampled configuration inside cross validation.
        per_fold_nodes = [
            Imputation(),
            NormalizationStrategySelector(),
            OneHotEncoding(),
            PreprocessorSelector(),
            ResamplingStrategySelector(),
            EmbeddingSelector(),
            NetworkSelector(),
            InitializationSelector(),
            OptimizerSelector(),
            LearningrateSchedulerSelector(),
            LogFunctionsSelector(),
            MetricSelector(),
            LossModuleSelector(),
            CreateDataLoader(),
            TrainNode(),
        ]

        # build the pipeline
        pipeline = Pipeline([
            AutoNetSettings(),
            CreateDatasetInfo(),
            OptimizationAlgorithm([CrossValidation(per_fold_nodes)]),
        ])

        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
Example #6
0
    def test_selector(self):
        """MetricSelector must expose the optimize metric plus the extras."""
        pipeline = Pipeline([MetricSelector()])

        selector = pipeline[MetricSelector.get_name()]
        # Register three candidate metrics under their config names.
        for name, metric in (("auc", auc_metric),
                             ("accuracy", accuracy),
                             ("mean", mean_distance)):
            selector.add_metric(name, metric)

        pipeline_config = pipeline.get_pipeline_config(
            optimize_metric="accuracy", additional_metrics=['auc', 'mean'])
        pipeline.fit_pipeline(pipeline_config=pipeline_config)

        chosen = selector.fit_output['optimize_metric']
        extras = selector.fit_output['additional_metrics']

        self.assertEqual(chosen.metric, accuracy)
        self.assertSetEqual({m.metric for m in extras},
                            {auc_metric, mean_distance})
Example #7
0
    def test_selector(self):
        """Only the log functions enabled via the config should be selected."""
        def log_fnc1(network, epoch):
            print("a")

        def log_fnc2(network, epoch):
            print("b")

        selector = LogFunctionsSelector()
        pipeline = Pipeline([selector])
        # Two candidates are registered, but the config only enables "log2".
        selector.add_log_function("log1", log_fnc1)
        selector.add_log_function("log2", log_fnc2)

        pipeline_config = pipeline.get_pipeline_config(additional_logs=["log2"])
        pipeline.fit_pipeline(pipeline_config=pipeline_config)

        chosen = selector.fit_output['log_functions']

        self.assertListEqual([entry.log for entry in chosen], [log_fnc2])
Example #8
0
    def test_optimizer(self):
        """One optimization iteration; the sampled value must be in range."""
        class ResultNode(PipelineNode):
            def fit(self, X_train, Y_train):
                # Deterministic result: loss/info are just the column counts.
                return {'loss': X_train.shape[1],
                        'info': {'a': X_train.shape[1],
                                 'b': Y_train.shape[1]}}

            def get_hyperparameter_search_space(self, **pipeline_config):
                cs = CS.ConfigurationSpace()
                cs.add_hyperparameter(
                    CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30))
                return cs

        # Silence the optimizer loggers during the test run.
        for name in ('hpbandster', 'autonet'):
            logging.getLogger(name).setLevel(logging.ERROR)

        pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

        pipeline_config = pipeline.get_pipeline_config(num_iterations=1,
                                                       budget_type='epochs')
        pipeline.fit_pipeline(pipeline_config=pipeline_config,
                              X_train=torch.rand(15, 10),
                              Y_train=torch.rand(15, 5),
                              X_valid=None,
                              Y_valid=None,
                              one_hot_encoder=None)

        # NOTE(review): key spelling ("hyperparamater") matches what this test
        # reads from the fit output -- confirm against OptimizationAlgorithm
        # before renaming.
        opt_node = pipeline[OptimizationAlgorithm.get_name()]
        result_of_opt_pipeline = opt_node.fit_output['optimized_hyperparamater_config']

        self.assertIn(
            result_of_opt_pipeline[ResultNode.get_name()
                                   + ConfigWrapper.delimiter + 'hyper'],
            list(range(0, 31)))
Example #9
0
    def get_default_pipeline(cls):
        """Build a pipeline for AutoNet. Should be implemented by child classes.

        Returns:
            Pipeline -- The Pipeline for AutoNet
        """
        # Base implementation: an empty pipeline with default settings applied.
        pipeline = Pipeline()
        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
Example #10
0
    def test_optimizer_selector(self):
        """The fitted optimizer must be one of the two registered choices."""
        pipeline = Pipeline([NetworkSelector(), OptimizerSelector()])

        net_selector = pipeline[NetworkSelector.get_name()]
        net_selector.add_network("mlpnet", MlpNet)
        net_selector.add_network("shapedmlpnet", ShapedMlpNet)
        net_selector.add_final_activation('none', nn.Sequential())

        opt_selector = pipeline[OptimizerSelector.get_name()]
        opt_selector.add_optimizer("adam", AdamOptimizer)
        opt_selector.add_optimizer("sgd", SgdOptimizer)

        pipeline_config = pipeline.get_pipeline_config()
        pipeline_config["random_seed"] = 42
        search_space = pipeline.get_hyperparameter_search_space()
        hyper_config = search_space.sample_configuration()
        pipeline.fit_pipeline(hyperparameter_config=hyper_config,
                              pipeline_config=pipeline_config,
                              X=torch.rand(3, 3),
                              Y=torch.rand(3, 2),
                              embedding=nn.Sequential())

        sampled_optimizer = opt_selector.fit_output['optimizer']

        self.assertIn(type(sampled_optimizer), [optim.Adam, optim.SGD])
Example #11
0
 def get_visualization_pipeline(self):
     """Pipeline that collects run trajectories per instance and plots them."""
     trajectory_collector = CollectAutoNetConfigTrajectories([
         CollectRunTrajectories([
             ReadInstanceInfo(),
             CreateAutoNet(),
             GetRunTrajectories()
         ])
     ])
     return Pipeline([
         VisualizationSettings(),
         VisualizationForInstance([
             trajectory_collector,
             PlotTrajectories()
         ])
     ])
Example #12
0
class SubPipelineNode(PipelineNode):
    """Pipeline node whose work is carried out by an inner sub-pipeline."""

    def __init__(self, sub_pipeline_nodes):
        super(SubPipelineNode, self).__init__()
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        # Hook into the outer pipeline and link it as the inner one's parent.
        super(SubPipelineNode, self).set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        # Fitting delegates to the whole inner pipeline.
        return self.sub_pipeline.fit_pipeline(**kwargs)

    def predict(self, **kwargs):
        # Prediction delegates to the inner pipeline too.
        return self.sub_pipeline.predict_pipeline(**kwargs)

    def clone(self):
        """Clone the node, cloning the sub-pipeline explicitly."""
        inner_clone = self.sub_pipeline.clone()
        copy = super().clone(skip=("pipeline", "fit_output", "predict_output",
                                   "child_node", "sub_pipeline"))
        copy.sub_pipeline = inner_clone
        return copy
Example #13
0
 def get_ensemble_performance_pipeline(self):
     """Pipeline computing ensemble performance logs for every collected run."""
     per_run_nodes = CollectRunTrajectories([
         ReadInstanceInfo(),
         CreateAutoNet(),
         SetEnsembleConfig(),
         SaveEnsembleLogs(),
         GetEnsembleTrajectories()
     ])
     return Pipeline([
         VisualizationSettings(),
         CollectInstanceTrajectories([
             CollectAutoNetConfigTrajectories([per_run_nodes])
         ])
     ])
Example #14
0
 def get_benchmark_pipeline(self):
     """Benchmark pipeline: loop over instances, configs and runs."""
     per_run = ForRun([  # num_runs, run_ids
         PrepareResultFolder(),
         FitAutoNet(),
         SaveResults()
     ])
     per_config = ForAutoNetConfig([  # autonet_config_file
         SetAutoNetConfig(),  # use_dataset_metric, use_dataset_max_runtime
         per_run
     ])
     return Pipeline([
         BenchmarkSettings(),
         ForInstance([  # instance_file
             ReadInstanceData(),  # test_split, is_classification, instance
             CreateAutoNet(),
             per_config
         ])
     ])
Example #15
0
 def get_visualization_pipeline(self):
     """Visualization pipeline: gather trajectories per instance, then plot."""
     run_collector = CollectRunTrajectories([
         ReadInstanceInfo(),
         CreateAutoNet(),
         GetRunTrajectories(),
         GetEnsembleTrajectories()
     ])
     return Pipeline([
         VisualizationSettings(),
         CollectInstanceTrajectories([
             CollectAutoNetConfigTrajectories([run_collector]),
             GetAdditionalTrajectories(),
             PlotTrajectories()
         ]),
         PlotSummary()
     ])
Example #16
0
    def get_default_ensemble_pipeline(cls):
        """Construct a default pipeline, include nodes for Ensemble.

        Returns:
            Pipeline -- The constructed default pipeline
        """
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
            CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
            EmbeddingSelector, NetworkSelector, OptimizerSelector, LearningrateSchedulerSelector, LogFunctionsSelector, MetricSelector, \
            LossModuleSelector, TrainNode, CreateDataLoader, CreateDatasetInfo, EnableComputePredictionsForEnsemble, SavePredictionsForEnsemble, \
            BuildEnsemble, EnsembleServer, InitializationSelector, BaselineTrainer

        # Per-fold nodes; ensemble bookkeeping wraps the training step.
        fold_nodes = [
            Imputation(),
            BaselineTrainer(),
            NormalizationStrategySelector(),
            OneHotEncoding(),
            PreprocessorSelector(),
            ResamplingStrategySelector(),
            EmbeddingSelector(),
            NetworkSelector(),
            InitializationSelector(),
            OptimizerSelector(),
            LearningrateSchedulerSelector(),
            LogFunctionsSelector(),
            MetricSelector(),
            EnableComputePredictionsForEnsemble(),
            LossModuleSelector(),
            CreateDataLoader(),
            TrainNode(),
            SavePredictionsForEnsemble(),
        ]

        # build the pipeline
        pipeline = Pipeline([
            AutoNetSettings(),
            CreateDatasetInfo(),
            EnsembleServer(),
            OptimizationAlgorithm([CrossValidation(fold_nodes)]),
            BuildEnsemble(),
        ])

        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
Example #17
0
    def get_default_pipeline(cls):
        """Assemble the default AutoNet pipeline for image data."""
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes.image.optimization_algorithm_no_timelimit import OptimizationAlgorithmNoTimeLimit
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
        from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector

        from autoPyTorch.pipeline.nodes.image.simple_scheduler_selector import SimpleLearningrateSchedulerSelector
        from autoPyTorch.pipeline.nodes.image.cross_validation_indices import CrossValidationIndices
        from autoPyTorch.pipeline.nodes.image.autonet_settings_no_shuffle import AutoNetSettingsNoShuffle
        from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo
        from autoPyTorch.pipeline.nodes.image.loss_module_selector_indices import LossModuleSelectorIndices
        from autoPyTorch.pipeline.nodes.image.image_augmentation import ImageAugmentation
        from autoPyTorch.pipeline.nodes.image.create_image_dataloader import CreateImageDataLoader
        from autoPyTorch.pipeline.nodes.image.create_dataset_info import CreateDatasetInfo
        from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode
        from autoPyTorch.pipeline.nodes.image.image_dataset_reader import ImageDatasetReader
        from autoPyTorch.pipeline.nodes.image.single_dataset import SingleDataset

        # Nodes run per cross-validation split.
        cv_nodes = [
            NetworkSelectorDatasetInfo(),
            OptimizerSelector(),
            SimpleLearningrateSchedulerSelector(),
            LogFunctionsSelector(),
            MetricSelector(),
            LossModuleSelectorIndices(),
            ImageAugmentation(),
            CreateImageDataLoader(),
            SimpleTrainNode(),
        ]

        # build the pipeline
        pipeline = Pipeline([
            AutoNetSettingsNoShuffle(),
            OptimizationAlgorithmNoTimeLimit([
                SingleDataset([
                    ImageDatasetReader(),
                    CreateDatasetInfo(),
                    CrossValidationIndices(cv_nodes),
                ])
            ])
        ])

        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
    def get_default_pipeline(cls):
        """Assemble the default AutoNet pipeline for tabular data."""
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings
        from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
        from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
        from autoPyTorch.pipeline.nodes.imputation import Imputation
        from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector
        from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
        from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector
        from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
        from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector
        from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
        from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
        from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector
        from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
        from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
        from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
        from autoPyTorch.pipeline.nodes.train_node import TrainNode

        # Preprocessing, model selection and training nodes inside CV.
        cv_nodes = [
            Imputation(),
            NormalizationStrategySelector(),
            OneHotEncoding(),
            PreprocessorSelector(),
            ResamplingStrategySelector(),
            EmbeddingSelector(),
            NetworkSelector(),
            OptimizerSelector(),
            LearningrateSchedulerSelector(),
            LogFunctionsSelector(),
            MetricSelector(),
            LossModuleSelector(),
            TrainNode(),
        ]

        # build the pipeline
        pipeline = Pipeline([
            AutoNetSettings(),
            OptimizationAlgorithm([CrossValidation(cv_nodes)]),
        ])

        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
Example #19
0
    def test_loss_selector(self):
        """Fitting with each configured loss module yields the matching class."""
        pipeline = Pipeline([LossModuleSelector()])

        selector = pipeline[LossModuleSelector.get_name()]
        selector.add_loss_module("L1", nn.L1Loss)
        selector.add_loss_module("cross_entropy", nn.CrossEntropyLoss,
                                 LossWeightStrategyWeighted(), True)

        pipeline_config = pipeline.get_pipeline_config(
            loss_modules=["L1", "cross_entropy"])
        hyper_config = pipeline.get_hyperparameter_search_space(
            **pipeline_config).sample_configuration()

        def fit_with(targets):
            # Run the pipeline with fixed X and the given targets, return the loss.
            pipeline.fit_pipeline(hyperparameter_config=hyper_config,
                                  train_indices=np.array([0, 1, 2]),
                                  X=np.random.rand(3, 3),
                                  Y=targets,
                                  pipeline_config=pipeline_config,
                                  tmp=None)
            return pipeline[selector.get_name()].fit_output['loss_function']

        hyper_config["LossModuleSelector:loss_module"] = "L1"
        selected_loss = fit_with(np.random.rand(3, 2))
        self.assertEqual(type(selected_loss.function), nn.L1Loss)

        hyper_config["LossModuleSelector:loss_module"] = "cross_entropy"
        selected_loss = fit_with(np.array([[1, 0], [0, 1], [1, 0]]))
        self.assertEqual(type(selected_loss.function), nn.CrossEntropyLoss)
        # A perfectly confident correct prediction yields zero loss.
        self.assertEqual(
            selected_loss(torch.tensor([[0.0, 10000.0]]),
                          torch.tensor([[0, 1]])), 0)
Example #20
0
 def get_benchmark_pipeline(self):
     """Benchmark pipeline that also stores ensemble logs for each run."""
     per_run = ForRun([  # loop through num_runs, run_ids
         PrepareResultFolder(),
         FitAutoNet(),
         SaveResults(),
         SaveEnsembleLogs()
     ])
     return Pipeline([
         BenchmarkSettings(),
         ForInstance([  # loop through instance files
             ReadInstanceData(),  # get test_split, is_classification, instance
             CreateAutoNet(),
             #ApplyUserUpdates(),
             ForAutoNetConfig([  # loop through autonet_config_file
                 SetAutoNetConfig(),  # use_dataset_metric, use_dataset_max_runtime
                 per_run
             ])
         ])
     ])
    def test_network_selector(self):
        """The fitted network must be an instance of a registered network type."""
        pipeline = Pipeline([NetworkSelector()])

        selector = pipeline[NetworkSelector.get_name()]
        selector.add_network("mlpnet", MlpNet)
        selector.add_network("shapedmlpnet", ShapedMlpNet)
        selector.add_final_activation('none', nn.Sequential())

        pipeline_config = pipeline.get_pipeline_config()
        pipeline_config["random_seed"] = 42
        search_space = pipeline.get_hyperparameter_search_space()
        hyper_config = search_space.sample_configuration()
        pipeline.fit_pipeline(hyperparameter_config=hyper_config,
                              pipeline_config=pipeline_config,
                              X=torch.rand(3, 3),
                              Y=torch.rand(3, 2),
                              embedding=nn.Sequential())

        sampled_network = pipeline[selector.get_name()].fit_output['network']

        self.assertIn(type(sampled_network), [MlpNet, ShapedMlpNet])
    def test_lr_scheduler_selector(self):
        """The fitted LR scheduler must come from the registered choices."""
        pipeline = Pipeline([
            NetworkSelector(),
            OptimizerSelector(),
            LearningrateSchedulerSelector(),
        ])

        net_selector = pipeline[NetworkSelector.get_name()]
        net_selector.add_network("mlpnet", MlpNet)
        net_selector.add_network("shapedmlpnet", ShapedMlpNet)
        net_selector.add_final_activation('none', nn.Sequential())

        opt_selector = pipeline[OptimizerSelector.get_name()]
        opt_selector.add_optimizer("adam", AdamOptimizer)
        opt_selector.add_optimizer("sgd", SgdOptimizer)

        scheduler_selector = pipeline[LearningrateSchedulerSelector.get_name()]
        scheduler_selector.add_lr_scheduler("step", SchedulerStepLR)
        scheduler_selector.add_lr_scheduler("exp", SchedulerExponentialLR)

        pipeline_config = pipeline.get_pipeline_config()
        pipeline_config["random_seed"] = 42
        hyper_config = pipeline.get_hyperparameter_search_space(
        ).sample_configuration()

        pipeline.fit_pipeline(hyperparameter_config=hyper_config,
                              pipeline_config=pipeline_config,
                              X=torch.rand(3, 3),
                              Y=torch.rand(3, 2),
                              embedding=nn.Sequential(),
                              training_techniques=[],
                              train_indices=np.array([0, 1, 2]))

        # The scheduler is stored inside the first training technique.
        fit_output = pipeline[scheduler_selector.get_name()].fit_output
        sampled_scheduler = fit_output['training_techniques'][0] \
            .training_components['lr_scheduler']

        self.assertIn(type(sampled_scheduler),
                      [lr_scheduler.ExponentialLR, lr_scheduler.StepLR])
    def __init__(self, sub_pipeline_nodes):
        """Wrap the given nodes in a sub-pipeline owned by this node."""
        super(SubPipelineNode, self).__init__()
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)
Example #24
0
    def test_cross_validation(self):
        """Check CV aggregation for k-fold, validation split and stratified CV.

        A dummy ResultNode reports sums of the train/validation data, so the
        aggregated loss/info values can be asserted exactly.  The repeated
        config-build/fit boilerplate is factored into a local helper.
        """
        class ResultNode(PipelineNode):
            def fit(self, X_train, X_valid):
                # Sums are easy to aggregate across folds by hand.
                return {
                    'loss': np.sum(X_valid),
                    'info': {
                        'a': np.sum(X_train),
                        'b': np.sum(X_valid)
                    }
                }

        pipeline = Pipeline([CrossValidation([ResultNode()])])

        def run_cv(x_train, y_train, **config_kwargs):
            # Build config + search space for the given options and fit once.
            pipeline_config = pipeline.get_pipeline_config(**config_kwargs)
            pipeline_config_space = pipeline.get_hyperparameter_search_space(
                **pipeline_config)
            pipeline_config['categorical_features'] = None
            return pipeline.fit_pipeline(
                hyperparameter_config=pipeline_config_space,
                pipeline_config=pipeline_config,
                X_train=x_train,
                Y_train=y_train,
                X_valid=None,
                Y_valid=None,
                budget=5,
                budget_type=BudgetTypeEpochs,
                one_hot_encoder=None,
                optimize_start_time=time.time())

        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        y_train = np.array([[1], [0], [1]])

        # test cv_splits
        cv_result = run_cv(x_train, y_train, cv_splits=3)
        self.assertEqual(cv_result['loss'], 15)
        self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15})

        # test validation split
        cv_result = run_cv(x_train, y_train, validation_split=0.3)
        self.assertEqual(cv_result['loss'], 24)
        self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24})

        # test stratified cv split
        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12],
                            [13, 14, 15], [16, 17, 18]])
        y_train = np.array([[1], [1], [0], [0], [1], [0]])

        cv_result = run_cv(x_train, y_train,
                           cv_splits=3, use_stratified_cv_split=True)
        self.assertEqual(cv_result['loss'], 57)
        self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57})
    def test_cross_validation(self):
        """Check CV aggregation for k-fold, validation split, stratified CV
        and an explicit validation set.

        A dummy ResultNode reports sums over the split indices so that every
        aggregated loss/info value can be asserted exactly.  The repeated
        config/dataset-info/fit boilerplate is factored into a local helper.
        """
        class ResultNode(PipelineNode):
            def fit(self, X, Y, train_indices, valid_indices):
                return {'loss': np.sum(X[valid_indices]),
                        'info': {'a': np.sum(X[train_indices]),
                                 'b': np.sum(X[valid_indices])}}

        pipeline = Pipeline([CrossValidation([ResultNode()])])

        pipeline["CrossValidation"].add_cross_validator(
            "k_fold", KFold, lambda x: x.reshape((-1,)))
        pipeline["CrossValidation"].add_cross_validator(
            "stratified_k_fold", StratifiedKFold, lambda x: x.reshape((-1,)))

        def run_cv(x_train, y_train, x_valid=None, y_valid=None,
                   **config_kwargs):
            # Shared boilerplate: config, search space, dataset info, one fit.
            pipeline_config = pipeline.get_pipeline_config(**config_kwargs)
            pipeline_config_space = pipeline.get_hyperparameter_search_space(
                **pipeline_config)
            pipeline_config['random_seed'] = 42
            dataset_info = DataSetInfo()
            dataset_info.categorical_features = [None] * 3
            dataset_info.x_shape = x_train.shape
            dataset_info.y_shape = y_train.shape
            return pipeline.fit_pipeline(
                hyperparameter_config=pipeline_config_space,
                pipeline_config=pipeline_config,
                X_train=x_train, Y_train=y_train,
                X_valid=x_valid, Y_valid=y_valid,
                budget=5, budget_type=BudgetTypeEpochs,
                one_hot_encoder=None,
                optimize_start_time=time.time(),
                refit=False, dataset_info=dataset_info, rescore=False)

        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        y_train = np.array([[1], [0], [1]])

        # test cv_splits
        cv_result = run_cv(x_train, y_train, cross_validator="k_fold",
                           cross_validator_args={"n_splits": 3})
        self.assertEqual(cv_result['loss'], 15)
        self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15})

        # test validation split
        cv_result = run_cv(x_train, y_train, validation_split=0.3)
        self.assertEqual(cv_result['loss'], 24)
        self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24})

        # test stratified cv split
        x_valid = x_train
        y_valid = y_train
        x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12],
                            [13, 14, 15], [16, 17, 18]])
        y_train = np.array([[1], [1], [0], [0], [1], [0]])

        cv_result = run_cv(x_train, y_train,
                           cross_validator="stratified_k_fold",
                           cross_validator_args={"n_splits": 3})
        self.assertEqual(cv_result['loss'], 57)
        self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57})

        # test explicit validation data (default config)
        cv_result = run_cv(x_train, y_train, x_valid, y_valid)
        self.assertEqual(cv_result['loss'], 45)
        self.assertDictEqual(cv_result['info'], {'a': 171, 'b': 45})