class SubPipelineNode(PipelineNode):
    """A pipeline node that contains a sub-pipeline."""

    def __init__(self, sub_pipeline_nodes):
        """Construct the node and the sub-pipeline.

        Arguments:
            sub_pipeline_nodes {list} -- A list of nodes of the sub-pipeline
        """
        super(SubPipelineNode, self).__init__()
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        super(SubPipelineNode, self).set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        return self.sub_pipeline.fit_pipeline(**kwargs)

    def predict(self, **kwargs):
        return self.sub_pipeline.predict_pipeline(**kwargs)

    def clone(self):
        sub_pipeline = self.sub_pipeline.clone()
        new_node = super().clone(skip=("pipeline", "fit_output", "predict_output",
                                       "child_node", "sub_pipeline"))
        new_node.sub_pipeline = sub_pipeline
        return new_node

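# Hedged usage sketch for SubPipelineNode: wrapping two hypothetical nodes in a
# sub-pipeline so they appear as a single step of an outer pipeline. The import
# paths and the NodeA/NodeB classes are assumptions, not taken from the source.
from autoPyTorch.pipeline.base.pipeline import Pipeline
from autoPyTorch.pipeline.base.pipeline_node import PipelineNode
from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode

class NodeA(PipelineNode):
    def fit(self, **kwargs):
        return {}

class NodeB(PipelineNode):
    def fit(self, **kwargs):
        return {}

# The outer pipeline sees a single node; fit/predict calls are forwarded to the
# nested Pipeline([NodeA(), NodeB()]) via fit_pipeline/predict_pipeline.
outer = Pipeline([SubPipelineNode([NodeA(), NodeB()])])
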
def test_optimizer(self):
    class ResultNode(PipelineNode):
        def fit(self, X_train, Y_train):
            return {'loss': X_train.shape[1],
                    'info': {'train_a': X_train.shape[1], 'train_b': Y_train.shape[1]}}

        def get_hyperparameter_search_space(self, **pipeline_config):
            cs = CS.ConfigurationSpace()
            cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30))
            return cs

        def get_pipeline_config_options(self):
            return [
                ConfigOption("result_logger_dir", default=".", type="directory"),
                ConfigOption("optimize_metric", default="a", type=str),
            ]

    logger = logging.getLogger('hpbandster')
    logger.setLevel(logging.ERROR)
    logger = logging.getLogger('autonet')
    logger.setLevel(logging.ERROR)

    pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

    pipeline_config = pipeline.get_pipeline_config(num_iterations=1, budget_type='epochs',
                                                   result_logger_dir=".")
    pipeline.fit_pipeline(pipeline_config=pipeline_config,
                          X_train=np.random.rand(15, 10), Y_train=np.random.rand(15, 5),
                          X_valid=None, Y_valid=None,
                          result_loggers=[json_result_logger(directory=".", overwrite=True)],
                          dataset_info=None, shutdownables=[])

    result_of_opt_pipeline = pipeline[OptimizationAlgorithm.get_name()].fit_output['optimized_hyperparameter_config']
    print(pipeline[OptimizationAlgorithm.get_name()].fit_output)

    self.assertIn(result_of_opt_pipeline[ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'],
                  list(range(0, 31)))

def get_default_pipeline(cls):
    from autoPyTorch.pipeline.base.pipeline import Pipeline
    from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
        CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
        EmbeddingSelector, NetworkSelector, OptimizerSelector, LearningrateSchedulerSelector, LogFunctionsSelector, MetricSelector, \
        LossModuleSelector, TrainNode, CreateDataLoader, CreateDatasetInfo, InitializationSelector

    # build the pipeline
    pipeline = Pipeline([
        AutoNetSettings(),
        CreateDatasetInfo(),
        OptimizationAlgorithm([
            CrossValidation([
                Imputation(),
                NormalizationStrategySelector(),
                OneHotEncoding(),
                PreprocessorSelector(),
                ResamplingStrategySelector(),
                EmbeddingSelector(),
                NetworkSelector(),
                InitializationSelector(),
                OptimizerSelector(),
                LearningrateSchedulerSelector(),
                LogFunctionsSelector(),
                MetricSelector(),
                LossModuleSelector(),
                CreateDataLoader(),
                TrainNode()
            ])
        ]),
    ])

    cls._apply_default_pipeline_settings(pipeline)
    return pipeline

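# Hedged usage sketch: get_default_pipeline is exposed as a classmethod on the
# AutoNet entry points, so the node graph can be built and inspected without
# fitting anything. AutoNetClassification as the concrete entry point is an
# assumption here, not taken from the snippet above.
from autoPyTorch import AutoNetClassification

pipeline = AutoNetClassification.get_default_pipeline()
train_node = pipeline["TrainNode"]  # nodes are addressable by their class name
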
def test_selector(self):
    pipeline = Pipeline([MetricSelector()])
    selector = pipeline[MetricSelector.get_name()]
    selector.add_metric("auc", auc_metric)
    selector.add_metric("accuracy", accuracy)
    selector.add_metric("mean", mean_distance)

    pipeline_config = pipeline.get_pipeline_config(optimize_metric="accuracy",
                                                   additional_metrics=['auc', 'mean'])
    pipeline.fit_pipeline(pipeline_config=pipeline_config)

    selected_optimize_metric = selector.fit_output['optimize_metric']
    selected_additional_metrics = selector.fit_output['additional_metrics']

    self.assertEqual(selected_optimize_metric.metric, accuracy)
    self.assertSetEqual(set(x.metric for x in selected_additional_metrics),
                        set([auc_metric, mean_distance]))

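# Hedged sketch grounded in the add_metric calls above: any callable on
# (y_true, y_pred)-style arrays can be registered under a name and then chosen
# via optimize_metric. my_mae is a hypothetical metric, not part of the tests.
import numpy as np

from autoPyTorch.pipeline.base.pipeline import Pipeline
from autoPyTorch.pipeline.nodes import MetricSelector

def my_mae(y_true, y_pred):
    # mean absolute error over all outputs
    return np.mean(np.abs(y_true - y_pred))

pipeline = Pipeline([MetricSelector()])
pipeline[MetricSelector.get_name()].add_metric("mae", my_mae)
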
def test_selector(self):
    def log_fnc1(network, epoch):
        print("a")

    def log_fnc2(network, epoch):
        print("b")

    selector = LogFunctionsSelector()
    pipeline = Pipeline([selector])
    selector.add_log_function("log1", log_fnc1)
    selector.add_log_function("log2", log_fnc2)

    pipeline_config = pipeline.get_pipeline_config(additional_logs=["log2"])
    pipeline.fit_pipeline(pipeline_config=pipeline_config)

    log_functions = selector.fit_output['log_functions']
    self.assertListEqual([x.log for x in log_functions], [log_fnc2])

def test_optimizer(self):
    class ResultNode(PipelineNode):
        def fit(self, X_train, Y_train):
            return {'loss': X_train.shape[1],
                    'info': {'a': X_train.shape[1], 'b': Y_train.shape[1]}}

        def get_hyperparameter_search_space(self, **pipeline_config):
            cs = CS.ConfigurationSpace()
            cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30))
            return cs

    logger = logging.getLogger('hpbandster')
    logger.setLevel(logging.ERROR)
    logger = logging.getLogger('autonet')
    logger.setLevel(logging.ERROR)

    pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

    pipeline_config = pipeline.get_pipeline_config(num_iterations=1, budget_type='epochs')
    pipeline.fit_pipeline(pipeline_config=pipeline_config,
                          X_train=torch.rand(15, 10), Y_train=torch.rand(15, 5),
                          X_valid=None, Y_valid=None, one_hot_encoder=None)

    result_of_opt_pipeline = pipeline[OptimizationAlgorithm.get_name()].fit_output['optimized_hyperparameter_config']

    self.assertIn(result_of_opt_pipeline[ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'],
                  list(range(0, 31)))

def get_default_pipeline(cls):
    """Build a pipeline for AutoNet. Should be implemented by child classes.

    Returns:
        Pipeline -- The pipeline for AutoNet
    """
    # build the pipeline
    pipeline = Pipeline()

    cls._apply_default_pipeline_settings(pipeline)
    return pipeline

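# Minimal sketch of the override contract described in the docstring above.
# MyAutoNet is a hypothetical child class, and the AutoNet base-class import
# path is an assumption; only the shape of the override is taken from the source.
from autoPyTorch.core.api import AutoNet

class MyAutoNet(AutoNet):
    @classmethod
    def get_default_pipeline(cls):
        from autoPyTorch.pipeline.base.pipeline import Pipeline
        from autoPyTorch.pipeline.nodes import AutoNetSettings, MetricSelector
        # register only the nodes this subclass needs, then run the shared hook
        pipeline = Pipeline([AutoNetSettings(), MetricSelector()])
        cls._apply_default_pipeline_settings(pipeline)
        return pipeline
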
def test_optimizer_selector(self):
    pipeline = Pipeline([NetworkSelector(), OptimizerSelector()])

    net_selector = pipeline[NetworkSelector.get_name()]
    net_selector.add_network("mlpnet", MlpNet)
    net_selector.add_network("shapedmlpnet", ShapedMlpNet)
    net_selector.add_final_activation('none', nn.Sequential())

    opt_selector = pipeline[OptimizerSelector.get_name()]
    opt_selector.add_optimizer("adam", AdamOptimizer)
    opt_selector.add_optimizer("sgd", SgdOptimizer)

    pipeline_config = pipeline.get_pipeline_config()
    pipeline_config["random_seed"] = 42
    hyper_config = pipeline.get_hyperparameter_search_space().sample_configuration()
    pipeline.fit_pipeline(hyperparameter_config=hyper_config, pipeline_config=pipeline_config,
                          X=torch.rand(3, 3), Y=torch.rand(3, 2), embedding=nn.Sequential())

    sampled_optimizer = opt_selector.fit_output['optimizer']
    self.assertIn(type(sampled_optimizer), [optim.Adam, optim.SGD])

def get_visualization_pipeline(self):
    return Pipeline([
        VisualizationSettings(),
        VisualizationForInstance([
            CollectAutoNetConfigTrajectories([
                CollectRunTrajectories([
                    ReadInstanceInfo(),
                    CreateAutoNet(),
                    GetRunTrajectories()
                ])
            ]),
            PlotTrajectories()
        ])
    ])

def get_ensemble_performance_pipeline(self):
    return Pipeline([
        VisualizationSettings(),
        CollectInstanceTrajectories([
            CollectAutoNetConfigTrajectories([
                CollectRunTrajectories([
                    ReadInstanceInfo(),
                    CreateAutoNet(),
                    SetEnsembleConfig(),
                    SaveEnsembleLogs(),
                    GetEnsembleTrajectories()
                ])
            ])
        ])
    ])

def get_benchmark_pipeline(self):
    return Pipeline([
        BenchmarkSettings(),
        ForInstance([                # instance_file
            ReadInstanceData(),      # test_split, is_classification, instance
            CreateAutoNet(),
            ForAutoNetConfig([       # autonet_config_file
                SetAutoNetConfig(),  # use_dataset_metric, use_dataset_max_runtime
                ForRun([             # num_runs, run_ids
                    PrepareResultFolder(),
                    FitAutoNet(),
                    SaveResults()
                ])
            ])
        ])
    ])

def get_visualization_pipeline(self):
    return Pipeline([
        VisualizationSettings(),
        CollectInstanceTrajectories([
            CollectAutoNetConfigTrajectories([
                CollectRunTrajectories([
                    ReadInstanceInfo(),
                    CreateAutoNet(),
                    GetRunTrajectories(),
                    GetEnsembleTrajectories()
                ])
            ]),
            GetAdditionalTrajectories(),
            PlotTrajectories()
        ]),
        PlotSummary()
    ])

def get_default_ensemble_pipeline(cls):
    """Construct a default pipeline, including the nodes needed for ensemble building.

    Returns:
        Pipeline -- The constructed default pipeline
    """
    from autoPyTorch.pipeline.base.pipeline import Pipeline
    from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
        CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
        EmbeddingSelector, NetworkSelector, OptimizerSelector, LearningrateSchedulerSelector, LogFunctionsSelector, MetricSelector, \
        LossModuleSelector, TrainNode, CreateDataLoader, CreateDatasetInfo, EnableComputePredictionsForEnsemble, SavePredictionsForEnsemble, \
        BuildEnsemble, EnsembleServer, InitializationSelector, BaselineTrainer

    # build the pipeline
    pipeline = Pipeline([
        AutoNetSettings(),
        CreateDatasetInfo(),
        EnsembleServer(),
        OptimizationAlgorithm([
            CrossValidation([
                Imputation(),
                BaselineTrainer(),
                NormalizationStrategySelector(),
                OneHotEncoding(),
                PreprocessorSelector(),
                ResamplingStrategySelector(),
                EmbeddingSelector(),
                NetworkSelector(),
                InitializationSelector(),
                OptimizerSelector(),
                LearningrateSchedulerSelector(),
                LogFunctionsSelector(),
                MetricSelector(),
                EnableComputePredictionsForEnsemble(),
                LossModuleSelector(),
                CreateDataLoader(),
                TrainNode(),
                SavePredictionsForEnsemble()
            ])
        ]),
        BuildEnsemble()
    ])

    cls._apply_default_pipeline_settings(pipeline)
    return pipeline

def get_default_pipeline(cls):
    from autoPyTorch.pipeline.base.pipeline import Pipeline
    from autoPyTorch.pipeline.nodes.image.optimization_algorithm_no_timelimit import OptimizationAlgorithmNoTimeLimit
    from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
    from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
    from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
    from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
    from autoPyTorch.pipeline.nodes.image.simple_scheduler_selector import SimpleLearningrateSchedulerSelector
    from autoPyTorch.pipeline.nodes.image.cross_validation_indices import CrossValidationIndices
    from autoPyTorch.pipeline.nodes.image.autonet_settings_no_shuffle import AutoNetSettingsNoShuffle
    from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo
    from autoPyTorch.pipeline.nodes.image.loss_module_selector_indices import LossModuleSelectorIndices
    from autoPyTorch.pipeline.nodes.image.image_augmentation import ImageAugmentation
    from autoPyTorch.pipeline.nodes.image.create_image_dataloader import CreateImageDataLoader
    from autoPyTorch.pipeline.nodes.image.create_dataset_info import CreateDatasetInfo
    from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode
    from autoPyTorch.pipeline.nodes.image.image_dataset_reader import ImageDatasetReader
    from autoPyTorch.pipeline.nodes.image.single_dataset import SingleDataset

    # build the pipeline
    pipeline = Pipeline([
        AutoNetSettingsNoShuffle(),
        OptimizationAlgorithmNoTimeLimit([
            SingleDataset([
                ImageDatasetReader(),
                CreateDatasetInfo(),
                CrossValidationIndices([
                    NetworkSelectorDatasetInfo(),
                    OptimizerSelector(),
                    SimpleLearningrateSchedulerSelector(),
                    LogFunctionsSelector(),
                    MetricSelector(),
                    LossModuleSelectorIndices(),
                    ImageAugmentation(),
                    CreateImageDataLoader(),
                    SimpleTrainNode()
                ])
            ])
        ])
    ])

    cls._apply_default_pipeline_settings(pipeline)
    return pipeline

def get_default_pipeline(cls):
    from autoPyTorch.pipeline.base.pipeline import Pipeline
    from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings
    from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
    from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
    from autoPyTorch.pipeline.nodes.imputation import Imputation
    from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector
    from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
    from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector
    from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
    from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector
    from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
    from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
    from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector
    from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
    from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
    from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
    from autoPyTorch.pipeline.nodes.train_node import TrainNode

    # build the pipeline
    pipeline = Pipeline([
        AutoNetSettings(),
        OptimizationAlgorithm([
            CrossValidation([
                Imputation(),
                NormalizationStrategySelector(),
                OneHotEncoding(),
                PreprocessorSelector(),
                ResamplingStrategySelector(),
                EmbeddingSelector(),
                NetworkSelector(),
                OptimizerSelector(),
                LearningrateSchedulerSelector(),
                LogFunctionsSelector(),
                MetricSelector(),
                LossModuleSelector(),
                TrainNode()
            ])
        ])
    ])

    cls._apply_default_pipeline_settings(pipeline)
    return pipeline

def test_loss_selector(self):
    pipeline = Pipeline([LossModuleSelector()])
    selector = pipeline[LossModuleSelector.get_name()]
    selector.add_loss_module("L1", nn.L1Loss)
    selector.add_loss_module("cross_entropy", nn.CrossEntropyLoss, LossWeightStrategyWeighted(), True)

    pipeline_config = pipeline.get_pipeline_config(loss_modules=["L1", "cross_entropy"])
    pipeline_hyperparameter_config = pipeline.get_hyperparameter_search_space(**pipeline_config).sample_configuration()

    pipeline_hyperparameter_config["LossModuleSelector:loss_module"] = "L1"
    pipeline.fit_pipeline(hyperparameter_config=pipeline_hyperparameter_config,
                          train_indices=np.array([0, 1, 2]),
                          X=np.random.rand(3, 3), Y=np.random.rand(3, 2),
                          pipeline_config=pipeline_config, tmp=None)
    selected_loss = pipeline[selector.get_name()].fit_output['loss_function']
    self.assertEqual(type(selected_loss.function), nn.L1Loss)

    pipeline_hyperparameter_config["LossModuleSelector:loss_module"] = "cross_entropy"
    pipeline.fit_pipeline(hyperparameter_config=pipeline_hyperparameter_config,
                          train_indices=np.array([0, 1, 2]),
                          X=np.random.rand(3, 3), Y=np.array([[1, 0], [0, 1], [1, 0]]),
                          pipeline_config=pipeline_config, tmp=None)
    selected_loss = pipeline[selector.get_name()].fit_output['loss_function']
    self.assertEqual(type(selected_loss.function), nn.CrossEntropyLoss)
    self.assertEqual(selected_loss(torch.tensor([[0.0, 10000.0]]), torch.tensor([[0, 1]])), 0)

def get_benchmark_pipeline(self):
    return Pipeline([
        BenchmarkSettings(),
        ForInstance([                # loop through instance files
            ReadInstanceData(),      # get test_split, is_classification, instance
            CreateAutoNet(),
            #ApplyUserUpdates(),
            ForAutoNetConfig([       # loop through autonet_config_file
                SetAutoNetConfig(),  # use_dataset_metric, use_dataset_max_runtime
                ForRun([             # loop through num_runs, run_ids
                    PrepareResultFolder(),
                    FitAutoNet(),
                    SaveResults(),
                    SaveEnsembleLogs()
                ])
            ])
        ])
    ])

def test_network_selector(self):
    pipeline = Pipeline([NetworkSelector()])
    selector = pipeline[NetworkSelector.get_name()]
    selector.add_network("mlpnet", MlpNet)
    selector.add_network("shapedmlpnet", ShapedMlpNet)
    selector.add_final_activation('none', nn.Sequential())

    pipeline_config = pipeline.get_pipeline_config()
    pipeline_config["random_seed"] = 42
    hyper_config = pipeline.get_hyperparameter_search_space().sample_configuration()
    pipeline.fit_pipeline(hyperparameter_config=hyper_config, pipeline_config=pipeline_config,
                          X=torch.rand(3, 3), Y=torch.rand(3, 2), embedding=nn.Sequential())

    sampled_network = pipeline[selector.get_name()].fit_output['network']
    self.assertIn(type(sampled_network), [MlpNet, ShapedMlpNet])

def test_lr_scheduler_selector(self):
    pipeline = Pipeline([
        NetworkSelector(),
        OptimizerSelector(),
        LearningrateSchedulerSelector(),
    ])

    net_selector = pipeline[NetworkSelector.get_name()]
    net_selector.add_network("mlpnet", MlpNet)
    net_selector.add_network("shapedmlpnet", ShapedMlpNet)
    net_selector.add_final_activation('none', nn.Sequential())

    opt_selector = pipeline[OptimizerSelector.get_name()]
    opt_selector.add_optimizer("adam", AdamOptimizer)
    opt_selector.add_optimizer("sgd", SgdOptimizer)

    lr_scheduler_selector = pipeline[LearningrateSchedulerSelector.get_name()]
    lr_scheduler_selector.add_lr_scheduler("step", SchedulerStepLR)
    lr_scheduler_selector.add_lr_scheduler("exp", SchedulerExponentialLR)

    pipeline_config = pipeline.get_pipeline_config()
    pipeline_config["random_seed"] = 42
    hyper_config = pipeline.get_hyperparameter_search_space().sample_configuration()
    pipeline.fit_pipeline(hyperparameter_config=hyper_config, pipeline_config=pipeline_config,
                          X=torch.rand(3, 3), Y=torch.rand(3, 2), embedding=nn.Sequential(),
                          training_techniques=[], train_indices=np.array([0, 1, 2]))

    sampled_lr_scheduler = pipeline[lr_scheduler_selector.get_name()].fit_output[
        'training_techniques'][0].training_components['lr_scheduler']
    self.assertIn(type(sampled_lr_scheduler), [lr_scheduler.ExponentialLR, lr_scheduler.StepLR])

def test_cross_validation(self):
    class ResultNode(PipelineNode):
        def fit(self, X_train, X_valid):
            return {'loss': np.sum(X_valid),
                    'info': {'a': np.sum(X_train), 'b': np.sum(X_valid)}}

    pipeline = Pipeline([CrossValidation([ResultNode()])])

    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y_train = np.array([[1], [0], [1]])

    # test cv_splits
    pipeline_config = pipeline.get_pipeline_config(cv_splits=3)
    pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
    pipeline_config['categorical_features'] = None
    cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space,
                                      pipeline_config=pipeline_config,
                                      X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None,
                                      budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
                                      optimize_start_time=time.time())
    self.assertEqual(cv_result['loss'], 15)
    self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15})

    # test validation split
    pipeline_config = pipeline.get_pipeline_config(validation_split=0.3)
    pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
    pipeline_config['categorical_features'] = None
    cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space,
                                      pipeline_config=pipeline_config,
                                      X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None,
                                      budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
                                      optimize_start_time=time.time())
    self.assertEqual(cv_result['loss'], 24)
    self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24})

    # test stratified cv split
    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]])
    y_train = np.array([[1], [1], [0], [0], [1], [0]])
    pipeline_config = pipeline.get_pipeline_config(cv_splits=3, use_stratified_cv_split=True)
    pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
    pipeline_config['categorical_features'] = None
    cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space,
                                      pipeline_config=pipeline_config,
                                      X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None,
                                      budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
                                      optimize_start_time=time.time())
    self.assertEqual(cv_result['loss'], 57)
    self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57})

def test_cross_validation(self):
    class ResultNode(PipelineNode):
        def fit(self, X, Y, train_indices, valid_indices):
            return {'loss': np.sum(X[valid_indices]),
                    'info': {'a': np.sum(X[train_indices]), 'b': np.sum(X[valid_indices])}}

    pipeline = Pipeline([CrossValidation([ResultNode()])])
    pipeline["CrossValidation"].add_cross_validator("k_fold", KFold, lambda x: x.reshape((-1,)))
    pipeline["CrossValidation"].add_cross_validator("stratified_k_fold", StratifiedKFold, lambda x: x.reshape((-1,)))

    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y_train = np.array([[1], [0], [1]])

    # test cv_splits
    pipeline_config = pipeline.get_pipeline_config(cross_validator="k_fold", cross_validator_args={"n_splits": 3})
    pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
    dataset_info = DataSetInfo()
    dataset_info.categorical_features = [None] * 3
    dataset_info.x_shape = x_train.shape
    dataset_info.y_shape = y_train.shape
    pipeline_config["random_seed"] = 42
    cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config,
                                      X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None,
                                      budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
                                      optimize_start_time=time.time(), refit=False,
                                      dataset_info=dataset_info, rescore=False)
    self.assertEqual(cv_result['loss'], 15)
    self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15})

    # test validation split
    pipeline_config = pipeline.get_pipeline_config(validation_split=0.3)
    pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
    pipeline_config['random_seed'] = 42
    dataset_info = DataSetInfo()
    dataset_info.categorical_features = [None] * 3
    dataset_info.x_shape = x_train.shape
    dataset_info.y_shape = y_train.shape
    cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config,
                                      X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None,
                                      budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
                                      optimize_start_time=time.time(), refit=False,
                                      dataset_info=dataset_info, rescore=False)
    self.assertEqual(cv_result['loss'], 24)
    self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24})

    # test stratified cv split
    x_valid = x_train
    y_valid = y_train
    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]])
    y_train = np.array([[1], [1], [0], [0], [1], [0]])
    pipeline_config = pipeline.get_pipeline_config(cross_validator="stratified_k_fold", cross_validator_args={"n_splits": 3})
    pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
    pipeline_config['random_seed'] = 42
    dataset_info = DataSetInfo()
    dataset_info.categorical_features = [None] * 3
    dataset_info.x_shape = x_train.shape
    dataset_info.y_shape = y_train.shape
    cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config,
                                      X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None,
                                      budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
                                      optimize_start_time=time.time(), refit=False,
                                      dataset_info=dataset_info, rescore=False)
    self.assertEqual(cv_result['loss'], 57)
    self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57})

    # test explicit validation set
    pipeline_config = pipeline.get_pipeline_config()
    pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
    pipeline_config['random_seed'] = 42
    dataset_info = DataSetInfo()
    dataset_info.categorical_features = [None] * 3
    dataset_info.x_shape = x_train.shape
    dataset_info.y_shape = y_train.shape
    cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config,
                                      X_train=x_train, Y_train=y_train, X_valid=x_valid, Y_valid=y_valid,
                                      budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
                                      optimize_start_time=time.time(), refit=False,
                                      dataset_info=dataset_info, rescore=False)
    self.assertEqual(cv_result['loss'], 45)
    self.assertDictEqual(cv_result['info'], {'a': 171, 'b': 45})
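# Hedged sketch building on the add_cross_validator calls above: any splitter
# exposing sklearn's split(X, y) interface can be registered the same way.
# Registering ShuffleSplit as a third validator (and the args passed to it)
# is an assumption for illustration, not part of the tests.
from sklearn.model_selection import ShuffleSplit

pipeline["CrossValidation"].add_cross_validator("shuffle_split", ShuffleSplit,
                                                lambda x: x.reshape((-1,)))
pipeline_config = pipeline.get_pipeline_config(cross_validator="shuffle_split",
                                               cross_validator_args={"n_splits": 2, "test_size": 0.3})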