    def test_pipeline_predict(self, fit_dictionary_tabular):
        """This test makes sure that the pipeline is able to predict
        given a random configuration"""
        X = fit_dictionary_tabular['X_train'].copy()
        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary_tabular['dataset_properties'])

        cs = pipeline.get_hyperparameter_search_space()
        config = cs.sample_configuration()
        pipeline.set_hyperparameters(config)

        with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \
             as patch_train:
            patch_train.return_value = 1, {}
            pipeline.fit(fit_dictionary_tabular)

        # we expect the output to have the same batch size as the test input,
        # and the number of outputs per sample equal to the dataset's output_shape
        expected_output_shape = (
            X.shape[0],
            fit_dictionary_tabular["dataset_properties"]["output_shape"])

        prediction = pipeline.predict(X)
        assert isinstance(prediction, np.ndarray)
        assert prediction.shape == expected_output_shape

    def test_pipeline_fit(self, fit_dictionary_tabular):
        """This test makes sure that the pipeline is able to fit
        given random combinations of hyperparameters across the pipeline"""

        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary_tabular['dataset_properties'])
        cs = pipeline.get_hyperparameter_search_space()
        config = cs.sample_configuration()
        pipeline.set_hyperparameters(config)
        try:
            pipeline.fit(fit_dictionary_tabular)
        except Exception as e:
            pytest.fail(f"Failed due to {e} for config={config}")

        # To make sure we fitted the model, there should be a
        # run summary object with accuracy
        run_summary = pipeline.named_steps['trainer'].run_summary
        assert run_summary is not None

        # Make sure that performance was properly captured
        assert run_summary.performance_tracker['train_loss'][1] > 0
        assert run_summary.total_parameter_count > 0
        assert 'accuracy' in run_summary.performance_tracker['train_metrics'][
            1]

        # Make sure a network was fit
        assert isinstance(pipeline.named_steps['network'].get_network(),
                          torch.nn.Module)

    def test_pipeline_fit_include(self, fit_dictionary_tabular, preprocessor):
        """
        This test ensures that a tabular classification
        pipeline can be fit with each of the feature
        preprocessors passed via the include argument
        """

        fit_dictionary_tabular['epochs'] = 1

        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary_tabular['dataset_properties'],
            include={'feature_preprocessor': [preprocessor]})
        cs = pipeline.get_hyperparameter_search_space()
        config = cs.sample_configuration()
        pipeline.set_hyperparameters(config)
        try:
            pipeline.fit(fit_dictionary_tabular)
        except Exception as e:
            pytest.fail(f"For config {config} failed with {e}")

        # To make sure we fitted the model, there should be a
        # run summary object with accuracy
        run_summary = pipeline.named_steps['trainer'].run_summary
        assert run_summary is not None

        assert preprocessor == pipeline.named_steps[
            'feature_preprocessor'].choice.__class__.__name__

    def test_pipeline_predict_proba(self, fit_dictionary_tabular):
        """This test makes sure that the pipeline is able to fit
        given random combinations of hyperparameters across the pipeline,
        and then predict class probabilities via predict_proba
        """
        X = fit_dictionary_tabular['X_train'].copy()
        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary_tabular['dataset_properties'])

        cs = pipeline.get_hyperparameter_search_space()
        config = cs.sample_configuration()
        pipeline.set_hyperparameters(config)

        try:
            with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \
                 as patch_train:
                patch_train.return_value = 1, {}
                pipeline.fit(fit_dictionary_tabular)
        except Exception as e:
            pytest.fail(f"Failed on config={config} with {e}")

        # we expect the output to have the same batch size as the test input,
        # and one column per class ("output_shape" in dataset_properties)
        expected_output_shape = (
            X.shape[0],
            fit_dictionary_tabular["dataset_properties"]["output_shape"])

        prediction = pipeline.predict_proba(X)
        assert isinstance(prediction, np.ndarray)
        assert prediction.shape == expected_output_shape
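
# A small, self-contained sketch (toy softmax, not the pipeline's network) of the
# shape contract checked above: a predict_proba-style output has one row per
# sample and one column per class, and each row sums to one.
import numpy as np

rng = np.random.default_rng(0)
logits = rng.normal(size=(4, 3))  # 4 samples, 3 classes
proba = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
assert proba.shape == (4, 3)
assert np.allclose(proba.sum(axis=1), 1.0)
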
Example #5
    def test_pipeline_fit(self):
        """This test makes sure that the pipeline is able to fit
        given random combinations of hyperparameters across the pipeline"""

        pipeline = TabularClassificationPipeline(
            dataset_properties=self.dataset_properties)
        cs = pipeline.get_hyperparameter_search_space()
        config = cs.sample_configuration()
        pipeline.set_hyperparameters(config)
        pipeline.fit({
            'num_features': self.num_features,
            'num_classes': self.num_classes,
            'numerical_columns': list(range(self.num_features)),
            'categorical_columns': [],
            'categories': [],
            'X_train': self.X,
            'y_train': self.y,
            'train_indices': list(range(self.X.shape[0] // 2)),
            'val_indices': list(range(self.X.shape[0] // 2, self.X.shape[0])),
            'is_small_preprocess': False,
            # Training configuration
            'dataset_properties': self.dataset_properties,
            'job_id': 'example_tabular_classification_1',
            'device': 'cpu',
            'budget_type': 'epochs',
            'epochs': 5,
            'torch_num_threads': 1,
            'early_stopping': 20,
            'working_dir': '/tmp',
            'use_tensorboard_logger': True,
            'use_pynisher': False,
            'metrics_during_training': True,
            'split_id': 0,
            'backend': self.backend,
        })

        # To make sure we fitted the model, there should be a
        # run summary object with accuracy
        self.assertIsNotNone(pipeline.named_steps['trainer'].run_summary)

def test_pipeline_score(fit_dictionary_tabular_dummy):
    """This test makes sure that the pipeline is able to achieve a decent score on dummy data
    given the default configuration"""
    X = fit_dictionary_tabular_dummy['X_train'].copy()
    y = fit_dictionary_tabular_dummy['y_train'].copy()

    # increase number of epochs to test for performance
    fit_dictionary_tabular_dummy['epochs'] = 50

    pipeline = TabularClassificationPipeline(
        dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'])

    cs = pipeline.get_hyperparameter_search_space()
    config = cs.get_default_configuration()
    pipeline.set_hyperparameters(config)

    pipeline.fit(fit_dictionary_tabular_dummy)

    # Ensure that the network is an instance of torch Module
    assert isinstance(pipeline.named_steps['network'].get_network(),
                      torch.nn.Module)

    accuracy = pipeline.score(X, y)

    # we should be able to get a decent score on this dummy data
    assert accuracy >= 0.8, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}"

def test_train_pipeline_with_runtime(fit_dictionary_tabular_dummy):
    """This test makes sure that the pipeline respects a runtime budget:
    training stops based on elapsed time rather than on a fixed number of epochs"""

    # Convert the training budget from epochs to runtime
    fit_dictionary_tabular_dummy.pop('epochs', None)
    fit_dictionary_tabular_dummy['budget_type'] = 'runtime'
    fit_dictionary_tabular_dummy['runtime'] = 5
    fit_dictionary_tabular_dummy['early_stopping'] = -1

    pipeline = TabularClassificationPipeline(
        dataset_properties=fit_dictionary_tabular_dummy['dataset_properties'])

    cs = pipeline.get_hyperparameter_search_space()
    config = cs.get_default_configuration()
    pipeline.set_hyperparameters(config)

    pipeline.fit(fit_dictionary_tabular_dummy)
    run_summary = pipeline.named_steps['trainer'].run_summary
    budget_tracker = pipeline.named_steps['trainer'].budget_tracker
    assert budget_tracker.budget_type == 'runtime'
    assert budget_tracker.max_runtime == 5
    assert budget_tracker.is_max_time_reached()

    # There is no epoch limitation
    assert not budget_tracker.is_max_epoch_reached(epoch=np.inf)

    # More than 200 epochs should have passed in 5 seconds for this dataset
    assert len(run_summary.performance_tracker['start_time']) > 100
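
# A minimal sketch of the runtime-budget idea exercised above. SimpleBudgetTracker
# is a hypothetical stand-in, not auto-PyTorch's BudgetTracker: it records a start
# time and reports the budget as exhausted once the wall-clock limit has elapsed.
import time


class SimpleBudgetTracker:
    def __init__(self, max_runtime: float):
        self.budget_type = 'runtime'
        self.start_time = time.time()
        self.max_runtime = max_runtime

    def is_max_time_reached(self) -> bool:
        return time.time() - self.start_time >= self.max_runtime


tracker = SimpleBudgetTracker(max_runtime=0.01)
time.sleep(0.02)
assert tracker.is_max_time_reached()
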
Example #8
    def test_default_configuration(self):
        """Makes sure that when no config is set, we can trust the
        default configuration from the space"""
        pipeline = TabularClassificationPipeline(
            dataset_properties=self.dataset_properties)

        pipeline.fit({
            'num_features': self.num_features,
            'num_classes': self.num_classes,
            'numerical_columns': list(range(self.num_features)),
            'categorical_columns': [],
            'categories': [],
            'X_train': self.X,
            'y_train': self.y,
            'train_indices': list(range(self.X.shape[0] // 2)),
            'val_indices': list(range(self.X.shape[0] // 2, self.X.shape[0])),
            'is_small_preprocess': False,
            # Training configuration
            'dataset_properties': self.dataset_properties,
            'job_id': 'example_tabular_classification_1',
            'device': 'cpu',
            'budget_type': 'epochs',
            'epochs': 5,
            'torch_num_threads': 1,
            'early_stopping': 20,
            'working_dir': '/tmp',
            'use_tensorboard_logger': True,
            'use_pynisher': False,
            'metrics_during_training': True,
            'split_id': 0,
            'backend': self.backend,
        })

    def test_default_configuration(self, fit_dictionary_tabular,
                                   is_small_preprocess):
        """Makes sure that when no config is set, we can trust the
        default configuration from the space"""

        fit_dictionary_tabular['is_small_preprocess'] = is_small_preprocess

        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary_tabular['dataset_properties'])
        with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \
             as patch_train:
            patch_train.return_value = 1, {}
            pipeline.fit(fit_dictionary_tabular)
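
# A self-contained sketch of the mocking pattern used throughout these tests
# (ToyTrainer is a made-up class, not auto-PyTorch's trainer): patch.object swaps
# out the expensive train_epoch call so the surrounding fit logic runs quickly.
import unittest.mock


class ToyTrainer:
    def train_epoch(self):
        raise RuntimeError("real training is too slow for a unit test")


toy_trainer = ToyTrainer()
with unittest.mock.patch.object(toy_trainer, 'train_epoch') as patch_train:
    patch_train.return_value = 1, {}
    assert toy_trainer.train_epoch() == (1, {})
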
Example #10
def random_search_and_save(fit_dictionary: typing.Dict[str, typing.Any],
                           backend: Backend, num_models: int) -> None:
    """
    A function to generate randomly fitted pipelines.
    It inefficiently passes the data in the fit dictionary, as there is no datamanager yet.

    It uses the backend to save the models and predictions for the ensemble selection
    """

    # Ensemble selection will evaluate performance on the out-of-fold (OOF)
    # predictions, so store the OOF ground truth
    datamanager = backend.load_datamanager()
    X_train, y_train = datamanager.train_tensors
    X_test, y_test = (None, None)
    if datamanager.test_tensors is not None:
        X_test, y_test = datamanager.test_tensors
    targets = np.take(y_train, fit_dictionary['val_indices'], axis=0)
    backend.save_targets_ensemble(targets)

    for idx in range(num_models):
        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary['dataset_properties'])

        # Sample a random configuration
        pipeline_cs = pipeline.get_hyperparameter_search_space()
        config = pipeline_cs.sample_configuration()
        pipeline.set_hyperparameters(config)

        # Fit the sample configuration
        pipeline.fit(fit_dictionary)

        # Predict using the fit model
        ensemble_predictions = pipeline.predict(
            np.take(X_train, fit_dictionary['val_indices'], axis=0))
        test_predictions = pipeline.predict(X_test)

        backend.save_numrun_to_dir(
            seed=fit_dictionary['seed'],
            idx=idx,
            budget=fit_dictionary['epochs'],
            model=pipeline,
            cv_model=None,
            ensemble_predictions=ensemble_predictions,
            valid_predictions=None,
            test_predictions=test_predictions,
        )

        score = accuracy_score(y_test, np.argmax(test_predictions, axis=1))
        print(f"Fitted a pipeline {idx} with score = {score}")

    return
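
# A tiny, self-contained illustration of the out-of-fold (OOF) slicing used above:
# np.take with the validation indices picks exactly the rows that ensemble selection
# will later be scored against (toy array and indices, not the real datamanager).
import numpy as np

toy_y_train = np.array([0, 1, 0, 1, 1, 0])
toy_val_indices = [4, 5]
toy_targets = np.take(toy_y_train, toy_val_indices, axis=0)
assert toy_targets.tolist() == [1, 0]
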
 def test_remove_key_check_requirements(self, fit_dictionary_tabular):
     """Makes sure that when a key is removed from X, correct error is outputted"""
     pipeline = TabularClassificationPipeline(
         dataset_properties=fit_dictionary_tabular['dataset_properties'])
     for key in [
             'num_run', 'device', 'split_id', 'torch_num_threads',
             'dataset_properties'
     ]:
         fit_dictionary_tabular_copy = fit_dictionary_tabular.copy()
         fit_dictionary_tabular_copy.pop(key)
         with pytest.raises(
                 ValueError,
                 match=r"To fit .+?, expected fit dictionary to have"):
             pipeline.fit(fit_dictionary_tabular_copy)
Example #12
 def test_tabular_preprocess(self):
     dataset_properties = {
         'numerical_columns': list(range(15)),
         'categorical_columns': [],
         'task_type': 'tabular_classification'
     }
     X = dict(
         X_train=np.random.random((10, 15)),
         y_train=np.random.random(10),
         train_indices=[0, 1, 2, 3, 4, 5],
         val_indices=[6, 7, 8, 9],
         is_small_preprocess=True,
         numerical_columns=list(range(15)),
         categorical_columns=[],
         num_features=15,
         num_classes=2,
         categories=[],
         # Training configuration
         job_id='test',
         device='cpu',
         budget_type='epochs',
         epochs=10,
         torch_num_threads=1,
         early_stopping=20,
         dataset_properties=dataset_properties,
         split_id=0,
         backend=self.backend,
     )
     pipeline = TabularClassificationPipeline(
         dataset_properties=dataset_properties)
     # Remove the trainer
     pipeline.steps.pop()
     pipeline = pipeline.fit(X)
     X = pipeline.transform(X)
     self.assertNotIn('preprocess_transforms', X.keys())

    def test_pipeline_transform(self, fit_dictionary_tabular):
        """
        In the context of autopytorch, transform expands a fit dictionary with
        components that were previously fit. We can use this as a convenient way
        to make sure that fit works properly.
        This test was added in light of components not being properly added to the fit dictionary
        """

        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary_tabular['dataset_properties'])
        cs = pipeline.get_hyperparameter_search_space()
        config = cs.sample_configuration()
        pipeline.set_hyperparameters(config)

        with unittest.mock.patch.object(pipeline.named_steps['trainer'].choice, 'train_epoch') \
             as patch_train:
            patch_train.return_value = 1, {}
            # Fit on a copy so the early preprocessing does not mutate the original fit dictionary
            pipeline.fit(fit_dictionary_tabular.copy())

        transformed_fit_dictionary_tabular = pipeline.transform(
            fit_dictionary_tabular)

        # First, we do not lose any of the original entries (a subset containment check on the items)
        assert fit_dictionary_tabular.items() <= transformed_fit_dictionary_tabular.items()

        # Then the pipeline should have added the following keys
        expected_keys = {
            'imputer', 'encoder', 'scaler', 'tabular_transformer',
            'preprocess_transforms', 'network', 'optimizer', 'lr_scheduler',
            'train_data_loader', 'val_data_loader', 'run_summary'
        }
        assert expected_keys.issubset(
            set(transformed_fit_dictionary_tabular.keys()))

        # Then, preprocessing transformations must have been created
        assert len(
            get_preprocess_transforms(transformed_fit_dictionary_tabular)) > 0

        # We expect the transformations to be available in the pipeline at any time for inference
        assert 'preprocess_transforms' in transformed_fit_dictionary_tabular.keys()
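
# A small, self-contained illustration of the dict-items subset check used above:
# `a.items() <= b.items()` holds exactly when every key/value pair of `a` also
# appears in `b` (toy dictionaries, not the real fit dictionary).
original = {'X_train': 'data', 'epochs': 5}
expanded = {'X_train': 'data', 'epochs': 5, 'network': 'fitted-module'}
assert original.items() <= expanded.items()
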
Example #14
 def test_remove_key_check_requirements(self):
     """Makes sure that when a key is removed from X, correct error is outputted"""
     pipeline = TabularClassificationPipeline(
         dataset_properties=self.dataset_properties)
     X = {
         'num_features': self.num_features,
         'num_classes': self.num_classes,
         'numerical_columns': list(range(self.num_features)),
         'categorical_columns': [],
         'categories': [],
         'X_train': self.X,
         'y_train': self.y,
         'train_indices': list(range(self.X.shape[0] // 2)),
         'val_indices': list(range(self.X.shape[0] // 2, self.X.shape[0])),
         'is_small_preprocess': False,
         # Training configuration
         'dataset_properties': self.dataset_properties,
         'job_id': 'example_tabular_classification_1',
         'device': 'cpu',
         'budget_type': 'epochs',
         'epochs': 5,
         'torch_num_threads': 1,
         'early_stopping': 20,
         'working_dir': '/tmp',
         'use_tensorboard_logger': True,
         'use_pynisher': False,
         'metrics_during_training': True,
         'split_id': 0,
         'backend': self.backend,
     }
     for key in X.keys():
         # skip tests for data loader requirements as data loader has different check_requirements
         if key in ('y_train', 'val_indices'):
             continue
         X_copy = X.copy()
         X_copy.pop(key)
         try:
             pipeline.fit(X_copy)
         except ValueError as msg:
             self.assertRegex(
                 str(msg),
                 r"To fit .+?, expected fit dictionary to have .+? but got .*"
             )
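
# Why the skip condition above is written as a membership test: a bare string on
# the right-hand side of `or` is always truthy, so the common slip
# `key == 'y_train' or 'val_indices'` would skip every key and test nothing.
some_key = 'epochs'
assert bool(some_key == 'y_train' or 'val_indices')  # truthy for any key (the pitfall)
assert some_key not in ('y_train', 'val_indices')    # the intended check
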
Example #15
    def test_tabular_no_preprocess(self):
        dataset_properties = {
            'numerical_columns': list(range(15)),
            'categorical_columns': [],
            'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION],
            'output_type': OUTPUT_TYPES_TO_STRING[MULTICLASS],
            'is_small_preprocess': False,
            'input_shape': (15, ),
            'output_shape': 2,
            'categories': [],
            'issparse': False
        }
        X = dict(
            X_train=np.random.random((10, 15)),
            y_train=np.random.random(10),
            train_indices=[0, 1, 2, 3, 4, 5],
            val_indices=[6, 7, 8, 9],
            dataset_properties=dataset_properties,
            # Training configuration
            num_run=16,
            device='cpu',
            budget_type='epochs',
            epochs=10,
            torch_num_threads=1,
            early_stopping=20,
            split_id=0,
            backend=self.backend,
        )

        pipeline = TabularClassificationPipeline(
            dataset_properties=dataset_properties)
        # Remove the trainer
        pipeline.steps.pop()
        pipeline = pipeline.fit(X)
        X = pipeline.transform(X)
        self.assertIn('preprocess_transforms', X.keys())
        self.assertIsInstance(X['preprocess_transforms'], list)
        self.assertIsInstance(X['preprocess_transforms'][-1].preprocessor,
                              BaseEstimator)
Example #16
    'epochs': 100,
    'runtime': 3600,
    'torch_num_threads': 1,
    'early_stopping': 20,
    'use_tensorboard_logger': True,
    'use_pynisher': False,
    'metrics_during_training': True,
    'backend': backend,
    'split_id': 0,
}

# Configuration space
pipeline_cs = pipeline.get_hyperparameter_search_space()
print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}")
config = pipeline_cs.sample_configuration()
print("Pipeline Random Config:\n", '_' * 40, f"\n{config}")
pipeline.set_hyperparameters(config)

# Make sure the working directory exists; this is something the backend would normally handle
os.makedirs('./tmp/example_tabular_classification_1', exist_ok=True)

# Fit the pipeline
print("Fitting the pipeline...")
pipeline.fit(fit_dictionary)

# Showcase some components of the pipeline
print(pipeline)

# Showcase performance of pipeline
print(pipeline.named_steps['trainer'].run_summary.repr_last_epoch())
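
# A self-contained sketch of the ConfigurationSpace sample-and-apply pattern shown
# above, on a toy space (the hyperparameters here are made up and are not the
# pipeline's real search space).
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter

toy_cs = ConfigurationSpace()
toy_cs.add_hyperparameters([
    CategoricalHyperparameter('optimizer', ['adam', 'sgd'], default_value='adam'),
    UniformFloatHyperparameter('lr', 1e-4, 1e-1, default_value=1e-2, log=True),
])
toy_config = toy_cs.sample_configuration()
print(toy_config.get('optimizer'), toy_config.get('lr'))
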
Example #17
    def test_pipeline_fit(self, fit_dictionary_tabular, embedding, backbone,
                          head):
        """This test makes sure that the pipeline is able to fit
        every combination of network embedding, backbone, head"""

        # increase number of epochs to test for performance
        fit_dictionary_tabular['epochs'] = 50

        include = {
            'network_backbone': [backbone],
            'network_head': [head],
            'network_embedding': [embedding]
        }

        if len(fit_dictionary_tabular['dataset_properties']
               ['categorical_columns']
               ) == 0 and embedding == 'LearnedEntityEmbedding':
            pytest.skip(
                "Learned Entity Embedding is not used with numerical only data"
            )
        pipeline = TabularClassificationPipeline(
            dataset_properties=fit_dictionary_tabular['dataset_properties'],
            include=include)

        cs = pipeline.get_hyperparameter_search_space()
        config = cs.get_default_configuration()

        assert embedding == config.get('network_embedding:__choice__', None)
        assert backbone == config.get('network_backbone:__choice__', None)
        assert head == config.get('network_head:__choice__', None)
        pipeline.set_hyperparameters(config)
        # Early stop to the best configuration seen
        fit_dictionary_tabular['early_stopping'] = 50

        pipeline.fit(fit_dictionary_tabular)

        # To make sure we fitted the model, there should be a
        # run summary object with accuracy
        run_summary = pipeline.named_steps['trainer'].run_summary
        assert run_summary is not None

        # Make sure that performance was properly captured
        assert run_summary.performance_tracker['train_loss'][1] > 0
        assert run_summary.total_parameter_count > 0
        assert 'accuracy' in run_summary.performance_tracker['train_metrics'][
            1]

        # Make sure default pipeline achieves a good score for dummy datasets
        epoch_where_best = int(
            np.argmax([
                run_summary.performance_tracker['val_metrics'][e]['accuracy']
                for e in range(
                    1,
                    len(run_summary.performance_tracker['val_metrics']) + 1)
            ])) + 1  # Epochs start at 1
        score = run_summary.performance_tracker['val_metrics'][
            epoch_where_best]['accuracy']

        assert score >= 0.8, run_summary.performance_tracker['val_metrics']

        # Sanity-check the early stopping behaviour

        # We should not stop before patience
        assert run_summary.get_last_epoch(
        ) >= fit_dictionary_tabular['early_stopping']

        # We should not exceed the maximum number of allowed epochs
        assert run_summary.get_last_epoch() <= fit_dictionary_tabular['epochs']

        # every trained epoch has a val metric
        assert run_summary.get_last_epoch() == max(
            list(run_summary.performance_tracker['train_metrics'].keys()))

        epochs_since_best = run_summary.get_last_epoch(
        ) - run_summary.get_best_epoch()
        if epochs_since_best >= fit_dictionary_tabular['early_stopping']:
            assert run_summary.get_best_epoch() == epoch_where_best

        # Make sure a network was fit
        assert isinstance(pipeline.named_steps['network'].get_network(),
                          torch.nn.Module)

def test_constant_pipeline_iris(fit_dictionary_tabular):
    search_space_updates = HyperparameterSearchSpaceUpdates()
    search_space_updates.append(node_name='feature_preprocessor',
                                hyperparameter='__choice__',
                                value_range=['PolynomialFeatures'],
                                default_value='PolynomialFeatures')
    search_space_updates.append(node_name='scaler',
                                hyperparameter='__choice__',
                                value_range=['StandardScaler'],
                                default_value='StandardScaler')
    search_space_updates.append(
        node_name='network_backbone',
        hyperparameter='__choice__',
        value_range=['MLPBackbone', 'ShapedMLPBackbone'],
        default_value='MLPBackbone')
    search_space_updates.append(node_name='network_backbone',
                                hyperparameter='MLPBackbone:num_groups',
                                value_range=[1, 1],
                                default_value=1)
    search_space_updates.append(node_name='network_backbone',
                                hyperparameter='MLPBackbone:num_units',
                                value_range=[100],
                                default_value=100)
    search_space_updates.append(node_name='trainer',
                                hyperparameter='__choice__',
                                value_range=['StandardTrainer'],
                                default_value='StandardTrainer')
    search_space_updates.append(node_name='lr_scheduler',
                                hyperparameter='__choice__',
                                value_range=['NoScheduler'],
                                default_value='NoScheduler')
    search_space_updates.append(node_name='optimizer',
                                hyperparameter='__choice__',
                                value_range=['AdamOptimizer'],
                                default_value='AdamOptimizer')
    search_space_updates.append(node_name='optimizer',
                                hyperparameter='AdamOptimizer:lr',
                                value_range=[1e-2],
                                default_value=1e-2)
    pipeline = TabularClassificationPipeline(
        dataset_properties=fit_dictionary_tabular['dataset_properties'],
        search_space_updates=search_space_updates)

    fit_dictionary_tabular['additional_metrics'] = ['balanced_accuracy']
    # increase number of epochs to test for performance
    fit_dictionary_tabular['epochs'] = 50

    try:
        pipeline.fit(fit_dictionary_tabular)
    except Exception as e:
        pytest.fail(f"Failed due to {e}")

    configuration = pipeline.configuration

    assert 'PolynomialFeatures' == configuration.get(
        'feature_preprocessor:__choice__')
    assert 'StandardScaler' == configuration.get('scaler:__choice__')
    assert 'MLPBackbone' == configuration.get('network_backbone:__choice__')
    assert 'StandardTrainer' == configuration.get('trainer:__choice__')
    assert 'NoScheduler' == configuration.get('lr_scheduler:__choice__')
    assert 'AdamOptimizer' == configuration.get('optimizer:__choice__')
    assert 1 == configuration.get('network_backbone:MLPBackbone:num_groups')
    assert 100 == configuration.get('network_backbone:MLPBackbone:num_units_1')
    assert 1e-2 == configuration.get('optimizer:AdamOptimizer:lr')

    # To make sure we fitted the model, there should be a
    # run summary object with accuracy
    run_summary = pipeline.named_steps['trainer'].run_summary
    assert run_summary is not None

    # Make sure that performance was properly captured
    assert run_summary.performance_tracker['train_loss'][1] > 0
    assert run_summary.total_parameter_count > 0
    assert 'balanced_accuracy' in run_summary.performance_tracker[
        'train_metrics'][1]

    # Make sure default pipeline achieves a good score for dummy datasets
    epoch2loss = run_summary.performance_tracker['val_loss']
    best_loss = min(list(epoch2loss.values()))
    epoch_where_best = list(epoch2loss.keys())[list(
        epoch2loss.values()).index(best_loss)]
    val_score = run_summary.performance_tracker['val_metrics'][
        epoch_where_best]['balanced_accuracy']
    train_score = run_summary.performance_tracker['train_metrics'][
        epoch_where_best]['balanced_accuracy']

    assert val_score >= 0.8, run_summary.performance_tracker['val_metrics']
    assert train_score >= 0.8, run_summary.performance_tracker['train_metrics']
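
# A hedged aside on the best-epoch lookup above: when the performance tracker maps
# epoch -> validation loss, the epoch with the lowest loss can be found directly
# with min() and a key function (toy dictionary shown here).
toy_epoch2loss = {1: 0.9, 2: 0.4, 3: 0.55}
assert min(toy_epoch2loss, key=toy_epoch2loss.get) == 2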