Beispiel #1
0
def main():
    """Sample a hyperparameter space on a nested pipeline and check the draw.

    A space is declared for the two named ``MultiplyByN`` steps and for the
    ``PCA`` step of the nested pipeline, one sample is drawn with ``rvs()``
    and applied, and the resulting flat hyperparameters are asserted to lie
    within the declared bounds.
    """
    nested = Pipeline([
        Identity(),
        Identity(),
        PCA(n_components=4),
    ])
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        nested,
    ])

    space = {
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__PCA__n_components': RandInt(2, 3),
    }
    p.set_hyperparams_space(space)

    # Draw one random sample from the space and apply it to the pipeline.
    drawn = p.get_hyperparams_space().rvs()
    p.set_hyperparams(drawn)

    flat = p.get_hyperparams().to_flat_as_dict_primitive()
    assert 42 <= flat['step1__multiply_by'] <= 50
    assert -10 <= flat['step2__multiply_by'] <= 0
    assert flat['Pipeline__PCA__n_components'] in [2, 3]

    # The value must also be visible on the wrapped predictor itself.
    wrapped_pca = p['Pipeline']['PCA'].get_wrapped_sklearn_predictor()
    assert wrapped_pca.n_components in [2, 3]
Beispiel #2
0
def test_choose_one_step_of_invalid_chosen_step():
    """Expect ``ValueError`` when ``'choice'`` names a step that is not declared."""
    with pytest.raises(ValueError):
        chooser = ChooseOneStepOf([
            ('a', Identity()),
            ('b', Identity()),
        ])
        # 'c' is neither 'a' nor 'b'.
        Pipeline([
            chooser.set_hyperparams({'choice': 'c'}),
        ])
Beispiel #3
0
def test_has_children_mixin_apply_should_return_recursive_dict_to_direct_childrends():
    """``apply('_get_hyperparams')`` results expose each direct child's hyperparams.

    The flattened results are expected to be keyed as ``<step name>__<hp>``.
    """
    step_a = Identity().set_hyperparams(HyperparameterSamples({'hp': 0}))
    step_b = Identity().set_hyperparams(HyperparameterSamples({'hp': 1}))
    p = Pipeline([('a', step_a), ('b', step_b)])

    results = p.apply('_get_hyperparams', ra=None)

    flat_results = results.to_flat_as_dict_primitive()
    assert flat_results['a__hp'] == 0
    assert flat_results['b__hp'] == 1
Beispiel #4
0
def test_feature_union_should_transform_with_numpy_transpose():
    """``transform`` through a ``FeatureUnion`` joined by ``NumpyTranspose``.

    Two ``Identity`` branches receive the same inputs; the joined output is
    expected to equal the transpose of the stacked branch outputs.
    """
    p = Pipeline(
        [FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=NumpyTranspose())])
    # ``np.random.randint((1, 20))`` would pass the tuple as ``low`` and yield
    # a degenerate shape-(2,) array whose first element is always 0. Generate
    # a real 2-D batch instead, as the fit_transform variant of this test does.
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))

    outputs = p.transform(data_inputs)

    assert np.array_equal(outputs,
                          np.array([data_inputs, data_inputs]).transpose())
Beispiel #5
0
def test_feature_union_should_transform_with_zip_features():
    """``transform`` through a ``FeatureUnion`` joined by ``ZipFeatures``.

    With two ``Identity`` branches, the output is expected to equal the
    inputs stacked with themselves along axis 1.
    """
    union = FeatureUnion([Identity(), Identity()], joiner=ZipFeatures())
    p = Pipeline([union])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))

    outputs = p.transform(data_inputs)

    expected = np.stack([data_inputs, data_inputs], axis=1)
    assert np.array_equal(outputs, expected)
Beispiel #6
0
def test_feature_union_should_transform_with_concatenate_inner_features():
    """``transform`` through a ``FeatureUnion`` joined by ``NumpyConcatenateInnerFeatures``.

    Two ``Identity`` branches receive the same inputs; the joined output is
    expected to equal the inputs concatenated with themselves along the last
    axis.
    """
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ],
                     joiner=NumpyConcatenateInnerFeatures())
    ])
    # ``np.random.randint((1, 20))`` would pass the tuple as ``low`` and yield
    # a degenerate shape-(2,) array. Use a real 2-D batch, matching the
    # fit_transform variant of this test.
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))

    outputs = p.transform(data_inputs)

    # The expectation states the last axis explicitly; the default axis=0
    # only coincides with it for 1-D inputs.
    assert np.array_equal(outputs,
                          np.concatenate([data_inputs, data_inputs], axis=-1))
Beispiel #7
0
def test_feature_union_should_fit_transform_with_numpy_transpose():
    """``fit_transform`` through a ``FeatureUnion`` joined by ``NumpyTranspose``.

    The output is expected to equal the transpose of the inputs stacked with
    themselves. No expected outputs are supplied to ``fit_transform``.
    """
    union = FeatureUnion([Identity(), Identity()], joiner=NumpyTranspose())
    p = Pipeline([union])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))
    expected_outputs = None

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    stacked = np.array([data_inputs, data_inputs])
    assert np.array_equal(outputs, stacked.transpose())
Beispiel #8
0
def test_feature_union_should_apply_to_self_and_sub_steps():
    """``apply`` with a callable reaches the pipeline, the union and its steps.

    The callable sets an ``'applied'`` hyperparameter on each step it visits;
    the flag is then checked on the pipeline, on the ``FeatureUnion`` and on
    the union's items at indices 0, 1 and 2.
    """
    union = FeatureUnion([Identity(), Identity()], joiner=NumpyTranspose())
    p = Pipeline([union])

    p.apply(lambda step: step._set_hyperparams(
        HyperparameterSamples({'applied': True})))

    assert p.hyperparams['applied']
    assert p['FeatureUnion'].hyperparams['applied']
    # Index 2 is presumably the joiner added by FeatureUnion; verify against
    # the FeatureUnion implementation.
    for index in (0, 1, 2):
        assert p['FeatureUnion'][index].hyperparams['applied']
Beispiel #9
0
def test_has_children_mixin_apply_should_return_recursive_dict_to_recursive_childrends():
    """``apply('_get_hyperparams')`` results include nested children.

    The nested pipeline's own hyperparameter and those of its two named
    steps are expected under ``Pipeline__...`` keys.
    """
    step_c = Identity().set_hyperparams(HyperparameterSamples({'hp': 3}))
    step_d = Identity().set_hyperparams(HyperparameterSamples({'hp': 4}))
    nested = Pipeline([('c', step_c), ('d', step_d)])
    p = Pipeline([
        nested.set_hyperparams(HyperparameterSamples({'hp': 2})),
    ])

    results = p.apply('_get_hyperparams', ra=None)

    assert results['Pipeline__hp'] == 2
    assert results['Pipeline__c__hp'] == 3
    assert results['Pipeline__d__hp'] == 4
Beispiel #10
0
def test_feature_union_should_fit_transform_with_concatenate_inner_features():
    """``fit_transform`` through a ``FeatureUnion`` joined by ``NumpyConcatenateInnerFeatures``.

    The output is expected to equal the inputs concatenated with themselves
    along the last axis. No expected outputs are supplied.
    """
    union = FeatureUnion(
        [Identity(), Identity()],
        joiner=NumpyConcatenateInnerFeatures(),
    )
    p = Pipeline([union])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))
    expected_outputs = None

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    expected = np.concatenate([data_inputs, data_inputs], axis=-1)
    assert np.array_equal(outputs, expected)
Beispiel #11
0
def test_when_hyperparams_and_saved_no_pipeline_should_not_load_checkpoint_pickle(tmpdir: LocalPath):
    """A second pipeline runs all its steps when the first one was not saved.

    A first pipeline is built with ``save_pipeline=False`` and run; a second
    one, built with the same hyperparameters and a ``DefaultCheckpoint``, is
    then run. The tape of the second pipeline is expected to hold all three
    step names and the data is expected to come back unchanged.
    """
    # Given
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = DefaultCheckpoint()

    # When
    unsaved_pipeline = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=Identity(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
        different=True,
        save_pipeline=False,
    )
    unsaved_pipeline.fit_transform(data_inputs, expected_outputs)

    loading_pipeline = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=pickle_checkpoint_step,
        tape=tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    loading_pipeline, actual_data_inputs = loading_pipeline.fit_transform(
        data_inputs, expected_outputs)

    # Then
    recorded_names = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert recorded_names == ["1", "2", "3"]
Beispiel #12
0
def given_failed_trial_split(trial):
    """Create a validation split on ``trial``, record scores, and mark both failed.

    Train and validation results for the main metric are added for the
    scores 0.5, 0.7 and 0.4 (lower is better), then the split and the trial
    are both flagged as failed with the same ``IndexError``.

    :param trial: the trial on which the validation split is opened.
    :return: the trial split created by ``trial.new_validation_split``.
    """
    with trial.new_validation_split(Identity()) as trial_split:
        for score in (0.5, 0.7, 0.4):
            trial_split.add_metric_results_train(
                name=MAIN_METRIC_NAME,
                score=score,
                higher_score_is_better=False)
            trial_split.add_metric_results_validation(
                name=MAIN_METRIC_NAME,
                score=score,
                higher_score_is_better=False)

        failure = IndexError('index error')
        trial_split.set_failed(failure)
        trial.set_failed(failure)
    return trial_split
Beispiel #13
0
def test_trial_split_is_new_best_score_should_return_true_with_a_new_best_score_after_multiple_scores():
    """``is_new_best_score`` is true when the latest score is the lowest so far.

    Scores 0.5, 0.7 and 0.4 are recorded in that order with
    ``higher_score_is_better=False``.
    """
    repo = InMemoryHyperparamsRepository()
    trial = Trial(
        save_trial_function=repo.save_trial,
        hyperparams=HyperparameterSamples({'a': 2}),
        main_metric_name=MAIN_METRIC_NAME,
    )

    with trial.new_validation_split(Identity()) as trial_split:
        for score in (0.5, 0.7, 0.4):
            trial_split.add_metric_results_train(
                name=MAIN_METRIC_NAME,
                score=score,
                higher_score_is_better=False)
            trial_split.add_metric_results_validation(
                name=MAIN_METRIC_NAME,
                score=score,
                higher_score_is_better=False)

    assert trial_split.is_new_best_score()
Beispiel #14
0
def given_success_trial_validation_split(trial, best_score=0.4):
    """Create a validation split on ``trial``, record scores, and mark both successful.

    Train and validation results for the main metric are added for the
    scores 0.5, 0.7 and finally ``best_score`` (lower is better), then the
    split and the trial are both flagged as successful.

    :param trial: the trial on which the validation split is opened.
    :param best_score: the last score recorded on the split.
    :return: the trial split created by ``trial.new_validation_split``.
    """
    with trial.new_validation_split(Identity()) as trial_split:
        for score in (0.5, 0.7, best_score):
            trial_split.add_metric_results_train(
                name=MAIN_METRIC_NAME,
                score=score,
                higher_score_is_better=False)
            trial_split.add_metric_results_validation(
                name=MAIN_METRIC_NAME,
                score=score,
                higher_score_is_better=False)

        trial_split.set_success()
        trial.set_success()

    return trial_split
Beispiel #15
0
def test_load_full_dump_from_path(tmpdir):
    """A step saved by a full dump can be loaded back through its sub-path.

    The pipeline is fitted, saved with ``full_dump=True``, and ``step_b`` is
    then loaded from ``<PIPELINE_NAME>/step_b``. The loaded wrapper's
    callback tapes are checked against the expected outputs.
    """
    # Given
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    callback_step = FitTransformCallbackStep(fit_tape, transform_tape)
    steps = [
        ('step_a', Identity()),
        ('step_b', OutputTransformerWrapper(callback_step)),
    ]
    pipeline = Pipeline(steps, cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    step_b_path = os.path.join(PIPELINE_NAME, 'step_b')
    loaded_pipeline = ExecutionContext(tmpdir).load(step_b_path)

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    inner_step = loaded_pipeline.wrapped
    transform_data = inner_step.transform_callback_function.data
    fit_data = inner_step.fit_callback_function.data
    assert np.array_equal(transform_data[0], EXPECTED_OUTPUTS)
    assert np.array_equal(fit_data[0][0], EXPECTED_OUTPUTS)
    assert np.array_equal(fit_data[0][1], [None] * len(EXPECTED_OUTPUTS))
Beispiel #16
0
    def __init__(self, wrapped, max_plotted_predictions=None):
        """Initialise as a ``FeatureUnion`` of ``Identity()`` and ``wrapped``.

        The union is joined by a ``PlotPredictionsJoiner`` and built with
        ``n_jobs=1``.

        :param wrapped: the step placed next to ``Identity()`` in the union.
        :param max_plotted_predictions: value forwarded to the joiner;
            ``None`` is replaced by 10.
        """
        plot_limit = 10 if max_plotted_predictions is None else max_plotted_predictions
        joiner = PlotPredictionsJoiner(plot_predictions, plot_limit)

        FeatureUnion.__init__(
            self,
            [Identity(), wrapped],
            joiner=joiner,
            n_jobs=1,
        )
Beispiel #17
0
    def test_trial_should_have_end_time_later_than_start_time(self):
        """A successful split has ``datetime`` start/end times, start before end."""
        with self.trial.new_validation_split(Identity()) as trial_split:
            # Short pause so the two timestamps can differ.
            time.sleep(0.001)  # TODO: maybe remove sleep?
            trial_split.set_success()

        started, ended = trial_split.start_time, trial_split.end_time
        assert isinstance(started, datetime.datetime)
        assert isinstance(ended, datetime.datetime)
        assert started < ended
Beispiel #18
0
 def __init__(self, steps_as_tuple: NamedTupleList, **kwargs):
     """
     Build a ``FeatureUnion`` whose first step is ``Identity``, so that the
     inputs are kept and joined with the outputs of the other steps.

     :param steps_as_tuple: the steps passed to the ``FeatureUnion``; ``Identity()`` is prepended.
     :param kwargs: other arguments forwarded to ``FeatureUnion``.
     """
     steps_with_identity = [Identity()] + steps_as_tuple
     FeatureUnion.__init__(self, steps_with_identity, **kwargs)
Beispiel #19
0
def test_has_children_mixin_apply_should_apply_method_to_recursive_childrends():
    """``apply('_set_hyperparams', ...)`` reaches steps of a nested pipeline.

    Hyperparameters keyed ``Pipeline__c__hp`` and ``Pipeline__d__hp`` are
    expected to land on the nested steps ``c`` and ``d`` as ``hp``.
    """
    nested = Pipeline([('c', Identity()), ('d', Identity())])
    p = Pipeline([
        ('a', Identity()),
        ('b', Identity()),
        nested,
    ])

    nested_hyperparams = HyperparameterSamples({
        'Pipeline__c__hp': 3,
        'Pipeline__d__hp': 4,
    })
    p.apply('_set_hyperparams', ra=None, hyperparams=nested_hyperparams)

    inner_pipeline = p['Pipeline']
    assert inner_pipeline['c'].hyperparams.to_flat_dict()['hp'] == 3
    assert inner_pipeline['d'].hyperparams.to_flat_dict()['hp'] == 4
Beispiel #20
0
def test_has_children_mixin_apply_should_apply_method_to_direct_childrends():
    """``apply('_set_hyperparams', ...)`` reaches the direct children.

    Hyperparameters keyed by step name are expected to land on steps ``a``
    and ``b`` and on the nested ``Pipeline`` as ``hp``.
    """
    nested = Pipeline([('c', Identity()), ('d', Identity())])
    p = Pipeline([
        ('a', Identity()),
        ('b', Identity()),
        nested,
    ])

    direct_hyperparams = HyperparameterSamples({
        'a__hp': 0,
        'b__hp': 1,
        'Pipeline__hp': 2,
    })
    p.apply('_set_hyperparams', ra=None, hyperparams=direct_hyperparams)

    expected_by_step = (('a', 0), ('b', 1), ('Pipeline', 2))
    for step_name, expected_hp in expected_by_step:
        flat = p[step_name].hyperparams.to_flat_as_dict_primitive()
        assert flat['hp'] == expected_hp
Beispiel #21
0
def main():
    """Fit, then transform, a pipeline mixing non-fittable and non-transformable steps."""
    steps = [
        NonFittableStep(),
        NonTransformableStep(),
        # Note: Identity does nothing: it inherits from both
        # NonFittableMixin and NonTransformableMixin.
        Identity(),
    ]
    p = Pipeline(steps)

    sample = np.array([0, 1])
    p = p.fit(sample, np.array([0, 1]))

    out = p.transform(np.array([0, 1]))
Beispiel #22
0
    def test_trial_split_is_new_best_score_should_return_true_with_one_score(self):
        """With a single recorded score, ``is_new_best_score`` is expected to be true."""
        metric_kwargs = dict(
            name=MAIN_METRIC_NAME,
            score=0.5,
            higher_score_is_better=False,
        )
        with self.trial.new_validation_split(Identity()) as trial_split:
            trial_split.add_metric_results_train(**metric_kwargs)
            trial_split.add_metric_results_validation(**metric_kwargs)

        assert trial_split.is_new_best_score()
Beispiel #23
0
def test_trial_should_create_new_split():
    """A new validation split is timestamped and registered on its trial."""
    trial = Trial(
        hyperparams=HyperparameterSamples({'a': 2}),
        main_metric_name=MAIN_METRIC_NAME,
    )

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.set_success()

    started, ended = trial_split.start_time, trial_split.end_time
    assert isinstance(started, datetime.datetime)
    assert isinstance(ended, datetime.datetime)
    assert started < ended
    # The split must be the first one stored on the trial.
    assert trial.validation_splits[0] == trial_split
Beispiel #24
0
def main():
    """Fit a deeply nested pipeline and read the fitted PCA components back.

    The PCA step sits two ``Pipeline`` levels down and is retrieved by name
    and index after fitting; its ``components_`` are expected to have shape
    ``(2, 5)`` for the 5-column input.
    """
    np.random.seed(42)
    X = np.random.randint(5, size=(100, 5))

    # Create and fit the pipeline:
    innermost = Pipeline([Identity(), PCA(n_components=2)])
    middle = Pipeline([
        Identity(),
        Identity(),  # Note: an Identity step is a step that does nothing.
        Identity(),  # We use it here for demonstration purposes.
        innermost,
    ])
    pipeline = Pipeline([
        StandardScaler(),
        Identity(),
        middle,
    ])
    pipeline, X_t = pipeline.fit_transform(X)

    # Get the components:
    fitted_pca_step = pipeline["Pipeline"]["Pipeline"][-1]
    pca_components = fitted_pca_step.get_wrapped_sklearn_predictor().components_
    assert pca_components.shape == (2, 5)
Beispiel #25
0
def test_trial_split_is_new_best_score_should_return_true_with_one_score():
    """With a single recorded score, ``is_new_best_score`` is expected to be true."""
    trial = Trial(
        hyperparams=HyperparameterSamples({'a': 2}),
        main_metric_name=MAIN_METRIC_NAME,
    )
    metric_kwargs = dict(
        name=MAIN_METRIC_NAME,
        score=0.5,
        higher_score_is_better=False,
    )

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(**metric_kwargs)
        trial_split.add_metric_results_validation(**metric_kwargs)

    assert trial_split.is_new_best_score()
Beispiel #26
0
def test_can_update_scipy_distribution():
    """Updating a hyperparameter space with scipy distributions wraps them.

    After the update, the scipy ``randint`` entry is expected to be a
    ``ScipyDiscreteDistributionWrapper`` and the ``gamma`` entry a
    ``ScipyContinuousDistributionWrapper``; both must be samplable via
    ``rvs()``.
    """
    neuraxle_space = HyperparameterSpace({
        'rand_int_neuraxle': RandInt(2, 5)  # neuraxle
    })
    p = Identity().set_hyperparams_space(neuraxle_space)

    scipy_space = HyperparameterSpace({
        'rand_int_scipy': randint(low=2, high=5),  # scipy
        'gamma_scipy': gamma(0.2),  # scipy
    })
    p.update_hyperparams_space(scipy_space)

    assert isinstance(p.get_hyperparams_space()['rand_int_scipy'], ScipyDiscreteDistributionWrapper)
    assert isinstance(p.get_hyperparams_space()['gamma_scipy'], ScipyContinuousDistributionWrapper)

    randint_sample = p.get_hyperparams_space()['rand_int_scipy'].rvs()
    gamma_sample = p.get_hyperparams_space()['gamma_scipy'].rvs()
    assert 5 >= randint_sample >= 2
    assert isinstance(gamma_sample, float)
def main():
    """Fit then transform a pipeline mixing non-fittable and non-transformable steps.

    The transformed output is asserted to equal the input data.
    """
    steps = [
        NonFittableStep(),
        NonTransformableStep(),
        # Note: Identity does nothing: it inherits from both
        # NonFittableMixin and NonTransformableMixin.
        Identity(),
    ]
    p = Pipeline(steps)

    some_data = np.array([0, 1])
    p = p.fit(some_data)
    # Out:
    #     NonFittableStep: I transformed.
    #     NonTransformableStep: I fitted.

    out = p.transform(some_data)
    # Out:
    #     NonFittableStep: I transformed.

    assert np.array_equal(out, some_data)
Beispiel #28
0
def test_sklearn_wrapper_with_an_invalid_step():
    """Wrapping an ``Identity`` step in ``SKLearnWrapper`` is expected to raise ``ValueError``."""
    not_a_sklearn_estimator = Identity()
    with pytest.raises(ValueError):
        SKLearnWrapper(not_a_sklearn_estimator)
Beispiel #29
0
 def _get_expected_output_checkpoint_path(self, context):
     """
     Return the path of ``context`` after pushing an ``Identity`` step named
     with ``DataCheckpointType.EXPECTED_OUTPUT.value``.

     :param context: the context onto which the placeholder step is pushed.
     :return: the result of ``get_path()`` on the pushed context.
     """
     placeholder_step = Identity(name=DataCheckpointType.EXPECTED_OUTPUT.value)
     pushed_context = context.push(placeholder_step)
     return pushed_context.get_path()
Beispiel #30
0
 def _get_data_input_checkpoint_path(self, context):
     """
     Return the path of ``context`` after pushing an ``Identity`` step named
     with ``DataCheckpointType.DATA_INPUT.value``.

     :param context: the context onto which the placeholder step is pushed.
     :return: the result of ``get_path()`` on the pushed context.
     """
     placeholder_step = Identity(name=DataCheckpointType.DATA_INPUT.value)
     pushed_context = context.push(placeholder_step)
     return pushed_context.get_path()