Example 1
def test_pipeline_set_one_hyperparam_level_two_dict():
    p = Pipeline([
        ("a", SomeStep()),
        ("b", Pipeline([
            ("a", SomeStep()),
            ("b", SomeStep()),
            ("c", SomeStep())
        ])),
        ("c", SomeStep())
    ])

    p.set_hyperparams({
        "b": {
            "a": {
                "learning_rate": 7
            },
            "learning_rate": 9
        }
    })
    print(p.get_hyperparams())

    assert p["b"]["a"].hyperparams["learning_rate"] == 7
    assert p["b"]["c"].hyperparams == dict()
    assert p["b"].hyperparams["learning_rate"] == 9
    assert p["c"].hyperparams == dict()
Example 2
def test_pipeline_nested_mutate_inverse_transform():
    expected_tape = ["1", "2", "3", "4", "5", "6", "7", "7", "6", "5", "4", "3", "2", "1"]
    tape = TapeCallbackFunction()

    p = Pipeline([
        Identity(),
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        Pipeline([
            Identity(),
            TransformCallbackStep(tape.callback, ["3"]),
            TransformCallbackStep(tape.callback, ["4"]),
            TransformCallbackStep(tape.callback, ["5"]),
            Identity()
        ]),
        TransformCallbackStep(tape.callback, ["6"]),
        TransformCallbackStep(tape.callback, ["7"]),
        Identity()
    ])

    p, _ = p.fit_transform(np.ones((1, 1)))  # will add range(1, 8) to tape.

    print("[mutating]")
    p = p.mutate(new_method="inverse_transform", method_to_assign_to="transform")

    p.transform(np.ones((1, 1)))  # will add reversed(range(1, 8)) to tape.

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
Example 3
def main():
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        Pipeline([
            Identity(),
            Identity(),
            PCA(n_components=4)
        ])
    ])

    p.set_hyperparams_space({
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__PCA__n_components': RandInt(2, 3)
    })

    samples = p.get_hyperparams_space().rvs()
    p.set_hyperparams(samples)

    samples = p.get_hyperparams().to_flat_as_dict_primitive()
    assert 42 <= samples['step1__multiply_by'] <= 50
    assert -10 <= samples['step2__multiply_by'] <= 0
    assert samples['Pipeline__PCA__n_components'] in [2, 3]
    assert p['Pipeline']['PCA'].get_wrapped_sklearn_predictor().n_components in [2, 3]
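Note: get_hyperparams() returns a nested structure here; to_flat_as_dict_primitive() flattens it into plain double-underscore keys such as 'step1__multiply_by', matching the keys used in set_hyperparams_space() above.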
Example 4
def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    This test was added for a strange bug at the boundary of nested pipelines
    that occurred when the identity steps were not used.
    """
    expected_tape = ["1", "2", "3", "4", "5", "6", "7", "7", "6", "5", "4", "3", "2", "1"]
    tape = TapeCallbackFunction()

    p = Pipeline([
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        Pipeline([
            TransformCallbackStep(tape.callback, ["3"]),
            TransformCallbackStep(tape.callback, ["4"]),
            TransformCallbackStep(tape.callback, ["5"]),
        ]),
        TransformCallbackStep(tape.callback, ["6"]),
        TransformCallbackStep(tape.callback, ["7"]),
    ])

    p, _ = p.fit_transform(np.ones((1, 1)))  # will add range(1, 8) to tape.

    print("[mutating, inversing, and calling each inverse_transform]")
    reversed(p).transform(np.ones((1, 1)))  # will add reversed(range(1, 8)) to tape, calling inverse_transforms.

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
Example 5
def main():
    """
    Process tasks of batch size 10 with 8 queued workers that have a max queue size of 10.
    Each task does the following: for each data input, sleep 0.02 seconds, then multiply by 2.
    """
    sleep_time = 0.02
    p = SequentialQueuedPipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ], n_workers_per_step=8, max_queue_size=10, batch_size=10)

    a = time.time()
    outputs_streaming = p.transform(list(range(100)))
    b = time.time()
    time_queued_pipeline = b - a
    print('SequentialQueuedPipeline')
    print('execution time: {} seconds'.format(time_queued_pipeline))

    """
    Process data inputs sequentially. 
    For each data input, sleep 0.02 seconds, and then multiply by 2.
    """
    p = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ])

    a = time.time()
    outputs_vanilla = p.transform(list(range(100)))
    b = time.time()
    time_vanilla_pipeline = b - a

    print('VanillaPipeline')
    print('execution time: {} seconds'.format(time_vanilla_pipeline))

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example 6
def test_parallel_queued_parallelize_correctly():
    sleep_time = 0.001
    p = SequentialQueuedPipeline([
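        # Each step below is given as a (name, n_workers, max_queue_size, step) tuple.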
        ('1', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('2', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('3', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])),
        ('4', 4, 10, Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]))
    ], batch_size=10)

    a = time.time()
    outputs_streaming = p.transform(list(range(100)))
    b = time.time()
    time_queued_pipeline = b - a

    p = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])
    ])

    a = time.time()
    outputs_vanilla = p.transform(list(range(100)))
    b = time.time()
    time_vanilla_pipeline = b - a

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
Example 7
def test_set_train_should_set_train_to_true():
    pipeline = Pipeline([SomeStep(),
                         SomeStep(),
                         Pipeline([
                             SomeStep(),
                         ])])

    assert pipeline.is_train
    assert pipeline[0].is_train
    assert pipeline[1].is_train
    assert pipeline[2].is_train
    assert pipeline[2][0].is_train
Example 8
def test_tensorflowv2_saver(tmpdir):
    dataset = toy_dataset()
    model = Pipeline([create_model_step(tmpdir)])
    loss_first_fit = evaluate_model_on_dataset(model, dataset)

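    # Save the fitted model under tmpdir, then reload it into a fresh pipeline below.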
    model.save(ExecutionContext(root=tmpdir))

    loaded = Pipeline([create_model_step(tmpdir)
                       ]).load(ExecutionContext(root=tmpdir))
    loss_second_fit = evaluate_model_on_dataset(loaded, dataset)

    assert loss_second_fit < (loss_first_fit / 2)
Example 9
def test_transform_should_transform_all_steps_for_each_data_inputs_expected_outputs():
    tape = TapeCallbackFunction()
    p = Pipeline([
        ForEachDataInput(Pipeline([
            TransformCallbackStep(tape.callback, ["1"]),
            TransformCallbackStep(tape.callback, ["2"]),
        ]))
    ])
    data_inputs = [[0, 1], [1, 2]]

    outputs = p.transform(data_inputs)

    assert tape.get_name_tape() == ["1", "2", "1", "2"]
Example 10
def test_pipeline_set_one_hyperparam_level_two_flat():
    p = Pipeline([("a", SomeStep()),
                  ("b",
                   Pipeline([("a", SomeStep()), ("b", SomeStep()),
                             ("c", SomeStep())])), ("c", SomeStep())])

    p.set_hyperparams({"b__a__learning_rate": 7})
    print(p.get_hyperparams())

    assert p["b"]["a"].hyperparams["learning_rate"] == 7
    assert p["b"]["c"].hyperparams.to_flat_dict() == dict()
    assert p["b"].hyperparams.to_flat_dict() == {'a__learning_rate': 7}
    assert p["c"].hyperparams.to_flat_dict() == dict()
Example 11
def test_set_train_should_set_train_to_false():
    pipeline = Pipeline([SomeStep(),
                         SomeStep(),
                         Pipeline([
                             SomeStep(),
                         ])])

    pipeline.set_train(False)

    assert not pipeline.is_train
    assert not pipeline[0].is_train
    assert not pipeline[1].is_train
    assert not pipeline[2].is_train
    assert not pipeline[2][0].is_train
Example 12
def test_has_children_mixin_apply_should_return_recursive_dict_to_recursive_childrends():
    p = Pipeline([
        Pipeline([
            ('c', Identity().set_hyperparams(HyperparameterSamples({'hp': 3}))),
            ('d', Identity().set_hyperparams(HyperparameterSamples({'hp': 4})))
        ]).set_hyperparams(HyperparameterSamples({'hp': 2})),
    ])

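    # apply() runs the given method recursively on this step and all of its
    # children, returning the results in a flat dict keyed by nested step paths.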
    results = p.apply('_get_hyperparams', ra=None)

    assert results['Pipeline__hp'] == 2
    assert results['Pipeline__c__hp'] == 3
    assert results['Pipeline__d__hp'] == 4
Example 13
def test_expectedoutputnull_is_fine_when_null(tmpdir):

    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = None

    p = Pipeline([SomeStep()])
    p.fit_transform(data_inputs, expected_outputs)
Example 14
def create_test_case_fit_multiple_steps_choosen():
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()
    c_callback = TapeCallbackFunction()
    d_callback = TapeCallbackFunction()

    return NeuraxleTestCase(
        pipeline=Pipeline([
            ChooseOneOrManyStepsOf([
                ('a', FitTransformCallbackStep(a_callback, c_callback, transform_function=lambda di: di * 2)),
                ('b', FitTransformCallbackStep(b_callback, d_callback, transform_function=lambda di: di * 2))
            ]),
        ]),
        callbacks=[a_callback, c_callback, b_callback, d_callback],
        expected_callbacks_data=[
            [],
            (DATA_INPUTS, EXPECTED_OUTPUTS),
            [],
            (DATA_INPUTS, EXPECTED_OUTPUTS)
        ],
        hyperparams={
            'ChooseOneOrManyStepsOf__a__enabled': True,
            'ChooseOneOrManyStepsOf__b__enabled': True
        },
        hyperparams_space={
            'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
            'ChooseOneOrManyStepsOf__b__enabled': Boolean()
        },
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
    )
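Note: with both 'a' and 'b' enabled, ChooseOneOrManyStepsOf applies the two chosen steps and joins their outputs, which is why the expected output repeats the doubled inputs twice.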
Example 15
def test_load_full_dump_from_path(tmpdir):
    # Given
    tape_fit_callback_function = TapeCallbackFunction()
    tape_transform_callback_function = TapeCallbackFunction()
    pipeline = Pipeline(
        [('step_a', Identity()),
         ('step_b',
          OutputTransformerWrapper(
              FitTransformCallbackStep(tape_fit_callback_function,
                                       tape_transform_callback_function)))],
        cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    loaded_pipeline = ExecutionContext(tmpdir).load(
        os.path.join(PIPELINE_NAME, 'step_b'))

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    loaded_step_b_wrapped_step = loaded_pipeline.wrapped
    assert np.array_equal(
        loaded_step_b_wrapped_step.transform_callback_function.data[0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][1],
        [None] * len(EXPECTED_OUTPUTS))
Example 16
def test_automl_sequential_wrapper(tmpdir):
    # Given
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    hyperparameter_space = HyperparameterSpace({
        'multiplication_1__multiply_by': RandInt(1, 3),
        'multiplication_2__multiply_by': RandInt(1, 3),
        'multiplication_3__multiply_by': RandInt(1, 3),
    })

    pipeline = Pipeline(
        [('multiplication_1', MultiplyByN()),
         ('multiplication_2', MultiplyByN()),
         ('multiplication_3', MultiplyByN())],
        cache_folder=tmpdir).set_hyperparams_space(hyperparameter_space)

    auto_ml = RandomSearch(
        KFoldCrossValidationWrapper().set_step(pipeline),
        hyperparams_repository=HyperparamsJSONRepository(tmpdir),
        n_iter=10)

    # When
    auto_ml: AutoMLSequentialWrapper = auto_ml.fit(data_inputs,
                                                   expected_outputs)
    best_model: Pipeline = auto_ml.get_best_model()
    predicted_outputs = best_model.transform(data_inputs)

    # Then
    actual_mse = ((predicted_outputs - expected_outputs)**2).mean()
    assert actual_mse < 20000
Example 17
def test_step_cloner_should_fit_transform():
    # Given
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(tape), MultiplyByN(2)]))
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))

    # When
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    # Then
    assert isinstance(p.steps[0], Pipeline)
    assert np.array_equal(p.steps[0][0].callback_function.data[0][0],
                          data_inputs[0])
    assert np.array_equal(p.steps[0][0].callback_function.data[0][1],
                          expected_outputs[0])

    assert isinstance(p.steps[1], Pipeline)
    assert np.array_equal(p.steps[1][0].callback_function.data[0][0],
                          data_inputs[1])
    assert np.array_equal(p.steps[1][0].callback_function.data[0][1],
                          expected_outputs[1])

    assert np.array_equal(processed_outputs, data_inputs * 2)
Example 18
def main():
    """
    The task is to sleep 0.02 seconds for each data input and then multiply by 2.
    """
    sleep_time = 0.02
    preprocessing_and_model_steps = [ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]

    # Classical pipeline - all at once with one big batch:
    p = Pipeline(preprocessing_and_model_steps)
    time_vanilla_pipeline, output_classical = eval_run_time(p)
    print(f"Classical 'Pipeline' execution time: {time_vanilla_pipeline} seconds.")

    # Classical minibatch pipeline - minibatch size 10:
    p = MiniBatchSequentialPipeline(preprocessing_and_model_steps,
                                    batch_size=10)
    time_minibatch_pipeline, output_minibatch = eval_run_time(p)
    print(f"Minibatched 'MiniBatchSequentialPipeline' execution time: {time_minibatch_pipeline} seconds.")

    # Parallel pipeline - minibatch size 10 with 16 parallel workers per step that
    # have a max queue size of 10 batches between preprocessing and the model:
    p = SequentialQueuedPipeline(preprocessing_and_model_steps,
                                 n_workers_per_step=16, max_queue_size=10, batch_size=10)
    time_parallel_pipeline, output_parallel = eval_run_time(p)
    print(f"Parallel 'SequentialQueuedPipeline' execution time: {time_parallel_pipeline} seconds.")

    assert time_parallel_pipeline < time_minibatch_pipeline, str((time_parallel_pipeline, time_minibatch_pipeline))
    assert np.array_equal(output_classical, output_minibatch)
    assert np.array_equal(output_classical, output_parallel)
Example 19
def test_inner_concatenate_data_should_merge_2d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D)
    data_container_2d = DataContainer(data_inputs=data_inputs_2d,
                                      expected_outputs=expected_outputs_2d)
    data_container_3d = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('2d', data_container_2d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['2d'])])

    data_container_3d = p.handle_transform(data_container_3d,
                                           ExecutionContext())

    # Then
    assert data_container_3d.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                   SHAPE_3D[2] + 1)
    assert data_container_3d.expected_outputs.shape == (SHAPE_3D[0],
                                                        SHAPE_3D[1],
                                                        SHAPE_3D[2] + 1)
    assert np.array_equal(data_container_3d.data_inputs[..., -1],
                          data_container_2d.data_inputs)
    assert np.array_equal(data_container_3d.expected_outputs[..., -1],
                          data_container_2d.expected_outputs)
Example 20
def test_hyperparam_space():
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)}))
        ]),
        ModelStacking([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)}))
        ],
            joiner=NumpyTranspose(),
            judge=SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)}))
        )
    ])

    rvsed = p.get_hyperparams_space().rvs()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()

    assert "AddFeatures" in hyperparams.keys()
    assert "SomeStep" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep"]
    assert "SomeStep1" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep1"]
    assert "SomeStep" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep"]
    assert "SomeStep1" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep2"]
Example 21
def choose_one_step_single_step_chosen_transform():
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()
    c_callback = TapeCallbackFunction()
    d_callback = TapeCallbackFunction()

    return NeuraxleTestCase(
        pipeline=Pipeline([
            ChooseOneStepOf([
                ('a', FitTransformCallbackStep(a_callback, c_callback, transform_function=lambda di: di * 2)),
                ('b', FitTransformCallbackStep(b_callback, d_callback, transform_function=lambda di: di * 2))
            ]),
        ]),
        callbacks=[a_callback, c_callback, b_callback, d_callback],
        expected_callbacks_data=[DATA_INPUTS, [], [], []],
        hyperparams={
            'ChooseOneStepOf__choice': 'a'
        },
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
    )
Example 22
def main():
    p = Pipeline([
        ForceAlwaysAlwaysHandleMixinStep(),
    ])

    p = p.fit(np.array([0, 1]), np.array([0, 1]))
    outputs = p.transform(np.array([0, 1]))
Example 23
def test_pipeline_should_update_hyperparams_space():
    p = Pipeline([
        SomeStep().set_name('step_1'),
        SomeStep().set_name('step_2')
    ])

    p.set_hyperparams_space({
        'hp': RandInt(1, 2),
        'step_1__hp': RandInt(2, 3),
        'step_2__hp': RandInt(3, 4)
    })
    p.update_hyperparams_space({
        'hp': RandInt(4, 6),
        'step_2__hp': RandInt(6, 8)
    })

    assert isinstance(p.hyperparams_space, HyperparameterSpace)

    assert p.hyperparams_space['hp'].min_included == 4
    assert p.hyperparams_space['hp'].max_included == 6

    assert p[0].hyperparams_space['hp'].min_included == 2
    assert p[0].hyperparams_space['hp'].max_included == 3

    assert p[1].hyperparams_space['hp'].min_included == 6
    assert p[1].hyperparams_space['hp'].max_included == 8
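Note: update_hyperparams_space() merges the new ranges into the existing space rather than replacing it wholesale: the root 'hp' and 'step_2__hp' ranges are overwritten, while 'step_1__hp' keeps its original RandInt(2, 3).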
Example 24
def create_test_case_invalid_step_choosen():
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()

    return NeuraxleTestCase(
        pipeline=Pipeline([
            ChooseOneOrManyStepsOf([
                ('a', TransformCallbackStep(a_callback, transform_function=lambda di: di * 2)),
                ('b', TransformCallbackStep(b_callback, transform_function=lambda di: di * 2))
            ]),
        ]),
        callbacks=[a_callback, b_callback],
        expected_callbacks_data=[DATA_INPUTS, DATA_INPUTS],
        hyperparams={
            'ChooseOneOrManyStepsOf__c__enabled': True,
            'ChooseOneOrManyStepsOf__b__enabled': False
        },
        hyperparams_space={
            'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
            'ChooseOneOrManyStepsOf__b__enabled': Boolean()
        },
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
    )
Example 25
def test_trainer_train():
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 4
    p = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        linear_model.LinearRegression()
    ])

    trainer: Trainer = Trainer(
        epochs=10,
        scoring_callback=ScoringCallback(mean_squared_error,
                                         higher_score_is_better=False),
        validation_splitter=ValidationSplitter(test_size=0.20))

    repo_trial: Trial = trainer.train(pipeline=p,
                                      data_inputs=data_inputs,
                                      expected_outputs=expected_outputs)

    trained_pipeline = repo_trial.get_trained_pipeline(split_number=0)

    outputs = trained_pipeline.transform(data_inputs)
    mse = mean_squared_error(expected_outputs, outputs)

    assert mse < 1
Example 26
def test_logger():
    file_path = "test.log"

    if os.path.exists(file_path):
        os.remove(file_path)

    # Given
    logger = logging.getLogger('test')
    file_handler = logging.FileHandler(file_path)
    file_handler.setLevel('DEBUG')
    logger.addHandler(file_handler)
    logger.setLevel('DEBUG')
    context = ExecutionContext(logger=logger)
    pipeline = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        LoggingStep()
    ])

    # When
    data_container = DataContainer(
        data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
    pipeline.handle_fit(data_container, context)

    # Then
    assert os.path.exists(file_path)
    with open(file_path) as f:
        log_content = f.read()

    # Teardown
    file_handler.close()
    os.remove(file_path)
Example 27
def test_model_stacking_fit_transform():
    model_stacking = Pipeline([
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({
                    "alpha": LogUniform(0.7, 1.4),
                    "fit_intercept": Boolean()
                })),
        )
    ])
    expected_outputs_shape = (379, 1)
    data_inputs_shape = (379, 13)
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    model_stacking, outputs = model_stacking.fit_transform(
        data_inputs, expected_outputs)

    assert outputs.shape == expected_outputs_shape
Example 28
def test_inner_concatenate_data_should_merge_1d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_1d, expected_outputs_1d = _create_data_source(SHAPE_1D)
    data_container_1d = DataContainer(data_inputs=data_inputs_1d,
                                      expected_outputs=expected_outputs_1d)
    data_container = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('1d', data_container_1d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['1d'])])

    data_container = p.handle_transform(data_container, ExecutionContext())

    # Then
    broadcasted_data_inputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.data_inputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))
    broadcasted_expected_outputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.expected_outputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))

    assert np.array_equal(data_container.data_inputs[..., -1],
                          broadcasted_data_inputs_1d)
    assert np.array_equal(data_container.expected_outputs[..., -1],
                          broadcasted_expected_outputs_1d)

    assert data_container.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                SHAPE_3D[2] + 1)
    assert data_container.expected_outputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                     SHAPE_3D[2] + 1)
Example 29
def test_automl_early_stopping_callback(tmpdir):
    # TODO: fix this unit test
    # Given
    hp_repository = InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    n_epochs = 60
    auto_ml = AutoML(
        pipeline=Pipeline([
            FitTransformCallbackStep().set_name('callback'),
            MultiplyByN(2).set_hyperparams_space(
                HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
            NumpyReshape(new_shape=(-1, 1)),
            linear_model.LinearRegression()
        ]),
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        validation_splitter=ValidationSplitter(0.20),
        scoring_callback=ScoringCallback(mean_squared_error,
                                         higher_score_is_better=False),
        callbacks=[
            MetricCallback('mse',
                           metric_function=mean_squared_error,
                           higher_score_is_better=False),
        ],
        n_trials=1,
        refit_trial=True,
        epochs=n_epochs,
        hyperparams_repository=hp_repository)

    # When
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 2
    auto_ml = auto_ml.fit(data_inputs=data_inputs,
                          expected_outputs=expected_outputs)

    # Then
    p = auto_ml.get_best_model()
Example 30
def test_automl_should_shallow_copy_data_before_each_epoch():
    # see issue #332 https://github.com/Neuraxio/Neuraxle/issues/332
    data_inputs = np.random.randint(0, 100, (100, 3))
    expected_outputs = np.random.randint(0, 3, 100)

    from sklearn.preprocessing import StandardScaler
    p = Pipeline([
        SKLearnWrapper(StandardScaler()),
        SKLearnWrapper(LinearSVC(),
                       HyperparameterSpace({'C': RandInt(0, 10000)})),
    ])

    auto_ml = AutoML(p,
                     validation_splitter=ValidationSplitter(0.20),
                     refit_trial=True,
                     n_trials=10,
                     epochs=10,
                     cache_folder_when_no_handle='cache',
                     scoring_callback=ScoringCallback(
                         mean_squared_error, higher_score_is_better=False),
                     callbacks=[
                         MetricCallback('mse',
                                        metric_function=mean_squared_error,
                                        higher_score_is_better=False)
                     ],
                     hyperparams_repository=InMemoryHyperparamsRepository(
                         cache_folder='cache'),
                     continue_loop_on_error=False)

    random_search = auto_ml.fit(data_inputs, expected_outputs)

    best_model = random_search.get_best_model()

    assert isinstance(best_model, Pipeline)