def test_pipeline_set_one_hyperparam_level_two_dict():
    """Nested-dict hyperparams should reach a second-level step and its parent.

    The dict sets ``learning_rate`` on the nested pipeline ``b`` and on its
    child ``a``; the sibling steps are expected to keep empty hyperparams.
    """
    first_step = ("a", SomeStep())
    nested_pipeline = Pipeline([
        ("a", SomeStep()),
        ("b", SomeStep()),
        ("c", SomeStep()),
    ])
    last_step = ("c", SomeStep())
    p = Pipeline([first_step, ("b", nested_pipeline), last_step])

    nested_hyperparams = {
        "b": {
            "a": {"learning_rate": 7},
            "learning_rate": 9,
        }
    }
    p.set_hyperparams(nested_hyperparams)
    print(p.get_hyperparams())

    assert p["b"]["a"].hyperparams["learning_rate"] == 7
    assert p["b"]["c"].hyperparams == {}
    assert p["b"].hyperparams["learning_rate"] == 9
    assert p["c"].hyperparams == {}
def test_pipeline_nested_mutate_inverse_transform():
    """After mutating ``inverse_transform`` into ``transform``, the tape should read 1..7 then 7..1."""
    forward_names = ["1", "2", "3", "4", "5", "6", "7"]
    expected_tape = forward_names + forward_names[::-1]
    tape = TapeCallbackFunction()

    def taped(name):
        # One callback step that writes `name` on the shared tape.
        return TransformCallbackStep(tape.callback, [name])

    before_nested = [Identity(), taped("1"), taped("2")]
    nested = Pipeline([Identity(), taped("3"), taped("4"), taped("5"), Identity()])
    after_nested = [taped("6"), taped("7"), Identity()]
    p = Pipeline(before_nested + [nested] + after_nested)

    p, _ = p.fit_transform(np.ones((1, 1)))  # will add range(1, 8) to tape.

    print("[mutating]")
    p = p.mutate(new_method="inverse_transform", method_to_assign_to="transform")
    p.transform(np.ones((1, 1)))  # will add reversed(range(1, 8)) to tape.

    name_tape = tape.get_name_tape()
    print(expected_tape)
    print(name_tape)
    assert expected_tape == name_tape
def main():
    """Sample a hyperparameter space on a nested pipeline and check the drawn values.

    The space is declared with flat ``parent__child__name`` keys, sampled with
    ``rvs()``, applied to the pipeline, and read back as a flat dict.
    """
    nested = Pipeline([Identity(), Identity(), PCA(n_components=4)])
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        nested,
    ])

    space = {
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__PCA__n_components': RandInt(2, 3),
    }
    p.set_hyperparams_space(space)

    drawn = p.get_hyperparams_space().rvs()
    p.set_hyperparams(drawn)
    flat = p.get_hyperparams().to_flat_as_dict_primitive()

    assert 42 <= flat['step1__multiply_by'] <= 50
    assert -10 <= flat['step2__multiply_by'] <= 0
    assert flat['Pipeline__PCA__n_components'] in [2, 3]
    # The sampled value must also have reached the wrapped sklearn object.
    assert p['Pipeline']['PCA'].get_wrapped_sklearn_predictor().n_components in [2, 3]
def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    Regression test for a strange bug at the border of the pipelines that
    happened when no Identity steps were used around the callback steps.
    """
    forward_names = ["1", "2", "3", "4", "5", "6", "7"]
    expected_tape = forward_names + forward_names[::-1]
    tape = TapeCallbackFunction()

    def taped(name):
        # One callback step that writes `name` on the shared tape.
        return TransformCallbackStep(tape.callback, [name])

    before_nested = [taped("1"), taped("2")]
    nested = Pipeline([taped("3"), taped("4"), taped("5")])
    after_nested = [taped("6"), taped("7")]
    p = Pipeline(before_nested + [nested] + after_nested)

    p, _ = p.fit_transform(np.ones((1, 1)))  # will add range(1, 8) to tape.

    print("[mutating, inversing, and calling each inverse_transform]")
    # will add reversed(range(1, 8)) to tape, calling inverse_transforms.
    reversed(p).transform(np.ones((1, 1)))

    name_tape = tape.get_name_tape()
    print(expected_tape)
    print(name_tape)
    assert expected_tape == name_tape
def main():
    """
    Compare a queued, parallel pipeline against a plain sequential one.

    Queued run: process tasks of batch size 10 with 8 queued workers that have
    a max queue size of 10. Each task does the following: for each data input,
    sleep 0.02 seconds, and multiply by 2.

    Vanilla run: the same steps inside a regular ``Pipeline``.

    The queued run must be faster and both runs must produce equal outputs.
    """
    sleep_time = 0.02

    p = SequentialQueuedPipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ], n_workers_per_step=8, max_queue_size=10, batch_size=10)

    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    outputs_streaming = p.transform(list(range(100)))
    time_queued_pipeline = time.perf_counter() - start

    print('SequentialQueuedPipeline')
    print('execution time: {} seconds'.format(time_queued_pipeline))

    # Process data inputs sequentially.
    # For each data input, sleep 0.02 seconds, and then multiply by 2.
    p = Pipeline([
        Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]),
    ])

    start = time.perf_counter()
    outputs_vanilla = p.transform(list(range(100)))
    time_vanilla_pipeline = time.perf_counter() - start

    print('VanillaPipeline')
    print('execution time: {} seconds'.format(time_vanilla_pipeline))

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
def test_parallel_queued_parallelize_correctly():
    """The queued pipeline should beat the sequential one and return equal outputs.

    Four identical "sleep then multiply by 2" sub-pipelines are run once through
    a ``SequentialQueuedPipeline`` (each step declared as a 4-tuple whose two
    integers are 4 and 10) and once through a plain ``Pipeline``.
    """
    sleep_time = 0.001

    def sleep_then_double():
        # Fresh sub-pipeline: sleep for each data input, then multiply by 2.
        return Pipeline([ForEachDataInput(Sleep(sleep_time=sleep_time)), MultiplyByN(2)])

    p = SequentialQueuedPipeline([
        ('1', 4, 10, sleep_then_double()),
        ('2', 4, 10, sleep_then_double()),
        ('3', 4, 10, sleep_then_double()),
        ('4', 4, 10, sleep_then_double()),
    ], batch_size=10)

    # perf_counter is monotonic and high-resolution; time.time() can jump with
    # system clock adjustments, which matters for intervals this short.
    start = time.perf_counter()
    outputs_streaming = p.transform(list(range(100)))
    time_queued_pipeline = time.perf_counter() - start

    p = Pipeline([
        sleep_then_double(),
        sleep_then_double(),
        sleep_then_double(),
        sleep_then_double(),
    ])

    start = time.perf_counter()
    outputs_vanilla = p.transform(list(range(100)))
    time_vanilla_pipeline = time.perf_counter() - start

    assert time_queued_pipeline < time_vanilla_pipeline
    assert np.array_equal(outputs_streaming, outputs_vanilla)
def test_set_train_should_set_train_to_true():
    """``set_train(True)`` should switch the pipeline and every nested step to train mode."""
    pipeline = Pipeline([
        SomeStep(),
        SomeStep(),
        Pipeline([
            SomeStep(),
        ]),
    ])

    # Turn training off first: otherwise the assertions below would also pass
    # on the untouched default state and the test would not exercise set_train.
    pipeline.set_train(False)
    pipeline.set_train(True)

    assert pipeline.is_train
    assert pipeline[0].is_train
    assert pipeline[1].is_train
    assert pipeline[2].is_train
    assert pipeline[2][0].is_train
def test_tensorflowv2_saver(tmpdir):
    """Save a model, load it into a freshly built pipeline, and compare losses.

    The loss measured on the reloaded pipeline must be under half of the loss
    measured on the original one.
    """
    dataset = toy_dataset()

    original = Pipeline([create_model_step(tmpdir)])
    loss_first_fit = evaluate_model_on_dataset(original, dataset)
    original.save(ExecutionContext(root=tmpdir))

    # Rebuild the same architecture, then restore the state saved above.
    fresh = Pipeline([create_model_step(tmpdir)])
    loaded = fresh.load(ExecutionContext(root=tmpdir))
    loss_second_fit = evaluate_model_on_dataset(loaded, dataset)

    assert loss_second_fit < (loss_first_fit / 2)
def test_transform_should_transform_all_steps_for_each_data_inputs_expected_outputs():
    """``ForEachDataInput`` should run the whole wrapped pipeline once per data input.

    With two data inputs and two taped steps, the tape must read 1, 2, 1, 2.
    """
    tape = TapeCallbackFunction()
    p = Pipeline([
        ForEachDataInput(Pipeline([
            TransformCallbackStep(tape.callback, ["1"]),
            TransformCallbackStep(tape.callback, ["2"]),
        ]))
    ])
    data_inputs = [[0, 1], [1, 2]]

    # Only the call order recorded on the tape is checked; the returned
    # outputs were never used, so they are no longer bound to a name.
    p.transform(data_inputs)

    assert tape.get_name_tape() == ["1", "2", "1", "2"]
def test_pipeline_set_one_hyperparam_level_two_flat():
    """A flat ``b__a__learning_rate`` key should reach step ``a`` of nested pipeline ``b``."""
    first_step = ("a", SomeStep())
    nested_pipeline = Pipeline([
        ("a", SomeStep()),
        ("b", SomeStep()),
        ("c", SomeStep()),
    ])
    last_step = ("c", SomeStep())
    p = Pipeline([first_step, ("b", nested_pipeline), last_step])

    flat_hyperparams = {"b__a__learning_rate": 7}
    p.set_hyperparams(flat_hyperparams)
    print(p.get_hyperparams())

    assert p["b"]["a"].hyperparams["learning_rate"] == 7
    assert p["b"]["c"].hyperparams.to_flat_dict() == {}
    # The nested pipeline also exposes the value under its child's key.
    assert p["b"].hyperparams.to_flat_dict() == {'a__learning_rate': 7}
    assert p["c"].hyperparams.to_flat_dict() == {}
def test_set_train_should_set_train_to_false():
    """``set_train(False)`` should turn train mode off on the pipeline and all nested steps."""
    nested = Pipeline([SomeStep()])
    pipeline = Pipeline([SomeStep(), SomeStep(), nested])

    pipeline.set_train(False)

    # Root, its three direct children, then the step inside the nested pipeline.
    checked_steps = (
        pipeline,
        pipeline[0],
        pipeline[1],
        pipeline[2],
        pipeline[2][0],
    )
    for step in checked_steps:
        assert not step.is_train
def test_has_children_mixin_apply_should_return_recursive_dict_to_recursive_childrends():
    """``apply('_get_hyperparams')`` should return values keyed by each step's flattened path."""

    def identity_with_hp(value):
        # Identity step whose single hyperparameter 'hp' is set to `value`.
        return Identity().set_hyperparams(HyperparameterSamples({'hp': value}))

    nested = Pipeline([
        ('c', identity_with_hp(3)),
        ('d', identity_with_hp(4)),
    ])
    nested = nested.set_hyperparams(HyperparameterSamples({'hp': 2}))
    p = Pipeline([nested])

    results = p.apply('_get_hyperparams', ra=None)

    assert results['Pipeline__hp'] == 2
    assert results['Pipeline__c__hp'] == 3
    assert results['Pipeline__d__hp'] == 4
def test_expectedoutputnull_is_fine_when_null(tmpdir):
    """``fit_transform`` should run without error when the expected outputs are ``None``."""
    data_inputs = np.arange(11)  # the integers 0..10
    pipeline = Pipeline([SomeStep()])

    # No assertion: the test passes as long as this call does not raise.
    pipeline.fit_transform(data_inputs, None)
def create_test_case_fit_multiple_steps_choosen():
    """Build the test case in which both branches of ``ChooseOneOrManyStepsOf`` are enabled.

    ``callbacks`` and ``expected_callbacks_data`` are parallel lists: entry *i*
    of the latter is the data expected on callback *i* of the former.

    :return: a ``NeuraxleTestCase`` describing pipeline, hyperparams and expectations
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()
    c_callback = TapeCallbackFunction()
    d_callback = TapeCallbackFunction()

    step_a = FitTransformCallbackStep(a_callback, c_callback, transform_function=lambda di: di * 2)
    step_b = FitTransformCallbackStep(b_callback, d_callback, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneOrManyStepsOf([('a', step_a), ('b', step_b)]),
    ])

    enabled_keys = (
        'ChooseOneOrManyStepsOf__a__enabled',
        'ChooseOneOrManyStepsOf__b__enabled',
    )
    fitted_with = (DATA_INPUTS, EXPECTED_OUTPUTS)
    doubled_inputs = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[a_callback, c_callback, b_callback, d_callback],
        expected_callbacks_data=[[], fitted_with, [], fitted_with],
        hyperparams={key: True for key in enabled_keys},
        hyperparams_space={key: Boolean() for key in enabled_keys},
        # The doubled inputs appear twice in the expected output.
        expected_processed_outputs=np.array(doubled_inputs + doubled_inputs)
    )
def test_load_full_dump_from_path(tmpdir):
    """A step saved by a full dump should be loadable on its own from ``<pipeline name>/step_b``."""
    # Given
    tape_fit_callback_function = TapeCallbackFunction()
    tape_transform_callback_function = TapeCallbackFunction()
    callback_step = FitTransformCallbackStep(
        tape_fit_callback_function,
        tape_transform_callback_function,
    )
    steps = [
        ('step_a', Identity()),
        ('step_b', OutputTransformerWrapper(callback_step)),
    ]
    pipeline = Pipeline(steps, cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, _ = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    step_b_path = os.path.join(PIPELINE_NAME, 'step_b')
    loaded_pipeline = ExecutionContext(tmpdir).load(step_b_path)
    assert isinstance(loaded_pipeline, OutputTransformerWrapper)

    loaded_step_b_wrapped_step = loaded_pipeline.wrapped
    transform_data = loaded_step_b_wrapped_step.transform_callback_function.data
    assert np.array_equal(transform_data[0], EXPECTED_OUTPUTS)

    # The first recorded fit call holds a pair: the expected outputs, then a
    # list of None of the same length.
    first_fit_call = loaded_step_b_wrapped_step.fit_callback_function.data[0]
    assert np.array_equal(first_fit_call[0], EXPECTED_OUTPUTS)
    assert np.array_equal(first_fit_call[1], [None] * len(EXPECTED_OUTPUTS))
def test_automl_sequential_wrapper(tmpdir):
    """Random search over three multipliers should give a best model with MSE below 20000."""
    # Given
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    step_names = ('multiplication_1', 'multiplication_2', 'multiplication_3')
    hyperparameter_space = HyperparameterSpace({
        '{}__multiply_by'.format(step_name): RandInt(1, 3)
        for step_name in step_names
    })
    steps = [(step_name, MultiplyByN()) for step_name in step_names]
    pipeline = Pipeline(steps, cache_folder=tmpdir).set_hyperparams_space(hyperparameter_space)

    auto_ml = RandomSearch(
        KFoldCrossValidationWrapper().set_step(pipeline),
        hyperparams_repository=HyperparamsJSONRepository(tmpdir),
        n_iter=10
    )

    # When
    auto_ml: AutoMLSequentialWrapper = auto_ml.fit(data_inputs, expected_outputs)
    best_model: Pipeline = auto_ml.get_best_model()
    predicted_outputs = best_model.transform(data_inputs)

    # Then
    squared_errors = (predicted_outputs - expected_outputs) ** 2
    actual_mse = squared_errors.mean()
    assert actual_mse < 20000
def test_step_cloner_should_fit_transform():
    """Each cloned pipeline should be fitted on the data input / expected output at its own index."""
    # Given
    tape = TapeCallbackFunction()
    wrapped = Pipeline([FitCallbackStep(tape), MultiplyByN(2)])
    p = StepClonerForEachDataInput(wrapped)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))

    # When
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    # Then
    for index in (0, 1):
        clone = p.steps[index]
        assert isinstance(clone, Pipeline)
        # The fit callback of a clone records its (data input, expected output) pair.
        assert np.array_equal(clone[0].callback_function.data[0][0], data_inputs[index])
        assert np.array_equal(clone[0].callback_function.data[0][1], expected_outputs[index])

    assert np.array_equal(processed_outputs, data_inputs * 2)
def main():
    """
    Time the same steps in a classical, a minibatched and a parallel pipeline.

    The task is to sleep 0.02 seconds for each data input and then multiply by 2.
    The parallel run must be faster than the minibatched run, and all three
    runs must produce equal outputs.
    """
    sleep_time = 0.02
    preprocessing_and_model_steps = [ForEach(Sleep(sleep_time=sleep_time)), MultiplyByN(2)]

    # Classical pipeline - all at once with one big batch:
    p = Pipeline(preprocessing_and_model_steps)
    time_vanilla_pipeline, output_classical = eval_run_time(p)
    print(f"Classical 'Pipeline' execution time: {time_vanilla_pipeline} seconds.")

    # Classical minibatch pipeline - minibatch size 10:
    p = MiniBatchSequentialPipeline(preprocessing_and_model_steps, batch_size=10)
    time_minibatch_pipeline, output_minibatch = eval_run_time(p)
    print(f"Minibatched 'MiniBatchSequentialPipeline' execution time: {time_minibatch_pipeline} seconds.")

    # Parallel pipeline - minibatch size 10 with 16 parallel workers per step that
    # have a max queue size of 10 batches between preprocessing and the model:
    p = SequentialQueuedPipeline(
        preprocessing_and_model_steps,
        n_workers_per_step=16, max_queue_size=10, batch_size=10)
    time_parallel_pipeline, output_parallel = eval_run_time(p)
    print(f"Parallel 'SequentialQueuedPipeline' execution time: {time_parallel_pipeline} seconds.")

    # The failure message reports the two durations actually being compared
    # (it previously showed the vanilla time instead of the minibatch time).
    assert time_parallel_pipeline < time_minibatch_pipeline, str(
        (time_parallel_pipeline, time_minibatch_pipeline))
    assert np.array_equal(output_classical, output_minibatch)
    assert np.array_equal(output_classical, output_parallel)
def test_inner_concatenate_data_should_merge_2d_with_3d():
    """A 2D sub data container should be appended as one extra slot on the last axis of the 3D data."""
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D)
    data_container_2d = DataContainer(
        data_inputs=data_inputs_2d,
        expected_outputs=expected_outputs_2d
    )
    data_container_3d = DataContainer(
        data_inputs=data_inputs_3d,
        expected_outputs=expected_outputs_3d
    )
    data_container_3d = data_container_3d.add_sub_data_container('2d', data_container_2d)

    # When
    concatenate_step = InnerConcatenateDataContainer(sub_data_container_names=['2d'])
    p = Pipeline([concatenate_step])
    data_container_3d = p.handle_transform(data_container_3d, ExecutionContext())

    # Then
    merged_shape = (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1)
    assert data_container_3d.data_inputs.shape == merged_shape
    assert data_container_3d.expected_outputs.shape == merged_shape
    # The last slot of the merged arrays must hold the 2D data.
    assert np.array_equal(data_container_3d.data_inputs[..., -1], data_container_2d.data_inputs)
    assert np.array_equal(data_container_3d.expected_outputs[..., -1], data_container_2d.expected_outputs)
def test_hyperparam_space():
    """Sampled hyperparams should keep the nested step structure of the pipeline.

    The space is sampled with ``rvs()`` before being applied, so the pipeline
    receives concrete values rather than the distributions themselves. Every
    auto-named ``SomeStep`` is checked for the hyperparameter it declared.
    """
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)}))
        ]),
        ModelStacking(
            [
                SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
                SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
                SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)})),
                SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)}))
            ],
            joiner=NumpyTranspose(),
            judge=SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)}))
        )
    ])

    # Draw samples first; the space itself used to be passed as hyperparams.
    rvsed = p.get_hyperparams_space().rvs()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()

    assert "AddFeatures" in hyperparams.keys()
    assert "SomeStep" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep"]
    assert "SomeStep1" in hyperparams["AddFeatures"]
    assert "n_components" in hyperparams["AddFeatures"]["SomeStep1"]

    # Duplicate step names get a numeric suffix: SomeStep, SomeStep1, SomeStep2, ...
    assert "SomeStep" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep"]
    assert "SomeStep1" in hyperparams["ModelStacking"]
    assert "n_estimators" in hyperparams["ModelStacking"]["SomeStep1"]
    assert "SomeStep2" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep2"]
    assert "SomeStep3" in hyperparams["ModelStacking"]
    assert "max_depth" in hyperparams["ModelStacking"]["SomeStep3"]
def choose_one_step_single_step_chosen_transform():
    """Build the transform test case where ``ChooseOneStepOf`` is told to pick step ``a``.

    ``callbacks`` and ``expected_callbacks_data`` are parallel lists: only the
    first callback (``a_callback``) is expected to receive ``DATA_INPUTS``.

    :return: a ``NeuraxleTestCase`` describing pipeline, hyperparams and expectations
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()
    c_callback = TapeCallbackFunction()
    d_callback = TapeCallbackFunction()

    return NeuraxleTestCase(
        pipeline=Pipeline([
            ChooseOneStepOf([
                ('a', FitTransformCallbackStep(a_callback, c_callback, transform_function=lambda di: di * 2)),
                ('b', FitTransformCallbackStep(b_callback, d_callback, transform_function=lambda di: di * 2))
            ]),
        ]),
        callbacks=[a_callback, c_callback, b_callback, d_callback],
        expected_callbacks_data=[DATA_INPUTS, [], [], []],
        hyperparams={
            # The key prefix must be the name of the step in this pipeline.
            # The step is a ChooseOneStepOf, so the former
            # 'ChooseOneOrManyStepsOf__choice' key did not address it.
            # NOTE(review): assumes unnamed steps are named after their class - confirm.
            'ChooseOneStepOf__choice': 'a'
        },
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
    )
def main():
    """Fit, then transform, a pipeline holding a single ``ForceAlwaysAlwaysHandleMixinStep``."""
    pipeline = Pipeline([ForceAlwaysAlwaysHandleMixinStep()])

    # Fresh arrays are built for each argument, as no array is shared between calls.
    pipeline = pipeline.fit(np.array([0, 1]), np.array([0, 1]))

    # The transform result is not inspected; the calls only need to succeed.
    _outputs = pipeline.transform(np.array([0, 1]))
def test_pipeline_should_update_hyperparams_space():
    """``update_hyperparams_space`` should overwrite only the keys it is given.

    ``step_1__hp`` is absent from the update and must keep its initial range.
    """
    step_1 = SomeStep().set_name('step_1')
    step_2 = SomeStep().set_name('step_2')
    p = Pipeline([step_1, step_2])

    initial_space = {
        'hp': RandInt(1, 2),
        'step_1__hp': RandInt(2, 3),
        'step_2__hp': RandInt(3, 4),
    }
    p.set_hyperparams_space(initial_space)

    partial_update = {
        'hp': RandInt(4, 6),
        'step_2__hp': RandInt(6, 8),
    }
    p.update_hyperparams_space(partial_update)

    assert isinstance(p.hyperparams_space, HyperparameterSpace)

    # Updated on the pipeline itself.
    assert p.hyperparams_space['hp'].min_included == 4
    assert p.hyperparams_space['hp'].max_included == 6

    # Untouched on the first step.
    assert p[0].hyperparams_space['hp'].min_included == 2
    assert p[0].hyperparams_space['hp'].max_included == 3

    # Updated on the second step.
    assert p[1].hyperparams_space['hp'].min_included == 6
    assert p[1].hyperparams_space['hp'].max_included == 8
def create_test_case_invalid_step_choosen():
    """Build a test case whose hyperparams enable a step that is not declared.

    The hyperparams reference a step ``c`` while only ``a`` and ``b`` exist;
    presumably this is the "invalid step" the function name refers to.

    :return: a ``NeuraxleTestCase`` describing pipeline, hyperparams and expectations
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()

    step_a = TransformCallbackStep(a_callback, transform_function=lambda di: di * 2)
    step_b = TransformCallbackStep(b_callback, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneOrManyStepsOf([('a', step_a), ('b', step_b)]),
    ])

    hyperparams = {
        'ChooseOneOrManyStepsOf__c__enabled': True,
        'ChooseOneOrManyStepsOf__b__enabled': False,
    }
    hyperparams_space = {
        'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
        'ChooseOneOrManyStepsOf__b__enabled': Boolean(),
    }

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[a_callback, b_callback],
        expected_callbacks_data=[DATA_INPUTS, DATA_INPUTS],
        hyperparams=hyperparams,
        hyperparams_space=hyperparams_space,
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
    )
def test_trainer_train():
    """A pipeline trained by ``Trainer`` should predict ``4 * x`` with an MSE below 1."""
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 4

    fixed_space = HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})
    p = Pipeline([
        MultiplyByN(2).set_hyperparams_space(fixed_space),
        NumpyReshape(new_shape=(-1, 1)),
        linear_model.LinearRegression(),
    ])

    scoring = ScoringCallback(mean_squared_error, higher_score_is_better=False)
    splitter = ValidationSplitter(test_size=0.20)
    trainer: Trainer = Trainer(
        epochs=10,
        scoring_callback=scoring,
        validation_splitter=splitter
    )

    repo_trial: Trial = trainer.train(
        pipeline=p,
        data_inputs=data_inputs,
        expected_outputs=expected_outputs
    )

    trained_pipeline = repo_trial.get_trained_pipeline(split_number=0)
    predictions = trained_pipeline.transform(data_inputs)
    mse = mean_squared_error(expected_outputs, predictions)

    assert mse < 1
def test_logger():
    """Fitting with a logger-carrying context should create a readable log file.

    Teardown runs in a ``finally`` block so that a failing assertion cannot
    leave the file handler open, attached to the shared ``'test'`` logger, or
    leave ``test.log`` on disk.
    """
    file_path = "test.log"
    if os.path.exists(file_path):
        os.remove(file_path)

    # Given
    logger = logging.getLogger('test')
    file_handler = logging.FileHandler(file_path)
    file_handler.setLevel('DEBUG')
    logger.addHandler(file_handler)
    logger.setLevel('DEBUG')

    try:
        context = ExecutionContext(logger=logger)
        pipeline = Pipeline([
            MultiplyByN(2).set_hyperparams_space(
                HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
            NumpyReshape(new_shape=(-1, 1)),
            LoggingStep()
        ])

        # When
        data_container = DataContainer(
            data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
        pipeline.handle_fit(data_container, context)

        # Then
        assert os.path.exists(file_path)
        with open(file_path) as f:
            # Only readability is checked; the log content is not asserted.
            f.read()
    finally:
        # Teardown. logging.getLogger returns a process-wide object, so the
        # handler must be detached or it would leak into later tests.
        logger.removeHandler(file_handler)
        file_handler.close()
        if os.path.exists(file_path):
            os.remove(file_path)
def test_model_stacking_fit_transform():
    """``ModelStacking`` should fit-transform (379, 13) inputs into (379, 1) outputs."""
    gradient_boosting = SKLearnWrapper(
        GradientBoostingRegressor(),
        HyperparameterSpace({
            "n_estimators": RandInt(50, 600),
            "max_depth": RandInt(1, 10),
            "learning_rate": LogUniform(0.07, 0.7)
        })
    )
    k_means = SKLearnWrapper(
        KMeans(),
        HyperparameterSpace({"n_clusters": RandInt(5, 10)})
    )
    joiner = NumpyTranspose()
    judge = SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.7, 1.4),
            "fit_intercept": Boolean()
        })
    )
    model_stacking = Pipeline([
        ModelStacking([gradient_boosting, k_means], joiner=joiner, judge=judge)
    ])

    expected_outputs_shape = (379, 1)
    data_inputs_shape = (379, 13)
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    model_stacking, outputs = model_stacking.fit_transform(data_inputs, expected_outputs)

    assert outputs.shape == expected_outputs_shape
def test_inner_concatenate_data_should_merge_1d_with_3d():
    """A 1D sub data container should be broadcast and appended on the last axis of the 3D data."""
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_1d, expected_outputs_1d = _create_data_source(SHAPE_1D)
    data_container_1d = DataContainer(
        data_inputs=data_inputs_1d,
        expected_outputs=expected_outputs_1d
    )
    data_container = DataContainer(
        data_inputs=data_inputs_3d,
        expected_outputs=expected_outputs_3d
    )
    data_container = data_container.add_sub_data_container('1d', data_container_1d)

    # When
    concatenate_step = InnerConcatenateDataContainer(sub_data_container_names=['1d'])
    p = Pipeline([concatenate_step])
    data_container = p.handle_transform(data_container, ExecutionContext())

    # Then
    leading_shape = (SHAPE_3D[0], SHAPE_3D[1])
    merged_shape = (SHAPE_3D[0], SHAPE_3D[1], SHAPE_3D[2] + 1)

    # Reference values: the 1D data gets a trailing axis, then is broadcast
    # to the two leading dimensions of the 3D data.
    broadcasted_data_inputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.data_inputs, axis=-1),
        shape=leading_shape
    )
    broadcasted_expected_outputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.expected_outputs, axis=-1),
        shape=leading_shape
    )

    assert np.array_equal(data_container.data_inputs[..., -1], broadcasted_data_inputs_1d)
    assert np.array_equal(data_container.expected_outputs[..., -1], broadcasted_expected_outputs_1d)
    assert data_container.data_inputs.shape == merged_shape
    assert data_container.expected_outputs.shape == merged_shape
def test_automl_early_stopping_callback(tmpdir):
    """Run a one-trial, 60-epoch AutoML fit and fetch the best model.

    No assertion is made on the result yet (see the TODO below).
    """
    # TODO: fix this unit test
    # Given
    hp_repository = InMemoryHyperparamsRepository(cache_folder=str(tmpdir))
    n_epochs = 60

    fixed_space = HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})
    pipeline = Pipeline([
        FitTransformCallbackStep().set_name('callback'),
        MultiplyByN(2).set_hyperparams_space(fixed_space),
        NumpyReshape(new_shape=(-1, 1)),
        linear_model.LinearRegression()
    ])
    optimizer = RandomSearchHyperparameterSelectionStrategy()
    splitter = ValidationSplitter(0.20)
    scoring = ScoringCallback(mean_squared_error, higher_score_is_better=False)
    metric_callbacks = [
        MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False),
    ]

    auto_ml = AutoML(
        pipeline=pipeline,
        hyperparams_optimizer=optimizer,
        validation_splitter=splitter,
        scoring_callback=scoring,
        callbacks=metric_callbacks,
        n_trials=1,
        refit_trial=True,
        epochs=n_epochs,
        hyperparams_repository=hp_repository
    )

    # When
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 2
    auto_ml = auto_ml.fit(data_inputs=data_inputs, expected_outputs=expected_outputs)

    # Then
    p = auto_ml.get_best_model()
def test_automl_should_shallow_copy_data_before_each_epoch():
    """AutoML should complete 10 trials of 10 epochs and return a ``Pipeline`` as best model.

    See issue #332: https://github.com/Neuraxio/Neuraxle/issues/332
    """
    # The two random draws stay in this order so the generator is consumed
    # in the same sequence.
    data_inputs = np.random.randint(0, 100, (100, 3))
    expected_outputs = np.random.randint(0, 3, 100)

    from sklearn.preprocessing import StandardScaler

    scaler = SKLearnWrapper(StandardScaler())
    classifier = SKLearnWrapper(LinearSVC(), HyperparameterSpace({'C': RandInt(0, 10000)}))
    p = Pipeline([scaler, classifier])

    splitter = ValidationSplitter(0.20)
    scoring = ScoringCallback(mean_squared_error, higher_score_is_better=False)
    metric_callbacks = [
        MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)
    ]
    repository = InMemoryHyperparamsRepository(cache_folder='cache')

    auto_ml = AutoML(
        p,
        validation_splitter=splitter,
        refit_trial=True,
        n_trials=10,
        epochs=10,
        cache_folder_when_no_handle='cache',
        scoring_callback=scoring,
        callbacks=metric_callbacks,
        hyperparams_repository=repository,
        continue_loop_on_error=False
    )

    random_search = auto_ml.fit(data_inputs, expected_outputs)
    best_model = random_search.get_best_model()

    assert isinstance(best_model, Pipeline)