def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    Run a nested pipeline forward, then its reversed version, without Identity steps.

    This test was required for a strange bug at the border of the pipelines
    that happened when the identities were not used.

    The tape is expected to hold the step names "1" to "7" followed by the
    same names in reverse order.
    """
    forward_names = [str(step_number) for step_number in range(1, 8)]
    expected_tape = forward_names + forward_names[::-1]

    tape = TapeCallbackFunction()

    def named_step(name):
        # Every step reports to the same tape under its own name.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        named_step("1"),
        named_step("2"),
        Pipeline([
            named_step("3"),
            named_step("4"),
            named_step("5"),
        ]),
        named_step("6"),
        named_step("7"),
    ])

    # Forward pass: will add range(1, 8) to tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating, inversing, and calling each inverse_transform]")

    # Will add reversed(range(1, 8)) to tape, calling inverse_transforms.
    reversed_pipeline = reversed(p)
    reversed_pipeline.transform(np.ones((1, 1)))

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
def test_pipeline_nested_mutate_inverse_transform():
    """
    Mutate a nested pipeline so that ``transform`` runs ``inverse_transform``.

    Both the outer and the inner pipeline are wrapped with Identity steps.
    After a forward ``fit_transform`` and a mutated ``transform``, the tape is
    expected to hold the names "1" to "7" followed by the same names reversed.
    """
    forward_names = [str(step_number) for step_number in range(1, 8)]
    expected_tape = forward_names + forward_names[::-1]

    tape = TapeCallbackFunction()

    def named_step(name):
        # Every step reports to the same tape under its own name.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        Identity(),
        named_step("1"),
        named_step("2"),
        Pipeline([
            Identity(),
            named_step("3"),
            named_step("4"),
            named_step("5"),
            Identity(),
        ]),
        named_step("6"),
        named_step("7"),
        Identity(),
    ])

    # Forward pass: will add range(1, 8) to tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(
        new_method="inverse_transform",
        method_to_assign_to="transform",
    )

    # Will add reversed(range(1, 8)) to tape.
    p.transform(np.ones((1, 1)))

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
def test_when_hyperparams_and_saved_no_pipeline_should_not_load_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit a second pipeline after a first run that used no checkpoint step.

    The first pipeline is created with an Identity in place of the checkpoint,
    ``different=True`` and ``save_pipeline=False``. The second pipeline uses a
    DefaultCheckpoint, and its tape is expected to contain "1", "2" and "3".
    """
    # Given
    load_tape = TapeCallbackFunction()
    checkpoint_step = DefaultCheckpoint()

    # When
    pipeline_save = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=Identity(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
        different=True,
        save_pipeline=False,
    )
    pipeline_save.fit_transform(data_inputs, expected_outputs)

    pipeline_load = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=checkpoint_step,
        tape=load_tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    pipeline_load, actual_data_inputs = pipeline_load.fit_transform(
        data_inputs, expected_outputs
    )

    # Then
    actual_tape = load_tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["1", "2", "3"]
def test_when_hyperparams_and_saved_same_pipeline_should_load_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit the same checkpointed pipeline twice against the same ``tmpdir``.

    Both pipelines are created with a DefaultCheckpoint and the same
    hyperparameters. The tape of the second run is expected to equal
    ``EXPECTED_TAPE_AFTER_CHECKPOINT``.
    """
    # Given
    load_tape = TapeCallbackFunction()

    # When
    pipeline_save = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=DefaultCheckpoint(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    pipeline_save.fit_transform(data_inputs, expected_outputs)

    pipeline_load = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=DefaultCheckpoint(),
        tape=load_tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    pipeline_load, actual_data_inputs = pipeline_load.fit_transform(
        data_inputs, expected_outputs
    )

    # Then
    actual_tape = load_tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == EXPECTED_TAPE_AFTER_CHECKPOINT
def test_when_hyperparams_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit a checkpointed pipeline with hyperparameters and check the saved pickles.

    After ``fit_transform``, the tape is expected to contain "1", "2" and "3",
    and pickle files with the expected names must exist in the ``di`` and
    ``eo`` folders under ``ResumablePipeline/pickle_checkpoint`` in ``tmpdir``.
    """
    tape = TapeCallbackFunction()
    checkpoint_step = DefaultCheckpoint()
    pipeline = create_pipeline(
        tmpdir,
        checkpoint_step,
        tape,
        HyperparameterSamples({"a__learning_rate": 1}),
    )

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs
    )

    actual_tape = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["1", "2", "3"]

    pickle_file_names = (
        '44f9d6dd8b6ccae571ca04525c3eaffa.pickle',
        '898a67b2f5eeae6393ca4b3162ba8e3d.pickle',
    )
    # Same check order as before: every 'di' file first, then every 'eo' file.
    for subfolder in ('di', 'eo'):
        for file_name in pickle_file_names:
            assert os.path.exists(
                os.path.join(
                    tmpdir, 'ResumablePipeline', 'pickle_checkpoint',
                    subfolder, file_name
                )
            )
def test_fit_transform_should_fit_transform_all_steps_for_each_data_inputs_expected_outputs():
    """
    Fit-transform a two-step pipeline wrapped in ForEachDataInput.

    With two data inputs, both the transform tape and the fit tape are
    expected to record "1", "2" twice, and the fit tape data is expected to
    pair each data input with its matching expected output.
    """
    tape = TapeCallbackFunction()
    tape_fit = TapeCallbackFunction()

    def named_step(name):
        # Transform calls go to `tape`, fit calls go to `tape_fit`.
        return FitTransformCallbackStep(tape.callback, tape_fit, [name])

    p = Pipeline([
        ForEachDataInput(Pipeline([
            named_step("1"),
            named_step("2"),
        ]))
    ])
    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    p, _ = p.fit_transform(data_inputs, expected_outputs)

    expected_names = ["1", "2", "1", "2"]
    assert tape.get_name_tape() == expected_names
    assert tape_fit.get_name_tape() == expected_names
    assert tape_fit.data == [
        ([0, 1], [2, 3]),
        ([0, 1], [2, 3]),
        ([1, 2], [4, 5]),
        ([1, 2], [4, 5]),
    ]
def test_transform_should_transform_all_steps_for_each_data_inputs_expected_outputs():
    """
    Transform with a two-step pipeline wrapped in ForEachDataInput.

    With two data inputs, the tape is expected to record "1", "2" twice.
    """
    tape = TapeCallbackFunction()

    def named_step(name):
        # Every step reports to the same tape under its own name.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        ForEachDataInput(Pipeline([
            named_step("1"),
            named_step("2"),
        ]))
    ])
    data_inputs = [[0, 1], [1, 2]]

    # Only the tape is inspected; the transformed outputs are not needed.
    p.transform(data_inputs)

    assert tape.get_name_tape() == ["1", "2", "1", "2"]
def test_when_no_hyperparams_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit a checkpointed pipeline without hyperparameters and check the saved pickles.

    After ``fit_transform``, the tape is expected to contain "1", "2" and "3",
    and the files ``0.pickle`` and ``1.pickle`` must exist in the ``di`` and
    ``eo`` folders under ``ResumablePipeline/pickle_checkpoint`` in ``tmpdir``.
    """
    tape = TapeCallbackFunction()
    checkpoint_step = DefaultCheckpoint()
    pipeline = create_pipeline(tmpdir, checkpoint_step, tape)

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs
    )

    actual_tape = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["1", "2", "3"]

    # Same check order as before: every 'di' file first, then every 'eo' file.
    for subfolder in ('di', 'eo'):
        for file_name in ('0.pickle', '1.pickle'):
            assert os.path.exists(
                os.path.join(
                    tmpdir, 'ResumablePipeline', 'pickle_checkpoint',
                    subfolder, file_name
                )
            )
def test_fit_for_each_should_fit_all_steps_for_each_data_inputs_expected_outputs():
    """
    Fit a two-step pipeline wrapped in ForEachDataInput.

    ``fit`` is expected to return a Pipeline. With two data inputs, the tape
    is expected to record "1", "2" twice, and the tape data is expected to
    pair each data input with its matching expected output.
    """
    tape = TapeCallbackFunction()

    def named_step(name):
        # Every step reports to the same tape under its own name.
        return FitCallbackStep(tape.callback, [name])

    p = Pipeline([
        ForEachDataInput(Pipeline([
            named_step("1"),
            named_step("2"),
        ]))
    ])
    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    p = p.fit(data_inputs, expected_outputs)

    assert isinstance(p, Pipeline)
    assert tape.get_name_tape() == ["1", "2", "1", "2"]
    assert tape.data == [
        ([0, 1], [2, 3]),
        ([0, 1], [2, 3]),
        ([1, 2], [4, 5]),
        ([1, 2], [4, 5]),
    ]
def test_pipeline_simple_mutate_inverse_transform():
    """
    Mutate a flat pipeline so that ``transform`` runs ``inverse_transform``.

    The pipeline has four callback steps wrapped with Identity steps. After a
    forward ``fit_transform`` and a mutated ``transform``, the tape is expected
    to hold the names "1" to "4" followed by the same names reversed.
    """
    forward_names = [str(step_number) for step_number in range(1, 5)]
    expected_tape = forward_names + forward_names[::-1]

    tape = TapeCallbackFunction()

    def named_step(name):
        # Every step reports to the same tape under its own name.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        Identity(),
        named_step("1"),
        named_step("2"),
        named_step("3"),
        named_step("4"),
        Identity(),
    ])

    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(
        new_method="inverse_transform",
        method_to_assign_to="transform",
    )

    p.transform(np.ones((1, 1)))

    assert expected_tape == tape.get_name_tape()