def create_test_case_invalid_step_choosen():
    """
    Build a NeuraxleTestCase whose hyperparams enable a step named 'c',
    which is not one of the declared choices ('a' and 'b').

    :return: the configured NeuraxleTestCase
    """
    tape_a = TapeCallbackFunction()
    tape_b = TapeCallbackFunction()

    # Both candidate steps double their data inputs.
    choices = ChooseOneOrManyStepsOf([
        ('a', TransformCallbackStep(tape_a, transform_function=lambda di: di * 2)),
        ('b', TransformCallbackStep(tape_b, transform_function=lambda di: di * 2)),
    ])

    chosen_hyperparams = {
        'ChooseOneOrManyStepsOf__c__enabled': True,
        'ChooseOneOrManyStepsOf__b__enabled': False,
    }
    search_space = {
        'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
        'ChooseOneOrManyStepsOf__b__enabled': Boolean(),
    }

    return NeuraxleTestCase(
        pipeline=Pipeline([choices]),
        callbacks=[tape_a, tape_b],
        expected_callbacks_data=[DATA_INPUTS, DATA_INPUTS],
        hyperparams=chosen_hyperparams,
        hyperparams_space=search_space,
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18]),
    )
def create_test_case_fit_multiple_steps_choosen():
    """
    Build a NeuraxleTestCase where both steps 'a' and 'b' of a
    ChooseOneOrManyStepsOf are enabled.

    :return: the configured NeuraxleTestCase
    """
    tape_a = TapeCallbackFunction()
    tape_b = TapeCallbackFunction()
    tape_c = TapeCallbackFunction()
    tape_d = TapeCallbackFunction()

    step_a = FitTransformCallbackStep(tape_a, tape_c, transform_function=lambda di: di * 2)
    step_b = FitTransformCallbackStep(tape_b, tape_d, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneOrManyStepsOf([('a', step_a), ('b', step_b)]),
    ])

    # One entry per callback, in the same order as `callbacks` below.
    expected_callbacks_data = [
        [],
        (DATA_INPUTS, EXPECTED_OUTPUTS),
        [],
        (DATA_INPUTS, EXPECTED_OUTPUTS),
    ]

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[tape_a, tape_c, tape_b, tape_d],
        expected_callbacks_data=expected_callbacks_data,
        hyperparams={
            'ChooseOneOrManyStepsOf__a__enabled': True,
            'ChooseOneOrManyStepsOf__b__enabled': True,
        },
        hyperparams_space={
            'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
            'ChooseOneOrManyStepsOf__b__enabled': Boolean(),
        },
        expected_processed_outputs=np.array([
            0, 2, 4, 6, 8, 10, 12, 14, 16, 18,
            0, 2, 4, 6, 8, 10, 12, 14, 16, 18,
        ]),
    )
def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    Regression test for a bug seen at the borders of nested pipelines when no
    Identity steps surround the callback steps.

    The tape is expected to hold the step names in forward order, followed by
    the same names in reverse order.
    """
    forward_names = ["1", "2", "3", "4", "5", "6", "7"]
    expected_tape = forward_names + forward_names[::-1]

    tape = TapeCallbackFunction()
    inner_pipeline = Pipeline([
        TransformCallbackStep(tape.callback, ["3"]),
        TransformCallbackStep(tape.callback, ["4"]),
        TransformCallbackStep(tape.callback, ["5"]),
    ])
    p = Pipeline([
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        inner_pipeline,
        TransformCallbackStep(tape.callback, ["6"]),
        TransformCallbackStep(tape.callback, ["7"]),
    ])

    # Forward pass: adds names "1" through "7" to the tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating, inversing, and calling each inverse_transform]")
    # Reversed pass: adds names "7" down to "1" to the tape.
    reversed(p).transform(np.ones((1, 1)))

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
def test_pipeline_nested_mutate_inverse_transform():
    """
    Mutating a nested pipeline so that `transform` calls `inverse_transform`
    should replay the callback steps in reverse order.
    """
    forward_names = ["1", "2", "3", "4", "5", "6", "7"]
    expected_tape = forward_names + forward_names[::-1]

    tape = TapeCallbackFunction()
    inner_pipeline = Pipeline([
        Identity(),
        TransformCallbackStep(tape.callback, ["3"]),
        TransformCallbackStep(tape.callback, ["4"]),
        TransformCallbackStep(tape.callback, ["5"]),
        Identity(),
    ])
    p = Pipeline([
        Identity(),
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        inner_pipeline,
        TransformCallbackStep(tape.callback, ["6"]),
        TransformCallbackStep(tape.callback, ["7"]),
        Identity(),
    ])

    # Forward pass: adds names "1" through "7" to the tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(new_method="inverse_transform", method_to_assign_to="transform")
    # Mutated pass: adds names "7" down to "1" to the tape.
    p.transform(np.ones((1, 1)))

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
def test_when_hyperparams_and_saved_same_pipeline_should_load_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit a first pipeline, then fit a second one built with the same
    arguments in the same tmpdir; the second run's tape must equal
    EXPECTED_TAPE_AFTER_CHECKPOINT.
    """
    # Given
    loading_tape = TapeCallbackFunction()

    # When
    first_pipeline = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=DefaultCheckpoint(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    first_pipeline.fit_transform(data_inputs, expected_outputs)

    second_pipeline = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=DefaultCheckpoint(),
        tape=loading_tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    second_pipeline, actual_data_inputs = second_pipeline.fit_transform(data_inputs, expected_outputs)

    # Then
    recorded_names = loading_tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert recorded_names == EXPECTED_TAPE_AFTER_CHECKPOINT
def test_load_full_dump_from_path(tmpdir):
    """
    After a full dump, loading the sub-path '<PIPELINE_NAME>/step_b' should
    give back the OutputTransformerWrapper together with the data its
    wrapped callbacks recorded.
    """
    # Given
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    wrapped_step = OutputTransformerWrapper(
        FitTransformCallbackStep(fit_tape, transform_tape)
    )
    pipeline = Pipeline(
        [('step_a', Identity()), ('step_b', wrapped_step)],
        cache_folder=tmpdir,
    ).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    step_b_path = os.path.join(PIPELINE_NAME, 'step_b')
    loaded_pipeline = ExecutionContext(tmpdir).load(step_b_path)
    assert isinstance(loaded_pipeline, OutputTransformerWrapper)

    inner_step = loaded_pipeline.wrapped
    assert np.array_equal(inner_step.transform_callback_function.data[0], EXPECTED_OUTPUTS)
    assert np.array_equal(inner_step.fit_callback_function.data[0][0], EXPECTED_OUTPUTS)
    assert np.array_equal(
        inner_step.fit_callback_function.data[0][1],
        [None] * len(EXPECTED_OUTPUTS),
    )
def test_when_hyperparams_and_saved_no_pipeline_should_not_load_checkpoint_pickle(tmpdir: LocalPath):
    """
    Run a first pipeline created with `save_pipeline=False`, then a second
    one in the same tmpdir; the second run must record all three steps.
    """
    # Given
    loading_tape = TapeCallbackFunction()
    checkpoint_step = DefaultCheckpoint()

    # When
    unsaved_pipeline = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=Identity(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
        different=True,
        save_pipeline=False,
    )
    unsaved_pipeline.fit_transform(data_inputs, expected_outputs)

    second_pipeline = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=checkpoint_step,
        tape=loading_tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    second_pipeline, actual_data_inputs = second_pipeline.fit_transform(data_inputs, expected_outputs)

    # Then
    recorded_names = loading_tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert recorded_names == ["1", "2", "3"]
def test_validation_split_wrapper_should_split_data(tmpdir):
    """
    A ValidationSplitWrapper with test_size=0.1 over 100 rows should fit on
    the first 90 rows, transform both splits, then transform the whole set,
    and leave train/validation scores on the best model.
    """
    transform_tape = TapeCallbackFunction()
    fit_tape = TapeCallbackFunction()
    doubling_step = FitTransformCallbackStep(
        transform_callback_function=transform_tape,
        fit_callback_function=fit_tape,
        transform_function=lambda di: di * 2,
    )
    random_search = RandomSearch(ValidationSplitWrapper(doubling_step, test_size=0.1))

    data_inputs = np.random.randint(1, 100, (100, 5))
    expected_outputs = np.random.randint(1, 100, (100, 5))

    random_search, outputs = random_search.fit_transform(data_inputs, expected_outputs)

    assert np.array_equal(outputs, data_inputs * 2)

    # should fit on train split
    fitted_inputs, fitted_outputs = fit_tape.data[0][0], fit_tape.data[0][1]
    assert np.array_equal(fitted_inputs, data_inputs[0:90])
    assert np.array_equal(fitted_outputs, expected_outputs[0:90])

    # should transform on test split
    assert np.array_equal(transform_tape.data[0], data_inputs[0:90])
    assert np.array_equal(transform_tape.data[1], data_inputs[90:])

    # should transform on all data at the end
    assert np.array_equal(transform_tape.data[2], data_inputs)

    score_attributes = (
        'scores_train',
        'scores_validation',
        'scores_train_mean',
        'scores_validation_mean',
        'scores_train_std',
        'scores_validation_std',
    )
    for attribute_name in score_attributes:
        assert getattr(random_search.best_model, attribute_name) is not None
def choose_one_step_single_step_chosen_transform():
    """
    Build a NeuraxleTestCase around a ChooseOneStepOf with two doubling
    steps, with the choice hyperparameter set to 'a'.

    :return: the configured NeuraxleTestCase
    """
    tape_a = TapeCallbackFunction()
    tape_b = TapeCallbackFunction()
    tape_c = TapeCallbackFunction()
    tape_d = TapeCallbackFunction()

    step_a = FitTransformCallbackStep(tape_a, tape_c, transform_function=lambda di: di * 2)
    step_b = FitTransformCallbackStep(tape_b, tape_d, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneStepOf([('a', step_a), ('b', step_b)]),
    ])

    # NOTE(review): the key is prefixed 'ChooseOneOrManyStepsOf' although the
    # step is a ChooseOneStepOf — confirm this matches the step's actual name.
    chosen_hyperparams = {'ChooseOneOrManyStepsOf__choice': 'a'}

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[tape_a, tape_c, tape_b, tape_d],
        expected_callbacks_data=[DATA_INPUTS, [], [], []],
        hyperparams=chosen_hyperparams,
        expected_processed_outputs=np.array([0, 2, 4, 6, 8, 10, 12, 14, 16, 18]),
    )
def test_when_hyperparams_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """
    With hyperparameters set, fitting the pipeline should write the expected
    data-input ('di') and expected-output ('eo') checkpoint pickles.
    """
    tape = TapeCallbackFunction()
    checkpoint_step = DefaultCheckpoint()
    pipeline = create_pipeline(
        tmpdir,
        checkpoint_step,
        tape,
        HyperparameterSamples({"a__learning_rate": 1}),
    )

    pipeline, actual_data_inputs = pipeline.fit_transform(data_inputs, expected_outputs)

    recorded_names = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert recorded_names == ["1", "2", "3"]

    pickle_names = (
        '44f9d6dd8b6ccae571ca04525c3eaffa.pickle',
        '898a67b2f5eeae6393ca4b3162ba8e3d.pickle',
    )
    for sub_folder in ('di', 'eo'):
        for pickle_name in pickle_names:
            assert os.path.exists(
                os.path.join(tmpdir, 'ResumablePipeline', 'pickle_checkpoint', sub_folder, pickle_name)
            )
def test_transform_should_transform_all_steps_for_each_data_inputs_expected_outputs():
    """
    ForEachDataInput should run every step of its wrapped pipeline once per
    data input, so two inputs produce the tape "1", "2", "1", "2".
    """
    tape = TapeCallbackFunction()
    per_item_pipeline = Pipeline([
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
    ])
    p = Pipeline([ForEachDataInput(per_item_pipeline)])

    data_inputs = [[0, 1], [1, 2]]
    outputs = p.transform(data_inputs)

    recorded_names = tape.get_name_tape()
    assert recorded_names == ["1", "2", "1", "2"]
def create_checkpoint_test_case(tmpdir):
    """
    Build a CheckpointTest holding a three-step ResumablePipeline (callback
    step, DefaultCheckpoint, callback step) plus the four tapes it uses.

    :param tmpdir: folder passed to the pipeline as `cache_folder`
    :return: the CheckpointTest
    """
    tape_transform_1, tape_fit_1 = TapeCallbackFunction(), TapeCallbackFunction()
    tape_transform_2, tape_fit_2 = TapeCallbackFunction(), TapeCallbackFunction()

    named_steps = [
        ('step1', FitTransformCallbackStep(tape_transform_1, tape_fit_1)),
        ('checkpoint', DefaultCheckpoint()),
        ('step2', FitTransformCallbackStep(tape_transform_2, tape_fit_2)),
    ]
    pipeline = ResumablePipeline(named_steps, cache_folder=tmpdir)

    return CheckpointTest(
        tape_transform_1,
        tape_fit_1,
        tape_transform_2,
        tape_fit_2,
        pipeline,
    )
def test_predict_should_predict_in_test_mode():
    """
    `predict` should apply only the TestOnlyWrapper step (multiply by 2),
    not the TrainOnlyWrapper step (multiply by 4).
    """
    tape_fit = TapeCallbackFunction()
    tape_transform = TapeCallbackFunction()

    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit)
    )
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit)
    )
    p = Pipeline([test_only_step, train_only_step])

    outputs = p.predict(np.array([1, 1]))

    expected = np.array([2, 2])
    assert np.array_equal(outputs, expected)
def test_predict_should_transform_with_initial_is_train_mode_after_predict():
    """
    After a `predict` call, `transform` should again apply the
    TrainOnlyWrapper step (multiply by 4) rather than the TestOnlyWrapper
    step (multiply by 2).
    """
    tape_fit = TapeCallbackFunction()
    tape_transform = TapeCallbackFunction()

    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit)
    )
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit)
    )
    p = Pipeline([test_only_step, train_only_step])

    p.predict(np.array([1, 1]))
    outputs = p.transform(np.array([1, 1]))

    expected = np.array([4, 4])
    assert np.array_equal(outputs, expected)
def test_when_no_hyperparams_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """
    Without hyperparameters, fitting the pipeline should write checkpoint
    pickles named '0.pickle' and '1.pickle' under both 'di' and 'eo'.
    """
    tape = TapeCallbackFunction()
    checkpoint_step = DefaultCheckpoint()
    pipeline = create_pipeline(tmpdir, checkpoint_step, tape)

    pipeline, actual_data_inputs = pipeline.fit_transform(data_inputs, expected_outputs)

    recorded_names = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert recorded_names == ["1", "2", "3"]

    for sub_folder in ('di', 'eo'):
        for pickle_name in ('0.pickle', '1.pickle'):
            checkpoint_file = os.path.join(
                tmpdir, 'ResumablePipeline', 'pickle_checkpoint', sub_folder, pickle_name
            )
            assert os.path.exists(checkpoint_file)
def test_handle_predict_should_predict_in_test_mode():
    """
    `handle_predict` should apply only the TestOnlyWrapper step (multiply by
    2) to the data container's data inputs.
    """
    tape_fit = TapeCallbackFunction()
    tape_transform = TapeCallbackFunction()

    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit)
    )
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit)
    )
    p = Pipeline([test_only_step, train_only_step])

    input_container = DataContainer(
        data_inputs=np.array([1, 1]),
        expected_outputs=np.array([1, 1]),
    )
    data_container = p.handle_predict(
        data_container=input_container,
        context=ExecutionContext(),
    )

    assert np.array_equal(data_container.data_inputs, np.array([2, 2]))
def test_fit_transform_should_fit_then_use_cache(tmpdir):
    """
    With JoblibValueCachingWrapper, fit should see the whole batch once,
    while transform should be called only once per distinct value.
    """
    tape_transform = TapeCallbackFunction()
    tape_fit = TapeCallbackFunction()
    log_step = LogFitTransformCallbackStep(
        tape_transform,
        tape_fit,
        transform_function=np.log,
    )
    p = Pipeline([JoblibValueCachingWrapper(log_step, tmpdir)])

    p, outputs = p.fit_transform([1, 1, 2, 2], [2, 2, 4, 4])

    assert outputs == EXPECTED_OUTPUTS
    # Duplicate values 1 and 2 are each transformed a single time.
    assert tape_transform.data == [[1], [2]]
    assert tape_fit.data == [([1, 1, 2, 2], [2, 2, 4, 4])]
def test_step_cloner_should_fit_transform():
    """
    StepClonerForEachDataInput should hold one Pipeline clone per data
    input, each fitted on its own slice, and return the doubled inputs.
    """
    # Given
    tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(tape), MultiplyByN(2)])
    p = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))

    # When
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    # Then
    for index in (0, 1):
        assert isinstance(p.steps[index], Pipeline)
        # First recorded fit call of this clone's FitCallbackStep.
        assert np.array_equal(
            p.steps[index][0].callback_function.data[0][0], data_inputs[index]
        )
        assert np.array_equal(
            p.steps[index][0].callback_function.data[0][1], expected_outputs[index]
        )

    assert np.array_equal(processed_outputs, data_inputs * 2)
def test_should_flush_cache_on_every_fit(tmpdir):
    """
    Values written to the cache before fitting must not be reused: after
    fit_transform, the outputs and tapes must match a fresh run.
    """
    tape_transform = TapeCallbackFunction()
    tape_fit = TapeCallbackFunction()
    log_step = LogFitTransformCallbackStep(
        tape_transform,
        tape_fit,
        transform_function=np.log,
    )
    wrapper = JoblibValueCachingWrapper(log_step, cache_folder=tmpdir)
    p = Pipeline([wrapper])

    # Pre-populate the cache with entries for the inputs 1 and 2.
    wrapper.create_checkpoint_path()
    for cached_input, cached_value in ((1, 10), (2, 20)):
        wrapper.write_cache(cached_input, cached_value)

    p, outputs = p.fit_transform([1, 1, 2, 2], [2, 2, 4, 4])

    assert outputs == EXPECTED_OUTPUTS
    assert tape_transform.data == [[1], [2]]
    assert tape_fit.data == [([1, 1, 2, 2], [2, 2, 4, 4])]
def test_data_shuffling_should_shuffle_data_inputs_and_expected_outputs():
    """
    A DataShuffler placed before a callback step should hand that step
    data inputs and expected outputs that differ from the original order,
    and the pipeline outputs should differ from the inputs as well.
    """
    callback_fit = TapeCallbackFunction()
    callback_transform = TapeCallbackFunction()
    data_shuffler = Pipeline([
        DataShuffler(seed=42, increment_seed_after_each_fit=True),
        FitTransformCallbackStep(callback_transform, callback_fit)
    ])
    data_inputs = np.array(range(10))
    expected_outputs = np.array(range(10, 20))

    # fit_transform returns (fitted_pipeline, outputs). Without unpacking,
    # `outputs` would be that tuple and the first assertion below would pass
    # no matter what the pipeline produced.
    data_shuffler, outputs = data_shuffler.fit_transform(data_inputs, expected_outputs)

    assert not np.array_equal(outputs, data_inputs)
    assert not np.array_equal(callback_fit.data[0][0], data_inputs)
    assert not np.array_equal(callback_fit.data[0][1], expected_outputs)
    # Compare the first recorded transform call, not the whole list of calls,
    # which could never equal the inputs.
    assert not np.array_equal(callback_transform.data[0], data_inputs)
def create_callback_step(tape_step_name, hyperparams):
    """
    Create a named TransformCallbackStepWithMockHasher.

    :param tape_step_name: used both as the step name and as the extra argument passed to the callback
    :param hyperparams: values wrapped in HyperparameterSamples for the step
    :return: a `(name, step)` tuple
    """
    callback_step = TransformCallbackStepWithMockHasher(
        callback_function=TapeCallbackFunction().callback,
        more_arguments=[tape_step_name],
        hyperparams=HyperparameterSamples(hyperparams),
    )
    return tape_step_name, callback_step
def test_fit_for_each_should_fit_all_steps_for_each_data_inputs_expected_outputs():
    """
    ForEachDataInput should fit every step of its wrapped pipeline once per
    (data input, expected output) pair.
    """
    tape = TapeCallbackFunction()
    per_item_pipeline = Pipeline([
        FitCallbackStep(tape.callback, ["1"]),
        FitCallbackStep(tape.callback, ["2"]),
    ])
    p = Pipeline([ForEachDataInput(per_item_pipeline)])

    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    p = p.fit(data_inputs, expected_outputs)

    assert isinstance(p, Pipeline)
    assert tape.get_name_tape() == ["1", "2", "1", "2"]

    # Each pair is seen twice: once by step "1" and once by step "2".
    expected_fit_calls = [
        ([0, 1], [2, 3]),
        ([0, 1], [2, 3]),
        ([1, 2], [4, 5]),
        ([1, 2], [4, 5]),
    ]
    assert tape.data == expected_fit_calls
def test_input_and_output_transformer_wrapper_should_fit_with_data_inputs_and_expected_outputs_as_data_inputs():
    """
    The wrapped step should be fitted with a data-inputs value that holds
    both the original data inputs and the original expected outputs.
    """
    tape = TapeCallbackFunction()
    p = InputAndOutputTransformerWrapper(FitCallbackStep(tape))
    data_inputs, expected_outputs = _create_data_source((10, 10))

    p.fit(data_inputs, expected_outputs)

    # Data inputs received by the first recorded fit call.
    assert np.array_equal(tape.data[0][0][0], data_inputs)
    assert np.array_equal(tape.data[0][0][1], expected_outputs)
def test_step_cloner_should_save_sub_steps(tmpdir):
    """
    A full dump of a fitted StepClonerForEachDataInput should write joblib
    files for the cloner itself, for the wrapped pipeline, and for each
    clone ('Pipeline[0]' and 'Pipeline[1]') together with their sub-steps.
    """
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(tape), MultiplyByN(2)]),
        cache_folder_when_no_handle=tmpdir
    )
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))

    # Only the fitted step is needed here; the processed outputs are ignored.
    p, _ = p.fit_transform(data_inputs, expected_outputs)
    p.save(ExecutionContext(tmpdir), full_dump=True)

    # Paths relative to tmpdir. The original list named the Pipeline[0]
    # MultiplyByN file twice; the redundant entry is dropped.
    expected_relative_paths = [
        'StepClonerForEachDataInput/Pipeline[0]/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline[0]/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline[0]/Pipeline[0].joblib',
        'StepClonerForEachDataInput/Pipeline[1]/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline[1]/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline[1]/Pipeline[1].joblib',
        'StepClonerForEachDataInput/Pipeline/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline/Pipeline.joblib',
        'StepClonerForEachDataInput/StepClonerForEachDataInput.joblib',
    ]

    # A dedicated loop variable avoids rebinding `p`, the fitted step.
    for relative_path in expected_relative_paths:
        assert os.path.exists(os.path.join(tmpdir, relative_path))
def test_step_cloner_should_transform():
    """
    Transforming with StepClonerForEachDataInput should leave one Pipeline
    per data input in `steps` and return the doubled inputs.
    """
    tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(tape), MultiplyByN(2)])
    p = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))

    processed_outputs = p.transform(data_inputs)

    for index in (0, 1):
        assert isinstance(p.steps[index], Pipeline)
    assert np.array_equal(processed_outputs, data_inputs * 2)
def test_minibatch_sequential_pipeline_change_batch_size_works():
    """
    Fit 20 items with the initial batch size of 10, call set_batch_size(5),
    then fit 10 more items; the tapes should show two batches of 10
    followed by two batches of 5.
    """
    tape1 = TapeCallbackFunction()
    tape1_fit = TapeCallbackFunction()
    tape2 = TapeCallbackFunction()
    tape2_fit = TapeCallbackFunction()
    p = MiniBatchSequentialPipeline([
        MultiplyBy2FitTransformCallbackStep(tape1, tape1_fit, ["1"]),
        Joiner(batch_size=10),
        MultiplyBy2FitTransformCallbackStep(tape2, tape2_fit, ["2"]),
        Joiner(batch_size=10),
    ])

    # When
    p, outputs = p.fit_transform(list(range(20)), list(range(20)))
    p.set_batch_size(5)
    p, outputs = p.fit_transform(list(range(20, 30)), list(range(20, 30)))

    # Then
    # Raw batches as seen by the first step: two of size 10, then two of size 5.
    raw_batches = [
        list(range(0, 10)),
        list(range(10, 20)),
        list(range(20, 25)),
        list(range(25, 30)),
    ]
    # The second step receives the first step's output, i.e. doubled values.
    doubled_batches = [[2 * value for value in batch] for batch in raw_batches]

    assert tape1.data == raw_batches
    assert tape1_fit.data == [(batch, batch) for batch in raw_batches]
    assert tape1.name_tape == ["1", "1", "1", "1"]

    assert tape2.data == doubled_batches
    assert tape2_fit.data == [
        (doubled, raw) for doubled, raw in zip(doubled_batches, raw_batches)
    ]
    assert tape2.name_tape == ["2", "2", "2", "2"]
def test_pipeline_simple_mutate_inverse_transform():
    """
    Mutating a flat pipeline so that `transform` calls `inverse_transform`
    should replay the callback steps in reverse order.
    """
    forward_names = ["1", "2", "3", "4"]
    expected_tape = forward_names + forward_names[::-1]

    tape = TapeCallbackFunction()
    p = Pipeline([
        Identity(),
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
        TransformCallbackStep(tape.callback, ["3"]),
        TransformCallbackStep(tape.callback, ["4"]),
        Identity(),
    ])

    # Forward pass records "1" through "4".
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(new_method="inverse_transform", method_to_assign_to="transform")
    # Mutated pass records "4" down to "1".
    p.transform(np.ones((1, 1)))

    assert expected_tape == tape.get_name_tape()
def test_mini_batch_sequential_pipeline_should_transform_steps_sequentially_for_each_barrier_for_each_batch():
    """
    Transforming 20 items through four multiply-by-2 steps split by Joiners
    of batch size 10 should feed each step two batches of 10, with values
    doubled at every step.
    """
    # Given
    tape1 = TapeCallbackFunction()
    tape2 = TapeCallbackFunction()
    tape3 = TapeCallbackFunction()
    tape4 = TapeCallbackFunction()
    p = MiniBatchSequentialPipeline([
        MultiplyBy2TransformCallbackStep(tape1, ["1"]),
        MultiplyBy2TransformCallbackStep(tape2, ["2"]),
        Joiner(batch_size=10),
        MultiplyBy2TransformCallbackStep(tape3, ["3"]),
        MultiplyBy2TransformCallbackStep(tape4, ["4"]),
        Joiner(batch_size=10),
    ])

    # When
    outputs = p.transform(list(range(20)))

    # Then
    def batches_scaled_by(factor):
        # The two batches of 10, with every value multiplied by `factor`.
        return [
            [factor * value for value in range(0, 10)],
            [factor * value for value in range(10, 20)],
        ]

    # Four doublings in a row multiply each input by 16.
    assert outputs == [16 * value for value in range(20)]

    assert tape1.data == batches_scaled_by(1)
    assert tape1.name_tape == ["1", "1"]

    assert tape2.data == batches_scaled_by(2)
    assert tape2.name_tape == ["2", "2"]

    assert tape3.data == batches_scaled_by(4)
    assert tape3.name_tape == ["3", "3"]

    assert tape4.data == batches_scaled_by(8)
    assert tape4.name_tape == ["4", "4"]
def test_expand_dim_fit():
    """
    Fitting through ExpandDim should call only the wrapped step's fit
    handler, with a data container whose data inputs and expected outputs
    are wrapped in one extra outer dimension.
    """
    # NOTE(review): a second function with this same name appears later in
    # this source; if both live in one module the later definition replaces
    # this one — confirm which is intended.
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    fit_transform_tape = TapeCallbackFunction()
    wrapped_step = HandleCallbackStep(fit_tape, transform_tape, fit_transform_tape)
    p = Pipeline([ExpandDim(wrapped_step)])

    p = p.fit(np.array(range(10)), np.array(range(10)))

    assert transform_tape.data == []
    assert fit_transform_tape.data == []

    assert fit_tape.data[0][0].current_ids == ['781e5e245d69b566979b86e28d23f2c7']

    expanded = np.array([np.array(range(10))])
    assert np.array_equal(np.array(fit_tape.data[0][0].data_inputs), expanded)
    assert np.array_equal(np.array(fit_tape.data[0][0].expected_outputs), expanded)
def test_expand_dim_fit():
    """
    Fitting through ExpandDim with a SomeSummaryHasher installed should call
    only the wrapped step's fit handler, with a data container that carries
    SUMMARY_ID and has one extra outer dimension on its data.
    """
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    fit_transform_tape = TapeCallbackFunction()
    wrapped_step = HandleCallbackStep(fit_tape, transform_tape, fit_transform_tape)
    p = Pipeline([ExpandDim(wrapped_step)])
    # Replace the hashers so that the ids seen below are the fixed SUMMARY_ID.
    p['ExpandDim'].hashers = [SomeSummaryHasher(fake_summary_id=SUMMARY_ID)]

    p = p.fit(np.array(range(10)), np.array(range(10)))

    assert transform_tape.data == []
    assert fit_transform_tape.data == []

    assert fit_tape.data[0][0].current_ids == [SUMMARY_ID]
    assert fit_tape.data[0][0].summary_id == SUMMARY_ID

    expanded = np.array([np.array(range(10))])
    assert np.array_equal(np.array(fit_tape.data[0][0].data_inputs), expanded)
    assert np.array_equal(np.array(fit_tape.data[0][0].expected_outputs), expanded)