def test_resumable_pipeline_fit_transform_should_save_all_fitted_pipeline_steps(
        tmpdir: LocalPath):
    """Check which pipeline paths exist on disk after ``fit_transform``.

    A root ``ResumablePipeline`` (cached under ``tmpdir``) wraps one step and
    a nested ``ResumablePipeline`` holding a step, a checkpoint and a final
    step. After fit-transforming, the test expects paths for the root, the
    nested pipeline, the first two steps and the checkpoint to exist, while
    the path of the step placed after the checkpoint must not exist.
    """
    first_step = (SOME_STEP_1, MultiplyByN(multiply_by=2))
    nested_pipeline = ResumablePipeline([
        (SOME_STEP_2, MultiplyByN(multiply_by=4)),
        (CHECKPOINT, DefaultCheckpoint()),
        (SOME_STEP_3, MultiplyByN(multiply_by=6)),
    ])
    root_pipeline = ResumablePipeline(
        [first_step, (PIPELINE_2, nested_pipeline)],
        cache_folder=tmpdir,
    )
    root_pipeline.name = ROOT

    root_pipeline, outputs = root_pipeline.fit_transform(
        np.array(range(10)),
        np.array(range(10)),
    )

    # Only the step located after the checkpoint is expected to be absent.
    paths_expected_missing = [create_some_step3_path(tmpdir)]
    paths_expected_present = [
        create_root_path(tmpdir),
        create_pipeline2_path(tmpdir),
        create_some_step1_path(tmpdir),
        create_some_step2_path(tmpdir),
        create_some_checkpoint_path(tmpdir),
    ]

    assert np.array_equal(outputs, EXPECTED_OUTPUTS)
    for present_path in paths_expected_present:
        assert os.path.exists(present_path)
    for missing_path in paths_expected_missing:
        assert not os.path.exists(missing_path)
def test_should_fit_transform_each_steps(test_case: ResumablePipelineTestCase, tmpdir):
    """Fit-transform the test case's steps and verify the recorded call tape.

    Builds a ``ResumablePipeline`` from ``test_case.steps`` cached under
    ``tmpdir``, runs ``fit_transform`` on the test case's data, then checks
    that the returned pipeline is a ``Pipeline``, that the tape's name
    sequence matches ``test_case.expected_tape``, and that the returned data
    equals ``test_case.data_inputs``.
    """
    resumable = ResumablePipeline(steps=test_case.steps, cache_folder=tmpdir)

    fitted_pipeline, returned_data = resumable.fit_transform(
        test_case.data_inputs,
        test_case.expected_outputs,
    )
    # Read the tape only after the pipeline has run.
    recorded_names = test_case.tape.get_name_tape()

    assert isinstance(fitted_pipeline, Pipeline)
    assert recorded_names == test_case.expected_tape
    # The returned data is compared against the inputs, not expected_outputs.
    assert np.array_equal(returned_data, test_case.data_inputs)