Example #1
0
def test_step_cloner_should_fit_transform():
    """Fit-transform through the cloner and inspect each per-input clone.

    Asserts that the first two cloned steps are pipelines whose fit callback
    recorded the matching data input / expected output pair, and that the
    processed outputs equal the data inputs multiplied by two.
    """
    # Given
    callback_tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(callback_tape), MultiplyByN(2)])
    cloner = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))

    # When
    cloner, processed_outputs = cloner.fit_transform(data_inputs, expected_outputs)

    # Then
    for index in (0, 1):
        cloned_pipeline = cloner.steps[index]
        assert isinstance(cloned_pipeline, Pipeline)

        # First entry recorded by the callback of the clone's first sub step.
        recorded = cloned_pipeline[0].callback_function.data[0]
        assert np.array_equal(recorded[0], data_inputs[index])
        assert np.array_equal(recorded[1], expected_outputs[index])

    assert np.array_equal(processed_outputs, data_inputs * 2)
Example #2
0
    def _fit_data_container(self, data_container: DataContainer, context: ExecutionContext) -> BaseStep:
        """
        Fit a per-data-input cloner built around the wrapped step.

        :param data_container: data container passed to ``handle_fit``
        :param context: execution context passed to ``handle_fit``
        :return: whatever ``handle_fit`` returns for the cloner
        """
        assert self.wrapped is not None

        cloner = StepClonerForEachDataInput(self.wrapped)
        return cloner.handle_fit(data_container, context)
def test_should_inverse_transform():
    """Reverse a fitted cloner and check the inverse-transformed outputs.

    The expected result is the single marker value ``'inverse_transform'``.
    """
    cloner = StepClonerForEachDataInput(SomeStepInverseTransform())

    cloner, outputs = cloner.fit_transform([0])
    reversed_cloner = cloner.reverse()
    restored = reversed_cloner.inverse_transform(outputs)

    assert restored == ['inverse_transform']
def test_should_fit_transform():
    """Fit-transform one data input and check the clone type and output.

    The first cloned step must be a ``SomeStepInverseTransform`` and the
    outputs must be the single marker value ``'fit_transform'``.
    """
    cloner = StepClonerForEachDataInput(SomeStepInverseTransform())

    cloner, outputs = cloner.fit_transform([0])

    first_clone = cloner.steps[0]
    assert isinstance(first_clone, SomeStepInverseTransform)
    assert outputs == ['fit_transform']
def test_step_cloner_should_save_sub_steps(tmpdir):
    """Save a fitted cloner with ``full_dump=True`` and check the files on disk.

    After fitting on two data inputs, a ``.joblib`` file is expected under
    ``tmpdir`` for the cloner itself, for the wrapped pipeline and its two sub
    steps, and for each of the two cloned pipelines and their sub steps.

    :param tmpdir: pytest temporary directory used as cache and save folder
    """
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(
        Pipeline([FitCallbackStep(tape), MultiplyByN(2)]),
        cache_folder_when_no_handle=tmpdir)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    p.save(ExecutionContext(tmpdir), full_dump=True)

    # Each expected file is listed exactly once, relative to tmpdir.
    expected_relative_paths = [
        'StepClonerForEachDataInput/Pipeline[0]/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline[0]/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline[0]/Pipeline[0].joblib',
        'StepClonerForEachDataInput/Pipeline[1]/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline[1]/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline[1]/Pipeline[1].joblib',
        'StepClonerForEachDataInput/Pipeline/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline/Pipeline.joblib',
        'StepClonerForEachDataInput/StepClonerForEachDataInput.joblib',
    ]

    # A dedicated loop name keeps the fitted cloner `p` from being shadowed,
    # and the assertion message names the file that is missing.
    for relative_path in expected_relative_paths:
        saved_path = os.path.join(tmpdir, relative_path)
        assert os.path.exists(saved_path), saved_path
def test_step_cloner_should_get_hyperparams():
    """Set hyperparams on the cloner and read them back via ``get_hyperparams``.

    Both the meta step key and the wrapped step key must come back with the
    values that were set.
    """
    cloner = StepClonerForEachDataInput(SomeStep())
    samples = HyperparameterSamples({
        META_STEP_HP: META_STEP_HP_VALUE,
        SOME_STEP_HP: SOME_STEP_HP_VALUE
    })
    cloner.set_hyperparams(samples)

    retrieved = cloner.get_hyperparams()

    assert retrieved[META_STEP_HP] == META_STEP_HP_VALUE
    assert retrieved[SOME_STEP_HP] == SOME_STEP_HP_VALUE
Example #7
0
def test_step_cloner_should_transform():
    """Transform two data inputs through the cloner without fitting first.

    Asserts that the first two cloned steps are pipelines and that the
    processed outputs equal the data inputs multiplied by two.
    """
    callback_tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(callback_tape), MultiplyByN(2)])
    cloner = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))

    processed_outputs = cloner.transform(data_inputs)

    for index in (0, 1):
        assert isinstance(cloner.steps[index], Pipeline)
    assert np.array_equal(processed_outputs, data_inputs * 2)
def test_step_cloner_should_set_hyperparams():
    """Set hyperparams on the cloner and inspect the cloner and its wrapped step.

    The cloner keeps a ``HyperparameterSamples`` holding the meta step value,
    and the step returned by ``get_step()`` exposes the wrapped step value.
    """
    cloner = StepClonerForEachDataInput(SomeStep())
    samples = HyperparameterSamples({
        META_STEP_HP: META_STEP_HP_VALUE,
        SOME_STEP_HP: SOME_STEP_HP_VALUE
    })

    cloner.set_hyperparams(samples)

    assert isinstance(cloner.hyperparams, HyperparameterSamples)
    assert cloner.hyperparams[META_STEP_HP] == META_STEP_HP_VALUE
    wrapped_hyperparams = cloner.get_step().get_hyperparams()
    assert wrapped_hyperparams[SOME_STEP_HP_KEY] == SOME_STEP_HP_VALUE
def test_step_cloner_should_get_hyperparams_space():
    """Set a hyperparams space on the cloner and read it back.

    Both the meta step key and the wrapped step key must come back from
    ``get_hyperparams_space`` with the distributions that were set.
    """
    cloner = StepClonerForEachDataInput(SomeStep())
    space = HyperparameterSpace({
        META_STEP_HP: RAND_INT_STEP_CLONER,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    })
    cloner.set_hyperparams_space(space)

    retrieved_space = cloner.get_hyperparams_space()

    assert retrieved_space[META_STEP_HP] == RAND_INT_STEP_CLONER
    assert retrieved_space[SOME_STEP_HP] == RAND_INT_SOME_STEP
def test_step_cloner_should_set_hyperparams_space():
    """Set a hyperparams space and inspect the cloner and its wrapped step.

    The cloner keeps a ``HyperparameterSpace`` holding the meta step
    distribution, and the step returned by ``get_step()`` holds the wrapped
    step distribution.
    """
    cloner = StepClonerForEachDataInput(SomeStep())
    space = HyperparameterSpace({
        META_STEP_HP: RAND_INT_STEP_CLONER,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    })

    cloner.set_hyperparams_space(space)

    assert isinstance(cloner.hyperparams_space, HyperparameterSpace)
    assert cloner.hyperparams_space[META_STEP_HP] == RAND_INT_STEP_CLONER
    wrapped_space = cloner.get_step().hyperparams_space
    assert wrapped_space[SOME_STEP_HP_KEY] == RAND_INT_SOME_STEP
def test_step_cloner_should_set_train():
    """Switch a fitted cloner out of train mode and check the flag everywhere.

    After ``set_train(False)``, ``is_train`` must be false on the cloner and
    on the first two entries of ``steps_as_tuple``.
    """
    callback_tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(callback_tape), MultiplyByN(2)])
    cloner = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))
    cloner, processed_outputs = cloner.fit_transform(data_inputs, expected_outputs)

    cloner.set_train(False)

    assert not cloner.is_train
    for index in (0, 1):
        # Each entry is indexed as a pair; the step is its second element.
        assert not cloner.steps_as_tuple[index][1].is_train
Example #12
0
def test_step_cloner_should_inverse_transform():
    """Fit-transform, reverse the cloner, then inverse-transform the outputs.

    The forward outputs must equal the data inputs multiplied by two, and the
    inverse-transformed outputs must equal the original data inputs.
    """
    callback_tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(callback_tape), MultiplyByN(2)])
    cloner = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))

    cloner, processed_outputs = cloner.fit_transform(data_inputs, expected_outputs)
    cloner = cloner.reverse()

    assert np.array_equal(processed_outputs, data_inputs * 2)
    restored_inputs = cloner.inverse_transform(processed_outputs)
    assert np.array_equal(np.array(restored_inputs), np.array(data_inputs))
Example #13
0
    def fit(self, data_inputs, expected_outputs=None) -> 'BaseCrossValidationWrapper':
        """
        Fit a per-data-input cloner on the training split and score the validation split.

        Stores the individual scores in ``self.scores`` and their mean and
        standard deviation in ``self.scores_mean`` and ``self.scores_std``.

        :param data_inputs: data inputs handed to ``self.split``
        :param expected_outputs: expected outputs handed to ``self.split``
        :return: self
        """
        assert self.wrapped is not None

        splits = self.split(data_inputs, expected_outputs)
        train_di, train_eo, validation_di, validation_eo = splits

        fitted_cloner = StepClonerForEachDataInput(self.wrapped).fit(train_di, train_eo)
        validation_results = fitted_cloner.transform(validation_di)

        # One score per (result, validation expected output) pair.
        computed_scores = []
        for result_item, expected_item in zip(validation_results, validation_eo):
            computed_scores.append(self.scoring_function(result_item, expected_item))
        self.scores = computed_scores

        self.scores_mean = np.mean(self.scores)
        self.scores_std = np.std(self.scores)

        return self
Example #14
0
    def _fit_data_container(self, data_container: DataContainer, context: ExecutionContext) -> BaseStep:
        """
        Fit a per-data-input cloner of the wrapped step, optionally scoring it afterwards.

        When ``self.split_data_container_during_fit`` is true the data container
        is split into a training and a validation container and only the
        training part is fitted; otherwise the whole container is fitted.
        When ``self.predict_after_fit`` is true, the fitted cloner predicts on
        the validation container and the results go to ``self.calculate_score``.

        :param data_container: data container to fit on (and to split, if enabled)
        :param context: execution context passed to ``handle_fit`` / ``handle_predict``
        :return: self
        :raises ValueError: if ``predict_after_fit`` is enabled while
            ``split_data_container_during_fit`` is disabled, because no
            validation container exists in that configuration
        """
        assert self.wrapped is not None

        # Fail before fitting: without a split there is nothing to predict on.
        # Previously this surfaced as an UnboundLocalError after the fit had run.
        if self.predict_after_fit and not self.split_data_container_during_fit:
            raise ValueError(
                'predict_after_fit requires split_data_container_during_fit: '
                'no validation data container is available to predict on.'
            )

        validation_data_container = None
        if self.split_data_container_during_fit:
            train_data_container, validation_data_container = self.split_data_container(data_container)
        else:
            train_data_container = data_container

        step = StepClonerForEachDataInput(self.wrapped)
        step = step.handle_fit(train_data_container, context)

        if self.predict_after_fit:
            results = step.handle_predict(validation_data_container, context)
            self.calculate_score(results)

        return self
def test_step_cloner_should_load_sub_steps(tmpdir):
    """Save a fitted cloner with ``full_dump=True`` and load it back by name.

    The loaded cloner must wrap a pipeline, hold as many sub steps as there
    were data inputs, and its first two sub steps must be pipelines.

    :param tmpdir: pytest temporary directory used as cache and save folder
    """
    callback_tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(callback_tape), MultiplyByN(2)])
    cloner = StepClonerForEachDataInput(wrapped_pipeline, cache_folder_when_no_handle=tmpdir)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))
    cloner, processed_outputs = cloner.fit_transform(data_inputs, expected_outputs)

    cloner.save(ExecutionContext(tmpdir), full_dump=True)

    loading_context = ExecutionContext(tmpdir)
    loaded_step_cloner = loading_context.load('StepClonerForEachDataInput')

    assert isinstance(loaded_step_cloner.wrapped, Pipeline)
    assert len(loaded_step_cloner.steps_as_tuple) == len(data_inputs)
    for index in (0, 1):
        # Each entry is indexed as a pair; the step is its second element.
        assert isinstance(loaded_step_cloner.steps_as_tuple[index][1], Pipeline)
Example #16
0
    def _fit_data_container(self, data_container: DataContainer,
                            context: ExecutionContext) -> BaseStep:
        """
        Fit a per-data-input cloner on the training split and score the validation split.

        The validation container is transformed by the fitted cloner, and each
        (``data_inputs``, ``expected_outputs``) pair of the result is scored.
        Scores go to ``self.scores``; their mean and standard deviation go to
        ``self.scores_mean`` and ``self.scores_std``.

        :param data_container: data container to split and fit on
        :param context: execution context passed to ``handle_fit`` / ``handle_transform``
        :return: self
        """
        assert self.wrapped is not None

        train_split, validation_split = self.split_data_container(data_container)

        fitted_cloner = StepClonerForEachDataInput(self.wrapped).handle_fit(train_split, context)
        results = fitted_cloner.handle_transform(validation_split, context)

        # One score per (data input, expected output) pair of the results.
        computed_scores = []
        for result_item, expected_item in zip(results.data_inputs, results.expected_outputs):
            computed_scores.append(self.scoring_function(result_item, expected_item))
        self.scores = computed_scores

        self.scores_mean = np.mean(self.scores)
        self.scores_std = np.std(self.scores)

        return self
def test_step_cloner_should_set_steps_hyperparams():
    """Set hyperparams on an already-fitted cloner and check its first clone.

    After fitting on one data input, ``set_hyperparams`` must leave both the
    cloner and its first cloned step with ``HyperparameterSamples``, and the
    clone must expose the wrapped step value.
    """
    p = StepClonerForEachDataInput(SomeStep())
    # Rebind to the step returned by fit_transform so the assertions below
    # inspect the fitted cloner, as the other fit_transform tests do.
    p, _ = p.fit_transform([[0, 0]])

    p.set_hyperparams(
        HyperparameterSamples({
            META_STEP_HP: META_STEP_HP_VALUE,
            SOME_STEP_HP: SOME_STEP_HP_VALUE
        }))

    assert isinstance(p.hyperparams, HyperparameterSamples)
    assert isinstance(p.steps[0].hyperparams, HyperparameterSamples)
    assert p.steps[0].get_hyperparams()[SOME_STEP_HP_KEY] == SOME_STEP_HP_VALUE
def test_step_cloner_update_hyperparams_space_should_update_wrapped_step_hyperparams():
    """Update only the wrapped step's entry of an existing hyperparams space.

    After the update, the cloner's meta step distribution must be unchanged
    and the wrapped step must expose the new distribution.
    """
    cloner = StepClonerForEachDataInput(SomeStep())
    initial_space = HyperparameterSpace({
        META_STEP_HP: RAND_INT_META_STEP,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    })
    cloner.set_hyperparams_space(initial_space)

    updated_some_step_hp_space = RandInt(0, 400)
    space_update = HyperparameterSpace({
        SOME_STEP_HP: updated_some_step_hp_space
    })
    cloner.update_hyperparams_space(space_update)

    # NOTE(review): this checks `hyperparams`, not `hyperparams_space` --
    # confirm whether the space type was the intended target.
    assert isinstance(cloner.hyperparams, HyperparameterSamples)
    assert cloner.hyperparams_space[META_STEP_HP] == RAND_INT_META_STEP
    wrapped_space = cloner.wrapped.get_hyperparams_space()
    assert wrapped_space[SOME_STEP_HP_KEY] == updated_some_step_hp_space
Example #19
0
    def train(self, train_data_container: DataContainer,
              context: ExecutionContext):
        """
        Fit a per-data-input cloner of the wrapped step on the training container.

        :param train_data_container: data container passed to ``handle_fit``
        :param context: execution context passed to ``handle_fit``
        :return: whatever ``handle_fit`` returns for the cloner
        """
        cloner = StepClonerForEachDataInput(self.wrapped)
        return cloner.handle_fit(train_data_container, context)