Example #1
def test_final_steps(mocker, split_result, raw_model, n_steps):
    fit_params = {"c": 2, "d": 3}
    predict_params = {"_": 4, "+": 5}

    config = mocker.Mock()
    config.raw_split_data = split_result
    config.raw_model = raw_model
    config.fit_params = fit_params
    config.predict_params = predict_params
    config.scorers_providers = mocker.MagicMock()

    steps = []
    for _ in range(n_steps):
        step = mocker.Mock()
        step.fit_split_data.return_value = split_result
        step.fit_params.return_value = fit_params
        step.predict_params.return_value = predict_params
        steps.append(step)
    fp_steps = FitPredictorSteps(final_steps=steps)
    fp = FitPredictor(steps=fp_steps)

    models_configs_func = mocker.patch.object(fp, "_models_configs")
    trava_model = mocker.Mock()
    models_configs_func.return_value = [(trava_model, config)]

    tracker = mocker.Mock()
    fp.fit_predict(config=config, tracker=tracker)

    for step in steps:
        step.handle.assert_called_with(trava_model=trava_model,
                                       config=config,
                                       tracker=tracker)
Example #2
def test_evaluators_get_called(mocker, model_id, split_result, raw_model,
                               n_models):
    config = mocker.Mock()
    config.model_id = model_id
    config.raw_split_data = split_result
    config.raw_model = raw_model
    model_init_params = mocker.Mock()

    config.model_init_params = model_init_params
    fit_params = mocker.MagicMock()
    config.fit_params = fit_params
    predict_params = mocker.MagicMock()
    config.predict_params = predict_params
    scorers_providers = mocker.MagicMock()
    config.scorers_providers = scorers_providers
    fp = FitPredictor()

    models_configs = []
    for idx in range(n_models):
        nested_model = mocker.Mock()
        nested_model.raw_model = raw_model
        nested_model_id = model_id + "_" + str(idx)
        nested_model.model_id = nested_model_id
        model_config = copy(config)
        model_config.model_id = nested_model_id
        models_configs.append((nested_model, model_config))

    models_configs_func = mocker.patch.object(fp, "_models_configs")
    models_configs_func.return_value = models_configs

    evaluator_func = mocker.patch.object(fp, "_evaluator")
    evaluator_func.return_value = mocker.Mock()

    evaluators = fp.fit_predict(config=config, tracker=mocker.Mock())

    is_multiple_models = n_models > 1
    if is_multiple_models:
        for idx in range(n_models):
            model = models_configs[idx][0]
            config = models_configs[idx][1]
            evaluator = evaluators[idx]

            eval_func_args = evaluator_func.call_args_list[idx][1]  # kwargs of the idx-th call
            assert eval_func_args["model_config"] == config
            assert eval_func_args["split_result"] == config.raw_split_data
            assert eval_func_args["model"] == model

            evaluator.evaluate.assert_called_with(
                scorers_providers=scorers_providers)
    else:
        model = models_configs[0][0]
        config = models_configs[0][1]
        evaluator = evaluators[0]

        evaluator_func.assert_called_with(model_config=config,
                                          split_result=split_result,
                                          model=model)
        evaluator.evaluate.assert_called_with(
            scorers_providers=scorers_providers)
Example #3
def test_has_any_log_calls(mocker, split_result, raw_model):
    config = mocker.Mock()
    config.raw_split_data = split_result
    config.raw_model = raw_model
    config.fit_params = mocker.MagicMock()
    config.predict_params = mocker.MagicMock()
    config.scorers_providers = mocker.MagicMock()

    logger = mocker.Mock()
    fp = FitPredictor(logger=logger)
    fp.fit_predict(config=config, tracker=mocker.Mock())
    logger.log.assert_called()
Example #4
def test_return_evaluators(mocker, model_id, split_result, raw_model,
                           n_models):
    config = mocker.Mock()
    config.model_id = model_id
    config.raw_split_data = split_result
    config.raw_model = raw_model
    model_init_params = mocker.Mock()

    config.model_init_params = model_init_params
    fit_params = mocker.MagicMock()
    config.fit_params = fit_params
    predict_params = mocker.MagicMock()
    config.predict_params = predict_params
    config.scorers_providers = mocker.MagicMock()
    fp = FitPredictor()

    models_configs = []
    for idx in range(n_models):
        nested_model = mocker.Mock()
        nested_model.raw_model = raw_model
        nested_model_id = model_id + "_" + str(idx)
        nested_model.model_id = nested_model_id
        model_config = copy(config)
        model_config.model_id = nested_model_id
        models_configs.append((nested_model, model_config))

    models_configs_func = mocker.patch.object(fp, "_models_configs")
    models_configs_func.return_value = models_configs

    tracker = mocker.Mock()
    evaluators = fp.fit_predict(config=config, tracker=tracker)

    assert len(evaluators) == n_models

    is_multiple_models = n_models > 1
    if is_multiple_models:
        models = [model_config[0] for model_config in models_configs]

        for model, evaluator in zip(models, evaluators):
            assert model.model_id == evaluator.model_id
            assert model == evaluator.trava_model
            assert split_result == evaluator.fit_split_data
            assert split_result == evaluator.raw_split_data
    else:
        model = models_configs[0][0]
        evaluator = evaluators[0]

        assert model.model_id == evaluator.model_id
        assert model == evaluator.trava_model
        assert split_result == evaluator.fit_split_data
        assert split_result == evaluator.raw_split_data
Example #5
def test_models_configs(mocker, raw_model, model_id):
    fp = FitPredictor()
    models_configs_func = mocker.patch.object(fp, "_models_configs")
    models_configs = mocker.MagicMock()
    models_configs_func.return_value = models_configs

    config = mocker.Mock()
    config.raw_model = raw_model
    config.scorers_providers = mocker.MagicMock()

    fp.fit_predict(config=config, tracker=mocker.Mock())

    models_configs_func.assert_called_once_with(raw_model=raw_model,
                                                config=config)
    models_configs.__iter__.assert_called()
Example #6
    def _fit_predict(
        self,
        model_id: str,
        raw_model,
        model_init_params: Optional[dict],
        fit_predictor: FitPredictor,
        fit_params: dict,
        predict_params: dict,
        serializer: Optional[ModelSerializer],
        split_result: Optional[SplitResult] = None,
        description: Optional[str] = None,
    ):
        all_results_handlers = self._results_handlers + [self._tracker]
        scorers_providers: List[ScorersProvider] = list(all_results_handlers)

        config = FitPredictConfig(
            raw_split_data=split_result,
            raw_model=raw_model,
            model_init_params=model_init_params,
            model_id=model_id,
            scorers_providers=scorers_providers,
            serializer=serializer,
            fit_params=fit_params,
            predict_params=predict_params,
            description=description,
        )

        evaluators = fit_predictor.fit_predict(config=config, tracker=self._tracker)

        return evaluators
Example #7
def test_model_update_steps(mocker, n_steps, raw_model):
    config = mocker.MagicMock()
    config.fit_params = mocker.MagicMock()
    config.predict_params = mocker.MagicMock()
    config.raw_model = raw_model
    config.scorers_providers = mocker.MagicMock()

    steps = []
    for _ in range(n_steps):
        step = mocker.Mock()
        step.update_model.return_value = raw_model
        steps.append(step)

    fp_steps = FitPredictorSteps(raw_model_steps=steps)
    fp = FitPredictor(steps=fp_steps)

    fp.fit_predict(config=config, tracker=mocker.Mock())

    for step in steps:
        step.update_model.assert_called_with(raw_model=raw_model,
                                             config=config)
Example #8
    def fit_predict(
        self,
        model_id: str,
        model_type: Type,
        description: Optional[str] = None,
        model_init_params: Optional[dict] = None,
        raw_split_data: Optional[SplitResult] = None,
        fit_predictor: FitPredictor = None,
        fit_params: dict = None,
        predict_params: dict = None,
        only_calculate_metrics: bool = False,
        keep_models_in_memory: bool = True,
        keep_data_in_memory: bool = True,
        serializer: Optional[ModelSerializer] = None,
    ):
        """
        Calls the model's fit and predict with the provided data, calculates metrics and stores them.
        model_type and model_init_params are kept separate for the sake of easy parameter tracking.

        Parameters
        ----------
        model_id: str
            Unique model identifier; it will be used for saving metrics, etc.
        model_type: type of sklearn-style model
            Type of model that supports fit, predict and predict_proba methods
        description: str
            Describes the fit. It will be tracked if you set up a tracker.
        model_init_params: dict
            Parameters to use to initialize model_type
        raw_split_data: optional SplitResult
            Already split train/test sets.
            Omitting it makes sense only if your FitPredictor contains
            some custom logic, like CVFitPredictor.
        fit_predictor: FitPredictor
            Object responsible for performing fit and predict on a model
        fit_params: dict
            Custom params to use when calling model's fit method
        predict_params: dict
            Custom params to use when calling model's predict method
        only_calculate_metrics: bool
            If True, ResultsHandlers won't be called, but all the metrics
            for them will be ready. Can be useful when you perform many
            consecutive runs and don't want to be overwhelmed by the output.
        keep_models_in_memory: bool
            Whether fitted models should be kept in memory after the fit
        keep_data_in_memory: bool
            Whether the provided data should be kept in memory after the fit
        serializer: ModelSerializer
            If provided, the fitted model will be serialized and tracked

        Returns
        -------
        List of outputs for every results handler that returns a non-None result
        """
        fit_params = fit_params or {}
        predict_params = predict_params or {}
        model_init_params = model_init_params or {}
        fit_predictor = fit_predictor or FitPredictor(logger=self._logger)

        raw_model, model_init_params = self._create_raw_model(
            model_type=model_type, model_init_params=model_init_params
        )

        evaluators = self._fit_predict(
            raw_model=raw_model,
            model_id=model_id,
            model_init_params=model_init_params,
            fit_predictor=fit_predictor,
            fit_params=fit_params,
            predict_params=predict_params,
            serializer=serializer,
            split_result=raw_split_data,
            description=description,
        )
        result = None

        if only_calculate_metrics:
            self._prepare_model_results(evaluators=evaluators, save=True, main_model_id=model_id)
        else:
            result = self._results_for_evaluators(
                evaluators=evaluators, save_model_results=True, main_model_id=model_id
            )

        if not keep_models_in_memory:
            for evaluator in evaluators:
                evaluator.trava_model.unload_model()

        if not keep_data_in_memory:
            for evaluator in evaluators:
                evaluator.unload_data()
        return result
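
For orientation, the sketch below shows how the fit_predict entry point documented above might be called. It is a minimal sketch only: the TravaSV class name, the import paths, and the SplitResult constructor arguments are assumptions not shown in the snippets above (only the fit_predict parameters and the X_train/y_train/X_test/y_test attributes appear there), so verify them against your installed version of trava.

# Minimal usage sketch (not part of the library's examples above).
# Assumptions: the entry-point class is TravaSV and lives in trava.trava_sv,
# and SplitResult accepts the keyword arguments shown below -- double-check
# both against your installed trava version.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from trava.split.result import SplitResult   # assumed import path
from trava.trava_sv import TravaSV           # assumed import path

X, y = make_classification(n_samples=200, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
split_result = SplitResult(X_train=X_train, y_train=y_train,
                           X_test=X_test, y_test=y_test)

trava = TravaSV()  # results handlers / tracker configuration omitted for brevity
result = trava.fit_predict(
    model_id="logreg_baseline",
    model_type=LogisticRegression,         # a type, not an instance
    model_init_params={"max_iter": 1000},  # tracked separately from the type
    raw_split_data=split_result,
    fit_params={},                         # forwarded to the model's fit
    predict_params={},                     # forwarded to the model's predict
)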
Example #9
def test_fit_predict(mocker, model_id, split_result, raw_model, n_models):
    config = mocker.Mock()
    config.model_id = model_id
    config.raw_split_data = split_result
    config.raw_model = raw_model
    model_init_params = mocker.Mock()

    config.model_init_params = model_init_params
    fit_params = mocker.MagicMock()
    config.fit_params = fit_params
    predict_params = mocker.MagicMock()
    config.predict_params = predict_params
    scorers_providers = mocker.MagicMock()
    config.scorers_providers = scorers_providers
    fp = FitPredictor()
    fit_mock = mocker.patch.object(fp, "_fit")
    fit_mock.return_value = None
    predict_mock = mocker.patch.object(fp, "_predict")
    predict_mock.return_value = None

    models_configs = []
    for idx in range(n_models):
        nested_model = mocker.Mock()
        nested_model.raw_model = raw_model
        nested_model_id = model_id + "_" + str(idx)
        nested_model.model_id = nested_model_id

        model_config = copy(config)

        model_config.model_id = nested_model_id
        models_configs.append((nested_model, model_config))

    models_configs_func = mocker.patch.object(fp, "_models_configs")
    models_configs_func.return_value = models_configs

    tracker = mocker.Mock()
    fp.fit_predict(config=config, tracker=tracker)

    is_multiple_models = n_models > 1
    if is_multiple_models:
        for idx, (model, config) in enumerate(models_configs):
            fit_call_args = fit_mock.call_args_list[idx][1]  # kwargs of the idx-th _fit call
            assert fit_call_args["trava_model"] == model
            assert fit_call_args["X"] == config.raw_split_data.X_train
            assert fit_call_args["y"] == config.raw_split_data.y_train
            assert fit_call_args["fit_params"] == fit_params
            assert fit_call_args["predict_params"] == predict_params

            predict_call_args = predict_mock.call_args_list[idx][1]

            assert predict_call_args["trava_model"] == model
            assert predict_call_args["X"] == config.raw_split_data.X_test
            assert predict_call_args["y"] == config.raw_split_data.y_test
    else:
        fit_mock.assert_called_once_with(
            trava_model=models_configs[0][0],
            X=split_result.X_train,
            y=split_result.y_train,
            fit_params=fit_params,
            predict_params=predict_params,
        )

        predict_mock.assert_called_once_with(trava_model=models_configs[0][0],
                                             X=split_result.X_test,
                                             y=split_result.y_test)
Example #10
def test_tracking(mocker, model_id, split_result, raw_model, n_models,
                  has_description, serializer):
    config = mocker.Mock()
    config.model_id = model_id
    config.raw_split_data = split_result
    config.serializer = serializer
    config.raw_model = raw_model
    model_init_params = mocker.Mock()

    description = "descr"
    if has_description:
        config.description = description
    else:
        config.description = None

    config.model_init_params = model_init_params
    fit_params = mocker.MagicMock()
    config.fit_params = fit_params
    predict_params = mocker.MagicMock()
    config.predict_params = predict_params
    config.scorers_providers = mocker.MagicMock()
    fp = FitPredictor()

    models_configs = []
    for idx in range(n_models):
        nested_model = mocker.Mock()
        nested_model.raw_model = raw_model
        nested_model_id = model_id + "_" + str(idx)
        nested_model.model_id = nested_model_id
        model_config = copy(config)
        model_config.model_id = nested_model_id
        models_configs.append((nested_model, model_config))

    models_configs_func = mocker.patch.object(fp, "_models_configs")
    models_configs_func.return_value = models_configs

    tracker = mocker.Mock()
    fp.fit_predict(config=config, tracker=tracker)

    expected_calls = []

    is_multiple_models = n_models > 1
    if is_multiple_models:
        expected_calls += _start_tracking_check_get_calls(
            mocker=mocker,
            model_id=model_id,
            raw_model=raw_model,
            tracker=tracker,
            description=description,
            model_init_params=model_init_params,
            has_description=has_description,
            nested=False,
        )

    models = [model_config[0] for model_config in models_configs]
    for idx, model in enumerate(models):
        expected_calls += _start_tracking_check_get_calls(
            mocker=mocker,
            model_id=model.model_id,
            raw_model=raw_model,
            tracker=tracker,
            description=description,
            model_init_params=model_init_params,
            has_description=has_description,
            nested=is_multiple_models,
        )

        expected_calls += [
            mocker.call.track_fit_params(model_id=model.model_id,
                                         params=fit_params),
            mocker.call.track_predict_params(model_id=model.model_id,
                                             params=predict_params),
        ]

        model_results_kwargs = tracker.track_model_results.call_args_list[idx][1]
        model_results = model_results_kwargs["model_results"]
        assert model_results.model_id == model.model_id

        expected_calls += [
            mocker.call.track_model_results(model_results=model_results)
        ]

        if serializer:
            expected_calls += [
                mocker.call.track_model_artifact(model_id=model.model_id,
                                                 model=raw_model,
                                                 serializer=serializer)
            ]

        expected_calls += [mocker.call.end_tracking(model_id=model.model_id)]

    if is_multiple_models:
        model_results_kwargs = tracker.track_model_results.call_args_list[n_models][1]
        model_results = model_results_kwargs["model_results"]
        assert model_results.model_id == model_id

        expected_calls += [
            mocker.call.track_model_results(model_results=model_results),
            mocker.call.end_tracking(model_id=model_id),
        ]

    tracker.assert_has_calls(expected_calls)