def test_train_metrics(mocker, n_evaluators):
    scorers_provider = mocker.Mock()
    main_model_id = "main_model_id"
    all_metrics = []
    evaluators = []
    for idx in range(n_evaluators):
        evaluator = mocker.Mock()
        if n_evaluators > 1:
            evaluator_model_id = "model_" + str(idx)
        else:
            evaluator_model_id = main_model_id
        evaluator.model_id = evaluator_model_id
        metrics = [
            Metric(name="test_metric_1",
                   value=random.randint(0, 1000),
                   model_id=evaluator_model_id),
            Metric(name="test_metric_2",
                   value="blahblah",
                   model_id=evaluator_model_id),
        ]
        all_metrics.append(metrics)

        def _wrapper(metrics):
            # Bind this evaluator's metrics; the side effect also checks that
            # ModelResult forwards the exact provider it was queried with.
            def _train_metrics(provider):
                if provider != scorers_provider:
                    raise ValueError
                return metrics.copy()

            return _train_metrics

        evaluator.train_metrics = mocker.MagicMock(side_effect=_wrapper(metrics=metrics))
        evaluators.append(evaluator)

    model_result = ModelResult(model_id=main_model_id, evaluators=evaluators)

    if n_evaluators > 1:
        # Aggregated results only average scalar metrics, so drop the
        # non-scalar ones from the expected values.
        all_metrics = [
            [metric for metric in eval_metrics if metric.is_scalar]
            for eval_metrics in all_metrics
        ]

    for metric, eval_metrics in zip(model_result.train_metrics(provider=scorers_provider),
                                    zip(*all_metrics)):
        first_eval_metric = eval_metrics[0]
        assert metric.name == first_eval_metric.name
        if n_evaluators > 1:
            if not metric.is_scalar:
                continue
            eval_metrics_value = np.mean([metric.value for metric in eval_metrics])
        else:
            eval_metrics_value = first_eval_metric.value
        assert metric.value == eval_metrics_value

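# The model_id / n_evaluators arguments used by the tests in this section are
# assumed to come from pytest fixtures (or an equivalent parametrization).
# A minimal conftest.py sketch that would drive them; the fixture names match
# the test signatures, but the parameter values are assumptions:
import pytest


@pytest.fixture(params=[1, 3])
def n_evaluators(request):
    # Exercise both the single-fit and the aggregated (multi-fit) paths.
    return request.param


@pytest.fixture
def model_id():
    return "test_model_id"
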
def _handle_model_results(self, model_result: ModelResult, logger: TravaLogger, tracker: Tracker, track_only: bool):
    model_id = model_result.model_id
    self._start_run(model_id=model_id, tracker=tracker)
    self._handle_run(text="* Results for {} model *".format(model_id),
                     logger=logger,
                     track_only=track_only)

    if model_result.train_metrics(provider=self):
        self._handle_run(text="Train metrics:", logger=logger, track_only=track_only)
        self._handle_metrics(metrics=model_result.train_metrics(provider=self),
                             logger=logger,
                             track_only=track_only)

    self._handle_run(text="Test metrics:", logger=logger, track_only=track_only)
    self._handle_metrics(metrics=model_result.test_metrics(provider=self),
                         logger=logger,
                         track_only=track_only)

    self._handle_run(text="Other metrics:", logger=logger, track_only=track_only)
    self._handle_metrics(metrics=model_result.other_metrics(provider=self),
                         logger=logger,
                         track_only=track_only)

    self._track_run(model_id=model_id, tracker=tracker)
    self._stop_run(model_id=model_id, tracker=tracker)

def track_model_results(self, model_results: ModelResult):
    self._track_metrics(model_id=model_results.model_id,
                        metrics=model_results.train_metrics(provider=self),
                        train=True)
    self._track_metrics(model_id=model_results.model_id,
                        metrics=model_results.test_metrics(provider=self),
                        train=False)
    self._track_metrics(model_id=model_results.model_id,
                        metrics=model_results.other_metrics(provider=self),
                        train=False)

def test_is_one_fit_result(mocker, model_id, n_evaluators):
    evaluators = [mocker.Mock() for _ in range(n_evaluators)]
    model_result = ModelResult(model_id=model_id, evaluators=evaluators)

    if n_evaluators > 1:
        assert not model_result.is_one_fit_result
    else:
        assert model_result.is_one_fit_result

def _prepare_model_results(
    self, evaluators: List[Evaluator], save: bool = False, main_model_id: Optional[str] = None
) -> List[ModelResult]:
    # Several evaluators plus a main model id mean the fits should be
    # aggregated into a single result instead of one result per evaluator.
    is_agg_result = len(evaluators) > 1 and main_model_id

    evaluators_results = []
    for evaluator in evaluators:
        model_results = ModelResult(model_id=evaluator.model_id, evaluators=[evaluator])
        evaluators_results.append(model_results)

        if not is_agg_result and save:
            self._results[model_results.model_id] = model_results

    if is_agg_result:
        model_id = str(main_model_id)
        main_model_result = ModelResult(model_id=model_id, evaluators=evaluators)
        evaluators_results = [main_model_result]

        if save:
            self._results[model_id] = main_model_result

    return evaluators_results

def test_models(mocker, model_id, n_evaluators):
    raw_models = {}
    evaluators = []
    for idx in range(n_evaluators):
        evaluator = mocker.Mock()
        trava_model = mocker.Mock()
        raw_model = mocker.Mock()
        trava_model.raw_model = raw_model
        evaluator.model_id = model_id + "_" + str(idx)
        evaluator.trava_model = trava_model
        evaluators.append(evaluator)
        raw_models[evaluator.model_id] = raw_model

    model_result = ModelResult(model_id=model_id, evaluators=evaluators)

    assert model_result.raw_models == raw_models

def handle(self, results: t.List[ModelResult], logger: TravaLogger, tracker: Tracker):
    logger.log("*** Logging: ***")
    for model_result in results:
        self._handle_model_results(model_result=model_result,
                                   logger=logger,
                                   tracker=tracker,
                                   track_only=False)

        if not model_result.is_one_fit_result:
            # Aggregated result: also track every underlying fit separately,
            # without logging each of them a second time.
            for evaluator in model_result.evaluators:
                nested_model_result = ModelResult(model_id=evaluator.model_id,
                                                  evaluators=[evaluator])
                self._handle_model_results(model_result=nested_model_result,
                                           logger=logger,
                                           tracker=tracker,
                                           track_only=True)

    logger.log("*** END ***\n")

def _track_evaluators(evaluators: List[Evaluator], model_id: str, tracker: TravaTracker):
    tracker.start_tracking(model_id=model_id)
    main_model_results = ModelResult(model_id=model_id, evaluators=evaluators)
    tracker.track_model_results(model_results=main_model_results)
    tracker.end_tracking(model_id=model_id)

def test_evaluators_required(mocker, model_id):
    evaluators = []
    with pytest.raises(Exception):
        ModelResult(model_id=model_id, evaluators=evaluators)

    with pytest.raises(Exception):
        ModelResult(model_id=model_id, evaluators=None)

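# For reference, a minimal constructor guard that would satisfy
# test_evaluators_required above. This is only a sketch of the contract the
# test pins down; the real ModelResult.__init__ may validate differently.
class _ModelResultSketch:
    def __init__(self, model_id: str, evaluators: list):
        if not evaluators:
            # Rejects both None and an empty list: a result makes no sense
            # without at least one evaluator behind it.
            raise ValueError("ModelResult requires at least one evaluator")
        self.model_id = model_id
        self.evaluators = evaluators
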
def test_model_id(mocker, model_id):
    evaluators = [mocker.Mock()]
    model_result = ModelResult(model_id=model_id, evaluators=evaluators)

    assert model_result.model_id == model_id

def _plot(
    self,
    results: List[ModelResult],
    logger: TravaLogger,
    show: bool,
    tracker: Tracker,
    use_one_figure: bool = False,
    model_id: str = None,
):
    all_train_metrics: List[List[Metric]] = []
    all_test_metrics: List[List[Metric]] = []
    for model_result in results:
        if not model_result.is_one_fit_result:
            if show:
                continue

            many_model_results = []
            for evaluator in model_result.evaluators:
                many_model_results.append(
                    ModelResult(model_id=evaluator.model_id, evaluators=[evaluator]))

            self._plot(
                results=many_model_results,
                logger=logger,
                show=False,
                tracker=tracker,
                use_one_figure=True,
                model_id=model_result.model_id,
            )
            self._plot(results=many_model_results,
                       logger=logger,
                       show=False,
                       tracker=tracker,
                       use_one_figure=False)
            continue

        if model_result.train_metrics(provider=self):
            model_train_metrics = model_result.train_metrics(provider=self)
            all_train_metrics.append(model_train_metrics)

        model_test_metrics = model_result.test_metrics(provider=self)
        all_test_metrics.append(model_test_metrics)

    if len(all_train_metrics) > 0:
        # TODO: couldn't figure out how to fix this, postponed it
        train_metrics_set: List[Tuple[Metric]] = list(zip(*all_train_metrics))  # type: ignore
        self._plot_metrics_set(
            metrics_set=train_metrics_set,
            label="Train",
            show=show,
            use_one_figure=use_one_figure,
            tracker=tracker,
            model_id=model_id,
        )

    # TODO: couldn't figure out how to fix this, postponed it
    test_metrics_set: List[Tuple[Metric]] = list(zip(*all_test_metrics))  # type: ignore
    self._plot_metrics_set(
        metrics_set=test_metrics_set,
        label="Test",
        show=show,
        use_one_figure=use_one_figure,
        tracker=tracker,
        model_id=model_id,
    )

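# The zip(*...) calls above transpose "metrics per model" into "models per
# metric", so each tuple can be drawn as one plot. A standalone illustration
# of that transpose, with plain strings standing in for Metric objects:
all_train_metrics = [
    ["model_a/accuracy", "model_a/log_loss"],  # metrics of the first model
    ["model_b/accuracy", "model_b/log_loss"],  # metrics of the second model
]
metrics_set = list(zip(*all_train_metrics))
assert metrics_set == [
    ("model_a/accuracy", "model_b/accuracy"),
    ("model_a/log_loss", "model_b/log_loss"),
]
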
def _track_metrics(self, model_id: str, evaluators: List[Evaluator], tracker: Tracker):
    model_results = ModelResult(model_id=model_id, evaluators=evaluators)
    tracker.track_model_results(model_results=model_results)