def _evaluate(self) -> Metric:
    """Run a full pass over the validation set and return the aggregated metric."""
    metric = Metric(amount_limit=None)
    data_loader_length = len(self.data_loader_validation)
    full_output_log = DataOutputLog()

    for i, batch in enumerate(self.data_loader_validation):
        if not batch:
            continue

        self._log_service.log_progress(i, data_loader_length, evaluation=True)

        loss_batch, metrics_batch, current_output_log = self._perform_batch_iteration(
            batch, train_mode=False, output_characters=(len(full_output_log) < 100))

        if math.isnan(loss_batch):
            raise Exception(f'loss is NaN during evaluation at iteration {i}')

        if current_output_log is not None:
            full_output_log.extend(current_output_log)

        metric.add_accuracies(metrics_batch)
        metric.add_loss(loss_batch)

    # add metrics that can only be computed once the whole validation set has been seen
    final_metric = self._model.calculate_evaluation_metrics()
    metric.add_accuracies(final_metric)

    self._log_service.log_batch_results(full_output_log)

    assert not math.isnan(metric.get_current_loss()), \
        f'combined loss is NaN during evaluation at iteration {i}; losses are - {metric._losses}'

    return metric
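# Illustrative sketch (not part of the original codebase): a minimal Metric
# accumulator consistent with the calls used above (add_loss, add_accuracies,
# get_current_loss, _losses) and with the constructor keywords seen in this
# file (amount_limit, metric). The real Metric class may differ.
from collections import deque


class _MetricSketch:
    def __init__(self, amount_limit=None, metric=None):
        # amount_limit is assumed to cap how many recent losses are retained
        self._losses = deque(maxlen=amount_limit)
        self._accuracies = {}
        if metric is not None:
            # assumed copy-construction from an existing metric
            self._losses.extend(metric._losses)
            for key, values in metric._accuracies.items():
                self._accuracies[key] = list(values)

    def add_loss(self, loss_value: float) -> None:
        self._losses.append(loss_value)

    def add_accuracies(self, accuracies: dict) -> None:
        for key, value in accuracies.items():
            self._accuracies.setdefault(key, []).append(value)

    def get_current_loss(self) -> float:
        # average over the retained window; 0.0 when nothing has been recorded yet
        return sum(self._losses) / len(self._losses) if self._losses else 0.0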
def _perform_epoch_iteration(
        self,
        epoch_num: int,
        best_metrics: Metric,
        patience: int,
        metric: Metric,
        resets_left: int,
        start_iteration: int = 0) -> Tuple[Metric, int]:
    """Run one training epoch and return the updated best metrics and remaining patience."""
    data_loader_length = len(self.data_loader_train)

    for i, batch in enumerate(self.data_loader_train):
        if i < start_iteration:
            continue

        self._log_service.log_progress(i, data_loader_length, epoch_num)

        loss_batch, accuracies_batch, _ = self._perform_batch_iteration(batch)
        assert not math.isnan(loss_batch), f'loss is NaN during training at iteration {i}'

        metric.add_loss(loss_batch)
        metric.add_accuracies(accuracies_batch)

        # calculate the number of batches processed so far across all epochs
        batches_passed = i + (epoch_num * data_loader_length)

        # run on the validation set and print progress to the terminal,
        # either every eval_frequency batches or once the epoch has finished
        if self._should_evaluate(batches_passed, i, data_loader_length):
            if not self._arguments_service.skip_validation:
                validation_metric = self._evaluate()
            else:
                validation_metric = Metric(metric=metric)

            assert not math.isnan(metric.get_current_loss()), \
                f'combined loss is NaN during training at iteration {i}; losses are - {metric._losses}'

            new_best = self._model.compare_metric(best_metrics, validation_metric)
            if new_best:
                best_metrics, patience = self._save_current_best_result(
                    validation_metric, epoch_num, i, resets_left)
            else:
                patience -= 1

            self._log_service.log_evaluation(
                metric,
                validation_metric,
                batches_passed,
                epoch_num,
                i,
                data_loader_length,
                new_best,
                metric_log_key=self._model.metric_log_key)

            self._log_service.log_summary(key='Patience left', value=patience)
            self._model.finalize_batch_evaluation(is_new_best=new_best)

        # check whether the allowed runtime has expired
        self._validate_time_passed()

        if patience == 0:
            break

    return best_metrics, patience
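# Illustrative sketch (not part of the original codebase): one way an outer
# `train` method could drive _perform_epoch_iteration and the patience/reset
# mechanism. The names `epochs`, `initial_patience` and `max_resets` are
# assumptions for illustration, not taken from the original arguments service.
def _train_sketch(self, epochs: int, initial_patience: int = 10, max_resets: int = 3) -> Metric:
    best_metrics = Metric(amount_limit=None)
    metric = Metric(amount_limit=100)  # assumed: running window over recent training batches
    patience = initial_patience
    resets_left = max_resets

    for epoch_num in range(epochs):
        best_metrics, patience = self._perform_epoch_iteration(
            epoch_num, best_metrics, patience, metric, resets_left)

        if patience == 0:
            if resets_left > 0:
                # assumed behaviour: consume a reset and continue training
                resets_left -= 1
                patience = initial_patience
            else:
                break  # early stopping: patience exhausted and no resets left

    return best_metrics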