def log_metrics(self, metrics: _OUT_DICT, step: Optional[int] = None) -> None:
    """Log a dictionary of metric values through the trainer's logger.

    If the ``step`` parameter is ``None`` and a ``"step"`` key is present in
    ``metrics``, ``metrics["step"]`` is used as the step.

    Args:
        metrics: Metric values to log.
        step: Step for which metrics should be logged. Default value is
            `self.global_step` during training or the total validation / test
            log step count during validation and testing.
    """
    logger = self.trainer.logger
    if logger is None or not metrics:
        return

    # tensors cannot be handed to the logger directly; reduce to Python scalars
    scalars = metrics_to_scalars(metrics)

    if step is None:
        step = scalars.pop("step", None)
    if step is None:
        # fall back to the trainer's global step; tag the epoch for convenience
        scalars.setdefault("epoch", self.trainer.current_epoch)
        step = self.trainer.global_step

    # log actual metrics and persist the logger state
    logger.agg_and_log_metrics(scalars, step=step)
    logger.save()

    self._logged_metrics.update(scalars)
def log_metrics(self, metrics: Dict[str, _METRIC], step: Optional[int] = None) -> None:
    """Log a dictionary of metric values through the trainer's logger.

    If the ``step`` parameter is ``None`` and a ``"step"`` key is present in
    ``metrics``, ``metrics["step"]`` is used as the step.

    Args:
        metrics: Metric values to log. The dict is NOT mutated by this call.
        step: Step for which metrics should be logged. Default value is
            `self.global_step` during training or the total validation / test
            log step count during validation and testing.
    """
    if self.trainer.logger is None or not metrics:
        return

    # add gpu memory stats. Merge into a copy instead of mutating the
    # caller's dict (the original `metrics.update(...)` leaked the memory
    # profile back into the caller's dictionary as a side effect).
    if self.trainer._device_type == DeviceType.GPU and self.log_gpu_memory:
        mem_map = memory.get_memory_profile(self.log_gpu_memory)
        metrics = {**metrics, **mem_map}

    # turn all tensors to scalars
    scalar_metrics = metrics_to_scalars(metrics)

    if step is None:
        step = scalar_metrics.pop("step", None)
    if step is None:
        # added metrics for convenience
        scalar_metrics.setdefault("epoch", self.trainer.current_epoch)
        step = self.trainer.global_step

    # log actual metrics; only rank zero talks to the logger
    if self.trainer.is_global_zero:
        self.trainer.logger.agg_and_log_metrics(scalar_metrics, step=step)
        self.trainer.logger.save()

    self._logged_metrics.update(scalar_metrics)
def metrics(self, on_step: bool) -> Dict[MetricSource, Dict[str, _METRIC]]:
    """Collect the current metric values, grouped by destination.

    Args:
        on_step: whether the step-time (forward cache) value of each metric
            is requested rather than the epoch-time (computed) value.

    Returns:
        A mapping from each ``MetricSource`` (log / callback / progress bar)
        to a ``{name: value}`` dict of metrics destined for it.
    """
    out: Dict[MetricSource, Dict[str, _METRIC]] = {source: {} for source in MetricSource}

    for _, result_metric in self.valid_items():
        # extract forward_cache or computed from the ResultMetric;
        # ``include_none=False`` drops entries whose output is None
        value = apply_to_collection(result_metric, ResultMetric, self._get_cache, on_step, include_none=False)

        # skip when the resulting collection holds no tensors at all
        # (list-append sentinel instead of a nonlocal flag)
        found: list = []
        apply_to_collection(value, torch.Tensor, found.append)
        if not found:
            continue

        name, forked_name = self._forked_name(result_metric, on_step)
        meta = result_metric.meta

        # populate logging metrics
        if meta.logger:
            out[MetricSource.LOG][forked_name] = value

        # populate callback metrics; callback metrics don't take `_step`
        # forked metrics outside of training
        if self.training or (meta.on_epoch and not on_step):
            out[MetricSource.CALLBACK][name] = value
            out[MetricSource.CALLBACK][forked_name] = value

        # populate progress_bar metrics; the bar wants numbers, not tensors
        if meta.prog_bar:
            out[MetricSource.PBAR][forked_name] = metrics_to_scalars(value)

    return out
def metrics(self, on_step: bool) -> _METRICS:
    """Collect the current metric values, grouped by destination.

    Args:
        on_step: whether the step-time (forward cache) value of each metric
            is requested rather than the epoch-time (computed) value.

    Returns:
        A ``_METRICS`` container with ``callback``, ``log`` and ``pbar``
        ``{name: value}`` dicts.
    """
    out = _METRICS(callback={}, log={}, pbar={})

    for _, result_metric in self.valid_items():
        # extract forward_cache or computed from the _ResultMetric;
        # ``include_none=False`` drops entries whose output is None
        value = apply_to_collection(result_metric, _ResultMetric, self._get_cache, on_step, include_none=False)

        # convert metric collection to a plain dict container
        if isinstance(value, _ResultMetricCollection):
            value = dict(value.items())

        # skip when the resulting collection holds no tensors at all
        # (list-append sentinel instead of a nonlocal flag)
        found: list = []
        apply_to_collection(value, torch.Tensor, found.append)
        if not found:
            continue

        name, forked_name = self._forked_name(result_metric, on_step)
        meta = result_metric.meta

        # populate logging metrics
        if meta.logger:
            out["log"][forked_name] = value

        # populate callback metrics; callback metrics don't take `_step`
        # forked metrics outside of training
        if self.training or (meta.on_epoch and not on_step):
            out["callback"][name] = value
            out["callback"][forked_name] = value

        # populate progress_bar metrics; the bar wants numbers, not tensors
        if meta.prog_bar:
            out["pbar"][forked_name] = metrics_to_scalars(value)

    return out
def log_metrics(self, metrics, grad_norm_dict, step=None):
    """Log a dictionary of metric values through the trainer's logger.

    If the `step` parameter is None and a `step` key is present in
    `metrics`, uses `metrics["step"]` as the step.

    Args:
        metrics (dict): Metric values. NOT mutated by this call.
        grad_norm_dict (dict): Gradient norms. NOT mutated by this call.
        step (int): Step for which metrics should be logged. Default value is
            `self.global_step` during training or the total validation / test
            log step count during validation and testing.
    """
    # merge gpu memory stats and gradient norms into a local copy instead of
    # mutating the caller's dict (the original `metrics.update(...)` calls
    # leaked both maps back into the caller's dictionary as a side effect)
    combined = dict(metrics)

    # add gpu memory
    if self.trainer._device_type == DeviceType.GPU and self.log_gpu_memory:
        combined.update(memory.get_memory_profile(self.log_gpu_memory))

    # add norms
    combined.update(grad_norm_dict)

    # turn all tensors to scalars
    scalar_metrics = metrics_to_scalars(combined)

    if "step" in scalar_metrics and step is None:
        step = scalar_metrics.pop("step")
    elif step is None:
        # added metrics by Lightning for convenience
        scalar_metrics['epoch'] = self.trainer.current_epoch
        step = self.trainer.global_step

    # log actual metrics; only rank zero talks to the logger
    if self.trainer.logger is not None:
        if self.trainer.is_global_zero:
            self.trainer.logger.agg_and_log_metrics(scalar_metrics, step=step)
            self.trainer.logger.save()

        # track the logged metrics
        self.logged_metrics.update(scalar_metrics)
        self.trainer.dev_debugger.track_logged_metrics_history(scalar_metrics)