def on_batch_end(self, batch, logs=None):
    """
    Callback hook: maintains a sliding window per metric whose name ends
    with the configured postfix, and writes the windowed average back into
    ``logs`` under a ``mean_<metric>`` key.

    :param batch: Batch index (unused here; required by the callback API).
    :param logs: Dict of metric name -> value for the current batch;
                 updated in place with the averaged values.
    """
    if not _.is_dict(logs):
        self._log.error("No logs dict given, unable to average metrics")
        return

    average = dict()
    for metric, value in logs.items():
        if not metric.endswith(self._metrics_name_postfix):
            continue

        if metric not in self._window:
            self._log.debug("Creating sliding window for metric [%s]" % metric)
            # Seed the window with previously checkpointed values when available
            has_init_values = _.is_dict(self._window_values) and metric in self._window_values
            init_window_values = self._window_values[metric] if has_init_values else None
            self._window[metric] = SlidingWindow(self._window_length, init_window_values)

        self._window[metric].slide(value)

        m = PerformanceAverager.average(self._window[metric])
        if m is not None:
            # BUGFIX: the original tested `m == math.nan`, which is always
            # False (NaN != NaN), and `m == math.inf`, which misses -inf.
            # Use math.isinf/math.isnan so degenerate means are reported.
            if math.isinf(m) or math.isnan(m):
                self._log.warn("Mean value for %s is %s" % (metric, m))
                self._log.warn("Window values : \n%s\n\n" % str(self._window[metric].get_window()))

            average['mean_%s' % metric] = m

    logs.update(average)
def _get_all_metrics_from(self, base_path, current_logs, batch_level):
    """
    Collect the metrics stored under ``base_path`` in ``current_logs``,
    flattening nested metric dicts into dot-joined names.

    :param base_path: Path into ``current_logs`` where the metrics live.
    :param current_logs: Logs structure to read the metrics from.
    :param batch_level: Forwarded to ``_get_label`` when building labels.
    :return: Tuple ``(metrics, found)``: label -> value mapping and a flag
             that is True when at least one metric was collected.
    """
    dataset_metrics = get_value_at(base_path, current_logs)

    # TODO : Make this a library level constant
    skip_metric_names = {"auxiliary_results"}

    metrics = {}

    def _collect(source, prefix=None):
        # Walk one level of the metrics dict, recursing into nested dicts.
        for name, value in source.items():
            if name in skip_metric_names:
                continue
            if value is None:
                continue

            full_name = name if prefix is None else f"{prefix}.{name}"

            if _.is_dict(value):
                _collect(value, full_name)
            else:
                metrics[self._get_label(full_name, batch_level)] = value

    if _.is_dict(dataset_metrics):
        _collect(dataset_metrics)

    return metrics, len(metrics) > 0
def convert_to_dict(type, components):
    """
    Normalize ``components`` into a dict keyed by ``type``.

    A one-element sequence becomes ``{type: component}``; a longer sequence
    becomes ``{f"{type}_<i>": component}``. Dicts (and empty sequences) are
    returned unchanged; any other value is wrapped as a single entry.

    :param type: Base name used to build the dict keys.
    :param components: Dict, sequence, or single component.
    :return: Dict mapping generated names to components.
    """
    if _.is_sequence(components):
        count = len(components)
        if count == 1:
            return {type: components[0]}
        if count > 1:
            return {
                f"{type}_{idx}": component
                for idx, component in enumerate(components)
            }
        # Empty sequence: passed through untouched, as in the original
        return components

    if _.is_dict(components):
        return components

    # Assuming single optimizer
    return {type: components}
def _add_metrics(m, base_path=None):
    # Flatten the metrics dict `m` into the enclosing `metrics` mapping,
    # joining nested keys with '.' and skipping blacklisted/None entries.
    # NOTE: relies on closure variables `metrics`, `skip_metric_names`,
    # `self` and `batch_level` from the enclosing scope.
    for name, value in m.items():
        if name in skip_metric_names or value is None:
            continue

        full_name = name if base_path is None else f"{base_path}.{name}"

        if _.is_dict(value):
            # Recurse into nested metric groups
            _add_metrics(value, full_name)
        else:
            metrics[self._get_label(full_name, batch_level)] = value
def _create_log_for(self, metrics, base_metric=None, log_depth=0):
    """
    Build an indented, human-readable log string for a (possibly nested)
    metrics dict.

    :param metrics: Dict of metric name -> value; tuple values contribute
                    only their first element, dict values are recursed into.
    :param base_metric: Name of the parent metric when recursing.
    :param log_depth: Current nesting depth, used for indentation.
    :return: Formatted log string, or None when there is nothing to log.
    """
    if not _.is_dict(metrics):
        return None

    metric_names = set(metrics.keys())

    # TODO : Make this a library level constant
    skip_metric_names = {"auxiliary_results", "duration"}

    num_metrics = len(metric_names - skip_metric_names)
    if num_metrics < 1:
        return None

    log = "\n" * int(log_depth > 0) + "\t" * log_depth
    if base_metric is not None:
        log += f"{base_metric:<15}: "

    metric_value_logs = []
    # IMPROVED: the original used enumerate() but never used the index.
    for metric, value in metrics.items():
        if metric in skip_metric_names:
            continue

        if type(value) is tuple:
            # use the first value as metric value, the other values are
            # auxiliary results meant for other purposes
            value = value[0]

        if type(value) is dict:
            nested_logs = self._create_log_for(value, metric, log_depth + 1)
            if nested_logs is not None:
                metric_value_logs += ["\n" + nested_logs]
        else:
            try:
                log_format = self._get_log_format(value)
                metric_value_logs += [log_format.format(metric, value)]
            except Exception:
                # Best-effort: an unformattable value must not break logging
                # (exception object was unused, so it is no longer bound)
                metric_value_logs += ["[UNKNOWN]"]

    if len(metric_value_logs) > 0:
        log += ', '.join(metric_value_logs)

    return log
def set_state(self, state):
    """
    Restore the checkpoint manager state so tracking of the best model
    continues across training sessions.

    :param state: Dict with saved checkpoint state; anything else starts
                  tracking from a clean slate.
    :return: True when the state was applied (or absent), False on error.
    """
    if not _.is_dict(state):
        self._log.debug("No initial checkpoint state given, starting with clean slate ...")
        return True

    self._log.debug("Using given initial checkpoint state: \n\n%s" % json.dumps(state, indent=4))

    try:
        self._best_model_quality = state['best_model_quality']
        self._best_model_iter = state['best_model_iter']
    except Exception as e:
        _.log_exception(self._log, "Unable to set checkpoint manager state", e)
        return False

    return True
def _set_state(self, checkpoint_state):
    """
    Apply a previously saved checkpoint state to this instance.

    A missing/invalid state is not an error: the instance simply starts
    from a clean slate. Failures while reading the state dict are logged
    and swallowed, leaving any already-applied fields in place.

    :param checkpoint_state: Dict with the saved checkpoint state, or None.
    """
    # Debug logging is only emitted in simulation or debug mode
    verbose = self._simulation_mode or self._debug_mode

    if not _.is_dict(checkpoint_state):
        if verbose:
            self._log.debug(
                "No initial checkpoint state given, starting with clean slate ..."
            )
        return

    if verbose:
        self._log.debug("Using given initial checkpoint state: \n\n%s" %
                        json.dumps(checkpoint_state, indent=4))

    try:
        self._model_quality = checkpoint_state['model_quality']
        self._model_iter = checkpoint_state['model_iter']
        self._best_model = checkpoint_state['best_model']
        self._best_model_quality = checkpoint_state['best_model_quality']
        self._earliest_good_model = checkpoint_state['earliest_good_model']
        self._earliest_good_model_iter = checkpoint_state['earliest_good_model_iter']
        self._iter = checkpoint_state['iter']
    except Exception as e:
        _.log_exception(self._log, "Unable to set checkpoint state", e)