def __init__(self,
             label: str,
             metric: str,
             label_mapping: Optional[Dict[str, Any]] = None,
             bounds: Union[None, str, Iterable[Union[str, None]]] = "std",
             mode: Union[None, str, Iterable[str]] = "eval",
             ds_id: Union[None, str, Iterable[str]] = None,
             outputs: Optional[str] = None) -> None:
    """Set up per-label metric tracking.

    Args:
        label: The key of the labels by which to group data.
        metric: The key of the metric by which to score data.
        label_mapping: An optional dictionary which is stored inverted (values become keys) on the instance. A None
            or empty mapping is stored as None.
        bounds: Which error bounds to record. Each option must be None, 'std', or 'range'. An empty selection is
            treated as {None}.
        mode: Forwarded to the parent constructor.
        ds_id: Forwarded to the parent constructor.
        outputs: The output key. If falsy, it defaults to "<metric>_by_<label>".

    Raises:
        ValueError: If any entry of `bounds` is not one of the allowed options.
    """
    super().__init__(inputs=[label, metric], outputs=outputs or f"{metric}_by_{label}", mode=mode, ds_id=ds_id)
    self.points = []
    self.label_summaries = DefaultKeyDict(default=lambda x: Summary(name=x))
    # Invert the mapping so that later lookups go from the mapping's values back to its keys
    self.label_mapping = {val: key for key, val in label_mapping.items()} if label_mapping else None
    bounds = to_set(bounds)
    if not bounds:
        bounds.add(None)
    for option in bounds:
        if option not in (None, "std", "range"):
            # Name the real parameter and report the specific offending entry
            raise ValueError(f"'bounds' must be either None, 'std', or 'range', but got '{option}'.")
    self.bounds = bounds
def __init__(self, root_log_dir: str, time_stamp: str, network: TFNetwork) -> None:
    """Initialize the parent writer and prepare a lazily-populated collection of TensorFlow file writers.

    Args:
        root_log_dir: The base directory under which summary files are written.
        time_stamp: The name of the sub-directory (within `root_log_dir`) used for this run.
        network: The network forwarded to the parent constructor.
    """
    super().__init__(root_log_dir=root_log_dir, time_stamp=time_stamp, network=network)

    def _open_writer(key):
        # Each key gets its own writer rooted at <root_log_dir>/<time_stamp>/<key>
        target_dir = os.path.join(root_log_dir, time_stamp, key)
        return tf.summary.create_file_writer(target_dir)

    # NOTE(review): presumably DefaultKeyDict invokes the factory with the missing key - confirm
    self.tf_summary_writers = DefaultKeyDict(_open_writer)
def __setstate__(self, state: Dict[str, Any]) -> None:
    """Restore this object's attributes from a dictionary of saved variables.

    This method is invoked by pickle. The 'label_summaries' entry of `state` (or an empty dictionary if it is
    absent) is copied into a fresh DefaultKeyDict, which replaces that entry in `state` before the attributes are
    applied to this instance.

    Args:
        state: The saved state to be used by this object.
    """
    saved_summaries = state.get('label_summaries', {})
    restored = DefaultKeyDict(default=lambda x: Summary(name=x))
    restored.update(saved_summaries)
    state['label_summaries'] = restored
    self.__dict__.update(state)
def on_end(self, data: Data) -> None:
    """Build per-index summaries for the selected indices and publish them.

    For every mode, indices are ranked by the score of their last recorded history entry. The `n_min_to_keep`
    lowest, the `n_max_to_keep` highest, and every index in `idx_to_keep` have their full (step, score) history
    copied into a Summary. The resulting summaries are handed to the system graph registry and written to `data`
    under the first output key.

    Args:
        data: The object into which the list of summaries is written (without logging).
    """
    summaries = DefaultKeyDict(default=lambda x: Summary(name=x))
    for mode in self.mode:
        history = self.index_history[mode]
        # Rank every index by its most recent score (ascending)
        ranked = [(idx, records[-1][1]) for idx, records in history.items()]
        ranked.sort(key=lambda pair: pair[1])
        lowest = {idx for idx, _ in ranked[:self.n_min_to_keep]}
        # Walk backwards from the end so that a count of zero yields an empty selection
        highest = {idx for idx, _ in ranked[-1:-self.n_max_to_keep - 1:-1]}
        for idx in Set.union(lowest, highest, self.idx_to_keep):
            for step, score in history[idx]:
                summaries[idx].history[mode][self.metric_key][step] = score
    output_key = self.outputs[0]
    self.system.add_graph(output_key, list(summaries.values()))  # So traceability can draw it
    data.write_without_log(output_key, list(summaries.values()))
class _TfWriter(_BaseWriter):
    """A writer which records TensorFlow-specific data into TensorBoard summary files.

    This class is intentionally not @traceable.

    Args:
        root_log_dir: The directory into which to store a new directory corresponding to this experiment's summary
            data.
        time_stamp: The timestamp of this experiment (used as a folder name within `root_log_dir`).
        network: The network associated with the current experiment.
    """
    tf_summary_writers: Dict[str, tf.summary.SummaryWriter]

    def __init__(self, root_log_dir: str, time_stamp: str, network: TFNetwork) -> None:
        super().__init__(root_log_dir=root_log_dir, time_stamp=time_stamp, network=network)

        def _open_writer(key):
            # Each key gets its own writer rooted at <root_log_dir>/<time_stamp>/<key>
            target_dir = os.path.join(root_log_dir, time_stamp, key)
            return tf.summary.create_file_writer(target_dir)

        self.tf_summary_writers = DefaultKeyDict(_open_writer)

    def write_epoch_models(self, mode: str) -> None:
        """Write the backend graph, plus a Keras model summary for each eligible model of the current epoch.

        A model is eligible when its class is named 'Sequential' or when it has a truthy `_is_graph_network`
        attribute.

        Args:
            mode: The key of the summary writer to use.
        """
        writer = self.tf_summary_writers[mode]
        with writer.as_default(), summary_ops_v2.always_record_summaries():
            summary_ops_v2.graph(backend.get_graph(), step=0)
            for model in self.network.epoch_models:
                is_sequential = model.__class__.__name__ == 'Sequential'
                if not (is_sequential or getattr(model, '_is_graph_network', False)):
                    continue
                summary_ops_v2.keras_model(model.model_name, model, step=0)

    def write_weights(self, mode: str, models: Iterable[Model], step: int, visualize: bool) -> None:
        """Write a histogram (and optionally an image) for every weight of every layer of the given models.

        Args:
            mode: The key of the summary writer to use.
            models: The models whose layer weights should be recorded.
            step: The step value attached to each summary.
            visualize: Whether to also attempt to write each weight as an image.
        """
        # Similar to TF implementation, but multiple models
        writer = self.tf_summary_writers[mode]
        with writer.as_default(), summary_ops_v2.always_record_summaries():
            for model in models:
                for layer in model.layers:
                    for weight in layer.weights:
                        tag = f"{model.model_name}_{weight.name.replace(':', '_')}"
                        with tfops.init_scope():
                            values = backend.get_value(weight)
                        summary_ops_v2.histogram(tag, values, step=step)
                        if not visualize:
                            continue
                        image = self._weight_to_image(weight=values, kernel_channels_last=True)
                        if image is not None:
                            summary_ops_v2.image(tag, image, step=step, max_images=image.shape[0])

    def close(self) -> None:
        """Close the parent's writers, then close and discard every TensorFlow writer held by this object."""
        super().close()
        # Snapshot the keys first so that entries can be deleted while looping
        open_modes = tuple(self.tf_summary_writers.keys())
        for mode in open_modes:
            self.tf_summary_writers[mode].close()
            del self.tf_summary_writers[mode]
def __init__(self, root_log_dir: str, time_stamp: str, network: BaseNetwork) -> None:
    """Prepare a lazily-populated collection of summary writers and remember the network.

    Args:
        root_log_dir: The base directory under which summary files are written.
        time_stamp: The name of the sub-directory (within `root_log_dir`) used for this run.
        network: The network to store on this writer.
    """

    def _open_writer(key):
        # Each key gets its own writer logging into <root_log_dir>/<time_stamp>/<key>
        target_dir = os.path.join(root_log_dir, time_stamp, key)
        return SummaryWriter(log_dir=target_dir)

    # NOTE(review): presumably DefaultKeyDict invokes the factory with the missing key - confirm
    self.summary_writers = DefaultKeyDict(_open_writer)
    self.network = network
class _BaseWriter:
    """A class to write various types of data into TensorBoard summary files.

    This class is intentionally not @traceable.

    Args:
        root_log_dir: The directory into which to store a new directory corresponding to this experiment's summary
            data.
        time_stamp: The timestamp of this experiment (used as a folder name within `root_log_dir`).
        network: The network associated with the current experiment.
    """
    summary_writers: Dict[str, SummaryWriter]
    network: BaseNetwork

    def __init__(self, root_log_dir: str, time_stamp: str, network: BaseNetwork) -> None:
        # One SummaryWriter per key, each logging into <root_log_dir>/<time_stamp>/<key>
        self.summary_writers = DefaultKeyDict(
            lambda key: SummaryWriter(log_dir=os.path.join(root_log_dir, time_stamp, key)))
        self.network = network

    def write_epoch_models(self, mode: str) -> None:
        """Write summary graphs for all of the models in the current epoch.

        Args:
            mode: The current mode of execution ('train', 'eval', 'test', 'infer').

        Raises:
            NotImplementedError: Always; subclasses are expected to override this method.
        """
        raise NotImplementedError

    def write_weights(self, mode: str, models: Iterable[Model], step: int, visualize: bool) -> None:
        """Write summaries of all of the weights of a given collection of `models`.

        Args:
            mode: The current mode of execution ('train', 'eval', 'test', 'infer').
            models: A list of models compiled with fe.build whose weights should be recorded.
            step: The current training step.
            visualize: Whether to attempt to paint graphical representations of the weights in addition to the
                default histogram summaries.

        Raises:
            NotImplementedError: Always; subclasses are expected to override this method.
        """
        raise NotImplementedError

    def write_scalars(self, mode: str, scalars: Iterable[Tuple[str, Any]], step: int) -> None:
        """Write summaries of scalars to TensorBoard.

        Args:
            mode: The current mode of execution ('train', 'eval', 'test', 'infer').
            scalars: A collection of pairs like [("key", val), ("key2", val2), ...].
            step: The current training step.
        """
        for key, val in scalars:
            self.summary_writers[mode].add_scalar(tag=key, scalar_value=to_number(val), global_step=step)

    def write_images(self, mode: str, images: Iterable[Tuple[str, Any]], step: int) -> None:
        """Write images to TensorBoard.

        ImgData instances are first painted into a figure. Figures are written with `add_figure`; anything else is
        written with `add_images`, using the 'NCHW' layout for torch tensors and 'NHWC' otherwise.

        Args:
            mode: The current mode of execution ('train', 'eval', 'test', 'infer').
            images: A collection of pairs like [("key", image1), ("key2", image2), ...].
            step: The current training step.
        """
        for key, img in images:
            if isinstance(img, ImgData):
                img = img.paint_figure()
            if isinstance(img, plt.Figure):
                self.summary_writers[mode].add_figure(tag=key, figure=img, global_step=step)
            else:
                self.summary_writers[mode].add_images(
                    tag=key,
                    img_tensor=to_number(img),
                    global_step=step,
                    dataformats='NCHW' if isinstance(img, torch.Tensor) else 'NHWC')

    def write_embeddings(
        self,
        mode: str,
        embeddings: Iterable[Tuple[str, Tensor, Optional[List[Any]], Optional[Tensor]]],
        step: int,
    ) -> None:
        """Write embeddings (like UMAP) to TensorBoard.

        Args:
            mode: The current mode of execution ('train', 'eval', 'test', 'infer').
            embeddings: A collection of quadruplets like [("key", <features>, [<label1>, ...], <label_images>)].
                Features are expected to be batched, and if labels and/or label images are provided they should
                have the same batch dimension as the features.
            step: The current training step.
        """
        for key, features, labels, label_imgs in embeddings:
            # Flatten everything except the leading (batch) dimension
            flat = to_number(reshape(features, [features.shape[0], -1]))
            if not isinstance(label_imgs, (torch.Tensor, type(None))):
                label_imgs = to_tensor(label_imgs, 'torch')
                if len(label_imgs.shape) == 4:
                    # Move the last axis to position 1 (presumably channels-last -> channels-first)
                    label_imgs = permute(label_imgs, [0, 3, 1, 2])
            self.summary_writers[mode].add_embedding(mat=flat,
                                                     metadata=labels,
                                                     label_img=label_imgs,
                                                     tag=key,
                                                     global_step=step)

    def close(self) -> None:
        """A method to flush and close all connections to the files on disk."""
        modes = list(self.summary_writers.keys())  # break connection with dictionary so can delete in iteration
        for mode in modes:
            self.summary_writers[mode].close()
            del self.summary_writers[mode]

    @staticmethod
    def _weight_to_image(weight: Tensor, kernel_channels_last: bool = False) -> Optional[Tensor]:
        """Logs a weight as a TensorBoard image.

        Implementation from TensorFlow codebase, would have invoked theirs directly but they didn't make it a static
        method.

        Args:
            weight: The weight to convert. It is squeezed before its rank is inspected.
            kernel_channels_last: If True, 3-D and 4-D weights have their last axis moved to the front before
                further processing.

        Returns:
            A 4-D tensor whose final dimension is 1, 3, or 4, or None if the weight could not be converted into such
            a tensor.
        """
        w_img = squeeze(weight)
        shape = backend.int_shape(w_img)
        if len(shape) == 1:  # Bias case
            w_img = reshape(w_img, [1, shape[0], 1, 1])
        elif len(shape) == 2:  # Dense layer kernel case
            if shape[0] > shape[1]:
                # Transpose tall kernels so that the longer side becomes the second axis. The permutation must be
                # [1, 0]; [0, 1] is the identity and would leave the kernel (and the re-read shape) unchanged.
                w_img = permute(w_img, [1, 0])
                shape = backend.int_shape(w_img)
            w_img = reshape(w_img, [1, shape[0], shape[1], 1])
        elif len(shape) == 3:  # ConvNet case
            if kernel_channels_last:
                # Switch to channels_first to display every kernel as a separate image
                w_img = permute(w_img, [2, 0, 1])
            w_img = expand_dims(w_img, axis=-1)
        elif len(shape) == 4:  # Conv filter with multiple input channels
            if kernel_channels_last:
                # Switch to channels first to display kernels as separate images
                w_img = permute(w_img, [3, 2, 0, 1])
            w_img = reduce_sum(abs(w_img), axis=1)  # Sum over each channel within the kernel
            w_img = expand_dims(w_img, axis=-1)
        shape = backend.int_shape(w_img)
        # Not possible to handle 3D convnets etc.
        if len(shape) == 4 and shape[-1] in [1, 3, 4]:
            return w_img
        return None
class LabelTracker(Trace):
    """A Trace to track metrics grouped by labels, for example per-class loss over time during training.

    Use this in conjunction with ImageViewer or ImageSaver to see the graph at training end. This also automatically
    integrates with Traceability reports.

    Args:
        label: The key of the labels by which to group data.
        metric: The key of the metric by which to score data.
        label_mapping: A mapping of {DisplayName: LabelValue} to use when generating the graph. This can also be used
            to limit which label values are graphed, since any label values not included here will not be graphed. A
            None value will monitor all label values.
        bounds: What error bounds should be graphed around the mean value. Options include None, 'std' for standard
            deviation, and 'range' to plot (min_value, mean, max_value). Multiple values can be specified, ex.
            ['std', 'range'] to generate multiple graphs.
        mode: What mode(s) to execute this Trace in. For example, "train", "eval", "test", or "infer". To execute
            regardless of mode, pass None. To execute in all modes except for a particular one, you can pass an
            argument like "!infer" or "!train".
        ds_id: What dataset id(s) to execute this Trace in. To execute regardless of ds_id, pass None. To execute in
            all ds_ids except for a particular one, you can pass an argument like "!ds1".
        outputs: The name of the output which will be generated by this trace at the end of training. If None then it
            will default to "<metric>_by_<label>".

    Raises:
        ValueError: If `bounds` is not one of the allowed options.
    """
    def __init__(self,
                 label: str,
                 metric: str,
                 label_mapping: Optional[Dict[str, Any]] = None,
                 bounds: Union[None, str, Iterable[Union[str, None]]] = "std",
                 mode: Union[None, str, Iterable[str]] = "eval",
                 ds_id: Union[None, str, Iterable[str]] = None,
                 outputs: Optional[str] = None) -> None:
        super().__init__(inputs=[label, metric], outputs=outputs or f"{metric}_by_{label}", mode=mode, ds_id=ds_id)
        self.points = []
        self.label_summaries = DefaultKeyDict(default=lambda x: Summary(name=x))
        # Stored inverted ({LabelValue: DisplayName}) so raw label values can be looked up directly
        self.label_mapping = {val: key for key, val in label_mapping.items()} if label_mapping else None
        bounds = to_set(bounds)
        if not bounds:
            bounds.add(None)
        for option in bounds:
            if option not in (None, "std", "range"):
                # Name the real parameter and report the specific offending entry
                raise ValueError(f"'bounds' must be either None, 'std', or 'range', but got '{option}'.")
        self.bounds = bounds

    @property
    def label_key(self) -> str:
        """The key under which labels are read (the first input)."""
        return self.inputs[0]

    @property
    def metric_key(self) -> str:
        """The key under which metric values are read (the second input)."""
        return self.inputs[1]

    def on_batch_end(self, data: Data) -> None:
        """Record this batch's (labels, metrics) pair for aggregation at the end of the epoch."""
        self.points.append((to_number(data[self.label_key]), to_number(data[self.metric_key])))

    def on_epoch_end(self, data: Data) -> None:
        """Aggregate the recorded points per label and store the requested statistics for the current step."""
        label_scores = defaultdict(list)
        for batch_labels, batch_metrics in self.points:
            for i, label in enumerate(batch_labels):
                label_scores[label.item()].append(batch_metrics[i].item())
        mode = self.system.mode
        step = self.system.global_step
        for label, scores in label_scores.items():
            if self.label_mapping:
                if label not in self.label_mapping:
                    # Skip labels which the user does not want to inspect
                    continue
                label = self.label_mapping[label]
            # Every allowed bounds option reports the mean, so compute it once per label
            mean = stats.mean(scores)
            # {label: {mode: {key: {step: value}}}}
            history = self.label_summaries[label].history[mode]
            if 'std' in self.bounds:
                # A single sample is reported with zero deviation rather than calling stdev on it
                std = stats.stdev(scores) if len(scores) > 1 else 0.0
                history[f"{self.metric_key} ($\\mu \\pm \\sigma$)"][step] = ValWithError(mean - std, mean, mean + std)
            if 'range' in self.bounds:
                history[f"{self.metric_key} ($min, \\mu, max$)"][step] = ValWithError(min(scores), mean, max(scores))
            if None in self.bounds:
                history[self.metric_key][step] = mean
        self.points = []

    def on_end(self, data: Data) -> None:
        """Publish the accumulated per-label summaries to the system and to `data` under the output key."""
        self.system.add_graph(self.outputs[0], list(self.label_summaries.values()))  # So traceability can draw it
        data.write_without_log(self.outputs[0], list(self.label_summaries.values()))

    def __getstate__(self) -> Dict[str, Any]:
        """Get a representation of the state of this object.

        This method is invoked by pickle. The 'label_summaries' entry is converted into a plain dict in the returned
        copy.

        Returns:
            The information to be recorded by a pickle summary of this object.
        """
        state = self.__dict__.copy()
        state['label_summaries'] = dict(state['label_summaries'])
        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        """Set this objects internal state from a dictionary of variables.

        This method is invoked by pickle. The 'label_summaries' entry is re-wrapped in a DefaultKeyDict before the
        attributes are restored.

        Args:
            state: The saved state to be used by this object.
        """
        label_summaries = DefaultKeyDict(default=lambda x: Summary(name=x))
        label_summaries.update(state.get('label_summaries', {}))
        state['label_summaries'] = label_summaries
        self.__dict__.update(state)
def _draw_diagram(self, mode: str, epoch: int, ds_id: str) -> pydot.Dot:
    """Draw a summary diagram of the FastEstimator Ops / Traces.

    Args:
        mode: The execution mode to summarize ('train', 'eval', 'test', or 'infer').
        epoch: The epoch to summarize.
        ds_id: The ds_id to summarize.

    Returns:
        A pydot digraph representing the execution flow.
    """
    ds = self.system.pipeline.data[mode][ds_id]
    if isinstance(ds, Scheduler):
        ds = ds.get_current_value(epoch)
    # Pipeline ops are only collected when the data source is a Dataset
    if isinstance(ds, Dataset):
        pipe_ops = get_current_items(self.system.pipeline.ops, run_modes=mode, epoch=epoch, ds_id=ds_id)
    else:
        pipe_ops = []
    net_ops = get_current_items(self.system.network.ops, run_modes=mode, epoch=epoch, ds_id=ds_id)
    net_post = get_current_items(self.system.network.postprocessing, run_modes=mode, epoch=epoch, ds_id=ds_id)
    traces = sort_traces(get_current_items(self.system.traces, run_modes=mode, epoch=epoch, ds_id=ds_id),
                         ds_ids=self.system.pipeline.get_ds_ids(epoch=epoch, mode=mode))
    diagram = pydot.Dot(compound='true')  # Compound lets you draw edges which terminate at sub-graphs
    diagram.set('rankdir', 'TB')
    diagram.set('dpi', 300)
    diagram.set_node_defaults(shape='box')

    # Make the dataset the first of the pipeline ops
    pipe_ops.insert(0, ds)
    label_last_seen = DefaultKeyDict(lambda k: str(id(ds)))  # Where was this key last generated

    # Title suffix for the pipeline subgraph. It stays empty unless a concrete batch size is known, so that an
    # unset (None) batch size is never interpolated into the title as "PipelineNone".
    batch_label = ""
    if isinstance(ds, Dataset):
        if hasattr(ds, "fe_batch") and ds.fe_batch:
            batch_size = ds.fe_batch
        else:
            batch_size = self.system.pipeline.batch_size
            if isinstance(batch_size, Scheduler):
                batch_size = batch_size.get_current_value(epoch)
            if isinstance(batch_size, dict):
                batch_size = batch_size[mode]
        if batch_size is not None:
            batch_label = f" (Batch Size: {batch_size})"

    self._draw_subgraph(diagram, diagram, label_last_seen, f'Pipeline{batch_label}', pipe_ops, ds_id)
    self._draw_subgraph(diagram, diagram, label_last_seen, 'Network', net_ops + net_post, ds_id)
    self._draw_subgraph(diagram, diagram, label_last_seen, 'Traces', traces, ds_id)
    return diagram