# Imports (assumed from the SlowFast repo layout).
import numpy as np
import torch
import tqdm

import slowfast.datasets.utils as data_utils
import slowfast.utils.distributed as du
import slowfast.utils.logging as logging
import slowfast.visualization.tensorboard_vis as tb
from slowfast.utils.misc import get_class_names
from slowfast.visualization.gradcam_utils import GradCAM
from slowfast.visualization.utils import (
    GetWeightAndActivation,
    process_layer_index_data,
)
from slowfast.visualization.video_visualizer import VideoVisualizer

logger = logging.get_logger(__name__)


class WrongPredictionVis:
    """
    WrongPredictionVis class for visualizing video inputs to Tensorboard
    for instances where the model makes wrong predictions.
    """

    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
        """
        self.cfg = cfg
        self.class_names, _, self.subset = get_class_names(
            cfg.TENSORBOARD.CLASS_NAMES_PATH,
            subset_path=cfg.TENSORBOARD.WRONG_PRED_VIS.SUBSET_PATH,
        )
        if self.subset is not None:
            self.subset = set(self.subset)
        self.num_class = cfg.MODEL.NUM_CLASSES
        self.video_vis = VideoVisualizer(
            cfg.MODEL.NUM_CLASSES,
            cfg.TENSORBOARD.CLASS_NAMES_PATH,
            1,
            cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
        )
        self.tag = cfg.TENSORBOARD.WRONG_PRED_VIS.TAG
        self.writer = tb.TensorboardWriter(cfg)
        self.model_incorrect_classes = set()

    def _pick_wrong_preds(self, labels, preds):
        """
        Returns a boolean mask over instances that have wrong predictions
        and whose true labels are in the specified subset.
        Args:
            labels (tensor): tensor of shape (n_instances,) containing class ids.
            preds (tensor): class scores from model, shape
                (n_instances, n_classes).
        Returns:
            mask (tensor): boolean tensor. `mask[i]` is True if the model
                makes a wrong prediction for instance `i`.
        """
        subset_mask = torch.ones(size=(len(labels),), dtype=torch.bool)
        if self.subset is not None:
            for i, label in enumerate(labels):
                if label not in self.subset:
                    subset_mask[i] = False

        preds_ids = torch.argmax(preds, dim=-1)
        mask = preds_ids != labels
        mask &= subset_mask
        # Record the true classes that the model got wrong.
        for i, wrong_pred in enumerate(mask):
            if wrong_pred:
                self.model_incorrect_classes.add(labels[i])

        return mask

    def visualize_vid(self, video_input, labels, preds, batch_idx):
        """
        Draw predicted labels on video inputs and visualize all incorrectly
        classified videos in the current batch. Visualized videos are added
        under the tag prefix `cfg.TENSORBOARD.WRONG_PRED_VIS.TAG` for
        organization purposes in Tensorboard.
        Args:
            video_input (list of list of tensor(s)): list of videos for all
                pathways.
            labels (array-like): shape (n_instances,) of true label for each
                instance.
            preds (tensor): shape (n_instances, n_classes). The predicted
                scores for all instances.
            batch_idx (int): batch index of the current videos.
        """

        def add_video(vid, preds, tag, true_class_name):
            """
            Draw predicted label on video and add it to Tensorboard.
            Args:
                vid (array-like): shape (C, T, H, W). Each image in `vid` is
                    an RGB image.
                preds (tensor): shape (n_classes,) or (1, n_classes). The
                    predicted scores for the current `vid`.
                tag (str): tag for `vid` in Tensorboard.
                true_class_name (str): the ground-truth class name of the
                    current `vid` instance.
            """
            # Permute to (T, H, W, C).
            vid = vid.permute(1, 2, 3, 0)
            vid = data_utils.revert_tensor_normalize(
                vid.cpu(), self.cfg.DATA.MEAN, self.cfg.DATA.STD
            )
            vid = self.video_vis.draw_clip(vid, preds)
            vid = torch.from_numpy(np.array(vid)).permute(0, 3, 1, 2)
            vid = torch.unsqueeze(vid, dim=0)
            self.writer.add_video(
                vid, tag="{}: {}".format(tag, true_class_name)
            )

        mask = self._pick_wrong_preds(labels, preds)
        video_indices = torch.squeeze(mask.nonzero(), dim=-1)
        # Visualize each wrongly classified video.
        for vid_idx in video_indices:
            cur_vid_idx = batch_idx * len(video_input[0]) + vid_idx
            for pathway in range(len(video_input)):
                add_video(
                    video_input[pathway][vid_idx],
                    preds=preds[vid_idx],
                    tag=self.tag
                    + "/Video {}, Pathway {}".format(cur_vid_idx, pathway),
                    true_class_name=self.class_names[labels[vid_idx]],
                )

    @property
    def wrong_class_prediction(self):
        """
        Return class names that the model predicted incorrectly.
        """
        incorrect_class_names = [
            self.class_names[i] for i in self.model_incorrect_classes
        ]
        return list(set(incorrect_class_names))

    def clean(self):
        """
        Close Tensorboard writer.
        """
        self.writer.close()
def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Run model visualization (weights, activations and model inputs) and plot
    them on Tensorboard.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object used
            to write the Tensorboard log.
    """
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    prefix = "module/" if n_devices > 1 else ""
    # Get a list of selected layer names and indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        cfg.TENSORBOARD.MODEL_VIS.LAYER_LIST, layer_name_prefix=prefix
    )
    logger.info("Start Model Visualization.")
    # Register hooks for activations.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and cfg.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS:
        layer_weights = model_vis.get_weights()
        writer.plot_weights_and_activations(
            layer_weights, tag="Layer Weights/", heat_map=False
        )

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    if n_devices > 1:
        grad_cam_layer_ls = [
            "module/" + layer
            for layer in cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST
        ]
    else:
        grad_cam_layer_ls = cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST

    if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
        gradcam = GradCAM(
            model,
            target_layers=grad_cam_layer_ls,
            data_mean=cfg.DATA.MEAN,
            data_std=cfg.DATA.STD,
            colormap=cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.COLORMAP,
        )
    logger.info("Finish drawing weights.")
    global_idx = -1
    for inputs, labels, _, meta in tqdm.tqdm(vis_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"]
            )
        else:
            activations, preds = model_vis.get_activations(inputs)
        if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.USE_TRUE_LABEL:
                inputs, preds = gradcam(inputs, labels=labels)
            else:
                inputs, preds = gradcam(inputs)
        if cfg.NUM_GPUS:
            # Gather inputs, activations and predictions from all devices,
            # then move them to CPU for visualization.
            inputs = du.all_gather_unaligned(inputs)
            activations = du.all_gather_unaligned(activations)
            preds = du.all_gather_unaligned(preds)
            if isinstance(inputs[0], list):
                for i in range(len(inputs)):
                    for j in range(len(inputs[0])):
                        inputs[i][j] = inputs[i][j].cpu()
            else:
                inputs = [inp.cpu() for inp in inputs]
            preds = [pred.cpu() for pred in preds]
        else:
            inputs, activations, preds = [inputs], [activations], [preds]

        boxes = [None] * max(n_devices, 1)
        if cfg.DETECTION.ENABLE and cfg.NUM_GPUS:
            boxes = du.all_gather_unaligned(meta["boxes"])
            boxes = [box.cpu() for box in boxes]

        if writer is not None:
            total_vids = 0
            for i in range(max(n_devices, 1)):
                cur_input = inputs[i]
                cur_activations = activations[i]
                cur_batch_size = cur_input[0].shape[0]
                cur_preds = preds[i]
                cur_boxes = boxes[i]
                for cur_batch_idx in range(cur_batch_size):
                    global_idx += 1
                    total_vids += 1
                    if (
                        cfg.TENSORBOARD.MODEL_VIS.INPUT_VIDEO
                        or cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE
                    ):
                        for path_idx, input_pathway in enumerate(cur_input):
                            if cfg.TEST.DATASET == "ava" and cfg.AVA.BGR:
                                # Convert BGR input back to RGB channel order.
                                video = input_pathway[
                                    cur_batch_idx, [2, 1, 0], ...
                                ]
                            else:
                                video = input_pathway[cur_batch_idx]

                            if not cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                                # Permute to (T, H, W, C) from (C, T, H, W).
                                video = video.permute(1, 2, 3, 0)
                                video = data_utils.revert_tensor_normalize(
                                    video, cfg.DATA.MEAN, cfg.DATA.STD
                                )
                            else:
                                # Permute from (T, C, H, W) to (T, H, W, C).
                                video = video.permute(0, 2, 3, 1)
                            bboxes = (
                                None if cur_boxes is None else cur_boxes[:, 1:]
                            )
                            cur_prediction = (
                                cur_preds
                                if cfg.DETECTION.ENABLE
                                else cur_preds[cur_batch_idx]
                            )
                            video = video_vis.draw_clip(
                                video, cur_prediction, bboxes=bboxes
                            )
                            video = (
                                torch.from_numpy(np.array(video))
                                .permute(0, 3, 1, 2)
                                .unsqueeze(0)
                            )
                            writer.add_video(
                                video,
                                tag="Input {}/Pathway {}".format(
                                    global_idx, path_idx + 1
                                ),
                            )
                    if cfg.TENSORBOARD.MODEL_VIS.ACTIVATIONS:
                        writer.plot_weights_and_activations(
                            cur_activations,
                            tag="Input {}/Activations: ".format(global_idx),
                            batch_idx=cur_batch_idx,
                            indexing_dict=indexing_dict,
                        )
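
# A minimal end-to-end sketch of how run_visualization might be invoked.
# Hedged: `visualize_model` is a hypothetical wrapper, and `build_model`,
# `loader.construct_loader`, and `cu.load_test_checkpoint` are assumed from
# the SlowFast repo layout rather than defined in this file.
def visualize_model(cfg):
    """
    Build the model and test loader, then run model visualization (sketch).
    """
    from slowfast.datasets import loader  # assumed repo module
    from slowfast.models import build_model  # assumed repo module
    import slowfast.utils.checkpoint as cu  # assumed repo module

    model = build_model(cfg)
    model.eval()
    # Restore trained weights before visualizing.
    cu.load_test_checkpoint(cfg, model)
    vis_loader = loader.construct_loader(cfg, "test")
    writer = tb.TensorboardWriter(cfg)
    run_visualization(vis_loader, model, cfg, writer)
    writer.close()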