Example #1
def run_demo(cfg, frame_provider):
    """
    Run demo visualization.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        frame_provider (iterator): Python iterator that returns task objects filled with the
            necessary information, such as `frames`, `id` and `num_buffer_frames`, for the
            prediction and visualization pipeline.
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    assert cfg.NUM_GPUS <= 1, "Cannot run demo on multiple GPUs."
    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.DEMO.LABEL_FILE_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )

    if cfg.DETECTION.ENABLE:
        object_detector = Detectron2Predictor(cfg)

    model = ActionPredictor(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    assert (cfg.DEMO.BUFFER_SIZE <= seq_len //
            2), "Buffer size cannot be greater than half of sequence length."
    init_task_info(
        frame_provider.display_height,
        frame_provider.display_width,
        cfg.DATA.TEST_CROP_SIZE,
        cfg.DEMO.CLIP_VIS_SIZE,
    )
    for able_to_read, task in frame_provider:
        if not able_to_read:
            break

        if cfg.DETECTION.ENABLE:
            task = object_detector(task)

        task = model(task)
        frames = draw_predictions(task, video_vis)
        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break
        yield frames
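
A possible way to consume this generator is sketched below. Nothing here is part of the SlowFast API: the assumption is simply that `draw_predictions` returned a list of displayable frames and that `cfg` and `frame_provider` were built as the docstring describes.

# Illustrative consumer sketch; `cfg` and `frame_provider` are assumed to exist.
import cv2

for frames in run_demo(cfg, frame_provider):
    for frame in frames:
        cv2.imshow("SlowFast demo", frame)
        cv2.waitKey(1)
cv2.destroyAllWindows()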
Example #2
def __init__(self, cfg):
    """
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    self.cfg = cfg
    self.class_names, _, self.subset = get_class_names(
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        subset_path=cfg.TENSORBOARD.WRONG_PRED_VIS.SUBSET_PATH,
    )
    if self.subset is not None:
        self.subset = set(self.subset)
    self.num_class = cfg.MODEL.NUM_CLASSES
    self.video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        1,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    self.tag = cfg.TENSORBOARD.WRONG_PRED_VIS.TAG
    self.writer = tb.TensorboardWriter(cfg)
    self.model_incorrect_classes = set()
Example #3
def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Run model visualization (weights, activations and model inputs) and visualize
    them on Tensorboard.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    prefix = "module/" if n_devices > 1 else ""
    # Get a list of selected layer names and indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        cfg.TENSORBOARD.MODEL_VIS.LAYER_LIST, layer_name_prefix=prefix)
    logger.info("Start Model Visualization.")
    # Register hooks for activations.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and cfg.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS:
        layer_weights = model_vis.get_weights()
        writer.plot_weights_and_activations(layer_weights,
                                            tag="Layer Weights/",
                                            heat_map=False)

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    if n_devices > 1:
        grad_cam_layer_ls = [
            "module/" + layer
            for layer in cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST
        ]
    else:
        grad_cam_layer_ls = cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST

    if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
        gradcam = GradCAM(
            model,
            target_layers=grad_cam_layer_ls,
            data_mean=cfg.DATA.MEAN,
            data_std=cfg.DATA.STD,
            colormap=cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.COLORMAP,
        )
    logger.info("Finish drawing weights.")
    global_idx = -1
    for inputs, labels, _, meta in tqdm.tqdm(vis_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"])
        else:
            activations, preds = model_vis.get_activations(inputs)
        if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.USE_TRUE_LABEL:
                inputs, preds = gradcam(inputs, labels=labels)
            else:
                inputs, preds = gradcam(inputs)
        if cfg.NUM_GPUS:
            inputs = du.all_gather_unaligned(inputs)
            activations = du.all_gather_unaligned(activations)
            preds = du.all_gather_unaligned(preds)
            if isinstance(inputs[0], list):
                for i in range(len(inputs)):
                    for j in range(len(inputs[0])):
                        inputs[i][j] = inputs[i][j].cpu()
            else:
                inputs = [inp.cpu() for inp in inputs]
            preds = [pred.cpu() for pred in preds]
        else:
            inputs, activations, preds = [inputs], [activations], [preds]

        boxes = [None] * max(n_devices, 1)
        if cfg.DETECTION.ENABLE and cfg.NUM_GPUS:
            boxes = du.all_gather_unaligned(meta["boxes"])
            boxes = [box.cpu() for box in boxes]

        if writer is not None:
            total_vids = 0
            for i in range(max(n_devices, 1)):
                cur_input = inputs[i]
                cur_activations = activations[i]
                cur_batch_size = cur_input[0].shape[0]
                cur_preds = preds[i]
                cur_boxes = boxes[i]
                for cur_batch_idx in range(cur_batch_size):
                    global_idx += 1
                    total_vids += 1
                    if (cfg.TENSORBOARD.MODEL_VIS.INPUT_VIDEO
                            or cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE):
                        for path_idx, input_pathway in enumerate(cur_input):
                            if cfg.TEST.DATASET == "ava" and cfg.AVA.BGR:
                                video = input_pathway[cur_batch_idx, [2, 1, 0],
                                                      ...]
                            else:
                                video = input_pathway[cur_batch_idx]

                            if not cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                                # Permute to (T, H, W, C) from (C, T, H, W).
                                video = video.permute(1, 2, 3, 0)
                                video = data_utils.revert_tensor_normalize(
                                    video, cfg.DATA.MEAN, cfg.DATA.STD)
                            else:
                                # Permute from (T, C, H, W) to (T, H, W, C)
                                video = video.permute(0, 2, 3, 1)
                            bboxes = (None if cur_boxes is None else
                                      cur_boxes[:, 1:])
                            cur_prediction = (cur_preds if cfg.DETECTION.ENABLE
                                              else cur_preds[cur_batch_idx])
                            video = video_vis.draw_clip(video,
                                                        cur_prediction,
                                                        bboxes=bboxes)
                            video = (torch.from_numpy(np.array(video)).permute(
                                0, 3, 1, 2).unsqueeze(0))
                            writer.add_video(
                                video,
                                tag="Input {}/Pathway {}".format(
                                    global_idx, path_idx + 1),
                            )
                    if cfg.TENSORBOARD.MODEL_VIS.ACTIVATIONS:
                        writer.plot_weights_and_activations(
                            cur_activations,
                            tag="Input {}/Activations: ".format(global_idx),
                            batch_idx=cur_batch_idx,
                            indexing_dict=indexing_dict,
                        )
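
The two `permute` branches above exist because the raw clip tensor is laid out as (C, T, H, W), while the Grad-CAM branch receives clips already in (T, C, H, W) (and, as the missing `revert_tensor_normalize` call suggests, already de-normalized); both end up in the (T, H, W, C) layout the visualizer draws on. A minimal sketch with purely illustrative shapes:

# Dummy shapes; only the permutation logic mirrors the code above.
import torch

C, T, H, W = 3, 8, 224, 224
raw_clip = torch.rand(C, T, H, W)        # model-input layout
vis_clip = raw_clip.permute(1, 2, 3, 0)  # -> (T, H, W, C) for the visualizer

cam_clip = torch.rand(T, C, H, W)        # Grad-CAM output layout
vis_cam = cam_clip.permute(0, 2, 3, 1)   # -> (T, H, W, C)

assert vis_clip.shape == vis_cam.shape == (T, H, W, C)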
Example #4
def run_demo(cfg, frame_provider):
    """
    Run demo visualization.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        frame_provider (iterator): Python iterator that returns task objects filled with the
            necessary information, such as `frames`, `id` and `num_buffer_frames`, for the
            prediction and visualization pipeline.
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    common_classes = (cfg.DEMO.COMMON_CLASS_NAMES
                      if len(cfg.DEMO.LABEL_FILE_PATH) != 0 else None)
    ## draw box
    video_vis = VideoVisualizer(
        num_classes=cfg.MODEL.NUM_CLASSES,
        class_names_path=cfg.DEMO.LABEL_FILE_PATH,
        top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        thres=cfg.DEMO.COMMON_CLASS_THRES,
        lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES,
        common_class_names=common_classes,
        colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
        mode=cfg.DEMO.VIS_MODE,
    )

    async_vis = AsyncVis(video_vis, n_workers=cfg.DEMO.NUM_VIS_INSTANCES)

    if cfg.NUM_GPUS <= 1:
        model = ActionPredictor(cfg=cfg, async_vis=async_vis)  # Instantiate the action prediction class.
    else:
        model = AsyncDemo(cfg=cfg, async_vis=async_vis)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE

    assert (cfg.DEMO.BUFFER_SIZE <= seq_len //
            2), "Buffer size cannot be greater than half of sequence length."
    num_task = 0
    # Start reading frames.
    frame_provider.start()
    for able_to_read, task in frame_provider:
        if not able_to_read:
            break
        if task is None:
            time.sleep(0.02)
            continue
        num_task += 1
        # Start the detection and recognition task.
        model.put(task)
        try:
            task = model.get()
            num_task -= 1
            yield task
        except IndexError:
            continue

    while num_task != 0:
        try:
            task = model.get()
            num_task -= 1
            yield task
        except IndexError:
            continue
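
A sketch of how the yielded tasks might be consumed. The attributes used (`frames`, `num_buffer_frames`) are the ones the docstring names; everything else, including how frames are displayed, is an assumption rather than part of the library.

# Illustrative consumer; `cfg` and `frame_provider` are assumed to be set up as in the docstring.
import cv2

for task in run_demo(cfg, frame_provider):
    # Skip frames kept only as temporal buffer and show the newly processed ones.
    for frame in task.frames[task.num_buffer_frames:]:
        cv2.imshow("SlowFast demo", frame)
        if cv2.waitKey(1) == 27:  # Esc
            break
cv2.destroyAllWindows()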
Example #5
    def draw_video(self):
        """
        Draw predicted and ground-truth (if provided) results on the video/folder of images.
        Write the visualized result to a video output file.
        """
        all_boxes = merge_pred_gt_boxes(self.pred_boxes, self.gt_boxes)
        common_classes = (self.cfg.DEMO.COMMON_CLASS_NAMES
                          if len(self.cfg.DEMO.LABEL_FILE_PATH) != 0 else None)
        video_vis = VideoVisualizer(
            num_classes=self.cfg.MODEL.NUM_CLASSES,
            class_names_path=self.cfg.DEMO.LABEL_FILE_PATH,
            top_k=self.cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
            thres=self.cfg.DEMO.COMMON_CLASS_THRES,
            lower_thres=self.cfg.DEMO.UNCOMMON_CLASS_THRES,
            common_class_names=common_classes,
            colormap=self.cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
            mode=self.cfg.DEMO.VIS_MODE,
        )

        all_keys = sorted(all_boxes.keys())
        # Draw around the keyframe for 2/10 of the sequence length.
        # This is chosen using heuristics.
        draw_range = [
            self.seq_length // 2 - self.seq_length // 10,
            self.seq_length // 2 + self.seq_length // 10,
        ]
        draw_range_repeat = [
            draw_range[0],
            (draw_range[1] - draw_range[0]) * self.no_frames_repeat +
            draw_range[0],
        ]
        prev_buffer = []
        prev_end_idx = 0

        logger.info("Start Visualization...")
        for keyframe_idx in tqdm.tqdm(all_keys):
            pred_gt_boxes = all_boxes[keyframe_idx]
            # Find the starting index of the clip. If start_idx would fall before the
            # beginning of the video, clamp it to 0 so only valid frames are chosen.
            start_idx = max(0, keyframe_idx - self.seq_length // 2)
            # Number of frames between the end of the previous clip and the
            # start of the current clip.
            dist = start_idx - prev_end_idx
            # If there are unwritten frames in between clips.
            if dist >= 0:
                # Get the frames in between previous clip and current clip.
                frames = self._get_frame_range(prev_end_idx, dist)
                # We keep a buffer of frames for overlapping visualization.
                # Write these to the output file.
                for frame in prev_buffer:
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    self.display(frame)
                # Write them to output file without any visualization
                # since they don't have any corresponding keyframes.
                for frame in frames:
                    self.display(frame)
                prev_buffer = []
                num_new_frames = self.seq_length

            # If there are overlapping frames in between clips.
            elif dist < 0:
                # Flush all ready frames.
                for frame in prev_buffer[:dist]:
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    self.display(frame)
                prev_buffer = prev_buffer[dist:]
                num_new_frames = self.seq_length + dist
            # Obtain new frames for the current clip from the input video file.
            new_frames = self._get_frame_range(max(start_idx, prev_end_idx),
                                               num_new_frames)
            new_frames = [
                cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in new_frames
            ]
            clip = prev_buffer + new_frames
            # Calculate the end of this clip. This will be `prev_end_idx` for the
            # next iteration.
            prev_end_idx = max(start_idx, prev_end_idx) + len(new_frames)
            # For each precomputed or gt boxes.
            for i, boxes in enumerate(pred_gt_boxes):
                if i == 0:
                    repeat = self.no_frames_repeat
                    current_draw_range = draw_range
                else:
                    repeat = 1
                    current_draw_range = draw_range_repeat
                # Make sure draw range does not fall out of end of clip.
                current_draw_range[1] = min(current_draw_range[1],
                                            len(clip) - 1)
                ground_truth = boxes[0]
                bboxes = boxes[1]
                label = boxes[2]
                # Draw predictions.
                clip = video_vis.draw_clip_range(
                    clip,
                    label,
                    bboxes=torch.Tensor(bboxes),
                    ground_truth=ground_truth,
                    draw_range=current_draw_range,
                    repeat_frame=repeat,
                )
            # Store the current clip as buffer.
            prev_buffer = clip

        # Write the remaining buffer to output file.
        for frame in prev_buffer:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            self.display(frame)
        # If we still have some remaining frames in the input file,
        # write those to the output file as well.
        if prev_end_idx < self.total_frames:
            dist = self.total_frames - prev_end_idx
            remaining_clip = self._get_frame_range(prev_end_idx, dist)
            for frame in remaining_clip:
                self.display(frame)
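
For concreteness, the draw-range arithmetic near the top of draw_video works out as follows (the seq_length and no_frames_repeat values are illustrative, not from the source):

# Worked example of the draw-range heuristic above.
seq_length, no_frames_repeat = 64, 4
draw_range = [
    seq_length // 2 - seq_length // 10,  # 26
    seq_length // 2 + seq_length // 10,  # 38
]
draw_range_repeat = [
    draw_range[0],
    (draw_range[1] - draw_range[0]) * no_frames_repeat + draw_range[0],  # 74
]
# Roughly 2/10 of the clip around the keyframe is drawn; after the first entry has
# repeated that window no_frames_repeat times, later entries draw over the longer range.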
Example #6
class WrongPredictionVis:
    """
    WrongPredictionVis class for visualizing video inputs to Tensorboard
    for instances that the model makes wrong predictions.
    """
    def __init__(self, cfg):
        """
        Args:
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
        """
        self.cfg = cfg
        self.class_names, _, self.subset = get_class_names(
            cfg.TENSORBOARD.CLASS_NAMES_PATH,
            subset_path=cfg.TENSORBOARD.WRONG_PRED_VIS.SUBSET_PATH,
        )
        if self.subset is not None:
            self.subset = set(self.subset)
        self.num_class = cfg.MODEL.NUM_CLASSES
        self.video_vis = VideoVisualizer(
            cfg.MODEL.NUM_CLASSES,
            cfg.TENSORBOARD.CLASS_NAMES_PATH,
            1,
            cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
        )
        self.tag = cfg.TENSORBOARD.WRONG_PRED_VIS.TAG
        self.writer = tb.TensorboardWriter(cfg)
        self.model_incorrect_classes = set()

    def _pick_wrong_preds(self, labels, preds):
        """
        Returns a 1D tensor that contains the indices of instances that have
        wrong predictions and whose true labels are in the specified subset.
        Args:
            labels (tensor): tensor of shape (n_instances,) containing class ids.
            preds (tensor): class scores from the model, shape (n_instances, n_classes).
        Returns:
            mask (tensor): boolean tensor. `mask[i]` is True if the model makes a wrong
                prediction for instance `i`.
        """
        subset_mask = torch.ones(size=(len(labels), ), dtype=torch.bool)
        if self.subset is not None:
            for i, label in enumerate(labels):
                if label not in self.subset:
                    subset_mask[i] = False

        preds_ids = torch.argmax(preds, dim=-1)

        mask = preds_ids != labels
        mask &= subset_mask
        for i, wrong_pred in enumerate(mask):
            if wrong_pred:
                self.model_incorrect_classes.add(labels[i])

        return mask

    def visualize_vid(self, video_input, labels, preds, batch_idx):
        """
        Draw predicted labels on video inputs and visualize all incorrectly classified
        videos in the current batch.
        Args:
            video_input (list of list of tensor(s)): list of videos for all pathways.
            labels (array-like): shape (n_instances,) of true label for each instance.
            preds (tensor): shape (n_instances, n_classes). The predicted scores for all instances.
            tag (Optional[str]): all visualized videos will be added under this tag. This is for organization
                purposes in Tensorboard.
            batch_idx (int): batch index of the current videos.
        """
        def add_video(vid, preds, tag, true_class_name):
            """
            Draw predicted label on video and add it to Tensorboard.
            Args:
                vid (array-like): shape (C, T, H, W). Each image in `vid` is an RGB image.
                preds (tensor): shape (n_classes,) or (1, n_classes). The predicted scores
                    for the current `vid`.
                tag (str): tag for `vid` in Tensorboard.
                true_class_name (str): the ground-truth class name of the current `vid` instance.
            """
            # Permute to (T, H, W, C).
            vid = vid.permute(1, 2, 3, 0)
            vid = data_utils.revert_tensor_normalize(vid.cpu(),
                                                     self.cfg.DATA.MEAN,
                                                     self.cfg.DATA.STD)
            vid = self.video_vis.draw_clip(vid, preds)
            vid = torch.from_numpy(np.array(vid)).permute(0, 3, 1, 2)
            vid = torch.unsqueeze(vid, dim=0)
            self.writer.add_video(vid,
                                  tag="{}: {}".format(tag, true_class_name))

        mask = self._pick_wrong_preds(labels, preds)
        video_indices = torch.squeeze(mask.nonzero(), dim=-1)
        # Visualize each wrongly classified video.
        for vid_idx in video_indices:
            cur_vid_idx = batch_idx * len(video_input[0]) + vid_idx
            for pathway in range(len(video_input)):
                add_video(
                    video_input[pathway][vid_idx],
                    preds=preds[vid_idx],
                    tag=self.tag +
                    "/Video {}, Pathway {}".format(cur_vid_idx, pathway),
                    true_class_name=self.class_names[labels[vid_idx]],
                )

    @property
    def wrong_class_prediction(self):
        """
        Return class ids that the model predicted incorrectly.
        """
        incorrect_class_names = [
            self.class_names[i] for i in self.model_incorrect_classes
        ]
        return list(set(incorrect_class_names))

    def clean(self):
        """
        Close Tensorboard writer.
        """
        self.writer.close()
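
A possible evaluation loop around this class, using only the methods shown above; `val_loader` and `model` are placeholders, and the 4-tuple unpacking simply mirrors the loader format used in the other examples.

# Illustrative sketch, not part of the source.
wrong_vis = WrongPredictionVis(cfg)
for batch_idx, (inputs, labels, _, _) in enumerate(val_loader):
    preds = model(inputs)  # (n_instances, n_classes) class scores
    wrong_vis.visualize_vid(inputs, labels, preds, batch_idx)
print("Classes with at least one wrong prediction:", wrong_vis.wrong_class_prediction)
wrong_vis.clean()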
Example #7
def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Run model visualization (weights, activations and model inputs) and visualize
    them on Tensorboard.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    prefix = "module/" if n_devices > 1 else ""
    # Get a list of selected layer names and indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        cfg.TENSORBOARD.MODEL_VIS.LAYER_LIST, layer_name_prefix=prefix)
    logger.info("Start Model Visualization.")
    # Register hooks for activations.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and cfg.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS:
        layer_weights = model_vis.get_weights()
        writer.plot_weights_and_activations(layer_weights,
                                            tag="Layer Weights/",
                                            heat_map=False)

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    logger.info("Finish drawing weights.")
    global_idx = -1
    for inputs, _, _, meta in vis_loader:
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"])
        else:
            activations, preds = model_vis.get_activations(inputs)

        inputs = du.all_gather_unaligned(inputs)
        activations = du.all_gather_unaligned(activations)
        preds = du.all_gather_unaligned(preds)
        boxes = [None] * n_devices
        if cfg.DETECTION.ENABLE:
            boxes = du.all_gather_unaligned(meta["boxes"])

        if writer is not None:
            total_vids = 0
            for i in range(n_devices):
                cur_input = inputs[i]
                cur_activations = activations[i]
                cur_batch_size = cur_input[0].shape[0]
                cur_preds = preds[i].cpu()
                cur_boxes = boxes[i]
                for cur_batch_idx in range(cur_batch_size):
                    global_idx += 1
                    total_vids += 1
                    if cfg.TENSORBOARD.MODEL_VIS.INPUT_VIDEO:
                        for path_idx, input_pathway in enumerate(cur_input):
                            if (cfg.TEST.DATASET == "ava" or cfg.TEST.DATASET
                                    == "custom") and cfg.AVA.BGR:
                                video = input_pathway[cur_batch_idx, [2, 1, 0],
                                                      ...]
                            else:
                                video = input_pathway[cur_batch_idx]
                            # Permute to (T, H, W, C) from (C, T, H, W).
                            video = video.permute(1, 2, 3, 0)
                            video = data_utils.revert_tensor_normalize(
                                video.cpu(), cfg.DATA.MEAN, cfg.DATA.STD)
                            bboxes = (None if cur_boxes is None else
                                      cur_boxes[:, 1:].cpu())
                            video = video_vis.draw_clip(video,
                                                        cur_preds,
                                                        bboxes=bboxes)
                            video = (torch.Tensor(video).permute(
                                0, 3, 1, 2).unsqueeze(0))
                            writer.add_video(
                                video,
                                tag="Input {}/Input from pathway {}".format(
                                    global_idx, path_idx + 1),
                            )
                    if cfg.TENSORBOARD.MODEL_VIS.ACTIVATIONS:
                        writer.plot_weights_and_activations(
                            cur_activations,
                            tag="Input {}/Activations: ".format(global_idx),
                            batch_idx=cur_batch_idx,
                            indexing_dict=indexing_dict,
                        )
            logger.info("Visualized {} videos...".format(total_vids))