def run_visualization(vis_loader, model, cfg, writer=None):
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS

    global_idx = 0
    for inputs, _, _, _ in vis_loader:
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)

        inputs = du.all_gather_unaligned(inputs)

        if writer is not None:
            total_vids = 0
            for i in range(n_devices):
                cur_input = inputs[i]
                cur_batch_size = cur_input[0].shape[0]

                for cur_batch_idx in range(cur_batch_size):
                    global_idx += 1
                    total_vids += 1
                    for path_idx, input_pathway in enumerate(cur_input):
                        if cfg.TEST.DATASET == "ava" and cfg.AVA.BGR:
                            video = input_pathway[cur_batch_idx, [2, 1, 0], ...]
                        else:
                            video = input_pathway[cur_batch_idx]
                        # Permute to (T, H, W, C) from (C, T, H, W).
                        video = video.permute(1, 2, 3, 0)
                        video = data_utils.revert_tensor_normalize(
                            video.cpu(), cfg.DATA.MEAN, cfg.DATA.STD
                        )
                        video = video.permute(0, 3, 1, 2).unsqueeze(0)
                        writer.add_video(
                            video,
                            tag="Input {}/Input from pathway {}".format(
                                global_idx, path_idx + 1
                            ),
                        )

            logger.info("Visualized {} videos...".format(total_vids))
Example #2
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """

    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta, boxes,
                   b_indices) in enumerate(val_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        val_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            if cfg.NUM_GPUS:
                preds = preds.cpu()
                ori_boxes = ori_boxes.cpu()
                metadata = metadata.cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds, ori_boxes, metadata)

        else:
            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))

                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    top1_err,
                    top5_err,
                    inputs[0].size(0) * max(
                        cfg.NUM_GPUS, 1
                    ),  # If running on CPU (cfg.NUM_GPUS == 0), use 1 to represent 1 CPU.
                )
                # write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {
                            "Val/Top1_err": top1_err,
                            "Val/Top5_err": top5_err
                        },
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )

            val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    # write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars({"Val/mAP": val_meter.full_map},
                               global_step=cur_epoch)
        else:
            all_preds = [pred.clone().detach() for pred in val_meter.all_preds]
            all_labels = [
                label.clone().detach() for label in val_meter.all_labels
            ]
            if cfg.NUM_GPUS:
                all_preds = [pred.cpu() for pred in all_preds]
                all_labels = [label.cpu() for label in all_labels]
            writer.plot_eval(preds=all_preds,
                             labels=all_labels,
                             global_step=cur_epoch)

    val_meter.reset()
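
The top-1/top-5 errors above come from metrics.topks_correct; a hedged
re-implementation for illustration, where preds is (N, C) and labels is (N,):

import torch

def topks_correct_sketch(preds, labels, ks):
    # Indices of the max(ks) highest-scoring classes per example.
    _, top_inds = torch.topk(preds, max(ks), dim=1, largest=True, sorted=True)
    # (N, max_k) boolean table: does the true label sit at each rank?
    correct = top_inds.eq(labels.view(-1, 1))
    # For each k, count examples whose label is within the top k.
    return [correct[:, :k].float().sum() for k in ks]

# The error then follows as (1.0 - num_correct / preds.size(0)) * 100.0.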
Example #3
def perform_test(test_loader, model, test_meter, cfg):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video along
    its temporal axis. For each clip, it takes 3 crops to cover the spatial
    dimension, followed by averaging the softmax scores across all Nx3 views to
    form a video-level prediction. All video predictions are compared to
    ground-truth labels and the final testing performance is logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, video_idx, meta) in enumerate(test_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)

        # Transfer the data to the current GPU device.
        labels = labels.cuda()
        video_idx = video_idx.cuda()
        for key, val in meta.items():
            if isinstance(val, (list,)):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            # Keep ori_boxes/metadata defined on the single-GPU path too;
            # otherwise the update_stats call below raises a NameError.
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds.cpu()), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes.cpu()), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata.cpu()), dim=0
                )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach().cpu(),
                ori_boxes.detach().cpu(),
                metadata.detach().cpu(),
            )
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx]
                )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach().cpu(),
                labels.detach().cpu(),
                video_idx.detach().cpu(),
            )
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    test_meter.finalize_metrics()
    test_meter.reset()
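
The TestMeter used here accumulates the N x 3 clip-level scores per video
before scoring. A minimal sketch of that view ensembling, assuming scores are
averaged (the meter's internals are not shown in this listing):

import torch

def ensemble_views_sketch(clip_preds, clip_video_ids, num_videos, num_classes):
    # clip_preds: (num_views_total, num_classes); clip_video_ids: (num_views_total,)
    video_preds = torch.zeros(num_videos, num_classes)
    counts = torch.zeros(num_videos)
    for pred, vid in zip(clip_preds, clip_video_ids):
        video_preds[vid] += pred
        counts[vid] += 1
    # Average over however many views each video received.
    return video_preds / counts.clamp(min=1).unsqueeze(1)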
Example #4
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds.cpu(), ori_boxes.cpu(),
                                   metadata.cpu())
        else:
            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
                val_meter.update_predictions(preds, labels)
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))

                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(top1_err, top5_err,
                                       inputs[0].size(0) * cfg.NUM_GPUS)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
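
du.all_reduce above combines the per-GPU error tensors. A hedged sketch of its
assumed semantics (sum across processes, then average by world size):

import torch.distributed as dist

def all_reduce_sketch(tensors, average=True):
    for tensor in tensors:
        dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
    if average:
        world_size = dist.get_world_size()
        tensors = [tensor / world_size for tensor in tensors]
    return tensors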
Example #5
def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Run model visualization (weights, activations and model inputs) and visualize
    them on Tensorboard.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    prefix = "module/" if n_devices > 1 else ""
    # Get a list of selected layer names and indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        cfg.TENSORBOARD.MODEL_VIS.LAYER_LIST, layer_name_prefix=prefix)
    logger.info("Start Model Visualization.")
    # Register hooks for activations.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and cfg.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS:
        layer_weights = model_vis.get_weights()
        writer.plot_weights_and_activations(layer_weights,
                                            tag="Layer Weights/",
                                            heat_map=False)

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    if n_devices > 1:
        grad_cam_layer_ls = [
            "module/" + layer
            for layer in cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST
        ]
    else:
        grad_cam_layer_ls = cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST

    if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
        gradcam = GradCAM(
            model,
            target_layers=grad_cam_layer_ls,
            data_mean=cfg.DATA.MEAN,
            data_std=cfg.DATA.STD,
            colormap=cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.COLORMAP,
        )
    logger.info("Finish drawing weights.")
    global_idx = -1
    for inputs, labels, _, meta in tqdm.tqdm(vis_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"])
        else:
            activations, preds = model_vis.get_activations(inputs)
        if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.USE_TRUE_LABEL:
                inputs, preds = gradcam(inputs, labels=labels)
            else:
                inputs, preds = gradcam(inputs)
        if cfg.NUM_GPUS:
            inputs = du.all_gather_unaligned(inputs)
            activations = du.all_gather_unaligned(activations)
            preds = du.all_gather_unaligned(preds)
            if isinstance(inputs[0], list):
                for i in range(len(inputs)):
                    for j in range(len(inputs[0])):
                        inputs[i][j] = inputs[i][j].cpu()
            else:
                inputs = [inp.cpu() for inp in inputs]
            preds = [pred.cpu() for pred in preds]
        else:
            inputs, activations, preds = [inputs], [activations], [preds]

        boxes = [None] * max(n_devices, 1)
        if cfg.DETECTION.ENABLE and cfg.NUM_GPUS:
            boxes = du.all_gather_unaligned(meta["boxes"])
            boxes = [box.cpu() for box in boxes]

        if writer is not None:
            total_vids = 0
            for i in range(max(n_devices, 1)):
                cur_input = inputs[i]
                cur_activations = activations[i]
                cur_batch_size = cur_input[0].shape[0]
                cur_preds = preds[i]
                cur_boxes = boxes[i]
                for cur_batch_idx in range(cur_batch_size):
                    global_idx += 1
                    total_vids += 1
                    if (cfg.TENSORBOARD.MODEL_VIS.INPUT_VIDEO
                            or cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE):
                        for path_idx, input_pathway in enumerate(cur_input):
                            if cfg.TEST.DATASET == "ava" and cfg.AVA.BGR:
                                video = input_pathway[cur_batch_idx, [2, 1, 0],
                                                      ...]
                            else:
                                video = input_pathway[cur_batch_idx]

                            if not cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                                # Permute to (T, H, W, C) from (C, T, H, W).
                                video = video.permute(1, 2, 3, 0)
                                video = data_utils.revert_tensor_normalize(
                                    video, cfg.DATA.MEAN, cfg.DATA.STD)
                            else:
                                # Permute from (T, C, H, W) to (T, H, W, C)
                                video = video.permute(0, 2, 3, 1)
                            bboxes = (None if cur_boxes is None else
                                      cur_boxes[:, 1:])
                            cur_prediction = (cur_preds if cfg.DETECTION.ENABLE
                                              else cur_preds[cur_batch_idx])
                            video = video_vis.draw_clip(video,
                                                        cur_prediction,
                                                        bboxes=bboxes)
                            video = (torch.from_numpy(np.array(video)).permute(
                                0, 3, 1, 2).unsqueeze(0))
                            writer.add_video(
                                video,
                                tag="Input {}/Pathway {}".format(
                                    global_idx, path_idx + 1),
                            )
                    if cfg.TENSORBOARD.MODEL_VIS.ACTIVATIONS:
                        writer.plot_weights_and_activations(
                            cur_activations,
                            tag="Input {}/Activations: ".format(global_idx),
                            batch_idx=cur_batch_idx,
                            indexing_dict=indexing_dict,
                        )
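
A compact sketch of the computation the GradCAM wrapper above is assumed to
perform on its target layer: gradient-weighted activations, a ReLU, then
per-clip normalization so the map can be overlaid with a colormap.

import torch
import torch.nn.functional as F

def grad_cam_map_sketch(activations, gradients):
    # activations/gradients: (B, C, T, H, W), captured by forward/backward hooks.
    weights = gradients.mean(dim=(2, 3, 4), keepdim=True)  # pooled gradients
    cam = F.relu((weights * activations).sum(dim=1, keepdim=True))
    # Normalize each clip's map to [0, 1] before applying the colormap.
    cam_min = cam.amin(dim=(2, 3, 4), keepdim=True)
    cam_max = cam.amax(dim=(2, 3, 4), keepdim=True)
    return (cam - cam_min) / (cam_max - cam_min + 1e-6)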
Example #6
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        if isinstance(labels, (dict, )):
            labels = {k: v.cuda() for k, v in labels.items()}
        else:
            labels = labels.cuda()

        if cfg.DETECTION.ENABLE:
            logger.info("Detection Metadata: {}".format(meta))
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds.cpu(), ori_boxes.cpu(),
                                   metadata.cpu())
        else:
            preds = model(inputs)
            if isinstance(labels, (dict, )):
                # Compute the verb accuracies.
                verb_top1_acc, verb_top5_acc = metrics.topk_accuracies(
                    preds[0], labels['verb'], (1, 5))

                # Combine the errors across the GPUs.
                if cfg.NUM_GPUS > 1:
                    verb_top1_acc, verb_top5_acc = du.all_reduce(
                        [verb_top1_acc, verb_top5_acc])

                # Copy the errors from GPU to CPU (sync point).
                verb_top1_acc, verb_top5_acc = (verb_top1_acc.item(),
                                                verb_top5_acc.item())

                # Compute the noun accuracies.
                noun_top1_acc, noun_top5_acc = metrics.topk_accuracies(
                    preds[1], labels['noun'], (1, 5))

                # Combine the errors across the GPUs.
                if cfg.NUM_GPUS > 1:
                    noun_top1_acc, noun_top5_acc = du.all_reduce(
                        [noun_top1_acc, noun_top5_acc])

                # Copy the errors from GPU to CPU (sync point).
                noun_top1_acc, noun_top5_acc = (noun_top1_acc.item(),
                                                noun_top5_acc.item())

                # Compute the action accuracies.
                action_top1_acc, action_top5_acc = metrics.multitask_topk_accuracies(
                    (preds[0], preds[1]), (labels['verb'], labels['noun']),
                    (1, 5))
                # Combine the errors across the GPUs.
                if cfg.NUM_GPUS > 1:
                    action_top1_acc, action_top5_acc = du.all_reduce(
                        [action_top1_acc, action_top5_acc])

                # Copy the errors from GPU to CPU (sync point).
                action_top1_acc, action_top5_acc = (action_top1_acc.item(),
                                                    action_top5_acc.item())

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    (verb_top1_acc, noun_top1_acc, action_top1_acc),
                    (verb_top5_acc, noun_top5_acc, action_top5_acc),
                    inputs[0].size(0) * cfg.NUM_GPUS)
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))

                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(top1_err, top5_err,
                                       inputs[0].size(0) * cfg.NUM_GPUS)
        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()
    # Log epoch stats.
    is_best_epoch = val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
    return is_best_epoch
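
The action accuracy above relies on metrics.multitask_topk_accuracies. An
illustrative sketch, assuming an example is top-k correct only when every task
(verb and noun) has its label within its own top k:

import torch

def multitask_topk_accuracies_sketch(preds, labels, ks):
    # preds/labels are parallel tuples, one (N, C_task) / (N,) pair per task.
    hits = []
    for pred, label in zip(preds, labels):
        _, topk = torch.topk(pred, max(ks), dim=1)
        hits.append(topk.eq(label.view(-1, 1)))  # (N, max_k) per task
    accs = []
    for k in ks:
        per_task = torch.stack([h[:, :k].any(dim=1) for h in hits])
        accs.append(per_task.all(dim=0).float().mean() * 100.0)
    return accs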
Example #7
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video along
    its temporal axis. For each clip, it takes 3 crops to cover the spatial
    dimension, followed by averaging the softmax scores across all Nx3 views to
    form a video-level prediction. All video predictions are compared to
    ground-truth labels and the final testing performance is logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, video_idx, meta) in enumerate(test_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Transfer the data to the current GPU device.
            labels = labels.cuda()
            video_idx = video_idx.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        test_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            preds = preds.detach().cpu() if cfg.NUM_GPUS else preds.detach()
            ori_boxes = (ori_boxes.detach().cpu()
                         if cfg.NUM_GPUS else ori_boxes.detach())
            metadata = (metadata.detach().cpu()
                        if cfg.NUM_GPUS else metadata.detach())

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx])
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                labels = labels.cpu()
                video_idx = video_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds.detach(), labels.detach(),
                                    video_idx.detach())
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if not cfg.DETECTION.ENABLE:
        all_preds = test_meter.video_preds.clone().detach()
        all_labels = test_meter.video_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()
        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(cfg.OUTPUT_DIR,
                                     cfg.TEST.SAVE_RESULTS_PATH)

            with g_pathmgr.open(save_path, "wb") as f:
                pickle.dump([all_preds, all_labels], f)

            logger.info("Successfully saved prediction results to {}".format(
                save_path))

    test_meter.finalize_metrics()
    return test_meter
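
Reading the saved predictions back is symmetric to the dump above; the path is
whatever cfg.OUTPUT_DIR / cfg.TEST.SAVE_RESULTS_PATH resolved to (the name
below is illustrative):

import pickle

with open("output/test_results.pkl", "rb") as f:
    all_preds, all_labels = pickle.load(f)
# all_preds: (num_videos, num_classes) scores; all_labels: (num_videos,) targets.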
Example #8
    def eval_epoch(self,
                   val_loader,
                   model,
                   val_meter,
                   cur_epoch,
                   cfg,
                   writer=None):
        """
        Evaluate the model on the val set.
        Args:
            val_loader (loader): data loader to provide validation data.
            model (model): model to evaluate the performance.
            val_meter (ValMeter): meter instance to record and calculate the metrics.
            cur_epoch (int): number of the current epoch of training.
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
            writer (TensorboardWriter, optional): TensorboardWriter object
                to write Tensorboard logs.
        """

        # Evaluation mode enabled. The running stats would not be updated.
        model.eval()
        data_size = len(val_loader)
        btch = cfg.TRAIN.BATCH_SIZE * self.cfg.NUM_SHARDS
        rankE = os.environ.get("RANK", None)
        worldE = os.environ.get("WORLD_SIZE", None)
        dSize = data_size * btch
        self.logger.info(
            "Val Epoch {} dLen {} Batch {} dSize {} localRank {} rank {} {} world {} {}"
            .format(cur_epoch, data_size, btch, dSize, du.get_local_rank(),
                    du.get_rank(), rankE, du.get_world_size(), worldE))

        val_meter.iter_tic()
        predsAll = []
        labelsAll = []

        for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

            if cfg.DETECTION.ENABLE:
                # Compute the predictions.
                preds = model(inputs, meta["boxes"])

                preds = preds.cpu()
                ori_boxes = meta["ori_boxes"].cpu()
                metadata = meta["metadata"].cpu()

                if cfg.NUM_GPUS > 1:
                    preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                    ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                          dim=0)
                    metadata = torch.cat(du.all_gather_unaligned(metadata),
                                         dim=0)

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(preds.cpu(), ori_boxes.cpu(),
                                       metadata.cpu())

            else:
                preds = model(inputs)

                if cfg.DATA.MULTI_LABEL:
                    if cfg.NUM_GPUS > 1:
                        preds, labels = du.all_gather([preds, labels])
                else:
                    if cfg.MODEL.NUM_CLASSES == 2:
                        predsAll.extend(preds.detach().cpu().numpy()[:, -1])
                        labelsAll.extend(labels.detach().cpu().numpy())

                    # Compute the errors.
                    num_topks_correct = metrics.topks_correct(
                        preds, labels, (1, min(5, cfg.MODEL.NUM_CLASSES)))

                    # Combine the errors across the GPUs.
                    top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                          for x in num_topks_correct]
                    if cfg.NUM_GPUS > 1:
                        top1_err, top5_err = du.all_reduce(
                            [top1_err, top5_err])

                    # Copy the errors from GPU to CPU (sync point).
                    top1_err, top5_err = top1_err.item(), top5_err.item()

                    val_meter.iter_toc()
                    # Update and log stats.
                    val_meter.update_stats(top1_err, top5_err,
                                           inputs[0].size(0) * cfg.NUM_GPUS)
                    # write to tensorboard format if available.
                    if writer is not None:
                        writer.add_scalars(
                            {
                                "Val/Top1_err": top1_err,
                                "Val/Top5_err": top5_err
                            },
                            global_step=len(val_loader) * cur_epoch + cur_iter,
                        )

                    if du.is_master_proc():
                        ite = len(val_loader) * cur_epoch + cur_iter
                        self.logger.log_row(name='ValTop1',
                                            iter=ite,
                                            lr=top1_err,
                                            description="Top 1 Err")
                        self.logger.log_row(name='ValTop5',
                                            iter=ite,
                                            lr=top5_err,
                                            description="Top 5 Err")

                val_meter.update_predictions(preds, labels)

            stats = val_meter.log_iter_stats(cur_epoch, cur_iter, predsAll,
                                             labelsAll)
            ite = dSize * cur_epoch + btch * (cur_iter + 1)
            self.plotStats(stats, ite, 'ValIter')

            val_meter.iter_tic()

        # Log epoch stats.
        gathered = du.all_gather([
            torch.tensor(predsAll).to(torch.device("cuda")),
            torch.tensor(labelsAll).to(torch.device("cuda"))
        ])
        stats = val_meter.log_epoch_stats(cur_epoch,
                                          gathered[0].detach().cpu().numpy(),
                                          gathered[1].detach().cpu().numpy())
        ite = (cur_epoch + 1) * dSize
        self.plotStats(stats, ite, 'ValEpoch')

        # write to tensorboard format if available.
        if writer is not None:
            if cfg.DETECTION.ENABLE:
                writer.add_scalars({"Val/mAP": val_meter.full_map},
                                   global_step=cur_epoch)
            all_preds_cpu = [
                pred.clone().detach().cpu() for pred in val_meter.all_preds
            ]
            all_labels_cpu = [
                label.clone().detach().cpu() for label in val_meter.all_labels
            ]
            # plotScatter(all_preds_cpu, all_labels_cpu, "Epoch_{}".format(cur_epoch))
            # writer.plot_eval(
            #     preds=all_preds_cpu, labels=all_labels_cpu, global_step=cur_epoch
            # )
        val_meter.reset()
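
With cfg.MODEL.NUM_CLASSES == 2, the loop collects positive-class scores in
predsAll and targets in labelsAll. One plausible epoch-level use of those
arrays (an assumption; the meter's internals are not shown) is a ROC-AUC:

from sklearn.metrics import roc_auc_score

val_auc = roc_auc_score(labelsAll, predsAll)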
Example #9
def perform_test(test_loader, model, test_meter, cfg):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video along
    its temporal axis. For each clip, it takes 3 crops to cover the spatial
    dimension, followed by averaging the softmax scores across all Nx3 views to
    form a video-level prediction. All video predictions are compared to
    ground-truth labels and the final testing performance is logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable eval mode.
    model.eval()

    test_meter.iter_tic()

    if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
        x_feat_list = [[],[]]
    elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
        x_feat_list = []
    # for cur_iter, (inputs, bboxs, masks, labels, video_idx, meta) in enumerate(test_loader):
    for cur_iter, output_dict in enumerate(test_loader):
        if cur_iter % 100 == 0:
            logger.info("Testing iter={}".format(cur_iter))
        
        # if (cur_iter+1) % 1000 == 0:
        #     test_meter_preds, test_meter_labels, test_meter_metadata = test_meter.finalize_metrics()
    
        inputs = output_dict['inputs']
        labels = output_dict['label'] 
        video_idx = output_dict['index'] 
        meta = output_dict['metadata'] 
        if cfg.EPICKITCHENS.USE_BBOX:
            bboxs = output_dict['bboxs']
            masks = output_dict['masks']
        else:
            bboxs = None
            masks = None

        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)

        # Transfer the data to the current GPU device.
        if isinstance(labels, (dict,)):
            labels = {k: v.cuda() for k, v in labels.items()}
        else:
            labels = labels.cuda()
        video_idx = video_idx.cuda()

        if cfg.DETECTION.ENABLE:
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes), dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach().cpu(),
                ori_boxes.detach().cpu(),
                metadata.detach().cpu(),
            )
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            if cfg.EPICKITCHENS.USE_BBOX:
                bboxs = to_cuda(bboxs)
                masks = to_cuda(masks)
                preds_pair = model(inputs, bboxes=bboxs, masks=masks)
            else:
                preds_pair = model(inputs)
            
            if cfg.TEST.EXTRACT_FEATURES:
                preds, x_feat = preds_pair
            else:
                preds = preds_pair

            if isinstance(labels, (dict,)):
                # Gather all the predictions across all the devices to perform ensemble.
                if cfg.NUM_GPUS > 1:
                    verb_preds, verb_labels, video_idx = du.all_gather(
                        [preds[0], labels['verb'], video_idx]
                    )

                    noun_preds, noun_labels, video_idx = du.all_gather(
                        [preds[1], labels['noun'], video_idx]
                    )
                    meta = du.all_gather_unaligned(meta)
                    metadata = {'narration_id': []}
                    for i in range(len(meta)):
                        metadata['narration_id'].extend(meta[i]['narration_id'])
                    
                    
                    if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                        x_feat_slow, x_feat_fast = du.all_gather([x_feat[0], x_feat[1]])
                        #print(x_feat_slow.shape, x_feat_fast.shape)
                        ##torch.Size([8, 2048, 8, 7, 7]) torch.Size([8, 256, 32, 7, 7])
                        x_feat_list[0] += [x_feat_slow]
                        x_feat_list[1] += [x_feat_fast]
                    elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                        x_feat = du.all_gather([x_feat])
                        x_feat_list.append(x_feat[0])
                else:
                    metadata = meta
                    verb_preds, verb_labels, video_idx = preds[0], labels['verb'], video_idx
                    noun_preds, noun_labels, video_idx = preds[1], labels['noun'], video_idx
                    if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                        x_feat_list[0].append(x_feat[0])
                        x_feat_list[1].append(x_feat[1])
                    elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                        x_feat_list.append(x_feat)
                    
                    
                test_meter.iter_toc()
                # Update and log stats.
                test_meter.update_stats(
                    (verb_preds.detach().cpu(), noun_preds.detach().cpu()),
                    (verb_labels.detach().cpu(), noun_labels.detach().cpu()),
                    metadata,
                    video_idx.detach().cpu(),
                )
                # test_meter.log_iter_stats(cur_iter)
            else:
                # Gather all the predictions across all the devices to perform ensemble.
                if cfg.NUM_GPUS > 1:
                    # Assign the gathered indices back to video_idx so the
                    # meter update below uses the all-rank tensor.
                    preds, labels, video_idx = du.all_gather(
                        [preds, labels, video_idx]
                    )

                test_meter.iter_toc()
                # Update and log stats.
                test_meter.update_stats(
                    preds.detach().cpu(),
                    labels.detach().cpu(),
                    video_idx.detach().cpu(),
                )
                # test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if cfg.TEST.DATASET == 'epickitchens':
        preds, labels, metadata = test_meter.finalize_metrics()
    else:
        test_meter.finalize_metrics()
        preds, labels, metadata = None, None, None
    test_meter.reset()
    
    if cfg.TEST.EXTRACT_FEATURES:
        if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
            final_feat_list = [[],[]]
            final_feat_list[0] = [t.cpu() for t in x_feat_list[0]]
            final_feat_list[1] = [t.cpu() for t in x_feat_list[1]]
        elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
            final_feat_list = [t.cpu() for t in x_feat_list]
        
        return preds, labels, metadata, final_feat_list
    else:
        return preds, labels, metadata
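
A hedged sketch of consuming the returned features, assuming each list entry
is a per-batch clip feature with the shapes hinted in the comments above
(slow: (B, 2048, 8, 7, 7), fast: (B, 256, 32, 7, 7)):

import torch

slow_feats = torch.cat(final_feat_list[0], dim=0)  # (N, 2048, 8, 7, 7)
fast_feats = torch.cat(final_feat_list[1], dim=0)  # (N, 256, 32, 7, 7)
# Spatio-temporal average pooling yields one vector per clip, a common input
# format for a downstream temporal model such as MS-TCN.
clip_repr = torch.cat(
    [slow_feats.mean(dim=(2, 3, 4)), fast_feats.mean(dim=(2, 3, 4))], dim=1
)  # (N, 2304)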
Example #10
def eval_epoch(val_loader, model, val_meter, cur_epoch, nep, cfg):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds.cpu(), ori_boxes.cpu(),
                                   metadata.cpu())

        else:

            preds = model(inputs)
            aux_loss_keys = []
            if cfg.PREDICTIVE.ENABLE:
                aux_loss_keys.append('pred_errors')
                errors = preds['pred_errors']
                pred_loss = errors.mean()

            if 'frame_errors' in preds:
                aux_loss_keys.append('frame_errors')
                frame_errors = preds['frame_errors']

            if cfg.PREDICTIVE.CPC:
                aux_loss_keys.append('cpc_loss')
                cpc_loss = preds['cpc_loss']

            if cfg.SUPERVISED:
                preds = preds['logits']

            # Explicitly declare reduction to mean.
            if cfg.MODEL.LOSS_FUNC != '' and cfg.SUPERVISED:
                loss_fun = losses.get_loss_func(
                    cfg.MODEL.LOSS_FUNC)(reduction="mean")

                # Compute the loss.
                loss = loss_fun(preds, labels)
                # total_loss = total_loss + loss

                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))

                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]

            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                if cfg.PREDICTIVE.ENABLE:
                    pred_loss = du.all_reduce([pred_loss])[0]

                if cfg.PREDICTIVE.CPC:
                    cpc_loss = du.all_reduce([cpc_loss])[0]

                if cfg.SUPERVISED:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])

            # # Copy the stats from GPU to CPU (sync point).
            # loss, top1_err, top5_err = (
            #     loss.item(),
            #     top1_err.item(),
            #     top5_err.item(),
            # )

            # if cfg.NUM_GPUS > 1:
            #     top1_err, top5_err = du.all_reduce([top1_err, top5_err])

            # Copy the errors from GPU to CPU (sync point).
            loss_logs = {}
            # Check for the keys actually appended to aux_loss_keys above
            # ('pred_errors' / 'cpc_loss'); otherwise these are never logged.
            if 'pred_errors' in aux_loss_keys:
                loss_logs['loss_pred'] = pred_loss.item()

            if 'frame_errors' in aux_loss_keys:
                loss_logs['frame_errors'] = frame_errors.item()

            if 'cpc_loss' in aux_loss_keys:
                loss_logs['loss_cpc'] = cpc_loss.item()

            if cfg.SUPERVISED:
                loss_logs['loss_class'] = loss.item()
                loss_logs['top1_err'] = top1_err.item()
                loss_logs['top5_err'] = top5_err.item()

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(inputs[0].size(0) * cfg.NUM_GPUS,
                                   **loss_logs)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # neptune update
    if nep is not None:
        for k, v in loss_logs.items():
            # str.strip removes a character set, not a prefix ('loss_class'
            # would become 'cla'); slice the prefix off instead.
            name = k[len('loss_'):] if k.startswith('loss_') else k
            nep.log_metric('val_' + name,
                           val_meter.stats[k].get_global_avg())

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
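
losses.get_loss_func above maps a config string to a loss class. A minimal
sketch of the assumed registry pattern:

import torch.nn as nn

_LOSSES = {
    "cross_entropy": nn.CrossEntropyLoss,
    "bce": nn.BCELoss,
    "bce_logit": nn.BCEWithLogitsLoss,
}

def get_loss_func_sketch(loss_name):
    if loss_name not in _LOSSES:
        raise NotImplementedError("Loss {} is not supported".format(loss_name))
    return _LOSSES[loss_name]

# Usage, mirroring the code above:
# loss_fun = get_loss_func_sketch(cfg.MODEL.LOSS_FUNC)(reduction="mean")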
Example #11
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform mutli-view testing that uniformly samples N clips from a video along
    its temporal axis. For each clip, it takes 3 crops to cover the spatial
    dimension, followed by averaging the softmax scores across all Nx3 views to
    form a video-level prediction. All video predictions are compared to
    ground-truth labels and the final testing performance is logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, video_idx, time,
                   meta) in enumerate(test_loader):

        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            # Transfer the data to the current GPU device.
            labels = labels.cuda()
            video_idx = video_idx.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        test_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            preds = preds.detach().cpu() if cfg.NUM_GPUS else preds.detach()
            ori_boxes = (ori_boxes.detach().cpu()
                         if cfg.NUM_GPUS else ori_boxes.detach())
            metadata = (metadata.detach().cpu()
                        if cfg.NUM_GPUS else metadata.detach())

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
            test_meter.iter_tic()
            # Detection stats are complete for this batch; skip the
            # classification-style gather/update below.
            continue
        elif cfg.TASK == "ssl" and cfg.MODEL.MODEL_NAME == "ContrastiveModel":
            if not cfg.CONTRASTIVE.KNN_ON:
                test_meter.finalize_metrics()
                return test_meter
            # preds = model(inputs, video_idx, time)
            train_labels = (model.module.train_labels if hasattr(
                model, "module") else model.train_labels)
            # yd: similarities to the K nearest training clips; yi: their indices.
            yd, yi = model(inputs, video_idx, time)
            batchSize = yi.shape[0]
            K = yi.shape[1]
            C = cfg.CONTRASTIVE.NUM_CLASSES_DOWNSTREAM  # e.g. 400 for Kinetics-400
            # Look up the labels of the K retrieved neighbors and one-hot them.
            candidates = train_labels.view(1, -1).expand(batchSize, -1)
            retrieval = torch.gather(candidates, 1, yi)
            retrieval_one_hot = torch.zeros((batchSize * K, C)).cuda()
            retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)
            # Temperature-scaled similarities weight each neighbor's vote.
            yd_transform = yd.clone().div_(cfg.CONTRASTIVE.T).exp_()
            probs = torch.mul(
                retrieval_one_hot.view(batchSize, -1, C),
                yd_transform.view(batchSize, -1, 1),
            )
            preds = torch.sum(probs, 1)
        else:
            # Perform the forward pass.
            preds = model(inputs)

        # Gather all the predictions across all the devices to perform ensemble.
        if cfg.NUM_GPUS > 1:
            preds, labels, video_idx = du.all_gather(
                [preds, labels, video_idx])
        if cfg.NUM_GPUS:
            preds = preds.cpu()
            labels = labels.cpu()
            video_idx = video_idx.cpu()

        test_meter.iter_toc()
        # Update and log stats.
        test_meter.update_stats(preds.detach(), labels.detach(),
                                video_idx.detach())
        test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if not cfg.DETECTION.ENABLE:
        all_preds = test_meter.video_preds.clone().detach()
        all_labels = test_meter.video_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()
        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(cfg.OUTPUT_DIR,
                                     cfg.TEST.SAVE_RESULTS_PATH)

            if du.is_root_proc():
                with pathmgr.open(save_path, "wb") as f:
                    pickle.dump([all_preds, all_labels], f)

            logger.info("Successfully saved prediction results to {}".format(
                save_path))

    test_meter.finalize_metrics()
    return test_meter
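
A toy check of the weighted-kNN vote used in the ContrastiveModel branch above
(values are illustrative; only the shapes and the voting formula matter):

import torch

batch_size, K, C, T = 2, 5, 10, 0.07
yd = torch.rand(batch_size, K)               # similarities to the K neighbors
yi = torch.randint(0, 100, (batch_size, K))  # indices of those neighbors
train_labels = torch.randint(0, C, (100,))
retrieval = torch.gather(train_labels.view(1, -1).expand(batch_size, -1), 1, yi)
one_hot = torch.zeros(batch_size * K, C).scatter_(1, retrieval.view(-1, 1), 1)
weights = yd.div(T).exp()                    # temperature-scaled vote weights
preds = (one_hot.view(batch_size, K, C) * weights.view(batch_size, K, 1)).sum(1)
assert preds.shape == (batch_size, C)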
Example #12
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from an audio signal along
    its temporal axis. Softmax scores are averaged across all N views to
    form an audio-level prediction. All audio predictions are compared to
    ground-truth labels and the final testing performance is logged.
    Args:
        test_loader (loader): audio testing loader.
        model (model): the pretrained audio model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    # Enable eval mode.
    model.eval()

    test_meter.iter_tic()

    for cur_iter, (inputs, labels, audio_idx, meta) in enumerate(test_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Transfer the data to the current GPU device.
            if isinstance(labels, (dict,)):
                labels = {k: v.cuda() for k, v in labels.items()}
            else:
                labels = labels.cuda()
            audio_idx = audio_idx.cuda()
        test_meter.data_toc()

        # Perform the forward pass.
        preds = model(inputs)

        if isinstance(labels, (dict,)):
            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                verb_preds, verb_labels, audio_idx = du.all_gather(
                    [preds[0], labels['verb'], audio_idx]
                )

                noun_preds, noun_labels, audio_idx = du.all_gather(
                    [preds[1], labels['noun'], audio_idx]
                )
                meta = du.all_gather_unaligned(meta)
                metadata = {'narration_id': []}
                for i in range(len(meta)):
                    metadata['narration_id'].extend(meta[i]['narration_id'])
            else:
                metadata = meta
                verb_preds, verb_labels = preds[0], labels['verb']
                noun_preds, noun_labels = preds[1], labels['noun']
            if cfg.NUM_GPUS:
                verb_preds = verb_preds.cpu()
                verb_labels = verb_labels.cpu()
                noun_preds = noun_preds.cpu()
                noun_labels = noun_labels.cpu()
                audio_idx = audio_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                (verb_preds.detach(), noun_preds.detach()),
                (verb_labels.detach(), noun_labels.detach()),
                metadata,
                audio_idx.detach(),
            )

            test_meter.log_iter_stats(cur_iter)
        else:
            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, audio_idx = du.all_gather(
                    [preds, labels, audio_idx]
                )
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                labels = labels.cpu()
                audio_idx = audio_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach(), labels.detach(), audio_idx.detach()
            )
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if cfg.TEST.DATASET != 'epickitchens':
        all_preds = test_meter.audio_preds.clone().detach()
        all_labels = test_meter.audio_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()
        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(cfg.OUTPUT_DIR, cfg.TEST.SAVE_RESULTS_PATH)

            if du.is_root_proc():
                with PathManager.open(save_path, "wb") as f:
                    pickle.dump([all_preds, all_labels], f)

            logger.info(
                "Successfully saved prediction results to {}".format(save_path)
            )

    preds, preds_clips, labels, metadata = test_meter.finalize_metrics()
    return test_meter, preds, preds_clips, labels, metadata
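
# A hedged sketch of the view-level ensembling that the TestMeter performs
# internally: clip scores are accumulated per audio index and averaged over
# the N views. The shapes and the index_add_-based bookkeeping are assumed
# for illustration; they are not the meter's actual implementation.
import torch

num_items, num_classes, num_views = 2, 5, 3
item_preds = torch.zeros(num_items, num_classes)
item_counts = torch.zeros(num_items)

for _ in range(num_views):
    # Pretend each iteration yields clip-level softmax scores and indices.
    clip_preds = torch.rand(num_items, num_classes).softmax(dim=1)
    clip_idx = torch.arange(num_items)
    item_preds.index_add_(0, clip_idx, clip_preds)          # accumulate
    item_counts.index_add_(0, clip_idx, torch.ones(num_items))

item_preds /= item_counts.unsqueeze(1)                      # average views
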
Example #13
def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Run model visualization (weights, activations and model inputs) and visualize
    them on Tensorboard.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    prefix = "module/" if n_devices > 1 else ""
    # Get a list of selected layer names and indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        cfg.TENSORBOARD.MODEL_VIS.LAYER_LIST, layer_name_prefix=prefix)
    logger.info("Start Model Visualization.")
    # Register hooks for activations.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and cfg.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS:
        layer_weights = model_vis.get_weights()
        writer.plot_weights_and_activations(layer_weights,
                                            tag="Layer Weights/",
                                            heat_map=False)

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    logger.info("Finish drawing weights.")
    global_idx = -1
    for inputs, _, _, meta in vis_loader:
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"])
        else:
            activations, preds = model_vis.get_activations(inputs)

        inputs = du.all_gather_unaligned(inputs)
        activations = du.all_gather_unaligned(activations)
        preds = du.all_gather_unaligned(preds)
        boxes = [None] * n_devices
        if cfg.DETECTION.ENABLE:
            boxes = du.all_gather_unaligned(meta["boxes"])

        if writer is not None:
            total_vids = 0
            for i in range(n_devices):
                cur_input = inputs[i]
                cur_activations = activations[i]
                cur_batch_size = cur_input[0].shape[0]
                cur_preds = preds[i].cpu()
                cur_boxes = boxes[i]
                for cur_batch_idx in range(cur_batch_size):
                    global_idx += 1
                    total_vids += 1
                    if cfg.TENSORBOARD.MODEL_VIS.INPUT_VIDEO:
                        for path_idx, input_pathway in enumerate(cur_input):
                            if (cfg.TEST.DATASET in ("ava", "custom")
                                    and cfg.AVA.BGR):
                                video = input_pathway[
                                    cur_batch_idx, [2, 1, 0], ...]
                            else:
                                video = input_pathway[cur_batch_idx]
                            # Permute to (T, H, W, C) from (C, T, H, W).
                            video = video.permute(1, 2, 3, 0)
                            video = data_utils.revert_tensor_normalize(
                                video.cpu(), cfg.DATA.MEAN, cfg.DATA.STD)
                            bboxes = (None if cur_boxes is None else
                                      cur_boxes[:, 1:].cpu())
                            video = video_vis.draw_clip(video,
                                                        cur_preds,
                                                        bboxes=bboxes)
                            video = (torch.Tensor(video).permute(
                                0, 3, 1, 2).unsqueeze(0))
                            writer.add_video(
                                video,
                                tag="Input {}/Input from pathway {}".format(
                                    global_idx, path_idx + 1),
                            )
                    if cfg.TENSORBOARD.MODEL_VIS.ACTIVATIONS:
                        writer.plot_weights_and_activations(
                            cur_activations,
                            tag="Input {}/Activations: ".format(global_idx),
                            batch_idx=cur_batch_idx,
                            indexing_dict=indexing_dict,
                        )
            logger.info("Visualized {} videos...".format(total_vids))
Example #14
def perform_test(test_loader,
                 model,
                 test_meter,
                 cfg,
                 writer=None,
                 device='cpu'):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video along
    its temporal axis. For each clip, it takes 3 crops to cover the spatial
    dimension, followed by averaging the softmax scores across all Nx3 views to
    form a video-level prediction. All video predictions are compared to
    ground-truth labels and the final testing performance is logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the testing
            results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter object
            to write Tensorboard logs.
        device (str or torch.device): device to move the inputs, labels and
            video indices onto (defaults to 'cpu').
    """
    # Enable eval mode.
    import time
    model.eval()
    test_meter.iter_tic()
    print('dataloader length: ', len(test_loader))
    ntic = time.time()
    for cur_iter, (inputs, labels, video_idx, meta) in enumerate(test_loader):
        print('data loading: ', time.time() - ntic)
        print('num input pathways: ', len(inputs))
        print(inputs[0].shape, inputs[1].shape)
        ntic = time.time()
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].to(device, non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Transfer the data to the current GPU device.
            labels = labels.to(device)
            video_idx = video_idx.to(device)
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        print('transfer to gpu: ', time.time() - ntic)
        ntic = time.time()
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            preds = preds.detach().cpu() if cfg.NUM_GPUS else preds.detach()
            ori_boxes = (ori_boxes.detach().cpu()
                         if cfg.NUM_GPUS else ori_boxes.detach())
            metadata = (metadata.detach().cpu()
                        if cfg.NUM_GPUS else metadata.detach())

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            ntic_1 = time.time()
            with torch.no_grad():
                preds, pre_gap, gap = model(inputs)
            print('after fwd pass: ', time.time() - ntic_1)
            # The rest of this branch was masked ('******') in the source;
            # the meter updates below follow the sibling examples above.
            test_meter.iter_toc()
            test_meter.update_stats(preds.detach().cpu(),
                                    labels.detach().cpu(),
                                    video_idx.detach().cpu())
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    print('full test done: ', time.time() - ntic)
    ntic = time.time()
    # Log epoch stats and print the final testing results.
    if writer is not None and not cfg.DETECTION.ENABLE:
        all_preds = [pred.clone().detach() for pred in test_meter.video_preds]
        all_labels = [
            label.clone().detach() for label in test_meter.video_labels
        ]
        if cfg.NUM_GPUS:
            all_preds = [pred.cpu() for pred in all_preds]
            all_labels = [label.cpu() for label in all_labels]
        writer.plot_eval(preds=all_preds, labels=all_labels)
    test_meter.finalize_metrics()
    test_meter.reset()

    print('full func done: ', time.time() - ntic)
    return preds, gap
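
# The ad-hoc prints above time the data-loading, transfer and forward stages
# per iteration. A tidier sketch of the same instrumentation, with
# hypothetical stage names:
import time

def timed(stage, tic):
    toc = time.time()
    print('{}: {:.3f}s'.format(stage, toc - tic))
    return toc

tic = time.time()
for _ in range(2):          # stand-in for the dataloader loop
    tic = timed('data loading', tic)
    # ... transfer inputs to the device ...
    tic = timed('gpu transfer', tic)
    # ... forward pass ...
    tic = timed('forward pass', tic)
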
Example #15
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, train_loader,
               writer):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        train_loader (loader): data loader for the training set.
        writer (TensorboardWriter): TensorboardWriter object
            to write Tensorboard logs.
    """

    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, index, time, meta) in enumerate(val_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
            index = index.cuda()
            time = time.cuda()
        batch_size = (inputs[0][0].size(0)
                      if isinstance(inputs[0], list) else inputs[0].size(0))
        val_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            if cfg.NUM_GPUS:
                preds = preds.cpu()
                ori_boxes = ori_boxes.cpu()
                metadata = metadata.cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds, ori_boxes, metadata)

        else:
            if cfg.TASK == "ssl" and cfg.MODEL.MODEL_NAME == "ContrastiveModel":
                if not cfg.CONTRASTIVE.KNN_ON:
                    return
                train_labels = (model.module.train_labels if hasattr(
                    model, "module") else model.train_labels)
                yd, yi = model(inputs, index, time)
                K = yi.shape[1]
                C = cfg.CONTRASTIVE.NUM_CLASSES_DOWNSTREAM  # e.g., 400 for Kinetics-400
                candidates = train_labels.view(1, -1).expand(batch_size, -1)
                retrieval = torch.gather(candidates, 1, yi)
                retrieval_one_hot = torch.zeros((batch_size * K, C)).cuda()
                retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)
                yd_transform = yd.clone().div_(cfg.CONTRASTIVE.T).exp_()
                probs = torch.mul(
                    retrieval_one_hot.view(batch_size, -1, C),
                    yd_transform.view(batch_size, -1, 1),
                )
                preds = torch.sum(probs, 1)
            else:
                preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))

                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    top1_err,
                    top5_err,
                    batch_size * max(
                        cfg.NUM_GPUS, 1
                    ),  # If running on CPU (cfg.NUM_GPUS == 0), use 1 to represent 1 CPU.
                )
                # write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {
                            "Val/Top1_err": top1_err,
                            "Val/Top5_err": top5_err
                        },
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )

            val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    # write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars({"Val/mAP": val_meter.full_map},
                               global_step=cur_epoch)
        else:
            all_preds = [pred.clone().detach() for pred in val_meter.all_preds]
            all_labels = [
                label.clone().detach() for label in val_meter.all_labels
            ]
            if cfg.NUM_GPUS:
                all_preds = [pred.cpu() for pred in all_preds]
                all_labels = [label.cpu() for label in all_labels]
            writer.plot_eval(preds=all_preds,
                             labels=all_labels,
                             global_step=cur_epoch)

    val_meter.reset()
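
# A hedged sketch of what metrics.topks_correct is assumed to compute given
# how it is called above: for each k, the number of samples whose label
# appears among the top-k scores. The signature is inferred, not copied from
# the SlowFast source.
import torch

def topks_correct_sketch(preds, labels, ks):
    _, topk_idx = preds.topk(max(ks), dim=1, largest=True, sorted=True)
    hits = topk_idx.eq(labels.view(-1, 1))  # (N, max_k) hit mask
    return [hits[:, :k].any(dim=1).float().sum() for k in ks]

preds = torch.tensor([[0.1, 0.7, 0.2], [0.6, 0.3, 0.1]])
labels = torch.tensor([1, 2])
num_topks_correct = topks_correct_sketch(preds, labels, (1, 2))
top1_err, top2_err = [(1.0 - x / preds.size(0)) * 100.0
                      for x in num_topks_correct]  # 50.0, 50.0 here
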
Example #16
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Evaluate the model on the val set.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to write Tensorboard logs.
    """

    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes),
                                      dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds.cpu(), ori_boxes.cpu(),
                                   metadata.cpu())

        else:
            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors. This is a binary task (see the 2x2
                # confusion matrix below), so the second slot also uses k=1.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 1))

                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(top1_err, top5_err,
                                       inputs[0].size(0) * cfg.NUM_GPUS)
                # write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {
                            "Val/Top1_err": top1_err,
                            "Val/Top5_err": top5_err
                        },
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )

            val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    logger.info('COMPUTING MCC')
    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)

    all_preds_cpu = [
        pred.clone().detach().cpu() for pred in val_meter.all_preds
    ]
    all_labels_cpu = [
        label.clone().detach().cpu() for label in val_meter.all_labels
    ]
    logger.info('PREPROC FOR MCC')
    preds = torch.cat(all_preds_cpu)
    ypreds = torch.argmax(preds, dim=1)
    ytrue = torch.cat(all_labels_cpu)
    logger.info('COMPUTE CM')
    cm = plmetrics.ConfusionMatrix()(ypreds.to('cuda'), ytrue.to('cuda'))
    logger.info('CM COMPUTED')
    tp, tn, fn, fp = cm[1, 1], cm[0, 0], cm[0, 1], cm[1, 0]
    denom = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    mcc = (tp * tn - fp * fn) / torch.sqrt(denom)
    logger.info('COMPUTED MCC')
    # write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars({"Val/mAP": val_meter.full_map},
                               global_step=cur_epoch)
        writer.plot_eval(preds=all_preds_cpu,
                         labels=all_labels_cpu,
                         global_step=cur_epoch)

    val_meter.reset()
    return mcc.item()
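
# A quick worked check of the MCC formula above on a toy 2x2 confusion
# matrix, using the same cell indexing as the snippet (cm[1, 1] = TP,
# cm[0, 0] = TN, cm[0, 1] = FN, cm[1, 0] = FP). Torch only; no GPU needed.
import torch

cm = torch.tensor([[50., 5.],
                   [10., 35.]])
tp, tn, fn, fp = cm[1, 1], cm[0, 0], cm[0, 1], cm[1, 0]
denom = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
mcc = (tp * tn - fp * fn) / torch.sqrt(denom)
print(round(mcc.item(), 4))  # ~0.6975 for this toy matrix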