def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Write the input clips served by the visualization loader to Tensorboard.

    Every batch is moved to the current GPU, gathered across processes with
    `du.all_gather_unaligned`, un-normalized and logged as one video per
    sample and per pathway.
    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model. It is not used by this function.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log. When None, batches are still gathered
            but nothing is written or logged.
    """
    num_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    # Running 1-based index of the visualized sample, shared by all batches.
    sample_no = 0

    for inputs, _, _, _ in vis_loader:
        # Move the batch onto the current GPU device.
        if isinstance(inputs, list):
            for slot, pathway in enumerate(inputs):
                inputs[slot] = pathway.cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)

        inputs = du.all_gather_unaligned(inputs)

        if writer is None:
            continue

        vids_in_batch = 0
        for device_idx in range(num_devices):
            device_input = inputs[device_idx]
            batch_size = device_input[0].shape[0]
            for sample_idx in range(batch_size):
                sample_no += 1
                vids_in_batch += 1
                for pathway_no, pathway in enumerate(device_input, start=1):
                    if cfg.TEST.DATASET == "ava" and cfg.AVA.BGR:
                        # Reverse the channel order while selecting the sample.
                        clip = pathway[sample_idx, [2, 1, 0], ...]
                    else:
                        clip = pathway[sample_idx]

                    # (C, T, H, W) -> (T, H, W, C) for un-normalization.
                    clip = clip.permute(1, 2, 3, 0)
                    clip = data_utils.revert_tensor_normalize(
                        clip.cpu(), cfg.DATA.MEAN, cfg.DATA.STD
                    )
                    # Channels back in front of H and W, plus a leading
                    # batch dimension of size one.
                    clip = clip.permute(0, 3, 1, 2).unsqueeze(0)
                    writer.add_video(
                        clip,
                        tag="Input {}/Input from pathway {}".format(
                            sample_no, pathway_no
                        ),
                    )

        logger.info("Visualized {} videos...".format(vids_in_batch))
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Run one pass over the validation set and record the metrics.

    Args:
        val_loader (loader): data loader to provide validation data. Each
            item is unpacked into six fields; the third and the last two
            are ignored here.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta, _, _) in enumerate(val_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, list):
                for slot, pathway in enumerate(inputs):
                    inputs[slot] = pathway.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, list):
                    for slot, item in enumerate(val):
                        val[slot] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        val_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes, metadata = meta["ori_boxes"], meta["metadata"]

            if cfg.NUM_GPUS:
                preds = preds.cpu()
                ori_boxes = ori_boxes.cpu()
                metadata = metadata.cpu()

            if cfg.NUM_GPUS > 1:
                preds, ori_boxes, metadata = (
                    torch.cat(du.all_gather_unaligned(tensor), dim=0)
                    for tensor in (preds, ori_boxes, metadata)
                )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds, ori_boxes, metadata)
        else:
            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors.
                correct_counts = metrics.topks_correct(preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - count / preds.size(0)) * 100.0
                    for count in correct_counts
                ]
                # Combine the errors across the GPUs.
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err = top1_err.item()
                top5_err = top5_err.item()

                val_meter.iter_toc()
                # When running on CPU (cfg.NUM_GPUS == 0) the batch is
                # counted once, as if there were a single device.
                device_count = max(cfg.NUM_GPUS, 1)
                val_meter.update_stats(
                    top1_err, top5_err, inputs[0].size(0) * device_count
                )
                # Write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {"Val/Top1_err": top1_err, "Val/Top5_err": top5_err},
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )

            val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)

    # Write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars(
                {"Val/mAP": val_meter.full_map}, global_step=cur_epoch
            )
        else:
            epoch_preds = [p.clone().detach() for p in val_meter.all_preds]
            epoch_labels = [t.clone().detach() for t in val_meter.all_labels]
            if cfg.NUM_GPUS:
                epoch_preds = [p.cpu() for p in epoch_preds]
                epoch_labels = [t.cpu() for t in epoch_labels]
            writer.plot_eval(
                preds=epoch_preds, labels=epoch_labels, global_step=cur_epoch
            )

    val_meter.reset()
def perform_test(test_loader, model, test_meter, cfg):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video
    along its temporal axis. For each clip, it takes 3 crops to cover the
    spatial dimension, followed by averaging the softmax scores across all
    Nx3 views to form a video-level prediction. All video predictions are
    compared to ground-truth labels and the final testing performance is
    logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, video_idx, meta) in enumerate(test_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, list):
            for i, pathway in enumerate(inputs):
                inputs[i] = pathway.cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        video_idx = video_idx.cuda()
        for key, val in meta.items():
            if isinstance(val, list):
                for i, item in enumerate(val):
                    val[i] = item.cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            # Bind all three tensors on every path. They used to be assigned
            # only inside the multi-GPU branch, so a single-GPU detection
            # run failed at update_stats with an unbound `ori_boxes`.
            preds = preds.detach().cpu()
            ori_boxes = meta["ori_boxes"].detach().cpu()
            metadata = meta["metadata"].detach().cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0
                )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            preds = model(inputs)

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx]
                )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach().cpu(),
                labels.detach().cpu(),
                video_idx.detach().cpu(),
            )
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    test_meter.finalize_metrics()
    test_meter.reset()
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg):
    """
    Run one pass over the validation set and record the metrics.

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, list):
            for slot, pathway in enumerate(inputs):
                inputs[slot] = pathway.cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, list):
                for slot, item in enumerate(val):
                    val[slot] = item.cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions and bring everything to the host.
            preds = model(inputs, meta["boxes"]).cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds, ori_boxes, metadata = (
                    torch.cat(du.all_gather_unaligned(tensor), dim=0)
                    for tensor in (preds, ori_boxes, metadata)
                )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                preds.cpu(), ori_boxes.cpu(), metadata.cpu()
            )
        else:
            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
                val_meter.update_predictions(preds, labels)
            else:
                # Compute the errors.
                correct_counts = metrics.topks_correct(preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - count / preds.size(0)) * 100.0
                    for count in correct_counts
                ]
                # Combine the errors across the GPUs.
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err = top1_err.item()
                top5_err = top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    top1_err, top5_err, inputs[0].size(0) * cfg.NUM_GPUS
                )

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Visualize model weights, activations and (optionally Grad-CAM
    annotated) input clips on Tensorboard.

    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log. When None, the model is still run on
            every batch but nothing is written.
    """
    vis_cfg = cfg.TENSORBOARD.MODEL_VIS
    grad_cam_cfg = vis_cfg.GRAD_CAM

    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    multi_device = n_devices > 1
    # Layer names get a "module/" prefix when more than one device is used.
    prefix = "module/" if multi_device else ""

    # Get a list of selected layer names and indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        vis_cfg.LAYER_LIST, layer_name_prefix=prefix
    )
    logger.info("Start Model Visualization.")

    # Register hooks for activations.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and vis_cfg.MODEL_WEIGHTS:
        writer.plot_weights_and_activations(
            model_vis.get_weights(), tag="Layer Weights/", heat_map=False
        )

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        vis_cfg.TOPK_PREDS,
        vis_cfg.COLORMAP,
    )

    grad_cam_layer_ls = grad_cam_cfg.LAYER_LIST
    if multi_device:
        grad_cam_layer_ls = ["module/" + layer for layer in grad_cam_layer_ls]

    if grad_cam_cfg.ENABLE:
        gradcam = GradCAM(
            model,
            target_layers=grad_cam_layer_ls,
            data_mean=cfg.DATA.MEAN,
            data_std=cfg.DATA.STD,
            colormap=grad_cam_cfg.COLORMAP,
        )
    logger.info("Finish drawing weights.")

    # One result slot per device, and a single slot when running on CPU.
    num_slots = max(n_devices, 1)
    # Running 0-based index of the visualized sample across all batches.
    global_idx = -1

    for inputs, labels, _, meta in tqdm.tqdm(vis_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, list):
                for slot, pathway in enumerate(inputs):
                    inputs[slot] = pathway.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, list):
                    for slot, item in enumerate(val):
                        val[slot] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"]
            )
        else:
            activations, preds = model_vis.get_activations(inputs)

        if grad_cam_cfg.ENABLE:
            # Grad-CAM replaces both the inputs and the predictions.
            if grad_cam_cfg.USE_TRUE_LABEL:
                inputs, preds = gradcam(inputs, labels=labels)
            else:
                inputs, preds = gradcam(inputs)

        if cfg.NUM_GPUS:
            inputs = du.all_gather_unaligned(inputs)
            activations = du.all_gather_unaligned(activations)
            preds = du.all_gather_unaligned(preds)
            if isinstance(inputs[0], list):
                num_pathways = len(inputs[0])
                for device_inputs in inputs:
                    for pathway_idx in range(num_pathways):
                        device_inputs[pathway_idx] = device_inputs[
                            pathway_idx
                        ].cpu()
            else:
                inputs = [device_inputs.cpu() for device_inputs in inputs]
            preds = [device_preds.cpu() for device_preds in preds]
        else:
            # Mimic the gathered layout with a single slot.
            inputs, activations, preds = [inputs], [activations], [preds]

        boxes = [None] * num_slots
        if cfg.DETECTION.ENABLE and cfg.NUM_GPUS:
            boxes = [
                box.cpu() for box in du.all_gather_unaligned(meta["boxes"])
            ]

        if writer is None:
            continue

        for slot in range(num_slots):
            cur_input = inputs[slot]
            cur_activations = activations[slot]
            cur_batch_size = cur_input[0].shape[0]
            cur_preds = preds[slot]
            cur_boxes = boxes[slot]

            for cur_batch_idx in range(cur_batch_size):
                global_idx += 1

                if vis_cfg.INPUT_VIDEO or grad_cam_cfg.ENABLE:
                    for pathway_no, pathway in enumerate(cur_input, start=1):
                        if cfg.TEST.DATASET == "ava" and cfg.AVA.BGR:
                            # Reverse the channel order while selecting.
                            video = pathway[cur_batch_idx, [2, 1, 0], ...]
                        else:
                            video = pathway[cur_batch_idx]

                        if grad_cam_cfg.ENABLE:
                            # Permute from (T, C, H, W) to (T, H, W, C).
                            video = video.permute(0, 2, 3, 1)
                        else:
                            # Permute to (T, H, W, C) from (C, T, H, W).
                            video = video.permute(1, 2, 3, 0)
                            video = data_utils.revert_tensor_normalize(
                                video, cfg.DATA.MEAN, cfg.DATA.STD
                            )

                        if cur_boxes is None:
                            bboxes = None
                        else:
                            # Drop the first column of every box row.
                            bboxes = cur_boxes[:, 1:]

                        if cfg.DETECTION.ENABLE:
                            cur_prediction = cur_preds
                        else:
                            cur_prediction = cur_preds[cur_batch_idx]

                        video = video_vis.draw_clip(
                            video, cur_prediction, bboxes=bboxes
                        )
                        video = torch.from_numpy(np.array(video))
                        video = video.permute(0, 3, 1, 2).unsqueeze(0)
                        writer.add_video(
                            video,
                            tag="Input {}/Pathway {}".format(
                                global_idx, pathway_no
                            ),
                        )

                if vis_cfg.ACTIVATIONS:
                    writer.plot_weights_and_activations(
                        cur_activations,
                        tag="Input {}/Activations: ".format(global_idx),
                        batch_idx=cur_batch_idx,
                        indexing_dict=indexing_dict,
                    )
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg):
    """
    Run one pass over the validation set and record the metrics.

    Labels may be a single tensor (top-1/top-5 errors are recorded) or a
    dict with 'verb' and 'noun' entries (verb, noun and joint action
    top-1/top-5 accuracies are recorded).
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    Returns:
        Whatever `val_meter.log_epoch_stats(cur_epoch)` returns; the code
        treats it as the "is best epoch" flag.
    """

    def _sync_pair(top1, top5):
        # Combine the values across the GPUs, then copy them from GPU to
        # CPU (sync point).
        if cfg.NUM_GPUS > 1:
            top1, top5 = du.all_reduce([top1, top5])
        return top1.item(), top5.item()

    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, list):
            for slot, pathway in enumerate(inputs):
                inputs[slot] = pathway.cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)

        if isinstance(labels, dict):
            labels = {name: target.cuda() for name, target in labels.items()}
        else:
            labels = labels.cuda()

        if cfg.DETECTION.ENABLE:
            logger.info("Detection Metadata: {}".format(meta))
            # The metadata is only moved to the GPU for detection.
            for key, val in meta.items():
                if isinstance(val, list):
                    for slot, item in enumerate(val):
                        val[slot] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

            # Compute the predictions and bring everything to the host.
            preds = model(inputs, meta["boxes"]).cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds, ori_boxes, metadata = (
                    torch.cat(du.all_gather_unaligned(tensor), dim=0)
                    for tensor in (preds, ori_boxes, metadata)
                )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                preds.cpu(), ori_boxes.cpu(), metadata.cpu()
            )
        else:
            preds = model(inputs)

            if isinstance(labels, dict):
                # Compute the verb accuracies.
                verb_top1, verb_top5 = metrics.topk_accuracies(
                    preds[0], labels['verb'], (1, 5)
                )
                verb_top1, verb_top5 = _sync_pair(verb_top1, verb_top5)

                # Compute the noun accuracies.
                noun_top1, noun_top5 = metrics.topk_accuracies(
                    preds[1], labels['noun'], (1, 5)
                )
                noun_top1, noun_top5 = _sync_pair(noun_top1, noun_top5)

                # Compute the action accuracies.
                action_top1, action_top5 = metrics.multitask_topk_accuracies(
                    (preds[0], preds[1]),
                    (labels['verb'], labels['noun']),
                    (1, 5),
                )
                action_top1, action_top5 = _sync_pair(
                    action_top1, action_top5
                )

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    (verb_top1, noun_top1, action_top1),
                    (verb_top5, noun_top5, action_top5),
                    inputs[0].size(0) * cfg.NUM_GPUS,
                )
            else:
                # Compute the errors.
                correct_counts = metrics.topks_correct(preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - count / preds.size(0)) * 100.0
                    for count in correct_counts
                ]
                top1_err, top5_err = _sync_pair(top1_err, top5_err)

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    top1_err, top5_err, inputs[0].size(0) * cfg.NUM_GPUS
                )

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    is_best_epoch = val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
    return is_best_epoch
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video
    along its temporal axis. For each clip, it takes 3 crops to cover the
    spatial dimension, followed by averaging the softmax scores across all
    Nx3 views to form a video-level prediction. All video predictions are
    compared to ground-truth labels and the final testing performance is
    logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter
            object to writer Tensorboard log.
    Returns:
        The `test_meter` that was passed in, after `finalize_metrics()`.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, video_idx, meta) in enumerate(test_loader):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, list):
                for slot, pathway in enumerate(inputs):
                    inputs[slot] = pathway.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            video_idx = video_idx.cuda()
            for key, val in meta.items():
                if isinstance(val, list):
                    for slot, item in enumerate(val):
                        val[slot] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        test_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"]).detach()
            ori_boxes = meta["ori_boxes"].detach()
            metadata = meta["metadata"].detach()
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                ori_boxes = ori_boxes.cpu()
                metadata = metadata.cpu()

            if cfg.NUM_GPUS > 1:
                preds, ori_boxes, metadata = (
                    torch.cat(du.all_gather_unaligned(tensor), dim=0)
                    for tensor in (preds, ori_boxes, metadata)
                )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            preds = model(inputs)

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx]
                )
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                labels = labels.cpu()
                video_idx = video_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach(), labels.detach(), video_idx.detach()
            )
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Plot and/or save the video-level results (classification only).
    if not cfg.DETECTION.ENABLE:
        all_preds = test_meter.video_preds.clone().detach()
        all_labels = test_meter.video_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()

        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(
                cfg.OUTPUT_DIR, cfg.TEST.SAVE_RESULTS_PATH
            )
            with g_pathmgr.open(save_path, "wb") as results_file:
                pickle.dump([all_preds, all_labels], results_file)
            logger.info(
                "Successfully saved prediction results to {}".format(
                    save_path
                )
            )

    # Log epoch stats and print the final testing results.
    test_meter.finalize_metrics()
    return test_meter
def eval_epoch(self, val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Evaluate the model on the val set.

    In addition to the meter updates, per-iteration and per-epoch stats are
    forwarded to `self.plotStats` and top-1/top-5 errors are logged through
    `self.logger.log_row` on the master process. For two-class models the
    last prediction column and the labels of every batch are accumulated,
    all-gathered at the end of the epoch and handed to
    `val_meter.log_epoch_stats`.
    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()

    data_size = len(val_loader)
    # NOTE(review): the batch size is read from the `cfg` argument but the
    # shard count from `self.cfg` -- confirm the two always agree.
    btch = cfg.TRAIN.BATCH_SIZE * self.cfg.NUM_SHARDS
    rankE = os.environ.get("RANK", None)
    worldE = os.environ.get("WORLD_SIZE", None)
    # Samples per epoch as used for the x-axis of the plotted stats.
    dSize = data_size * btch
    self.logger.info(
        "Val Epoch {} dLen {} Batch {} dSize {} localRank {} rank {} {} world {} {}"
        .format(cur_epoch, data_size, btch, dSize, du.get_local_rank(),
                du.get_rank(), rankE, du.get_world_size(), worldE))

    val_meter.iter_tic()
    # Only filled for two-class, single-label classification.
    predsAll = []
    labelsAll = []

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, list):
            for i, pathway in enumerate(inputs):
                inputs[i] = pathway.cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, list):
                for i, item in enumerate(val):
                    val[i] = item.cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0)
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0)

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds.cpu(), ori_boxes.cpu(),
                                   metadata.cpu())
        else:
            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                if cfg.MODEL.NUM_CLASSES == 2:
                    # Keep the last prediction column and the labels.
                    predsAll.extend(preds.detach().cpu().numpy()[:, -1])
                    labelsAll.extend(labels.detach().cpu().numpy())

                # Compute the errors. The second k is capped at the number
                # of classes.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, min(5, cfg.MODEL.NUM_CLASSES)))

                # Combine the errors across the GPUs.
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce(
                        [top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(top1_err, top5_err,
                                       inputs[0].size(0) * cfg.NUM_GPUS)
                # write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {
                            "Val/Top1_err": top1_err,
                            "Val/Top5_err": top5_err
                        },
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )
                # NOTE(review): assumed to be independent of `writer`; the
                # nesting was reconstructed from whitespace-collapsed
                # source -- confirm.
                if du.is_master_proc():
                    ite = len(val_loader) * cur_epoch + cur_iter
                    self.logger.log_row(name='ValTop1',
                                        iter=ite,
                                        lr=top1_err,
                                        description="Top 1 Err")
                    self.logger.log_row(name='ValTop5',
                                        iter=ite,
                                        lr=top5_err,
                                        description="Top 5 Err")

            val_meter.update_predictions(preds, labels)

        stats = val_meter.log_iter_stats(cur_epoch, cur_iter, predsAll,
                                         labelsAll)
        ite = dSize * cur_epoch + btch * (cur_iter + 1)
        self.plotStats(stats, ite, 'ValIter')
        val_meter.iter_tic()

    # Log epoch stats on the predictions/labels gathered from all processes.
    gathered = du.all_gather([
        torch.tensor(predsAll).to(torch.device("cuda")),
        torch.tensor(labelsAll).to(torch.device("cuda"))
    ])
    stats = val_meter.log_epoch_stats(cur_epoch,
                                      gathered[0].detach().cpu().numpy(),
                                      gathered[1].detach().cpu().numpy())
    ite = (cur_epoch + 1) * dSize
    self.plotStats(stats, ite, 'ValEpoch')

    # write to tensorboard format if available.
    # NOTE: the per-epoch prediction plots (plotScatter / writer.plot_eval)
    # are disabled, so the copies of all predictions and labels that used to
    # be built for them are no longer made.
    if writer is not None and cfg.DETECTION.ENABLE:
        writer.add_scalars({"Val/mAP": val_meter.full_map},
                           global_step=cur_epoch)

    val_meter.reset()
def perform_test(test_loader, model, test_meter, cfg):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video
    along its temporal axis. For each clip, it takes 3 crops to cover the
    spatial dimension, followed by averaging the softmax scores across all
    Nx3 views to form a video-level prediction. All video predictions are
    compared to ground-truth labels and the final testing performance is
    logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.
    Args:
        test_loader (loader): video testing loader. Each item is a dict
            with the keys 'inputs', 'label', 'index' and 'metadata', plus
            'bboxs' and 'masks' when cfg.EPICKITCHENS.USE_BBOX is set.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    Returns:
        (preds, labels, metadata) as returned by
        `test_meter.finalize_metrics()` when cfg.TEST.DATASET is
        'epickitchens', otherwise (None, None, None). When
        cfg.TEST.EXTRACT_FEATURES is set, a fourth element holds the
        collected features moved to CPU: a flat list when
        cfg.TEST.EXTRACT_MSTCN_FEATURES is set, otherwise a pair of lists
        (one per element of the model's feature output).
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    extract_features = cfg.TEST.EXTRACT_FEATURES
    mstcn_features = cfg.TEST.EXTRACT_MSTCN_FEATURES
    if extract_features:
        x_feat_list = [] if mstcn_features else [[], []]

    for cur_iter, output_dict in enumerate(test_loader):
        if cur_iter % 100 == 0:
            logger.info("Testing iter={}".format(cur_iter))

        inputs = output_dict['inputs']
        labels = output_dict['label']
        video_idx = output_dict['index']
        meta = output_dict['metadata']
        if cfg.EPICKITCHENS.USE_BBOX:
            bboxs = output_dict['bboxs']
            masks = output_dict['masks']
        else:
            bboxs = None
            masks = None

        # Transfer the data to the current GPU device.
        if isinstance(inputs, list):
            for i, pathway in enumerate(inputs):
                inputs[i] = pathway.cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        if isinstance(labels, dict):
            labels = {k: v.cuda() for k, v in labels.items()}
        else:
            labels = labels.cuda()
        video_idx = video_idx.cuda()

        if cfg.DETECTION.ENABLE:
            # The metadata is only moved to the GPU for detection.
            for key, val in meta.items():
                if isinstance(val, list):
                    for i, item in enumerate(val):
                        val[i] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0
                )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach().cpu(),
                ori_boxes.detach().cpu(),
                metadata.detach().cpu(),
            )
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            if cfg.EPICKITCHENS.USE_BBOX:
                bboxs = to_cuda(bboxs)
                masks = to_cuda(masks)
                preds_pair = model(inputs, bboxes=bboxs, masks=masks)
            else:
                preds_pair = model(inputs)

            # With feature extraction the model returns (preds, features).
            if extract_features:
                preds, x_feat = preds_pair
            else:
                preds = preds_pair

            if isinstance(labels, dict):
                if cfg.NUM_GPUS > 1:
                    # Gather all the predictions across all the devices to
                    # perform ensemble. Everything goes through a single
                    # call so that `video_idx` is gathered exactly once;
                    # gathering it a second time (once per head) made it
                    # world_size times longer than the predictions.
                    (
                        verb_preds,
                        noun_preds,
                        verb_labels,
                        noun_labels,
                        video_idx,
                    ) = du.all_gather(
                        [
                            preds[0],
                            preds[1],
                            labels['verb'],
                            labels['noun'],
                            video_idx,
                        ]
                    )
                    # Merge the narration ids of every process.
                    meta = du.all_gather_unaligned(meta)
                    metadata = {'narration_id': []}
                    for proc_meta in meta:
                        metadata['narration_id'].extend(
                            proc_meta['narration_id']
                        )

                    if extract_features:
                        if mstcn_features:
                            x_feat = du.all_gather([x_feat])
                            x_feat_list.append(x_feat[0])
                        else:
                            x_feat_slow, x_feat_fast = du.all_gather(
                                [x_feat[0], x_feat[1]]
                            )
                            x_feat_list[0] += [x_feat_slow]
                            x_feat_list[1] += [x_feat_fast]
                else:
                    metadata = meta
                    verb_preds, verb_labels = preds[0], labels['verb']
                    noun_preds, noun_labels = preds[1], labels['noun']

                    if extract_features:
                        if mstcn_features:
                            x_feat_list.append(x_feat)
                        else:
                            x_feat_list[0].append(x_feat[0])
                            x_feat_list[1].append(x_feat[1])

                test_meter.iter_toc()
                # Update and log stats.
                test_meter.update_stats(
                    (verb_preds.detach().cpu(), noun_preds.detach().cpu()),
                    (verb_labels.detach().cpu(), noun_labels.detach().cpu()),
                    metadata,
                    video_idx.detach().cpu(),
                )
            else:
                # Gather all the predictions across all the devices to
                # perform ensemble. The gathered indices must replace
                # `video_idx`; they used to be bound to an unused name, so
                # gathered predictions were paired with local indices.
                if cfg.NUM_GPUS > 1:
                    preds, labels, video_idx = du.all_gather(
                        [preds, labels, video_idx]
                    )

                test_meter.iter_toc()
                # Update and log stats.
                test_meter.update_stats(
                    preds.detach().cpu(),
                    labels.detach().cpu(),
                    video_idx.detach().cpu(),
                )

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if cfg.TEST.DATASET == 'epickitchens':
        preds, labels, metadata = test_meter.finalize_metrics()
    else:
        test_meter.finalize_metrics()
        preds, labels, metadata = None, None, None
    test_meter.reset()

    if not extract_features:
        return preds, labels, metadata

    if mstcn_features:
        final_feat_list = [t.cpu() for t in x_feat_list]
    else:
        final_feat_list = [
            [t.cpu() for t in x_feat_list[0]],
            [t.cpu() for t in x_feat_list[1]],
        ]
    return preds, labels, metadata, final_feat_list
def eval_epoch(val_loader, model, val_meter, cur_epoch, nep, cfg):
    """
    Evaluate the model on the val set.

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        nep (object or None): experiment tracker; when it is not None,
            `nep.log_metric(name, value)` is called once per logged stat
            with the meter's global average. Pass None to disable.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    # Stats of the most recent classification iteration. Bound before the
    # loop so the tracker update below is safe for an empty loader or a
    # detection run.
    loss_logs = {}

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list,)):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0
                )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                preds.cpu(), ori_boxes.cpu(), metadata.cpu()
            )
        else:
            preds = model(inputs)

            # Auxiliary losses, keyed directly by the name they are logged
            # under so that collection and logging cannot drift apart.
            aux_losses = {}
            if cfg.PREDICTIVE.ENABLE:
                aux_losses['loss_pred'] = preds['pred_errors'].mean()
                if 'frame_errors' in preds:
                    aux_losses['frame_errors'] = preds['frame_errors']
                # NOTE(review): the CPC term is treated as part of the
                # predictive branch -- confirm against the training loop.
                if cfg.PREDICTIVE.CPC:
                    aux_losses['loss_cpc'] = preds['cpc_loss']
            if cfg.SUPERVISED:
                preds = preds['logits']

            if cfg.MODEL.LOSS_FUNC != '' and cfg.SUPERVISED:
                # Explicitly declare reduction to mean.
                loss_fun = losses.get_loss_func(
                    cfg.MODEL.LOSS_FUNC)(reduction="mean")
                # Compute the loss.
                loss = loss_fun(preds, labels)
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0
                    for x in num_topks_correct
                ]

            # Combine the stats across the GPUs.
            if cfg.NUM_GPUS > 1:
                for name in ('loss_pred', 'loss_cpc'):
                    if name in aux_losses:
                        aux_losses[name] = du.all_reduce(
                            [aux_losses[name]])[0]
                if cfg.SUPERVISED:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])

            # Copy the stats from GPU to CPU (sync point).
            loss_logs = {
                name: value.item() for name, value in aux_losses.items()
            }
            if cfg.SUPERVISED:
                loss_logs['loss_class'] = loss.item()
                loss_logs['top1_err'] = top1_err.item()
                loss_logs['top5_err'] = top5_err.item()

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                inputs[0].size(0) * cfg.NUM_GPUS, **loss_logs)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # neptune update
    if nep is not None:
        for k in loss_logs:
            # Remove only a leading "loss_". str.strip('loss_') would drop
            # any of the characters l/o/s/_ from BOTH ends of the key.
            name = k[len('loss_'):] if k.startswith('loss_') else k
            nep.log_metric(
                'val_' + name, val_meter.stats[k].get_global_avg())

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform mutli-view testing that uniformly samples N clips from a video
    along its temporal axis. For each clip, it takes 3 crops to cover the
    spatial dimension, followed by averaging the softmax scores across all
    Nx3 views to form a video-level prediction. All video predictions are
    compared to ground-truth labels and the final testing performance is
    logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.

    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter
            object to writer Tensorboard log.

    Returns:
        The `test_meter` that was passed in, after `finalize_metrics()` has
        been called on it.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, video_idx, time, meta) in enumerate(
        test_loader
    ):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            video_idx = video_idx.cuda()
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        test_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            preds = preds.detach().cpu() if cfg.NUM_GPUS else preds.detach()
            ori_boxes = (
                ori_boxes.detach().cpu()
                if cfg.NUM_GPUS
                else ori_boxes.detach()
            )
            metadata = (
                metadata.detach().cpu()
                if cfg.NUM_GPUS
                else metadata.detach()
            )

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0
                )

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            if (
                cfg.TASK == "ssl"
                and cfg.MODEL.MODEL_NAME == "ContrastiveModel"
            ):
                if not cfg.CONTRASTIVE.KNN_ON:
                    test_meter.finalize_metrics()
                    return test_meter
                train_labels = (
                    model.module.train_labels
                    if hasattr(model, "module")
                    else model.train_labels
                )
                yd, yi = model(inputs, video_idx, time)
                batchSize = yi.shape[0]
                K = yi.shape[1]
                # eg 400 for Kinetics400
                C = cfg.CONTRASTIVE.NUM_CLASSES_DOWNSTREAM
                candidates = train_labels.view(1, -1).expand(batchSize, -1)
                retrieval = torch.gather(candidates, 1, yi)
                # Allocate next to the retrieved labels rather than forcing
                # CUDA, so the KNN path also runs when cfg.NUM_GPUS == 0.
                retrieval_one_hot = torch.zeros(
                    (batchSize * K, C), device=retrieval.device
                )
                retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)
                yd_transform = yd.clone().div_(cfg.CONTRASTIVE.T).exp_()
                probs = torch.mul(
                    retrieval_one_hot.view(batchSize, -1, C),
                    yd_transform.view(batchSize, -1, 1),
                )
                preds = torch.sum(probs, 1)
            else:
                # Perform the forward pass.
                preds = model(inputs)

            # Classification-only bookkeeping. It is kept out of the
            # detection path, whose meter was already updated above with
            # (preds, ori_boxes, metadata).
            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx]
                )
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                labels = labels.cpu()
                video_idx = video_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach(), labels.detach(), video_idx.detach()
            )
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if not cfg.DETECTION.ENABLE:
        all_preds = test_meter.video_preds.clone().detach()
        all_labels = test_meter.video_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()
        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(
                cfg.OUTPUT_DIR, cfg.TEST.SAVE_RESULTS_PATH
            )
            # Only the root process writes the results file.
            if du.is_root_proc():
                with pathmgr.open(save_path, "wb") as f:
                    pickle.dump([all_preds, all_labels], f)
            logger.info(
                "Successfully saved prediction results to {}".format(
                    save_path
                )
            )

    test_meter.finalize_metrics()
    return test_meter
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform mutli-view testing that uniformly samples N clips from an audio
    along its temporal axis. Softmax scores are averaged across all N views
    to form an audio-level prediction. All audio predictions are compared to
    ground-truth labels and the final testing performance is logged.

    Args:
        test_loader (loader): audio testing loader.
        model (model): the pretrained audio model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter
            object to writer Tensorboard log.

    Returns:
        tuple: `(test_meter, preds, preds_clips, labels, metadata)`, where
        the last four values are whatever `test_meter.finalize_metrics()`
        returns.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels, audio_idx, meta) in enumerate(
        test_loader
    ):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            if isinstance(labels, (dict,)):
                labels = {k: v.cuda() for k, v in labels.items()}
            else:
                labels = labels.cuda()
            audio_idx = audio_idx.cuda()
        test_meter.data_toc()

        # Perform the forward pass.
        preds = model(inputs)

        if isinstance(labels, (dict,)):
            # Dict labels carry separate 'verb' and 'noun' targets; preds[0]
            # and preds[1] are paired with them respectively.
            if cfg.NUM_GPUS > 1:
                # Gather all the predictions across all the devices to
                # perform ensemble. Everything goes through ONE call so that
                # audio_idx is gathered exactly once; gathering it again in
                # a second call would gather the already-gathered tensor.
                (
                    verb_preds,
                    noun_preds,
                    verb_labels,
                    noun_labels,
                    audio_idx,
                ) = du.all_gather(
                    [
                        preds[0],
                        preds[1],
                        labels['verb'],
                        labels['noun'],
                        audio_idx,
                    ]
                )
                meta = du.all_gather_unaligned(meta)
                metadata = {'narration_id': []}
                for i in range(len(meta)):
                    metadata['narration_id'].extend(meta[i]['narration_id'])
            else:
                metadata = meta
                verb_preds, verb_labels = preds[0], labels['verb']
                noun_preds, noun_labels = preds[1], labels['noun']

            if cfg.NUM_GPUS:
                verb_preds = verb_preds.cpu()
                verb_labels = verb_labels.cpu()
                noun_preds = noun_preds.cpu()
                noun_labels = noun_labels.cpu()
                audio_idx = audio_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                (verb_preds.detach(), noun_preds.detach()),
                (verb_labels.detach(), noun_labels.detach()),
                metadata,
                audio_idx.detach(),
            )
            test_meter.log_iter_stats(cur_iter)
        else:
            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds, labels, audio_idx = du.all_gather(
                    [preds, labels, audio_idx]
                )
            if cfg.NUM_GPUS:
                preds = preds.cpu()
                labels = labels.cpu()
                audio_idx = audio_idx.cpu()

            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(
                preds.detach(), labels.detach(), audio_idx.detach()
            )
            test_meter.log_iter_stats(cur_iter)

        test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if cfg.TEST.DATASET != 'epickitchens':
        all_preds = test_meter.audio_preds.clone().detach()
        all_labels = test_meter.audio_labels
        if cfg.NUM_GPUS:
            all_preds = all_preds.cpu()
            all_labels = all_labels.cpu()
        if writer is not None:
            writer.plot_eval(preds=all_preds, labels=all_labels)

        if cfg.TEST.SAVE_RESULTS_PATH != "":
            save_path = os.path.join(
                cfg.OUTPUT_DIR, cfg.TEST.SAVE_RESULTS_PATH
            )
            # Only the root process writes the results file.
            if du.is_root_proc():
                with PathManager.open(save_path, "wb") as f:
                    pickle.dump([all_preds, all_labels], f)
            logger.info(
                "Successfully saved prediction results to {}".format(
                    save_path
                )
            )

    preds, preds_clips, labels, metadata = test_meter.finalize_metrics()
    return test_meter, preds, preds_clips, labels, metadata
def run_visualization(vis_loader, model, cfg, writer=None):
    """
    Visualize a model on Tensorboard: its layer weights, the activations of
    the selected layers, and the input clips with predictions drawn on them.

    Args:
        vis_loader (loader): video visualization loader.
        model (model): the video model to visualize.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log. When None, batches are still run
            through the model but nothing is plotted.
    """
    n_devices = cfg.NUM_GPUS * cfg.NUM_SHARDS
    if n_devices > 1:
        prefix = "module/"
    else:
        prefix = ""

    # Resolve the configured layer list into layer names plus indexing.
    layer_ls, indexing_dict = process_layer_index_data(
        cfg.TENSORBOARD.MODEL_VIS.LAYER_LIST, layer_name_prefix=prefix
    )
    logger.info("Start Model Visualization.")

    # Hook the selected layers so their activations can be captured.
    model_vis = GetWeightAndActivation(model, layer_ls)

    if writer is not None and cfg.TENSORBOARD.MODEL_VIS.MODEL_WEIGHTS:
        writer.plot_weights_and_activations(
            model_vis.get_weights(), tag="Layer Weights/", heat_map=False
        )

    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.TENSORBOARD.CLASS_NAMES_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )
    logger.info("Finish drawing weights.")

    global_idx = -1
    for inputs, _, _, meta in vis_loader:
        # Move the batch and its metadata onto the current GPU device.
        if isinstance(inputs, (list,)):
            for slot, pathway in enumerate(inputs):
                inputs[slot] = pathway.cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        for key, val in meta.items():
            if isinstance(val, (list,)):
                for slot, item in enumerate(val):
                    val[slot] = item.cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        # Forward pass through the hooked model.
        if cfg.DETECTION.ENABLE:
            activations, preds = model_vis.get_activations(
                inputs, meta["boxes"]
            )
        else:
            activations, preds = model_vis.get_activations(inputs)

        # Collect the per-device results.
        inputs = du.all_gather_unaligned(inputs)
        activations = du.all_gather_unaligned(activations)
        preds = du.all_gather_unaligned(preds)
        if cfg.DETECTION.ENABLE:
            boxes = du.all_gather_unaligned(meta["boxes"])
        else:
            boxes = [None] * n_devices

        if writer is None:
            continue

        total_vids = 0
        for dev in range(n_devices):
            dev_input = inputs[dev]
            dev_activations = activations[dev]
            dev_batch_size = dev_input[0].shape[0]
            dev_preds = preds[dev].cpu()
            dev_boxes = boxes[dev]

            for sample_idx in range(dev_batch_size):
                global_idx += 1
                total_vids += 1

                if cfg.TENSORBOARD.MODEL_VIS.INPUT_VIDEO:
                    for path_idx, input_pathway in enumerate(dev_input):
                        if (
                            cfg.TEST.DATASET in ("ava", "custom")
                            and cfg.AVA.BGR
                        ):
                            # Reverse the channel order of BGR clips.
                            video = input_pathway[sample_idx, [2, 1, 0], ...]
                        else:
                            video = input_pathway[sample_idx]

                        # Permute to (T, H, W, C) from (C, T, H, W).
                        video = video.permute(1, 2, 3, 0)
                        video = data_utils.revert_tensor_normalize(
                            video.cpu(), cfg.DATA.MEAN, cfg.DATA.STD
                        )
                        if dev_boxes is None:
                            bboxes = None
                        else:
                            bboxes = dev_boxes[:, 1:].cpu()
                        video = video_vis.draw_clip(
                            video, dev_preds, bboxes=bboxes
                        )
                        video = torch.Tensor(video)
                        video = video.permute(0, 3, 1, 2).unsqueeze(0)
                        writer.add_video(
                            video,
                            tag="Input {}/Input from pathway {}".format(
                                global_idx, path_idx + 1
                            ),
                        )

                if cfg.TENSORBOARD.MODEL_VIS.ACTIVATIONS:
                    writer.plot_weights_and_activations(
                        dev_activations,
                        tag="Input {}/Activations: ".format(global_idx),
                        batch_idx=sample_idx,
                        indexing_dict=indexing_dict,
                    )

        logger.info("Visualized {} videos...".format(total_vids))
def perform_test(test_loader, model, test_meter, cfg, writer=None, device='cpu'):
    """
    For classification:
    Perform mutli-view testing that uniformly samples N clips from a video
    along its temporal axis. For each clip, it takes 3 crops to cover the
    spatial dimension, followed by averaging the softmax scores across all
    Nx3 views to form a video-level prediction. All video predictions are
    compared to ground-truth labels and the final testing performance is
    logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.

    Timing and shape diagnostics are written to stdout with `print` at
    several points of the loop.

    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter
            object to writer Tensorboard log.
        device: target passed to `.to(...)` for list inputs, labels and
            video_idx. Non-list inputs and `meta` tensors are moved with
            `.cuda()` instead.

    Returns:
        tuple: `(preds, gap)` as left by the last loop iteration.
        NOTE(review): `gap` is only assigned on the non-detection branch,
        and neither name is bound if the loader is empty.
    """
    # Enable eval mode.
    import time
    model.eval()
    test_meter.iter_tic()
    print('The len of dataloader: ', len(test_loader))
    ntic = time.time()
    for cur_iter, (inputs, labels, video_idx, meta) in enumerate(test_loader):
        # Time spent waiting on the data loader for this batch.
        print(time.time() - ntic)
        print('in dataloader - input shape is: ', len(inputs))
        # NOTE(review): indexes inputs[1], so this expects at least two
        # pathways -- confirm for single-pathway models.
        print(inputs[0].shape, inputs[1].shape)
        ntic = time.time()
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].to(device, non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            # Transfer the data to the current GPU device.
            labels = labels.to(device)
            video_idx = video_idx.to(device)
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        print('transfer to gpu: ', time.time() - ntic)
        ntic = time.time()
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()
            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(du.all_gather_unaligned(ori_boxes), dim=0)
                metadata = torch.cat(du.all_gather_unaligned(metadata), dim=0)
            preds = preds.detach().cpu() if cfg.NUM_GPUS else preds.detach()
            ori_boxes = (ori_boxes.detach().cpu() if cfg.NUM_GPUS else ori_boxes.detach())
            metadata = (metadata.detach().cpu() if cfg.NUM_GPUS else metadata.detach())
            test_meter.iter_toc()
            # Update and log stats.
            test_meter.update_stats(preds, ori_boxes, metadata)
            test_meter.log_iter_stats(None, cur_iter)
        else:
            # Perform the forward pass.
            import time
            ntic_1 = time.time()
            # The model is expected to return three values here; only
            # `preds` and `gap` are used later in the visible code.
            with torch.no_grad():
                preds, pre_gap, gap = model(inputs)
            # NOTE(review): the next line is corrupted in the file. The text
            # between 'after fwd pass: ' and 'full test done: ' (presumably
            # the gather / meter update / logging of the classification
            # path) is missing and the line is not valid Python. Its
            # placement in this branch is a guess; restore it from version
            # control.
            print('after fwd pass: '******'full test done: ', time.time() - ntic)
        ntic = time.time()
    # Log epoch stats and print the final testing results.
    if writer is not None and not cfg.DETECTION.ENABLE:
        all_preds = [pred.clone().detach() for pred in test_meter.video_preds]
        all_labels = [
            label.clone().detach() for label in test_meter.video_labels
        ]
        if cfg.NUM_GPUS:
            all_preds = [pred.cpu() for pred in all_preds]
            all_labels = [label.cpu() for label in all_labels]
        writer.plot_eval(preds=all_preds, labels=all_labels)
    test_meter.finalize_metrics()
    test_meter.reset()
    print('full func done: ', time.time() - ntic)
    return preds, gap
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, train_loader, writer):
    """
    Evaluate the model on the val set.

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        train_loader (loader): training data loader. It is accepted for
            interface compatibility and not read by this function.
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log. May be None.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, index, time, meta) in enumerate(
        val_loader
    ):
        if cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
            index = index.cuda()
            time = time.cuda()
        batch_size = (
            inputs[0][0].size(0)
            if isinstance(inputs[0], list)
            else inputs[0].size(0)
        )
        val_meter.data_toc()

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
            ori_boxes = meta["ori_boxes"]
            metadata = meta["metadata"]

            if cfg.NUM_GPUS:
                preds = preds.cpu()
                ori_boxes = ori_boxes.cpu()
                metadata = metadata.cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0
                )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(preds, ori_boxes, metadata)
        else:
            if (
                cfg.TASK == "ssl"
                and cfg.MODEL.MODEL_NAME == "ContrastiveModel"
            ):
                if not cfg.CONTRASTIVE.KNN_ON:
                    return
                train_labels = (
                    model.module.train_labels
                    if hasattr(model, "module")
                    else model.train_labels
                )
                yd, yi = model(inputs, index, time)
                K = yi.shape[1]
                # eg 400 for Kinetics400
                C = cfg.CONTRASTIVE.NUM_CLASSES_DOWNSTREAM
                candidates = train_labels.view(1, -1).expand(batch_size, -1)
                retrieval = torch.gather(candidates, 1, yi)
                # Allocate next to the retrieved labels rather than forcing
                # CUDA, so the KNN path also runs when cfg.NUM_GPUS == 0.
                retrieval_one_hot = torch.zeros(
                    (batch_size * K, C), device=retrieval.device
                )
                retrieval_one_hot.scatter_(1, retrieval.view(-1, 1), 1)
                yd_transform = yd.clone().div_(cfg.CONTRASTIVE.T).exp_()
                probs = torch.mul(
                    retrieval_one_hot.view(batch_size, -1, C),
                    yd_transform.view(batch_size, -1, 1),
                )
                preds = torch.sum(probs, 1)
            else:
                preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5)
                )

                # Combine the errors across the GPUs.
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0
                    for x in num_topks_correct
                ]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    top1_err,
                    top5_err,
                    # If running on CPU (cfg.NUM_GPUS == 0), use 1 to
                    # represent 1 CPU.
                    batch_size * max(cfg.NUM_GPUS, 1),
                )
                # write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {"Val/Top1_err": top1_err, "Val/Top5_err": top5_err},
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )

            val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    # write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars(
                {"Val/mAP": val_meter.full_map}, global_step=cur_epoch
            )
        else:
            all_preds = [pred.clone().detach() for pred in val_meter.all_preds]
            all_labels = [
                label.clone().detach() for label in val_meter.all_labels
            ]
            if cfg.NUM_GPUS:
                all_preds = [pred.cpu() for pred in all_preds]
                all_labels = [label.cpu() for label in all_labels]
            writer.plot_eval(
                preds=all_preds, labels=all_labels, global_step=cur_epoch
            )

    val_meter.reset()
def _binary_mcc(cm):
    """
    Compute the Matthews correlation coefficient of a 2x2 confusion matrix.

    Args:
        cm (Tensor): 2x2 confusion matrix of counts.

    Returns:
        float: the MCC, or 0.0 when the denominator is zero (one whole row
        or column of the matrix is empty), where the ratio would otherwise
        be NaN or infinite.
    """
    # Work in float64: the denominator is a product of four counts and can
    # exceed the range/precision of integer or float32 arithmetic.
    cm = cm.double()
    # The formula is symmetric in fp and fn, so the orientation of the
    # off-diagonal entries does not affect the result.
    tp, tn, fn, fp = cm[1, 1], cm[0, 0], cm[0, 1], cm[1, 0]
    denom = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    if denom.item() == 0:
        return 0.0
    return ((tp * tn - fp * fn) / torch.sqrt(denom)).item()


def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Evaluate the model on the val set and report the Matthews correlation
    coefficient (MCC) of the accumulated predictions.

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.

    Returns:
        float: MCC computed from the entries [0, 0], [0, 1], [1, 0] and
        [1, 1] of the confusion matrix of the argmax predictions against
        the labels; 0.0 when the MCC denominator is zero.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list,)):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])

            preds = preds.cpu()
            ori_boxes = meta["ori_boxes"].cpu()
            metadata = meta["metadata"].cpu()

            if cfg.NUM_GPUS > 1:
                preds = torch.cat(du.all_gather_unaligned(preds), dim=0)
                ori_boxes = torch.cat(
                    du.all_gather_unaligned(ori_boxes), dim=0
                )
                metadata = torch.cat(
                    du.all_gather_unaligned(metadata), dim=0
                )

            val_meter.iter_toc()
            # Update and log stats.
            val_meter.update_stats(
                preds.cpu(), ori_boxes.cpu(), metadata.cpu()
            )
        else:
            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
            else:
                # Compute the errors. k is (1, 1), so the value reported
                # below as "top5" is a second copy of the top-1 error.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 1)
                )

                # Combine the errors across the GPUs.
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0
                    for x in num_topks_correct
                ]
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])

                # Copy the errors from GPU to CPU (sync point).
                top1_err, top5_err = top1_err.item(), top5_err.item()

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    top1_err, top5_err, inputs[0].size(0) * cfg.NUM_GPUS
                )
                # write to tensorboard format if available.
                if writer is not None:
                    writer.add_scalars(
                        {"Val/Top1_err": top1_err, "Val/Top5_err": top5_err},
                        global_step=len(val_loader) * cur_epoch + cur_iter,
                    )

            val_meter.update_predictions(preds, labels)

        val_meter.log_iter_stats(cur_epoch, cur_iter)
        val_meter.iter_tic()

    logger.info('COMPUTING MCC')
    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)

    all_preds_cpu = [
        pred.clone().detach().cpu() for pred in val_meter.all_preds
    ]
    all_labels_cpu = [
        label.clone().detach().cpu() for label in val_meter.all_labels
    ]

    logger.info('PREPROC FOR MCC')
    preds = torch.cat(all_preds_cpu)
    ypreds = torch.argmax(preds, dim=1)
    ytrue = torch.cat(all_labels_cpu)

    logger.info('COMPUTE CM')
    cm = plmetrics.ConfusionMatrix()(ypreds.to('cuda'), ytrue.to('cuda'))
    logger.info('CM COMPUTED')
    mcc = _binary_mcc(cm)
    logger.info('COMPUTED MCC')

    # write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars(
                {"Val/mAP": val_meter.full_map}, global_step=cur_epoch
            )
        writer.plot_eval(
            preds=all_preds_cpu, labels=all_labels_cpu, global_step=cur_epoch
        )

    val_meter.reset()
    return mcc