def export_pytorch_model():
    """Trace the model with TorchScript and save it for CPU deployment."""
    weights = 'checkpoints/checkpoint_epoch_00028.pyth'
    args = parse_args()
    cfg = load_config(args)
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    device = torch.device("cpu")
    model = build_model(cfg).to(device)
    chkpt = torch.load(weights, map_location=device)
    model.load_state_dict(chkpt['model_state'])
    # Note: checkpoints saved from multi-GPU training prefix state-dict keys
    # with "module."; if loading fails, fall back to filtering the keys:
    # try:
    #     model_dict = model.module.state_dict()
    # except AttributeError:
    #     model_dict = model.state_dict()
    # # Drop keys from the pretrained dict that are not present in model_dict.
    # chkpt = {k: v for k, v in chkpt.items() if k in model_dict}
    # print("load pretrain model")
    # model_dict.update(chkpt)
    # model.load_state_dict(model_dict)

    # Switch to eval mode before tracing.
    model.eval()

    # Dummy slow/fast pathway inputs and a dummy box input.
    # e1 = torch.rand(1, 3, 8, 224, 224).cuda()
    # e2 = torch.rand(1, 3, 32, 224, 224).cuda()
    e1 = torch.rand(8, 3, 256, 455)   # .fill_(0)
    e2 = torch.rand(32, 3, 256, 455)  # .fill_(0)
    e3 = [e1, e2]
    # e4 = torch.rand(1, 5).cuda()
    e4 = torch.rand(1, 1, 1, 5)  # .fill_(0)

    import numpy as np
    np.save("input00.npy", e3[0].numpy())
    np.save("input11.npy", e3[1].numpy())
    np.save("input22.npy", e4.numpy())
    # Load previously saved reference inputs (note the different file names)
    # so the trace is reproducible across runs.
    input0 = torch.from_numpy(np.load("input0.npy"))
    input1 = torch.from_numpy(np.load("input1.npy"))
    input2 = torch.from_numpy(np.load("input2.npy"))

    pred = model(e3, e4)
    print(pred)
    # exit(0)

    input3 = [input0, input1]
    # traced_script_module = torch.jit.trace(model, (e3, e4))
    traced_script_module = torch.jit.trace(model, (input3, input2))
    # print(traced_script_module.graph)
    print(traced_script_module(input3, input2))  # .forward
    traced_script_module.save("weights/sf18_pytorch_cpu4503.pt")
    print("output saved")
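
# A minimal sketch (not part of the original script) showing how the traced
# module saved above could be loaded back and run for CPU inference. The file
# name matches the save() call above; the input shapes are assumptions carried
# over from the dummy tensors used during tracing.
import torch

loaded = torch.jit.load("weights/sf18_pytorch_cpu4503.pt", map_location="cpu")
loaded.eval()
slow = torch.rand(8, 3, 256, 455)
fast = torch.rand(32, 3, 256, 455)
boxes = torch.rand(1, 1, 1, 5)
with torch.no_grad():
    out = loaded([slow, fast], boxes)
print(out)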
def saveOnnxModel(self):
    """Export the model to ONNX using a real batch from the train loader."""
    model = build_model(self.cfg)
    optimizer = optim.construct_optimizer(model, self.cfg)
    start_epoch = cu.load_train_checkpoint(self.cfg, model, optimizer, self.logger)
    self.cfg.TRAIN['BATCH_SIZE'] = self.cfg.ONNX.BATCH_SIZE
    dl = loader.construct_loader(self.cfg, "train")
    inputs, labels, _, _ = next(iter(dl))
    if isinstance(inputs, (list,)):
        for i in range(len(inputs)):
            # Tensor.to() is not in-place; keep the returned tensor.
            inputs[i] = inputs[i].to(self.onnxDevice)
    model.to(torch.device(self.onnxDevice))
    model.eval()
    onnxPath, _ = self.getOnnxModelPath()
    with torch.no_grad():
        torch.onnx.export(
            model,
            inputs,
            onnxPath,
            opset_version=self.cfg.ONNX.OPSET_VER,
            verbose=True,
            input_names=self.cfg.ONNX.INPUT_NAMES,
            output_names=self.cfg.ONNX.OUTPUT_NAMES,
        )
    self.logger.info("Exported {}".format(onnxPath))
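
# A minimal sketch (an assumption, not from the original code) of verifying an
# export like the one above with onnxruntime. "model.onnx" stands in for the
# path returned by getOnnxModelPath(); real input names and shapes come from
# cfg.ONNX.*, so dynamic dimensions are replaced with 1 here for illustration.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
feed = {
    inp.name: np.random.rand(
        *[d if isinstance(d, int) else 1 for d in inp.shape]
    ).astype(np.float32)
    for inp in sess.get_inputs()
}
outs = sess.run(None, feed)
print([o.shape for o in outs])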
def inference(cfg):
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)
    cu.load_test_checkpoint(cfg, model)

    # Create video loaders.
    video_loader = loader.construct_loader(cfg, "test")

    # Create saver.
    saver = Saver(cfg.DATA.PATH_TO_DATA_DIR, video_loader.dataset)

    model.eval()
    for i, (inputs, index) in tqdm(enumerate(video_loader), total=len(video_loader)):
        # Use a separate loop variable so the enumerate index is not clobbered.
        for j in range(len(inputs)):
            inputs[j] = inputs[j].cuda(non_blocking=True)
        index = index.cuda()
        feats = model(inputs)
        # Gather all the predictions across all the devices to perform ensemble.
        if cfg.NUM_GPUS > 1:
            feats, index = du.all_gather([feats, index])
        saver.save(feats, index)
    saver.merge()
def __init__(self, cfg, gpu_id=None):
    """
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        gpu_id (Optional[int]): GPU id.
    """
    if cfg.NUM_GPUS:
        self.gpu_id = (
            torch.cuda.current_device() if gpu_id is None else gpu_id
        )

    # Build the video model and print model statistics.
    # self.model = build_model(cfg, gpu_id=gpu_id)
    self.model = build_model(cfg, gpu_id=None)
    self.model.eval()
    self.cfg = cfg

    if cfg.DETECTION.ENABLE:
        # self.object_detector = Detectron2Predictor(cfg, gpu_id=self.gpu_id)
        self.object_detector = Detectron2Predictor(cfg, gpu_id=None)

    logger.info("Start loading model weights.")
    cu.load_test_checkpoint(cfg, self.model)
    logger.info("Finish loading model weights.")
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            test_loader.dataset.num_videos
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            test_loader.dataset.num_videos
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
def load_model(self):
    # logger.info("Model Config")
    # logger.info(self.cfg)
    self.model = build_model(self.cfg)
    self.model.eval()
    # if du.is_master_proc():
    misc.log_model_info(self.model, self.cfg, is_train=False)

    # Use self.cfg here; a bare `cfg` is undefined inside this method.
    model_path = self.cfg.TRAIN.CHECKPOINT_FILE_PATH
    assert os.path.exists(model_path), "%s. Model Path Not Found" % model_path
    cu.load_checkpoint(model_path, self.model, self.cfg.NUM_GPUS > 1)
def test(cfg):
    # Build model.
    model = build_model(cfg)
    optimizer = optim.construct_optimizer(model, cfg)

    # Load checkpoint.
    start_epoch = cu.load_test_checkpoint(cfg, model)
    print("Load model epoch", start_epoch)

    # Build data loader.
    test_loader = dataloader.construct_loader(cfg, "test")

    # Perform test.
    results = perform_test(test_loader, model, cfg)
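
# A minimal sketch (an assumption; the real perform_test is defined elsewhere
# in this repo with its own signature) of the evaluation loop such a helper
# typically runs: eval mode, no-grad forward passes, top-1 accuracy.
import torch

@torch.no_grad()
def perform_test_sketch(test_loader, model, cfg):
    model.eval()
    correct, total = 0, 0
    for inputs, labels, _, _ in test_loader:  # batch layout is an assumption
        preds = model(inputs)
        correct += (preds.argmax(dim=1) == labels).sum().item()
        total += labels.numel()
    return {"top1": 100.0 * correct / total}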
def main():
    """
    Main function to spawn the train and test process.
    """
    args = parse_args()
    cfg = load_config(args)

    ####################################################################
    # overrides = sys.argv[1:]
    # overrides_dict = {}
    # for i in range(len(overrides) // 2):
    #     overrides_dict[overrides[2 * i]] = overrides[2 * i + 1]
    # overrides_dict['dir'] = cfg.OUTPUT_DIR
    # print(overrides_dict)
    ####################################################################

    import torch
    import time

    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")
    # start = time.perf_counter()
    # sample = next(iter(train_loader))
    # print('data time', time.perf_counter() - start)
    # print(sample[0].shape)
    # print(sample[-1])

    cfg.NUM_GPUS = 1
    # Build the video model and time a single forward pass.
    model = build_model(cfg)
    # misc.log_model_info(model, cfg, is_train=True)

    input_ = torch.rand([2, 1, 16, 96, 96]).cuda()
    # input_[input_ < 0.5] = 0
    # input_[input_ >= 0.5] = 1
    # input_ = sample[0].transpose(1, 2).cuda()
    # input_ = sample[0][:10]

    start = time.perf_counter()
    output = model(input_)
    print('model time', time.perf_counter() - start)
    # print(output.keys())
    for k, v in output.items():
        print(k, v)
def __init__(self, cfg):
    """
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Build the video model and print model statistics.
    self.model = build_model(cfg)
    self.model.eval()
    self.cfg = cfg

    logger.info("Start loading model info")
    misc.log_model_info(self.model, cfg, use_train_input=False)
    logger.info("Start loading model weights")
    cu.load_test_checkpoint(cfg, self.model)
    logger.info("Finish loading model weights")
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and cfg.TENSORBOARD.MODEL_VIS.ENABLE:
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)
        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, is_train=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")
        logger.info(
            "Visualize model for {} data points".format(len(vis_loader))
        )

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        # Run visualization on the model.
        run_visualization(vis_loader, model, cfg, writer)
        if writer is not None:
            writer.close()
def build_trainer(cfg):
    """
    Build training model and its associated tools, including optimizer,
    dataloaders and meters.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    Returns:
        model (nn.Module): training model.
        optimizer (Optimizer): optimizer.
        precise_bn_loader (DataLoader): training data loader for computing
            precise BN.
        train_meter (TrainMeter): tool for measuring training stats.
        val_meter (ValMeter): tool for measuring validation stats.
    """
    # Build the video model and print model statistics.
    model = build_model(cfg)
    # if du.is_master_proc() and cfg.LOG_MODEL_INFO:
    #     misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = loader.construct_loader(cfg, "train", is_precise_bn=True)

    # Create meters. The train/val loaders are commented out above, so use a
    # large placeholder epoch size instead of len(train_loader).
    # train_meter = TrainMeter(1000, cfg)
    # val_meter = ValMeter(1000, cfg)
    train_meter = TrainMeter(1e6, cfg)
    val_meter = ValMeter(1e6, cfg)

    return (
        model,
        optimizer,
        precise_bn_loader,
        train_meter,
        val_meter,
    )
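
# A minimal sketch (an assumption) of how the precise-BN loader returned above
# is typically consumed: re-estimate BN running statistics over a bounded
# number of batches before evaluation. The call shape mirrors the
# calculate_and_update_precise_bn() invocations in the training loops below.
model, optimizer, precise_bn_loader, train_meter, val_meter = build_trainer(cfg)
calculate_and_update_precise_bn(
    precise_bn_loader,
    model,
    min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
    cfg.NUM_GPUS > 0,
)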
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    # Create meters for loss tracking.
    test_meter = TrainMeter(test_loader.dataset.num_videos, cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
def train(cfg):
    # Build model.
    model = build_model(cfg)
    optimizer = optim.construct_optimizer(model, cfg)

    # Load checkpoint.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Build data loaders.
    train_loader = dataloader.construct_loader(cfg, "train")
    val_loader = dataloader.construct_loader(cfg, "val")
    precise_bn_loader = dataloader.construct_loader(cfg, "train")

    best_accuracy = 0
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, cur_epoch, cfg)

        is_eval_epoch = cur_epoch > 0

        # Compute precise BN stats.
        if (is_eval_epoch and cfg.BN.USE_PRECISE_STATS
                and len(get_bn_modules(model)) > 0):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        # For SubBatchNorm3d, call before eval.
        _ = aggregate_sub_bn_stats(model)

        # Evaluate the model on the validation set.
        if is_eval_epoch:
            results = eval_epoch(val_loader, model, cur_epoch, cfg)
            accuracy = results['top1']
            if accuracy > best_accuracy:
                print("*** Saving best ***")
                best_accuracy = accuracy
                torch.save(
                    {
                        'epoch': cur_epoch + 1,
                        'model_state': model.state_dict(),
                        'optimizer_state': optimizer.state_dict(),
                    },
                    os.path.join(cfg.OUTPUT_DIR, 'best_ckpt.pth'),
                )
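
# A minimal sketch (an assumption) of restoring the best checkpoint written by
# the loop above; the key names match the torch.save() dict used there.
import os
import torch

ckpt = torch.load(os.path.join(cfg.OUTPUT_DIR, 'best_ckpt.pth'), map_location="cpu")
model.load_state_dict(ckpt['model_state'])
optimizer.load_state_dict(ckpt['optimizer_state'])
start_epoch = ckpt['epoch']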
def main():
    args = parser_args()
    print(args)
    cfg_file = args.cfg_file
    checkpoint_file = args.checkpoint
    save_checkpoint_file = args.save
    half_flag = args.half

    cfg = get_cfg()
    cfg.merge_from_file(cfg_file)
    cfg.TEST.CHECKPOINT_FILE_PATH = checkpoint_file

    print("Simplifying model!\n")
    with torch.no_grad():
        model = build_model(cfg)
        model.eval()
        cu.load_test_checkpoint(cfg, model)
        if half_flag:
            model.half()
        # Save only the model state, dropping optimizer/epoch bookkeeping.
        with open(save_checkpoint_file, 'wb') as file:
            torch.save({"model_state": model.state_dict()}, file)
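
# A minimal sketch (an assumption) of consuming the slimmed checkpoint written
# above: only "model_state" is stored, so load it directly into a freshly
# built model. If it was saved with model.half(), cast before comparing.
import torch

state = torch.load(save_checkpoint_file, map_location="cpu")
model = build_model(cfg)
model.load_state_dict(state["model_state"])
model.eval()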
def infer(cfg):
    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Infer with config:")
    logger.info(cfg)

    # Build the SlowFast model and print its statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load weights.
    if cfg.INFERENCE.WEIGHTS_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.INFERENCE.WEIGHTS_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.INFERENCE.WEIGHTS_TYPE == "caffe2",
        )
    else:
        raise FileNotFoundError("Model weights file could not be found")

    inference_loader = loader.construct_loader(cfg, "inference")
    perform_inference(inference_loader, model, cfg)
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=True)

    if cfg.BN.FREEZE:
        model.freeze_fn('bn_parameters')

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and not cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "" and cfg.TRAIN.FINETUNE:
        logger.info("Load from given checkpoint file. Finetuning.")
        _ = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = 0
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'epickitchens' or not cfg.EPICKITCHENS.TRAIN_PLUS_VAL:
        train_loader = loader.construct_loader(cfg, "train")
        val_loader = loader.construct_loader(cfg, "val")
    else:
        train_loader = loader.construct_loader(cfg, "train+val")
        val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        if cfg.TRAIN.DATASET == 'epickitchens':
            train_meter = EPICTrainMeter(len(train_loader), cfg)
            val_meter = EPICValMeter(len(val_loader), cfg)
        else:
            train_meter = TrainMeter(len(train_loader), cfg)
            val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(
                train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
            )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)

        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            is_best_epoch = eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
            if is_best_epoch:
                cu.save_checkpoint(
                    cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg,
                    is_best_epoch=is_best_epoch,
                )
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        writer = SummaryWriter(log_dir=cfg.OUTPUT_DIR)
    else:
        writer = None

    if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
        tags = []
        if 'TAGS' in cfg and cfg.TAGS != []:
            tags = list(cfg.TAGS)
        neptune.set_project('Serre-Lab/motion')

        # Collect command-line overrides as experiment parameters.
        overrides = sys.argv[1:]
        overrides_dict = {}
        for i in range(len(overrides) // 2):
            overrides_dict[overrides[2 * i]] = overrides[2 * i + 1]
        overrides_dict['dir'] = cfg.OUTPUT_DIR

        if 'NEP_ID' in cfg and cfg.NEP_ID != "":
            session = Session()
            project = session.get_project(project_qualified_name='Serre-Lab/motion')
            nep_experiment = project.get_experiments(id=cfg.NEP_ID)[0]
        else:
            nep_experiment = neptune.create_experiment(
                name=cfg.NAME, params=overrides_dict, tags=tags
            )
    else:
        nep_experiment = None

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    writer, nep_experiment, cfg)

        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)

        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, nep_experiment, cfg)

        if du.get_rank() == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
            nep_experiment.log_metric('epoch', cur_epoch)
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)
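
# A small worked example (values are assumptions) of the TestMeter sizing used
# above: with NUM_ENSEMBLE_VIEWS=10 temporal clips and NUM_SPATIAL_CROPS=3
# crops, each video contributes 30 clip predictions, so the dataset length
# must divide evenly, which is exactly what the assert enforces.
num_clips = 10 * 3        # clips per video
dataset_len = 3000        # total clips in the test set (assumed)
assert dataset_len % num_clips == 0
num_videos = dataset_len // num_clips  # 100 videos to ensemble over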
def get_predictions(self):
    """
    Predict and append prediction results to each box in each keyframe in
    the `self.pred_boxes` dictionary.
    """
    # Set random seed from configs.
    np.random.seed(self.cfg.RNG_SEED)
    torch.manual_seed(self.cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(self.cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Run demo with config:")
    logger.info(self.cfg)
    assert self.cfg.NUM_GPUS <= 1, "Cannot run demo visualization on multiple GPUs."

    # Build the video model and print model statistics.
    model = build_model(self.cfg)
    model.eval()
    logger.info("Start loading model info")
    misc.log_model_info(model, self.cfg, use_train_input=False)
    logger.info("Start loading model weights")
    cu.load_test_checkpoint(self.cfg, model)
    logger.info("Finish loading model weights")
    logger.info("Start making predictions for precomputed boxes.")

    for keyframe_idx, boxes_and_labels in tqdm.tqdm(self.pred_boxes.items()):
        inputs = self.get_input_clip(keyframe_idx)
        boxes = boxes_and_labels[0]
        boxes = torch.from_numpy(np.array(boxes)).float()

        box_transformed = scale_boxes(
            self.cfg.DATA.TEST_CROP_SIZE,
            boxes,
            self.display_height,
            self.display_width,
        )

        # Pad frame index for each box.
        box_inputs = torch.cat(
            [
                torch.full((box_transformed.shape[0], 1), float(0)),
                box_transformed,
            ],
            axis=1,
        )

        if self.cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            box_inputs = box_inputs.cuda()

        preds = model(inputs, box_inputs)

        preds = preds.detach()
        if self.cfg.NUM_GPUS:
            preds = preds.cpu()
        boxes_and_labels[1] = preds
def train_epoch(
    train_loader,
    model,
    optimizer,
    scaler,
    train_meter,
    cur_epoch,
    cfg,
    writer=None,
):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        scaler (GradScaler): gradient scaler for mixed-precision training.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to write
            Tensorboard log.
    """
    print(model)
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)

    if cfg.MIXUP.ENABLE:
        mixup_fn = MixUp(
            mixup_alpha=cfg.MIXUP.ALPHA,
            cutmix_alpha=cfg.MIXUP.CUTMIX_ALPHA,
            mix_prob=cfg.MIXUP.PROB,
            switch_prob=cfg.MIXUP.SWITCH_PROB,
            label_smoothing=cfg.MIXUP.LABEL_SMOOTH_VALUE,
            num_classes=cfg.MODEL.NUM_CLASSES,
        )

    # Experimental manual model parallelism across GPUs (disabled):
    # if cfg.NUM_GPUS >= 2 and not cfg.MODEL.DDP:
    #     blk_size = int(16 / cfg.NUM_GPUS)
    #     start = blk_size
    #     for g in range(cfg.NUM_GPUS - 1):
    #         dev = f"cuda:{g + 1}"
    #         for i in range(start, start + blk_size):
    #             model.blocks[i] = model.blocks[i].to(dev)
    #         start += blk_size
    #     model.norm = model.norm.to(dev)
    #     model.head = model.head.to(dev)

    profiler.log_tic("loop_time")

    # Two-part hybrid model: build the second half on a separate GPU.
    if cfg.MODEL.MODEL_NAME == "MViTHybridP1":
        cfg.MODEL.MODEL_NAME = "MViTHybridP2"
        original_ddp = cfg.MODEL.DDP
        cfg.MODEL.DDP = False
        model_p2 = build_model(cfg)
        model_p2 = model_p2.to("cuda:2")
        # The rest of the logic below is about the P1 model.
        cfg.MODEL.MODEL_NAME = "MViTHybridP1"
        cfg.MODEL.DDP = original_ddp

    # NOTE: `time` in this unpacking shadows the time module inside the loop.
    for cur_iter, (inputs, labels, index, time, meta) in enumerate(train_loader):
        print(f"Iteration: {cur_iter}, {inputs.shape}")

        # Transfer the data to the current GPU device.
        if cfg.MODEL.MODEL_NAME in ["MViT", "MViTHybridP1"] and cfg.NUM_GPUS:
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()

        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()
        if cfg.MIXUP.ENABLE:
            samples, labels = mixup_fn(inputs[0], labels)
            inputs[0] = samples

        with torch.cuda.amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
            profiler.log_tic("model_time")
            if cfg.MODEL.MODEL_NAME == "MViTHybridP1":
                preds, thw = model(inputs)
                preds = preds.to("cuda:2")
                preds = model_p2(preds, thw)
            else:
                preds = model(inputs)
            profiler.log_toc("model_time", shape=inputs.shape)

            # Compute the loss. The real loss function is bypassed here for
            # profiling; preds.norm() stands in so the backward pass still runs.
            # loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # loss = loss_fun(preds, labels)
            loss = preds.norm()

        # Check for NaN losses.
        misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        profiler.log_tic("backward_time")
        scaler.scale(loss).backward()
        # Unscale the gradients of the optimizer's assigned params in-place.
        scaler.unscale_(optimizer)
        # Clip gradients if necessary.
        if cfg.SOLVER.CLIP_GRAD_VAL:
            torch.nn.utils.clip_grad_value_(model.parameters(), cfg.SOLVER.CLIP_GRAD_VAL)
        elif cfg.SOLVER.CLIP_GRAD_L2NORM:
            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.SOLVER.CLIP_GRAD_L2NORM)
        # Update the parameters.
        scaler.step(optimizer)
        scaler.update()
        profiler.log_toc("backward_time", shape=inputs.shape)

        if cfg.MIXUP.ENABLE:
            # Recover hard labels from the mixed soft labels for error metrics.
            _top_max_k_vals, top_max_k_inds = torch.topk(
                labels, 2, dim=1, largest=True, sorted=True
            )
            idx_top1 = torch.arange(labels.shape[0]), top_max_k_inds[:, 0]
            idx_top2 = torch.arange(labels.shape[0]), top_max_k_inds[:, 1]
            preds = preds.detach()
            preds[idx_top1] += preds[idx_top2]
            preds[idx_top2] = 0.0
            labels = top_max_k_inds[:, 0]

        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # Write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {"Train/loss": loss, "Train/lr": lr},
                    global_step=data_size * cur_epoch + cur_iter,
                )
        # The non-detection branch (top-1/top-5 error computation via
        # metrics.topks_correct, cross-device all_reduce, meter updates, and
        # Tensorboard logging of Top1_err/Top5_err) is disabled in this
        # profiling variant.

        train_meter.iter_toc()  # Measure allreduce for this meter.
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        profiler.log_toc("loop_time", shape=inputs.shape)
        profiler.log_tic("loop_time")
        profiler.report(25)

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
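
# A minimal, self-contained sketch of the mixed-precision update pattern the
# loop above follows (standard torch.cuda.amp usage; the toy model and data
# are assumptions for illustration).
import torch

model = torch.nn.Linear(16, 4).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler()
x, y = torch.randn(8, 16).cuda(), torch.randint(0, 4, (8,)).cuda()
with torch.cuda.amp.autocast():
    loss = torch.nn.functional.cross_entropy(model(x), y)
optimizer.zero_grad()
scaler.scale(loss).backward()
scaler.unscale_(optimizer)  # unscale first so clipping sees real gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
scaler.step(optimizer)
scaler.update()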
def test(cfg):
    """
    Perform multi-view testing on the pretrained audio model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the audio model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg)

    cu.load_test_checkpoint(cfg, model)

    # Create audio testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert len(test_loader.dataset) % cfg.TEST.NUM_ENSEMBLE_VIEWS == 0
    # Create meters for multi-view testing.
    if cfg.TEST.DATASET == 'epickitchens':
        test_meter = EPICTestMeter(
            len(test_loader.dataset) // cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            cfg.DATA.ENSEMBLE_METHOD,
        )
    else:
        test_meter = TestMeter(
            len(test_loader.dataset) // cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.TEST.NUM_ENSEMBLE_VIEWS,
            cfg.MODEL.NUM_CLASSES[0],
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter, preds, preds_clips, labels, metadata = perform_test(
        test_loader, model, test_meter, cfg, writer
    )

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {
                'verb_output': preds[0],
                'noun_output': preds[1],
                'narration_id': metadata,
            }
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(
                scores_path, cfg.EPICKITCHENS.TEST_SPLIT + '.pkl'
            )
            pickle.dump(results, open(file_path, 'wb'))
        else:
            if cfg.TEST.DATASET == 'vggsound':
                get_stats(preds, labels)
            results = {'scores': preds, 'labels': labels}
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(scores_path, 'test.pkl')
            pickle.dump(results, open(file_path, 'wb'))

    if writer is not None:
        writer.close()
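
# A minimal sketch (an assumption) of reading back the score pickle written
# above; the keys match the results dicts constructed in test().
import pickle

with open(file_path, 'rb') as f:
    results = pickle.load(f)
# e.g. 'verb_output'/'noun_output'/'narration_id' for epickitchens,
# or 'scores'/'labels' for the other datasets.
print(results.keys())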
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and (
        cfg.TENSORBOARD.MODEL_VIS.ENABLE or cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE
    ):
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)
        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        model.eval()
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, use_train_input=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        if cfg.TENSORBOARD.PREDICTIONS_PATH != "":
            assert not cfg.DETECTION.ENABLE, "Detection is not supported."
            logger.info("Visualizing class-level performance from saved results...")
            if writer is not None:
                with g_pathmgr.open(cfg.TENSORBOARD.PREDICTIONS_PATH, "rb") as f:
                    preds, labels = pickle.load(f, encoding="latin1")
                writer.plot_eval(preds, labels)

        if cfg.TENSORBOARD.MODEL_VIS.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                assert (
                    not cfg.DETECTION.ENABLE
                ), "Detection task is currently not supported for Grad-CAM visualization."
                if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 1
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 2
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST)
                    )
                else:
                    raise NotImplementedError(
                        "Model arch {} is not in {}".format(
                            cfg.MODEL.ARCH,
                            cfg.MODEL.SINGLE_PATHWAY_ARCH + cfg.MODEL.MULTI_PATHWAY_ARCH,
                        )
                    )
            logger.info(
                "Visualize model analysis for {} iterations".format(len(vis_loader))
            )
            # Run visualization on the model.
            run_visualization(vis_loader, model, cfg, writer)

        if cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE:
            logger.info(
                "Visualize Wrong Predictions for {} iterations".format(len(vis_loader))
            )
            perform_wrong_prediction_vis(vis_loader, model, cfg)

        if writer is not None:
            writer.close()
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    # Perform multi-view test on the entire dataset.
    preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {
                'scores': {'verb': preds[0], 'noun': preds[1]},
                'labels': {'verb': labels[0], 'noun': labels[1]},
                'narration_id': metadata,
            }
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            file_path = os.path.join(
                scores_path, cfg.EPICKITCHENS.TEST_SPLIT + '.pkl'
            )
            pickle.dump(results, open(file_path, 'wb'))
def visualize_activations(cfg):
    """
    Visualize activations of a video model on examples from the train set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Visualize activations")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    # Construct the optimizer.
    # optimizer = optim.construct_optimizer(model, cfg)

    logger.info("Load from given checkpoint file.")
    checkpoint_epoch = cu.load_checkpoint(
        cfg.TRAIN.CHECKPOINT_FILE_PATH,
        model,
        cfg.NUM_GPUS > 1,
        optimizer=None,
        inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
        convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
    )
    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")
    train_set = build_dataset(cfg.TEST.DATASET, cfg, "train")

    for i in np.random.choice(len(train_set), 5):
        # frames, label, _, _ = train_set.get_augmented_examples(i)
        frames, label, _, _ = train_set[i]
        inputs = frames
        inputs[0] = inputs[0][None, :]
        logger.info(frames[0].shape)
        # frames = frames[0].permute(0, 2, 3, 4, 1)
        frames = frames[0].squeeze().transpose(0, 1)  # .permute(1, 2, 3, 0)
        logger.info(frames.shape)
        tv.utils.save_image(
            frames,
            os.path.join(cfg.OUTPUT_DIR, 'example_%d.jpg' % i),
            nrow=18,
            normalize=True,
        )

        for j in range(len(inputs)):
            inputs[j] = inputs[j].cuda(non_blocking=True)
        with torch.no_grad():
            inputs[0] = inputs[0][:min(3, len(inputs[0]))]
            output = model(inputs, extra=['frames'])

        input_aug = output['input_aug']
        logger.info(input_aug.shape)
        input_aug = input_aug[0].transpose(0, 1)
        tv.utils.save_image(
            input_aug,
            os.path.join(cfg.OUTPUT_DIR, 'example_input_%d.jpg' % i),
            nrow=18,
            normalize=True,
        )

        # Mix layer: [1, timesteps, layers, activations].
        mix_out = output['mix_layer']
        for layer in range(len(mix_out)):
            logger.info('mix layer %d' % layer)
            logger.info(mix_out[layer].view([18, -1]).mean(1))
            images = mix_out[layer].transpose(1, 2).transpose(0, 1)
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            images = images - images.min()
            images = images / images.max()
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR,
                             'example_%d_mix_layer_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )

        # Bottom-up errors per timestep per layer (choose a random activation
        # or the mean); shape [1, timesteps, layers, channels, height, width].
        bu_errors = output['bu_errors']
        for layer in range(len(bu_errors)):
            images = bu_errors[layer].transpose(1, 2).transpose(0, 1)
            images = images - images.min()
            images = images / images.max()
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR,
                             'example_%d_bu_errors_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )

        # Horizontal inhibition per timestep per layer;
        # shape [1, timesteps, layers, channels, height, width].
        inhibition = output['H_inh']
        for layer in range(len(inhibition)):
            images = inhibition[layer].transpose(1, 2).transpose(0, 1)
            images = images - images.min()
            images = images / images.max()
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR,
                             'example_%d_H_inh_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )

        # Persistent state between timesteps;
        # shape [1, timesteps, layers, channels, height, width].
        hidden = output['hidden']
        for layer in range(len(hidden)):
            images = hidden[layer].transpose(1, 2).transpose(0, 1)
            images = images - images.min()
            images = images / images.max()
            logger.info(images.shape)
            images = images.reshape((-1,) + images.shape[2:])
            tv.utils.save_image(
                images,
                os.path.join(cfg.OUTPUT_DIR,
                             'example_%d_hidden_l%d.jpg' % (i, layer)),
                nrow=18,
                normalize=True,
            )
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (
        len(test_loader.dataset)
        % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
        == 0
    )
    # Create meters for multi-view testing.
    test_meter = TestMeter(
        len(test_loader.dataset)
        // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
        cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
        cfg.MODEL.NUM_CLASSES,
        len(test_loader),
        cfg.DATA.MULTI_LABEL,
        cfg.DATA.ENSEMBLE_METHOD,
    )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    out_str_prefix = "lin" if cfg.MODEL.DETACH_FINAL_FC else ""

    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    if (
        cfg.TASK == "ssl"
        and cfg.MODEL.MODEL_NAME == "ContrastiveModel"
        and cfg.CONTRASTIVE.KNN_ON
    ):
        train_loader = loader.construct_loader(cfg, "train")
        out_str_prefix = "knn"
        if hasattr(model, "module"):
            model.module.init_knn_labels(train_loader)
        else:
            model.init_knn_labels(train_loader)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            test_loader.dataset.num_videos
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            test_loader.dataset.num_videos
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES
            if not cfg.TASK == "ssl"
            else cfg.CONTRASTIVE.NUM_CLASSES_DOWNSTREAM,
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()

    result_string = (
        "_a{}{}{} Top1 Acc: {} Top5 Acc: {} MEM: {:.2f} dataset: {}{}"
        "".format(
            out_str_prefix,
            cfg.TEST.DATASET[0],
            test_meter.stats["top1_acc"],
            test_meter.stats["top1_acc"],
            test_meter.stats["top5_acc"],
            misc.gpu_mem_usage(),
            cfg.TEST.DATASET[0],
            cfg.MODEL.NUM_CLASSES,
        )
    )
    logger.info("testing done: {}".format(result_string))

    return result_string
        # Save gif.
        if (i + 1) % 5 == 0:
            layer_path = self.dir_path + 'layer/'
            if not os.path.exists(layer_path):
                os.mkdir(layer_path)
            path = (layer_path + str(self.model_name) + '_iter' + str(i)
                    + '_path' + str(j) + '_' + str(self.layer)
                    + '_f' + str(self.filter)
                    + '_lr' + str(self.initial_learning_rate)
                    + "_wd" + str(self.weight_decay))
            save_gif(created_video, path, stream_type="rgb")


if __name__ == '__main__':
    args = parse_args()
    cfg = load_config(args)

    # Construct the model.
    model = build_model(cfg)
    load_checkpoint(
        checkpoint_path,
        model,
        data_parallel=False,
        optimizer=None,
        inflation=False,
        convert_from_caffe2=True,
    )

    cnn_layer = "s2.pathway0_res0.branch1"  # "conv3d_0c_1x1.conv3d"
    filter_pos = 0
    device = torch.device('cuda:0')
    model = model.to(device)
    layer_vis = CNNLayerVisualization(model, cnn_layer, filter_pos, device)

    # Layer visualization with pytorch hooks.
    layer_vis.visualise_layer_with_hooks()
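
# A minimal sketch (an assumption) of discovering valid values for `cnn_layer`
# above: named_modules() yields dotted paths like "s2.pathway0_res0.branch1",
# so listing the Conv3d modules shows every candidate layer name.
import torch

for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv3d):
        print(name)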
def test(cfg, cnt=-1):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging()

    # Decide where the scores for this multi-view test will be written.
    scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
    if not os.path.exists(scores_path):
        os.makedirs(scores_path)
    filename_root = cfg.EPICKITCHENS.TEST_LIST.split('.')[0]
    if cnt >= 0:
        file_name = '{}_{}_{}.pkl'.format(filename_root, cnt, cfg.MODEL.MODEL_NAME)
    else:
        file_name = '{}_{}_{}.pkl'.format(filename_root, 'test_only', cfg.MODEL.MODEL_NAME)
    file_path = os.path.join(scores_path, file_name)
    logger.info(file_path)

    # Print config.
    # if cnt < 0:
    #     logger.info("Test with config:")
    #     logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if cfg.EPICKITCHENS.USE_BBOX:
        model.module.load_weight_slowfast()
    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    if cfg.TEST.EXTRACT_FEATURES_MODE != "" and cfg.TEST.EXTRACT_FEATURES_MODE in ["test", "train", "val"]:
        test_loader = loader.construct_loader(cfg, cfg.TEST.EXTRACT_FEATURES_MODE)
    else:
        test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        if cfg.TEST.DATASET == 'epickitchens':
            test_meter = EPICTestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )
        else:
            test_meter = TestMeter(
                len(test_loader.dataset)
                // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
                cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
                cfg.MODEL.NUM_CLASSES,
                len(test_loader),
            )

    pickle.dump([], open(file_path, 'wb+'))

    if cfg.TEST.EXTRACT_FEATURES:
        preds, labels, metadata, x_feat_list = perform_test(test_loader, model, test_meter, cfg)
    else:
        preds, labels, metadata = perform_test(test_loader, model, test_meter, cfg)

    if du.is_master_proc():
        if cfg.TEST.DATASET == 'epickitchens':
            results = {
                'verb_output': preds[0],
                'noun_output': preds[1],
                'verb_gt': labels[0],
                'noun_gt': labels[1],
                'narration_id': metadata,
            }
            scores_path = os.path.join(cfg.OUTPUT_DIR, 'scores')
            if not os.path.exists(scores_path):
                os.makedirs(scores_path)
            pickle.dump(results, open(file_path, 'wb'))

        if cfg.TEST.EXTRACT_FEATURES:
            pid = cfg.EPICKITCHENS.FEATURE_VID.split("_")[0]
            if not os.path.exists(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid)):
                os.mkdir(os.path.join(cfg.TEST.EXTRACT_FEATURES_PATH, pid))
            if not cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                # SlowFast features: one array per pathway, saved as a pickle.
                arr_slow = torch.cat(x_feat_list[0], dim=0).numpy()
                arr_fast = torch.cat(x_feat_list[1], dim=0).numpy()
                print(arr_slow.shape, arr_fast.shape)
                fpath_feat = os.path.join(
                    cfg.TEST.EXTRACT_FEATURES_PATH, pid,
                    '{}.pkl'.format(cfg.EPICKITCHENS.FEATURE_VID),
                )
                with open(fpath_feat, 'wb+') as f:
                    pickle.dump([arr_slow, arr_fast], f)
            elif cfg.TEST.EXTRACT_MSTCN_FEATURES and cfg.TEST.EXTRACT_FEATURES:
                # MS-TCN-style features: a single concatenated array as .npy.
                fpath_feat = os.path.join(
                    cfg.TEST.EXTRACT_FEATURES_PATH, pid,
                    '{}.npy'.format(cfg.EPICKITCHENS.FEATURE_VID),
                )
                with open(fpath_feat, 'wb+') as f:
                    arr = torch.cat(x_feat_list, dim=0).numpy()
                    print(arr.shape)
                    np.save(f, arr)
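
# A minimal sketch (an assumption) of loading the per-video features saved
# above: SlowFast features are a [slow, fast] pair in a .pkl file, while
# MS-TCN-style features are a single array in a .npy file.
import pickle
import numpy as np

with open(fpath_feat, 'rb') as f:  # the '<FEATURE_VID>.pkl' written above
    arr_slow, arr_fast = pickle.load(f)
print(arr_slow.shape, arr_fast.shape)
# For the .npy variant:
# arr = np.load(fpath_feat)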
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = (
        loader.construct_loader(cfg, "train", is_precise_bn=True)
        if cfg.BN.USE_PRECISE_STATS
        else None
    )

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch)
            if changed:
                (
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    precise_bn_loader,
                    train_meter,
                    val_meter,
                ) = build_trainer(cfg)

                # Load checkpoint.
                if cu.has_checkpoint(cfg.OUTPUT_DIR):
                    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
                    assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint
                else:
                    last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH
                logger.info("Load from {}".format(last_checkpoint))
                cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer)

        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg, writer)

        is_checkp_epoch = cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None if multigrid is None else multigrid.schedule,
        )
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None if multigrid is None else multigrid.schedule
        )

        # Compute precise BN stats.
        if (
            (is_checkp_epoch or is_eval_epoch)
            and cfg.BN.USE_PRECISE_STATS
            and len(get_bn_modules(model)) > 0
        ):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        _ = misc.aggregate_sub_bn_stats(model)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer)

    if writer is not None:
        writer.close()
def test(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Use decord's native array bridge for frame decoding.
    de.bridge.set_bridge('native')

    gpu_id = 1  # GPU used for feature extraction.
    sample_rate = 1
    resize_h = 270
    resize_w = 360
    augment = ['FiveCrop', 'TenCrop', 'None'][0]  # pick one of the three modes
    crop_h = cfg.DATA.TEST_CROP_SIZE  # e.g. 256
    crop_w = cfg.DATA.TEST_CROP_SIZE  # e.g. 256

    # Infer the backbone from the checkpoint path.
    if ('SLOWFAST' in cfg.TEST.CHECKPOINT_FILE_PATH
            and 'I3D' not in cfg.TEST.CHECKPOINT_FILE_PATH):
        model_type = 'slowfast'
        feature_dim = 2304
    elif ('SLOWFAST' not in cfg.TEST.CHECKPOINT_FILE_PATH
            and 'I3D' in cfg.TEST.CHECKPOINT_FILE_PATH):
        model_type = 'i3d'
        feature_dim = 2048
    else:
        raise Exception('Invalid Model.')

    video_dir = cfg.DATA.PATH_TO_DATA_DIR
    if augment == 'FiveCrop':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}x{}_{}_5'.format(
                model_type, resize_h, resize_w, crop_h, crop_w, sample_rate))
    elif augment == 'TenCrop':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}x{}_{}_10'.format(
                model_type, resize_h, resize_w, crop_h, crop_w, sample_rate))
    elif augment == 'None':
        feature_dir = os.path.join(
            cfg.OUTPUT_DIR,
            'feature_{}_{}x{}_{}_1'.format(
                model_type, resize_h, resize_w, sample_rate))
    else:
        raise Exception('Invalid Augment.')

    norm_transform = transforms.Normalize(mean=cfg.DATA.MEAN, std=cfg.DATA.STD)
    if augment == 'FiveCrop':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.FiveCrop(size=(crop_h, crop_w)),
            transforms.Lambda(
                lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
            transforms.Lambda(
                lambda crops: [norm_transform(crop) for crop in crops]),
            transforms.Lambda(lambda crops: torch.stack(crops)),
        ])
    elif augment == 'TenCrop':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.TenCrop(size=(crop_h, crop_w)),
            transforms.Lambda(
                lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
            transforms.Lambda(
                lambda crops: [norm_transform(crop) for crop in crops]),
            transforms.Lambda(lambda crops: torch.stack(crops)),
        ])
    elif augment == 'None':
        frame_transform = transforms.Compose([
            transforms.Resize(size=(resize_h, resize_w)),
            transforms.ToTensor(),
            norm_transform,
            transforms.Lambda(lambda img: img.unsqueeze(0)),
        ])
    else:
        raise Exception('Invalid Augment.')
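    # What the pipelines above produce for one PIL frame (shapes follow the
    # torchvision semantics of FiveCrop/TenCrop; sizes are the defaults set at
    # the top of this function):
    #   FiveCrop -> (5, 3, crop_h, crop_w)    four corners + center
    #   TenCrop  -> (10, 3, crop_h, crop_w)   FiveCrop plus horizontal flips
    #   'None'   -> (1, 3, resize_h, resize_w)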
    # Build the video model and print model statistics.
    model = build_model(cfg)
    print(model)
    cu.load_test_checkpoint(cfg, model)
    model.eval()
    model.to(torch.device('cuda:{}'.format(gpu_id)))

    if not os.path.exists(feature_dir):
        os.makedirs(feature_dir)

    video_files = os.listdir(video_dir)
    video_files.sort()
    for video_file in video_files:
        video_name = video_file[:-4]
        video_file = os.path.join(video_dir, video_file)
        feature_file = '{}.npy'.format(video_name)
        if feature_file in os.listdir(feature_dir):
            print('Skipped.')
            continue
        feature_file = os.path.join(feature_dir, feature_file)
        print(video_file)
        print(feature_file)

        video_feature = []
        vr = de.VideoReader(video_file, ctx=de.cpu(0))
        frame_num = len(vr)
        # Sanity-check decord's frame count against ffprobe metadata.
        video_meta = skvideo.io.ffprobe(video_file)
        assert frame_num == int(video_meta['video']['@nb_frames'])
        sample_idxs = np.arange(0, frame_num, sample_rate)
        clip_size = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE

        # Cache decoded, transformed frames so overlapping clips do not
        # re-decode the same frame.
        frame_buffer = {}
        buffer_size = 128
        with torch.no_grad():
            for _, sample_idx in enumerate(tqdm(sample_idxs)):
                # Indices of the clip centered on sample_idx, clamped to the
                # valid frame range.
                fast_pathway_idxs = np.arange(
                    sample_idx - clip_size // 2,
                    sample_idx - clip_size // 2 + clip_size,
                    cfg.DATA.SAMPLING_RATE)
                fast_pathway_idxs[fast_pathway_idxs < 0] = 0
                fast_pathway_idxs[fast_pathway_idxs > frame_num - 1] = frame_num - 1
                assert fast_pathway_idxs.size == cfg.DATA.NUM_FRAMES

                fast_pathway_frames = []
                for idx in fast_pathway_idxs:
                    if idx not in frame_buffer:
                        frame = vr[idx].asnumpy()  # e.g. (540, 960, 3)
                        frame = Image.fromarray(frame)
                        frame = frame_transform(frame)
                        frame = frame.to(torch.device('cuda:{}'.format(gpu_id)))
                        if augment == 'FiveCrop':
                            assert frame.shape == (5, 3, crop_h, crop_w)
                        elif augment == 'TenCrop':
                            assert frame.shape == (10, 3, crop_h, crop_w)
                        elif augment == 'None':
                            assert frame.shape == (1, 3, resize_h, resize_w)
                        else:
                            raise Exception('Invalid Augment.')
                        frame_buffer[idx] = frame
                        if len(frame_buffer) > buffer_size:
                            # Evict the oldest (smallest-index) cached frame.
                            frame_buffer.pop(min(list(frame_buffer.keys())))
                    fast_pathway_frames.append(frame_buffer[idx].unsqueeze(2))
                # (crops, 3, T, H, W) after stacking along the time axis.
                fast_pathway_frames = torch.cat(fast_pathway_frames, 2)

                if model_type == 'slowfast':
                    # The slow pathway subsamples the fast pathway by ALPHA.
                    slow_pathway_idxs = fast_pathway_idxs[::cfg.SLOWFAST.ALPHA]
                    assert (slow_pathway_idxs.size
                            == cfg.DATA.NUM_FRAMES // cfg.SLOWFAST.ALPHA)
                    slow_pathway_frames = []
                    for idx in slow_pathway_idxs:
                        if idx not in frame_buffer:
                            frame = vr[idx].asnumpy()
                            frame = Image.fromarray(frame)
                            frame = frame_transform(frame)
                            frame = frame.to(torch.device('cuda:{}'.format(gpu_id)))
                            if augment == 'FiveCrop':
                                assert frame.shape == (5, 3, crop_h, crop_w)
                            elif augment == 'TenCrop':
                                assert frame.shape == (10, 3, crop_h, crop_w)
                            elif augment == 'None':
                                assert frame.shape == (1, 3, resize_h, resize_w)
                            else:
                                raise Exception('Invalid Augment.')
                            frame_buffer[idx] = frame
                            if len(frame_buffer) > buffer_size:
                                frame_buffer.pop(min(list(frame_buffer.keys())))
                        slow_pathway_frames.append(frame_buffer[idx].unsqueeze(2))
                    slow_pathway_frames = torch.cat(slow_pathway_frames, 2)

                if model_type == 'slowfast':
                    frame_feature = model(
                        [slow_pathway_frames, fast_pathway_frames],
                        extract_feature=True)
                elif model_type == 'i3d':
                    frame_feature = model([fast_pathway_frames],
                                          extract_feature=True)
                else:
                    raise Exception('Invalid Model.')

                # For FiveCrop, typical input shapes are:
                #   fast_pathway_frames: torch.Size([5, 3, 32, 256, 256])
                #   slow_pathway_frames: torch.Size([5, 3, 8, 256, 256])
                assert frame_feature.shape[1] == feature_dim
                if augment == 'FiveCrop':
                    assert frame_feature.shape[0] == 5
                elif augment == 'TenCrop':
                    assert frame_feature.shape[0] == 10
                elif augment == 'None':
                    assert frame_feature.shape[0] == 1
                else:
                    raise Exception('Invalid Augment.')

                # NOTE: SlowFast checkpoints assume 30-fps video; be careful
                # with other frame rates, as such features need re-extraction.
                frame_feature = torch.unsqueeze(frame_feature, dim=0)
                frame_feature = frame_feature.cpu().numpy()
                video_feature.append(frame_feature)

        video_feature = np.concatenate(video_feature, axis=0)
        print(video_feature.shape)
        np.save(feature_file, video_feature)
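# A minimal sketch of consuming the features saved above (illustrative helper,
# not part of the codebase). Each .npy file holds an array of shape
# (num_sampled_frames, num_crops, feature_dim), e.g. (T, 5, 2304) for SlowFast
# features extracted with FiveCrop.
def load_video_feature(feature_file):
    feats = np.load(feature_file)
    # Average over the crop axis to get one descriptor per sampled frame.
    return feats.mean(axis=1)  # (T, feature_dim)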