def construct_input(frame_list):
    """Turn a group of frames into a single-sample model input.

    The frames are passed through, in order: ``GroupScale`` (using
    ``config.TRAIN.RESIZE_MIN``), ``GroupCenterCrop`` (using
    ``config.TRAIN.INPUT_SIZE``), ``GroupNormalize`` for the RGB modality and
    ``Stack``.

    Args:
        frame_list: Group of frames accepted by the ``Group*`` transforms.

    Returns:
        The transformed data with one extra leading dimension of size 1.
    """
    preprocessing_steps = [
        GroupScale(config.TRAIN.RESIZE_MIN),
        GroupCenterCrop(config.TRAIN.INPUT_SIZE),
        GroupNormalize(modality="RGB"),
        Stack(),
    ]
    preprocess = torchvision.transforms.Compose(preprocessing_steps)
    # unsqueeze(0) prepends a size-1 dimension (presumably the batch axis).
    return preprocess(frame_list).unsqueeze(0)
def construct_input(frame_list):
    """Turn a group of frames into a single-sample model input.

    The frames are scaled with ``GroupScale(256)``, center-cropped with
    ``GroupCenterCrop(224)``, stacked, converted with
    ``ToTorchFormatTensor`` and finally normalized with ``GroupNormalize``.

    Args:
        frame_list: Group of frames accepted by the ``Group*`` transforms.

    Returns:
        The transformed data with one extra leading dimension of size 1.
    """
    preprocessing_steps = [
        GroupScale(256),
        GroupCenterCrop(224),
        Stack(),
        ToTorchFormatTensor(),
        GroupNormalize(),
    ]
    preprocess = torchvision.transforms.Compose(preprocessing_steps)
    # unsqueeze(0) prepends a size-1 dimension (presumably the batch axis).
    return preprocess(frame_list).unsqueeze(0)
def run(*options, cfg=None):
    """Run training and validation of model

    Notes:
        Options can be passed in via the options argument and loaded from the
        cfg file.
        Options loaded from default.py will be overridden by options loaded
        from cfg file.
        Options passed in through options argument will override option
        loaded from cfg file.

    Args:
        *options (str, int, optional): Options used to override what is
            loaded from the config. To see what options are available consult
            default.py
        cfg (str, optional): Location of config file to load.
            Defaults to None.

    Raises:
        ValueError: If ``config.TRAIN.MODALITY`` is neither "RGB" nor "flow".
    """
    update_config(config, options=options, config_file=cfg)

    print("Training ", config.TRAIN.MODALITY, " model.")
    print("Batch size:", config.TRAIN.BATCH_SIZE,
          " Gradient accumulation steps:", config.TRAIN.GRAD_ACCUM_STEPS)

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    # Seed every RNG in use so runs are repeatable for a given config.SEED.
    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Log to tensorboard
    writer = SummaryWriter(log_dir=config.LOG_DIR)

    # Everything below runs under try/finally so the writer is closed even
    # when setup or training raises.
    try:
        # Setup dataloaders
        train_loader = torch.utils.data.DataLoader(
            I3DDataSet(
                data_root=config.DATASET.DIR,
                split=config.DATASET.SPLIT,
                sample_frames=config.TRAIN.SAMPLE_FRAMES,
                modality=config.TRAIN.MODALITY,
                transform=torchvision.transforms.Compose([
                    GroupScale(config.TRAIN.RESIZE_MIN),
                    GroupRandomCrop(config.TRAIN.INPUT_SIZE),
                    GroupRandomHorizontalFlip(),
                    GroupNormalize(modality=config.TRAIN.MODALITY),
                    Stack(),
                ]),
            ),
            batch_size=config.TRAIN.BATCH_SIZE,
            shuffle=True,
            num_workers=config.WORKERS,
            pin_memory=config.PIN_MEMORY,
        )

        val_loader = torch.utils.data.DataLoader(
            I3DDataSet(
                data_root=config.DATASET.DIR,
                split=config.DATASET.SPLIT,
                modality=config.TRAIN.MODALITY,
                train_mode=False,
                transform=torchvision.transforms.Compose([
                    GroupScale(config.TRAIN.RESIZE_MIN),
                    GroupCenterCrop(config.TRAIN.INPUT_SIZE),
                    GroupNormalize(modality=config.TRAIN.MODALITY),
                    Stack(),
                ]),
            ),
            batch_size=config.TEST.BATCH_SIZE,
            shuffle=False,
            num_workers=config.WORKERS,
            pin_memory=config.PIN_MEMORY,
        )

        # Setup model
        if config.TRAIN.MODALITY == "RGB":
            channels = 3
            checkpoint = config.MODEL.PRETRAINED_RGB
        elif config.TRAIN.MODALITY == "flow":
            channels = 2
            checkpoint = config.MODEL.PRETRAINED_FLOW
        else:
            raise ValueError("Modality must be RGB or flow")

        # The pretrained checkpoint is loaded into a 400-class network first;
        # only afterwards is the classifier swapped for the target dataset.
        i3d_model = InceptionI3d(400, in_channels=channels)
        i3d_model.load_state_dict(torch.load(checkpoint))

        # Replace final FC layer to match dataset
        i3d_model.replace_logits(config.DATASET.NUM_CLASSES)

        criterion = torch.nn.CrossEntropyLoss().cuda()

        optimizer = optim.SGD(i3d_model.parameters(),
                              lr=0.1,
                              momentum=0.9,
                              weight_decay=0.0000001)

        i3d_model = i3d_model.cuda()

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         factor=0.1,
                                                         patience=2,
                                                         verbose=True,
                                                         threshold=1e-4,
                                                         min_lr=1e-4)

        # Data-parallel
        devices_lst = list(range(torch.cuda.device_count()))
        print("Devices {}".format(devices_lst))
        if len(devices_lst) > 1:
            i3d_model = torch.nn.DataParallel(i3d_model)

        # exist_ok avoids the check-then-create race of exists()/makedirs().
        os.makedirs(config.MODEL.CHECKPOINT_DIR, exist_ok=True)

        last_epoch = config.TRAIN.MAX_EPOCHS - 1
        for epoch in range(config.TRAIN.MAX_EPOCHS):
            train(train_loader, i3d_model, criterion, optimizer, epoch, writer)

            if (epoch + 1) % config.TEST.EVAL_FREQ == 0 or epoch == last_epoch:
                val_loss = validate(val_loader, i3d_model, criterion, epoch,
                                    writer)
                # The plateau scheduler is driven by validation loss, so it
                # only advances on evaluation epochs.
                scheduler.step(val_loss)

                # DataParallel keeps the real network under ``.module``; on a
                # single device there is no wrapper, so unwrap conditionally
                # instead of failing with AttributeError.
                if isinstance(i3d_model, torch.nn.DataParallel):
                    model_to_save = i3d_model.module
                else:
                    model_to_save = i3d_model

                checkpoint_name = (config.MODEL.NAME
                                   + '_split' + str(config.DATASET.SPLIT)
                                   + '_epoch' + str(epoch).zfill(3)
                                   + '.pt')
                torch.save(
                    model_to_save.state_dict(),
                    os.path.join(config.MODEL.CHECKPOINT_DIR, checkpoint_name))
    finally:
        writer.close()
def run(*options, cfg=None):
    """Train an I3D model and periodically validate and checkpoint it.

    Configuration precedence (lowest to highest): defaults from default.py,
    values from the ``cfg`` file, then values passed through ``options``.

    Args:
        *options (str, int, optional): Overrides applied on top of the loaded
            config. See default.py for the available options.
        cfg (str, optional): Path of the config file to load.
            Defaults to None.

    Raises:
        ValueError: If ``config.TRAIN.MODALITY`` is neither "RGB" nor "flow".
        Exception: If fewer than two CUDA devices are visible.
    """
    update_config(config, options=options, config_file=cfg)

    print("Training ", config.TRAIN.MODALITY, " model.")

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    # Seed torch (CPU and, when present, every GPU) and numpy.
    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Tensorboard logger
    writer = SummaryWriter(log_dir=config.LOG_DIR)

    # --- Augmentation / transformation pipelines ---
    crop_size = config.TRAIN.INPUT_SIZE
    scale_lo = config.TRAIN.RESIZE_RANGE_MIN
    scale_hi = config.TRAIN.RESIZE_RANGE_MAX
    # Flow-aware flipping is switched off: the flag is fixed to False
    # regardless of config.TRAIN.MODALITY.
    is_flow = False

    train_augmentation = transforms.Compose([
        GroupRandomResizeCrop([scale_lo, scale_hi], crop_size),
        GroupRandomHorizontalFlip(is_flow=is_flow),
    ])
    val_augmentation = transforms.Compose([
        GroupScale(scale_lo),
        GroupCenterCrop(crop_size),
    ])

    # --- Data loaders ---
    train_dataset = I3DDataSet(
        data_root=config.DATASET.DIR,
        split=config.DATASET.SPLIT,
        sample_frames=config.TRAIN.SAMPLE_FRAMES,
        modality=config.TRAIN.MODALITY,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(),
            ToTorchFormatTensor(),
            GroupNormalize(),
        ]),
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE,
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True,
    )

    val_dataset = I3DDataSet(
        data_root=config.DATASET.DIR,
        split=config.DATASET.SPLIT,
        sample_frames=config.TRAIN.SAMPLE_FRAMES,
        modality=config.TRAIN.MODALITY,
        transform=torchvision.transforms.Compose([
            val_augmentation,
            Stack(),
            ToTorchFormatTensor(),
            GroupNormalize(),
        ]),
        train_mode=False,
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=config.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True,
    )

    # --- Model ---
    # Maps modality -> (input channels, name of the pretrained-weights option
    # under config.MODEL). The option is looked up lazily so only the one
    # that is needed gets read.
    modality_setup = {
        "RGB": (3, "PRETRAINED_RGB"),
        "flow": (2, "PRETRAINED_FLOW"),
    }
    if config.TRAIN.MODALITY not in modality_setup:
        raise ValueError("Modality must be RGB or flow")
    channels, pretrained_option = modality_setup[config.TRAIN.MODALITY]
    checkpoint = getattr(config.MODEL, pretrained_option)

    i3d_model = InceptionI3d(400, in_channels=channels)
    i3d_model.load_state_dict(torch.load(checkpoint))

    # Swap the final FC layer so its output matches the dataset's classes.
    i3d_model.replace_logits(config.DATASET.NUM_CLASSES)

    criterion = torch.nn.CrossEntropyLoss().cuda()

    # Original author's note: the flow model converges after 25 epochs using
    # batch size 30.
    optimizer = optim.SGD(
        i3d_model.parameters(),
        lr=0.1,
        momentum=0.9,
        weight_decay=0.0000001,
    )

    i3d_model = i3d_model.cuda()

    # Mixed precision is set up before the DataParallel wrap below.
    i3d_model, optimizer = amp.initialize(i3d_model, optimizer, opt_level="O1")

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=0.1,
        patience=2,
        verbose=True,
        threshold=1e-4,
        min_lr=1e-4,
    )

    # --- Data-parallel ---
    devices_lst = list(range(torch.cuda.device_count()))
    print("Devices {}".format(devices_lst))
    # Multi-GPU is mandatory here: checkpointing below reads ``.module``,
    # which only exists on the DataParallel wrapper.
    if len(devices_lst) <= 1:
        raise Exception('Get more GPUs')
    i3d_model = torch.nn.DataParallel(i3d_model)

    if not os.path.exists(config.MODEL_DIR):
        os.makedirs(config.MODEL_DIR)

    for epoch in range(config.TRAIN.MAX_EPOCHS):
        # One pass over the training data.
        train(train_loader, i3d_model, criterion, optimizer, epoch, writer)

        # Validate every EVAL_FREQ epochs and always after the final epoch.
        scheduled_eval = (epoch + 1) % config.EVAL_FREQ == 0
        if not (scheduled_eval or epoch == config.TRAIN.MAX_EPOCHS - 1):
            continue

        val_loss = validate(val_loader, i3d_model, criterion, epoch, writer)
        scheduler.step(val_loss)

        checkpoint_path = (config.MODEL_DIR + '/' + config.MODEL.NAME
                           + '_split' + str(config.DATASET.SPLIT)
                           + '_epoch' + str(epoch).zfill(3) + '.pt')
        torch.save(i3d_model.module.state_dict(), checkpoint_path)

    writer.close()
def _score_modality(modality, model_file, augmentation):
    """Score one modality on the test split and print its top-1 accuracy.

    Args:
        modality (str): "RGB" or "flow"; forwarded to the dataset and to
            ``test``.
        model_file (str): Path of the trained weights for this modality.
        augmentation: Transform applied to each frame group before stacking.

    Returns:
        tuple: ``(targets, predictions)`` where ``targets`` has been moved to
        the GPU.

    Raises:
        FileNotFoundError: If ``model_file`` does not exist.
    """
    # Fail fast, before the dataset and loader are built.
    if not os.path.exists(model_file):
        # Single-argument form on purpose: FileNotFoundError(a, b) is parsed
        # as (errno, strerror) and yields a garbled message.
        raise FileNotFoundError("{} does not exist".format(model_file))

    loader = torch.utils.data.DataLoader(
        I3DDataSet(
            data_root=config.DATASET.DIR,
            split=config.DATASET.SPLIT,
            sample_frames=config.TRAIN.SAMPLE_FRAMES,
            modality=modality,
            train_mode=False,
            sample_frames_at_test=False,
            transform=torchvision.transforms.Compose([
                augmentation,
                Stack(),
                ToTorchFormatTensor(),
                GroupNormalize(),
            ]),
        ),
        batch_size=config.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True,
    )

    label = modality.lower()
    print("scoring with {} model".format(label))
    targets, predictions = test(loader, modality, model_file)

    targets = targets.cuda(non_blocking=True)
    top1_accuracy = accuracy(predictions, targets, topk=(1, ))
    print("{} top1 accuracy: ".format(label),
          top1_accuracy[0].cpu().numpy().tolist())
    return targets, predictions


def run(*options, cfg=None):
    """Evaluate trained I3D model(s) on the test split.

    Depending on ``config.TEST.MODALITY`` ("RGB", "flow" or "both"), the RGB
    and/or flow model is scored and its top-1 accuracy printed. For "both",
    the two prediction tensors are averaged and a combined top-1 accuracy is
    printed as well.

    Args:
        *options (str, int, optional): Overrides applied on top of the loaded
            config.
        cfg (str, optional): Path of the config file to load.
            Defaults to None.

    Raises:
        FileNotFoundError: If a required model file does not exist.
    """
    update_config(config, options=options, config_file=cfg)

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Setup Augmentation/Transformation pipeline
    input_size = config.TRAIN.INPUT_SIZE
    resize_range_min = config.TRAIN.RESIZE_RANGE_MIN
    test_augmentation = transforms.Compose(
        [GroupScale(resize_range_min), GroupCenterCrop(input_size)])

    # Data-parallel
    devices_lst = list(range(torch.cuda.device_count()))
    print("Devices {}".format(devices_lst))

    modality = config.TEST.MODALITY

    if modality in ("RGB", "both"):
        targets, rgb_predictions = _score_modality(
            "RGB", config.TEST.MODEL_RGB, test_augmentation)

    if modality in ("flow", "both"):
        targets, flow_predictions = _score_modality(
            "flow", config.TEST.MODEL_FLOW, test_augmentation)

    if modality == "both":
        # Late fusion: average the two streams' predictions element-wise.
        # Both loaders use shuffle=False; ``targets`` from the flow pass is
        # reused here, as in the original code.
        predictions = torch.stack([rgb_predictions, flow_predictions])
        predictions_mean = torch.mean(predictions, dim=0)
        top1accuracy = accuracy(predictions_mean, targets, topk=(1, ))
        print("combined top1 accuracy: ",
              top1accuracy[0].cpu().numpy().tolist())
# Data-parallel devices_lst = list(range(torch.cuda.device_count())) print("Devices {}".format(devices_lst)) if (config.TEST.MODALITY == "RGB") or (config.TEST.MODALITY == "combined"): rgb_loader = torch.utils.data.DataLoader( I3DDataSet( data_root=config.DATASET.DIR, split=config.DATASET.SPLIT, modality="RGB", train_mode=False, sample_frames_at_test=False, transform=torchvision.transforms.Compose([ GroupScale(config.TRAIN.RESIZE_MIN), GroupCenterCrop(config.TRAIN.INPUT_SIZE), GroupNormalize(modality="RGB"), Stack(), ]) ), batch_size=config.TEST.BATCH_SIZE, shuffle=False, num_workers=config.WORKERS, pin_memory=config.PIN_MEMORY ) rgb_model_file = config.TEST.MODEL_RGB if not os.path.exists(rgb_model_file): raise FileNotFoundError(rgb_model_file, " does not exist") rgb_model = load_model(modality="RGB", state_dict_file=rgb_model_file)
return len(self.video_list) if __name__ == '__main__': input_size = 224 resize_small_edge = 256 train_rgb = I3DDataSet(data_root='/datadir/rawframes/', split=1, sample_frames=64, modality='RGB', train_mode=True, sample_frames_at_test=False, transform=torchvision.transforms.Compose([ GroupScale(resize_small_edge), GroupRandomCrop(input_size), GroupRandomHorizontalFlip(), GroupNormalize(modality="RGB"), Stack(), ])) item = train_rgb.__getitem__(10) print("train_rgb:") print(item[0].size()) print("max=", item[0].max()) print("min=", item[0].min()) print("label=", item[1]) val_rgb = I3DDataSet(data_root='/datadir/rawframes/', split=1, sample_frames=64,