def construct_input(frame_list):
    """Preprocess a group of frames into a single-sample model input.

    The frames go through GroupScale (``config.TRAIN.RESIZE_MIN``),
    GroupCenterCrop (``config.TRAIN.INPUT_SIZE``), RGB GroupNormalize and
    Stack, in that order.

    Args:
        frame_list: Frames to preprocess; handed unchanged to the pipeline.

    Returns:
        The pipeline output with a new dimension of size 1 inserted at
        position 0 (via ``unsqueeze(0)``).
    """
    steps = [
        GroupScale(config.TRAIN.RESIZE_MIN),
        GroupCenterCrop(config.TRAIN.INPUT_SIZE),
        GroupNormalize(modality="RGB"),
        Stack(),
    ]
    preprocess = torchvision.transforms.Compose(steps)
    stacked = preprocess(frame_list)
    return stacked.unsqueeze(0)
def construct_input(frame_list):
    """Preprocess a group of frames into a single-sample model input.

    The frames go through GroupScale(256), GroupCenterCrop(224), Stack,
    ToTorchFormatTensor and GroupNormalize, in that order.

    Args:
        frame_list: Frames to preprocess; handed unchanged to the pipeline.

    Returns:
        The pipeline output with a new dimension of size 1 inserted at
        position 0 (via ``unsqueeze(0)``).
    """
    steps = [
        GroupScale(256),
        GroupCenterCrop(224),
        Stack(),
        ToTorchFormatTensor(),
        GroupNormalize(),
    ]
    preprocess = torchvision.transforms.Compose(steps)
    normalized = preprocess(frame_list)
    return normalized.unsqueeze(0)
def run(*options, cfg=None):
    """Run training and validation of model

    Notes:
        Options can be passed in via the options argument and loaded from the cfg file
        Options loaded from default.py will be overridden by options loaded from cfg file
        Options passed in through options argument will override option loaded from cfg file

        Validation runs every config.TEST.EVAL_FREQ epochs and after the
        final epoch; each validation pass steps the LR scheduler on the
        validation loss and writes a checkpoint to config.MODEL.CHECKPOINT_DIR.

    Args:
        *options (str,int ,optional): Options used to override what is loaded from the config.
                                      To see what options are available consult default.py
        cfg (str, optional): Location of config file to load. Defaults to None.

    Raises:
        ValueError: If config.TRAIN.MODALITY is neither "RGB" nor "flow".
    """
    update_config(config, options=options, config_file=cfg)

    print("Training ", config.TRAIN.MODALITY, " model.")
    print("Batch size:", config.TRAIN.BATCH_SIZE,
          " Gradient accumulation steps:", config.TRAIN.GRAD_ACCUM_STEPS)

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Log to tensorboard
    writer = SummaryWriter(log_dir=config.LOG_DIR)

    # Everything after the writer is created runs under try/finally so the
    # writer is closed even when setup or training raises.
    try:
        # Setup dataloaders
        train_transform = torchvision.transforms.Compose([
            GroupScale(config.TRAIN.RESIZE_MIN),
            GroupRandomCrop(config.TRAIN.INPUT_SIZE),
            GroupRandomHorizontalFlip(),
            GroupNormalize(modality=config.TRAIN.MODALITY),
            Stack(),
        ])
        train_loader = torch.utils.data.DataLoader(
            I3DDataSet(data_root=config.DATASET.DIR,
                       split=config.DATASET.SPLIT,
                       sample_frames=config.TRAIN.SAMPLE_FRAMES,
                       modality=config.TRAIN.MODALITY,
                       transform=train_transform),
            batch_size=config.TRAIN.BATCH_SIZE,
            shuffle=True,
            num_workers=config.WORKERS,
            pin_memory=config.PIN_MEMORY)

        # Validation uses a deterministic center crop and no flipping.
        val_transform = torchvision.transforms.Compose([
            GroupScale(config.TRAIN.RESIZE_MIN),
            GroupCenterCrop(config.TRAIN.INPUT_SIZE),
            GroupNormalize(modality=config.TRAIN.MODALITY),
            Stack(),
        ])
        val_loader = torch.utils.data.DataLoader(
            I3DDataSet(data_root=config.DATASET.DIR,
                       split=config.DATASET.SPLIT,
                       modality=config.TRAIN.MODALITY,
                       train_mode=False,
                       transform=val_transform),
            batch_size=config.TEST.BATCH_SIZE,
            shuffle=False,
            num_workers=config.WORKERS,
            pin_memory=config.PIN_MEMORY)

        # Setup model
        if config.TRAIN.MODALITY == "RGB":
            channels = 3
            checkpoint = config.MODEL.PRETRAINED_RGB
        elif config.TRAIN.MODALITY == "flow":
            channels = 2
            checkpoint = config.MODEL.PRETRAINED_FLOW
        else:
            raise ValueError("Modality must be RGB or flow")

        # Build with 400 output classes so the pretrained weights load, then
        # swap the logits layer for the target dataset below.
        i3d_model = InceptionI3d(400, in_channels=channels)
        i3d_model.load_state_dict(torch.load(checkpoint))

        # Replace final FC layer to match dataset
        i3d_model.replace_logits(config.DATASET.NUM_CLASSES)

        criterion = torch.nn.CrossEntropyLoss().cuda()

        optimizer = optim.SGD(i3d_model.parameters(),
                              lr=0.1,
                              momentum=0.9,
                              weight_decay=0.0000001)

        i3d_model = i3d_model.cuda()

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         factor=0.1,
                                                         patience=2,
                                                         verbose=True,
                                                         threshold=1e-4,
                                                         min_lr=1e-4)

        # Data-parallel
        devices_lst = list(range(torch.cuda.device_count()))
        print("Devices {}".format(devices_lst))
        if len(devices_lst) > 1:
            i3d_model = torch.nn.DataParallel(i3d_model)

        # DataParallel exposes the wrapped network as `.module`; when the
        # model was not wrapped (single device) it has no such attribute, so
        # pick the object whose state_dict carries un-prefixed keys.
        if isinstance(i3d_model, torch.nn.DataParallel):
            model_to_save = i3d_model.module
        else:
            model_to_save = i3d_model

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(config.MODEL.CHECKPOINT_DIR, exist_ok=True)

        last_epoch = config.TRAIN.MAX_EPOCHS - 1
        for epoch in range(config.TRAIN.MAX_EPOCHS):

            train(train_loader, i3d_model, criterion, optimizer, epoch, writer)

            if (epoch + 1) % config.TEST.EVAL_FREQ == 0 or epoch == last_epoch:
                val_loss = validate(val_loader, i3d_model, criterion, epoch,
                                    writer)
                scheduler.step(val_loss)
                checkpoint_name = "{}_split{}_epoch{}.pt".format(
                    config.MODEL.NAME, config.DATASET.SPLIT,
                    str(epoch).zfill(3))
                torch.save(
                    model_to_save.state_dict(),
                    os.path.join(config.MODEL.CHECKPOINT_DIR, checkpoint_name))
    finally:
        writer.close()
# Example #4
# 0
def run(*options, cfg=None):
    """Run training and validation of model

    Notes:
        Options can be passed in via the options argument and loaded from the cfg file
        Options loaded from default.py will be overridden by options loaded from cfg file
        Options passed in through options argument will override option loaded from cfg file

        The model and optimizer are passed through amp.initialize with
        opt_level "O1" before the model is wrapped in DataParallel.
        Validation runs every config.EVAL_FREQ epochs and after the final
        epoch; each validation pass steps the LR scheduler on the validation
        loss and writes a checkpoint to config.MODEL_DIR.

    Args:
        *options (str,int ,optional): Options used to override what is loaded from the config.
                                      To see what options are available consult default.py
        cfg (str, optional): Location of config file to load. Defaults to None.

    Raises:
        ValueError: If config.TRAIN.MODALITY is neither "RGB" nor "flow".
        Exception: If fewer than two CUDA devices are visible.
    """
    update_config(config, options=options, config_file=cfg)

    print("Training ", config.TRAIN.MODALITY, " model.")

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Log to tensorboard
    writer = SummaryWriter(log_dir=config.LOG_DIR)

    # Everything after the writer is created runs under try/finally so the
    # writer is closed even when setup or training raises (including the
    # device-count check below).
    try:
        # Setup Augmentation/Transformation pipeline
        input_size = config.TRAIN.INPUT_SIZE
        resize_range_min = config.TRAIN.RESIZE_RANGE_MIN
        resize_range_max = config.TRAIN.RESIZE_RANGE_MAX

        # NOTE(review): flow-aware flipping is switched off regardless of
        # modality; the modality-dependent variant is kept for reference.
        #is_flow = True if config.TRAIN.MODALITY == "flow" else False
        is_flow = False

        train_augmentation = transforms.Compose([
            GroupRandomResizeCrop([resize_range_min, resize_range_max],
                                  input_size),
            GroupRandomHorizontalFlip(is_flow=is_flow),
            #GroupColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05)
        ])

        val_augmentation = transforms.Compose(
            [GroupScale(resize_range_min),
             GroupCenterCrop(input_size)])

        # Setup DataLoaders
        train_transform = torchvision.transforms.Compose([
            train_augmentation,
            Stack(),
            ToTorchFormatTensor(),
            GroupNormalize(),
        ])
        train_loader = torch.utils.data.DataLoader(
            I3DDataSet(data_root=config.DATASET.DIR,
                       split=config.DATASET.SPLIT,
                       sample_frames=config.TRAIN.SAMPLE_FRAMES,
                       modality=config.TRAIN.MODALITY,
                       transform=train_transform),
            batch_size=config.TRAIN.BATCH_SIZE,
            shuffle=True,
            num_workers=config.WORKERS,
            pin_memory=True)

        val_transform = torchvision.transforms.Compose([
            val_augmentation,
            Stack(),
            ToTorchFormatTensor(),
            GroupNormalize(),
        ])
        val_loader = torch.utils.data.DataLoader(
            I3DDataSet(data_root=config.DATASET.DIR,
                       split=config.DATASET.SPLIT,
                       sample_frames=config.TRAIN.SAMPLE_FRAMES,
                       modality=config.TRAIN.MODALITY,
                       transform=val_transform,
                       train_mode=False),
            batch_size=config.TEST.BATCH_SIZE,
            shuffle=False,
            num_workers=config.WORKERS,
            pin_memory=True)

        # Setup Model
        if config.TRAIN.MODALITY == "RGB":
            channels = 3
            checkpoint = config.MODEL.PRETRAINED_RGB
        elif config.TRAIN.MODALITY == "flow":
            channels = 2
            checkpoint = config.MODEL.PRETRAINED_FLOW
        else:
            raise ValueError("Modality must be RGB or flow")

        # Build with 400 output classes so the pretrained weights load, then
        # swap the logits layer for the target dataset below.
        i3d_model = InceptionI3d(400, in_channels=channels)
        i3d_model.load_state_dict(torch.load(checkpoint))

        # Replace final FC layer to match dataset
        i3d_model.replace_logits(config.DATASET.NUM_CLASSES)

        criterion = torch.nn.CrossEntropyLoss().cuda()

        # Flow model: converges after 25 epochs using batch size 30
        optimizer = optim.SGD(i3d_model.parameters(),
                              lr=0.1,
                              momentum=0.9,
                              weight_decay=0.0000001)

        # optimizer = optim.Adam(i3d_model.parameters(), lr=0.0001)

        #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [20, 50], gamma=0.1)

        i3d_model = i3d_model.cuda()

        i3d_model, optimizer = amp.initialize(i3d_model, optimizer,
                                              opt_level="O1")

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         factor=0.1,
                                                         patience=2,
                                                         verbose=True,
                                                         threshold=1e-4,
                                                         min_lr=1e-4)

        # scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001, max_lr=0.1, mode='triangular2', step_size_up=25)

        # Data-parallel
        devices_lst = list(range(torch.cuda.device_count()))
        print("Devices {}".format(devices_lst))
        if len(devices_lst) > 1:
            i3d_model = torch.nn.DataParallel(i3d_model)
        else:
            # Deliberate hard requirement; the checkpointing below relies on
            # the DataParallel `.module` attribute.
            raise Exception('Get more GPUs')

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(config.MODEL_DIR, exist_ok=True)

        last_epoch = config.TRAIN.MAX_EPOCHS - 1
        for epoch in range(config.TRAIN.MAX_EPOCHS):

            # train for one epoch
            train(train_loader, i3d_model, criterion, optimizer, epoch, writer)

            # scheduler.step()

            # evaluate on validation set
            if (epoch + 1) % config.EVAL_FREQ == 0 or epoch == last_epoch:
                val_loss = validate(val_loader, i3d_model, criterion, epoch,
                                    writer)
                scheduler.step(val_loss)
                checkpoint_name = "{}_split{}_epoch{}.pt".format(
                    config.MODEL.NAME, config.DATASET.SPLIT,
                    str(epoch).zfill(3))
                torch.save(i3d_model.module.state_dict(),
                           os.path.join(config.MODEL_DIR, checkpoint_name))
    finally:
        writer.close()
# Example #5
# 0
def _score_modality(modality, model_file, augmentation):
    """Score one modality on the test split and print its top-1 accuracy.

    Args:
        modality (str): "RGB" or "flow"; passed to I3DDataSet and test().
        model_file (str): Path of the trained weights handed to test().
        augmentation: Frame-level transform applied before Stack().

    Returns:
        tuple: (targets moved to the GPU, predictions returned by test()).

    Raises:
        FileNotFoundError: If model_file does not exist.
    """
    loader = torch.utils.data.DataLoader(
        I3DDataSet(data_root=config.DATASET.DIR,
                   split=config.DATASET.SPLIT,
                   sample_frames=config.TRAIN.SAMPLE_FRAMES,
                   modality=modality,
                   train_mode=False,
                   sample_frames_at_test=False,
                   transform=torchvision.transforms.Compose([
                       augmentation,
                       Stack(),
                       ToTorchFormatTensor(),
                       GroupNormalize(),
                   ])),
        batch_size=config.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=True)

    if not os.path.exists(model_file):
        # A single message argument: two positional arguments would be
        # interpreted by OSError as (errno, strerror) and garble the text.
        raise FileNotFoundError("{} does not exist".format(model_file))

    label = modality.lower()
    print("scoring with {} model".format(label))
    targets, predictions = test(loader, modality, model_file)

    targets = targets.cuda(non_blocking=True)
    top1_accuracy = accuracy(predictions, targets, topk=(1, ))
    print("{} top1 accuracy: ".format(label),
          top1_accuracy[0].cpu().numpy().tolist())
    return targets, predictions


def run(*options, cfg=None):
    """Score trained model(s) on the test split and print top-1 accuracy.

    Depending on config.TEST.MODALITY ("RGB", "flow" or "both") the RGB
    model, the flow model, or both are scored. For "both", the two sets of
    predictions are additionally averaged and a combined accuracy is printed.

    Args:
        *options (str,int ,optional): Options forwarded to update_config to
            override what is loaded from the config.
        cfg (str, optional): Location of config file to load. Defaults to None.

    Raises:
        ValueError: If config.TEST.MODALITY is not "RGB", "flow" or "both".
        FileNotFoundError: If a required model file does not exist.
    """
    update_config(config, options=options, config_file=cfg)

    modality = config.TEST.MODALITY
    if modality not in ("RGB", "flow", "both"):
        # Previously an unrecognised value fell through every branch and the
        # function returned without scoring anything.
        raise ValueError("Test modality must be RGB, flow or both")

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Setup Augmentation/Transformation pipeline
    input_size = config.TRAIN.INPUT_SIZE
    resize_range_min = config.TRAIN.RESIZE_RANGE_MIN

    test_augmentation = transforms.Compose(
        [GroupScale(resize_range_min),
         GroupCenterCrop(input_size)])

    # Data-parallel
    devices_lst = list(range(torch.cuda.device_count()))
    print("Devices {}".format(devices_lst))

    if modality in ("RGB", "both"):
        targets, rgb_predictions = _score_modality(
            "RGB", config.TEST.MODEL_RGB, test_augmentation)

    if modality in ("flow", "both"):
        targets, flow_predictions = _score_modality(
            "flow", config.TEST.MODEL_FLOW, test_augmentation)

    if modality == "both":
        # Late fusion: average the two models' predictions element-wise.
        predictions = torch.stack([rgb_predictions, flow_predictions])
        predictions_mean = torch.mean(predictions, dim=0)
        top1accuracy = accuracy(predictions_mean, targets, topk=(1, ))
        print("combined top1 accuracy: ",
              top1accuracy[0].cpu().numpy().tolist())
# Example #6
# 0
    # Data-parallel
    devices_lst = list(range(torch.cuda.device_count()))
    print("Devices {}".format(devices_lst))

    if (config.TEST.MODALITY == "RGB") or (config.TEST.MODALITY == "combined"):

        rgb_loader = torch.utils.data.DataLoader(
            I3DDataSet(
                data_root=config.DATASET.DIR,
                split=config.DATASET.SPLIT,
                modality="RGB",
                train_mode=False,
                sample_frames_at_test=False,
                transform=torchvision.transforms.Compose([
                    GroupScale(config.TRAIN.RESIZE_MIN),
                    GroupCenterCrop(config.TRAIN.INPUT_SIZE),
                    GroupNormalize(modality="RGB"),
                    Stack(),
                ])
            ),
            batch_size=config.TEST.BATCH_SIZE,
            shuffle=False,
            num_workers=config.WORKERS,
            pin_memory=config.PIN_MEMORY
        )

        rgb_model_file = config.TEST.MODEL_RGB
        if not os.path.exists(rgb_model_file):
            raise FileNotFoundError(rgb_model_file, " does not exist")
        rgb_model = load_model(modality="RGB", state_dict_file=rgb_model_file)
        return len(self.video_list)


if __name__ == '__main__':

    input_size = 224
    resize_small_edge = 256

    train_rgb = I3DDataSet(data_root='/datadir/rawframes/',
                           split=1,
                           sample_frames=64,
                           modality='RGB',
                           train_mode=True,
                           sample_frames_at_test=False,
                           transform=torchvision.transforms.Compose([
                               GroupScale(resize_small_edge),
                               GroupRandomCrop(input_size),
                               GroupRandomHorizontalFlip(),
                               GroupNormalize(modality="RGB"),
                               Stack(),
                           ]))
    item = train_rgb.__getitem__(10)
    print("train_rgb:")
    print(item[0].size())
    print("max=", item[0].max())
    print("min=", item[0].min())
    print("label=", item[1])

    val_rgb = I3DDataSet(data_root='/datadir/rawframes/',
                         split=1,
                         sample_frames=64,