Esempio n. 1
0
# Training and validation transforms. Both receive the same per-channel
# mean / std; the training transform additionally gets a list of scale ratios.
transform_train = video.VideoGroupTrainTransform(
    size=(opt.input_size, opt.input_size),
    scale_ratios=[1.0, 0.875, 0.75, 0.66],
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225])
transform_test = video.VideoGroupValTransform(
    size=opt.input_size,
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225])

# Per-device batch size and number of data loader workers, both read from
# the options object.
per_device_batch_size = opt.per_device_batch_size
num_workers = opt.num_workers
# Effective total batch size: per-device size times the number of GPUs.
batch_size = per_device_batch_size * num_gpus

# Training and validation splits of UCF101. Both are rooted at
# ``opt.train_dir``; they differ in the split list (``setting``), the
# ``train`` flag and the transform applied.
train_dataset = UCF101(
    setting=opt.train_setting,
    root=opt.train_dir,
    train=True,
    new_width=opt.new_width,
    new_height=opt.new_height,
    new_length=opt.new_length,
    new_step=opt.new_step,
    target_width=opt.input_size,
    target_height=opt.input_size,
    num_segments=opt.num_segments,
    transform=transform_train)
val_dataset = UCF101(
    setting=opt.val_setting,
    root=opt.train_dir,
    train=False,
    new_width=opt.new_width,
    new_height=opt.new_height,
    new_length=opt.new_length,
    new_step=opt.new_step,
    target_width=opt.input_size,
    target_height=opt.input_size,
    num_segments=opt.num_segments,
    transform=transform_test)

# Loaders: the training loader shuffles and rolls the last incomplete batch
# over; the validation loader keeps order and discards it.
train_data = gluon.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    prefetch=int(opt.prefetch_ratio * num_workers),
    last_batch='rollover')
val_data = gluon.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    prefetch=int(opt.prefetch_ratio * num_workers),
    last_batch='discard')

# NOTE(review): this rebinds ``train_data`` and therefore replaces the loader
# built above with one that has no explicit ``prefetch`` / ``last_batch``
# setting. Confirm which of the two constructions is the intended one.
train_data = gluon.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers)
Esempio n. 2
0
def get_data_loader(opt, batch_size, num_workers, logger, kvstore=None):
    """Build the training / validation data loaders for a video dataset.

    Parameters
    ----------
    opt : namespace-like object
        Options. Attributes read here: ``dataset``, ``data_dir``,
        ``val_data_dir``, ``scale_ratios`` (comma-separated floats),
        ``input_size``, ``train_list``, ``val_list``, ``new_width``,
        ``new_height``, ``new_length``, ``num_segments`` and
        ``prefetch_ratio``. For every dataset except ``'ucf101'`` the
        attributes ``new_step``, ``video_loader`` and ``use_decord`` are
        read as well.
    batch_size : int
        Batch size handed to both loaders.
    num_workers : int
        Number of loader workers; also scales the prefetch depth
        (``int(opt.prefetch_ratio * num_workers)``).
    logger : logger-like object
        Receives the sample-count message and the unsupported-dataset error.
    kvstore : object, optional
        When given, both loaders use a ``SplitSampler`` built from
        ``kvstore.num_workers`` and ``kvstore.rank``. When ``None`` the
        training loader shuffles and the validation loader does not.

    Returns
    -------
    tuple
        ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
        splits ``batch[0]`` and ``batch[1]`` across the contexts in ``ctx``.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not one of ``'kinetics400'``, ``'ucf101'``,
        ``'somethingsomethingv2'`` or ``'hmdb51'``.
    """
    # Fail fast with a clear error. Previously an unknown dataset was only
    # logged and the function then crashed on the unbound ``train_dataset``.
    supported_datasets = ('kinetics400', 'ucf101', 'somethingsomethingv2',
                          'hmdb51')
    if opt.dataset not in supported_datasets:
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.error(message)
        raise ValueError(message)

    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    scale_ratios = [float(i) for i in opt.scale_ratios.split(',')]
    input_size = opt.input_size
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    def batch_fn(batch, ctx):
        # Split data and labels along the batch axis across the contexts;
        # uneven splits are allowed.
        data = split_and_load(batch[0],
                              ctx_list=ctx,
                              batch_axis=0,
                              even_split=False)
        label = split_and_load(batch[1],
                               ctx_list=ctx,
                               batch_axis=0,
                               even_split=False)
        return data, label

    def make_loader(dataset, shuffle, last_batch):
        # One place for the loader settings shared by train and validation.
        loader_kwargs = dict(batch_size=batch_size,
                             num_workers=num_workers,
                             prefetch=int(opt.prefetch_ratio * num_workers),
                             last_batch=last_batch)
        if kvstore is not None:
            # With a kvstore, a SplitSampler replaces the shuffle flag.
            loader_kwargs['sampler'] = SplitSampler(
                len(dataset),
                num_parts=kvstore.num_workers,
                part_index=kvstore.rank)
        else:
            loader_kwargs['shuffle'] = shuffle
        return gluon.data.DataLoader(dataset, **loader_kwargs)

    transform_train = video.VideoGroupTrainTransform(
        size=(input_size, input_size),
        scale_ratios=scale_ratios,
        mean=mean,
        std=std)
    transform_test = video.VideoGroupValTransform(size=input_size,
                                                  mean=mean,
                                                  std=std)

    # Keyword arguments every dataset class receives.
    dataset_kwargs = dict(new_width=opt.new_width,
                          new_height=opt.new_height,
                          new_length=opt.new_length,
                          target_width=input_size,
                          target_height=input_size,
                          num_segments=opt.num_segments)

    if opt.dataset == 'ucf101':
        dataset_cls = UCF101
    else:
        # All datasets other than UCF101 also take the frame step and the
        # video decoding options.
        dataset_kwargs.update(new_step=opt.new_step,
                              video_loader=opt.video_loader,
                              use_decord=opt.use_decord)
        if opt.dataset == 'kinetics400':
            dataset_cls = Kinetics400
        elif opt.dataset == 'somethingsomethingv2':
            dataset_cls = SomethingSomethingV2
        else:
            dataset_cls = HMDB51

    # Only Kinetics400 reads its validation split from ``val_data_dir``;
    # the other datasets reuse ``data_dir``.
    val_root = val_data_dir if opt.dataset == 'kinetics400' else data_dir

    train_dataset = dataset_cls(setting=opt.train_list,
                                root=data_dir,
                                train=True,
                                transform=transform_train,
                                **dataset_kwargs)
    val_dataset = dataset_cls(setting=opt.val_list,
                              root=val_root,
                              train=False,
                              transform=transform_test,
                              **dataset_kwargs)

    logger.info('Load %d training samples and %d validation samples.' %
                (len(train_dataset), len(val_dataset)))

    train_data = make_loader(train_dataset,
                             shuffle=True,
                             last_batch='rollover')
    val_data = make_loader(val_dataset, shuffle=False, last_batch='discard')

    return train_data, val_data, batch_fn
Esempio n. 3
0
def get_data_loader(opt, batch_size, num_workers, logger, kvstore=None):
    """Build the training / validation data loaders for a video dataset.

    Parameters
    ----------
    opt : namespace-like object
        Options. Attributes read here: ``data_aug``, ``dataset``,
        ``data_dir``, ``val_data_dir``, ``scale_ratios`` (comma-separated
        floats), ``input_size``, ``train_list``, ``val_list``,
        ``new_width``, ``new_height``, ``new_length``, ``num_segments`` and
        ``prefetch_ratio``. Every dataset except ``'ucf101'`` also reads
        ``new_step``, ``video_loader`` and ``use_decord``; ``'kinetics400'``
        and ``'custom'`` additionally read ``slowfast``,
        ``slow_temporal_stride`` and ``fast_temporal_stride``.
    batch_size : int
        Batch size handed to both loaders.
    num_workers : int
        Number of loader workers; also scales the prefetch depth
        (``int(opt.prefetch_ratio * num_workers)``).
    logger : logger-like object
        Receives the sample-count message and the validation errors.
    kvstore : object, optional
        When given, both loaders use a ``ShuffleSplitSampler`` built from
        ``kvstore.num_workers`` and ``kvstore.rank``. When ``None`` the
        training loader shuffles and the validation loader does not.

    Returns
    -------
    tuple
        ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
        splits ``batch[0]`` and ``batch[1]`` across the contexts in ``ctx``.

    Raises
    ------
    ValueError
        If ``opt.data_aug`` is not one of ``'v1'`` .. ``'v4'`` or
        ``opt.dataset`` is not one of ``'kinetics400'``, ``'ucf101'``,
        ``'somethingsomethingv2'``, ``'hmdb51'`` or ``'custom'``.
    """
    # Fail fast with a clear error. Previously an unknown augmentation or
    # dataset was only logged and the function then crashed on an unbound
    # local (``transform_train`` / ``train_dataset``).
    supported_augs = ('v1', 'v2', 'v3', 'v4')
    if opt.data_aug not in supported_augs:
        message = 'Data augmentation %s is not supported yet.' % (
            opt.data_aug)
        logger.error(message)
        raise ValueError(message)

    supported_datasets = ('kinetics400', 'ucf101', 'somethingsomethingv2',
                          'hmdb51', 'custom')
    if opt.dataset not in supported_datasets:
        message = 'Dataset %s is not supported yet.' % (opt.dataset)
        logger.error(message)
        raise ValueError(message)

    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    scale_ratios = [float(i) for i in opt.scale_ratios.split(',')]
    input_size = opt.input_size
    default_mean = [0.485, 0.456, 0.406]
    default_std = [0.229, 0.224, 0.225]

    def batch_fn(batch, ctx):
        # Split data and labels along the batch axis across the contexts;
        # uneven splits are allowed.
        data = split_and_load(batch[0],
                              ctx_list=ctx,
                              batch_axis=0,
                              even_split=False)
        label = split_and_load(batch[1],
                               ctx_list=ctx,
                               batch_axis=0,
                               even_split=False)
        return data, label

    def make_loader(dataset, shuffle, last_batch):
        # One place for the loader settings shared by train and validation.
        loader_kwargs = dict(batch_size=batch_size,
                             num_workers=num_workers,
                             prefetch=int(opt.prefetch_ratio * num_workers),
                             last_batch=last_batch)
        if kvstore is not None:
            # With a kvstore, a ShuffleSplitSampler replaces the shuffle
            # flag.
            loader_kwargs['sampler'] = ShuffleSplitSampler(
                len(dataset),
                num_parts=kvstore.num_workers,
                part_index=kvstore.rank)
        else:
            loader_kwargs['shuffle'] = shuffle
        return gluon.data.DataLoader(dataset, **loader_kwargs)

    if opt.data_aug == 'v1':
        # GluonCV style, not keeping aspect ratio, multi-scale crop
        transform_train = video.VideoGroupTrainTransform(
            size=(input_size, input_size),
            scale_ratios=scale_ratios,
            mean=default_mean,
            std=default_std)
        transform_test = video.VideoGroupValTransform(size=input_size,
                                                      mean=default_mean,
                                                      std=default_std)
    else:
        if opt.data_aug == 'v2':
            # GluonCV style, keeping aspect ratio, multi-scale crop, same
            # as mmaction style
            transform_train = video.VideoGroupTrainTransformV2(
                size=(input_size, input_size),
                short_side=opt.new_height,
                scale_ratios=scale_ratios,
                mean=default_mean,
                std=default_std)
        elif opt.data_aug == 'v3':
            # PySlowFast style, keeping aspect ratio, random short side
            # scale jittering
            transform_train = video.VideoGroupTrainTransformV3(
                crop_size=(input_size, input_size),
                min_size=opt.new_height,
                max_size=opt.new_width,
                mean=default_mean,
                std=default_std)
        else:
            # 'v4': mmaction style, keeping aspect ratio, random crop and
            # resize, only for SlowFast family models, similar to 'v3'
            transform_train = video.VideoGroupTrainTransformV4(
                size=(input_size, input_size),
                mean=default_mean,
                std=default_std)
        # 'v2', 'v3' and 'v4' all share the same validation transform.
        transform_test = video.VideoGroupValTransformV2(
            crop_size=(input_size, input_size),
            short_side=opt.new_height,
            mean=default_mean,
            std=default_std)

    # Keyword arguments every dataset class receives.
    dataset_kwargs = dict(new_width=opt.new_width,
                          new_height=opt.new_height,
                          new_length=opt.new_length,
                          target_width=input_size,
                          target_height=input_size,
                          data_aug=opt.data_aug,
                          num_segments=opt.num_segments)
    if opt.dataset != 'ucf101':
        # All datasets other than UCF101 also take the frame step and the
        # video decoding options.
        dataset_kwargs.update(new_step=opt.new_step,
                              video_loader=opt.video_loader,
                              use_decord=opt.use_decord)

    if opt.dataset in ('kinetics400', 'custom'):
        # These two datasets additionally take the SlowFast sampling
        # options and read their validation split from ``val_data_dir``.
        dataset_kwargs.update(
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride)
        val_root = val_data_dir
    else:
        val_root = data_dir

    if opt.dataset == 'kinetics400':
        dataset_cls = Kinetics400
    elif opt.dataset == 'ucf101':
        dataset_cls = UCF101
    elif opt.dataset == 'somethingsomethingv2':
        dataset_cls = SomethingSomethingV2
    elif opt.dataset == 'hmdb51':
        dataset_cls = HMDB51
    else:
        dataset_cls = VideoClsCustom

    train_dataset = dataset_cls(setting=opt.train_list,
                                root=data_dir,
                                train=True,
                                transform=transform_train,
                                **dataset_kwargs)
    val_dataset = dataset_cls(setting=opt.val_list,
                              root=val_root,
                              train=False,
                              transform=transform_test,
                              **dataset_kwargs)

    logger.info('Load %d training samples and %d validation samples.' %
                (len(train_dataset), len(val_dataset)))

    train_data = make_loader(train_dataset,
                             shuffle=True,
                             last_batch='rollover')
    val_data = make_loader(val_dataset, shuffle=False, last_batch='discard')

    return train_data, val_data, batch_fn
Esempio n. 4
0
def main():
    """Evaluate an action recognition model and print its accuracy.

    Parses the options with ``parse_args()``, builds the test-time
    transform, the network and the validation loader for ``opt.dataset``,
    runs ``test(...)`` and prints top-1 / top-5 accuracy together with the
    elapsed evaluation time.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not one of ``'ucf101'``, ``'kinetics400'``,
        ``'somethingsomethingv2'`` or ``'hmdb51'``.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    # The configured batch size is scaled by the GPU count (at least 1).
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    # A non-empty hashtag replaces the boolean ``use_pretrained`` flag.
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    # Compare by value: ``is not ''`` tests object identity, which is
    # implementation-dependent for strings and a SyntaxWarning on Python 3.8+.
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        # NOTE(review): this branch is also taken when ``resume_params`` is
        # empty and ``use_pretrained`` is falsy, in which case nothing was
        # loaded here -- confirm the message is intended for that case.
        print('Pre-trained model is successfully loaded from the model zoo.')

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(setting=opt.val_list,
                                  root=opt.data_dir,
                                  train=False,
                                  new_width=opt.new_width,
                                  new_height=opt.new_height,
                                  new_length=opt.new_length,
                                  new_step=opt.new_step,
                                  target_width=opt.input_size,
                                  target_height=opt.input_size,
                                  video_loader=opt.video_loader,
                                  use_decord=opt.use_decord,
                                  test_mode=True,
                                  num_segments=opt.num_segments,
                                  transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        # Raise instead of logging through a ``logger`` name this function
        # neither receives nor defines; continuing would only fail later on
        # the unbound ``val_dataset``.
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    # Accuracies are scaled by 100 for printing.
    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
Esempio n. 5
0
def main(logger):
    """Evaluate (or benchmark / calibrate) a video action recognition model.

    The options are read with ``parse_args()``. Depending on them the
    function:

    * builds the test-time transform (ten-crop, three-crop or the default
      ``VideoGroupValTransform``) and stores the crop count in
      ``opt.num_crop``;
    * loads the network either through ``get_model`` or, when
      ``opt.deploy`` is set, from an exported symbol/params pair;
    * exits right after ``benchmarking`` when ``opt.benchmark`` is set;
    * builds the validation dataset selected by ``opt.dataset`` and wraps
      it in a ``DataLoader``;
    * exits right after ``calibration`` when ``opt.calibration`` is set;
    * otherwise runs ``test`` and prints top-1/top-5 accuracy and the
      elapsed time.

    Parameters
    ----------
    logger :
        Logger used to report an unsupported dataset and forwarded to
        ``calibration``.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not one of the supported dataset names.
    """
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    context = [mx.cpu()]
    if num_gpus > 0:
        # opt.batch_size is per device; scale it to the total batch size.
        batch_size *= num_gpus
        context = [mx.gpu(i) for i in range(num_gpus)]

    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    if not opt.deploy:
        # get model
        if opt.use_pretrained and len(opt.hashtag) > 0:
            opt.use_pretrained = opt.hashtag
        classes = opt.num_classes
        model_name = opt.model
        # Currently, there is no hashtag for int8 models.
        if opt.quantized:
            model_name += '_int8'
            opt.use_pretrained = True

        net = get_model(name=model_name,
                        nclass=classes,
                        pretrained=opt.use_pretrained,
                        num_segments=opt.num_segments,
                        num_crop=opt.num_crop)
        net.cast(opt.dtype)
        net.collect_params().reset_ctx(context)
        if opt.mode == 'hybrid':
            net.hybridize(static_alloc=True, static_shape=True)
        # Compare by value: `is not ''` tests object identity, which is an
        # implementation detail and a SyntaxWarning on Python >= 3.8.
        if opt.resume_params != '' and not opt.use_pretrained:
            net.load_parameters(opt.resume_params, ctx=context)
            print('Pre-trained model %s is successfully loaded.' %
                  (opt.resume_params))
        else:
            print(
                'Pre-trained model is successfully loaded from the model zoo.')
    else:
        model_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports(
            '{}-symbol.json'.format(opt.model_prefix), ['data'],
            '{}-0000.params'.format(opt.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    print("Successfully loaded model {}".format(model_name))
    # dummy data for benchmarking performance
    if opt.benchmark:
        benchmarking(opt, net, context)
        sys.exit()

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            test_mode=True,
            num_segments=opt.num_segments,
            num_crop=opt.num_crop,
            transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        # Stop here: continuing would only fail later with an unrelated
        # "val_dataset referenced before assignment" error.
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    # calibrate FP32 model into INT8 model
    if opt.calibration:
        calibration(net, val_data, opt, context, logger)
        sys.exit()

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
Esempio n. 6
0
    video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

##################################################################
# Using the transform functions defined above, we can now build the
# data loader for our training dataset.

# Number of samples processed on each GPU per step
per_device_batch_size = 5
# Number of workers used by the data loader
num_workers = 8
# Effective total batch size over all GPUs
batch_size = per_device_batch_size * num_gpus

# train=True is used for training the model; num_segments=3 enables TSN training.
train_dataset = UCF101(train=True, num_segments=3, transform=transform_train)
print('Load %d training samples.' % len(train_dataset))
train_data = gluon.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)

################################################################
# Optimizer, Loss and Metric
# --------------------------

# Factor by which the learning rate is multiplied at each decay step
lr_decay = 0.1
# Epochs at which the learning rate decays (np.inf acts as a sentinel)
lr_decay_epoch = [30, 60, np.inf]
def main(logger):
    """Evaluate a video action recognition model and save a metrics report.

    The options are read with ``parse_args(parser)``. The function creates
    ``opt.save_dir`` (which must not exist yet), loads the network (through
    ``get_model`` or, when ``opt.deploy`` is set, from an exported
    symbol/params pair), builds the validation dataset selected by
    ``opt.dataset``, runs ``test`` on it and finally builds and saves a
    per-split report under ``opt.save_dir/<split>``, where ``<split>`` is
    the file name of ``opt.val_list`` without its extension.

    The run exits early after ``benchmarking`` when ``opt.benchmark`` is set
    and after ``calibration`` when ``opt.calibration`` is set.

    Parameters
    ----------
    logger :
        Logger used to report an unsupported dataset and forwarded to
        ``calibration``.

    Raises
    ------
    AssertionError
        If ``opt.save_dir`` already exists.
    ValueError
        If ``opt.dataset`` is not one of the supported dataset names.
    """
    opt = parse_args(parser)
    print(opt)

    # Refuse to overwrite the results of a previous run.
    assert not (os.path.isdir(opt.save_dir)), "already done this experiment..."
    Path(opt.save_dir).mkdir(parents=True)
    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # The evaluation runs on a single GPU with a fixed per-device batch size.
    num_gpus = 1
    context = [mx.gpu(i) for i in range(num_gpus)]
    per_device_batch_size = 5
    batch_size = per_device_batch_size * num_gpus
    num_workers = opt.num_workers

    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    default_mean = [0.485, 0.456, 0.406]
    default_std = [0.229, 0.224, 0.225]
    # Default single-crop validation transform. Without it every dataset
    # branch except 'custom' would reference an undefined `transform_test`.
    transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                  mean=default_mean,
                                                  std=default_std)
    # `num_crop` is consumed by get_model / Kinetics400 below; fall back to
    # a single crop when the argument parser does not provide it.
    if not hasattr(opt, 'num_crop'):
        opt.num_crop = 1

    if not opt.deploy:
        # get model
        if opt.use_pretrained and len(opt.hashtag) > 0:
            opt.use_pretrained = opt.hashtag
        classes = opt.num_classes
        model_name = opt.model
        # Currently, there is no hashtag for int8 models.
        if opt.quantized:
            model_name += '_int8'
            opt.use_pretrained = True

        net = get_model(name=model_name,
                        nclass=classes,
                        pretrained=opt.use_pretrained,
                        num_segments=opt.num_segments,
                        num_crop=opt.num_crop)
        net.cast(opt.dtype)
        net.collect_params().reset_ctx(context)
        resume_params = find_model_params(opt)

        if opt.mode == 'hybrid':
            net.hybridize(static_alloc=True, static_shape=True)
        # Compare by value: `is not ''` tests object identity, which is an
        # implementation detail and a SyntaxWarning on Python >= 3.8.
        if resume_params != '' and not opt.use_pretrained:
            net.load_parameters(resume_params, ctx=context)
            print('Pre-trained model %s is successfully loaded.' %
                  (resume_params))
        else:
            print(
                'Pre-trained model is successfully loaded from the model zoo.')
    else:
        model_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports(
            '{}-symbol.json'.format(opt.model_prefix), ['data'],
            '{}-0000.params'.format(opt.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    print("Successfully loaded model {}".format(model_name))
    # dummy data for benchmarking performance
    if opt.benchmark:
        benchmarking(opt, net, context)
        sys.exit()

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             data_aug=opt.data_aug,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            test_mode=True,
            data_aug=opt.data_aug,
            num_segments=opt.num_segments,
            num_crop=opt.num_crop,
            transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           data_aug=opt.data_aug,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             data_aug=opt.data_aug,
                             num_segments=opt.num_segments,
                             transform=transform_test)

    elif opt.dataset == 'custom':
        # NOTE(review): this is the *training* group transform (with
        # scale_ratios) applied to validation data - confirm that this is
        # intended rather than a VideoGroupValTransform.
        transform_test = video.VideoGroupTrainTransform(
            size=(224, 224),
            scale_ratios=[1.0, 0.8],
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])

        val_dataset = VideoClsCustom(
            root=opt.val_data_dir,
            setting=opt.val_list,
            train=False,
            new_length=32,
            name_pattern='frame_%d.jpg',
            transform=transform_test,
            video_loader=False,
            slowfast=True,
            use_decord=True,
        )

    else:
        # Stop here: continuing would only fail later with an unrelated
        # "val_dataset referenced before assignment" error.
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    # No prefetch / last_batch options here, so the final partial batch is
    # kept by the loader's default policy.
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers)

    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    # calibrate FP32 model into INT8 model
    if opt.calibration:
        calibration(net, val_data, opt, context, logger)
        sys.exit()

    start_time = time.time()
    acc_top1_val, acc_top5_val, true_labels, predicted_probabilities = test(
        context, val_data, opt, net)
    # The split name is the validation list's file name without extension.
    split_filename = os.path.split(opt.val_list)[1]
    split = os.path.splitext(split_filename)[0]
    # load encoder
    encoder = joblib.load(opt.encoder_path)
    # set-up metrics: either a bare callable or [callable, kwargs]
    classes = np.arange(len(encoder.classes_))
    metrics_dict = {
        "Accuracy": balanced_accuracy_score,
        "Mcc": matthews_corrcoef,
        "Precision_Avg": [precision_score, {
            "average": "micro"
        }],
        "Recall_Avg": [recall_score, {
            "average": "micro"
        }],
        "Precision_Class":
        [precision_score, {
            "labels": classes,
            "average": None
        }],
        "Recall_Class": [recall_score, {
            "labels": classes,
            "average": None
        }],
    }
    split_folder = os.path.join(opt.save_dir, split)
    # set-up evaluator
    evaluator = Evaluator_video(split_folder, encoder, true_labels,
                                predicted_probabilities, metrics_dict)
    # compute report
    report = get_split_report(evaluator)
    # save report
    save_results(report, split_folder)
    print(f"Correctly process split {split}")

    # The measured time covers both inference and report generation.
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
Esempio n. 8
0
#########################################################################
# The first example reads 25 randomly chosen videos per batch, picks one
# random frame from each video and applies a center crop to it.

from gluoncv.data import UCF101
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import transforms
from gluoncv.data.transforms import video

transform_train = transforms.Compose([
    video.VideoCenterCrop(size=224),
    video.VideoToTensor(),
])

# By default the data is expected under ~/.mxnet/datasets/ucf101.
# Pass ``setting`` and ``root`` to UCF101 if the decoded video frames live in a different folder.
train_dataset = UCF101(train=True, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# Print the shape of one loaded batch. The ``extra`` dimension tells whether several crops or
# several segments were taken from a video; with a single frame per video it is 1.
for x, y in train_data:
    print('Video frame size (batch, extra, channel, height, width):', x.shape)
    print('Video label:', y.shape)
    break

#########################################################################
# Plot one training sample. Each sample is indexable: 0 is the image, 1 is the label.
from gluoncv.utils import viz
viz.plot_image(
    train_dataset[7][0].squeeze().transpose((1, 2, 0)) * 255.0)  # Basketball
Esempio n. 9
0
def get_data_loader(opt, batch_size, num_workers, logger):
    """Build the UCF101 training and validation data loaders.

    Parameters
    ----------
    opt :
        Parsed options. Reads ``dataset``, ``train_dir``, ``input_size``,
        ``train_setting``, ``val_setting``, ``new_width``, ``new_height``,
        ``new_length_diff``, ``num_segments`` and ``prefetch_ratio``.
    batch_size : int
        Batch size passed to both loaders.
    num_workers : int
        Number of data loader workers; also scales the prefetch count.
    logger :
        Currently unused (messages are printed instead); kept so existing
        callers keep working.

    Returns
    -------
    tuple
        ``(train_data, val_data, batch_fn)`` where ``batch_fn(batch, ctx)``
        splits ``batch[0]`` (data) and ``batch[1]`` (labels) along axis 0
        over the context list ``ctx`` and returns ``(data, label)``.

    Raises
    ------
    ValueError
        If ``opt.dataset`` is not ``'ucf101'``.
    """
    # Fail fast with a clear error. Previously the message was printed and
    # the function then crashed on the undefined `train_dataset`.
    if opt.dataset != 'ucf101':
        print('Dataset %s is not supported yet.' % (opt.dataset))
        raise ValueError('Dataset %s is not supported yet.' % (opt.dataset))

    data_dir = opt.train_dir
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    input_size = opt.input_size
    # Normalization statistics shared by the train and test transforms.
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    def batch_fn(batch, ctx):
        # even_split=False allows a batch size that is not a multiple of
        # the number of contexts.
        data = split_and_load(batch[0],
                              ctx_list=ctx,
                              batch_axis=0,
                              even_split=False)
        label = split_and_load(batch[1],
                               ctx_list=ctx,
                               batch_axis=0,
                               even_split=False)
        return data, label

    transform_train = video.VideoGroupTrainTransform(
        size=(input_size, input_size),
        scale_ratios=scale_ratios,
        mean=norm_mean,
        std=norm_std)
    transform_test = video.VideoGroupValTransform(size=input_size,
                                                  mean=norm_mean,
                                                  std=norm_std)

    train_dataset = UCF101(setting=opt.train_setting,
                           root=data_dir,
                           train=True,
                           new_width=opt.new_width,
                           new_height=opt.new_height,
                           new_length=opt.new_length_diff,
                           target_width=input_size,
                           target_height=input_size,
                           num_segments=opt.num_segments,
                           transform=transform_train)
    val_dataset = UCF101(setting=opt.val_setting,
                         root=data_dir,
                         train=False,
                         new_width=opt.new_width,
                         new_height=opt.new_height,
                         new_length=opt.new_length_diff,
                         target_width=input_size,
                         target_height=input_size,
                         num_segments=opt.num_segments,
                         transform=transform_test)

    print('Load %d training samples and %d validation samples.' %
          (len(train_dataset), len(val_dataset)))

    prefetch = int(opt.prefetch_ratio * num_workers)
    # Training rolls leftover samples over to the next epoch; validation
    # discards an incomplete final batch.
    train_data = gluon.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers,
                                       prefetch=prefetch,
                                       last_batch='rollover')
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=prefetch,
                                     last_batch='discard')

    return train_data, val_data, batch_fn