Example 1
    def _preprocess(self, frames: np.ndarray, clip_len: int) -> np.ndarray:
        """
        This method applies every preprocessing operation and transformation
        that the frames must pass through.

        Parameters
        ----------
        frames: np.ndarray
            Numpy array of shape (N, L, H, W, C), where N is the number of clips,
            L is the number of frames each clip has, H and W are the height and
            width of each frame, and C is the number of image channels

        clip_len: int
            Number of frames a model input must have

        Returns
        -------
        clips_input: np.ndarray
            Preprocessed numpy array with all input frames
        """
        transform_fn = video_transforms.VideoGroupValTransform(
            size=self.FRAME_SIDE_SIZE,
            mean=self.IMAGENET_MEAN,
            std=self.IMAGENET_SD)
        clips_input = transform_fn(frames)
        clips_input = np.stack(clips_input, axis=0)
        clips_input = clips_input.reshape((-1, ) + (clip_len, 3, 224, 224))
        clips_input = np.transpose(clips_input, (0, 2, 1, 3, 4))
        return clips_input
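The shape bookkeeping above is easier to follow with concrete numbers. Below is a minimal, self-contained sketch (the dummy arrays stand in for the output of VideoGroupValTransform, which yields one (3, 224, 224) array per input frame):

import numpy as np

# Dummy stand-in for the transform output: N clips of L frames each,
# every frame already converted to a (3, 224, 224) float array.
N, L = 2, 32
transformed = [np.zeros((3, 224, 224), dtype=np.float32) for _ in range(N * L)]

clips = np.stack(transformed, axis=0)             # (N*L, 3, 224, 224)
clips = clips.reshape((-1,) + (L, 3, 224, 224))   # (N, L, 3, 224, 224)
clips = np.transpose(clips, (0, 2, 1, 3, 4))      # (N, 3, L, 224, 224)
assert clips.shape == (N, 3, L, 224, 224)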
Example 2
def getModelForApp():

    print(opt)
    gc.set_threshold(100, 5, 5)

    if not os.path.exists(opt['save_dir']):
        os.makedirs(opt['save_dir'])

    # set env
    gpu_id = opt['gpu_id']
    context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]

    transform_test = video.VideoGroupValTransform(size=opt['input_size'], mean=image_norm_mean, std=image_norm_std)
    opt['num_crop'] = 1

    classes = opt['num_classes']
    model_name = opt['model']
    net = get_model(name=model_name, nclass=classes, pretrained=opt['use_pretrained'],
                    feat_ext=True, num_segments=opt['num_segments'], num_crop=opt['num_crop'])
    net.cast(opt['dtype'])
    net.collect_params().reset_ctx(context)

    print('Pre-trained model is successfully loaded from the model zoo.')
    print("Successfully built model {}".format(model_name))

    return net, transform_test, context, model_name
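A minimal usage sketch of the helper above (the clip is a dummy stand-in; real inputs would go through transform_test first, and opt['dtype'] is assumed to be 'float32'):

import mxnet as mx

net, transform_test, context, model_name = getModelForApp()

# Dummy clip laid out as (batch, channels, frames, height, width),
# the layout produced by the preprocessing in the other examples.
dummy_clip = mx.nd.zeros((1, 3, 32, 224, 224), ctx=context)
features = net(dummy_clip)  # feat_ext=True, so the network returns features
print(features.shape)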
Example 3
def get_data_loader(opt, batch_size, num_workers, logger, kvstore=None):
    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    scale_ratios = [float(i) for i in opt.scale_ratios.split(',')]
    input_size = opt.input_size

    def batch_fn(batch, ctx):
        if opt.num_segments > 1:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False, multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        return data, label

    transform_train = video.VideoGroupTrainTransform(size=(input_size, input_size), scale_ratios=scale_ratios, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform_test = video.VideoGroupValTransform(size=input_size, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if opt.dataset == 'kinetics400':
        train_dataset = kinetics400.classification.Kinetics400(setting=opt.train_list, root=data_dir, train=True,
                                                     new_width=opt.new_width, new_height=opt.new_height, new_length=opt.new_length, new_step=opt.new_step,
                                                     target_width=input_size, target_height=input_size,
                                                     num_segments=opt.num_segments, transform=transform_train)
        val_dataset = kinetics400.classification.Kinetics400(setting=opt.val_list, root=val_data_dir, train=False,
                                                   new_width=opt.new_width, new_height=opt.new_height, new_length=opt.new_length, new_step=opt.new_step,
                                                   target_width=input_size, target_height=input_size,
                                                   num_segments=opt.num_segments, transform=transform_test)
    elif opt.dataset == 'ucf101':
        train_dataset = ucf101.classification.UCF101(setting=opt.train_list, root=data_dir, train=True,
                                                     new_width=opt.new_width, new_height=opt.new_height, new_length=opt.new_length,
                                                     target_width=input_size, target_height=input_size,
                                                     num_segments=opt.num_segments, transform=transform_train)
        val_dataset = ucf101.classification.UCF101(setting=opt.val_list, root=data_dir, train=False,
                                                   new_width=opt.new_width, new_height=opt.new_height, new_length=opt.new_length,
                                                   target_width=input_size, target_height=input_size,
                                                   num_segments=opt.num_segments, transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    logger.info('Load %d training samples and %d validation samples.' % (len(train_dataset), len(val_dataset)))

    if kvstore is not None:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers,
                                           sampler=SplitSampler(len(train_dataset), num_parts=kvstore.num_workers, part_index=kvstore.rank),
                                           batchify_fn=tsn_mp_batchify_fn, prefetch=int(opt.prefetch_ratio * num_workers), last_batch='rollover')
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers,
                                         sampler=SplitSampler(len(val_dataset), num_parts=kvstore.num_workers, part_index=kvstore.rank),
                                         batchify_fn=tsn_mp_batchify_fn, prefetch=int(opt.prefetch_ratio * num_workers), last_batch='discard')
    else:
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers,
                                           batchify_fn=tsn_mp_batchify_fn, prefetch=int(opt.prefetch_ratio * num_workers), last_batch='rollover')
        val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers,
                                           batchify_fn=tsn_mp_batchify_fn, prefetch=int(opt.prefetch_ratio * num_workers), last_batch='discard')

    return train_data, val_data, batch_fn
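The returned batch_fn is meant to be consumed by the training loop; a sketch of that pattern (the context list and loop body are illustrative, and opt and logger are assumed to be defined as in the surrounding script):

import mxnet as mx

ctx = [mx.cpu()]  # swap in [mx.gpu(i) for i in range(num_gpus)] for GPU training
train_data, val_data, batch_fn = get_data_loader(opt, batch_size=16,
                                                 num_workers=4, logger=logger)
for batch in train_data:
    # Each call returns per-device lists of data and label slices.
    data, label = batch_fn(batch, ctx)
    # The forward/backward pass over each device slice would go here.
    break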
Example 4
    def get_action(self, net):
        if len(self.frames) < self.SAMPLE_DURATION:
            return None
        clip_input = self.frames
        transform_fn = video.VideoGroupValTransform(size=224,
                                                    mean=[0.485, 0.456, 0.406],
                                                    std=[0.229, 0.224, 0.225])
        clip_input = transform_fn(clip_input)
        print("INFO: action input shape:")
        print([clip.shape for clip in clip_input])
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1, ) + (32, 3, 224, 224))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
        pred = net(nd.array(clip_input))
        classes = net.classes
        topK = 1
        ind = nd.topk(pred, k=topK)[0].astype('int')
        return classes[ind[0].asscalar()]
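get_action assumes self.frames already buffers at least SAMPLE_DURATION decoded frames; a minimal sketch of such a buffer (the class and method names are hypothetical):

import collections

class FrameBuffer:
    SAMPLE_DURATION = 32  # must match the 32 hard-coded in the reshape above

    def __init__(self):
        # Keep only the most recent SAMPLE_DURATION frames; get_action
        # returns None until the buffer is full.
        self.frames = collections.deque(maxlen=self.SAMPLE_DURATION)

    def add_frame(self, frame):
        self.frames.append(frame)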
Example 5
if opt.resume_params != '':
    net.load_parameters(opt.resume_params, ctx=ctx)

if opt.use_pretrained:
    net.features_3d.load_parameters(opt.pretrained_ECOfeature3d, ctx=ctx, allow_missing=True)
    net.output.load_parameters(opt.pretrained_ECOoutput, ctx=ctx, allow_missing=True)
    logger.info('Using pretrained models: %s, %s', opt.pretrained_ECOfeature3d, opt.pretrained_ECOoutput)
    
if opt.use_mult:
    net.collect_params(opt.freeze_patterns).setattr('lr_mult', opt.freeze_lr_mult)

logger.info(net)
net.collect_params().reset_ctx(ctx)
    
transform_train = video.VideoGroupTrainTransform(size=(opt.input_size, opt.input_size), scale_ratios=[1.0, 0.875, 0.75, 0.66], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform_test = video.VideoGroupValTransform(size=opt.input_size, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


# Batch Size for Each GPU
per_device_batch_size = opt.per_device_batch_size
# Number of data loader workers
num_workers = opt.num_workers
# Calculate effective total batch size
batch_size = per_device_batch_size * num_gpus

# Set train=True for training data. Here we only use a subset of UCF101 for demonstration purpose.
# The subset has 101 training samples, one sample per class.

train_dataset = UCF101(setting=opt.train_setting, root=opt.train_dir, train=True,
                       new_width=opt.new_width, new_height=opt.new_height, new_length=opt.new_length,new_step=opt.new_step,
                       target_width=opt.input_size, target_height=opt.input_size,
                       num_segments=opt.num_segments, transform=transform_train)
Example 6
def main():
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(
            size=opt.input_size,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (opt.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(setting=opt.val_list,
                                  root=opt.data_dir,
                                  train=False,
                                  new_width=opt.new_width,
                                  new_height=opt.new_height,
                                  new_length=opt.new_length,
                                  new_step=opt.new_step,
                                  target_width=opt.input_size,
                                  target_height=opt.input_size,
                                  video_loader=opt.video_loader,
                                  use_decord=opt.use_decord,
                                  test_mode=True,
                                  num_segments=opt.num_segments,
                                  transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
Example 7
def get_data_loader(opt, batch_size, num_workers, logger, kvstore=None):
    data_dir = opt.data_dir
    val_data_dir = opt.val_data_dir
    scale_ratios = [float(i) for i in opt.scale_ratios.split(',')]
    input_size = opt.input_size
    default_mean = [0.485, 0.456, 0.406]
    default_std = [0.229, 0.224, 0.225]

    def batch_fn(batch, ctx):
        data = split_and_load(batch[0],
                              ctx_list=ctx,
                              batch_axis=0,
                              even_split=False)
        label = split_and_load(batch[1],
                               ctx_list=ctx,
                               batch_axis=0,
                               even_split=False)
        return data, label

    if opt.data_aug == 'v1':
        # GluonCV style, not keeping aspect ratio, multi-scale crop
        transform_train = video.VideoGroupTrainTransform(
            size=(input_size, input_size),
            scale_ratios=scale_ratios,
            mean=default_mean,
            std=default_std)
        transform_test = video.VideoGroupValTransform(size=input_size,
                                                      mean=default_mean,
                                                      std=default_std)
    elif opt.data_aug == 'v2':
        # GluonCV style, keeping aspect ratio, multi-scale crop, same as mmaction style
        transform_train = video.VideoGroupTrainTransformV2(
            size=(input_size, input_size),
            short_side=opt.new_height,
            scale_ratios=scale_ratios,
            mean=default_mean,
            std=default_std)
        transform_test = video.VideoGroupValTransformV2(
            crop_size=(input_size, input_size),
            short_side=opt.new_height,
            mean=default_mean,
            std=default_std)
    elif opt.data_aug == 'v3':
        # PySlowFast style, keeping aspect ratio, random short side scale jittering
        transform_train = video.VideoGroupTrainTransformV3(
            crop_size=(input_size, input_size),
            min_size=opt.new_height,
            max_size=opt.new_width,
            mean=default_mean,
            std=default_std)
        transform_test = video.VideoGroupValTransformV2(
            crop_size=(input_size, input_size),
            short_side=opt.new_height,
            mean=default_mean,
            std=default_std)
    elif opt.data_aug == 'v4':
        # mmaction style, keeping aspect ratio, random crop and resize, only for SlowFast family models, similar to 'v3'
        transform_train = video.VideoGroupTrainTransformV4(size=(input_size,
                                                                 input_size),
                                                           mean=default_mean,
                                                           std=default_std)
        transform_test = video.VideoGroupValTransformV2(
            crop_size=(input_size, input_size),
            short_side=opt.new_height,
            mean=default_mean,
            std=default_std)
    else:
        logger.info('Data augmentation %s is not supported yet.' %
                    (opt.data_aug))

    if opt.dataset == 'kinetics400':
        train_dataset = Kinetics400(
            setting=opt.train_list,
            root=data_dir,
            train=True,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=input_size,
            target_height=input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            data_aug=opt.data_aug,
            num_segments=opt.num_segments,
            transform=transform_train)
        val_dataset = Kinetics400(
            setting=opt.val_list,
            root=val_data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=input_size,
            target_height=input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            data_aug=opt.data_aug,
            num_segments=opt.num_segments,
            transform=transform_test)
    elif opt.dataset == 'ucf101':
        train_dataset = UCF101(setting=opt.train_list,
                               root=data_dir,
                               train=True,
                               new_width=opt.new_width,
                               new_height=opt.new_height,
                               new_length=opt.new_length,
                               target_width=input_size,
                               target_height=input_size,
                               data_aug=opt.data_aug,
                               num_segments=opt.num_segments,
                               transform=transform_train)
        val_dataset = UCF101(setting=opt.val_list,
                             root=data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=input_size,
                             target_height=input_size,
                             data_aug=opt.data_aug,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        train_dataset = SomethingSomethingV2(setting=opt.train_list,
                                             root=data_dir,
                                             train=True,
                                             new_width=opt.new_width,
                                             new_height=opt.new_height,
                                             new_length=opt.new_length,
                                             new_step=opt.new_step,
                                             target_width=input_size,
                                             target_height=input_size,
                                             video_loader=opt.video_loader,
                                             use_decord=opt.use_decord,
                                             data_aug=opt.data_aug,
                                             num_segments=opt.num_segments,
                                             transform=transform_train)
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=input_size,
                                           target_height=input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           data_aug=opt.data_aug,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        train_dataset = HMDB51(setting=opt.train_list,
                               root=data_dir,
                               train=True,
                               new_width=opt.new_width,
                               new_height=opt.new_height,
                               new_length=opt.new_length,
                               new_step=opt.new_step,
                               target_width=input_size,
                               target_height=input_size,
                               video_loader=opt.video_loader,
                               use_decord=opt.use_decord,
                               data_aug=opt.data_aug,
                               num_segments=opt.num_segments,
                               transform=transform_train)
        val_dataset = HMDB51(setting=opt.val_list,
                             root=data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=input_size,
                             target_height=input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             data_aug=opt.data_aug,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'custom':
        train_dataset = VideoClsCustom(
            setting=opt.train_list,
            root=data_dir,
            train=True,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=input_size,
            target_height=input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            data_aug=opt.data_aug,
            num_segments=opt.num_segments,
            transform=transform_train)
        val_dataset = VideoClsCustom(
            setting=opt.val_list,
            root=val_data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=input_size,
            target_height=input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            data_aug=opt.data_aug,
            num_segments=opt.num_segments,
            transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    logger.info('Load %d training samples and %d validation samples.' %
                (len(train_dataset), len(val_dataset)))

    if kvstore is not None:
        train_data = gluon.data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            sampler=ShuffleSplitSampler(len(train_dataset),
                                        num_parts=kvstore.num_workers,
                                        part_index=kvstore.rank),
            prefetch=int(opt.prefetch_ratio * num_workers),
            last_batch='rollover')
        val_data = gluon.data.DataLoader(
            val_dataset,
            batch_size=batch_size,
            num_workers=num_workers,
            sampler=ShuffleSplitSampler(len(val_dataset),
                                        num_parts=kvstore.num_workers,
                                        part_index=kvstore.rank),
            prefetch=int(opt.prefetch_ratio * num_workers),
            last_batch='discard')
    else:
        train_data = gluon.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=num_workers,
                                           prefetch=int(opt.prefetch_ratio *
                                                        num_workers),
                                           last_batch='rollover')
        val_data = gluon.data.DataLoader(val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers,
                                         prefetch=int(opt.prefetch_ratio *
                                                      num_workers),
                                         last_batch='discard')

    return train_data, val_data, batch_fn
Example 8
def main(logger):
    opt = parse_args()
    logger.info(opt)
    gc.set_threshold(100, 5, 5)

    if not os.path.exists(opt.save_dir):
        os.makedirs(opt.save_dir)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    feat_ext=True,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data
    anno_file = opt.data_list
    with open(anno_file, 'r') as f:
        data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        video_feat = net(video_input.astype(opt.dtype, copy=False))

        feat_file = '%s_%s_feat.npy' % (model_name, video_name)
        np.save(os.path.join(opt.save_dir, feat_file), video_feat.asnumpy())

        if vid > 0 and vid % opt.log_interval == 0:
            logger.info('%04d/%04d is done' % (vid, len(data_list)))

    end_time = time.time()
    logger.info('Total feature extraction time is %4.2f minutes' %
                ((end_time - start_time) / 60))
Example 9
video_fname = utils.download(url)
vr = decord.VideoReader(video_fname)
fast_frame_id_list = range(0, 64, 2)
slow_frame_id_list = range(0, 64, 16)
frame_id_list = list(fast_frame_id_list) + list(slow_frame_id_list)
video_data = vr.get_batch(frame_id_list).asnumpy()
clip_input = [video_data[vid, :, :, :] for vid, _ in enumerate(frame_id_list)]

################################################################
# Now we define transformations for the video clip.
# This transformation function does three things:
# center crop each image to 224x224 in size,
# transpose it to ``num_channels*num_frames*height*width``,
# and normalize with mean and standard deviation calculated across all ImageNet images.

transform_fn = video.VideoGroupValTransform(size=224, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
clip_input = transform_fn(clip_input)
clip_input = np.stack(clip_input, axis=0)
clip_input = clip_input.reshape((-1,) + (36, 3, 224, 224))
clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
print('Video data is downloaded and preprocessed.')

################################################################
# Next, we load a pre-trained SlowFast model with ResNet50 as backbone.

model_name = 'slowfast_4x16_resnet50_kinetics400'
net = get_model(model_name, nclass=400, pretrained=True)
print('%s model is successfully loaded.' % model_name)

################################################################
# Finally, we prepare the video clip and feed it to the model.
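################################################################
# A hedged sketch of that final inference step, mirroring Example 15
# below (assumes ``nd`` is ``mxnet.ndarray`` as in the other examples):

pred = net(nd.array(clip_input))

classes = net.classes
topK = 5
ind = nd.topk(pred, k=topK)[0].astype('int')
for i in range(topK):
    print('\t[%s], with probability %.3f.' %
          (classes[ind[i].asscalar()], nd.softmax(pred)[0][ind[i]].asscalar()))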
Example 10
def main(logger):
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    gc.set_threshold(100, 5, 5)

    # set env
    gpu_id = opt.gpu_id
    context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data
    anno_file = opt.data_list
    with open(anno_file, 'r') as f:
        data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred.asnumpy())
        pred_label = np.argmax(pred.asnumpy())
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        logger.info('%04d/%04d: %s is predicted to class %d' %
                    (vid, len(data_list), video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' %
                ((end_time - start_time) / 60))
Example 11
def main(logger):
    opt = parse_args()
    print(opt)

    # Garbage collection, default threshold is (700, 10, 10).
    # Set threshold lower to collect garbage more frequently and release more CPU memory for heavy data loading.
    gc.set_threshold(100, 5, 5)

    # set env
    num_gpus = opt.num_gpus
    batch_size = opt.batch_size
    context = [mx.cpu()]
    if num_gpus > 0:
        batch_size *= max(1, num_gpus)
        context = [mx.gpu(i) for i in range(num_gpus)]

    num_workers = opt.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # get data
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    if not opt.deploy:
        # get model
        if opt.use_pretrained and len(opt.hashtag) > 0:
            opt.use_pretrained = opt.hashtag
        classes = opt.num_classes
        model_name = opt.model
        # Currently, there is no hashtag for int8 models.
        if opt.quantized:
            model_name += '_int8'
            opt.use_pretrained = True

        net = get_model(name=model_name,
                        nclass=classes,
                        pretrained=opt.use_pretrained,
                        num_segments=opt.num_segments,
                        num_crop=opt.num_crop)
        net.cast(opt.dtype)
        net.collect_params().reset_ctx(context)
        if opt.mode == 'hybrid':
            net.hybridize(static_alloc=True, static_shape=True)
        if opt.resume_params != '' and not opt.use_pretrained:
            net.load_parameters(opt.resume_params, ctx=context)
            print('Pre-trained model %s is successfully loaded.' %
                  (opt.resume_params))
        else:
            print(
                'Pre-trained model is successfully loaded from the model zoo.')
    else:
        model_name = 'deploy'
        net = mx.gluon.SymbolBlock.imports(
            '{}-symbol.json'.format(opt.model_prefix), ['data'],
            '{}-0000.params'.format(opt.model_prefix))
        net.hybridize(static_alloc=True, static_shape=True)

    print("Successfully loaded model {}".format(model_name))
    # dummy data for benchmarking performance
    if opt.benchmark:
        benchmarking(opt, net, context)
        sys.exit()

    if opt.dataset == 'ucf101':
        val_dataset = UCF101(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             test_mode=True,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    elif opt.dataset == 'kinetics400':
        val_dataset = Kinetics400(
            setting=opt.val_list,
            root=opt.data_dir,
            train=False,
            new_width=opt.new_width,
            new_height=opt.new_height,
            new_length=opt.new_length,
            new_step=opt.new_step,
            target_width=opt.input_size,
            target_height=opt.input_size,
            video_loader=opt.video_loader,
            use_decord=opt.use_decord,
            slowfast=opt.slowfast,
            slow_temporal_stride=opt.slow_temporal_stride,
            fast_temporal_stride=opt.fast_temporal_stride,
            test_mode=True,
            num_segments=opt.num_segments,
            num_crop=opt.num_crop,
            transform=transform_test)
    elif opt.dataset == 'somethingsomethingv2':
        val_dataset = SomethingSomethingV2(setting=opt.val_list,
                                           root=opt.data_dir,
                                           train=False,
                                           new_width=opt.new_width,
                                           new_height=opt.new_height,
                                           new_length=opt.new_length,
                                           new_step=opt.new_step,
                                           target_width=opt.input_size,
                                           target_height=opt.input_size,
                                           video_loader=opt.video_loader,
                                           use_decord=opt.use_decord,
                                           num_segments=opt.num_segments,
                                           transform=transform_test)
    elif opt.dataset == 'hmdb51':
        val_dataset = HMDB51(setting=opt.val_list,
                             root=opt.data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length,
                             new_step=opt.new_step,
                             target_width=opt.input_size,
                             target_height=opt.input_size,
                             video_loader=opt.video_loader,
                             use_decord=opt.use_decord,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     last_batch='discard')
    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_data)))

    # calibrate FP32 model into INT8 model
    if opt.calibration:
        calibration(net, val_data, opt, context, logger)
        sys.exit()

    start_time = time.time()
    acc_top1_val, acc_top5_val = test(context, val_data, opt, net)
    end_time = time.time()

    print('Test accuracy: acc-top1=%f acc-top5=%f' %
          (acc_top1_val * 100, acc_top5_val * 100))
    print('Total evaluation time is %4.2f minutes' %
          ((end_time - start_time) / 60))
Example 12
def main():
    opt = parse_args()

    makedirs(opt.save_dir)

    filehandler = logging.FileHandler(
        os.path.join(opt.save_dir, opt.logging_file))
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    gc.set_threshold(100, 5, 5)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size,
                                                      mean=image_norm_mean,
                                                      std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name,
                    nclass=classes,
                    pretrained=opt.use_pretrained,
                    num_segments=opt.num_segments,
                    num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' %
                    (opt.resume_params))
    else:
        logger.info(
            'Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get classes list, if we are using a pretrained network from the model_zoo
    classes = None
    if opt.use_pretrained:
        if "kinetics400" in model_name:
            classes = Kinetics400Attr().classes
        elif "ucf101" in model_name:
            classes = UCF101Attr().classes
        elif "hmdb51" in model_name:
            classes = HMDB51Attr().classes
        elif "sthsth" in model_name:
            classes = SomethingSomethingV2Attr().classes

    # get data
    anno_file = opt.data_list
    with open(anno_file, 'r') as f:
        data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        pred = net(video_input.astype(opt.dtype, copy=False))
        if opt.save_logits:
            logits_file = '%s_%s_logits.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, logits_file), pred.asnumpy())
        pred_label = np.argmax(pred.asnumpy())
        if opt.save_preds:
            preds_file = '%s_%s_preds.npy' % (model_name, video_name)
            np.save(os.path.join(opt.save_dir, preds_file), pred_label)

        # Try to report a text label instead of the number.
        if classes:
            pred_label = classes[pred_label]

        logger.info('%04d/%04d: %s is predicted to class %s' %
                    (vid, len(data_list), video_name, pred_label))

    end_time = time.time()
    logger.info('Total inference time is %4.2f minutes' %
                ((end_time - start_time) / 60))
Example 13
def read_video_data(s3_video_path, num_frames=32):
    """Read and preprocess video data from the S3 bucket."""
    print('Reading and preprocessing video data...')
    s3_client = boto3.client('s3')
    fname = s3_video_path.replace('s3://', '')
    fname = fname.replace('S3://', '')
    fname = fname.replace('/', '')
    download_path = '/tmp/' + fname
    video_list_path = '/tmp/video_list' + str(uuid.uuid4()) + '.txt'
    bucket, key = get_bucket_and_key(s3_video_path)
    s3_client.download_file(bucket, key, download_path)

    # Make the download path unique by appending a UUID to the filename.
    filename, ext = os.path.splitext(download_path)  # save the file extension
    filename = filename + str(uuid.uuid4())
    os.rename(download_path, filename + ext)
    download_path = filename + ext

    # Pair the video path with a dummy duration and label.
    video_list = '{} {} {}'.format(download_path, 10, 1)
    with open(video_list_path, 'w') as fopen:
        fopen.write(video_list)

    # Constants
    data_dir = '/tmp/'
    num_segments = 1
    num_crop = 1  # needed by the slowfast branch below
    new_length = num_frames
    new_step = 1
    use_decord = True
    video_loader = True
    slowfast = False

    # Preprocessing parameters.
    # The transformation function does three things: center crop each image to
    # 224x224, transpose it to (num_channels, num_frames, height, width), and
    # normalize with the mean and standard deviation computed across all
    # ImageNet images. We use the general gluoncv dataloader VideoClsCustom
    # to load the data with num_frames = 32 as the clip length.
    input_size = 224
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    transform = video.VideoGroupValTransform(size=input_size,
                                             mean=mean,
                                             std=std)
    video_utils = VideoClsCustom(root=data_dir,
                                 setting=video_list_path,
                                 num_segments=num_segments,
                                 new_length=new_length,
                                 new_step=new_step,
                                 video_loader=video_loader,
                                 use_decord=use_decord,
                                 slowfast=slowfast)

    # Read the video path back from the video list entry.
    video_name = video_list.split()[0]

    decord = try_import_decord()
    decord_vr = decord.VideoReader(video_name)
    duration = len(decord_vr)

    skip_length = new_length * new_step
    segment_indices, skip_offsets = video_utils._sample_test_indices(duration)

    if video_loader:
        if slowfast:
            clip_input = video_utils._video_TSN_decord_slowfast_loader(
                video_name, decord_vr, duration, segment_indices, skip_offsets)
        else:
            clip_input = video_utils._video_TSN_decord_batch_loader(
                video_name, decord_vr, duration, segment_indices, skip_offsets)
    else:
        raise RuntimeError('We only support video-based inference.')

    clip_input = transform(clip_input)

    if slowfast:
        sparse_samples = len(clip_input) // (num_segments * num_crop)
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1, ) + (sparse_samples, 3,
                                                  input_size, input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
    else:
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1, ) + (new_length, 3, input_size,
                                                  input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))

    if new_length == 1:
        clip_input = np.squeeze(clip_input,
                                axis=2)  # this is for 2D input case

    clip_input = nd.array(clip_input)

    # Clean up temp files.
    os.remove(download_path)
    os.remove(video_list_path)

    return clip_input
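A hedged sketch of a caller for read_video_data (the S3 path is a placeholder, and get_model is the GluonCV model-zoo loader used throughout these examples):

from gluoncv.model_zoo import get_model
from mxnet import nd

# Placeholder bucket path; the returned NDArray is shaped
# (1, 3, num_frames, 224, 224) for the default settings above.
clip = read_video_data('s3://my-bucket/videos/sample.mp4', num_frames=32)
net = get_model('i3d_resnet50_v1_kinetics400', nclass=400, pretrained=True)
pred = net(clip)
print('Predicted class index:', int(nd.argmax(pred, axis=1).asscalar()))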
Example 14
def get_data_loader(opt, batch_size, num_workers, logger):
    data_dir = opt.train_dir
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    input_size = opt.input_size

    def batch_fn(batch, ctx):
        data = split_and_load(batch[0],
                              ctx_list=ctx,
                              batch_axis=0,
                              even_split=False)
        label = split_and_load(batch[1],
                               ctx_list=ctx,
                               batch_axis=0,
                               even_split=False)
        return data, label

    transform_train = video.VideoGroupTrainTransform(
        size=(input_size, input_size),
        scale_ratios=scale_ratios,
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])
    transform_test = video.VideoGroupValTransform(size=input_size,
                                                  mean=[0.485, 0.456, 0.406],
                                                  std=[0.229, 0.224, 0.225])

    if opt.dataset == 'ucf101':
        train_dataset = UCF101(setting=opt.train_setting,
                               root=data_dir,
                               train=True,
                               new_width=opt.new_width,
                               new_height=opt.new_height,
                               new_length=opt.new_length_diff,
                               target_width=input_size,
                               target_height=input_size,
                               num_segments=opt.num_segments,
                               transform=transform_train)
        val_dataset = UCF101(setting=opt.val_setting,
                             root=data_dir,
                             train=False,
                             new_width=opt.new_width,
                             new_height=opt.new_height,
                             new_length=opt.new_length_diff,
                             target_width=input_size,
                             target_height=input_size,
                             num_segments=opt.num_segments,
                             transform=transform_test)
    else:
        logger.info('Dataset %s is not supported yet.' % (opt.dataset))

    logger.info('Load %d training samples and %d validation samples.' %
                (len(train_dataset), len(val_dataset)))

    train_data = gluon.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers,
                                       prefetch=int(opt.prefetch_ratio *
                                                    num_workers),
                                       last_batch='rollover')
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     prefetch=int(opt.prefetch_ratio *
                                                  num_workers),
                                     last_batch='discard')

    return train_data, val_data, batch_fn
Example 15
def get_action_recognition(video_obj,
                           model_arch="slowfast_4x16_resnet50_kinetics400"):
    '''Recognize the action in a video and return the top-K classes
    with probabilities as a DataFrame. (TODO: full documentation.)
    '''
    # starting decord
    decord = try_import_decord()

    net = get_model(model_arch, pretrained=True)

    try:
        video_obj = utils.download(video_obj)
    except ValueError:
        pass

    vr = decord.VideoReader(video_obj)

    if "slowfast" in model_arch:
        fast_frame_id_list = range(0, 64, 2)
        slow_frame_id_list = range(0, 64, 16)
        frame_id_list = list(fast_frame_id_list) + list(slow_frame_id_list)
    else:
        frame_id_list = range(0, 64, 2)

    print("=========Reached here============")

    video_data = vr.get_batch(frame_id_list).asnumpy()
    clip_input = [
        video_data[vid, :, :, :] for vid, _ in enumerate(frame_id_list)
    ]

    if "inceptionv3" in model_arch:
        transform_fn = video.VideoGroupValTransform(size=299,
                                                    mean=[0.485, 0.456, 0.406],
                                                    std=[0.229, 0.224, 0.225])
        clip_input = transform_fn(clip_input)
        clip_input = np.stack(clip_input, axis=0)
        if "slowfast" in model_arch:
            clip_input = clip_input.reshape((-1, ) + (36, 3, 340, 450))
        else:
            clip_input = clip_input.reshape((-1, ) + (32, 3, 340, 450))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
    else:
        transform_fn = video.VideoGroupValTransform(size=224,
                                                    mean=[0.485, 0.456, 0.406],
                                                    std=[0.229, 0.224, 0.225])
        clip_input = transform_fn(clip_input)
        clip_input = np.stack(clip_input, axis=0)
        if "slowfast" in model_arch:
            clip_input = clip_input.reshape((-1, ) + (36, 3, 224, 224))
        else:
            clip_input = clip_input.reshape((-1, ) + (32, 3, 224, 224))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))

    pred = net(nd.array(clip_input))

    classes = net.classes
    topK = 5
    ind = nd.topk(pred, k=topK)[0].astype('int')
    resList = []

    for i in range(topK):
        resList.append([
            classes[ind[i].asscalar()],
            nd.softmax(pred)[0][ind[i]].asscalar()
        ])

    resDF = pd.DataFrame(resList, columns=["class", "prob"])
    return resDF
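Calling the helper is then a one-liner (the URL below is a placeholder):

# Hypothetical call: returns a pandas DataFrame with the top-5 classes
# and their probabilities.
result = get_action_recognition(
    'https://example.com/sample_video.mp4',  # placeholder URL
    model_arch='slowfast_4x16_resnet50_kinetics400')
print(result)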
Example 16
def main():
    global args, best_loss
    # create model, load existing models from gluoncv
    print(" > Creating model ... !")
    num_gpus = args.num_gpus
    batch_size = args.batch_size
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = args.num_workers
    print('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus))

    # =================== load model and parameters =======================
    classes = args.num_classes
    model_name = args.model
    model = get_model(name=model_name,
                      nclass=classes,
                      pretrained=args.use_pretrained,
                      num_segments=args.num_segments)
    model.cast(args.dtype)
    model.collect_params().reset_ctx(context)

    if args.mode == 'hybrid':
        model.hybridize(static_alloc=True, static_shape=True)
    if args.resume_params != '' and not args.use_pretrained:
        model.load_parameters(args.resume_params, ctx=context)
        print('Pre-trained model %s is successfully loaded.' %
              (args.resume_params))
    else:
        print('Pre-trained model is successfully loaded from the model zoo.')

    # ===================== load dataset =====================
    global transform_post

    transform_post = video.VideoGroupValTransform(size=args.input_size,
                                                  mean=[0, 0, 0],
                                                  std=[1, 1, 1])
    val_dataset = SomethingSomethingV2_revise(setting=args.val_list,
                                              root=args.data_dir,
                                              train=False,
                                              new_width=args.new_width,
                                              new_height=args.new_height,
                                              new_length=args.new_length,
                                              new_step=args.new_step,
                                              target_width=args.input_size,
                                              target_height=args.input_size,
                                              video_loader=args.video_loader,
                                              use_decord=args.use_decord,
                                              num_segments=args.num_segments,
                                              transform=transform_post)

    val_loader = gluon.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        #num_workers=num_workers,
        prefetch=int(args.prefetch_ratio * num_workers),
        batchify_fn=tsn_mp_batchify_fn,
        last_batch='discard')

    print('Load %d test samples in %d iterations.' %
          (len(val_dataset), len(val_loader)))

    # ========================== attack ===========================
    if args.eval_only:
        validate(val_loader, model, context)
        print(" > Evaluation DONE !")
        return