Example #1
                       cropping,
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       GroupNormalize(net.input_mean, net.input_std),
                   ])),
        batch_size=1, shuffle=False,
        num_workers=args.workers * 2, pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))


#net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
net = torch.nn.DataParallel(net.cuda())
net.eval()

data_gen = enumerate(data_loader)

total_num = len(data_loader.dataset)
output = []


def eval_video(video_data):
    i, data, label = video_data
    num_crop = args.test_crops

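    # channels per segment: an RGB frame has 3; Flow stacks 5 (x, y) pairs -> 10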
    if args.modality == 'RGB':
        length = 3
    elif args.modality == 'Flow':
Example #2
        Stack(roll=args.arch == 'BNInception'),
        ToTorchFormatTensor(div=args.arch != 'BNInception'),
        GroupNormalize(net.input_mean, net.input_std)
    ])),
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=args.workers * 2,
                                          pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))
print(devices)

net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)

if args.resume:
    if os.path.isfile(args.resume):
        print(("=> loading checkpoint '{}'".format(args.resume)))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        net.load_state_dict(checkpoint['state_dict'])
        print(("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch'])))
    else:
        print(("=> no checkpoint found at '{}'".format(args.resume)))

# TODO: figure out why len(devices) > 1 causes a bug here
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(originalNet.input_mean, originalNet.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=False)

    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
    else:
        devices = list(range(args.workers))

    #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    originalNet = torch.nn.DataParallel(originalNet.cuda())
    originalNet.eval()

    # exit()  # debug leftover; everything below would otherwise be unreachable
    data_gen = enumerate(data_loader)

    total_num = len(data_loader.dataset)
    output = []

    def eval_video(video_data):
        i, data, label = video_data
        num_crop = args.test_crops

        if "RGBFlow" == 'RGB':
            length = 3
        elif "RGBFlow" == 'Flow':
Example #4
          args.modality,
          base_model=args.arch,
          consensus_type=args.consensus_type,
          img_feature_dim=args.img_feature_dim,
          print_spec=False)

weights = args.weight
checkpoint = torch.load(weights)
#print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1']))

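# strip the 'module.' prefix that nn.DataParallel prepends to checkpoint keys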
base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}
net.load_state_dict(base_dict)
net.cuda().eval()

# Initialize frame transforms.

transform = torchvision.transforms.Compose([
    GroupOverSample(net.input_size, net.scale_size),
    Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
    ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
    GroupNormalize(net.input_mean, net.input_std),
])

# Obtain video frames
if args.frame_folder is not None:
    print('Loading frames in %s' % args.frame_folder)
    import glob
    # make sure the sorted frame paths come out in the correct temporal order
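    # hypothetical sketch (the original snippet is truncated here): sort the
    # frame paths numerically so 'im2.jpg' precedes 'im10.jpg'; assumes `os`
    # is imported at the top of the script
    frame_paths = sorted(glob.glob(os.path.join(args.frame_folder, 'im*.jpg')),
                         key=lambda p: int(''.join(filter(str.isdigit, os.path.basename(p)))))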
                   new_length=1 if args.modality == "RGB" else 5,
                   modality=args.modality,
                   image_tmpl="im{}.jpg",
                   test_mode=True,
                   save_scores=True,
                   transform=torchvision.transforms.Compose([
                       cropping,
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=1, shuffle=False,
        num_workers=args.workers, pin_memory=True)


net = net.cuda()
net.eval()

data_gen = enumerate(data_loader)
total_num = len(data_loader.dataset)
output = []


def eval_video(video_data):
    i, data, label = video_data
    num_crop = args.test_crops

    if args.modality == 'RGB':
        length = 3
    elif args.modality == 'Flow':
        length = 10
ei = 0
while os.path.exists(logdir + '/%d/' % ei):
    ei += 1

#################################
# main loop
#################################

for di in range(0, args.num_experiments):
    p['logdir'] = './%s/%s/%d/%d/' % (args.logdir, expdir, ei, di)
    if not os.path.exists(p['logdir']):
        os.makedirs(p['logdir'])

    model = TSN(p, dataset_train)
    model = model.cuda(device)

    optim = get_optimizer(args, model)

    max_perf_val = 0.0    
    max_perf_aux = 0.0
    for epoch in range(0, args.num_epochs):
        stats_train = process_epoch('train', epoch, p, dataloader_train, model, optim)
        stats_val = process_epoch('val', epoch, p, dataloader_val, model)
        
        perf_val = stats_val['top1.cause'] + stats_val['top1.effect']
        perf_val_aux = stats_val['top2.cause'] + stats_val['top2.effect']
        if perf_val >= max_perf_val:
            if perf_val_aux >= max_perf_aux:
                max_perf_val = perf_val
                max_perf_aux = perf_val_aux
Example #7
def get_pred(video_path, caption_path, opt):
    # options
    parser = argparse.ArgumentParser(
        description="TRN testing on the full validation set")
    # parser.add_argument('dataset', type=str, choices=['something','jester','moments','charades'])
    # parser.add_argument('modality', type=str, choices=['RGB', 'Flow', 'RGBDiff'])

    parser.add_argument('--dataset', type=str, default='somethingv2')
    parser.add_argument('--modality', type=str, default='RGB')

    parser.add_argument(
        '--weights',
        type=str,
        default=
        'model/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar'
    )
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--save_scores', type=str, default=None)
    parser.add_argument('--test_segments', type=int, default=8)
    parser.add_argument('--max_num', type=int, default=-1)
    parser.add_argument('--test_crops', type=int, default=10)
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--crop_fusion_type',
                        type=str,
                        default='TRNmultiscale',
                        choices=['avg', 'TRN', 'TRNmultiscale'])
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--gpus', nargs='+', type=int, default=None)
    parser.add_argument('--img_feature_dim', type=int, default=256)
    parser.add_argument(
        '--num_set_segments',
        type=int,
        default=1,
        help='TODO: select multiply set of n-frames from a video')
    parser.add_argument('--softmax', type=int, default=0)

    args = parser.parse_args()

    def accuracy(output, target, topk=(1, )):
        """Return the top-k class probabilities and predicted indices."""
        maxk = max(topk)
        batch_size = target.size(0)
        prob, pred = output.topk(maxk, 1, True, True)
        prob = prob.t().data.numpy().squeeze()
        pred = pred.t().data.numpy().squeeze()
        return prob, pred

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality, opt)
    num_class = len(categories)

    net = TSN(num_class,
              args.test_segments
              if args.crop_fusion_type in ['TRN', 'TRNmultiscale'] else 1,
              args.modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              img_feature_dim=args.img_feature_dim,
              opt=opt)

    try:
        checkpoint = torch.load(args.weights)
    except (IOError, OSError):  # fall back to a project-relative path
        args.weights = os.path.join(opt.project_root, 'scripts/Eval/',
                                    args.weights)
        checkpoint = torch.load(args.weights)

    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

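    # strip the 'module.' prefix that nn.DataParallel prepends to checkpoint keys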
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    net.load_state_dict(base_dict)

    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported, but got {}".format(
                args.test_crops))

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        video_path,
        caption_path,
        num_segments=args.test_segments,
        new_length=1 if args.modality == "RGB" else 5,
        modality=args.modality,
        image_tmpl=prefix,
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=True)

    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
    else:
        devices = list(range(args.workers))

    #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net = torch.nn.DataParallel(net.cuda())
    net.eval()

    data_gen = enumerate(data_loader)

    output = []

    def eval_video(video_data):
        i, data, label = video_data
        num_crop = args.test_crops

        if args.modality == 'RGB':
            length = 3
        elif args.modality == 'Flow':
            length = 10
        elif args.modality == 'RGBDiff':
            length = 18
        else:
            raise ValueError("Unknown modality " + args.modality)

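        # volatile=True disables gradient tracking (legacy pre-0.4 PyTorch inference API)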
        input_var = torch.autograd.Variable(data.view(-1, length, data.size(2),
                                                      data.size(3)),
                                            volatile=True)
        rst = net(input_var)
        if args.softmax == 1:
            # apply softmax to convert the raw scores into probabilities
            rst = F.softmax(rst)

        rst = rst.data.cpu().numpy().copy()

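        # TRN fusion keeps one score per crop; avg fusion averages over crops
        # and keeps one score row per segment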
        if args.crop_fusion_type in ['TRN', 'TRNmultiscale']:
            rst = rst.reshape(-1, 1, num_class)
        else:
            rst = rst.reshape((num_crop, args.test_segments,
                               num_class)).mean(axis=0).reshape(
                                   (args.test_segments, 1, num_class))

        return i, rst, label[0]

    max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset)

    prob_all, pred_all = [], []
    for i, (data, label) in data_gen:
        if i >= max_num:
            break
        rst = eval_video((i, data, label))
        output.append(rst[1:])
        prob, pred = accuracy(torch.from_numpy(np.mean(rst[1], axis=0)),
                              label,
                              topk=(1, 174))
        prob_all.append(prob)
        pred_all.append(pred)
    return prob_all, pred_all
    """
    base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())}
    for key in ['consensus.fc_fusion_scales.6.3.bias', 'consensus.fc_fusion_scales.5.3.bias', 'consensus.fc_fusion_scales.4.3.bias',
    'consensus.fc_fusion_scales.3.3.bias', 'consensus.fc_fusion_scales.2.3.bias', 'consensus.fc_fusion_scales.1.3.bias',
    'consensus.fc_fusion_scales.0.3.bias', 'consensus.fc_fusion_scales.6.3.weight', 'consensus.fc_fusion_scales.5.3.weight',
    'consensus.fc_fusion_scales.4.3.weight', 'consensus.fc_fusion_scales.3.3.weight', 'consensus.fc_fusion_scales.2.3.weight',
    'consensus.fc_fusion_scales.1.3.weight', 'consensus.fc_fusion_scales.0.3.weight']:
    del base_dict[key]
    #print(base_dict)
    """
    #net.load_state_dict(base_dict, strict=False)
    net.load_state_dict(checkpoint, strict=True)
    #print(net)
    #exit(0)
    net.eval()
    net.cuda()

    # Initialize frame transforms.
    transform = torchvision.transforms.Compose([
        transforms.GroupOverSample(net.module.input_size, net.module.scale_size),
        transforms.Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
        transforms.ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
        transforms.GroupNormalize(net.module.input_mean, net.module.input_std),
    ])

    segments_gt = [0, 0, 1, 1, 0, 0, 0,
                   0, 0, 1, 1, 1, 1, 0,
                   1, 0, 0, 0, 0, 0, 0,
                   1, 1, 1, 0, 0, 0, 0,
                   1, 1, 1, 0, 0, 1, 1,
                   2, 2, 0, 0, 1, 1, 1,
Example #9
def main():
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    # typical configuration:
    #   consensus_type = 'avg'
    #   base_model = 'resnet101'
    #   dropout = 0.5
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # crop_size is typically 224, scale_size 256 (frames are scaled before the 224 crop)
    crop_size = model.crop_size
    scale_size = model.scale_size
    # input mean/std differ per modality
    input_mean = model.input_mean
    input_std = model.input_std

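    # parameter groups with per-group lr_mult / decay_mult (printed below)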
    policies = model.get_optim_policies()
    # the augmentation pipeline combines GroupMultiScaleCrop and
    # GroupRandomHorizontalFlip; GroupMultiScaleCrop takes crops at fixed
    # positions along the borders and resizes them to 224x224 with bilinear
    # interpolation
    train_augmentation = model.get_augmentation()
    print(args.gpus)
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
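    # RGBDiff inputs are frame differences, so the usual mean/std normalization is skipped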
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

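    # frames sampled per segment: 1 RGB frame, or 5 stacked frames for Flow/RGBDiff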
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # roll is needed because BNInception was trained on BGR channel order rather than RGB
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    # print the optimizer policy groups
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    # the base learning rate here is typically 1e-3
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # evaluation-only mode: validate once and return
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    viz = vis.Visualizer()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, viz)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, viz=viz)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': prec1,
                }, is_best)
Example #10
def main():
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

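    # 339 matches the Moments-in-Time checkpoint loaded below; the classifier head is replaced afterwards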
    model = TSN(339,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)
    _, cnn = list(model.named_children())[0]
    for p in cnn.parameters():
        p.requires_grad = False

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # remove if not transfer learning
    checkpoint = torch.load('/home/ec2-user/mit_weights.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])

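    # swap the last linear layer of each fusion MLP for a fresh num_class-way classifier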
    for module in list(
            list(model._modules['module'].children())
        [-1].children())[-1].children():
        module[-1] = nn.Linear(256, num_class)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    model.cuda()

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                8,
                                                gamma=0.1,
                                                last_epoch=-1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training,
                             epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
    summary_writer.close()
Example #11
def eval_one_model(num_class, modality, weights, devices, args):

    # init model
    net = TSN(num_class,
              1,
              modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              dropout=args.dropout,
              mdl=args.mdl,
              pretrained=False)

    # load checkpoint
    checkpoint = torch.load(weights)
    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    base_dict = checkpoint['state_dict']
    # base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())}
    net.load_state_dict(base_dict)

    # transformer
    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported, but got {}".format(
                args.test_crops))

    # prepare dataset
    if args.dataset == 'ucf101':
        naming_pattern = "frame{:06d}.jpg" if modality in [
            "RGB", "RGBDiff", 'tvl1'
        ] else args.flow_prefix + "{}_{:06d}.jpg"
    else:
        naming_pattern = "image_{:05d}.jpg" if modality in [
            "RGB", "RGBDiff"
        ] else args.flow_prefix + "{}_{:05d}.jpg"

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        os.path.join(args.data_root_path,
                     ('jpegs_256' if modality == 'RGB' else 'tvl1_flow')),
        args.test_list,
        num_segments=args.test_segments,
        new_length=4 if modality == "RGB" else 6,
        modality=modality,
        image_tmpl=naming_pattern,
        test_mode=True,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=True)

    data_gen = iter(data_loader)

    total_num = len(data_loader.dataset)
    output = []  # [class probability, label code]

    # Inferencing

    net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net.eval()

    max_num = len(data_loader.dataset)

    for i in tqdm(range(max_num)):
        data, label = next(data_gen)
        output.append(
            eval_video(net, (i, data, label), num_class, modality, args))

    video_pred = [np.argmax(np.mean(x[1], axis=0)) for x in output]
    video_labels = [x[2] for x in output]

    # summarize results
    cf = confusion_matrix(video_labels, video_pred).astype(float)

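    # per-class accuracy: diagonal hits divided by per-class sample counts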
    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)

    cls_acc = cls_hit / cls_cnt
    print('Accuracy of {}, {:.02f}%'.format(modality, np.mean(cls_acc) * 100))

    del net
    del data_loader

    class_acc_map = class_acc_mapping(cls_acc, args.dataset)

    return output, video_labels, class_acc_map
Example #12
def main():
    check_rootfolders()
    global best_prec1
    if args.run_for == 'train':
        categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
            args.dataset, args.modality)
    elif args.run_for == 'test':
        categories, args.test_list, args.root_path, prefix = datasets_video.return_data(
            args.dataset, args.modality)

    num_class = len(categories)

    args.store_name = '_'.join([
        'STModeling', args.dataset, args.modality, args.arch,
        args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # best_prec1 = 0
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    #print(model)
    cudnn.benchmark = True

    # Data loading code
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion

    if args.run_for == 'train':
        train_loader = torch.utils.data.DataLoader(TSNDataSet(
            "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=False)

        val_loader = torch.utils.data.DataLoader(TSNDataSet(
            "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=False)

        # define loss function (criterion) and optimizer
        if args.loss_type == 'nll':
            criterion = torch.nn.CrossEntropyLoss().cuda()
        else:
            raise ValueError("Unknown loss type")

        for group in policies:
            print(
                ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                    group['name'], len(group['params']), group['lr_mult'],
                    group['decay_mult'])))

        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        if args.consensus_type == 'DNDF':
            params = [p for p in model.parameters() if p.requires_grad]
            optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=1e-5)

        if args.evaluate:
            validate(val_loader, model, criterion, 0)
            return

        log_training = open(
            os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
        for epoch in range(args.start_epoch, args.epochs):
            if args.consensus_type != 'DNDF':
                adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)

    elif args.run_for == 'test':
        print("=> loading checkpoint '{}'".format(args.root_weights))
        checkpoint = torch.load(args.root_weights)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda().eval()
        print("=> loaded checkpoint ")

        test_loader = torch.utils.data.DataLoader(TSNDataSet(
            "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
            args.test_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=args.workers,
                                                  pin_memory=False)

        # cam = cv2.VideoCapture(0)
        # cam.set(cv2.CAP_PROP_FPS, 48)

        # for i, (input, _) in enumerate(test_loader):
        #     with torch.no_grad():
        #         input_var = torch.autograd.Variable(input)
        #
        # ret, frame = cam.read()
        # frame_map = np.full((280, 640, 3), 0, np.uint8)
        # frame_map = frame
        # print(frame_map)
        # while (True):
        #     bg = np.full((480, 1200, 3), 15, np.uint8)
        #     bg[:480, :640] = frame
        #
        #     font = cv2.FONT_HERSHEY_SIMPLEX
        #     # cv2.rectangle(bg, (128, 48), (640 - 128, 480 - 48), (0, 255, 0), 3)
        #
        #     cv2.imshow('preview', bg)
        #
        #     if cv2.waitKey(1) & 0xFF == ord('q'):
        #         break

        test(test_loader, model, categories)
Example #13
                   transform=torchvision.transforms.Compose([
                       cropping,
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       GroupNormalize(net.input_mean, net.input_std),
                   ])),
        batch_size=1, shuffle=False,
        num_workers=args.workers * 2, pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))


net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
net.eval()

data_gen = enumerate(data_loader)

total_num = len(data_loader.dataset)
output = []


def eval_video(video_data):
    i, data, label = video_data
    num_crop = args.test_crops

    if args.modality == 'RGB':
        length = 3
    elif args.modality == 'Flow':
Example #14
def main():
    logger.auto_set_dir()

    global args, best_prec1

    import argparse
    parser = argparse.ArgumentParser(description="PyTorch implementation of Temporal Segment Networks")
    parser.add_argument('--dataset', type=str, default="something", choices=['something', 'jester', 'moments'])
    parser.add_argument('--modality', type=str, default="RGB", choices=['RGB', 'Flow'])
    parser.add_argument('--train_list', type=str, default="")
    parser.add_argument('--val_list', type=str, default="")
    parser.add_argument('--root_path', type=str, default="")
    parser.add_argument('--store_name', type=str, default="")
    # ========================= Model Configs ==========================
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--num_segments', type=int, default=3)
    parser.add_argument('--consensus_type', type=str, default='avg')
    parser.add_argument('--k', type=int, default=3)

    parser.add_argument('--dropout', '--do', default=0.8, type=float,
                        metavar='DO', help='dropout ratio (default: 0.8)')
    parser.add_argument('--loss_type', type=str, default="nll",
                        choices=['nll'])
    parser.add_argument('--img_feature_dim', default=256, type=int, help="the feature dimension for each frame")

    # ========================= Learning Configs ==========================
    parser.add_argument('--epochs', default=120, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-b', '--batch_size', default=128, type=int,
                        metavar='N', help='mini-batch size (default: 128)')
    parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                        metavar='LR', help='initial learning rate')
    parser.add_argument('--lr_steps', default=[50, 100], type=float, nargs="+",
                        metavar='LRSteps', help='epochs to decay learning rate by 10')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float,
                        metavar='W', help='weight decay (default: 5e-4)')
    parser.add_argument('--clip-gradient', '--gd', default=20, type=float,
                        metavar='W', help='gradient norm clipping (default: 20)')
    parser.add_argument('--no_partialbn', '--npb', default=False, action="store_true")

    # ========================= Monitor Configs ==========================
    parser.add_argument('--print-freq', '-p', default=20, type=int,
                        metavar='N', help='print frequency (default: 20)')
    parser.add_argument('--eval-freq', '-ef', default=5, type=int,
                        metavar='N', help='evaluation frequency (default: 5)')

    # ========================= Runtime Configs ==========================
    parser.add_argument('-j', '--workers', default=30, type=int, metavar='N',
                        help='number of data loading workers (default: 30)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--snapshot_pref', type=str, default="")
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--gpu', type=str, default='4')
    parser.add_argument('--flow_prefix', default="", type=str)
    parser.add_argument('--root_log', type=str, default='log')
    parser.add_argument('--root_model', type=str, default='model')
    parser.add_argument('--root_output', type=str, default='output')

    args = parser.parse_args()

    args.consensus_type = "TRN"
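    # make CUDA device numbering match nvidia-smi so the ids in CUDA_VISIBLE_DEVICES line up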
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device_ids = [int(id) for id in args.gpu.split(',')]
    assert len(device_ids) > 1, "TRN must run with GPU_num > 1"

    args.root_log = logger.get_logger_dir()
    args.root_model = logger.get_logger_dir()
    args.root_output = logger.get_logger_dir()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)


    args.store_name = '_'.join(['TRN', args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d'% args.num_segments])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)  # TODO: pass device_ids=[int(id) for id in args.gpu.split(',')]

    if torch.cuda.is_available():
        model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        logger.info('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
Example #15
def main():
    global args, best_prec1
    args = parser.parse_args()

    if not os.path.exists(args.record_path + args.modality.lower()):
        os.mkdir(args.record_path + args.modality.lower())

    num_class = 2

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_set = TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="frame_{:06d}.jpg"
        if args.modality in ["RGB", "RGBDiff"] else "{:06d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_set = TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="frame_{:06d}.jpg"
        if args.modality in ["RGB", "RGBDiff"] else "{:06d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer

    criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1, pred_dict = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            with open(
                    args.record_path + args.modality.lower() + '/' +
                    args.snapshot_pref + args.modality.lower() +
                    '_video_preds.pickle', 'wb') as f:
                pickle.dump(pred_dict, f)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)