Example #1
0
def get_set_loader():
    """Build the train and test DataLoaders for TSN video training.

    Reads module-level config: _MODALITY, _IMAGE_SIZE, _TRAIN_LIST,
    _TEST_LIST, _BATCH_SIZE, _TEST_BATCH_SIZE, _NUM_W, my_collate.

    Returns:
        (train_loader, test_loader): pair of torch DataLoader objects.
    """
    # FIX: replaced redundant `True if ... else False` with the boolean
    # expression itself.
    pure = _MODALITY in ('rgb', 'flow')
    # Modalities whose frames are stored as img_XXXXX.jpg; anything else
    # uses the flow naming scheme flow_{}_XXXXX.jpg.
    modlist = ["rgb", "rgbdsc", "flyflow", "edr1"]
    # Hoisted: the same template expression was duplicated for both datasets.
    image_tmpl = ("img_{:05d}.jpg"
                  if _MODALITY in modlist else "flow_{}_{:05d}.jpg")

    train_transform = torchvision.transforms.Compose([
        GroupScale(size=256),
        GroupRandomCrop(size=_IMAGE_SIZE),
        Stack(modality=_MODALITY),
        ToTorchFormatTensor(pure=pure)
    ])

    test_transform = torchvision.transforms.Compose([
        GroupScale(size=256),
        GroupCenterCrop(size=_IMAGE_SIZE),
        Stack(modality=_MODALITY),
        ToTorchFormatTensor(pure=pure)
    ])

    train_dataset = TSNDataSet(
        "",
        _TRAIN_LIST,
        num_segments=1,
        new_length=64,
        modality=_MODALITY,
        image_tmpl=image_tmpl,
        transform=train_transform)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=_BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=_NUM_W,
                                               collate_fn=my_collate)

    test_dataset = TSNDataSet(
        "",
        _TEST_LIST,
        num_segments=1,
        new_length=64,
        modality=_MODALITY,
        image_tmpl=image_tmpl,
        transform=test_transform)

    # NOTE(review): shuffle=True on the test loader is unusual — confirm it
    # is intentional rather than a copy-paste from the train loader.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=_TEST_BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=_NUM_W,
                                              collate_fn=my_collate)
    return train_loader, test_loader
Example #2
0
    def run(self):
        """Run test-time inference for the current modality and store the
        averaged prediction for the module-level StartFrame.

        Builds the crop pipeline from args.test_crops (1 = center crop,
        10 = oversample), scores every segment chunk of each video, and
        writes the mean score into the module-level rgb_whole_pred /
        opf_whole_pred dict keyed by str(StartFrame).
        """
        if args.test_crops == 1:
            cropping = torchvision.transforms.Compose([
                GroupScale(self.net.scale_size),
                GroupCenterCrop(self.net.input_size)
            ])
        elif args.test_crops == 10:
            cropping = torchvision.transforms.Compose(
                [GroupOverSample(self.net.input_size, self.net.scale_size)])
        else:
            raise ValueError(
                "Only 1 and 10 crops are supported while we got {}".format(
                    args.test_crops))

        # Frame directories / list files are pre-extracted per modality.
        if self.modality == 'RGB':
            root_path = 'record/test/temp_chunk/'
            test_list = 'rgb_video_test_list.txt'
        else:
            root_path = 'record/test/temp_opf/'
            test_list = 'opf_video_test_list.txt'
        data_set = TSNDataSet(root_path,
                              test_list,
                              num_segments=args.test_segments,
                              # non-RGB samples stack 5 frames each.
                              new_length=1 if self.modality == "RGB" else 5,
                              modality=self.modality,
                              image_tmpl="frame_{:06d}.jpg" if self.modality
                              in ["RGB", "RGBDiff"] else "{:06d}.jpg",
                              test_mode=True,
                              transform=torchvision.transforms.Compose([
                                  cropping,
                                  Stack(roll=False),
                                  ToTorchFormatTensor(div=True),
                                  GroupNormalize(self.net.input_mean,
                                                 self.net.input_std),
                              ]))
        data_loader = torch.utils.data.DataLoader(data_set,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=args.workers,
                                                  pin_memory=True)
        self.net.cuda()
        self.net.eval()
        data_gen = enumerate(data_loader)
        output = []
        for i, (keys, data, label) in data_gen:
            # Split the stacked segments apart (dim 1) and score each one.
            a = data.chunk(args.test_segments, 1)
            res = []
            for j in a:
                rst = self.eval_video((i, j, label))
                res.append(rst)
            output.append((res, label[0]))
        # NOTE(review): only output[0] is consumed, which assumes a single
        # video per run — confirm against the caller.
        if self.modality == 'RGB':
            rgb_whole_pred[str(StartFrame)] = np.mean(output[0][0], axis=0)
        else:
            opf_whole_pred[str(StartFrame)] = np.mean(output[0][0], axis=0)
        return
Example #3
0
def main():
    """Train (or evaluate) a TRN model on a video dataset.

    Parses CLI args from the module-level `parser`, builds a TSN model,
    wires up the train/val data loaders, then runs the epoch loop with
    periodic validation and checkpointing of the best top-1 precision.

    Uses globals: args, best_prec1.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.test_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    # Experiment name used for logs / checkpoints.
    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Read preprocessing metadata / optimizer policies before DataParallel
    # wraps the model (the wrapper hides these attributes).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading. RGBDiff skips mean/std normalization (identity
    # transform); the other modalities normalize with the model's stats.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously an unexpected modality fell through silently and
        # surfaced as a confusing NameError when data_length was first used.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # roll/div follow the BNInception/InceptionV3 convention
                # used throughout this file.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # Loss and optimizer (per-group lr/decay multipliers come from policies).
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #4
0
def main():
    """Train (or evaluate) a TSN model on UCF101/HMDB51/Kinetics frames.

    Parses CLI args from the module-level `parser`, builds the model and the
    "UCF-Frames" train/val loaders, then runs the epoch loop with periodic
    validation and best-top1 checkpointing.

    Uses globals: args, best_prec1.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Read preprocessing metadata / optimizer policies before DataParallel
    # wraps the model (the wrapper hides these attributes).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading: RGBDiff skips mean/std normalization (identity).
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously an unexpected modality fell through silently and
        # surfaced as a confusing NameError when data_length was first used.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # roll/div follow the BNInception convention used in this file.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # Loss and optimizer (per-group lr/decay multipliers come from policies).
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #5
0
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    cropping = torchvision.transforms.Compose([
        GroupOverSample(net.input_size, net.scale_size)
    ])
else:
    raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops))

data_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.val_list, num_segments=args.test_segments,
                   new_length=1 if args.modality == "RGB" else 5,
                   modality=args.modality,
                   image_tmpl=prefix,
                   test_mode=True,
                   transform=torchvision.transforms.Compose([
                       cropping,
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       GroupNormalize(net.input_mean, net.input_std),
                   ])),
        batch_size=1, shuffle=False,
        num_workers=args.workers * 2, pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))


#net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    if "RGBFlow" == 'RGB':
        data_length = 1
    elif "RGBFlow" in ['Flow', 'RGBDiff']:
        data_length = 5
    elif "RGBFlow" == 'RGBFlow':
        data_length = args.num_motion

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.test_segments,
        new_length=data_length,
        modality="RGBFlow",
        image_tmpl=prefix,
        dataset="emmanuelle",
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                  isRGBFlow=("RGBFlow" == 'RGBFlow')),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(originalNet.input_mean, originalNet.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=False)

    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
Example #7
0
def main():
    """Run a single visualization pass over the validation set.

    Adds a --class_index arg, builds the class-id -> name map, restores the
    model from args.resume if given, then calls validate() once over the
    (optionally frame-reversed or frame-shuffled) validation loader, logging
    to <result_path>/visualize.log.

    Uses globals: args, best_prec1, class_to_name.
    """
    global args, best_prec1, class_to_name
    # NOTE(review): mutating the module-level parser inside main() raises an
    # argparse conflict if main() runs twice — confirm single-shot usage.
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Build idx -> readable class name; 'something' lists one name per line,
    # the other datasets use "<1-based id> <name>" lines.
    if args.dataset == 'something':
        img_prefix = ''
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {
            int(line.strip().split(' ')[0]) - 1: line.strip().split(' ')[1]
            for line in content
        }

    # Persist the run options next to the results for reproducibility.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)
    if not (args.consensus_type == 'lstm'
            or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    # Read preprocessing metadata before DataParallel hides it.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously printed args.evaluate instead
            # of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading: RGBDiff skips mean/std normalization (identity).
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
        # BUG FIX: rev_normalize was left undefined on this branch, raising
        # UnboundLocalError at the validate() call below for RGBDiff.
        rev_normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
    else:
        # Previously fell through silently and surfaced as a NameError when
        # data_length was first used.
        raise ValueError('Unknown modality ' + args.modality)

    # Optional temporal perturbation of the evaluated clips.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length *
                                           args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length *
                                           args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg" if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        # NOTE(review): shuffle=True on a validation loader is unusual —
        # presumably to sample varied clips for visualization; confirm.
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    print('visualizing...')
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader,
             model,
             0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
Example #8
0
        GroupScale(net.scale_size),
        GroupCenterCrop(net.input_size),
    ])
elif args.test_crops == 10:
    cropping = torchvision.transforms.Compose([
        GroupOverSample(net.input_size, net.scale_size)
    ])
else:
    raise ValueError("Only 1 and 10 crops are supported while we got {}".format(args.test_crops))
# Build the score-saving test loader: one video per batch, frames named
# "im{}.jpg", no shuffling so outputs line up with args.test_list order.
data_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.test_list, num_segments=args.test_segments,
                   # non-RGB samples stack 5 frames each.
                   new_length=1 if args.modality == "RGB" else 5,
                   modality=args.modality,
                   image_tmpl="im{}.jpg",
                   test_mode=True,
                   save_scores=True,
                   transform=torchvision.transforms.Compose([
                       cropping,
                       # roll/div follow the BNInception convention used
                       # elsewhere in this file.
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=1, shuffle=False,
        num_workers=args.workers, pin_memory=True)


# Inference-only: move the network to GPU and freeze batch-norm/dropout.
net = net.cuda()
net.eval()

data_gen = enumerate(data_loader)
total_num = len(data_loader.dataset)
output = []
Example #9
0
def main():
    """Evaluate a 4-class ECO/TSN model on RGB frames and print predictions.

    Loads weights from args.model_path, zero/xavier-initializes any
    parameters the checkpoint is missing, then prints the top-1 prediction
    and target for every sample in args.val_list.

    Uses globals: args, best_prec1 (args is parsed elsewhere).
    """
    global args, best_prec1
    num_class = 4
    rgb_read_format = "{:d}.jpg"

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    # Read preprocessing metadata before DataParallel hides it.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Per-parameter-group optimizer options (lr/decay multipliers); only
    # printed below since no optimizer is built in this eval-only script.
    policies = model.get_optim_policies()

    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    new_state_dict = torch.load(args.model_path)['state_dict']

    # Parameters the checkpoint does not cover are freshly initialized.
    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    for k in un_init_dict_keys:
        # NOTE(review): DoubleTensor relies on load_state_dict casting to
        # the model's dtype — confirm, or use zeros_like(model_dict[k]).
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    data_length = 1

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality='RGB',
        image_tmpl=rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ])),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    model.eval()
    # FIX: loop variable renamed from `input`, which shadowed the builtin;
    # the redundant input_var/target_var aliases were dropped.
    for i, (frames, target) in enumerate(val_loader):
        target = target.cuda()
        output = model(frames)
        _, pred = output.data.topk(1, 1, True, True)
        print(pred, target)
    print('done')
Example #10
0
def get_pred(video_path, caption_path, opt):
    """Score one video with a pretrained TRN and return probabilities
    and predicted class indices.

    Args:
        video_path: frame root passed to TSNDataSet.
        caption_path: list file passed to TSNDataSet.
        opt: project options object (provides project_root; forwarded to
            datasets_video.return_dataset and TSN).

    Returns:
        (prob_all, pred_all): per-video lists of top-k probabilities and
        predicted class indices from the local accuracy() helper.
    """
    # options
    parser = argparse.ArgumentParser(
        description="TRN testing on the full validation set")
    # parser.add_argument('dataset', type=str, choices=['something','jester','moments','charades'])
    # parser.add_argument('modality', type=str, choices=['RGB', 'Flow', 'RGBDiff'])

    parser.add_argument('--dataset', type=str, default='somethingv2')
    parser.add_argument('--modality', type=str, default='RGB')

    parser.add_argument(
        '--weights',
        type=str,
        default=
        'model/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar'
    )
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--save_scores', type=str, default=None)
    parser.add_argument('--test_segments', type=int, default=8)
    parser.add_argument('--max_num', type=int, default=-1)
    parser.add_argument('--test_crops', type=int, default=10)
    parser.add_argument('--input_size', type=int, default=224)
    parser.add_argument('--crop_fusion_type',
                        type=str,
                        default='TRNmultiscale',
                        choices=['avg', 'TRN', 'TRNmultiscale'])
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--gpus', nargs='+', type=int, default=None)
    parser.add_argument('--img_feature_dim', type=int, default=256)
    parser.add_argument(
        '--num_set_segments',
        type=int,
        default=1,
        help='TODO: select multiply set of n-frames from a video')
    parser.add_argument('--softmax', type=int, default=0)

    args = parser.parse_args()

    def accuracy(output, target, topk=(1, )):
        """Computes the precision@k for the specified values of k"""
        maxk = max(topk)
        batch_size = target.size(0)
        prob, pred = output.topk(maxk, 1, True, True)
        prob = prob.t().data.numpy().squeeze()
        pred = pred.t().data.numpy().squeeze()
        return prob, pred

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality, opt)
    num_class = len(categories)

    net = TSN(num_class,
              args.test_segments
              if args.crop_fusion_type in ['TRN', 'TRNmultiscale'] else 1,
              args.modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              img_feature_dim=args.img_feature_dim,
              opt=opt)

    # Try the weights path as given, then relative to the project root.
    try:
        checkpoint = torch.load(args.weights)
    except:
        args.weights = os.path.join(opt.project_root, 'scripts/Eval/',
                                    args.weights)
        checkpoint = torch.load(args.weights)

    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

    # Strip the leading "module." DataParallel prefix from checkpoint keys.
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    net.load_state_dict(base_dict)

    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        video_path,
        caption_path,
        num_segments=args.test_segments,
        new_length=1 if args.modality == "RGB" else 5,
        modality=args.modality,
        image_tmpl=prefix,
        test_mode=True,
        transform=torchvision.transforms.Compose([
            cropping,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=True)

    # NOTE(review): devices is computed but unused since the DataParallel
    # call below was switched to plain .cuda() — confirm it can be removed.
    if args.gpus is not None:
        devices = [args.gpus[i] for i in range(args.workers)]
    else:
        devices = list(range(args.workers))

    #net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net = torch.nn.DataParallel(net.cuda())
    net.eval()

    data_gen = enumerate(data_loader)

    output = []

    def eval_video(video_data):
        """Score one (index, data, label) batch; returns (index, scores,
        first label), with scores shaped (segments, 1, num_class)."""
        i, data, label = video_data
        num_crop = args.test_crops

        # Per-sample channel count fed to the network for each modality.
        if args.modality == 'RGB':
            length = 3
        elif args.modality == 'Flow':
            length = 10
        elif args.modality == 'RGBDiff':
            length = 18
        else:
            raise ValueError("Unknown modality " + args.modality)

        # NOTE(review): volatile=True is pre-0.4 PyTorch inference mode —
        # confirm the torch version this file targets.
        input_var = torch.autograd.Variable(data.view(-1, length, data.size(2),
                                                      data.size(3)),
                                            volatile=True)
        rst = net(input_var)
        if args.softmax == 1:
            # take the softmax to normalize the output to probability
            # NOTE(review): F.softmax without dim= relies on deprecated
            # implicit-dimension behavior — confirm.
            rst = F.softmax(rst)

        rst = rst.data.cpu().numpy().copy()

        if args.crop_fusion_type in ['TRN', 'TRNmultiscale']:
            rst = rst.reshape(-1, 1, num_class)
        else:
            # Average the per-crop scores before reshaping per segment.
            rst = rst.reshape((num_crop, args.test_segments,
                               num_class)).mean(axis=0).reshape(
                                   (args.test_segments, 1, num_class))

        return i, rst, label[0]

    max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset)

    prob_all, pred_all = [], []
    for i, (data, label) in data_gen:
        if i >= max_num:
            break
        rst = eval_video((i, data, label))
        output.append(rst[1:])
        prob, pred = accuracy(torch.from_numpy(np.mean(rst[1], axis=0)),
                              label,
                              topk=(1, 174))
        prob_all.append(prob)
        pred_all.append(pred)
    return prob_all, pred_all
Example #11
0
def main():
    """Transfer-learn a TRN model from MIT-pretrained weights.

    Builds a 339-way TSN/TRN backbone, loads the MIT checkpoint, freezes the
    CNN trunk, replaces the final fusion layers with fresh ``num_class``-way
    heads, then trains with Adam + StepLR, checkpointing on best top-1.

    Reads configuration from the module-level ``parser``; uses/updates the
    module-level ``best_prec1`` and ``summary_writer``.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    # 339 matches the class count of the MIT pretraining checkpoint loaded
    # below; the classifier heads are replaced with num_class layers after
    # the weights are loaded.
    model = TSN(339,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)
    # Freeze the CNN trunk (first named child) — only the fusion/consensus
    # layers are trained.
    _, cnn = list(model.named_children())[0]
    for p in cnn.parameters():
        p.requires_grad = False

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # remove if not transfer learning
    # NOTE(review): hard-coded checkpoint path — consider promoting to a CLI
    # argument.
    checkpoint = torch.load('/home/ec2-user/mit_weights.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])

    # Replace the last Linear of every fusion-scale MLP with a fresh
    # num_class-way output layer.
    for module in list(
            list(model._modules['module'].children())
        [-1].children())[-1].children():
        module[-1] = nn.Linear(256, num_class)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True
    model.cuda()

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff handles mean subtraction internally.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception/InceptionV3 pretrained weights expect BGR input
            # (roll) and unscaled 0-255 pixel values (no div).
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                8,
                                                gamma=0.1,
                                                last_epoch=-1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # Step the LR schedule AFTER the epoch's optimizer updates; stepping
        # before training skews the schedule by one epoch (PyTorch >= 1.1).
        scheduler.step()

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training,
                             epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
    summary_writer.close()
Example #12
0
def main():
    """Train or evaluate a TRN regressor (single continuous output).

    Builds a TSN/TRN model with ``num_class == 1`` for label regression on
    body/face views, trains with SGD under an MSE/SmoothL1/NLL criterion,
    and checkpoints on best (lowest) validation loss.

    Reads configuration from the module-level ``parser``; uses/updates the
    module-level ``best_loss``.
    """
    global args, best_loss
    args = parser.parse_args()
    check_rootfolders()

    root_data, train_dict, val_dict = datasets_video.return_dataset(
        args.dataset, args.modality, args.view)
    # Regression: the network emits a single value per clip.
    num_class = 1

    args.store_name = '_'.join([
        'TRN', args.label_name, args.dataset, args.modality, args.view,
        args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)
    # Body view uses plain frame dumps; face view uses OpenFace detections.
    img_tmpl = '{:06d}.jpg' if args.view == 'body' else 'frame_det_00_{:06d}.bmp'
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                fix_all_weights=args.fix_all_weights)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()  # augmentation increase

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff handles mean subtraction internally.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # NOTE(review): the loaders read args.train_dict / args.val_dict while the
    # local train_dict / val_dict returned above are unused — confirm the
    # parser defines these arguments, otherwise the locals should be passed.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        root_data,
        args.train_dict,
        args.label_name,
        num_segments=args.num_segments,
        phase='Train',
        new_length=data_length,
        modality=args.modality,
        image_tmpl=img_tmpl,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception/InceptionV3 pretrained weights expect BGR input
            # (roll) and unscaled 0-255 pixel values (no div).
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        root_data,
        args.val_dict,
        args.label_name,
        num_segments=args.num_segments,
        phase='Validation',
        new_length=data_length,
        modality=args.modality,
        image_tmpl=img_tmpl,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    elif args.loss_type == 'mse':
        criterion = torch.nn.MSELoss().cuda()
    elif args.loss_type == 'mae':
        criterion = torch.nn.SmoothL1Loss().cuda()
    else:  # another loss is mse or mae
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        log_val = open(
            os.path.join(args.root_log, '%s_val.csv' % args.store_name), 'w')
        validate(val_loader, model, criterion, 0, log_val)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, criterion,
                            (epoch + 1) * len(train_loader), log_training)

            # remember best (lowest) loss and save checkpoint
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    # Persist the running best so it can be restored on resume.
                    'best_loss': best_loss,
                }, is_best)
Example #13
0
def main():
    """Train or evaluate a TSN variant (resnet50/34, Res3D18, TSM, MS).

    Prints environment/config info, builds the model, seeds a partial
    state dict (zero/xavier-initializing the remainder), then trains with
    SGD or runs full-resolution test-time evaluation.
    """
    global args, best_prec1
    args = parser.parse_args()

    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch + " Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")
    print(args.mode)
    # Map dataset name to (class count, frame filename suffix format).
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'tinykinetics':
        num_class = 150
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'minikinetics':
        num_class = 150
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                args.modality,
                base_model=args.arch,
                dataset=args.dataset,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)  #, rep_flow = args.rep_flow)
    #     print (model)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    # Optimizer s also support specifying per-parameter options.
    # To do this, pass in an iterable of dict s.
    # Each of them will define a separate parameter group,
    # and should contain a params key, containing a list of parameters belonging to it.
    # Other keys should match the keyword arguments accepted by the optimizers,
    # and will be used as optimization options for this group.
    policies = model.get_optim_policies(args.dataset)

    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    # Seed new_state_dict with the current weights minus the classifier
    # ('fc') layers, and pick Stack/ToTensor conventions per architecture.
    # NOTE(review): an arch outside {resnet50, resnet34, Res3D18, TSM, MS}
    # leaves new_state_dict/div/roll undefined and raises NameError below —
    # confirm the parser restricts --arch to these choices.
    if args.arch == "resnet50":
        new_state_dict = {}  #model_dict
        div = False
        roll = True
    elif args.arch == "resnet34":
        pretrained_dict = {}
        new_state_dict = {}  #model_dict
        for k, v in model_dict.items():
            if ('fc' not in k):
                new_state_dict.update({k: v})
        div = False
        roll = True
    elif args.arch == "Res3D18":
        pretrained_dict = {}
        new_state_dict = {}  #model_dict
        for k, v in model_dict.items():
            if ('fc' not in k):
                new_state_dict.update({k: v})
        div = False
        roll = True
    elif args.arch == "TSM":
        pretrained_dict = {}
        new_state_dict = {}  #model_dict
        for k, v in model_dict.items():
            if ('fc' not in k):
                new_state_dict.update({k: v})


#         div = False
#         roll = True
        div = True
        roll = False
    elif (args.arch == "MS"):
        pretrained_dict = {}
        new_state_dict = {}  #model_dict
        for k, v in model_dict.items():
            if ('fc' not in k):
                new_state_dict.update({k: v})
        div = True
        roll = False

    # Keys not carried over above get freshly initialized below.
    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    for k in un_init_dict_keys:
        # NOTE(review): tensors are created as DoubleTensor while the model
        # parameters are presumably float32 — load_state_dict copies values,
        # but confirm the dtype handling is intended.
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # NOTE(review): both branches set data_length = 1 (sibling scripts use 5
    # for Flow/RGBDiff) — confirm this is intentional for these models.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 1

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            mode=args.mode,
            image_tmpl=args.rgb_prefix + rgb_read_format if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + rgb_read_format,
            transform=torchvision.transforms.Compose([
                #                        GroupScale((240,320)),
                GroupScale(int(scale_size)),
                #                        GroupScale((256)),
                train_augmentation,
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            mode=args.mode,
            image_tmpl=args.rgb_prefix + rgb_read_format if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + rgb_read_format,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                #                        GroupScale((240,320)),
                GroupScale(int(scale_size)),
                #                        GroupScale((256)),
                GroupCenterCrop(crop_size),
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    output_list = []
    if args.evaluate:
        # Test-time path: full-resolution sampling, scores saved to disk.
        input_size = scale_size
        test_loader = torch.utils.data.DataLoader(
            TSNDataSet(
                "",
                args.val_list,
                num_segments=args.num_segments,
                new_length=data_length,
                modality=args.modality,
                mode=args.mode,
                image_tmpl=args.rgb_prefix + rgb_read_format if args.modality
                in ["RGB", "RGBDiff"] else args.flow_prefix + rgb_read_format,
                random_shift=False,
                test_mode=True,
                transform=torchvision.transforms.Compose([
                    #                            GroupScale((240,320)),
                    #                            GroupScale(int(scale_size)),
                    #                            GroupCenterCrop(crop_size),
                    GroupFullResSample(scale_size, input_size, flip=False),
                    Stack(roll=roll),
                    ToTorchFormatTensor(div=div),
                    normalize,
                ])),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
        prec1, score_tensor = test(test_loader,
                                   model,
                                   criterion,
                                   0,
                                   temperature=100,
                                   num_class=num_class)
        #         prec1, score_tensor = validate(val_loader,model,criterion,0, temperature=100)
        output_list.append(score_tensor)
        fn = 'score_kinetics.pt'
        save_validation_score(output_list, filename=fn)
        #         prec1, score_tensor = validate2(val_loader2,model,criterion,0, temperature=100)
        print("test score saved in {}".format('/'.join(
            (args.val_output_folder, fn))))
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        temperature = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, score_tensor = validate(val_loader,
                                           model,
                                           criterion,
                                           (epoch + 1) * len(train_loader),
                                           temperature=temperature)

            output_list.append(score_tensor)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    # save validation score
    save_validation_score(output_list)
    print("validation score saved in {}".format('/'.join(
        (args.val_output_folder, 'score.pt'))))
Example #14
0
def main():
    """Fine-tune a TRN binary classifier from a Something-v2 checkpoint.

    Loads pretrained TRN weights (dropping the old fusion-head layers so a
    2-way head is trained from scratch), then trains with a weighted
    BCEWithLogitsLoss, saving a state dict snapshot at the start of every
    epoch.

    Reads configuration from the module-level ``parser``; uses/updates the
    module-level ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)


    args.store_name = '_'.join(['TRN', args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d'% args.num_segments])
    print('storing name: ' + args.store_name)

    # Binary classification head (2 outputs), independent of dataset size.
    model = TSN(2, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    checkpoint = torch.load('pretrain/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar', map_location='cpu')
    # Strip the 'module.' DataParallel prefix from every key.
    base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())}
    # Drop the old 174-way fusion-head layers so the new 2-way head keeps its
    # fresh initialization (strict=False tolerates the missing keys).
    for key in ['consensus.fc_fusion_scales.6.3.bias', 'consensus.fc_fusion_scales.5.3.bias',
                'consensus.fc_fusion_scales.4.3.bias',
                'consensus.fc_fusion_scales.3.3.bias', 'consensus.fc_fusion_scales.2.3.bias',
                'consensus.fc_fusion_scales.1.3.bias',
                'consensus.fc_fusion_scales.0.3.bias', 'consensus.fc_fusion_scales.6.3.weight',
                'consensus.fc_fusion_scales.5.3.weight',
                'consensus.fc_fusion_scales.4.3.weight', 'consensus.fc_fusion_scales.3.3.weight',
                'consensus.fc_fusion_scales.2.3.weight',
                'consensus.fc_fusion_scales.1.3.weight', 'consensus.fc_fusion_scales.0.3.weight']:
        del base_dict[key]
    # print(base_dict)
    model.load_state_dict(base_dict, strict=False)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # NOTE(review): despite the 'nll' flag this trains with a weighted
        # BCEWithLogitsLoss; class 0 is up-weighted by 1.2.
        weight = torch.ones([2]).cuda()
        weight[0] = 1.2
        pos_weight = torch.ones([2]).cuda()
        #pos_weight[0] = 2
        criterion = torch.nn.BCEWithLogitsLoss(weight = weight, pos_weight=pos_weight).cuda()
        #criterion = torch.nn.CrossEntropyLoss().cuda()

    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    # NOTE(review): learning rate is hard-coded to 1e-4 (args.lr is ignored) —
    # presumably deliberate for fine-tuning; confirm.
    optimizer = torch.optim.SGD(policies,
                                0.0001,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    log_training = open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # NOTE(review): snapshots are written BEFORE this epoch trains (so
        # epoch N's file holds epoch N-1's weights) and duplicated under two
        # extensions — confirm intended.
        torch.save(model.state_dict(), 'checkpoint_bce_20_w12_{}.pth.tar'.format(epoch))
        torch.save(model.state_dict(), 'checkpoint_bce_20_w12_{}.pth'.format(epoch))
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)
Example #15
0
def main():
    """Train or evaluate a TSN action classifier with Visdom logging.

    Builds the model (defaults per the original notes: consensus_type=avg,
    base_model=resnet101, dropout=0.5), trains with SGD, visualizes via
    ``vis.Visualizer``, and checkpoints on best top-1 accuracy.

    Reads configuration from the module-level ``parser``; uses/updates the
    module-level ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # crop_size is 224; scale_size is 256/224 depending on the backbone;
    # input mean/std differ per modality.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    policies = model.get_optim_policies()
    # get_augmentation combines GroupMultiScaleCrop (fixed-position crops
    # resized to 224 with bilinear interpolation) and
    # GroupRandomHorizontalFlip.
    train_augmentation = model.get_augmentation()
    print(args.gpus)
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # roll swaps the channel order (BGR vs RGB) to match BNInception's
    # pretraining; div rescales pixels to [0, 1] for other backbones.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    # see the optim policy
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    # generally the lr here is 1e-3
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: validate once and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    viz = vis.Visualizer()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, viz)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, viz=viz)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    # Fixed: weights were saved under the key 'test_crops',
                    # but the resume path above reads 'state_dict'.
                    'state_dict': model.state_dict(),
                    # Fixed: persist the running best (was the current prec1),
                    # matching what resume restores into best_prec1.
                    'best_prec1': best_prec1,
                }, is_best)
Example #16
0
def main():
    """Train TSN end-to-end: parse args, build the model and data
    loaders, then run the epoch loop with periodic validation and
    checkpointing of the best top-1 precision.

    Relies on module-level ``parser``/``best_prec1`` and helpers
    (``pre_process``, ``train``, ``validate``, ``save_checkpoint``,
    ``adjust_learning_rate``) defined elsewhere in this file.
    """
    # One-off setup such as silencing warnings.
    pre_process()

    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'streetdance245':
        num_class = 245
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)
    # Capture preprocessing constants and optimizer policies before
    # DataParallel wraps the model (the wrapper hides these attributes).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Report FLOPs/params. Model input is (batch, n_seg * c, h, w).
    # NOTE(review): (1, 9, 224, 224) presumably assumes 3 RGB segments —
    # confirm it matches args.num_segments / args.modality.
    from ptflops import get_model_complexity_info
    macs, params = get_model_complexity_info(model, (1, 9, 224, 224),
                                             as_strings=True,
                                             print_per_layer_stat=True,
                                             verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    set_break()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: log the checkpoint that was loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading. RGBDiff is left unnormalized here; every other
    # modality gets mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames stacked per sample: 1 RGB frame, or 5 consecutive for
    # Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: previously fell through leaving data_length unbound.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception's pretrained weights presumably expect BGR
            # 0-255 input, hence channel roll and no /255 division —
            # verify against the backbone's preprocessing spec.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # Loss function (criterion) and optimizer.
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: one validation pass, then exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # Train for one epoch.
        train(train_loader, model, criterion, optimizer, epoch)

        # Periodically evaluate on the validation set.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # Remember best prec@1 and save checkpoint.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    print('best_prec1:', best_prec1)
Example #17
0
def main():
    """Run windowed inference with a pretrained TSN model.

    A resume checkpoint is mandatory (the function returns early without
    one). Builds a sliding-window test dataset over ``args.val_list``,
    calls ``predict`` on it, and prints total prediction time.
    """
    global args
    args = parser.parse_args()

    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    print("------------------------------------")
    print(args.arch + " Configurations:")
    for key, value in args.__dict__.items():
        print("- {}: {}".format(key, value))
    print("------------------------------------")

    # Per-dataset class count and frame-filename numbering format.
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:06d}.jpg"  # Format for THUMOS14 videos
        # rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:04d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:04d}.jpg"
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.pretrained_parts, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    # Capture preprocessing constants before DataParallel hides them.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    if _CUDA:
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() # CUDA
    print_model(model)
    if not _CUDA:
        model = torch.nn.DataParallel(model) # CPU

    print("pretrained_parts: ", args.pretrained_parts)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            if _CUDA:
                checkpoint = torch.load(args.resume) # CUDA
            else:
                checkpoint = torch.load(args.resume, map_location='cpu') # CPU
            # Older checkpoints may not carry the learning rate; ask the
            # operator for it interactively in that case.
            if "lr" not in checkpoint.keys():
                args.lr = input("No 'lr' attribute found in resume model, please input the 'lr' manually: ")
                args.lr = float(args.lr)
            else:
                args.lr = checkpoint['lr']
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']  # informational only in this script
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch: {}, lr: {})"
                  .format(args.resume, checkpoint['epoch'], args.lr)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    else:
        print("Please specify the checkpoint to pretrained model")
        return

    cudnn.benchmark = True

    # Data loading. RGBDiff is left unnormalized; other modalities get
    # mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: previously fell through leaving data_length unbound.
        raise ValueError('Unknown modality ' + args.modality)

    end = time.time()
    dataset = TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=args.rgb_prefix+rgb_read_format if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+rgb_read_format,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=True),
                       ToTorchFormatTensor(div=False),
                       normalize,
                   ]),
                   test_mode=True,
                   window_size=_WINDOW_SIZE, window_stride=_WINDOW_STRIDE)
    # NOTE(review): data_loader is built but unused — `predict` below
    # iterates the dataset directly. Kept for parity with the original.
    data_loader = torch.utils.data.DataLoader(dataset,
                      batch_size=args.batch_size, shuffle=False,
                      num_workers=args.workers, pin_memory=True,
                      collate_fn=collate_fn)

    # No criterion is needed at inference time.
    predict(dataset, model, criterion=None, iter=0)
    elapsed_time = time.time() - end
    print("STATS_TOT_WINDOWS={0}, Total prediction time={1}".format(STATS_TOT_WINDOWS, elapsed_time))
    return
Example #18
0
def main():
    """Train TSN (RGB/Flow/RGBDiff/CV modalities) with a MultiStepLR
    learning-rate schedule, checkpointing the best top-1 precision.

    Relies on module-level ``parser``, ``best_prec1``, ``device``,
    ``writer`` and helpers (``train``, ``validate``, ``save_checkpoint``)
    defined elsewhere in this file.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Frames stacked per sample: 1 RGB frame, 5 for Flow/RGBDiff, and for
    # any other modality the original default below applies.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        data_length = 5  # generate 5 displacement map, using 6 RGB images

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                new_length=data_length)
    model = model.to(device)

    # Capture preprocessing constants before DataParallel hides them.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    if device.type == 'cuda':
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            # BUGFIX: log the resumed checkpoint path (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading. RGBDiff is left unnormalized; other modalities get
    # mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality
        in ["RGB", "RGBDiff", "CV"] else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality
        in ["RGB", "RGBDiff", "CV"] else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # Loss function (criterion), optimizer, and LR schedule.
    criterion = torch.nn.CrossEntropyLoss().to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     args.lr_steps,
                                                     gamma=0.1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(0, args.epochs):
        # NOTE(review): stepping the scheduler at the top of the loop
        # (before any optimizer.step) is deprecated in modern PyTorch but
        # preserved here — it also advances the LR while fast-forwarding
        # through epochs skipped on resume.
        scheduler.step()
        if epoch < args.start_epoch:
            continue

        # Train for one epoch.
        train(train_loader, model, criterion, optimizer, epoch)

        # Periodically evaluate on the validation set.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch)

            # Remember best prec@1 and save checkpoint.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
    writer.close()
Example #19
0
        [GroupOverSample(net.input_size, net.scale_size)])
else:
    raise ValueError(
        "Only 1 and 10 crops are supported while we got {}".format(
            args.test_crops))

# Evaluation loader: one video per batch, deterministic order (no shuffle,
# no random frame shift) so results are reproducible across runs.
data_loader = torch.utils.data.DataLoader(
    TSNDataSet(
        args.sources,
        args.test_list,
        timesteps=args.timesteps,
        #test_segments=args.test_segments,
        #sampling_method=args.sampling_method,
        # Flow stacks 5 consecutive frames per sample; RGB uses a single frame.
        new_length=1 if args.modality == "RGB" else 5,
        modality=args.modality,
        image_tmpl="image_{:05d}.jpg" if args.modality in ['RGB', 'RGBDiff']
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(256)),
            GroupCenterCrop(224),
            # BNInception presumably expects BGR 0-255 input, hence the
            # channel roll and skipped /255 division for that arch —
            # TODO confirm against the backbone's preprocessing spec.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
    batch_size=1,
    shuffle=False,
    num_workers=args.workers,
    pin_memory=True)

if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
Example #20
0
def main():
    """Train TSN while plotting live matplotlib curves of train/val loss
    and accuracy.

    Curve histories are persisted as ``.npy`` files under
    ``args.snapshot_pref`` so a resumed run (``args.start_epoch > 0``)
    continues its plots where the previous run stopped.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'myDataset':
        num_class = 12
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing constants and optimizer policies before
    # DataParallel wraps the model and hides its attributes.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: log the resumed checkpoint path (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading. RGBDiff is left unnormalized; other modalities get
    # mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: previously fell through leaving data_length unbound.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # Loss function (criterion) and optimizer.
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # Four stacked axes: val_loss, val_acc, train_loss, train_acc.
    f, axs = plt.subplots(4, 1, figsize=(10, 5))
    if args.start_epoch == 0:
        train_acc = []
        train_loss = []
        val_acc = []
        val_loss = []
        epochs = []
        val_epochs = []
    else:
        # Resuming: reload curve histories saved by the previous run.
        train_acc = np.load("./%s/train_acc.npy" % args.snapshot_pref).tolist()
        train_loss = np.load("./%s/train_loss.npy" %
                             args.snapshot_pref).tolist()
        val_acc = np.load("./%s/val_acc.npy" % args.snapshot_pref).tolist()
        val_loss = np.load("./%s/val_loss.npy" % args.snapshot_pref).tolist()
        epochs = np.load("./%s/epochs.npy" % args.snapshot_pref).tolist()
        val_epochs = np.load("./%s/val_epochs.npy" %
                             args.snapshot_pref).tolist()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # Train for one epoch and record its accuracy/loss.
        acc, loss = train(train_loader, model, criterion, optimizer, epoch)
        train_acc.append(acc)
        train_loss.append(loss)
        epochs.append(epoch)
        # Periodically evaluate on the validation set.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, v_loss = validate(val_loader, model, criterion,
                                     (epoch + 1) * len(train_loader))

            # Remember best prec@1 and save checkpoint.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
            val_acc.append(prec1)
            val_loss.append(v_loss)
            val_epochs.append(epoch)
        axs[0].plot(val_epochs, val_loss, c='b', marker='.', label='val_loss')
        axs[1].plot(val_epochs, val_acc, c='r', marker='.', label='val_acc')
        axs[2].plot(epochs, train_loss, c='b', marker='.', label='train_loss')
        axs[3].plot(epochs, train_acc, c='r', marker='.', label='train_acc')
        plt.title('TSN_' + args.snapshot_pref)
        if epoch == 0:
            # Attach legends only once to avoid duplicates on replot.
            for i in range(4):
                axs[i].legend(loc='best')
        plt.pause(0.000001)
        if not os.path.exists(args.snapshot_pref):
            os.makedirs(args.snapshot_pref)
        plt.savefig('./%s/%s.jpg' % (args.snapshot_pref, str(epoch).zfill(5)))
        # Persist histories each epoch so a resumed run can extend them.
        np.save("./%s/train_acc.npy" % args.snapshot_pref, train_acc)
        np.save("./%s/train_loss.npy" % args.snapshot_pref, train_loss)
        np.save("./%s/val_acc.npy" % args.snapshot_pref, val_acc)
        np.save("./%s/val_loss.npy" % args.snapshot_pref, val_loss)
        np.save("./%s/val_epochs.npy" % args.snapshot_pref, val_epochs)
        np.save("./%s/epochs.npy" % args.snapshot_pref, epochs)
Example #21
0
def main():
    """Load a TSN checkpoint and run `test` over args.test_list.

    No training is performed; args.checkpoint must point to a saved
    model whose state_dict matches the DataParallel-wrapped TSN.
    """
    parser = options()
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'saag01':
        num_class = 2
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Frames stacked per sample: 1 RGB frame, 5 for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: previously fell through leaving data_length unbound,
        # producing a NameError at the TSNDataSet call below.
        raise ValueError('Unknown modality ' + args.modality)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_size = model.input_size
    input_std = model.input_std
    # NOTE(review): policies, train_augmentation and cropping are unused
    # in this evaluation script; kept because get_optim_policies() may
    # touch model internals — confirm before removing.
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    cropping = torchvision.transforms.Compose([
        GroupScale(scale_size),
        GroupCenterCrop(input_size),
    ])

    checkpoint = torch.load(args.checkpoint)
    start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']

    state_dict = checkpoint['state_dict']

    # Checkpoint keys carry the DataParallel 'module.' prefix, so wrap
    # the model before loading.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model.load_state_dict(state_dict)

    test_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.test_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.img_prefix + "_{:05d}" +
        args.ext if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix +
        "_{}_{:05d}" + args.ext,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(input_mean, input_std),
        ]),
        custom_prefix=args.custom_prefix),
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              drop_last=True)

    ### Test ###
    test(model, test_loader, args)
Example #22
0
def main(args, config):
    """Train a 26-class TSN via the project `Trainer`.

    Parameters
    ----------
    args : namespace with modality/arch/segment/embed/context settings.
    config : project config object providing the logger, loss/metric
        names, and lr-scheduler construction (`init_obj`).
    """
    # Frames stacked per sample: 1 RGB frame, 5 for Flow.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality == 'Flow':
        data_length = 5
    else:
        # BUGFIX: previously fell through leaving data_length unbound.
        raise ValueError('Unknown modality ' + args.modality)

    model = TSN(26, args.num_segments, args.modality,
                base_model=args.arch, new_length=data_length, embed=args.embed,
                consensus_type=args.consensus_type, dropout=args.dropout,
                partial_bn=not args.no_partialbn, context=args.context)

    # Capture normalization stats and per-layer optimizer policies
    # before the model is handed to the Trainer.
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()

    normalize = GroupNormalize(input_mean, input_std)

    dataset = TSNDataSet("train", num_segments=args.num_segments,
                         context=args.context,
                         new_length=data_length,
                         modality=args.modality,
                         image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB"] else args.flow_prefix+"{}_{:05d}.jpg",
                         transform=torchvision.transforms.Compose([
                             GroupScale((224,224)),
                             Stack(roll=args.arch == 'BNInception'),
                             ToTorchFormatTensor(div=args.arch != 'BNInception'),
                             normalize,
                         ]))

    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, drop_last=False)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("val", num_segments=args.num_segments,
                   context=args.context,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB"] else args.flow_prefix+"{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale((int(224),int(224))),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    logger = config.get_logger('train')
    logger.info(model)

    # Resolve loss and metric callables by name from the config.
    criterion_categorical = getattr(module_loss, config['loss'])
    criterion_continuous = getattr(module_loss, config['loss_continuous'])

    metrics = [getattr(module_metric, met) for met in config['metrics']]
    metrics_continuous = [getattr(module_metric, met) for met in config['metrics_continuous']]

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    # Echo the effective learning rates for each param group.
    for param_group in optimizer.param_groups:
        print(param_group['lr'])

    trainer = Trainer(model, criterion_categorical, criterion_continuous,
                      metrics, metrics_continuous, optimizer,
                      config=config,
                      data_loader=train_loader,
                      valid_data_loader=val_loader,
                      lr_scheduler=lr_scheduler, embed=args.embed)

    trainer.train()
Example #23
0
def main():
    """Train and evaluate a TSN action-recognition model.

    Parses CLI options into the module-level ``args``, builds the model,
    wires up the train/val data pipelines, then runs the epoch loop with
    periodic validation and checkpointing (best top-1 tracked in
    ``best_prec1``).
    """
    torch.set_printoptions(precision=6)

    global args, best_prec1
    args = parser.parse_args()

    # Map the dataset name to its number of target classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'cad':
        num_class = 8
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Build the TSN model:
    #   num_class           -- number of output classes
    #   args.num_segments   -- segments per video (K in the TSN paper, default 3)
    #   args.modality       -- input type: RGB frames, optical Flow, ...
    #   args.arch           -- backbone architecture (resnet101, BNInception, ...)
    #   args.consensus_type -- how per-snippet predictions are fused (e.g. avg)
    #   args.dropout        -- dropout ratio
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Wrap for (multi-)GPU training.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume: ``args.resume`` is None for a fresh run, or the path
    # of a previously saved checkpoint (.pth) whose weights/epoch are restored.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint that was actually loaded
            # (previously printed args.evaluate by mistake).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code. RGBDiff normalizes itself, so it gets an identity
    # transform instead of mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames stacked per sample: 1 for RGB, 5 for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fix: previously fell through and left data_length unbound,
        # raising a confusing NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    # TSNDataSet subclasses torch.utils.data.Dataset (frame decoding and
    # segment sampling); DataLoader batches its (tensor, label) pairs.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=3,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # The original TSN recipe uses SGD with momentum; this variant tries Adam.
    optimizer = torch.optim.Adam(policies, args.lr)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # args.lr_steps lists the epochs at which the LR is decayed (x0.1
        # inside adjust_learning_rate).
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set every args.eval_freq epochs (and at the
        # final epoch), keeping the best-performing checkpoint
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #24
0
def main():
    """Train and evaluate a SeqVLAD action-recognition model.

    Parses CLI options into the module-level ``args``, builds the model,
    optionally restores a checkpoint (either from a plain TSN model or a
    matching SeqVLAD one), then runs the epoch loop.  When
    ``args.two_steps`` is set, the first ``two_steps`` epochs train only a
    restricted parameter group (``sub_policies``) before switching to the
    full parameter set.  Best top-1 accuracy is tracked in ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map the dataset name to its number of target classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = SeqVLAD(num_class,
                    args.num_centers,
                    args.modality,
                    args.timesteps,
                    args.redu_dim,
                    with_relu=args.with_relu,
                    base_model=args.arch,
                    activation=args.activation,
                    seqvlad_type=args.seqvlad_type,
                    init_method=args.init_method,
                    consensus_type=args.consensus_type,
                    dropout=args.dropout,
                    partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    if args.two_steps is not None:
        # Two-step training: warm up a sub-set of parameters first.
        print('two step training ')
        sub_policies = model.get_sub_optim_policies()

    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume from a checkpoint; resume_type selects how the
    # pretrained state dict is mapped onto this model.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']

            model_dict = model.state_dict()

            if args.resume_type == 'tsn':
                # Initializing from a plain TSN checkpoint: restart epoch
                # counting and drop modules that do not exist in SeqVLAD.
                args.start_epoch = 0

                ## exclude certain module
                pretrained_dict = checkpoint['state_dict']

                excluded_modules = [
                    'module.new_fc', 'module.base_model.global_pool'
                ]
                res_state_dict = filter_excluded_module(
                    pretrained_dict, excluded_modules)
                model_dict.update(res_state_dict)

            elif args.resume_type == 'same':
                if args.two_steps == 0:
                    args.start_epoch = 0

                pretrained_dict = checkpoint['state_dict']
                res_state_dict = init_from_tsn_model(model_dict,
                                                     pretrained_dict)
                model_dict.update(res_state_dict)
            else:
                print('==> resume_type must be one of same/tsn')
                exit()

            model.load_state_dict(model_dict)

            # Fix: report the checkpoint that was actually loaded
            # (previously printed args.evaluate by mistake).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code. RGBDiff normalizes itself, so it gets an identity
    # transform instead of mean/std normalization.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames stacked per sample: 1 for RGB, 5 for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fix: previously fell through and left data_length unbound,
        # raising a confusing NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.sources,
        args.train_list,
        timesteps=args.timesteps,
        new_length=data_length,
        modality=args.modality,
        sampling_method=args.sampling_method,
        reverse=args.reverse,
        image_tmpl="image_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.sources,
        args.val_list,
        timesteps=args.timesteps,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="image_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    if args.two_steps is not None:
        for group in sub_policies:
            print(
                ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                    group['name'], len(group['params']), group['lr_mult'],
                    group['decay_mult'])))

    if args.optim == 'SGD':
        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizer_cent = torch.optim.SGD(policies,
                                         args.lr,
                                         momentum=args.momentum,
                                         weight_decay=args.weight_decay)
        if args.two_steps is not None:
            sub_optimizer = torch.optim.SGD(sub_policies,
                                            args.lr,
                                            momentum=args.momentum,
                                            weight_decay=args.weight_decay)

    elif args.optim == 'Adam':
        print('use Adam optimizer ... ...')
        optimizer = torch.optim.Adam(policies,
                                     args.lr,
                                     weight_decay=args.weight_decay)
        # Fix: optimizer_cent was only created in the SGD branch, so the
        # train(...) calls below raised NameError when --optim Adam was used.
        optimizer_cent = torch.optim.Adam(policies,
                                          args.lr,
                                          weight_decay=args.weight_decay)
        if args.two_steps is not None:
            sub_optimizer = torch.optim.Adam(sub_policies,
                                             args.lr,
                                             weight_decay=args.weight_decay)

    else:
        print('optimzer: {} is not implimented, please use SGD or Adam'.format(
            args.optim))
        exit()

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.two_steps is not None and epoch < args.two_steps:
            # Warm-up phase: train only the restricted parameter group.
            adjust_learning_rate(sub_optimizer, epoch, args.lr_steps)
            train(train_loader, model, criterion, sub_optimizer,
                  optimizer_cent, epoch)
        else:
            # Full-model phase.
            adjust_learning_rate(optimizer, epoch, args.lr_steps)
            train(train_loader, model, criterion, optimizer, optimizer_cent,
                  epoch)

        # evaluate on validation set every args.eval_freq epochs (and at the
        # final epoch), keeping the best-performing checkpoint
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #25
0
elif args.test_crops == 10:
    cropping = torchvision.transforms.Compose(
        [GroupOverSample(net.input_size, net.scale_size)])
else:
    raise ValueError(
        "Only 1 and 10 crops are supported while we got {}".format(
            args.test_crops))

# Frame-name template: "img_{:05d}.jpg" for appearance streams, a
# flow_prefix-based pattern for optical-flow streams.
test_image_tmpl = ("img_{:05d}.jpg" if args.modality in ['RGB', 'RGBDiff']
                   else args.flow_prefix + "{}_{:05d}.jpg")

# Test-time transform: crop strategy chosen above, then stacking,
# tensor conversion and per-channel normalization.
test_transform = torchvision.transforms.Compose([
    cropping,
    Stack(roll=args.arch == 'BNInception'),
    ToTorchFormatTensor(div=args.arch != 'BNInception'),
    GroupNormalize(net.input_mean, net.input_std),
])

test_set = TSNDataSet("",
                      args.test_list,
                      num_segments=args.test_segments,
                      new_length=1 if args.modality == "RGB" else 5,
                      modality=args.modality,
                      image_tmpl=test_image_tmpl,
                      test_mode=True,
                      transform=test_transform)

# One video per batch; order preserved so predictions line up with the list.
data_loader = torch.utils.data.DataLoader(test_set,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=args.workers * 2,
                                          pin_memory=True)

# One device id per worker: either the user-specified GPU list or 0..workers-1.
if args.gpus is not None:
    devices = [args.gpus[i] for i in range(args.workers)]
else:
    devices = list(range(args.workers))
Example #26
0
def main_charades():
    """Build Charades datasets and a GCN-I3D model, then run the engine.

    All heavy lifting (training loop, checkpointing, evaluation) is
    delegated to GCNMultiLabelMAPEngine.learning.
    """
    global args, best_prec1, use_gpu

    use_gpu = torch.cuda.is_available()

    # Resolve dataset metadata and per-split file lists for this modality.
    (categories, args.train_list, args.val_list, args.train_num_list,
     args.val_num_list, args.root_path,
     prefix) = datasets_video.return_dataset(args.dataset, args.modality,
                                             args.root_path)
    num_class = len(categories)
    crop_size = args.crop_size
    scale_size = args.scale_size
    # ImageNet channel statistics.
    input_mean = [0.485, 0.456, 0.406]
    input_std = [0.229, 0.224, 0.225]

    # Training pipeline: multi-scale crop + horizontal flip augmentation,
    # then stacking, tensor conversion, normalization and CTHW layout.
    train_transform = torchvision.transforms.Compose([
        GroupMultiScaleCrop(crop_size, [1.0, 0.875, 0.75, 0.66, 0.5],
                            max_distort=2),
        GroupRandomHorizontalFlip(is_flow=False),
        Stack(roll=False),
        ToTorchFormatTensor(div=True),
        GroupNormalize(input_mean, input_std),
        ChangeToCTHW(modality=args.modality)
    ])
    train_dataset = TSNDataSet(args.root_path,
                               args.train_list,
                               args.train_num_list,
                               num_class=num_class,
                               num_segments=args.num_segments,
                               new_length=args.data_length,
                               modality=args.modality,
                               image_tmpl=prefix,
                               transform=train_transform)

    # Validation pipeline: deterministic scale + center crop.
    val_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=False),
        ToTorchFormatTensor(div=True),
        GroupNormalize(input_mean, input_std),
        ChangeToCTHW(modality=args.modality)
    ])
    val_dataset = TSNDataSet(args.root_path,
                             args.val_list,
                             args.val_num_list,
                             num_class=num_class,
                             num_segments=args.num_segments,
                             new_length=args.data_length,
                             modality=args.modality,
                             image_tmpl=prefix,
                             random_shift=False,
                             transform=val_transform)

    # GCN-augmented I3D backbone (plain InceptionI3d was the alternative).
    model = modelfile.gcn_i3d(
        num_class=num_class,
        t=0.4,
        adj_file=
        './data/Charades_v1/gcn_info/class_graph_conceptnet_context_0.8.pkl',
        word_file='./data/Charades_v1/gcn_info/class_word.pkl')

    # Multi-label objective for Charades.
    criterion = nn.MultiLabelSoftMarginLoss()

    # Optimizer over the configured parameter groups.
    params = get_config_optim(model,
                              lr=args.lr,
                              weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(params, eps=1e-8)

    # Engine configuration, gathered into a single literal.
    state = {
        'batch_size': args.batch_size,
        'val_batch_size': args.val_batch_size,
        'image_size': args.image_size,
        'max_epochs': args.epochs,
        'evaluate': args.evaluate,
        'resume': args.resume,
        'num_classes': num_class,
        'difficult_examples': False,
        'print_freq': args.print_freq,
        'save_model_path': args.save_model_path,
        'log_path': args.log_path,
        'logname': args.logname,
        'workers': args.workers,
        'epoch_step': args.epoch_step,
        'lr': args.lr,
        'device_ids': list(range(torch.cuda.device_count())),
    }
    if args.evaluate:
        state['evaluate'] = True
    map_engine = engine.GCNMultiLabelMAPEngine(
        state, inp_file='./data/Charades_v1/gcn_info/class_word.pkl')
    map_engine.learning(model, criterion, train_dataset, val_dataset,
                        optimizer)
Example #27
0
    # Number of consecutive frames stacked per segment sample:
    # 1 RGB frame, 5 for Flow/RGBDiff, args.num_motion for fused RGBFlow.
    # NOTE(review): data_length stays unbound for any other modality value --
    # confirm upstream argument validation guarantees one of these three.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion

    # Training loader: TSNDataSet handles frame decoding/segment sampling;
    # the transform chain applies the model's augmentation, stacks frames
    # (roll + no-div only for the Inception-family backbones), converts to
    # tensor and normalizes.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                  isRGBFlow=(args.modality == 'RGBFlow')),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=False)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
Example #28
0
def main():
    """Train and evaluate an STSNN action-recognition model.

    Parses CLI options into the module-level ``args``, builds the model,
    wires up the train/val data pipelines, then runs the epoch loop with
    periodic validation, CSV logging and checkpointing (best top-1 tracked
    in ``best_prec1``).
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    # Unique run name used for log/checkpoint files.
    args.store_name = '_'.join([
        'STSNN', args.dataset, args.modality, args.arch,
        'group%d' % args.num_segments,
        '%df1c' % args.num_motion
    ])
    print('storing name: ' + args.store_name)

    model = STSNN(num_class,
                  args.num_segments,
                  args.modality,
                  base_model=args.arch,
                  consensus_type=args.consensus_type,
                  dropout=args.dropout,
                  num_motion=args.num_motion,
                  img_feature_dim=args.img_feature_dim,
                  partial_bn=not args.no_partialbn,
                  dataset=args.dataset)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume from a saved checkpoint (.pth).
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint that was actually loaded
            # (previously printed args.evaluate by mistake).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    print(model)
    cudnn.benchmark = True

    # Data loading code. RGBDiff/RGBFlow normalize themselves, so they get
    # an identity transform instead of mean/std normalization.
    # Fix: the original condition ((a != 'RGBDiff') | (a != 'RGBFlow')) is a
    # tautology (no single value equals both), so the IdentityTransform
    # branch was unreachable.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames stacked per sample: 1 RGB frame, 5 for Flow/RGBDiff,
    # args.num_motion for fused RGBFlow.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion
    else:
        # Fix: previously fell through and left data_length unbound,
        # raising a confusing NameError further down.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                  isRGBFlow=(args.modality == 'RGBFlow')),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=False)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        dataset=args.dataset,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                  isRGBFlow=(args.modality == 'RGBFlow')),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=False)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # Per-run CSV log of training/validation progress.
    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        # args.lr_steps lists the epochs at which the LR is decayed.
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set every args.eval_freq epochs (and at the
        # final epoch), keeping the best-performing checkpoint
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #29
0
        [GroupScale(net.scale_size),
         GroupCenterCrop(net.input_size)])
elif args.test_crops == 10:
    cropping = transforms.Compose(
        [GroupOverSample(net.input_size, net.scale_size)])
else:
    raise ValueError(
        "Only 1 and 10 crops are supported while we got {}".format(
            args.test_crops))

# Evaluation transform: crop strategy chosen above, then stacking,
# tensor conversion and per-channel normalization.
eval_transform = transforms.Compose([
    cropping,
    Stack(roll=args.arch == 'BNInception'),
    ToTorchFormatTensor(div=args.arch != 'BNInception'),
    GroupNormalize(net.input_mean, net.input_std)
])

eval_set = TSNDataSet(args.data_path,
                      args.mode,
                      num_segments=args.test_segments,
                      new_length=1 if args.modality == "RGB" else 5,
                      modality=args.modality,
                      transform=eval_transform)

# One video per batch; order preserved so predictions line up with the list.
data_loader = torch.utils.data.DataLoader(eval_set,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=args.workers * 2,
                                          pin_memory=True)

# One device id per worker: the user-specified GPU list, or 0..workers-1.
devices = ([args.gpus[i] for i in range(args.workers)]
           if args.gpus is not None else list(range(args.workers)))
print(devices)
Example #30
0
def eval_one_model(num_class, modality, weights, devices, args):
    """Evaluate one TSN checkpoint on the test split and report accuracy.

    Args:
        num_class: number of target classes.
        modality: input modality string (e.g. 'RGB', 'RGBDiff', 'tvl1').
        weights: path to the checkpoint file to load.
        devices: list of GPU ids used for DataParallel inference.
        args: parsed command-line namespace (arch, dataset, paths, crops, ...).

    Returns:
        Tuple ``(output, video_labels, class_acc_map)`` where ``output`` holds
        one ``eval_video(...)`` result per test video, ``video_labels`` the
        ground-truth label codes, and ``class_acc_map`` the per-class
        accuracies mapped for ``args.dataset``.

    Raises:
        ValueError: if ``args.test_crops`` is neither 1 nor 10.
    """
    # init model (pretrained=False: all weights come from the checkpoint)
    net = TSN(num_class,
              1,
              modality,
              base_model=args.arch,
              consensus_type=args.crop_fusion_type,
              dropout=args.dropout,
              mdl=args.mdl,
              pretrained=False)

    # load checkpoint
    checkpoint = torch.load(weights)
    print("model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))
    net.load_state_dict(checkpoint['state_dict'])

    # transformer: 1 crop = scaled center crop; 10 crops = oversampling
    if args.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif args.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported while we got {}".format(
                args.test_crops))

    # prepare dataset: frame-file naming differs per dataset and modality
    if args.dataset == 'ucf101':
        naming_pattern = "frame{:06d}.jpg" if modality in [
            "RGB", "RGBDiff", 'tvl1'
        ] else args.flow_prefix + "{}_{:06d}.jpg"
    else:
        naming_pattern = "image_{:05d}.jpg" if modality in [
            "RGB", "RGBDiff"
        ] else args.flow_prefix + "{}_{:05d}.jpg"

    # batch_size=1: each batch is one video's full crop group
    data_loader = torch.utils.data.DataLoader(TSNDataSet(
        os.path.join(args.data_root_path,
                     ('jpegs_256' if modality == 'RGB' else 'tvl1_flow')),
        args.test_list,
        num_segments=args.test_segments,
        new_length=4 if modality == "RGB" else 6,
        modality=modality,
        image_tmpl=naming_pattern,
        test_mode=True,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            cropping,
            # roll/div toggled together for the BNInception backbone —
            # NOTE(review): presumably matches its preprocessing; confirm.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(net.input_mean, net.input_std),
        ])),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.workers * 2,
                                              pin_memory=True)

    # Inferencing: spread the model over the requested devices.
    net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices)
    net.eval()

    output = []  # [class probability, label code] per video
    # Iterate the loader directly. The original drove a manual
    # iter()/next() loop over range(len(dataset)) with a dead
    # `if i >= max_num: break` guard (range() never reaches max_num);
    # enumerate over the loader is equivalent and simpler.
    for i, (data, label) in enumerate(tqdm(data_loader)):
        output.append(
            eval_video(net, (i, data, label), num_class, modality, args))

    # Average scores across crops/segments, then take the arg-max class.
    video_pred = [np.argmax(np.mean(x[1], axis=0)) for x in output]
    video_labels = [x[2] for x in output]

    # summarize results via the per-class confusion matrix
    cf = confusion_matrix(video_labels, video_pred).astype(float)

    cls_cnt = cf.sum(axis=1)  # ground-truth count per class
    cls_hit = np.diag(cf)     # correct predictions per class

    cls_acc = cls_hit / cls_cnt
    print('Accuracy of {}, {:.02f}%'.format(modality, np.mean(cls_acc) * 100))

    # Free GPU memory before the caller evaluates the next model.
    del net
    del data_loader

    class_acc_map = class_acc_mapping(cls_acc, args.dataset)

    return output, video_labels, class_acc_map