Example #1
0
def main():
    """Build, optionally resume, then train and validate a TSN model.

    Reads the command line via ``Parse_args`` and mutates the module-level
    globals ``args`` and ``best_prec1``.  All progress is reported through
    the project logger ``log.l``.
    """
    global args, best_prec1
    args = Parse_args()

    log.l.info('Input command:\n ===========> python ' + ' '.join(sys.argv) +
               '  ===========>')

    # Number of target classes per supported dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'mm':
        num_class = 500
    elif args.dataset == 'thumos14':
        num_class = 21
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    log.l.info(
        '============= prepare the model and model\'s parameters ============='
    )

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Query model-specific preprocessing/optimisation settings BEFORE the
    # DataParallel wrapper hides these attributes behind ``.module``.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        log.l.info(
            '============== train from checkpoint (finetune mode) ================='
        )
        if os.path.isfile(args.resume):
            log.l.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated ``args.evaluate``
            # instead of the checkpoint path that was actually loaded.
            log.l.info(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            log.l.info(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    log.l.info('============== Now, loading data ... ==============\n')
    # RGBDiff inputs are frame differences, so they skip mean/std
    # normalisation (identity transform).
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames stacked per sampled segment.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(PerFrameData(
        args.frames_root,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        data_gap=args.data_gap,
        test_mode=False,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception pretrained weights expect BGR channel order and
            # un-scaled pixel values, hence the roll/div switches.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.data_workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(PerFrameData(
        args.frames_root,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        data_gap=args.data_gap,
        test_mode=True,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            # Deterministic centre crop for validation (no augmentation).
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.data_workers,
                                             pin_memory=True)

    log.l.info(
        '================= Now, define loss function and optimizer =============='
    )
    # NOTE(review): the original computed a per-class ``weight`` tensor
    # ([1] + [3] * (num_class - 1)) here but never passed it to the loss, so
    # the dead computation has been removed.  Re-add it via
    # ``torch.nn.CrossEntropyLoss(weight=...)`` if class weighting was the
    # intent.
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Log the per-parameter-group learning-rate/decay multipliers.
    for group in policies:
        log.l.info(
            ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']), group['lr_mult'],
                group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        log.l.info('Need val the data first...')
        # NOTE(review): unlike the usual pattern there is no ``return`` here,
        # so training still proceeds after this initial validation pass —
        # confirm this is intended.
        validate(val_loader, model, criterion, 0)

    log.l.info(
        '\n\n===================> TRAIN and VAL begins <===================\n')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #2
0
def main():
    """Build, optionally resume, then train and validate a TSN model on the
    dataset selected by the command-line arguments.

    Mutates the module-level globals ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Number of target classes per supported dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'VideoNet':
        num_class = 353
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Query preprocessing/optimisation settings BEFORE the DataParallel
    # wrapper hides these attributes behind ``.module``.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    #model = transfer_model(model,num_classes=num_class)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated ``args.evaluate``
            # instead of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff inputs are frame differences, so they skip
    # mean/std normalisation.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames stacked per sampled segment.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception pretrained weights expect BGR channel order and
            # un-scaled pixel values, hence the roll/div switches.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            # Deterministic centre crop for validation (no augmentation).
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Log the per-parameter-group learning-rate/decay multipliers.
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #3
0
def main():
    """Build, optionally resume, then train and validate a TSN model,
    saving per-validation score tensors alongside checkpoints.

    Mutates the module-level globals ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    # Dump the full configuration for reproducibility.
    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch + " Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")
    print(args.mode)
    # Number of target classes and frame-filename format per dataset.
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'somethingv2':
        num_class = 174
        rgb_read_format = "img_{:05d}.jpg"
    elif args.dataset == 'NTU_RGBD':
        num_class = 120
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'tinykinetics':
        num_class = 150
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                non_local=args.non_local)

    # Query preprocessing settings BEFORE the DataParallel wrapper hides
    # these attributes behind ``.module``.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    # ``policies`` is an iterable of per-parameter-group dicts (params key
    # plus optimizer options), as accepted by torch.optim optimizers.
    policies = model.get_optim_policies(args.dataset)

    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    model_dict = model.state_dict()

    # Per-architecture preprocessing switches: ``roll`` selects BGR channel
    # order, ``div`` selects 0-1 pixel scaling.
    # NOTE(review): ``new_state_dict`` is built (state dict minus 'fc'
    # layers) but never loaded anywhere below — presumably a leftover from a
    # transfer-learning path; the unused ``pretrained_dict`` locals were
    # removed.
    if args.arch == "resnet50":
        new_state_dict = {}
        div = False
        roll = True
    elif args.arch == "resnet34":
        new_state_dict = {}
        for k, v in model_dict.items():
            if ('fc' not in k):
                new_state_dict.update({k: v})
        div = False
        roll = True
    elif (args.arch[:3] == "TCM"):
        new_state_dict = {}
        for k, v in model_dict.items():
            if ('fc' not in k):
                new_state_dict.update({k: v})
        div = True
        roll = False
    else:
        # BUG FIX: an unrecognised arch previously left ``div``/``roll``
        # undefined and crashed later with NameError inside the data
        # pipeline; fail fast with a clear message instead.
        raise ValueError('Unknown architecture ' + args.arch)

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff inputs are frame differences, so they skip
    # mean/std normalisation.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames stacked per sampled segment.
    # NOTE(review): Flow/RGBDiff use length 1 here, unlike the usual TSN
    # setting of 5 — confirm this is intentional for this variant.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 1

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            mode=args.mode,
            image_tmpl=args.rgb_prefix + rgb_read_format if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + rgb_read_format,
            img_start_idx=args.img_start_idx,
            transform=torchvision.transforms.Compose([
                GroupScale((240, 320)),
                #                        GroupScale(int(scale_size)),
                train_augmentation,
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            mode=args.mode,
            image_tmpl=args.rgb_prefix + rgb_read_format if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + rgb_read_format,
            img_start_idx=args.img_start_idx,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale((240, 320)),
                #                        GroupScale((224)),
                #                        GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=roll),
                ToTorchFormatTensor(div=div),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()

    else:
        raise ValueError("Unknown loss type")

    # Log the per-parameter-group learning-rate/decay multipliers.
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    output_list = []
    # Evaluation-only mode: validate once, save the score tensor, and exit.
    if args.evaluate:
        prec1, score_tensor = validate(val_loader,
                                       model,
                                       criterion,
                                       temperature=100)
        output_list.append(score_tensor)
        save_validation_score(output_list, filename='score.pt')
        # BUG FIX: the message previously claimed 'score_inf5.pt' although
        # the score is saved as 'score.pt' (the filename passed above).
        print("validation score saved in {}".format('/'.join(
            (args.val_output_folder, 'score.pt'))))
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # train for one epoch; ``train`` returns the softmax temperature to
        # use for the subsequent validation pass.
        temperature = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, score_tensor = validate(val_loader,
                                           model,
                                           criterion,
                                           temperature=temperature)

            output_list.append(score_tensor)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)

            output_best = 'Best Prec@1: %.3f\n' % (best_prec1)
            print(output_best)

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    # save validation score
    save_validation_score(output_list)
    print("validation score saved in {}".format('/'.join(
        (args.val_output_folder, 'score.pt'))))
Example #4
0
def main():
    """Build, optionally resume, then train and validate a TSN model while
    plotting and persisting per-epoch loss/accuracy curves with matplotlib.

    Mutates the module-level globals ``args`` and ``best_prec1``; writes
    ``*.npy`` history arrays and ``*.jpg`` plots under ``args.snapshot_pref``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Number of target classes per supported dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'myDataset':
        num_class = 12
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Query preprocessing/optimisation settings BEFORE the DataParallel
    # wrapper hides these attributes behind ``.module``.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated ``args.evaluate``
            # instead of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff inputs are frame differences, so they skip
    # mean/std normalisation.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames stacked per sampled segment.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception pretrained weights expect BGR channel order and
            # un-scaled pixel values, hence the roll/div switches.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            # Deterministic centre crop for validation (no augmentation).
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Log the per-parameter-group learning-rate/decay multipliers.
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # One figure with four stacked axes: val loss/acc, train loss/acc.
    f, axs = plt.subplots(4, 1, figsize=(10, 5))
    if args.start_epoch == 0:
        # Fresh run: start empty histories.
        train_acc = []
        train_loss = []
        val_acc = []
        val_loss = []
        epochs = []
        val_epochs = []
    else:
        # Resumed run: reload the persisted histories so the curves continue.
        train_acc = np.load("./%s/train_acc.npy" % args.snapshot_pref).tolist()
        train_loss = np.load("./%s/train_loss.npy" %
                             args.snapshot_pref).tolist()
        val_acc = np.load("./%s/val_acc.npy" % args.snapshot_pref).tolist()
        val_loss = np.load("./%s/val_loss.npy" % args.snapshot_pref).tolist()
        epochs = np.load("./%s/epochs.npy" % args.snapshot_pref).tolist()
        val_epochs = np.load("./%s/val_epochs.npy" %
                             args.snapshot_pref).tolist()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        acc, loss = train(train_loader, model, criterion, optimizer, epoch)
        train_acc.append(acc)
        train_loss.append(loss)
        epochs.append(epoch)
        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, v_loss = validate(val_loader, model, criterion,
                                     (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
            val_acc.append(prec1)
            val_loss.append(v_loss)
            val_epochs.append(epoch)
        # NOTE(review): each iteration re-plots the full history as a new
        # line object rather than updating the existing one, so the axes
        # accumulate overlapping lines over long runs — consider
        # Line2D.set_data if this becomes slow.
        axs[0].plot(val_epochs, val_loss, c='b', marker='.', label='val_loss')
        axs[1].plot(val_epochs, val_acc, c='r', marker='.', label='val_acc')
        axs[2].plot(epochs, train_loss, c='b', marker='.', label='train_loss')
        axs[3].plot(epochs, train_acc, c='r', marker='.', label='train_acc')
        plt.title('TSN_' + args.snapshot_pref)
        if epoch == 0:
            # Attach legends once, after the first line of each axis exists.
            for i in range(4):
                axs[i].legend(loc='best')
        plt.pause(0.000001)
        if not os.path.exists(args.snapshot_pref):
            os.makedirs(args.snapshot_pref)
        plt.savefig('./%s/%s.jpg' % (args.snapshot_pref, str(epoch).zfill(5)))
        # Persist histories every epoch so a resumed run can reload them.
        np.save("./%s/train_acc.npy" % args.snapshot_pref, train_acc)
        np.save("./%s/train_loss.npy" % args.snapshot_pref, train_loss)
        np.save("./%s/val_acc.npy" % args.snapshot_pref, val_acc)
        np.save("./%s/val_loss.npy" % args.snapshot_pref, val_loss)
        np.save("./%s/val_epochs.npy" % args.snapshot_pref, val_epochs)
        np.save("./%s/epochs.npy" % args.snapshot_pref, epochs)
Example #5
0
def main():
    """Build, optionally resume, then train and validate a TSN model, with
    progress visualised through a ``vis.Visualizer`` instance.

    Mutates the module-level globals ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Number of target classes per supported dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)
    '''
    consensue_type = avg
    base_model = resnet_101
    dropout : 0.5
    
    '''
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # crop_size is 224.
    crop_size = model.crop_size
    # scale_size is 256/224.
    scale_size = model.scale_size
    # Mean/std differ per modality.
    input_mean = model.input_mean
    input_std = model.input_std

    policies = model.get_optim_policies()
    # get_augmentation combines GroupMultiScaleCrop (fixed-position crops
    # resized to 224 via bilinear interpolation) and
    # GroupRandomHorizontalFlip.
    train_augmentation = model.get_augmentation()
    print(args.gpus)
    # NOTE(review): unlike the sibling scripts this one does not wrap the
    # model in DataParallel even though ``args.gpus`` is printed — confirm
    # single-GPU training is intended.
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated ``args.evaluate``
            # instead of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff inputs are frame differences, so they skip
    # mean/std normalisation.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames stacked per sampled segment.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # ``roll`` exists because BNInception pretrained weights expect BGR
    # rather than RGB channel order.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="im{}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            # Deterministic centre crop for validation (no augmentation).
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")
    # Log the per-parameter-group learning-rate/decay multipliers.
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    # Typically args.lr is 1e-3 here.
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return
    viz = vis.Visualizer()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, viz)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, viz=viz)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            # BUG FIX: the checkpoint previously stored the weights under
            # the key 'test_crops' and saved the current ``prec1`` instead
            # of the running best — the resume branch above reads
            # checkpoint['state_dict'] and checkpoint['best_prec1'], so
            # those checkpoints could never be resumed correctly.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Example #6
0
def main():
    """Train a multi-label TSN on the movie dataset with BCE-with-logits loss.

    Builds the model from CLI args, optionally resumes from a checkpoint,
    constructs train/val data loaders, then trains for ``args.epochs``
    epochs. Whenever validation mAP improves, saves a checkpoint and the
    raw validation score matrix, and appends a line to the record file.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map the dataset name to its class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'movie':
        num_class = 21
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing parameters before DataParallel hides these
    # attributes behind .module.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            #best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); previously this printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff frames are difference signals; skip mean/std normalization.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: an unknown modality previously left data_length undefined
        # and crashed later with a confusing NameError.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSetMovie(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="frame_{:04d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR channel order and [0, 255] pixels.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSetMovie(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="frame_{:04d}.jpg" if args.modality in ["RGB", "RGBDiff"]
        else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=int(args.batch_size /
                                                            2),
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # Multi-label classification: one sigmoid/BCE term per class.
    criterion = torch.nn.BCEWithLogitsLoss().cuda()

    # Log the per-group optimizer policy (lr/decay multipliers).
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    zero_time = time.time()
    best_map = 0
    print('Start training...')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        start_time = time.time()
        trainloss = train(train_loader, model, criterion, optimizer, epoch)
        # BUGFIX: "Traing" -> "Training" typo in the log message.
        print('Training loss %4f Epoch %d' % (trainloss, epoch))
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            valloss, mAP, wAP, output_mtx = validate(val_loader, model,
                                                     criterion)
            end_time = time.time()
            epoch_time = end_time - start_time
            total_time = end_time - zero_time
            # BUGFIX: "uesd" -> "used" typo in the log message.
            print('Total time used: %s Epoch %d time used: %s' %
                  (str(datetime.timedelta(seconds=int(total_time))), epoch,
                   str(datetime.timedelta(seconds=int(epoch_time)))))
            print(
                'Train loss: {0:.4f} val loss: {1:.4f} mAP: {2:.4f} wAP: {3:.4f}'
                .format(trainloss, valloss, mAP, wAP))
            # Save a checkpoint and the raw score matrix whenever mAP improves.
            is_best = mAP > best_map
            if is_best:
                best_map = mAP
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }, is_best, epoch)
                npy_name = str(epoch) + args.result_path
                np.save(npy_name, output_mtx)
            # Append a one-line record for every validation epoch.
            with open(args.record_path, 'a') as file:
                file.write(
                    'Epoch:[{0}]'
                    'Train loss: {1:.4f} val loss: {2:.4f} map: {3:.4f}\n'.
                    format(epoch + 1, trainloss, valloss, mAP))

    print('************ Done!... ************')
def main():
    """Two-phase training: full THUMOS training, then few-shot fine-tuning.

    Phase 1 trains TSN/TRN on the 'thumos' dataset and checkpoints the best
    top-1 model. Phase 2 replaces the consensus classifier with a fresh
    14-way head and fine-tunes on the 'thumos-fs' few-shot split.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    # This script is hard-wired to RGB THUMOS regardless of CLI flags.
    args.dataset = "thumos"
    args.modality = "RGB"

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        'thumos', args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing parameters before DataParallel hides these
    # attributes behind .module.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); previously this printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff frames are difference signals; skip mean/std normalization.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: an unknown modality previously left data_length undefined.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # Inception-family backbones expect BGR order and [0, 255] pixels.
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Log the per-group optimizer policy (lr/decay multipliers).
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    ###############################################################################
    # ALL LINES AFTER THIS REPRESENT NEW CODE WRITTEN TO TRAIN THE FEW-SHOT MODEL #
    ###############################################################################

    for i in range(10):
        print("TRAINING FEW-SHOT MODEL")

    num_fs_class = 14  # number of few shot classes
    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        'thumos-fs', args.modality)  # load few-shot dataset

    # Replace the fully connected layers to fit the new 14-class task.
    # fs_model aliases model: the backbone weights from phase 1 are reused.
    fs_model = model
    fs_model.module.consensus.classifier = nn.Sequential(
        nn.ReLU(), nn.Linear(in_features=768, out_features=512, bias=True),
        nn.ReLU(),
        nn.Linear(in_features=512, out_features=num_fs_class,
                  bias=True)).cuda()

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # NOTE: a second `if args.evaluate:` guard used to sit here, but it was
    # unreachable -- the identical check above already returns, so removed.

    # NOTE(review): `policies` was computed from the model BEFORE the
    # classifier head was replaced, so the new head's parameters may not be
    # covered by the optimizer -- confirm against get_optim_policies().
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    args.store_name = '_'.join([
        'fs_TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    # Restart the best-score tracking for the few-shot task.
    best_prec1 = 0
    log_fs_training = open(
        os.path.join(args.root_log, '%s.csv' % "fs-logging"), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        # Free cached GPU memory left over from phase 1.
        torch.cuda.empty_cache()

        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, fs_model, criterion, optimizer, epoch,
              log_fs_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, fs_model, criterion,
                             (epoch + 1) * len(train_loader), log_fs_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': fs_model.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best,
                filename='fs_checkpoint.pth.tar')
Beispiel #8
0
def main():
    """Train an STModeling TSN and plot the collected accuracy/loss curves.

    Builds the model from CLI args, optionally resumes, trains with
    cross-entropy (Adam for the DNDF consensus head, SGD otherwise),
    validates every ``args.eval_freq`` epochs, checkpoints the best top-1
    model, and finally plots training statistics.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset,
                                                                                                       args.modality)
    num_class = len(categories)

    args.store_name = '_'.join(['STModeling', args.dataset, args.modality, args.arch, args.consensus_type,
                                'segment%d' % args.num_segments])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args)

    # Capture preprocessing parameters before DataParallel hides these
    # attributes behind .module.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); previously this printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    print(model)
    cudnn.benchmark = True

    # Data loading code
    # BUGFIX: the original used `(a != x) | (a != y)`, which is always true,
    # so IdentityTransform was never selected. RGBDiff/RGBFlow carry
    # difference signals and should skip mean/std normalization.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion
    else:
        # BUGFIX: an unknown modality previously left data_length undefined.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # Inception-family backbones expect BGR order and
                       # [0, 255] pixel range.
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                             isRGBFlow=(args.modality == 'RGBFlow')),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=False)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   dataset=args.dataset,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                             isRGBFlow=(args.modality == 'RGBFlow')),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=False)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Log the per-group optimizer policy (lr/decay multipliers).
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # The DNDF consensus head is trained with Adam over all trainable
    # parameters instead of the per-group SGD policy.
    if args.consensus_type == 'DNDF':
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=1e-5)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    history = {
        'accuracy': [],
        'val_accuracy': [],
        'loss': [],
        'val_loss': []
    }
    model_details = {
        'backbone': args.arch,
        'transformer_arch': args.consensus_type,
        'lr': args.lr,
        'batch_size': args.batch_size
    }
    for epoch in range(args.start_epoch, args.epochs):
        # The Adam/DNDF path keeps a fixed learning rate.
        if not args.consensus_type == 'DNDF':
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        acc, loss = train(train_loader, model, criterion, optimizer, epoch, log_training)
        history['accuracy'].append(acc)
        history['loss'].append(loss)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, val_loss = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)

            # BUGFIX: record validation metrics only on epochs where
            # validation actually ran; previously prec1/val_loss raised a
            # NameError whenever eval_freq > 1.
            history['val_accuracy'].append(prec1)
            history['val_loss'].append(val_loss)
    plot_utils.plot_statistics(history, model_details)
Beispiel #9
0
def main():
    """Train TRN on a dataset resolved via datasets_video, logging per-experiment.

    Builds the model from CLI args, optionally resumes, trains with
    cross-entropy, validates every ``args.eval_freq`` epochs, and stores
    logs plus git commit/diff info under a timestamped experiment directory.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)
    print("num_class: " + str(num_class))

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing parameters before DataParallel hides these
    # attributes behind .module.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); previously this printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff frames are difference signals; skip mean/std normalization.
        normalize = IdentityTransform()

    # NOTE: unlike the sibling scripts, Flow uses new_length=1 here.
    if args.modality == 'Flow' or args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['RGBDiff']:
        data_length = 5
    else:
        # BUGFIX: an unknown modality previously left data_length undefined.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # Inception-family backbones expect BGR order and [0, 255] pixels.
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    print("Creating val_loader:")
    print("args.root_path: " + str(args.root_path))
    print("args.val_list: " + str(args.val_list))
    print("args.num_segments: " + str(args.num_segments))
    print("data_length: " + str(data_length))
    print("modality: " + str(args.modality))
    print("prefix: " + str(prefix))
    print("scale_size: " + str(int(scale_size)))
    print("crop_size: " + str(crop_size))
    print("args.arch: " + str(args.arch))
    print("args.batch_size: " + str(args.batch_size))
    print("args.workers: " + str(args.workers))
    print("")

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Log the per-group optimizer policy (lr/decay multipliers).
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # Capture the current git state so each experiment is reproducible.
    git_log_output = subprocess.run(
        [
            'git', 'log', '-n1',
            '--pretty=format:commit: %h%nauthor: %an%n%s%n%b'
        ],
        stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')
    git_diff_output = subprocess.run(
        ['git', 'diff'], stdout=subprocess.PIPE).stdout.decode('utf-8')

    # Resolve the experiment name: CLI flag first, then the commit subject
    # line ("experiment: <name>").
    if args.exp_name == '':
        exp_name_match = re.match(r'experiment: *(.+)', git_log_output[2])
        if exp_name_match is None:
            print(
                'Experiment name required:\n'
                '  current commit subject does not specify an experiment, and\n'
                '  --experiment_name was not specified')
            # BUGFIX: exit with a non-zero status -- this is an error path,
            # and exit code 0 would signal success to calling scripts.
            sys.exit(1)
        args.exp_name = exp_name_match.group(1)
    print(f'experiment name: {args.exp_name}')

    # BUGFIX: renamed the local from `time` to `timestamp` -- the old name
    # shadowed the `time` module.
    timestamp = str(datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    exp_dir_path = os.path.join(args.root_log, args.exp_name, timestamp)
    log_file_path = os.path.join(exp_dir_path, f'{args.store_name}.csv')
    print("log_file_path:")
    print(log_file_path)
    os.makedirs(exp_dir_path)
    log_training = open(log_file_path, 'w')
    # store information about git status
    git_info_path = os.path.join(exp_dir_path, 'experiment_info.txt')
    with open(git_info_path, 'w') as f:
        f.write('\n'.join(git_log_output))
        f.write('\n\n' + ('=' * 80) + '\n')
        f.write(git_diff_output)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, timestamp)
Beispiel #10
0
def main():
    """Train (or resume training of) a TSN model with TensorBoard logging.

    Parses CLI args, builds the TSN model and train/val loaders, optionally
    restores weights from a checkpoint, then runs the epoch loop, writing
    loss/top-1/top-5 scalars to a SummaryWriter and checkpointing the best
    validation top-1.
    """
    global args, best_prec1, tb_writer, log_dir
    args = parser.parse_args()

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'ucf11':
        num_class = 11
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    log_dir = 'logs/{}'.format(args.save_path)
    tb_writer = SummaryWriter(log_dir)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                new_length=args.new_length,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                pretrained=not args.no_pretrained)

    # Preprocessing statistics and augmentation pipeline come from the
    # chosen backbone; grab them before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    device_ids = [int(id) for id in args.gpus.split(',')]
    model = torch.nn.DataParallel(model, device_ids=device_ids).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            # args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # Checkpoints store model.module.state_dict() (see save below),
            # so re-add the DataParallel "module." prefix before loading.
            base_dict = {
                "module." + k: v
                for k, v in list(checkpoint['state_dict'].items())
            }
            model.load_state_dict(base_dict)
            # Fix: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # RGBDiff normalizes implicitly via frame differencing, so skip it there.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frame filename templates differ per dataset and per modality; flow
    # templates take a direction placeholder ("{}") plus the frame index.
    if args.dataset == 'ucf101' or args.dataset == 'ucf11':
        naming_pattern = "frame{:06d}.jpg" if args.modality in [
            "RGB", "RGBDiff", 'tvl1'
        ] else args.flow_prefix + "{}_{:06d}.jpg"
    else:
        naming_pattern = "image_{:05d}.jpg" if args.modality in [
            "RGB", "RGBDiff"
        ] else args.flow_prefix + "{}_{:05d}.jpg"

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.data_root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=args.new_length,
        modality=args.modality,
        image_tmpl=naming_pattern,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR input and un-divided pixel values.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.data_root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=args.new_length,
        modality=args.modality,
        image_tmpl=naming_pattern,
        random_shift=False,
        dataset=args.dataset,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        avg_loss, avg_top1, avg_top5 = train(train_loader, model, criterion,
                                             optimizer, epoch)
        tb_writer.add_scalar('Monitor/Training Loss', avg_loss, epoch)
        tb_writer.add_scalar('Monitor/Training Top1', avg_top1, epoch)
        tb_writer.add_scalar('Monitor/Training Top5', avg_top5, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            avg_loss, avg_top1, avg_top5 = validate(
                val_loader, model, criterion, (epoch + 1) * len(train_loader))

            tb_writer.add_scalar('Monitor/Validation Loss', avg_loss, epoch)
            tb_writer.add_scalar('Monitor/Validation Top1', avg_top1, epoch)
            tb_writer.add_scalar('Monitor/Validation Top5', avg_top5, epoch)

            # remember best prec@1 and save checkpoint
            is_best = avg_top1 > best_prec1
            best_prec1 = max(avg_top1, best_prec1)
            # Save the unwrapped module's state dict so checkpoints are
            # loadable without DataParallel (see the resume path above).
            save_checkpoint(
                log_dir, {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.module.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Beispiel #11
0
def main():
    """Evaluate a two-stream TSN (RGB + optical flow) on validation lists.

    Builds one RGB model and one Flow model with the same backbone
    settings, optionally loads a checkpoint into each, builds one loader
    per stream, then runs a single fused validation pass. There is no
    training loop in this entry point.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # First stream: RGB appearance model.
    model = TSN(num_class,
                args.num_segments,
                "RGB",
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)
    # Second stream: optical-flow motion model with identical settings.
    model2 = TSN(num_class,
                 args.num_segments,
                 "Flow",
                 base_model=args.arch,
                 consensus_type=args.consensus_type,
                 dropout=args.dropout,
                 partial_bn=not args.no_partialbn)

    # Preprocessing stats/policies are taken from the RGB model only and
    # reused for both streams below.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model2 = torch.nn.DataParallel(model2, device_ids=args.gpus).cuda()

    # Restore RGB-stream weights; checkpoints were saved with the
    # DataParallel "module." prefix, so they load directly here.
    if args.resume_rgb:
        if os.path.isfile(args.resume_rgb):
            print(("=> loading checkpoint '{}'".format(args.resume_rgb)))
            checkpoint = torch.load(args.resume_rgb)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}'".format(args.resume_rgb)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume_rgb)))

    # Restore Flow-stream weights the same way.
    if args.resume_flow:
        if os.path.isfile(args.resume_flow):
            print(("=> loading checkpoint '{}'".format(args.resume_flow)))
            checkpoint = torch.load(args.resume_flow)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model2.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}'".format(args.resume_flow)))

        else:
            print(("=> no checkpoint found at '{}'".format(args.resume_flow)))

    cudnn.benchmark = True

    # Data loading code
    # NOTE(review): both loaders normalize with the RGB model's
    # input_mean/input_std — confirm these stats are appropriate for the
    # flow stream as well.
    normalize = GroupNormalize(input_mean, input_std)

    # RGB validation loader: one frame per snippet (new_length=1).
    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=1,
        modality="RGB",
        image_tmpl="frame{:06d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # Flow validation loader: stacks of 5 flow frames per snippet.
    # NOTE(review): the template "frame{:06d}.jpg" has no x/y direction
    # placeholder, unlike the usual "{}_{:06d}.jpg" flow convention —
    # confirm it matches how the flow frames were extracted.
    val_loader2 = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list2,
        num_segments=args.num_segments,
        new_length=5,
        modality="Flow",
        image_tmpl="frame{:06d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # Single fused validation pass over both streams.
    validate(val_loader, val_loader2, model, model2, criterion)
Beispiel #12
0
def main():
    """Train or evaluate a memory-augmented TSN model.

    Parses args, persists them to opts.json, resolves dataset lists via
    datasets_video.return_dataset, builds the (heavily parameterized) TSN
    model and loaders, then either runs a single evaluation pass
    (--evaluate) or the full train/validate/checkpoint loop with
    TensorBoard logging.
    """
    global args, best_prec1, num_train_dataset, num_val_dataset, writer
    args = parser.parse_args()
    _fill_in_None_args()
    _join_result_path()
    check_rootfolders()
    # Persist the full argument set next to the results for reproducibility.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality, args.root_path, args.file_type)
    num_class = len(categories)


    # Encode the key hyper-parameters into the run name used for logs and
    # checkpoints, so runs are distinguishable on disk.
    args.store_name = '_'.join([args.consensus_type, args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d'% args.num_segments, \
        'key%d'%args.key_dim, 'value%d'%args.value_dim, 'query%d'%args.query_dim, 'queryUpdateby%s'%args.query_update_method,\
        'NoSoftmax%s'%args.no_softmax_on_p, 'hopMethod%s'%args.hop_method])
    print('storing name: ' + args.store_name)

    model = TSN(
        num_class,
        args.num_segments,
        args.modality,
        base_model=args.arch,
        consensus_type=args.consensus_type,
        dropout=args.dropout,
        key_dim=args.key_dim,
        value_dim=args.value_dim,
        query_dim=args.query_dim,
        query_update_method=args.query_update_method,
        partial_bn=not args.no_partialbn,
        freezeBN_Eval=args.freezeBN_Eval,
        freezeBN_Require_Grad_True=args.freezeBN_Require_Grad_True,
        num_hop=args.hop,
        hop_method=args.hop_method,
        num_CNNs=args.num_CNNs,
        no_softmax_on_p=args.no_softmax_on_p,
        freezeBackbone=args.freezeBackbone,
        CustomPolicy=args.CustomPolicy,
        sorting=args.sorting,
        MultiStageLoss=args.MultiStageLoss,
        MultiStageLoss_MLP=args.MultiStageLoss_MLP,
        how_to_get_query=args.how_to_get_query,
        only_query=args.only_query,
        CC=args.CC,
        channel=args.channel,
        memory_dim=args.memory_dim,
        image_resolution=args.image_resolution,
        how_many_objects=args.how_many_objects,
        Each_Embedding=args.Each_Embedding,
        Curriculum=args.Curriculum,
        Curriculum_dim=args.Curriculum_dim,
        lr_steps=args.lr_steps,
    )

    # Preprocessing statistics and augmentation pipeline come from the
    # chosen backbone; grab them before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # RGBDiff normalizes implicitly via frame differencing, so skip it there.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames consumed per snippet: 1 for RGB, stacks of 5 for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and crashed later with a NameError.
        raise ValueError('Unsupported modality ' + args.modality)

    train_data = TSNDataSet(
        args.root_path,
        args.train_list,
        args.file_type,
        num_segments=args.num_segments,
        MoreAug_Rotation=args.MoreAug_Rotation,
        MoreAug_ColorJitter=args.MoreAug_ColorJitter,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        phase='train',
        transform1=torchvision.transforms.Compose([
            train_augmentation,  # GroupMultiScaleCrop[1, .875, .75, .66] AND GroupRandomHorizontalFlip
        ]),
        transform2=torchvision.transforms.Compose([
            # BNInception/InceptionV3 expect BGR and un-divided pixels.
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,  # GroupNormalize
        ]),
        image_resolution=args.image_resolution)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=False,
                                               drop_last=True)

    val_data = TSNDataSet(
        args.root_path,
        args.val_list,
        args.file_type,
        num_segments=args.num_segments,
        MoreAug_Rotation=args.MoreAug_Rotation,
        MoreAug_ColorJitter=args.MoreAug_ColorJitter,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        phase='test',
        transform1=torchvision.transforms.Compose(
            [GroupScale(int(scale_size)),
             GroupCenterCrop(crop_size)]),
        transform2=torchvision.transforms.Compose([
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        image_resolution=args.image_resolution)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=False,
                                             drop_last=True)
    num_train_dataset = len(train_data)
    num_val_dataset = len(val_data)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # Per-sample losses; reduction='none' replaces the deprecated
        # reduce=False and is behaviorally identical.
        criterion = torch.nn.CrossEntropyLoss(reduction='none').cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(policies,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

    # Evaluation-only mode: one validation pass writing per-video results.
    if args.evaluate:
        json_file_path = os.path.join(
            args.result_path, 'results_epoch%d.json' % args.evaluation_epoch)
        validate(val_loader,
                 model,
                 criterion,
                 0,
                 json_file=json_file_path,
                 idx2class=categories,
                 epoch=args.evaluation_epoch)
        return

    writer = SummaryWriter(args.result_path)
    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'a')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            json_file_path = os.path.join(args.result_path,
                                          'results_epoch%d.json' % (epoch + 1))
            prec1 = validate(val_loader,
                             model,
                             criterion, (epoch + 1) * num_train_dataset,
                             log=log_training,
                             json_file=json_file_path,
                             idx2class=categories,
                             epoch=epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
    log_training.close()
    writer.close()
Beispiel #13
0
def main():
    """Train or evaluate a standard TSN model (annotated tutorial variant).

    Builds the TSN model from CLI args, optionally resumes from a saved
    checkpoint, constructs train/val loaders, then runs the standard
    train/validate/checkpoint loop.
    """
    global args, best_prec1
    args = parser.parse_args()
    # Map dataset name to its number of action classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset '+args.dataset)
    # TSN is parameterized by: the class count; args.num_segments, the
    # number of snippets per video (K in the paper, default 3); the input
    # modality (RGB, optical Flow, ...); the backbone (args.arch, e.g.
    # resnet101 or BNInception); the snippet consensus function
    # (args.consensus_type, e.g. avg); and the dropout rate.
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    # Preprocessing statistics and augmentation pipeline come from the
    # chosen backbone; grab them before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Multi-GPU training via DataParallel.
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # args.resume allows continuing from a saved checkpoint (.pth path);
    # torch.load reads it and load_state_dict initializes the network.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # RGBDiff normalizes implicitly via frame differencing, so skip it there.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames consumed per snippet: 1 for RGB, stacks of 5 for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and crashed later with a NameError.
        raise ValueError('Unsupported modality ' + args.modality)
    # TSNDataSet reads the raw frames and yields (tensor, int label) pairs;
    # DataLoader batches them (the usual torch Dataset/DataLoader pattern).

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("",  args.train_list,num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception expects BGR input and un-divided pixels.
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)



    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # Training loop: adjust_learning_rate decays the LR at the epochs listed
    # in args.lr_steps; every args.eval_freq epochs (and at the end) run
    # validation and checkpoint, tracking the best top-1 accuracy.
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
Beispiel #14
0
def main():
    """Train a two-class TSN model, saving best-epoch predictions.

    Builds the model (num_class fixed at 2), optionally resumes from a
    checkpoint, constructs loaders, then trains; after every epoch it
    validates, pickles the prediction dict whenever a new best top-1 is
    reached, and checkpoints.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Ensure the per-modality record directory exists. makedirs with
    # exist_ok is race-free and also creates missing parents (the old
    # exists()+mkdir() pair failed if args.record_path did not exist).
    os.makedirs(args.record_path + args.modality.lower(), exist_ok=True)

    # Binary classification task.
    num_class = 2

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Preprocessing statistics and augmentation pipeline come from the
    # chosen backbone.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Single-GPU here (checkpoints therefore have no "module." prefix).
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # RGBDiff normalizes implicitly via frame differencing, so skip it there.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames consumed per snippet: 1 for RGB, stacks of 5 for Flow/RGBDiff.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through and crashed later with a NameError.
        raise ValueError('Unsupported modality ' + args.modality)

    train_set = TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="frame_{:06d}.jpg"
        if args.modality in ["RGB", "RGBDiff"] else "{:06d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR input and un-divided pixels.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_set = TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="frame_{:06d}.jpg"
        if args.modality in ["RGB", "RGBDiff"] else "{:06d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer

    criterion = torch.nn.CrossEntropyLoss().cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set (every epoch, unlike the usual
        # eval_freq-gated variants)
        prec1, pred_dict = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if is_best:
            # Persist the best epoch's per-video predictions; the context
            # manager closes the file (redundant f.close() removed).
            with open(
                    args.record_path + args.modality.lower() + '/' +
                    args.snapshot_pref + args.modality.lower() +
                    '_video_preds.pickle', 'wb') as f:
                pickle.dump(pred_dict, f)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
Beispiel #15
0
def main():
    """Train (or just evaluate) a TSN model with optional contrastive loss.

    Configuration comes from the module-level ``parser``.  Builds the model,
    the train/val data pipelines, the loss and the SGD optimizer, then runs
    the epoch loop, validating every ``eval_freq`` epochs and checkpointing
    whenever validation prec@1 improves.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map the dataset name to its number of target classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Something-Something frame files carry no 'image_' prefix.
    if args.dataset == 'something':
        img_prefix = ''
    else:
        img_prefix = 'image_'
    # Persist the full option set next to the results for reproducibility.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)
    # lstm_out_type is only meaningful for (conv-)LSTM consensus modules.
    if not (args.consensus_type == 'lstm'
            or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                dataset=args.dataset)

    # Query preprocessing constants and per-group optimizer policies from the
    # raw model, before it is wrapped in DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume training state from a checkpoint file.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated args.evaluate
            # instead of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff inputs are frame differences, so the usual
    # mean/std normalization is replaced with the identity transform.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames consumed per segment: one RGB image, or a 10-frame stack.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10

    # Optional frame-order perturbation applied to the training stream.
    if args.train_reverse:
        train_temp_transform = ReverseFrames(size=data_length *
                                             args.num_segments)
    elif args.train_shuffle:
        train_temp_transform = ShuffleFrames(size=data_length *
                                             args.num_segments)
    else:
        train_temp_transform = IdentityTransform()
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg" if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + "{}_{:05d}.jpg",
            temp_transform=train_temp_transform,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception expects BGR channel order and 0-255 range.
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ]),
            contrastive_mode=args.contrastive_mode),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    # Matching (optional) perturbation for the validation stream.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length *
                                           args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length *
                                           args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg" if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        if args.contrastive_mode:
            # Contrastive training still validates with plain cross-entropy.
            criterion = ContrastiveLoss(m1=args.contras_m1,
                                        m2=args.contras_m2).cuda()
            val_criterion = torch.nn.CrossEntropyLoss().cuda()
        else:
            criterion = torch.nn.CrossEntropyLoss().cuda()
            val_criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        print('evaluating')
        val_logger = os.path.join(args.result_path, 'test.log')
        validate(val_loader, model, val_criterion, 0, val_logger=val_logger)
        return

    # Loggers are plain file paths; train()/validate() append to them.
    train_logger = os.path.join(args.result_path, 'train.log')
    val_logger = os.path.join(args.result_path, 'val.log')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        if args.contrastive_mode:
            train_contrastive(train_loader,
                              model,
                              criterion,
                              optimizer,
                              epoch,
                              train_logger=train_logger,
                              args=args)
        else:
            # train for one epoch
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  train_logger=train_logger)
        # Blank separator line between epochs in the training log.
        with open(train_logger, 'a') as f:
            f.write('\n')

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             val_criterion, (epoch + 1) * len(train_loader),
                             val_logger=val_logger)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Beispiel #16
0
def main():
    """Train an ECO / ECOfull / C3DRes18 TSN model.

    Prints the environment and configuration, builds the model, seeds its
    weights from an architecture-specific pretrained initializer (filling any
    leftover tensors with BN=1 / Xavier / bias=0), then runs the standard
    TSN train/validate loop with best-checkpoint saving.
    """
    global args, best_prec1
    args = parser.parse_args()

    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    # Dump the parsed configuration for the run log.
    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch + " Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")

    # Dataset -> (number of classes, frame-number format in filenames).
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        # BUG FIX: rgb_read_format was never assigned for hmdb51, which made
        # building the data loaders fail with NameError.  Five-digit
        # numbering matches the other datasets here — NOTE(review): confirm
        # against the actual extracted hmdb51 frame names.
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:04d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Optimizers support per-parameter options: pass an iterable of dicts,
    # each defining a parameter group ('params' key) plus group-specific
    # options (here: lr_mult / decay_mult).
    policies = model.get_optim_policies()

    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    model_dict = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    # BUG FIX: the first two branches were independent `if`s, so an
    # unsupported arch fell through and left new_state_dict undefined
    # (NameError at the comprehension below); fail fast with a clear error.
    if args.arch == "ECO":
        new_state_dict = init_ECO(model_dict)
    elif args.arch == "ECOfull":
        new_state_dict = init_ECOfull(model_dict)
    elif args.arch == "C3DRes18":
        new_state_dict = init_C3DRes18(model_dict)
    else:
        raise ValueError("No pretrained initializer for arch " + args.arch)

    # Tensors the pretrained dict did not cover.
    un_init_dict_keys = [
        k for k in model_dict.keys() if k not in new_state_dict
    ]
    print("un_init_dict_keys: ", un_init_dict_keys)
    print("\n------------------------------------")

    # Initialize leftovers: BN weights -> 1, other weights -> Xavier,
    # biases -> 0.  NOTE(review): DoubleTensor creates float64 buffers;
    # load_state_dict copies them into the model's existing (float32)
    # parameters, so dtype is reconciled at load time — confirm.
    for k in un_init_dict_keys:
        new_state_dict[k] = torch.DoubleTensor(model_dict[k].size()).zero_()
        if 'weight' in k:
            if 'bn' in k:
                print("{} init as: 1".format(k))
                constant_(new_state_dict[k], 1)
            else:
                print("{} init as: xavier".format(k))
                xavier_uniform_(new_state_dict[k])
        elif 'bias' in k:
            print("{} init as: 0".format(k))
            constant_(new_state_dict[k], 0)

    print("------------------------------------")

    model.load_state_dict(new_state_dict)

    # Optionally resume training state from a checkpoint file.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff inputs are frame differences, so the usual
    # mean/std normalization is replaced with the identity transform.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames consumed per segment: one RGB image, or a 5-frame stack.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format if args.modality
        in ["RGB", "RGBDiff"] else args.flow_prefix + rgb_read_format,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.rgb_prefix + rgb_read_format if args.modality
        in ["RGB", "RGBDiff"] else args.flow_prefix + rgb_read_format,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=True),
            ToTorchFormatTensor(div=False),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Beispiel #17
0
def main():
    """Train a TRN (Temporal Relation Network) model on video datasets.

    Parses its own CLI options, forces TRN consensus, pins the CUDA devices
    given by --gpu, then runs the standard TSN train/validate loop with
    best-checkpoint saving.
    """
    logger.auto_set_dir()

    global args, best_prec1

    import argparse
    parser = argparse.ArgumentParser(description="PyTorch implementation of Temporal Segment Networks")
    parser.add_argument('--dataset', type=str,default="something", choices=['something', 'jester', 'moments'])
    parser.add_argument('--modality', type=str, default="RGB", choices=['RGB', 'Flow'])
    parser.add_argument('--train_list', type=str, default="")
    parser.add_argument('--val_list', type=str, default="")
    parser.add_argument('--root_path', type=str, default="")
    parser.add_argument('--store_name', type=str, default="")
    # ========================= Model Configs ==========================
    parser.add_argument('--arch', type=str, default="BNInception")
    parser.add_argument('--num_segments', type=int, default=3)
    parser.add_argument('--consensus_type', type=str, default='avg')
    parser.add_argument('--k', type=int, default=3)

    # BUG FIX: several help texts below contradicted the actual defaults
    # (dropout 0.8, batch 128, clip 20, print-freq 20); corrected in place.
    parser.add_argument('--dropout', '--do', default=0.8, type=float,
                        metavar='DO', help='dropout ratio (default: 0.8)')
    parser.add_argument('--loss_type', type=str, default="nll",
                        choices=['nll'])
    parser.add_argument('--img_feature_dim', default=256, type=int, help="the feature dimension for each frame")

    # ========================= Learning Configs ==========================
    parser.add_argument('--epochs', default=120, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('-b', '--batch_size', default=128, type=int,
                        metavar='N', help='mini-batch size (default: 128)')
    parser.add_argument('--lr', '--learning-rate', default=0.001, type=float,
                        metavar='LR', help='initial learning rate')
    parser.add_argument('--lr_steps', default=[50, 100], type=float, nargs="+",
                        metavar='LRSteps', help='epochs to decay learning rate by 10')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float,
                        metavar='W', help='weight decay (default: 5e-4)')
    parser.add_argument('--clip-gradient', '--gd', default=20, type=float,
                        metavar='W', help='gradient norm clipping (default: 20)')
    parser.add_argument('--no_partialbn', '--npb', default=False, action="store_true")

    # ========================= Monitor Configs ==========================
    parser.add_argument('--print-freq', '-p', default=20, type=int,
                        metavar='N', help='print frequency (default: 20)')
    parser.add_argument('--eval-freq', '-ef', default=5, type=int,
                        metavar='N', help='evaluation frequency (default: 5)')

    # ========================= Runtime Configs ==========================
    parser.add_argument('-j', '--workers', default=30, type=int, metavar='N',
                        help='number of data loading workers (default: 30)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                        help='evaluate model on validation set')
    parser.add_argument('--snapshot_pref', type=str, default="")
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--gpu', type=str, default='4')
    parser.add_argument('--flow_prefix', default="", type=str)
    parser.add_argument('--root_log', type=str, default='log')
    parser.add_argument('--root_model', type=str, default='model')
    parser.add_argument('--root_output', type=str, default='output')

    args = parser.parse_args()

    # Force TRN consensus regardless of the CLI value, and pin the GPUs.
    args.consensus_type = "TRN"
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device_ids = [int(id) for id in args.gpu.split(',')]
    # BUG FIX: was an `assert`, which is silently stripped under `python -O`;
    # validate explicitly instead.
    if len(device_ids) <= 1:
        raise ValueError("TRN must run with GPU_num > 1")

    # Route all logging artifacts into the auto-created logger directory.
    args.root_log = logger.get_logger_dir()
    args.root_model = logger.get_logger_dir()
    args.root_output = logger.get_logger_dir()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)


    args.store_name = '_'.join(['TRN', args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d'% args.num_segments])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Preprocessing constants and per-group optimizer policies come from the
    # raw model, before any DataParallel wrapping.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    if torch.cuda.device_count() > 1:
        # TODO: pass device_ids explicitly once multi-host setups are tested.
        model = torch.nn.DataParallel(model)

    if torch.cuda.is_available():
        model.cuda()

    # Optionally resume training state from a checkpoint file.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated args.evaluate
            # instead of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff inputs are frame differences, so the usual
    # mean/std normalization is replaced with the identity transform.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames consumed per segment: one RGB image, or a 5-frame stack.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception / InceptionV3 expect BGR input in 0-255.
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        logger.info('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult']))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # BUG FIX: the CSV log file was opened but never closed; the context
    # manager guarantees it is flushed and closed on exit or error.
    with open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch, log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint({
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Beispiel #18
0
def main():
    """Train a TRN model using dataset metadata from ``datasets_video``.

    Builds the TSN/TRN model, the train and validation loaders, then runs
    the epoch loop with periodic validation and best-checkpoint saving.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.test_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    # Preprocessing constants and per-group optimizer policies come from the
    # raw model, before it is wrapped in DataParallel.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume training state from a checkpoint file.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: this message previously interpolated args.evaluate
            # instead of the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading.  TSN two-stream background: the spatial stream consumes
    # single RGB frames (or stacked RGB differences, which highlight motion-
    # salient appearance change); the temporal stream consumes stacked
    # optical-flow fields.  RGBDiff inputs are frame differences, so the
    # usual mean/std normalization is replaced with the identity transform.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Frames consumed per segment: one RGB image, or a 5-frame stack.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # Train and validation loaders share the dataset layout; only the
    # augmentation and shuffling differ.
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception / InceptionV3 expect BGR input in 0-255.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Evaluation-only mode: run a single validation pass and exit.
    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # BUG FIX: the CSV log file was opened but never closed; the context
    # manager guarantees it is flushed and closed on exit or error.
    with open(os.path.join(args.root_log, '%s.csv' % args.store_name),
              'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
Beispiel #19
0
          modality,
          base_model=arch,
          consensus_type=consensus_type,
          dropout=0.8,
          extra_feature=True)
checkpoint = torch.load(pretrained_model)
base_dict = {
    '.'.join(k.split('.')[1:]): v
    for k, v in list(checkpoint['state_dict'].items())
}
net.load_state_dict(base_dict)
print(net)
print(net.input_size, net.scale_size, net.input_mean, net.input_std)
print('model load done..%s' % data_split_type)

train_augmentation = net.get_augmentation()
loader = []
loader1 = torch.utils.data.DataLoader(TSNDataSet(
    "",
    train_list,
    num_segments=num_segments,
    new_length=1 if modality == "RGB" else 5,
    modality=modality,
    image_tmpl="img_{:05d}.jpg"
    if modality in ["RGB", "RGBDiff"] else flow_prefix + "{}_{:05d}.jpg",
    transform=torchvision.transforms.Compose([
        train_augmentation,
        Stack(roll=arch == 'BNInception'),
        ToTorchFormatTensor(div=arch != 'BNInception'),
        GroupNormalize(net.input_mean, net.input_std),
    ])),
Beispiel #20
0
def main():
    """Fine-tune a TRN model (pretrained on Something-Something v2) on a
    binary task using a class-weighted BCE-with-logits loss.

    The pretrained checkpoint's multi-scale consensus classifier weights are
    deleted before loading so the new 2-class fusion head is freshly
    initialized; all remaining weights are loaded with ``strict=False``.
    Reads configuration from the module-level ``parser``; mutates the
    module-level ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)  # NOTE(review): unused — the model below is hard-coded to 2 classes


    args.store_name = '_'.join(['TRN', args.dataset, args.modality, args.arch, args.consensus_type, 'segment%d'% args.num_segments])
    print('storing name: ' + args.store_name)

    # Build a 2-class head regardless of the dataset's category count.
    model = TSN(2, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    checkpoint = torch.load('pretrain/TRN_somethingv2_RGB_BNInception_TRNmultiscale_segment8_best.pth.tar', map_location='cpu')
    # Strip the 'module.' prefix that DataParallel added when the checkpoint was saved.
    base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())}
    # Drop the final fc layer of every fusion scale: its shape matches the
    # pretraining classes, not our 2-class head.
    for key in ['consensus.fc_fusion_scales.6.3.bias', 'consensus.fc_fusion_scales.5.3.bias',
                'consensus.fc_fusion_scales.4.3.bias',
                'consensus.fc_fusion_scales.3.3.bias', 'consensus.fc_fusion_scales.2.3.bias',
                'consensus.fc_fusion_scales.1.3.bias',
                'consensus.fc_fusion_scales.0.3.bias', 'consensus.fc_fusion_scales.6.3.weight',
                'consensus.fc_fusion_scales.5.3.weight',
                'consensus.fc_fusion_scales.4.3.weight', 'consensus.fc_fusion_scales.3.3.weight',
                'consensus.fc_fusion_scales.2.3.weight',
                'consensus.fc_fusion_scales.1.3.weight', 'consensus.fc_fusion_scales.0.3.weight']:
        del base_dict[key]
    model.load_state_dict(base_dict, strict=False)
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: the message previously formatted args.evaluate, which is
            # unrelated to the checkpoint actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=prefix,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # val_loader = torch.utils.data.DataLoader(
    #     TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments,
    #                new_length=data_length,
    #                modality=args.modality,
    #                image_tmpl=prefix,
    #                random_shift=False,
    #                transform=torchvision.transforms.Compose([
    #                    GroupScale(int(scale_size)),
    #                    GroupCenterCrop(crop_size),
    #                    Stack(roll=(args.arch in ['BNInception','InceptionV3'])),
    #                    ToTorchFormatTensor(div=(args.arch not in ['BNInception','InceptionV3'])),
    #                    normalize,
    #                ])),
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    # NOTE(review): despite the 'nll' flag name, this branch builds a weighted
    # BCE-with-logits loss; class 0 is up-weighted by 1.2.
    if args.loss_type == 'nll':
        weight = torch.ones([2]).cuda()
        weight[0] = 1.2
        pos_weight = torch.ones([2]).cuda()
        #pos_weight[0] = 2
        criterion = torch.nn.BCEWithLogitsLoss(weight = weight, pos_weight=pos_weight).cuda()
        #criterion = torch.nn.CrossEntropyLoss().cuda()

    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    # Fixed small base LR for fine-tuning (args.lr is intentionally ignored here).
    optimizer = torch.optim.SGD(policies,
                                0.0001,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # if args.evaluate:
    #     validate(val_loader, model, criterion, 0)
    #     return

    log_training = open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        # Snapshot the model state as it stands BEFORE this epoch's training
        # pass.  BUGFIX: previously the identical state dict was written twice
        # per epoch (once as .pth.tar and once as .pth); keep a single copy.
        torch.save(model.state_dict(), 'checkpoint_bce_20_w12_{}.pth.tar'.format(epoch))
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)
Beispiel #21
0
def main():
    """Train or test an STModeling TSN model, selected by ``args.run_for``.

    * ``'train'``: builds train/val loaders, optimizes with SGD (or Adam when
      the consensus type is 'DNDF'), periodically validates, and checkpoints
      the best prec@1.
    * ``'test'``: loads weights from ``args.root_weights`` and runs ``test()``.

    Reads the module-level ``args``; mutates the module-level ``best_prec1``.
    """
    check_rootfolders()
    global best_prec1
    if args.run_for == 'train':
        categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
            args.dataset, args.modality)
    elif args.run_for == 'test':
        categories, args.test_list, args.root_path, prefix = datasets_video.return_data(
            args.dataset, args.modality)

    num_class = len(categories)

    args.store_name = '_'.join([
        'STModeling', args.dataset, args.modality, args.arch,
        args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class, args)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # best_prec1 = 0
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: the message previously formatted args.evaluate, which is
            # unrelated to the checkpoint actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    #print(model)
    cudnn.benchmark = True

    # Data loading code
    # BUGFIX: the original test `(a != x) | (a != y)` is always true (the OR
    # of two negations cannot both be false), so the IdentityTransform branch
    # was unreachable.  Normalize only when the modality is neither RGBDiff
    # nor RGBFlow, as intended.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion

    if args.run_for == 'train':
        train_loader = torch.utils.data.DataLoader(TSNDataSet(
            "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR input scaled to [0, 255],
                # hence roll (channel reversal) and no division for them.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=False)

        val_loader = torch.utils.data.DataLoader(TSNDataSet(
            "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=False)

        # define loss function (criterion) and optimizer
        if args.loss_type == 'nll':
            criterion = torch.nn.CrossEntropyLoss().cuda()
        else:
            raise ValueError("Unknown loss type")

        for group in policies:
            print(
                ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                    group['name'], len(group['params']), group['lr_mult'],
                    group['decay_mult'])))

        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # DNDF (deep neural decision forest) consensus trains all trainable
        # parameters with Adam instead of the per-group SGD policies.
        if args.consensus_type == 'DNDF':
            params = [p for p in model.parameters() if p.requires_grad]
            optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=1e-5)

        if args.evaluate:
            validate(val_loader, model, criterion, 0)
            return

        log_training = open(
            os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
        for epoch in range(args.start_epoch, args.epochs):
            # Adam manages its own step sizes; only decay the LR for SGD.
            if not args.consensus_type == 'DNDF':
                adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)

    elif args.run_for == 'test':
        print("=> loading checkpoint '{}'".format(args.root_weights))
        checkpoint = torch.load(args.root_weights)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda().eval()
        print("=> loaded checkpoint ")

        test_loader = torch.utils.data.DataLoader(TSNDataSet(
            "/home/machine/PROJECTS/OTHER/DATASETS/kussaster/data",
            args.test_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=args.workers,
                                                  pin_memory=False)

        # cam = cv2.VideoCapture(0)
        # cam.set(cv2.CAP_PROP_FPS, 48)

        # for i, (input, _) in enumerate(test_loader):
        #     with torch.no_grad():
        #         input_var = torch.autograd.Variable(input)
        #
        # ret, frame = cam.read()
        # frame_map = np.full((280, 640, 3), 0, np.uint8)
        # frame_map = frame
        # print(frame_map)
        # while (True):
        #     bg = np.full((480, 1200, 3), 15, np.uint8)
        #     bg[:480, :640] = frame
        #
        #     font = cv2.FONT_HERSHEY_SIMPLEX
        #     # cv2.rectangle(bg, (128, 48), (640 - 128, 480 - 48), (0, 255, 0), 3)
        #
        #     cv2.imshow('preview', bg)
        #
        #     if cv2.waitKey(1) & 0xFF == ord('q'):
        #         break

        test(test_loader, model, categories)
Beispiel #22
0
def main():
    """Train a TRN model end-to-end with Adam.

    Builds train/val loaders from ``datasets_video.return_dataset``, trains
    with per-policy-group Adam (the usual SGD optimizer is commented out),
    validates every ``args.eval_freq`` epochs, and checkpoints the best
    prec@1.  Reads configuration from the module-level ``parser``; mutates
    the module-level ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()
    print("args args args")
    print(args)
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'TRN', args.dataset, args.modality, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: the message previously formatted args.evaluate, which is
            # unrelated to the checkpoint actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception/InceptionV3 expect BGR input scaled to [0, 255],
            # hence roll (channel reversal) and no division for them.
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # optimizer = torch.optim.SGD(policies,
    #                             args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)

    optimizer = torch.optim.Adam(policies,
                                 lr=args.lr,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # '_adam' suffix keeps this run's log separate from SGD runs.
    log_training = open(
        os.path.join(args.root_log, '%s_adam.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Beispiel #23
0
def main():
    """Build a TSN attention-map model and visualize features via input
    optimization (``train_inputs``), then return.

    Loads a class-index file to populate the module-level ``class_to_name``
    map, dumps the parsed options to ``opts.json``, constructs the model
    (with ``get_att_maps=True``), optionally resumes from a checkpoint, and
    hands off to ``train_inputs`` — no training loop runs here.
    """
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    if args.dataset == 'something':
        img_prefix = ''
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        # Something-Something: one class name per line; spaces -> dashes.
        class_to_name = {idx:line.strip().replace(' ', '-') for idx, line in enumerate(content)}
    else:
        img_prefix = 'image_'
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        # UCF/HMDB/Kinetics index files are "<1-based-id> <name>" per line.
        class_to_name = {int(line.strip().split(' ')[0])-1:line.strip().split(' ')[1] \
                for line in content}

    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)
    # lstm_out_type only applies to (conv_)lstm consensus; clear it otherwise.
    if not (args.consensus_type == 'lstm' or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None
    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()
    # print(model)
    # input('...')

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # print(model)
            # BUGFIX: the message previously formatted args.evaluate, which is
            # unrelated to the checkpoint actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
            # input('...')
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
        # BUGFIX: rev_normalize was unbound on this branch but is passed to
        # train_inputs unconditionally below, raising NameError for RGBDiff.
        # Identity normalization needs no reversal.
        rev_normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
        # data_length = 5

    print('training inputs...')
    val_logger = os.path.join(args.result_path, 'vis_feature.log')
    train_inputs(model, 0, val_logger=val_logger, rev_normalize=rev_normalize)
    return
Beispiel #24
0
def main():
    """Train a TSN model with plain SGD and a MultiStepLR schedule.

    Chooses the class count from ``args.dataset``, builds train/val loaders,
    trains for ``args.epochs`` epochs, validates every ``args.eval_freq``
    epochs, and checkpoints the best prec@1.  Reads configuration from the
    module-level ``parser``; mutates the module-level ``args`` and
    ``best_prec1``; closes the module-level ``writer`` at the end
    (presumably a TensorBoard SummaryWriter — defined elsewhere).
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        data_length = 5  # generate 5 displacement map, using 6 RGB images

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                new_length=data_length)
    model = model.to(device)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()
    if device.type == 'cuda':
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            # BUGFIX: the message previously formatted args.evaluate, which is
            # unrelated to the checkpoint actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality
        in ["RGB", "RGBDiff", "CV"] else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            # BNInception expects BGR input scaled to [0, 255], hence roll
            # (channel reversal) and no division for it.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality
        in ["RGB", "RGBDiff", "CV"] else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     args.lr_steps,
                                                     gamma=0.1)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(0, args.epochs):
        # NOTE(review): scheduler.step() before train() pre-dates the
        # PyTorch >= 1.1 step-after-optimize convention, but here the loop
        # deliberately starts at 0 and skips epochs below start_epoch, so the
        # leading step() fast-forwards the LR schedule when resuming.  Do not
        # reorder without also changing the resume logic.
        scheduler.step()
        if epoch < args.start_epoch:
            continue

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
    writer.close()
Beispiel #25
0
class TRN():
    """Training/inference harness wrapping a TSN network for Epic-Kitchens-style data.

    Responsibilities (all visible below):
      * build a TSN model from dataset metadata and DataParallel-wrap it on GPU,
      * optionally restore a checkpoint (``resume``),
      * ``__call__(pickle_file)``  -- test-mode inference, returns per-clip logits,
      * ``do_training()``         -- full train/validate loop with checkpointing,
      * helpers: ``train``, ``validate``, ``save_checkpoint``,
        ``adjust_learning_rate``, ``accuracy``, ``logging``.
    """
    def __init__(self, 
                num_segments,
                modality,

                lr =0.001, 
                loss_type = 'nll', # cross entropy
                weight_decay=5e-4, #weight_decay: L2 penalty #default
                lr_steps =[30, 60], # epochs to decay learning rate by 10
                momentum= 0.9, 
                gpus= None, 
                clip_gradient =20, 

                new_length=None,
                base_model="resnet50",
                dropout=0.7,
                img_feature_dim=256, #The dimensionality of the features used for relational reasoning. 
                partial_bn=True,
                consensus_type= 'TRN', # MTRN
                
                dataset = 'epic',
                batch_size= 1,
                workers= 2,
                
                resume = None,  #  pretained model (path)
                epochs= None,
                start_epoch = None, #
                
                
                ifprintmodel= 0, # print the model structure
                print_freq =1,
                eval_freq =1,
                ):
        """Build the TSN model, print a config summary, optionally restore a
        checkpoint, and set up normalization and the loss criterion.

        ``resume`` is a path to a ``.pth.tar`` checkpoint or None; the other
        keyword defaults are documented inline in the signature above.
        """

        # --- copy configuration onto the instance ---
        self.num_segments= num_segments
        self.modality= modality
        self.base_model= base_model
        self.new_length= new_length
        self.img_feature_dim= img_feature_dim
        self.consensus_type= consensus_type
        self.dataset= dataset

        self.resume = resume 
        self.epochs = epochs
        self.start_epoch= start_epoch

        self.lr= lr  
        self.loss_type= loss_type
        self.weight_decay = weight_decay
        self.lr_steps= lr_steps
        self.momentum= momentum
        self.partial_bn= partial_bn
        self.dropout= dropout

        self.batch_size = batch_size
        self.workers= workers
        self.gpus=  gpus
        self.eval_freq= eval_freq
        self.print_freq= print_freq

        
        # Dataset metadata (class count, train/val list files, paths) comes from
        # the project-local datasets_video helper.
        self.num_class, self.train_list, self.val_list, self.root_path, self.prefix = datasets_video.return_dataset(self.dataset, self.modality)
        self.store_name = '_'.join(['TRN', self.dataset, self.modality, self.base_model, self.consensus_type, 'segment%d'% self.num_segments, 'K%d'% self.new_length])      
        self.best_prec1= 0 
        self.clip_gradient= clip_gradient
        
        
        
        # Build the underlying TSN network.
        self.model = TSN(self.num_class, self.num_segments, self.modality,
                new_length= self.new_length,
                base_model= self.base_model,
                consensus_type= self.consensus_type,
                dropout=self.dropout,
                img_feature_dim= self.img_feature_dim,
                partial_bn= self.partial_bn)

        # Cache model-derived preprocessing constants before DataParallel hides
        # them behind .module.
        self.crop_size =  self.model.crop_size 
        self.scale_size = self.model.scale_size
        self.input_mean = self.model.input_mean
        self.input_std = self.model.input_std
        self.model_policies = self.model.get_optim_policies()
        self.augmentation= self.model.get_augmentation()
        
        print('we have {} GPUs found'.format(torch.cuda.device_count()))
        self.model = torch.nn.DataParallel(self.model #, device_ids=self.gpus
                                           ).cuda()

        # Human-readable configuration summary (runtime string, kept verbatim).
        print(f'''  
+-------------------------------------------------------+
               num_class : {self.num_class}
                modality : {self.modality}
              base_model : {self.base_model}
              new_length : {self.new_length}
          consensus_type : {self.consensus_type}
         img_feature_dim : {self.img_feature_dim}

                  resume : {self.resume}
                  epochs : {self.epochs }
             start_epoch : {self.start_epoch }
                      lr : {self.lr }
               loss_type : {self.loss_type }
            weight_decay : {self.weight_decay }
                lr_steps : {self.lr_steps }
                momentum : {self.momentum }
              partial_bn : {self.partial_bn}
           clip_gradient : {self.clip_gradient }
                 dropout : {self.dropout}

              batch_size : {self.batch_size}
                 workers : {self.workers}
                    gpus : {self.gpus } ( no use now)
               eval_freq : {self.eval_freq }
              print_freq : {self.print_freq }
              
               crop_size : {self.crop_size}
              scale_size : {self.scale_size}
+-------------------------------------------------------+
construct a network named : {self.store_name}''')
        
        #---- checkpoint------load model ---- 
        # Restores epoch counter, best accuracy and weights from the checkpoint.
        if self.resume:
            if os.path.isfile(self.resume):
                print(("=> loading checkpoint '{}'".format(self.resume)))
                checkpoint = torch.load(self.resume)
                self.start_epoch = checkpoint['epoch']
                self.best_prec1 = checkpoint['best_prec1']
                self.model.load_state_dict(checkpoint['state_dict'])
                print(("=> loaded checkpoint '{}' (epoch {}) (epochs={})"
                      .format(self.resume, checkpoint['epoch'], self.epochs)))
            else:
                print(("=> no checkpoint found at '{}'".format(self.resume)))

        cudnn.benchmark = True

        # Data loading code
        # RGBDiff performs its own differencing, so no mean/std normalization.
        if self.modality != 'RGBDiff':
            self.normalize = GroupNormalize(self.input_mean, self.input_std)
        else:
            self.normalize = IdentityTransform()
        

        #------- define loss function (criterion) and optimizer-------
        if self.loss_type == 'nll':
            self.criterion = torch.nn.CrossEntropyLoss().cuda()
        else:
            raise ValueError("Unknown loss type")

        
        #---------=========describe parameters:========= ----------------------
        
        print('*'*20,'TSN parameters:')
        Tools.parameter_desc(self.model, ifprint= ifprintmodel)
        #------parameter  way2-----
        print('-'*30)
        for group in self.model_policies:
            print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))
        
        # Count trainable parameters from the optimizer policy groups.
        toal_params=0 
        for p in self.model_policies:
    #        print('-'*10,'{} ( num: {})'.format(p['name'],len(p['params'])))
            for i, param in enumerate(p['params']):
                toal_params+= param.size().numel()
    #            if i< 5 :
    #                print(param.size(), param.size().numel())
    #            elif i==5 : 
    #                print('...')
        print('*'*20, 'count from policies, total parameters: {:,}'.format(toal_params))
        print('TRN initialised \n')

        

    def __call__(self, input_pickle):
        '''
        Run test-mode inference over a pickle list file
        (e.g. 'epic_kitchens/val_02.pkl') and return the per-clip logits.
        '''
        end= time.time()
        # Deterministic (center-crop, no random shift) evaluation transform.
        DS= EpicDataSet( pickle_file= input_pickle,
                 num_segments= self.num_segments, 
                 new_length= self.new_length, 
                 modality=self.modality,
                 transform=torchvision.transforms.Compose([
                           GroupScale(int(self.scale_size)), #hw 
                           GroupCenterCrop(self.crop_size), #hw
                           Stack(roll=(self.base_model in ['BNInception','InceptionV3'])),
                           ToTorchFormatTensor(div=(self.base_model not in ['BNInception','InceptionV3'])),
                           self.normalize,
                       ]),
                 random_shift=False, test_mode= True
                 )
        
        test_loader = torch.utils.data.DataLoader(
            DS,
            batch_size= self.batch_size, 
            shuffle=False,
            num_workers=self.workers, 
            pin_memory=True)
        time1= time.time()- end
        print('testmode: load {} actions ({} batchs) '.format(len(DS) ,len(test_loader) ))
        # validate() returns (logits, prec1, prec5, loss) in test mode; keep logits.
        logits= self.validate(self.model, test_loader, self.criterion)[0]
        print('cost time: {:.1f} mins, {:.1f} mins'.format(time1,(time.time()- end)/60))
        return logits


#--------- training: ------------------------------------------------------
    
    def do_training(self, ifprint=1):
        """Full training loop: build loaders, train each epoch, periodically
        validate, checkpoint the best model, and print a summary DataFrame.
        """
        
        # initial logging file 
        model= self.model
        self.log_training = os.path.join('log/{}.txt'.format(self.store_name))
        with open(self.log_training, 'w') as log:
            log.write('initial logfile. {} \n'.format(time.strftime(" %d.%B %H:%M:%S")))
        self.logging("Start Training {}epochs (start from epoch {}) at: {}".format(self.epochs, self.start_epoch  ,time.strftime(" %d.%B %H:%M:%S")))
        
        
        
        #---------load data----------------------------------------------------
        print('======batch_size=', self.batch_size )
        # Training uses the model's augmentation; validation uses center crops.
        transform_train= torchvision.transforms.Compose([
                           self.augmentation, # GroupMultiScaleCrop+ GroupRandomHorizontalFlip
                           Stack(roll=(self.base_model in ['BNInception','InceptionV3'])),
                           ToTorchFormatTensor(div=(self.base_model not in ['BNInception','InceptionV3'])),
                           self.normalize,
                       ])
        train_loader = torch.utils.data.DataLoader(
#            TSNDataSet(self.root_path, self.train_list, num_segments=self.num_segments,
#                       new_length=self.new_length,modality=self.modality,image_tmpl=self.prefix,
#                       transform=transform_train,
#                       fromEpic= 1
#                        ),
            EpicDataSet( pickle_file= self.train_list,
                 num_segments= self.num_segments, 
                 new_length= self.new_length, 
                 modality=self.modality,
                 transform= transform_train,
                 random_shift=True, test_mode= False
                 ),
            
            batch_size=self.batch_size, shuffle=True,
            num_workers=self.workers, pin_memory=True)

        transform_val= torchvision.transforms.Compose([
                           GroupScale(int(self.scale_size)), #hw 
                           GroupCenterCrop(self.crop_size), #hw
                           Stack(roll=(self.base_model in ['BNInception','InceptionV3'])),
                           ToTorchFormatTensor(div=(self.base_model not in ['BNInception','InceptionV3'])),
                           self.normalize,
                       ])
        val_loader = torch.utils.data.DataLoader(
#            TSNDataSet(self.root_path, self.val_list, num_segments=self.num_segments,
#                       new_length= self.new_length,modality=self.modality,image_tmpl=self.prefix,
#                       random_shift=False,
#                       transform= transform_val,
#                       fromEpic= 1
#                        ),
            EpicDataSet( pickle_file= self.val_list,
                 num_segments= self.num_segments, 
                 new_length= self.new_length, 
                 modality=self.modality,
                 transform= transform_val,
                 random_shift=False, test_mode= True
                 ),
        
            batch_size=self.batch_size, shuffle=False,
            num_workers=self.workers, pin_memory=True)
        
        # SGD over the policy groups (each group carries lr_mult/decay_mult).
        optimizer = torch.optim.SGD(self.model_policies, #---params
                                    self.lr,
                                    momentum= self.momentum,
                                    weight_decay=self.weight_decay) 
        

        val_monitor=[]
        
        end= time.time()
        for epoch in range(self.start_epoch, self.epochs):
            print('\n','-'*10,'epoch:{}, lr {}'.format(epoch, self.lr),'-'*10, time.strftime(" %d.%B %H:%M:%S"))
            self.adjust_learning_rate(optimizer, epoch, self.lr_steps)

            # train for one epoch
            tr_loss= self.train(train_loader,  model, self.criterion, optimizer, epoch, self.log_training, ifprint)

            #----------------validation ----------------------------
            # evaluate on validation set        
            val= ((epoch- self.start_epoch +1) % self.eval_freq == 0 or epoch== self.epochs- 1 )
            if (val):
                print('-'*5,'begin evaluating.. ')
                val_prec1, val_prec5, val_loss = self.validate( model, val_loader, self.criterion, epoch, self.log_training)
                val_monitor.append((epoch, (time.time()-end)/60, tr_loss.item(), val_loss.item(), val_prec1.item(), val_prec5.item()))# get single item from tensor
                
                is_best = val_prec1 > self.best_prec1
                self.best_prec1 = max(val_prec1, self.best_prec1)
                #---- checkpoint------save--------
                print('-'*5,'save checkpoint.. ')
                self.save_checkpoint({
                    'epoch': epoch + 1,
                    'arch': self.base_model,
                    'state_dict': model.state_dict(),
                    'best_prec1': self.best_prec1,
                }, is_best)
                print('-'*30, '\n')
            #--------------log loss--------------------------------
        
        print('*'*10, 'training completed!')
        print('Best Prec@1: %.3f'%(self.best_prec1))
        
        df= pd.DataFrame(val_monitor, columns=['epoch','epochtime(min)','train_loss','val_loss','val_prec1','val_prec5'])
        print(df)
    
# ----------------------------------------------------------------------------------------------------------------        




    def train(self, train_loader, model, criterion, optimizer, epoch, outputfile, ifprint):
        """Train for one epoch; returns the epoch's average loss (a tensor).

        Logs a line per batch to ``outputfile`` (via self.logging) and prints
        every ``print_freq`` batches when ``ifprint`` is truthy.
        """
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        # Partial-BN freezes all BatchNorm layers except the first (TSN trick).
        if not self.partial_bn:
            model.module.partialBN(False)
        else:
            model.module.partialBN(True)
        # switch to train mode
        model.train()

        #  num_segments*new_length* channel (rgb 3, flow 2)* crop_size
        if ifprint: print('begin train(), train_loader  len:{}'.format( len(train_loader)))
        tr_end=time.time()
        for i, (input, target) in enumerate(train_loader):

            if i==0 and ifprint:
                print('input size: {} (batchsize ,num_segments*new_length* channel (rgb 3, flow 2), [crop_size])'.format(input.size()))  
            
            # measure data loading time
            data_time.update(time.time() - tr_end)

    ##        target = target.cuda(async=True) #async is now a reserved word in Python >= 3.7 so use non_blocking instead.
            target = target.cuda(non_blocking=True)
            input_var = torch.autograd.Variable(input)
            target_var = torch.autograd.Variable(target)

            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = self.accuracy(output.data, target, topk=(1,5))
            losses.update(loss.data, input.size(0))
            top1.update(prec1, input.size(0))
            top5.update(prec5, input.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()

            # Clip the global gradient norm to stabilize training.
            if self.clip_gradient is not None:
                total_norm = clip_grad_norm(model.parameters(), self.clip_gradient) ##torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
                if total_norm > self.clip_gradient:
                    print("clipping gradient: {} with coef {}".format(total_norm, self.clip_gradient / total_norm))

            optimizer.step()

            # measure elapsed time
            batcht0= time.time() - tr_end
            batch_time.update(batcht0)
            
            idx= epoch * len(train_loader) + i+1
            
            op= 'epoch:{:^5d}, batch: {:2d} /{}, lr: {:.5f}, time(min): {:.1f}, loss: {:.4f}, prec1: {:.2f}, prec5: {:.2f}'.format(
                          epoch, i, len(train_loader), optimizer.param_groups[-1]['lr'],
                          batcht0/60, loss.data , prec1, prec5
                           )
            if outputfile: self.logging(op)
            if (ifprint and idx % self.print_freq == 0) or (i== len(train_loader)-1) : 
                print(op)
                
        return losses.avg 


    def validate(self, model, val_loader,  criterion, iter= None, outputfile=None):
        """Evaluate the model on ``val_loader``.

        With ``outputfile`` set (validation): returns (prec1, prec5, loss).
        Without it (test mode): also collects logits and returns
        (logits, prec1, prec5, loss).
        """
    #    batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        # switch to evaluate mode: same as model.train(mode=False)
        model.eval() 
        val_end = time.time()
        logits=[]
        for i, (input, target) in enumerate(val_loader):
            
            if i==0 :
                print('input size: {} (batchsize ,num_segments*new_length* channel (rgb 3, flow 2), [crop_size])'.format(input.size()))  
                  
            #print('validating enumerate batch=', i )
            
            target = target.cuda(non_blocking=True)
            # NOTE(review): volatile=True is ignored/removed in modern PyTorch;
            # a torch.no_grad() block would be the current equivalent.
            input_var = torch.autograd.Variable(input, volatile=True)
            target_var = torch.autograd.Variable(target, volatile=True)

            # compute output
            output = model(input_var)
            #print('###{}: input batchs*size: {}*{} output size:{}'.format(i, len(val_loader), input.size(),output.size()))
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = self.accuracy(output.data, target, topk=(1,5))

            losses.update(loss.data, input.size(0))
            top1.update(prec1, input.size(0)) #input.size(0)= batch_size
            top5.update(prec5, input.size(0))
            if not outputfile:# testing
                logits.extend(output.tolist())
            
        
        val_time= time.time() - val_end
        op = ( (('-'*60 +'(epoch{}), validating'.format(iter)) if outputfile else 'testing') +
              'Results: time(min): {:.1f},  Loss {loss.avg:.5f}, Prec1 {top1.avg:.3f}( best:{best_p:.3f} ), Prec5 {top5.avg:.3f}'
              .format( val_time/60, loss=losses, top1=top1, best_p= self.best_prec1, top5=top5))
        print(op)
        
        if outputfile: # validating
            self.logging(op)
            return top1.avg, top5.avg, losses.avg
        else:  # testing
            return logits, top1.avg, top5.avg, losses.avg


    def save_checkpoint(self, state, is_best, filename='checkpoint.pth.tar'):
        """Save ``state`` under model/<store_name>_checkpoint.pth.tar; copy it
        to <store_name>_best.pth.tar when ``is_best``."""
        torch.save(state, '%s/%s_checkpoint.pth.tar' % ('model', self.store_name))
        if is_best:
            shutil.copyfile('%s/%s_checkpoint.pth.tar' % ('model', self.store_name),'%s/%s_best.pth.tar' % ('model', self.store_name))


    def adjust_learning_rate(self, optimizer, epoch, lr_steps):
        """Decay the base LR by 10x at every milestone in ``lr_steps`` and push
        the per-group lr/weight_decay (scaled by lr_mult/decay_mult)."""
        decay = 0.1 ** (sum(epoch >= np.array(lr_steps)))
        lr = self.lr * decay
        if epoch in np.array(lr_steps).astype(int):
            text= 'learning_rate decayed({}*{}) to {}'.format(self.lr, decay, lr)
            print(text)
            self.logging(text)


        decay = self.weight_decay
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr * param_group['lr_mult']
            param_group['weight_decay'] = decay * param_group['decay_mult']


    def accuracy(self, output, target, topk=(1,)):
        """Computes the precision@k for the specified values of k"""
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
    
    def logging(self, text):
        """Append ``text`` (plus newline) to the current training log file."""
        with open(self.log_training, 'a') as log:
            log.write(text +  '\n')
            log.flush()
Beispiel #26
0

# NOTE(review): this example is truncated by the scrape — the `if v.shape == ...`
# branch at the bottom has no body, so the fragment is not runnable as-is.
# It appears to selectively copy matching tensors from a checkpoint into the
# base model's state dict; the copy/assignment step is missing from this view.
args = parser.parse_args()
print(args)

# Kinetics-400 class count (hard-coded for this example).
num_class = 400
model = TSN(num_class, args.num_segments, 'RGB',
            base_model=args.arch,
            consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

# Preprocessing constants derived from the chosen backbone.
crop_size = model.crop_size
scale_size = model.scale_size
input_mean = model.input_mean
input_std = model.input_std
policies = model.get_optim_policies()
train_augmentation = model.get_augmentation()

#model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

if args.resume:
    if os.path.isfile(args.resume):
        print(("=> loading checkpoint '{}'".format(args.resume)))
        # map_location keeps tensors on CPU regardless of where they were saved.
        sd = torch.load(args.resume, map_location=lambda storage, loc: storage, pickle_module=pickle)
        base = model._modules['base_model'].state_dict()
        count = 0
        if True:
            for k,v in sd.items():
                # Only consider checkpoint entries that exist in the base model.
                if k not in base.keys():
                    continue;
                v = v.data
                if v.shape == base[k].shape:
Beispiel #27
0
def main():
    """Entry point: build the TSN model, prepare data loaders, and train.

    Reads configuration from the module-level ``parser``; updates the globals
    ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Select the number of action classes for the chosen dataset.
    # UCF101: 13,320 clips over 101 actions; HMDB51: 6,766 clips over 51 actions.
    if args.dataset == 'ucf101':
        num_class = 101          # BUGFIX: was the typo `num_clsass`, leaving
    elif args.dataset == 'hmdb51':
        num_class = 51           # `num_class` undefined at the TSN(...) call.
    elif args.dataset == 'kinetics':
        num_class = 400          # BUGFIX: was `num_clsass == 400` (comparison, not assignment).
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # part 1: build the model
    #
    # num_class:          number of action classes
    # args.num_segments:  how many snippets a video is split into (K in the paper, default 3)
    # args.modality:      input type -- 'RGB', 'Flow', 'RGBDiff'
    # args.arch:          backbone, e.g. resnet101 or BNInception
    # args.consensus_type: how snippet scores are fused (e.g. 'avg')
    # args.dropout:       dropout rate
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)
    # Cross-modality pre-training: the RGB model initializes the temporal
    # network; TSN adapts the first conv layer to the chosen input modality.

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    # Per-parameter-group optimizer options (first-conv handling for flow etc.).
    policies = model.get_optim_policies()
    # Training-time data augmentation for this backbone.
    train_augmentation = model.get_augmentation()

    # Multi-GPU training.
    # BUGFIX: was `torch.nn.DataParaller` (AttributeError at runtime).
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume from a saved checkpoint (.pth path); otherwise
    # args.resume is None and training starts fresh.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # Initialize the network from the checkpoint's parameters.
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: the message previously printed args.evaluate instead of args.resume.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # part 2: data loading
    if args.modality != 'RGBDiff':
        # RGB / Flow inputs are normalized with the backbone's mean/std.
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff computes differences itself; no normalization.
        normalize = IdentityTransform()

    # Number of consecutive frames per snippet.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Robustness: previously data_length was left unbound here.
        raise ValueError('Unknown modality ' + args.modality)

    # TSNDataSet (a torch.utils.data.Dataset) handles the raw data; DataLoader
    # batches samples and labels for the model.
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # Validation data: deterministic center crops, no shuffling.
    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # part 3: training setup — loss, optimizer, hyper-parameters.

    if args.loss_type == 'nll':
        # CrossEntropyLoss = log-softmax + negative log likelihood, so the
        # network's final layer outputs raw logits (no softmax needed).
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    # Report the per-group optimizer policies.
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # Step the learning-rate schedule.
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
Beispiel #28
0
def main():
    """Load an ECO-based TSN for 4 classes, splice a pretrained state dict
    (zero/xavier/one-initializing any missing entries), then print top-1
    predictions versus targets for every validation clip.
    """
    global args, best_prec1
    num_class = 4
    rgb_read_format = "{:d}.jpg"

    # Build the network: ECO backbone, identity consensus over RGB segments.
    model = TSN(num_class,
                args.num_segments,
                args.pretrained_parts,
                'RGB',
                base_model='ECO',
                consensus_type='identity',
                dropout=0.3,
                partial_bn=True)

    # Preprocessing constants derived from the backbone.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Per-parameter-group optimizer options: each dict carries its own params
    # plus keyword options (lr_mult/decay_mult) for that group.
    policies = model.get_optim_policies()

    train_augmentation = model.get_augmentation()

    # Two-GPU data-parallel wrapper.
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    current_state = model.state_dict()

    print("pretrained_parts: ", args.pretrained_parts)

    checkpoint_path = args.model_path
    pretrained_state = torch.load(checkpoint_path)['state_dict']

    # Keys present in the model but absent from the checkpoint need fresh init.
    missing_keys = [key for key in current_state.keys()
                    if key not in pretrained_state]
    print("un_init_dict_keys: ", missing_keys)
    print("\n------------------------------------")

    for key in missing_keys:
        # NOTE(review): fillers are created as DoubleTensor while model params
        # are typically float32 — confirm load_state_dict tolerates this.
        filler = torch.DoubleTensor(current_state[key].size()).zero_()
        pretrained_state[key] = filler
        if 'weight' in key and 'bn' in key:
            print(f"{key} init as: 1")
            constant_(filler, 1)
        elif 'weight' in key:
            print(f"{key} init as: xavier")
            xavier_uniform_(filler)
        elif 'bias' in key:
            print(f"{key} init as: 0")
            constant_(filler, 0)

    print("------------------------------------")

    model.load_state_dict(pretrained_state)

    cudnn.benchmark = True

    # Data loading: single-frame RGB snippets, deterministic center crops.
    normalize = GroupNormalize(input_mean, input_std)
    data_length = 1

    eval_transform = torchvision.transforms.Compose([
        GroupScale(int(scale_size)),
        GroupCenterCrop(crop_size),
        Stack(roll=True),
        ToTorchFormatTensor(div=False),
        normalize,
    ])
    val_dataset = TSNDataSet("",
                             args.val_list,
                             num_segments=args.num_segments,
                             new_length=data_length,
                             modality='RGB',
                             image_tmpl=rgb_read_format,
                             random_shift=False,
                             transform=eval_transform)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=True)

    # Report the optimizer policy groups.
    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    # Inference: print top-1 prediction against the ground-truth label.
    model.eval()
    for clip, label in val_loader:
        label = label.cuda()
        scores = model(clip)
        _, prediction = scores.data.topk(1, 1, True, True)
        print(prediction, label)
    print('done')
Beispiel #29
0
def main():
    """Train a spatial-TSN model and periodically evaluate it.

    Configuration comes from the module-level ``parser``; the class count is
    taken directly from ``args.num_class`` instead of a dataset-name lookup.
    Tracks the best top-1 precision in the global ``best_prec1`` and writes
    checkpoints via ``save_checkpoint``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Class count is supplied explicitly rather than derived from a
    # dataset-name table (the old ucf101/hmdb51/kinetics/... lookup).
    num_class = args.num_class

    # Effective segment count is temporal segments x spatial segments
    # (this variant also tiles each frame spatially).
    model = TSN(num_class,
                args.num_segments * args.num_spacial_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            # lz: comment out these two restores when fine-tuning
            # (as opposed to resuming) so training restarts from epoch 0.
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fixed: report the checkpoint that was actually loaded
            # (args.resume) — previously this printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff does its own mean subtraction inside the
    # model, so normalization is skipped for that modality.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Flow/RGBDiff stack 5 consecutive frames per segment; RGB uses 1.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # Alternative crop strategies that were experimented with here include
    # GroupCrop / GroupRandomCrop / GroupNRandomCrop / GroupMbyNCrop with
    # various grid sizes; the active choice below samples one cell out of a
    # 2x2 spatial grid on 720x1280 frames.
    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.train_list,
            train_val_test='train',
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl="-{:04d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "_{}-{:04d}.jpg",
            transform=torchvision.transforms.Compose([
                GroupMbyNRandomCrop(1, 2, 2, 720, 1280),  # one out of 2x2TSSN
                ImgColorJitter(brightness=0.4, contrast=0, saturation=0,
                               hue=0),
                train_augmentation,
                # BNInception expects BGR uint8 input, hence roll / no div.
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    # NOTE(review): the validation loader also reads args.train_list; the
    # split appears to be selected via train_val_test='val' inside
    # TSNDataSet — confirm that is intentional.
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.train_list,
            train_val_test='val',
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl="-{:04d}.jpg" if args.modality in ["RGB", "RGBDiff"]
            else args.flow_prefix + "_{}-{:04d}.jpg",
            random_shift=False,
            transform=torchvision.transforms.Compose([
                # Deterministic 2x2 grid crop for evaluation (the training
                # loader samples one random cell instead).
                GroupMbyNCrop(2, 2, 720, 1280),  # 2x2TSSN
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda(
        )  # This criterion expects a class index (0 to C-1)
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 >= best_prec1  # need = to avoid lucky 100%
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Beispiel #30
0
def main():
    """Load a TSN checkpoint and run inference over the test list.

    Builds the model from CLI options, restores weights from
    ``args.checkpoint``, constructs a center-crop evaluation pipeline, and
    delegates to the module-level ``test`` function.
    """
    parser = options()
    args = parser.parse_args()

    # Map dataset name to its class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'saag01':
        num_class = 2
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Flow/RGBDiff stack 5 consecutive frames per segment; RGB uses 1.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=0.5,
                partial_bn=False)

    # Only the preprocessing geometry and statistics are needed here; the
    # previously-computed `cropping`, `policies` and `train_augmentation`
    # were never used and have been removed.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # Restore trained weights.  The checkpoint was saved from a
    # DataParallel-wrapped model, so wrap before loading the state dict.
    checkpoint = torch.load(args.checkpoint)
    state_dict = checkpoint['state_dict']

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    model.load_state_dict(state_dict)

    test_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.test_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=args.img_prefix + "_{:05d}" +
        args.ext if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix +
        "_{}_{:05d}" + args.ext,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            # BNInception expects BGR uint8 input, hence roll / no div.
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            GroupNormalize(input_mean, input_std),
        ]),
        custom_prefix=args.custom_prefix),
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              drop_last=True)

    ### Test ###
    test(model, test_loader, args)
Beispiel #31
0
def main():
    """Train a TSN model on ucf101/hmdb51/kinetics and evaluate periodically.

    Configuration comes from the module-level ``parser``.  Tracks the best
    top-1 precision in the global ``best_prec1`` and writes checkpoints via
    ``save_checkpoint``.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fixed: report the checkpoint that was actually loaded
            # (args.resume) — previously this printed args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.  RGBDiff does its own mean subtraction inside the
    # model, so normalization is skipped for that modality.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Flow/RGBDiff stack 5 consecutive frames per segment; RGB uses 1.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception expects BGR uint8 input, hence roll / no div.
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # CrossEntropyLoss expects class indices (0 to C-1), not one-hot.
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)