Esempio n. 1
0
def main():
    """Build the Flower training DataLoader from the .mat split/label
    annotations and iterate one epoch, printing each batch's shape.

    Reads CLI options from the module-level ``parser``; splits and labels
    come from ``./dataanno/anno.mat``.
    """
    args = parser.parse_args()
    splitanno = sio.loadmat('./dataanno/anno.mat')
    # labelmap = sio.loadmat('./dataanno/setid.mat')
    trainid = splitanno['trnid'][0].tolist()
    valid = splitanno['valid'][0].tolist()
    testid = splitanno['tstid'][0].tolist()
    labellist = splitanno['labels'][0].tolist()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # BNInception expects rolled (BGR) channel order and un-divided 0-255
    # inputs; every other backbone gets tensors scaled to [0, 1].
    train_dataset = FlowerDataSet(
        indexlist=trainid, labellist=labellist,
        transform=torchvision.transforms.Compose([
            torchvision.transforms.Compose([
                GroupMultiScaleCrop(224, [1, .875, .75, .66]),
                GroupRandomHorizontalFlip(is_flow=False)]),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ]))
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # BUG FIX: the original used the Python 2 `print x` statement, which is
    # a SyntaxError under Python 3.  Also renamed the loop variable so it no
    # longer shadows the builtin `input`.
    for i, (batch_input, target) in enumerate(train_loader):
        print(batch_input.size())
Esempio n. 2
0
    def forward(self, input):
        """Run the 2D base model over every frame, max-pool the per-frame
        outputs in consecutive groups of 25 (presumably one group per video
        clip -- TODO confirm against the caller), and return per-clip
        softmax scores.

        ``input`` is assumed to be (batch, frames * sample_len, H, W) --
        NOTE(review): confirm the expected layout with the dataloader.
        """
        global args
        args = parser.parse_args()  # NOTE(review): re-parses CLI args on every forward call
        b = args.batch_size  # unused -- presumably left over from debugging
        # Channels per sampled frame: 3 for RGB, 1 otherwise, times new_length.
        sample_len = (3 if self.modality == "RGB" else 1) * self.new_length

        if self.modality == 'RGBDiff':
            sample_len = 3 * self.new_length
            input = self._get_diff(input)

        # Fold all frames into the batch dimension for the 2D backbone.
        base_out = self.base_model(input.view((-1, sample_len) + input.size()[-2:]))
        '''
        y = base_out.view((-1, 25, base_out.size()[-1]))

        pool = nn.MaxPool2d(kernel_size=(25, 1), stride=(25, 1))
        y = pool(y)

        '''
        # Element-wise max over the first group of 25 frame outputs, then
        # append the max of each following group of (up to) 25.
        y = torch.max(base_out[:25], 0).values.unsqueeze(0)

        for i in range(25, len(base_out), 25):
            y = torch.cat([y, torch.max(base_out[i: min(i+25, len(base_out))], 0).values.unsqueeze(0)], 0)

        base_out = y
        # NOTE(review): despite the name, dropout > 0 gates the final FC
        # layer here, not an actual dropout op.
        if self.dropout > 0:
            base_out = self.new_fc(base_out)

        base_out = self.softmax(base_out)


        return base_out.squeeze(1)
def main():
    """Entry point: build the Moments-in-Time spatial stream and train it."""
    global arg
    arg = parser.parse_args()
    print(arg)

    # Only the category list is needed here; it fixes the classifier size.
    categories, train_list, val_list, root_path, prefix = return_moments()
    num_class = len(categories)
    assert num_class == arg.num_classes

    loader_factory = spatial_dataloader(
        BATCH_SIZE=arg.batch_size,
        num_workers=8,
        path='/media/lili/fce9875a-a5c8-4c35-8f60-db60be29ea5d/Moments_in_Time_Raw/',
        train_list='./img_list/new_moments_train_list.txt',
        test_list='./img_list/new_moments_validation_list.txt')

    train_loader, val_loader, test_video = loader_factory.run()

    spatial_cnn = Spatial_CNN(nb_epochs=arg.epochs,
                              lr=arg.lr,
                              batch_size=arg.batch_size,
                              resume=arg.resume,
                              start_epoch=arg.start_epoch,
                              evaluate=arg.evaluate,
                              train_loader=train_loader,
                              test_loader=val_loader,
                              test_video=test_video)

    # Kick off training.
    spatial_cnn.run()
Esempio n. 4
0
def main():
    """Parse CLI args, build the UCF101 spatial-stream dataloaders, and
    train the spatial CNN."""
    global arg
    arg = parser.parse_args()
    print(arg)

    # Prepare DataLoader
    data_loader = spatial_dataloader.spatial_dataloader(
        BATCH_SIZE=arg.batch_size,  # batch size
        num_workers=8,  # use 8 worker subprocesses to load data
        path=opt.spatial_train_data_root,
        ucf_list=opt.ucf_list,
        ucf_split=opt.ucf_split,
    )

    train_loader, test_loader, test_video = data_loader.run()
    # test_loader: 71877 samples -- test-set loader, type {DataLoader}, batch_size: 25
    # train_loader: 9537 samples, type {DataLoader}
    # test_video: type {dict}, length 3783, e.g. {'Unxxxx_g04_c02': 96}
    # The training set etc. are obtained above.
    # Model
    model = Spatial_CNN(
        nb_epochs=arg.epochs,
        lr=arg.lr,
        batch_size=arg.batch_size,
        resume=arg.resume,
        start_epoch=arg.start_epoch,
        evaluate=arg.evaluate,
        train_loader=train_loader,
        test_loader=test_loader,
        test_video=test_video
    )
    # Training
    model.run()
Esempio n. 5
0
def main():
    """Build a TRN model from CLI arguments and either evaluate it on a
    test pickle (``--evaluate``) or run the training loop."""
    global args
    args = parser.parse_args()
    check_rootfolders()

    # Default frame length per segment: 1 for RGB, 5 for flow-like modalities.
    if not args.new_length:  # None/0 -> choose per modality
        data_length = 1 if args.modality == 'RGB' else 5
    else:
        data_length = args.new_length

    trnmodel = TRN(
        dataset=args.dataset,
        num_segments=args.num_segments,
        modality=args.modality,
        new_length=data_length,
        lr=args.lr,
        loss_type=args.loss_type,              # default "nll"
        weight_decay=args.weight_decay,        # default 5e-4
        lr_steps=args.lr_steps,                # default [50, 100]
        momentum=args.momentum,                # default 0.9
        gpus=args.gpus,
        clip_gradient=args.clip_gradient,
        base_model=args.arch,                  # e.g. "resnet50"
        dropout=args.dropout,                  # e.g. 0.7
        img_feature_dim=args.img_feature_dim,  # default 256
        partial_bn=not args.no_partialbn,      # default False
        consensus_type=args.consensus_type,    # 'TRN' or 'MTRN'
        batch_size=args.batch_size,            # default 64
        workers=args.workers,                  # default 2
        resume=args.resume,                    # path to a pretrained model
        epochs=args.epochs,
        start_epoch=args.start_epoch,
        ifprintmodel=args.print_model in [1, 'True'],  # default 1
        print_freq=1,
        eval_freq=1,
    )

    # --------- evaluation-only path ------------------------------------------
    # BUG FIX: the progress label was misspelled 'evalutate='.
    print('evaluate=', args.evaluate)
    if str(args.evaluate).lower() == 'true' or args.evaluate == '1':
        logits = trnmodel(args.test_pickle)
        logits = np.array(logits)
        print('output size: ', logits.shape)
        with np.printoptions(threshold=np.inf):
            print(logits)
    else:
        trnmodel.do_training(ifprint=args.print_training_in_terminal)
Esempio n. 6
0
def main():
    """Evaluate a trained VPN mapper on the sequential OVM dataset.

    Parses CLI options, builds the validation loader, optionally restores
    weights from ``--weights``, then runs ``eval`` writing visualisations
    under ``<visualize>/<store_name>``.
    """
    global args, web_path, best_prec1
    best_prec1 = 0
    args = parser.parse_args()
    network_config = Foo(
        encoder=args.encoder,
        decoder=args.decoder,
        fc_dim=args.fc_dim,
        num_views=args.n_views,
        num_class=94,  # NOTE(review): hard-coded class count -- confirm it matches the dataset
        transform_type=args.transform_type,
        output_size=args.label_resolution,
    )

    val_dataset = Seq_OVMDataset(args.test_dir, pix_file=args.pix_file,
                        transform=torchvision.transforms.Compose([
                            Stack(roll=True),
                            ToTorchFormatTensor(div=True),
                            GroupNormalize(mean_rgb, std_rgb)
                            ]),
                        n_views=network_config.num_views, resolution=args.input_resolution,
                        label_res=args.label_resolution, use_mask=args.use_mask, is_train=False)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1,
        shuffle=False, pin_memory=True
    )

    mapper = VPNModel(network_config)
    mapper = nn.DataParallel(mapper.cuda())

    if args.weights:
        if os.path.isfile(args.weights):
            print(("=> loading checkpoint '{}'".format(args.weights)))
            checkpoint = torch.load(args.weights)
            args.start_epoch = checkpoint['epoch']
            mapper.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the success message reported args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.weights, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.weights)))

    web_path = os.path.join(args.visualize, args.store_name)
    # NOTE(review): size_average is deprecated in modern PyTorch; the
    # equivalent is reduction='mean'.
    criterion = nn.NLLLoss(weight=None, size_average=True)
    # Removed a duplicate trailing `web_path = os.path.join(...)` that
    # recomputed the identical value after eval().
    eval(val_loader, mapper, criterion, web_path)
Esempio n. 7
0
def main():
    """Driver: prepare arguments, optionally hook up Weights & Biases,
    then train and/or evaluate the hashing model."""
    args = prepare(parser.parse_args())

    if args.wandb:
        wandb.init(project=args.project_name)
        wandb.run.name = args.subproject_name
        wandb.config.update(args)

    loaders = dataset_manager(args)
    train_loader, index_loader, val_loader, test_loader = loaders
    model, optimizer, scheduler, criterion, hash_center = model_manager(args)

    if args.wandb:
        wandb.watch(model)

    if args.train:
        train(args, train_loader, index_loader, val_loader, model, optimizer,
              scheduler, criterion, hash_center)

    if args.test:
        mAP = evaluation(args, index_loader, test_loader, model, T=0)
        print("mAP : {:.3f}".format(mAP))
def main():
    """Parse CLI options, pin several settings to fixed defaults, and launch
    single- or multi-process (distributed) training via ``main_worker``."""
    global args
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.available_gpus
    # Hard-code options that this entry point does not expose on the CLI.
    args.consensus_type = 'avg'
    args.pretrain = 'imagenet'
    args.tune_from = None
    args.img_feature_dim = 256
    args.loss_type ='nll'
    args.evaluate = False

    # Seeding makes cuDNN deterministic, which costs speed.
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # With the env:// rendezvous, world size comes from the environment.
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Esempio n. 9
0
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers)

    model = ActionModifiers(test_set, args)
    if args.gpu:
        model = model.cuda()  #TODO implement gpu option properly everywhere

    evaluator = Evaluator(test_set, model)

    checkpoint = torch.load(args.load)
    model.load_state_dict(checkpoint['net'])
    print('loaded model from', os.path.basename(args.load))
    v2a_ant, v2a_all, a2v_ant, a2v_all, v2action = test(
        model, test_loader, test_set, evaluator)
    print('Video-to-Adverb Antonym: %.3f' % v2a_ant)
    print('Video-to-Adverb All: %.3f' % v2a_all)
    print('Adverb-to-Video Antonym: %.3f' % a2v_ant)
    print('Adverb-to-Video All: %.3f' % a2v_all)
    print('Video-to-Action: %.3f' % v2action)


if __name__ == '__main__':
    # Force batch size 1 for evaluation and normalise the modality option
    # into a list ('both' expands to the RGB and optical-flow streams).
    args = parser.parse_args()
    args.batch_size = 1
    if args.modality == 'both':
        args.modality = ['rgb', 'flow']
    else:
        args.modality = [args.modality]
    main(args)
Esempio n. 10
0
def main():
    """Train or test the MFF gesture model on SHGD/Jester.

    Parses the module-level ``parser`` options, builds the model and data
    loaders, optionally resumes from a checkpoint or adapts a pretrained RGB
    first conv layer to the IR/D/IRD channel count, then either runs a single
    test pass (``--test``) or the full train/validate loop.
    """
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    if not args.test:
        categories, args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
            args.dataset, args.modality)
    else:
        categories, args.test_list, args.root_path, prefix = datasets_video.return_dataset(
            'SHGDTuples', args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        args.dataset, args.modality, args.arch,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = MFF(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                dataset=args.dataset)

    # Capture preprocessing constants before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the success message reported args.test instead of the
            # checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.pretrained:
        # The first conv weight's name differs per backbone.
        if args.arch == 'squeezenet1_1':
            name = 'module.base_model.0.weight'
        else:
            name = 'module.base_model.features.0.0.weight'

        # BUG FIX: when the pretrained file was missing, the original printed
        # a warning and then crashed with a NameError on weight_conv_t /
        # model_dict; all conversion and loading now happens only when the
        # checkpoint file actually exists.
        if os.path.isfile(args.pretrained):
            pretrained_dict = torch.load(args.pretrained)
            pretrained_state_dict = pretrained_dict['state_dict']
            # Drop the final classifier weights so a different class count
            # does not clash with the current model.
            pretrained_state_dict = {
                k: v
                for k, v in pretrained_state_dict.items()
                if 'module.consensus.classifier.3.' not in k
            }
            model_dict = model.state_dict()
            weight_conv_t = pretrained_state_dict[name]

            if args.modality == 'IRD':
                # make the first conv from 3 chann to 2 chann (average the sum of 3 chann)
                weight_conv_t = weight_conv_t.sum(1)
                weight_conv_t = weight_conv_t.unsqueeze(1)
                weight_conv_t = weight_conv_t.mean(1)
                weight_conv_t = torch.stack((weight_conv_t, weight_conv_t), 1)
                pretrained_state_dict[name] = weight_conv_t
                model_dict.update(pretrained_state_dict)
                print("Converted the first conv layer to 2 channels.")

            if args.modality == 'IR' or args.modality == 'D':
                # make the first conv from 3 chann to 1 chann (average the sum of 3 chann)
                weight_conv_t = weight_conv_t.sum(1)
                weight_conv_t = weight_conv_t.unsqueeze(1)
                weight_conv_t = weight_conv_t.mean(1)
                weight_conv_t = weight_conv_t.unsqueeze(1)
                pretrained_state_dict[name] = weight_conv_t
                model_dict.update(pretrained_state_dict)
                print("Converted the first conv layer to 1 channel.")

            model.load_state_dict(model_dict)
            print("=> loaded pretrained model checkpoint '{}'".format(
                args.pretrained))
        else:
            print(("=> no pretrained model checkpoint found at '{}'".format(
                args.pretrained)))
    print(model)

    ## to print the number of trainable paramters in the network
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params_num = sum([np.prod(p.size()) for p in model_parameters])
    print('Total number of parameters:' + str(params_num))
    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    if args.dataset == 'SHGD':
        from SHGD import DataSet
    if args.dataset == 'jester':
        from Jester import DataSet
    if not args.test:
        train_loader = torch.utils.data.DataLoader(
            DataSet(args.root_path,
                    args.train_list,
                    num_segments=args.num_segments,
                    modality=args.modality,
                    image_tmpl=prefix,
                    dataset=args.dataset,
                    transform=torchvision.transforms.Compose([
                        train_augmentation,
                        Stack(),
                        ToTorchFormatTensor(),
                        normalize,
                    ])),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=False)

        val_loader = torch.utils.data.DataLoader(
            DataSet(args.root_path,
                    args.val_list,
                    num_segments=args.num_segments,
                    modality=args.modality,
                    image_tmpl=prefix,
                    dataset=args.dataset,
                    random_shift=False,
                    transform=torchvision.transforms.Compose([
                        GroupScale(int(scale_size)),
                        GroupCenterCrop(crop_size),
                        Stack(),
                        ToTorchFormatTensor(),
                        normalize,
                    ])),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=False)

    else:
        test_loader = torch.utils.data.DataLoader(
            DataSet(args.root_path,
                    args.test_list,
                    num_segments=args.num_segments,
                    modality=args.modality,
                    image_tmpl=prefix,
                    dataset=args.dataset,
                    random_shift=False,
                    test_mode=True,
                    transform=torchvision.transforms.Compose([
                        GroupScale(int(scale_size)),
                        GroupCenterCrop(crop_size),
                        Stack(),
                        ToTorchFormatTensor(),
                        normalize,
                    ])),
            batch_size=1,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=False)

    if args.test:
        if not args.resume:
            print('Please give a path to a trained model for testing.')
            sys.exit()
        else:
            test(args.start_epoch, test_loader, model, args)
            return

    # BUG FIX: the original tested `args.loss_type == 'nll'` first and
    # `args.loss_type == 'nll' and num_class == 13` in the elif, which made
    # the class-weighted branch unreachable.  The more specific condition is
    # now checked first.
    if args.loss_type == 'nll' and num_class == 13:
        # give the "No gesture/Hand up/Hand down less weight than the other classes. No:4420 Hand up:2280 Hand Down:2190 Others:228
        weights = [1, 1, 1, 1 / 10, 1 / 10, 1 / 20, 1, 1, 1, 1, 1, 1, 1]
        class_weights = torch.Tensor(weights).cuda()
        criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()
    elif args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader), log_training,
                             num_class)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Esempio n. 11
0
def main():
    """Train ECOfull on Kinetics-style data with PaddlePaddle fluid.

    Builds the static train/eval programs, loads either a previously saved
    model or a pickled pretrained state dict, then runs the epoch loop,
    logging loss/accuracy to VisualDL and validating every ``eval_freq``
    epochs.
    """
    global args, best_acc
    args = parser.parse_args()

    writer = LogWriter(args.log)
    # writer = None

    cfg = parse_config('config.txt')
    print_configs(cfg, 'TRAIN')

    main_program = fluid.default_main_program()
    start_program = fluid.default_startup_program()

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    with fluid.program_guard(main_program, start_program):

        # data placeholder
        input = fluid.data(name='data',
                           shape=[-1, 3, 224, 224],
                           dtype='float32')
        label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
        print(f'label shape:{label.shape}')

        model = ECOfull(input, num_segments=args.num_segments)
        net_out = model()

        cost = fluid.layers.softmax_with_cross_entropy(net_out, label)
        avg_cost = fluid.layers.mean(cost)

        acc = fluid.layers.accuracy(net_out, label)

        # test program (cloned before the optimizer mutates main_program)
        eval_program = main_program.clone(for_test=True)

        # optimizer
        fluid.optimizer.SGD(args.lr).minimize(avg_cost)

    #print(main_program.all_parameters())
    reader = KineticsReader('eco', 'train', cfg).create_reader()
    feeder = fluid.DataFeeder([input, label], place)

    # validation-set reader
    val_reader = KineticsReader('eco', 'valid', cfg).create_reader()

    # initialize parameters
    exe = fluid.Executor(place=place)
    exe.run(start_program)

    train_exe = fluid.Executor(place=place)

    # Manual toggle: flip to truthy to resume from the best saved model
    # instead of the pickled pretrained state dict below.
    if 0:
        # fluid.io.load(train_exe, 'models/', filename='eco_full.pdparams')
        fluid.io.load(main_program, 'models/eco_full_best', train_exe)
    # # pre-trained
    else:
        f = open('program_state_dict.pkl', 'rb')
        state_dict = pickle.load(f)
        f.close()
        fluid.io.set_program_state(main_program, state_dict)

    step = 0
    best_acc = read_best_acc()
    for i in range(args.epochs):
        for index, data in enumerate(reader()):
            avg_cost_, acc_ = train_exe.run(
                main_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost.name, acc.name])

            # Periodic console + VisualDL logging.
            if (index + 1) % args.print_freq == 0:
                if not writer is None:
                    writer.add_scalar(tag='train/loss',
                                      step=step,
                                      value=avg_cost_[0])
                    writer.add_scalar(tag='train/acc',
                                      step=step,
                                      value=acc_[0])
                print(
                    f'epoch:{i+1} step:{index + 1} avg loss:{avg_cost_[0]} acc:{acc_[0]}'
                )
            step += 1

        # Run validation every eval_freq epochs.
        if (i + 1) % args.eval_freq == 0:
            fetch_list = [avg_cost.name, acc.name]
            validate(val_reader,
                     feeder,
                     place,
                     eval_program,
                     fetch_list,
                     epoch=i,
                     writer=writer)
Esempio n. 12
0
def main():
    """Train a GSM video classification model on something-v1 or diving48.

    Parses the module-level ``parser`` options, prepares the experiment
    directory and TensorBoard writer, builds the model and dataloaders,
    optionally resumes from a checkpoint, then runs the warmup-cosine
    training loop, checkpointing after every epoch.
    """
    # Removed an unused local `finetuning = False` left over from editing.
    global args, best_prec1
    args = parser.parse_args()
    check_rootfolders()

    # Per-dataset class count and frame-filename convention.
    if args.dataset == 'something-v1':
        num_class = 174
        args.rgb_prefix = ''
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'diving48':
        num_class = 48
        args.rgb_prefix = 'frames'
        rgb_read_format = "{:05d}.jpg"
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    # Refuse to clobber an existing experiment directory unless resuming.
    if not args.resume:
        if os.path.exists(model_dir):
            print('Dir {} exists!!!'.format(model_dir))
            sys.exit()
        else:
            os.makedirs(model_dir)
            os.makedirs(os.path.join(model_dir, args.root_log))

    writer = SummaryWriter(model_dir)

    args.train_list, args.val_list, args.root_path, prefix = datasets_video.return_dataset(
        args.dataset)

    if 'something' in args.dataset:
        # label transformation for left/right categories: a horizontal flip
        # swaps each of these class pairs.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None

    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    model = VideoModel(num_class=num_class,
                       modality=args.modality,
                       num_segments=args.num_segments,
                       base_model=args.arch,
                       consensus_type=args.consensus_type,
                       dropout=args.dropout,
                       partial_bn=not args.no_partialbn,
                       gsm=args.gsm,
                       target_transform=target_transforms)

    # Capture preprocessing constants before DataParallel wraps the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUG FIX: the success message reported args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # BUG FIX: an unknown modality previously left data_length undefined
        # and crashed later with a NameError; fail fast instead.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path,
                     args.train_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     image_tmpl=args.rgb_prefix + rgb_read_format,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                         ToTorchFormatTensor(
                             div=(args.arch not in ['BNInception', 'InceptionV3'])),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        VideoDataset(args.root_path,
                     args.val_list,
                     num_segments=args.num_segments,
                     new_length=data_length,
                     modality=args.modality,
                     image_tmpl=args.rgb_prefix + rgb_read_format,
                     random_shift=False,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(crop_size),
                         Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                         ToTorchFormatTensor(
                             div=(args.arch not in ['BNInception', 'InceptionV3'])),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
        optimizer=optimizer,
        milestones=[args.warmup, args.epochs],
        warmup_iters=args.warmup,
        min_ratio=1e-7)
    # Fast-forward the scheduler to the resumed epoch.
    if args.resume:
        for epoch in range(0, args.start_epoch):
            lr_scheduler_clr.step()

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    for epoch in range(args.start_epoch, args.epochs):

        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)

        train_prec1 = train(train_loader,
                            model,
                            criterion,
                            optimizer,
                            epoch,
                            log_training,
                            writer=writer)

        lr_scheduler_clr.step()

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             criterion, (epoch + 1) * len(train_loader),
                             log_training,
                             writer=writer,
                             epoch=epoch)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, is_best, model_dir)
        else:
            # Off-cycle epochs still checkpoint with the training accuracy.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'current_prec1': train_prec1,
                    'lr': optimizer.param_groups[-1]['lr'],
                }, False, model_dir)
def main():
    """Evaluate a view-parsing network (VPN) checkpoint on the validation
    set, then emit an HTML gallery page showing, for every ``print_freq``-th
    step, each input view image followed by the prediction and ground-truth
    images that ``eval()`` is expected to have written into ``web_path``.
    """
    global args, web_path, best_prec1
    # Extra CLI flags used only by this entry point.
    parser.add_argument('--test-views', type=int, default=94)
    parser.add_argument('--view-bias', type=int, default=8)

    best_prec1 = 0
    args = parser.parse_args()
    network_config = Foo(
        encoder=args.encoder,
        decoder=args.decoder,
        fc_dim=args.fc_dim,
        num_views=args.n_views,
        num_class=args.num_class,
        transform_type=args.transform_type,
        output_size=args.label_resolution,
    )

    # Validation data: stacked multi-view frames normalized with the
    # dataset-level RGB statistics (mean_rgb/std_rgb are module globals).
    val_dataset = OVMDataset(args.data_root, args.eval_list,
                         transform=torchvision.transforms.Compose([
                             Stack(roll=True),
                             ToTorchFormatTensor(div=True),
                             GroupNormalize(mean_rgb, std_rgb)
                         ]),
                         num_views=network_config.num_views, input_size=args.input_resolution,
                         label_size=args.segSize, use_mask=args.use_mask, use_depth=args.use_depth, is_train=False)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.batch_size,
        num_workers=args.num_workers, shuffle=False,
        pin_memory=True
    )

    mapper = VPNModel(network_config)
    mapper = nn.DataParallel(mapper.cuda())

    if args.weights:
        if os.path.isfile(args.weights):
            print(("=> loading checkpoint '{}'".format(args.weights)))
            checkpoint = torch.load(args.weights)
            args.start_epoch = checkpoint['epoch']
            mapper.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.weights); the original printed the unrelated
            # args.evaluate flag here.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.weights, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.weights)))

    # NOTE: size_average is deprecated in modern PyTorch in favor of
    # reduction='mean'; size_average=True is the equivalent legacy spelling.
    criterion = nn.NLLLoss(weight=None, size_average=True)
    # Project-local eval() (shadows the builtin) — runs validation and,
    # presumably, writes the Step-*.png images referenced below; verify.
    eval(val_loader, mapper, criterion)

    web_path = os.path.join(args.visualize, args.store_name)
    # Idiomatic replacement for the original isdir/pass/makedirs branch.
    os.makedirs(web_path, exist_ok=True)

    # Build the gallery: one table row per sampled step, with every input
    # view, then the prediction image, then the ground-truth image.
    with dominate.document(title=web_path) as web:
        for step in range(len(val_loader)):
            if step % args.print_freq == 0:
                h2('Step {}'.format(step*args.batch_size))
                with table(border = 1, style = 'table-layout: fixed;'):
                    with tr():
                        for i in range(args.test_views):
                            path = 'Step-{}-{}.png'.format(step * args.batch_size, i)
                            with td(style='word-wrap: break-word;', halign='center', valign='top'):
                                img(style='width:128px', src=path)
                        path = 'Step-{}-pred.png'.format(step * args.batch_size)
                        with td(style='word-wrap: break-word;', halign='center', valign='top'):
                            img(style='width:128px', src=path)
                        path = 'Step-{}-gt.png'.format(step * args.batch_size)
                        with td(style='word-wrap: break-word;', halign='center', valign='top'):
                            img(style='width:128px', src=path)

    with open(os.path.join(web_path, 'index.html'), 'w') as fp:
        fp.write(web.render())
Esempio n. 14
0
def main():
    """Train a TwoStream, TSN, or C3D action-recognition model on the
    "something" dataset, validating every ``eval_freq`` epochs and saving
    the best checkpoint by top-1 precision.
    """
    global args, best_prec1
    args = parser.parse_args()
    assert len(args.train_id) > 0

    check_rootfolders(args.train_id)
    # TensorBoard writer; `tf and ...` leaves summary_w falsy when
    # TensorFlow is not installed.
    summary_w = tf and tf.summary.FileWriter(
        os.path.join('results', args.train_id, args.root_log))  #tensorboard

    categories, args.train_list, args.val_list, args.root_path, prefix = return_something_path(
        args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([args.model, args.modality, args.arch])
    print('storing name: ' + args.store_name)

    # Sentinel: -1 means "no per-group optimizer policies" (C3D path);
    # TwoStream/TSN replace it with their get_optim_policies() result.
    policies = -1
    if args.model == 'TwoStream':
        model = TwoStream(num_class,
                          args.modality,
                          base_model=args.arch,
                          dropout=args.dropout,
                          crop_num=1,
                          partial_bn=not args.no_partialbn)
        policies = model.get_optim_policies()

    elif args.model == 'TSN':
        model = TSN(num_class,
                    args.num_segments,
                    args.modality,
                    base_model=args.arch,
                    dropout=args.dropout,
                    crop_num=1,
                    partial_bn=not args.no_partialbn)
        policies = model.get_optim_policies()

    elif args.model == 'C3D':
        model = C3D()
        model_dict = model.state_dict()

        pretrained_dict = torch.load('./model_zoo/c3d.pickle')

        # 1. filter out unnecessary keys
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # 3. load the new state dict
        model.load_state_dict(model_dict)

        print('c3d pretrained model loaded~')
    else:
        print('error!')
        exit()

    # Read augmentation parameters from the (un-wrapped) model before
    # DataParallel hides them behind .module.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    # NOTE(review): best_prec1 is only assigned on resume — it is assumed
    # to be initialized at module level elsewhere; confirm, otherwise the
    # first validation epoch raises NameError.

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    if args.modality == 'RGB' and args.model == 'C3D':
        data_length = 16  # clip

    # Dataset construction mirrors the model choice; BNInception/InceptionV3
    # expect BGR channel order (roll=True) and 0-255 inputs (div=False).
    if args.model == 'TwoStream':
        datasettrain = TwoStreamDataSet(
            args.root_path,
            args.train_list,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))

        datasetval = TwoStreamDataSet(
            args.root_path,
            args.val_list,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))
    elif args.model == 'TSN':
        datasettrain = TSNDataSet(
            args.root_path,
            args.train_list,
            args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))

        datasetval = TSNDataSet(
            args.root_path,
            args.val_list,
            args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]))
    elif args.model == 'C3D':
        datasettrain = C3DDataSet(
            args.root_path,
            args.train_list,
            1,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(div=(
                    args.arch not in ['BNInception', 'InceptionV3', 'C3D'])),
                normalize,
            ]))

        datasetval = C3DDataSet(
            args.root_path,
            args.val_list,
            1,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(div=(
                    args.arch not in ['BNInception', 'InceptionV3', 'C3D'])),
                normalize,
            ]))

    trainvidnum = len(datasettrain)
    valvidnum = len(datasetval)

    train_loader = torch.utils.data.DataLoader(datasettrain,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(datasetval,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    # With policies, each parameter group carries its own lr/decay
    # multipliers; otherwise optimize all parameters uniformly (C3D).
    if policies != -1:
        for group in policies:
            print(
                ('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
                    group['name'], len(group['params']), group['lr_mult'],
                    group['decay_mult'])))

        optimizer = torch.optim.SGD(policies,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    # log_training = open(os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps, args.factor,
                             policies != -1)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, trainvidnum,
              summary_w)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * trainvidnum, summary_w)
            # prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader), summary_w)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Esempio n. 15
0
def main():
    """Train a TSN action-recognition model (ucf101 / hmdb51 / kinetics /
    cad) with Adam, validating every ``eval_freq`` epochs and saving the
    best checkpoint by top-1 precision.
    """

    torch.set_printoptions(precision=6)

    global args, best_prec1
    args = parser.parse_args()
    # Map the dataset name to its number of classes.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'cad':
        num_class = 8
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Build the TSN model: num_segments is the number of snippets per
    # video (K in the paper); modality selects RGB/Flow/RGBDiff input;
    # arch picks the backbone; consensus_type fuses snippet scores.
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Capture preprocessing parameters and optimizer policies from the
    # un-wrapped model before DataParallel hides them behind .module.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Optionally resume from a saved checkpoint (.pth): restore the
    # epoch counter, the best precision so far, and the weights.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    # TSNDataSet yields (frames, label); DataLoader batches both.
    # BNInception expects BGR channel order (roll=True) and 0-255
    # inputs (div=False); other backbones take RGB in [0, 1].
    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=3,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))
    '''
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    '''
    # try Adam instead.
    optimizer = torch.optim.Adam(policies, args.lr)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    # Training loop: step the lr schedule, train one epoch, and every
    # eval_freq epochs (and on the last epoch) validate and checkpoint,
    # tracking the best top-1 precision seen so far.
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Esempio n. 16
0
def main():
    """Load a trained TSN model (with attention-map output) and run one
    visualization pass over the validation set, optionally with reversed
    or shuffled frame order, logging to ``visualize.log``.
    """
    global args, best_prec1, class_to_name
    parser.add_argument('--class_index', type=str, help='class index file')
    args = parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'something':
        num_class = 174
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # Build class_to_name from the index file. "something" lists one
    # class name per line; the other datasets use "<1-based-id> <name>"
    # lines, converted here to 0-based ids.
    if args.dataset == 'something':
        img_prefix = ''
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {
            idx: line.strip().replace(' ', '-')
            for idx, line in enumerate(content)
        }
    else:
        img_prefix = 'image_'
        with open(args.class_index, 'r') as f:
            content = f.readlines()
        class_to_name = {int(line.strip().split(' ')[0])-1:line.strip().split(' ')[1] \
                for line in content}

    # Persist the full option set next to the results for reproducibility.
    with open(os.path.join(args.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(args), opt_file)
    if not (args.consensus_type == 'lstm'
            or args.consensus_type == 'conv_lstm'):
        args.lstm_out_type = None
    # get_att_maps=True makes the model expose attention maps for the
    # visualization done by validate() below.
    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn,
                lstm_out_type=args.lstm_out_type,
                lstm_layers=args.lstm_layers,
                lstm_hidden_dims=args.lstm_hidden_dims,
                conv_lstm_kernel=args.conv_lstm_kernel,
                bi_add_clf=args.bi_add_clf,
                bi_out_dims=args.bi_out_dims,
                bi_rank=args.bi_rank,
                bi_att_softmax=args.bi_att_softmax,
                bi_filter_size=args.bi_filter_size,
                bi_dropout=args.bi_dropout,
                bi_conv_dropout=args.bi_conv_dropout,
                get_att_maps=True,
                dataset=args.dataset)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
        rev_normalize = ReverseGroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()
    # NOTE(review): rev_normalize (and data_length for modalities other
    # than RGB/Flow/RGBDiff) is undefined on some paths; with
    # modality='RGBDiff' the validate() call below would raise NameError.

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 10
        # data_length = 5

    # Optional temporal perturbation of the validation clips.
    if args.val_reverse:
        val_temp_transform = ReverseFrames(size=data_length *
                                           args.num_segments)
        print('using reverse val')
    elif args.val_shuffle:
        val_temp_transform = ShuffleFrames(size=data_length *
                                           args.num_segments)
        print('using shuffle val')
    else:
        val_temp_transform = IdentityTransform()
        print('using normal val')
    # NOTE(review): shuffle=True on a validation loader is unusual but may
    # be intentional here to sample varied clips for visualization.
    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            "",
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=img_prefix + "{:05d}.jpg" if args.modality
            in ["RGB", "RGBDiff"] else args.flow_prefix + "{}_{:05d}.jpg",
            random_shift=False,
            temp_transform=val_temp_transform,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    print('visualizing...')
    val_logger = os.path.join(args.result_path, 'visualize.log')
    validate(val_loader,
             model,
             0,
             val_logger=val_logger,
             rev_normalize=rev_normalize)
    return
Esempio n. 17
0
def _build_temporal_model(num_class, target_transforms, backbone=None, **extra):
    """Build a ``TemporalModel`` whose variant is inferred from ``args.arch``.

    The original code repeated the same nine-argument constructor call for
    every variant; this helper keeps a single call site.

    Args:
        num_class: number of output classes.
        target_transforms: optional label-swap mapping (e.g. left/right
            categories for something-something), or None.
        backbone: backbone name; defaults to ``args.arch``.
        **extra: extra keyword arguments forwarded verbatim to
            ``TemporalModel`` (e.g. ``modality``, ``new_length``).

    Returns:
        A freshly constructed ``TemporalModel``.
    """
    # First substring match wins (same precedence as the original if/elif
    # chain); unknown arches fall back to the plain 'ORI' variant.
    variants = (('gst', 'GST'), ('stm', 'STM'), ('tmp', 'TMP'),
                ('tsm', 'TSM'), ('ori', 'ORI'), ('I3D', 'I3D'))
    model_name = next((name for key, name in variants if key in args.arch),
                      'ORI')
    return TemporalModel(num_class,
                         args.num_segments,
                         model=model_name,
                         backbone=args.arch if backbone is None else backbone,
                         alpha=args.alpha,
                         beta=args.beta,
                         dropout=args.dropout,
                         target_transforms=target_transforms,
                         resi=args.resi,
                         **extra)


def main():
    """Train (or evaluate) a temporal action-recognition model.

    Parses CLI args, builds the model/dataloaders/optimizer, optionally
    resumes from an RGB checkpoint, then runs the train/validate loop,
    checkpointing after every epoch.
    """
    global args, best_prec1
    args = parser.parse_args()
    # Fix: best_prec1 was only assigned on the resume path, so a fresh run
    # crashed with NameError at the first validation epoch.
    best_prec1 = 0

    if args.dataset == 'something-v1':
        num_class = 174
    elif args.dataset == 'diving48':
        num_class = 48
    elif args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'skating2':
        num_class = 63
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model_dir = os.path.join('experiments', args.dataset, args.arch,
                             args.consensus_type + '-' + args.modality,
                             str(args.run_iter))
    (args.train_list, args.val_list, args.root_path,
     args.rgb_prefix) = datasets_video.return_dataset(args.dataset)
    if 'something' in args.dataset:
        # Label transformation for left/right categories: horizontal flips
        # must also swap the corresponding class labels.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
        print('Target transformation is enabled....')
    else:
        target_transforms = None

    if not args.resume_rgb:
        # Fresh run: wipe any previous experiment directory for this config.
        if os.path.exists(model_dir):
            print('Dir {} exists!!!  it will be removed'.format(model_dir))
            shutil.rmtree(model_dir)
        os.makedirs(model_dir)
        os.makedirs(os.path.join(model_dir, args.root_log))

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fix: previously fell through with data_length unbound.
        raise ValueError('Unsupported modality ' + args.modality)

    if args.resume_rgb:
        # Fix: the original silently left `model` unbound for non-RGB
        # modalities on the resume path (NameError later).
        if args.modality != 'RGB':
            raise ValueError('resume_rgb only supports the RGB modality')
        model = _build_temporal_model(num_class, target_transforms)
        if not os.path.isfile(args.resume_rgb):
            raise ValueError(
                "=> no checkpoint found at '{}'".format(args.resume_rgb))
        print(("=> loading checkpoint '{}'".format(args.resume_rgb)))
        checkpoint = torch.load(args.resume_rgb)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        # Strip the 'module.' prefix that DataParallel adds when saving.
        original_checkpoint = {
            k[7:]: v
            for k, v in checkpoint['state_dict'].items()
        }
        model.load_state_dict(original_checkpoint)
        print(("=> loaded checkpoint '{}' (epoch {} ) best_prec1 : {} ".
               format(args.resume_rgb, checkpoint['epoch'], best_prec1)))
    else:
        if args.modality == 'flow':
            # Only the I3D family supports the flow stream here; the
            # original left `model` unbound for any other arch.
            if 'I3D' not in args.arch:
                raise ValueError(
                    'flow modality requires an I3D arch, got ' + args.arch)
            model = _build_temporal_model(num_class,
                                          target_transforms,
                                          modality='flow',
                                          new_length=data_length)
        elif args.modality == 'RGB':
            known = ('gst', 'stm', 'tmp', 'tsm', 'ori', 'I3D')
            if any(key in args.arch for key in known):
                model = _build_temporal_model(num_class, target_transforms)
            else:
                # Unknown arch: fall back to the ORI variant with an
                # '_ori'-suffixed backbone, as the original did.
                model = _build_temporal_model(num_class,
                                              target_transforms,
                                              backbone=args.arch + '_ori')

    cudnn.benchmark = True
    writer = SummaryWriter(model_dir)
    # Data loading code
    args.store_name = '_'.join([
        args.dataset, args.arch, args.consensus_type,
        'segment%d' % args.num_segments
    ])
    print('storing name: ' + args.store_name)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = get_augmentation(mode='train')
    val_trans = get_augmentation(mode='val')
    normalize = GroupNormalize(input_mean, input_std)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    if args.dataset == 'diving48':
        args.root_path = args.root_path + '/train'

    train_loader = torch.utils.data.DataLoader(
        VideoDataset(
            args.root_path,
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=args.rgb_prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR uint8 input, hence
                # roll=True and no /255 division for those backbones.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]),
            dataset=args.dataset),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    if args.dataset == 'diving48':
        # Undo the '/train' suffix added above before pointing at '/test'.
        args.root_path = args.root_path[:-6] + '/test'
    val_loader = torch.utils.data.DataLoader(
        VideoDataset(
            args.root_path,
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=args.rgb_prefix,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]),
            dataset=args.dataset),
        batch_size=1,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        # Fix: os.remove() was called with the open file object instead of
        # its path (TypeError), and the file was never closed.
        log_path = 'test_not.csv'
        with open(log_path, 'w') as log_test:
            validate(val_loader, model, criterion, log_test)
        os.remove(log_path)
        return

    if args.lr_scheduler == 'cos_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupCosineLR(
            optimizer=optimizer,
            milestones=[args.warmup, args.epochs],
            warmup_iters=args.warmup,
            min_ratio=1e-7)
    elif args.lr_scheduler == 'lr_step_warmup':
        lr_scheduler_clr = CosineAnnealingLR.WarmupStepLR(
            optimizer=optimizer,
            milestones=[args.warmup] +
            [args.epochs - 30, args.epochs - 10, args.epochs],
            warmup_iters=args.warmup)
    elif args.lr_scheduler == 'lr_step':
        lr_scheduler_clr = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, args.lr_steps, 0.1)
    else:
        # Fix: previously fell through with lr_scheduler_clr unbound.
        raise ValueError('Unknown lr_scheduler ' + args.lr_scheduler)
    if args.resume_rgb:
        # Fast-forward the scheduler to the resumed epoch (optimizer.step()
        # first to silence the "scheduler before optimizer" warning).
        for epoch in range(0, args.start_epoch):
            optimizer.step()
            lr_scheduler_clr.step()

    log_training = open(
        os.path.join(model_dir, args.root_log, '%s.csv' % args.store_name),
        'a')
    try:
        for epoch in range(args.start_epoch, args.epochs):
            writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  log_training,
                  writer=writer)
            lr_scheduler_clr.step()
            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader,
                                 model,
                                 criterion,
                                 log_training,
                                 writer=writer,
                                 epoch=epoch)
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'lr': optimizer.param_groups[-1]['lr'],
                    }, is_best, model_dir)
                print('best_prec1: {}'.format(best_prec1))
            else:
                # Non-eval epoch: still checkpoint, never marked best.
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'lr': optimizer.param_groups[-1]['lr'],
                    }, False, model_dir)
    finally:
        # Fix: the log file was never closed.
        log_training.close()
def main():
    """Train the view-transformation mapper (VPN) on OVM data.

    Builds train/val dataloaders, optionally resumes from a checkpoint,
    then runs the training loop with periodic evaluation and checkpointing.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()
    network_config = Foo(
        encoder=args.encoder,
        decoder=args.decoder,
        fc_dim=args.fc_dim,
        output_size=args.label_resolution,
        num_views=args.n_views,
        num_class=args.num_class,
        transform_type=args.transform_type,
    )
    train_dataset = OVMDataset(args.data_root,
                               args.train_list,
                               transform=torchvision.transforms.Compose([
                                   Stack(roll=True),
                                   ToTorchFormatTensor(div=True),
                                   GroupNormalize(mean_rgb, std_rgb)
                               ]),
                               num_views=network_config.num_views,
                               input_size=args.input_resolution,
                               label_size=args.label_resolution,
                               use_mask=args.use_mask,
                               use_depth=args.use_depth)
    val_dataset = OVMDataset(args.data_root,
                             args.eval_list,
                             transform=torchvision.transforms.Compose([
                                 Stack(roll=True),
                                 ToTorchFormatTensor(div=True),
                                 GroupNormalize(mean_rgb, std_rgb)
                             ]),
                             num_views=network_config.num_views,
                             input_size=args.input_resolution,
                             label_size=args.label_resolution,
                             use_mask=args.use_mask,
                             use_depth=args.use_depth)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers,
                                               shuffle=True,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             num_workers=args.num_workers,
                                             shuffle=False,
                                             pin_memory=True)

    mapper = VPNModel(network_config)
    mapper = nn.DataParallel(mapper.cuda())

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            mapper.load_state_dict(checkpoint['state_dict'])
            # Fix: the message previously printed args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    # NLLLoss averages by default; the deprecated size_average=True argument
    # (removed in modern PyTorch) was dropped -- behavior is identical.
    criterion = nn.NLLLoss(weight=None)
    optimizer = optim.Adam(mapper.parameters(),
                           lr=args.start_lr,
                           betas=(0.95, 0.999))

    # exist_ok avoids the check-then-create race of isdir()+mkdir().
    os.makedirs(args.log_root, exist_ok=True)
    log_train = open(os.path.join(args.log_root, '%s.csv' % args.store_name),
                     'w')
    try:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)
            train(train_loader, mapper, criterion, optimizer, epoch, log_train)

            if (epoch + 1) % args.ckpt_freq == 0 or epoch == args.epochs - 1:
                # NOTE: `eval` here is a project function (shadows the
                # builtin), returning prec@1 on the validation set.
                prec1 = eval(val_loader, mapper, criterion, log_train, epoch)
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': network_config.encoder,
                        'state_dict': mapper.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
    finally:
        # Fix: the log file was never closed.
        log_train.close()
Esempio n. 19
0
def main():
    """Train (or evaluate) an STSNN action-recognition model.

    Parses CLI args, builds the model and dataloaders for the selected
    modality, optionally resumes from a checkpoint, then runs the
    train/validate loop with periodic checkpointing.
    """
    global args, best_prec1
    args = parser.parse_args()
    # Fix: best_prec1 was only assigned on the resume path, so a fresh run
    # crashed with NameError at the first validation epoch.
    best_prec1 = 0
    check_rootfolders()

    categories, args.train_list, args.val_list, args.root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.modality)
    num_class = len(categories)

    args.store_name = '_'.join([
        'STSNN', args.dataset, args.modality, args.arch,
        'group%d' % args.num_segments,
        '%df1c' % args.num_motion
    ])
    print('storing name: ' + args.store_name)

    model = STSNN(num_class,
                  args.num_segments,
                  args.modality,
                  base_model=args.arch,
                  consensus_type=args.consensus_type,
                  dropout=args.dropout,
                  num_motion=args.num_motion,
                  img_feature_dim=args.img_feature_dim,
                  partial_bn=not args.no_partialbn,
                  dataset=args.dataset)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    train_augmentation = model.get_augmentation()

    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: the message previously printed args.evaluate instead of
            # the checkpoint path that was actually loaded.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    print(model)
    cudnn.benchmark = True

    # Data loading code.
    # Fix: the original condition `(x != 'RGBDiff') | (x != 'RGBFlow')` is a
    # tautology (always True), so IdentityTransform was unreachable. The
    # intent is: normalize unless the modality is RGBDiff/RGBFlow.
    if args.modality not in ('RGBDiff', 'RGBFlow'):
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    elif args.modality == 'RGBFlow':
        data_length = args.num_motion
    else:
        # Fix: previously fell through with data_length unbound.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR uint8 input, hence
                # roll=True and no /255 division for those backbones.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=False)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.val_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            dataset=args.dataset,
            random_shift=False,
            transform=torchvision.transforms.Compose([
                GroupScale(int(scale_size)),
                GroupCenterCrop(crop_size),
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3']),
                      isRGBFlow=(args.modality == 'RGBFlow')),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, '%s.csv' % args.store_name), 'w')
    try:
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader), log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
    finally:
        # Fix: the log file was never closed.
        log_training.close()
def main():
    """Train (and optionally evaluate) a multi-modal TBN model.

    All configuration comes from the module-level ``parser``; state is shared
    through the module-level globals declared below.  Builds one transform
    pipeline per modality, an SGD optimizer with per-stream parameter groups,
    then runs the epoch loop with periodic validation and checkpointing.
    """
    global args, best_prec1, train_list, experiment_dir, best_loss
    args = parser.parse_args()

    # Map dataset name to class count; 'epic' is multi-task (verbs, nouns).
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'epic':
        num_class = (125, 352)
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = TBN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                midfusion=args.midfusion)

    # Per-modality preprocessing constants exposed by the model.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    data_length = model.new_length
    # policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    # Resume training from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # Strip the leading 'module.' (DataParallel) prefix from each key
            # so the state dict matches the un-wrapped model below.
            state_dict_new = OrderedDict()
            for k, v in checkpoint['state_dict'].items():
                state_dict_new[k.split('.', 1)[1]] = v
            model.load_state_dict(state_dict_new)
            # NOTE(review): this message formats args.evaluate — args.resume
            # was presumably intended; confirm.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    # Load pretrained weights for each stream
    if args.pretrained_flow_weights:
        print('Initialize Flow stream from Kinetics')
        pretrained = os.path.join('pretrained/kinetics_tsn_flow.pth.tar')
        state_dict = torch.load(pretrained)
        # Drop the singleton leading dimension stored in the TSN checkpoint.
        for k, v in state_dict.items():
            state_dict[k] = torch.squeeze(v, dim=0)
        base_model = getattr(model, 'flow')
        base_model.load_state_dict(state_dict, strict=False)

    # Freeze stream weights (leaves only fusion and classification trainable)
    if args.freeze:
        model.freeze_fn('modalities')

    # Freeze batch normalisation layers except the first
    if args.partialbn:
        model.freeze_fn('partialbn_parameters')

    model = torch.nn.DataParallel(model, device_ids=args.gpus).to(device)

    cudnn.benchmark = True

    # Data loading code
    # Per-modality normalisation; 'Spec' (audio spectrogram) gets none, and
    # RGBDiff is already mean-centred so it only needs the identity.
    normalize = {}
    for m in args.modality:
        if (m != 'Spec'):
            if (m != 'RGBDiff'):
                normalize[m] = GroupNormalize(input_mean[m], input_std[m])
            else:
                normalize[m] = IdentityTransform()

    image_tmpl = {}
    train_transform = {}
    val_transform = {}
    for m in args.modality:
        if (m != 'Spec'):
            # Prepare dictionaries containing image name templates for each modality
            if m in ['RGB', 'RGBDiff']:
                image_tmpl[m] = "img_{:010d}.jpg"
            elif m == 'Flow':
                image_tmpl[m] = args.flow_prefix + "{}_{:010d}.jpg"
            # Prepare train/val dictionaries containing the transformations
            # (augmentation+normalization)
            # for each modality
            train_transform[m] = torchvision.transforms.Compose([
                train_augmentation[m],
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize[m],
            ])

            val_transform[m] = torchvision.transforms.Compose([
                GroupScale(int(scale_size[m])),
                GroupCenterCrop(crop_size[m]),
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=args.arch != 'BNInception'),
                normalize[m],
            ])
        else:
            # Prepare train/val dictionaries containing the transformations
            # (augmentation+normalization)
            # for each modality
            train_transform[m] = torchvision.transforms.Compose([
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=False),
            ])

            val_transform[m] = torchvision.transforms.Compose([
                Stack(roll=args.arch == 'BNInception'),
                ToTorchFormatTensor(div=False),
            ])

    if args.train_list is None:
        # If train_list is not provided, we train on the default
        # dataset which is all the training set
        train_loader = torch.utils.data.DataLoader(TBNDataSet(
            args.dataset,
            training_labels(),
            data_length,
            args.modality,
            image_tmpl,
            visual_path=args.visual_path,
            audio_path=args.audio_path,
            num_segments=args.num_segments,
            transform=train_transform,
            fps=args.fps,
            resampling_rate=args.resampling_rate),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
    else:
        train_loader = torch.utils.data.DataLoader(TBNDataSet(
            args.dataset,
            args.train_list,
            data_length,
            args.modality,
            image_tmpl,
            visual_path=args.visual_path,
            audio_path=args.audio_path,
            num_segments=args.num_segments,
            transform=train_transform,
            fps=args.fps,
            resampling_rate=args.resampling_rate),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
    # NOTE(review): val_loader exists only when a train_list is given; the
    # args.evaluate branch below would raise NameError otherwise — confirm.
    if args.train_list is not None:
        # we cannot validate on part of the training set
        # if we use all the training set for training
        val_loader = torch.utils.data.DataLoader(TBNDataSet(
            args.dataset,
            args.val_list,
            data_length,
            args.modality,
            image_tmpl,
            visual_path=args.visual_path,
            audio_path=args.audio_path,
            num_segments=args.num_segments,
            mode='val',
            transform=val_transform,
            fps=args.fps,
            resampling_rate=args.resampling_rate),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    # Multi-modal: one parameter group per stream so Flow can use its own
    # (lower) learning rate; single-modal: all trainable params in one group.
    if len(args.modality) > 1:
        param_groups = [
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.rgb.parameters())
            },
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.flow.parameters()),
                'lr':
                0.001
            },
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.spec.parameters())
            },
            {
                'params':
                filter(lambda p: p.requires_grad,
                       model.module.fusion_classification_net.parameters())
            },
        ]
    else:
        param_groups = filter(lambda p: p.requires_grad, model.parameters())

    optimizer = torch.optim.SGD(param_groups,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = MultiStepLR(optimizer, args.lr_steps, gamma=0.1)
    if args.evaluate:
        validate(val_loader, model, criterion, device)
        return
    # Pre-allocate per-epoch stats arrays; EPIC adds per-task (verb/noun)
    # entries, and validation stats only exist when a val split is used.
    if args.save_stats:
        if args.dataset != 'epic':
            stats_dict = {
                'train_loss': np.zeros((args.epochs, )),
                'val_loss': np.zeros((args.epochs, )),
                'train_acc': np.zeros((args.epochs, )),
                'val_acc': np.zeros((args.epochs, ))
            }
        elif args.dataset == 'epic':
            if args.train_list is not None:
                stats_dict = {
                    'train_loss': np.zeros((args.epochs, )),
                    'train_verb_loss': np.zeros((args.epochs, )),
                    'train_noun_loss': np.zeros((args.epochs, )),
                    'train_acc': np.zeros((args.epochs, )),
                    'train_verb_acc': np.zeros((args.epochs, )),
                    'train_noun_acc': np.zeros((args.epochs, )),
                    'val_loss': np.zeros((args.epochs, )),
                    'val_verb_loss': np.zeros((args.epochs, )),
                    'val_noun_loss': np.zeros((args.epochs, )),
                    'val_acc': np.zeros((args.epochs, )),
                    'val_verb_acc': np.zeros((args.epochs, )),
                    'val_noun_acc': np.zeros((args.epochs, ))
                }
            else:
                stats_dict = {
                    'train_loss': np.zeros((args.epochs, )),
                    'train_verb_loss': np.zeros((args.epochs, )),
                    'train_noun_loss': np.zeros((args.epochs, )),
                    'train_acc': np.zeros((args.epochs, )),
                    'train_verb_acc': np.zeros((args.epochs, )),
                    'train_noun_acc': np.zeros((args.epochs, ))
                }

    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): stepping the scheduler before train() matches the
        # pre-1.1 PyTorch convention; newer versions expect it after
        # optimizer.step() — confirm against the installed torch version.
        scheduler.step()
        # train for one epoch
        training_metrics = train(train_loader, model, criterion, optimizer,
                                 epoch, device)
        if args.save_stats:
            for k, v in training_metrics.items():
                stats_dict[k][epoch] = v
        # evaluate on validation set
        if args.train_list is not None:
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                test_metrics = validate(val_loader, model, criterion, device)
                if args.save_stats:
                    for k, v in test_metrics.items():
                        stats_dict[k][epoch] = v
                prec1 = test_metrics['val_acc']
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
        else:  #  No validation set
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': training_metrics['train_acc'],
                }, False)

    # summaryWriter is created at module level elsewhere in this file.
    summaryWriter.close()

    if args.save_stats:
        save_stats_dir = os.path.join('stats', experiment_dir)
        if not os.path.exists(save_stats_dir):
            os.makedirs(save_stats_dir)
        with open(os.path.join(save_stats_dir, 'training_stats.npz'),
                  'wb') as f:
            np.savez(f, **stats_dict)
Esempio n. 21
0
def main():
    """Train (and optionally evaluate) a TSN model on the configured dataset.

    Reads all configuration from the module-level ``parser`` and shares state
    via the ``args``/``best_prec1`` globals.  Builds the train/val data
    pipelines, an SGD optimizer over the model's optimization policies, and
    runs the epoch loop with periodic validation and checkpointing.
    """
    # do some pre_process, such as ignore warning
    pre_process()

    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    elif args.dataset == 'streetdance245':
        num_class = 245
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Preprocessing constants and per-layer optimization policies exposed by
    # the (un-wrapped) model; must be read before DataParallel wrapping.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Report model complexity; the model input here is (batch, n_seg*c, h, w).
    from ptflops import get_model_complexity_info
    macs, params = get_model_complexity_info(model, (1, 9, 224, 224),
                                             as_strings=True,
                                             print_per_layer_stat=True,
                                             verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    set_break()

    # Resume training from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fixed: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code.
    # RGBDiff frames are already mean-centred, so they skip normalisation.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames sampled per segment.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Previously fell through silently and crashed later with a NameError.
        raise ValueError('Unknown modality ' + args.modality)

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)

    print('best_prec1:', best_prec1)
Esempio n. 22
0
def main():
    """Evaluate a part-iSLR model on the validation set.

    Restores a checkpoint (when ``--val_resume`` is given), builds the
    validation data pipeline and reports top-1/top-5 precision via
    ``validate``.
    """
    global args, best_prec1, best_prec5
    args = parser.parse_args()
    # Run name used for logs/checkpoints, e.g. part_iSLR_<mode>_class<N>.
    args.store_name = '_'.join(['part_iSLR',args.train_mode,\
                                'class'+str(args.num_class)])
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    create_path(args.root_model)
    # get model 
    model = islr_model(args.num_class,train_mode=args.train_mode)

    model = torch.nn.DataParallel(model).cuda()

    # restore model
    if args.val_resume:
        if osp.isfile(args.val_resume):
            checkpoint = torch.load(args.val_resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            best_prec5 = checkpoint['best_prec5']
            model.load_state_dict(checkpoint['state_dict'])
            # Fixed: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})\n \
                    best_prec1: {:.3f}\n \
                    best_prec5: {:.3f}"
                  .format(args.val_resume, checkpoint['epoch'],\
                      best_prec1,best_prec5)))

        else:
            print(("=> no checkpoint found at '{}'".format(args.val_resume)))
    
    cudnn.benchmark = True

    # Data loading code.
    # ImageNet normalisation statistics; frames are resized straight to the
    # crop size (no scale-then-center-crop, see commented alternative below).
    scale_size = 256
    crop_size = 224
    input_mean = [0.485, 0.456, 0.406]
    input_std = [0.229, 0.224, 0.225]          

    normalize = GroupNormalize(input_mean,input_std)

    val_loader = torch.utils.data.DataLoader(
        iSLR_Dataset(args.val_file,
            args=args,
            transform=torchvision.transforms.Compose([
                GroupScale((crop_size,crop_size)),
                # GroupScale(int(scale_size)),
                # GroupCenterCrop(crop_size),
                Stack(roll=False),
                ToTorchFormatTensor(div=True),
                normalize,
            ])
        ),
        batch_size=args.batch_size,shuffle=False,
        num_workers=args.workers,pin_memory=True,
        # collate_fn=collate
    )

    # define loss function (criterion)
    criterion = torch.nn.CrossEntropyLoss().cuda()

    # Note: 0 // args.eval_freq is always 0; kept for interface compatibility.
    prec1,prec5 = validate(val_loader, model, criterion, 0 // args.eval_freq)
Esempio n. 23
0
def main():
    """Train (and optionally evaluate) an I3D model.

    Reads configuration from the module-level ``parser``, builds the
    train/val clip pipelines, a plain SGD optimizer over all parameters, and
    runs the epoch loop with periodic validation and checkpointing.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its class count.
    if args.dataset == 'ucf101':
        num_classes = 101
    elif args.dataset == 'hmdb51':
        num_classes = 51
    elif args.dataset == 'kinetics':
        num_classes = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # args.arch selects the I3D variant by attribute name on the i3d module.
    model = getattr(i3d, args.arch)(modality=args.modality,
                                    num_classes=num_classes,
                                    dropout_ratio=args.dropout)

    crop_size = args.input_size
    # Standard 256/224 resize-to-crop ratio.
    scale_size = args.input_size * 256 // 224
    input_mean = [0.485, 0.456, 0.406]
    input_std = [0.229, 0.224, 0.225]
    # Flow inputs are single-channel, so use scalar mean/std.
    if args.modality == 'Flow':
        input_mean = [0.5]
        input_std = [np.mean(input_std)]

    train_augmentation = get_augmentation(args.modality, args.input_size)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Resume training from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): message formats args.evaluate — args.resume was
            # presumably intended here; confirm.
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_loader = torch.utils.data.DataLoader(I3DDataSet(
        args.root_path,
        args.train_list,
        clip_length=args.clip_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg"
        if args.modality == "RGB" else args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            ToNumpyNDArray(),
            ToTorchFormatTensor(),
            GroupNormalize(input_mean, input_std),
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(I3DDataSet(
        args.root_path,
        args.val_list,
        clip_length=args.clip_length,
        modality=args.modality,
        image_tmpl="img_{:05d}.jpg"
        if args.modality == "RGB" else args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            ToNumpyNDArray(),
            ToTorchFormatTensor(),
            GroupNormalize(input_mean, input_std),
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    optimizer = torch.optim.SGD(params=model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Esempio n. 24
0
def main():
    """Train (and optionally evaluate) a TSN model on UCF-Frames data.

    Reads configuration from the module-level ``parser``, builds train/val
    pipelines over frames rooted at "UCF-Frames", an SGD optimizer over the
    model's optimization policies, and runs the epoch loop with periodic
    validation and checkpointing.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Map dataset name to its class count.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                partial_bn=not args.no_partialbn)

    # Preprocessing constants and per-layer optimization policies, read from
    # the un-wrapped model before DataParallel wrapping.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # Resume training from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): message formats args.evaluate — args.resume was
            # presumably intended here; confirm.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    # RGBDiff frames are already mean-centred, so they skip normalisation.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    # Number of consecutive frames sampled per segment.
    # NOTE(review): data_length stays undefined for any other modality and
    # would raise NameError below — confirm the accepted modality set.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.train_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        transform=torchvision.transforms.Compose([
            train_augmentation,
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        "UCF-Frames",
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl="{:06d}.jpg" if args.modality in ["RGB", "RGBDiff"] else
        args.flow_prefix + "{}_{:05d}.jpg",
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=args.arch == 'BNInception'),
            ToTorchFormatTensor(div=args.arch != 'BNInception'),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion,
                             (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Esempio n. 25
0
def main():
    """Build a TSN model from CLI args, restore a mandatory checkpoint, and
    run sliding-window prediction over the validation list.

    Relies on module-level names defined elsewhere in this file: ``parser``,
    ``TSN``, ``TSNDataSet``, ``predict``, ``print_model``, ``collate_fn``,
    ``_CUDA``, ``_WINDOW_SIZE``, ``_WINDOW_STRIDE``, ``STATS_TOT_WINDOWS``.
    """
    global args
    args = parser.parse_args()

    # Log the environment and the full configuration for reproducibility.
    print("------------------------------------")
    print("Environment Versions:")
    print("- Python: {}".format(sys.version))
    print("- PyTorch: {}".format(torch.__version__))
    print("- TorchVison: {}".format(torchvision.__version__))

    args_dict = args.__dict__
    print("------------------------------------")
    print(args.arch+" Configurations:")
    for key in args_dict.keys():
        print("- {}: {}".format(key, args_dict[key]))
    print("------------------------------------")

    # The dataset choice fixes the number of classes and the zero-padded
    # frame-filename format used to read extracted frames from disk.
    if args.dataset == 'ucf101':
        num_class = 101
        rgb_read_format = "{:06d}.jpg" # Format for THUMOS14 videos
        # rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'hmdb51':
        num_class = 51
        rgb_read_format = "{:05d}.jpg"
    elif args.dataset == 'kinetics':
        num_class = 400
        rgb_read_format = "{:04d}.jpg"
    elif args.dataset == 'something':
        num_class = 174
        rgb_read_format = "{:04d}.jpg"
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.pretrained_parts, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    # Input geometry and normalization constants come from the backbone.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    # The model is wrapped in DataParallel in both cases; it is moved to the
    # GPU only when _CUDA is set.
    if _CUDA:
        model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() # CUDA
    print_model(model)
    if not _CUDA:
        model = torch.nn.DataParallel(model) # CPU

    print("pretrained_parts: ", args.pretrained_parts)

    # A checkpoint is mandatory for this prediction script: without --resume
    # the function prints a message and returns immediately.
    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            if _CUDA:
                checkpoint = torch.load(args.resume) # CUDA
            else:
                checkpoint = torch.load(args.resume, map_location='cpu') # CPU
            # if not checkpoint['lr']:
            # Older checkpoints may lack the learning rate; ask interactively.
            if "lr" not in checkpoint.keys():
                args.lr = input("No 'lr' attribute found in resume model, please input the 'lr' manually: ")
                args.lr = float(args.lr)
            else:
                args.lr = checkpoint['lr']
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch: {}, lr: {})"
                  .format(args.resume, checkpoint['epoch'], args.lr)))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    else:
        print("Please specify the checkpoint to pretrained model")
        return

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        #input_mean = [0,0,0] #for debugging
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # presumably RGBDiff needs no mean/std normalization — TODO confirm
        normalize = IdentityTransform()

    # Frames consumed per segment sample, by modality.
    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    end = time.time()
    # data_loader = torch.utils.data.DataLoader(
    dataset = TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=args.rgb_prefix+rgb_read_format if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+rgb_read_format,
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=True),
                       ToTorchFormatTensor(div=False),
                       #Stack(roll=(args.arch == 'C3DRes18') or (args.arch == 'ECO') or (args.arch == 'ECOfull') or (args.arch == 'ECO_2FC')),
                       #ToTorchFormatTensor(div=(args.arch != 'C3DRes18') and (args.arch != 'ECO') and (args.arch != 'ECOfull') and (args.arch != 'ECO_2FC')),
                       normalize,
                   ]),
                   test_mode=True,
                   window_size=_WINDOW_SIZE, window_stride=_WINDOW_STRIDE);
    data_loader = torch.utils.data.DataLoader(dataset,
                      batch_size=args.batch_size, shuffle=False,
                      num_workers=args.workers, pin_memory=True,
                      collate_fn=collate_fn)

    # criterion = torch.nn.CrossEntropyLoss().cuda()
    # predict(data_loader, model, criterion, 0)
    # NOTE(review): data_loader is built above but predict() is handed the
    # raw dataset instead — confirm this is intentional.
    predict(dataset, model, criterion=None, iter=0)
    # profile_model(model)
    elapsed_time = time.time() - end
    print("STATS_TOT_WINDOWS={0}, Total prediction time={1}".format(STATS_TOT_WINDOWS, elapsed_time))
    return
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch.nn as nn
import torch
import logging
import numpy as np
from contrib import adf
from opts import parser

# NOTE(review): parsing CLI arguments at import time ties this module to the
# process's argv; consider parsing lazily if the module is imported by code
# with its own command line — TODO confirm intended usage.
FLAGS = parser.parse_args()


def keep_variance(x, min_variance):
    """Return ``x`` offset upward by ``min_variance``.

    Works for plain numbers and tensors alike; the offset keeps a variance
    estimate strictly away from zero.
    """
    shifted = min_variance + x
    return shifted


def finitialize_msra(modules, small=False):
    """Apply MSRA (Kaiming-normal) initialization to every adf Conv2d/Linear
    layer in ``modules``; biases are zeroed, other layer types are skipped.

    When ``small`` is true, the freshly initialized weights are additionally
    scaled down by 1e-3.
    """
    logging.info("Initializing MSRA")
    for module in modules:
        # Only adf convolution and linear layers carry weights we initialize.
        if not isinstance(module, (adf.Conv2d, adf.Linear)):
            continue
        nn.init.kaiming_normal_(module.weight)
        if small:
            module.weight.data.mul_(0.001)
        if module.bias is not None:
            nn.init.constant_(module.bias, 0)


def finitialize_xavier(modules, small=False):
    # NOTE(review): only the log statement is present here — the Xavier
    # initialization loop (cf. finitialize_msra above) appears to be missing
    # or truncated; confirm against the original source before relying on it.
    logging.info("Initializing Xavier")
Esempio n. 27
0
def main():
    """Train and evaluate the Charades-STA temporal-grounding model.

    Configuration comes from the module-level ``parser`` (with dataset
    defaults merged in); the best top-1/top-5 recall bookkeeping lives in
    module-level globals so helper routines can observe it.

    Fixes vs. the previous revision:
    - the "loaded checkpoint" log reported ``args.evaluate`` instead of the
      checkpoint path actually loaded (``args.resume``);
    - the last-epoch evaluation check compared against the stale
      ``args.n_epoch`` while the loop runs to the stage-adjusted ``n_epoch``.
    """
    global args, logger, writer, dataset_configs
    global best_top1_epoch, best_top5_epoch, best_top1, best_top5, best_top1_top5, best_top5_top1
    # Merge dataset-specific defaults into the parser before parsing argv.
    dataset_configs = get_and_save_args(parser)
    parser.set_defaults(**dataset_configs)
    args = parser.parse_args()

    # ================== GPU setting ===============
    os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    """copy codes and creat dir for saving models and logs"""
    if not os.path.isdir(args.snapshot_pref):
        os.makedirs(args.snapshot_pref)

    logger = Prepare_logger(args)
    logger.info('\ncreating folder: ' + args.snapshot_pref)

    # TensorBoard writer and option recorder are only needed when training.
    if not args.evaluate:
        writer = SummaryWriter(args.snapshot_pref)
        recorder = Recorder(args.snapshot_pref)
        recorder.writeopt(args)

    logger.info('\nruntime args\n\n{}\n'.format(json.dumps(vars(args), indent=4)))

    """prepare dataset and model"""
    # word2idx = json.load(open('./data/dataset/TACoS/TACoS_word2id_glove_lower.json', 'r'))
    # train_dataset = TACoS(args, split='train')
    # test_dataset = TACoS(args, split='test')
    word2idx = json.load(open('./data/dataset/Charades/Charades_word2id.json', 'r'))
    train_dataset = CharadesSTA(args, split='train')
    test_dataset = CharadesSTA(args, split='test')
    train_dataloader = DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=True, collate_fn=collate_data, num_workers=8, pin_memory=True
    )
    test_dataloader = DataLoader(
        test_dataset, batch_size=args.test_batch_size,
        shuffle=False, collate_fn=collate_data, num_workers=8, pin_memory=True
    )
    vocab_size = len(word2idx)

    # Local copies: the staged-training logic below may shorten the schedule
    # or shrink the learning rate without touching args.
    lr = args.lr
    n_epoch = args.n_epoch

    main_model = mainModel(vocab_size, args, hidden_dim=512, embed_dim=300,
                           bidirection=True, graph_node_features=1024)

    # Initialize query-word embeddings from (cached) GloVe vectors.
    if os.path.exists(args.glove_weights):
        logger.info("Loading glove weights")
        main_model.query_encoder.embedding.weight.data.copy_(torch.load(args.glove_weights))
    else:
        logger.info("Generating glove weights")
        main_model.query_encoder.embedding.weight.data.copy_(glove_init(word2idx))

    main_model = nn.DataParallel(main_model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            logger.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            pretrained_dict = checkpoint['state_dict']
            # Only resume the parameters that still exist in the current model.
            model_dict = main_model.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
            model_dict.update(pretrained_dict)
            main_model.load_state_dict(model_dict)
            # main_model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint actually loaded (was args.evaluate).
            logger.info(("=> loaded checkpoint '{}' (epoch {})"
                      .format(args.resume, checkpoint['epoch'])))
        else:
            logger.info(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.evaluate:
        topks, accuracy_topks = evaluate(main_model, test_dataloader, word2idx, False)
        for ind, topk in enumerate(topks):
            print("R@{}: {:.1f}\n".format(topk, accuracy_topks[ind] * 100))
        return

    # Three-stage schedule: stage 1 freezes the IoU head and trains the rest
    # for 10 epochs; stage 2 trains only the IoU head at lr/100; stage 3
    # fine-tunes everything at lr/10000.
    learned_params = None
    if args.is_first_stage:
        for name, value in main_model.named_parameters():
            if 'iou_scores' in name or 'mix_fc' in name:
                value.requires_grad = False
        learned_params = filter(lambda p: p.requires_grad, main_model.parameters())
        n_epoch = 10
    elif args.is_second_stage:
        head_params = main_model.module.fcos.head.iou_scores.parameters()
        fc_params = main_model.module.fcos.head.mix_fc.parameters()
        learned_params = list(head_params) + list(fc_params)
        lr /= 100
    elif args.is_third_stage:
        learned_params = main_model.parameters()
        lr /= 10000

    optimizer = torch.optim.Adam(learned_params, lr)

    for epoch in range(args.start_epoch, n_epoch):

        train_loss = train_epoch(main_model, train_dataloader, optimizer, epoch)

        # Fix: use the (possibly stage-adjusted) n_epoch for the final-epoch
        # check; args.n_epoch is stale when the first stage shortens training.
        if (epoch + 1) % args.eval_freq == 0 or epoch == n_epoch - 1:

            val_loss, topks, accuracy_topks = validate_epoch(
                main_model, test_dataloader, epoch, word2idx, False
            )

            for ind, topk in enumerate(topks):
                writer.add_scalar('test_result/Recall@top{}'.format(topk), accuracy_topks[ind]*100, epoch)

            # Track the best top-1 recall (and its companion top-5 value).
            is_best_top1 = (accuracy_topks[0]*100) > best_top1
            best_top1 = max((accuracy_topks[0]*100), best_top1)
            if is_best_top1:
                best_top1_epoch = epoch
                best_top1_top5 = accuracy_topks[1]*100
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': main_model.state_dict(),
                'loss': val_loss,
                'top1': accuracy_topks[0]*100,
                'top5': accuracy_topks[1]*100,
            }, is_best_top1, epoch=epoch, top1=accuracy_topks[0]*100, top5=accuracy_topks[1]*100)

            # Track the best top-5 recall (and its companion top-1 value).
            is_best_top5 = (accuracy_topks[1]*100) > best_top5
            best_top5 = max((accuracy_topks[1]*100), best_top5)
            if is_best_top5:
                best_top5_epoch = epoch
                best_top5_top1 = accuracy_topks[0] * 100
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': main_model.state_dict(),
                'loss': val_loss,
                'top1': accuracy_topks[0]*100,
                'top5': accuracy_topks[1]*100,
            }, is_best_top5, epoch=epoch, top1=accuracy_topks[0]*100, top5=accuracy_topks[1]*100)

            writer.add_scalar('test_result/Best_Recall@top1', best_top1, epoch)
            writer.add_scalar('test_result/Best_Recall@top5', best_top5, epoch)

            logger.info(
                "R@1: {:.2f}, R@5: {:.2f}, epoch: {}\n".format(
                    accuracy_topks[0] * 100, accuracy_topks[1] * 100, epoch)
            )
            logger.info(
                "Current best top1: R@1: {:.2f}, R@5: {:.2f}, epoch: {} \n".format(
                    best_top1, best_top1_top5, best_top1_epoch)
            )
            logger.info(
                "Current best top5: R@1: {:.2f}, R@5: {:.2f}, epoch: {} \n".format(
                    best_top5_top1, best_top5, best_top5_epoch)
            )
Esempio n. 28
0
def main():
    """Train a TSM video encoder plus a Transformer decoder on YouCook.

    Parses CLI arguments from the module-level ``parser`` and updates the
    globals ``args``, ``best_prec1`` and ``crop_size``.  Checkpoints for both
    the encoder and the decoder are written every epoch.

    Fixes vs. the previous revision:
    - resuming called ``optimizer.load_state_dict`` before ``optimizer``
      existed (NameError on every resume) — the state is now stashed and
      applied right after the optimizer is constructed;
    - the decoder checkpoint on a non-evaluation epoch used the undefined
      name ``is_best`` (NameError);
    - the "loaded checkpoint" log reported ``args.evaluate`` instead of the
      checkpoint path actually loaded.
    """
    global args, best_prec1
    global crop_size
    args = parser.parse_args()

    num_class, train_list, val_list, args.root_path, prefix = dataset_config.return_dataset(
        args.dataset, args.modality)
    # The dataset's class count is ignored: the head is built with one unit.
    num_class = 1
    if args.train_list == "":
        args.train_list = train_list
    if args.val_list == "":
        args.val_list = val_list

    # Compose a descriptive experiment name from the hyper-parameters.
    full_arch_name = args.arch
    if args.shift:
        full_arch_name += '_shift{}_{}'.format(args.shift_div,
                                               args.shift_place)
    if args.concat != "":
        full_arch_name += '_concat{}'.format(args.concat)
    if args.temporal_pool:
        full_arch_name += '_tpool'
    args.store_name += '_'.join([
        'TSM', args.dataset, args.modality, full_arch_name,
        args.consensus_type,
        'lr%.5f' % args.lr,
        'dropout%.2f' % args.dropout,
        'wd%.5f' % args.weight_decay,
        'batch%d' % args.batch_size,
        'segment%d' % args.num_segments, 'e{}'.format(args.epochs)
    ])
    if args.data_fuse:
        args.store_name += '_fuse'
    if args.extra_temporal_modeling:
        args.store_name += '_extra'
    if args.tune_from is not None:
        args.store_name += '_finetune'
    if args.pretrain != 'imagenet':
        args.store_name += '_{}'.format(args.pretrain)
    if args.lr_type != 'step':
        args.store_name += '_{}'.format(args.lr_type)
    if args.dense_sample:
        args.store_name += '_dense'
    if args.non_local > 0:
        args.store_name += '_nl'
    if args.clipnums:
        args.store_name += "_clip{}".format(args.clipnums)
    if args.suffix is not None:
        args.store_name += '_{}'.format(args.suffix)
    print('storing name: ' + args.store_name)

    check_rootfolders()

    # Derive the channel-pruning choice from the fine-tune checkpoint.
    # NOTE(review): presumably input_dim_L2distance populates
    # prune_conv1in_list / prune_conv1out_list used below — TODO confirm.
    if args.prune in ['input', 'inout'] and args.tune_from:
        sd = torch.load(args.tune_from)
        sd = sd['state_dict']
        sd = input_dim_L2distance(sd, args.shift_div)

    model = TSN(
        num_class,
        args.num_segments,
        args.modality,
        base_model=args.arch,
        new_length=2 if args.data_fuse else None,
        consensus_type=args.consensus_type,
        dropout=args.dropout,
        img_feature_dim=args.img_feature_dim,
        partial_bn=not args.no_partialbn,
        pretrain=args.pretrain,
        is_shift=args.shift,
        shift_div=args.shift_div,
        shift_place=args.shift_place,
        fc_lr5=not (args.tune_from and args.dataset in args.tune_from),
        temporal_pool=args.temporal_pool,
        non_local=args.non_local,
        concat=args.concat,
        extra_temporal_modeling=args.extra_temporal_modeling,
        prune_list=[prune_conv1in_list, prune_conv1out_list],
        is_prune=args.prune,
    )

    print(model)

    if args.dataset == 'ucf101':  # twice sample & full resolution
        twice_sample = True
        crop_size = model.scale_size  # 256 x 256
    else:
        twice_sample = False
        crop_size = model.crop_size  # 224 x 224
    # Forced to 256 regardless of the dataset-specific choice above.
    crop_size = 256
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies(args.concat)
    train_augmentation = model.get_augmentation(
        flip=False if 'something' in args.dataset or 'jester' in args.dataset
        or 'nvgesture' in args.dataset else True)

    model = torch.nn.DataParallel(model).cuda()

    # Restore encoder weights when resuming.  The optimizer does not exist
    # yet at this point, so its state is stashed here and loaded right after
    # the optimizer is constructed below.
    resume_optimizer_state = None
    if args.resume:
        if args.temporal_pool:  # early temporal pool so that we can load the state_dict
            make_temporal_pool(model.module.base_model, args.num_segments)
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            resume_optimizer_state = checkpoint['optimizer']
            # Fix: report the checkpoint actually loaded (was args.evaluate).
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.tune_from:
        print(("=> fine-tuning from '{}'".format(args.tune_from)))
        tune_from_list = args.tune_from.split(',')
        sd = torch.load(tune_from_list[0])
        sd = sd['state_dict']

        # Reconcile key names between the checkpoint and the current model:
        # the '.net' / '.prune' wrappers may differ between the two.
        model_dict = model.state_dict()
        replace_dict = []
        for k, v in sd.items():
            if k not in model_dict and k.replace('.net', '') in model_dict:
                print('=> Load after remove .net: ', k)
                replace_dict.append((k, k.replace('.net', '')))
        for k, v in model_dict.items():
            if k not in sd and k.replace('.net', '') in sd:
                print('=> Load after adding .net: ', k)
                replace_dict.append((k.replace('.net', ''), k))
        for k, v in model_dict.items():
            if k not in sd and k.replace('.prune', '') in sd:
                print('=> Load after adding .prune: ', k)
                replace_dict.append((k.replace('.prune', ''), k))

        if args.prune in ['input', 'inout']:
            sd = adjust_para_shape_prunein(sd, model_dict)
        if args.prune in ['output', 'inout']:
            sd = adjust_para_shape_pruneout(sd, model_dict)

        if args.concat != "" and "concat" not in tune_from_list[0]:
            sd = adjust_para_shape_concat(sd, model_dict)

        for k, k_new in replace_dict:
            sd[k_new] = sd.pop(k)
        keys1 = set(list(sd.keys()))
        keys2 = set(list(model_dict.keys()))
        set_diff = (keys1 - keys2) | (keys2 - keys1)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        if args.dataset not in tune_from_list[0]:  # new dataset
            print('=> New dataset, do not load fc weights')
            sd = {k: v for k, v in sd.items() if 'fc' not in k}
        if args.modality != 'Flow' and 'Flow' in tune_from_list[0]:
            sd = {k: v for k, v in sd.items() if 'conv1.weight' not in k}
        # Drop any remaining checkpoint keys the current model does not have.
        for k, v in list(sd.items()):
            if k not in model_dict:
                sd.pop(k)
        # NOTE(review): raises KeyError when the checkpoint lacks this key —
        # confirm every tune_from checkpoint contains the embedding weight.
        sd.pop("module.base_model.embedding.weight")

        model_dict.update(sd)
        model.load_state_dict(model_dict)

    if args.temporal_pool and not args.resume:
        make_temporal_pool(model.module.base_model, args.num_segments)

    decoder = TransformerModel().cuda()
    if args.decoder_resume:
        decoder_chkpoint = torch.load(args.decoder_resume)
        decoder.load_state_dict(decoder_chkpoint["state_dict"])
    print("decoder parameters = ", decoder.parameters())
    # The decoder joins the optimizer with a 5x learning-rate multiplier.
    policies.append({
        "params": decoder.parameters(),
        "lr_mult": 5,
        "decay_mult": 1,
        "name": "Attndecoder_weight"
    })
    cudnn.benchmark = True
    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # Fix: apply the resumed optimizer state now that the optimizer exists.
    if resume_optimizer_state is not None:
        optimizer.load_state_dict(resume_optimizer_state)

    # Data loading code.  normalize / data_length mirror the original TSN
    # pipeline; the YouCook loaders below do not consume them, they are kept
    # for parity with that data path.
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality in ['RGB']:
        data_length = 1
    elif args.modality in ['Depth']:
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    trainDataloader = YouCookDataSetRcg(args.root_path, args.train_list, train=True,
                                        inputsize=crop_size, hasPreprocess=False,
                                        clipnums=args.clipnums,
                                        hasWordIndex=True)
    # clipnums is deliberately not passed for validation.
    valDataloader = YouCookDataSetRcg(args.root_path, args.val_list, val=True,
                                      inputsize=crop_size, hasPreprocess=False,
                                      hasWordIndex=True)

    train_loader = torch.utils.data.DataLoader(trainDataloader)
    val_loader = torch.utils.data.DataLoader(valDataloader)
    index2wordDict = trainDataloader.getIndex2wordDict()

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.NLLLoss().cuda()
    elif args.loss_type == "MSELoss":
        criterion = torch.nn.MSELoss().cuda()
    elif args.loss_type == "BCELoss":
        criterion = torch.nn.BCELoss().cuda()
    elif args.loss_type == "CrossEntropyLoss":
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    if args.evaluate:
        # NOTE(review): this call signature differs from the validate() call
        # in the training loop below (no decoder/log/writer) — confirm.
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, args.store_name, 'log.csv'), 'w')
    with open(os.path.join(args.root_log, args.store_name, 'args.txt'),
              'w') as f:
        f.write(str(args))
    tf_writer = SummaryWriter(
        log_dir=os.path.join(args.root_log, args.store_name))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_type, args.lr_steps)

        # train for one epoch
        train(train_loader, model, decoder, criterion, optimizer, epoch,
              log_training, tf_writer, index2wordDict)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader,
                             model,
                             decoder,
                             criterion,
                             epoch,
                             log_training,
                             tf_writer,
                             index2wordDict=index2wordDict)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            tf_writer.add_scalar('acc/test_top1_best', best_prec1, epoch)

            output_best = 'Best Prec@1: %.3f\n' % (best_prec1)
            log_training.write(output_best + '\n')
            log_training.flush()

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': decoder.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best,
                filename="decoder")
        else:
            # No validation this epoch: checkpoint anyway, never as "best".
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, False)
            # Fix: was `is_best`, which is undefined when no validation ran
            # this epoch (NameError on the first non-evaluation epoch).
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': decoder.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                },
                False,
                filename="decoder")
        print("test pass")
Esempio n. 29
0
def main():
    """Load a VideoModel for Something-Something, run one sample through it,
    and visualize the first-conv kernels and their activations.

    NOTE(review): the sys.exit(1) after print(model) below makes everything
    from the checkpoint loading onward unreachable — it looks like debugging
    leftover; confirm whether it should be removed.
    """

    global args

    args = parser.parse_args()

    train_videofolder, val_videofolder, args.root_path, _ = return_dataset(args.dataset)

    # Hard-coded for Something-Something v1 (174 classes, 5-digit frame names).
    num_class = 174
    rgb_prefix = ''
    rgb_read_format = "{:05d}.jpg"

    model = VideoModel(num_class=num_class, modality=args.modality,
                        num_segments=args.num_segments, base_model=args.arch, consensus_type=args.consensus_type,
                        dropout=args.dropout, partial_bn=not args.no_partialbn, gsm=args.gsm, target_transform=None)

    # Replace the consensus module so per-segment features pass through.
    model.consensus = Identity()
    print("parameters", sum(p.numel() for p in model.parameters()))

    print(model)
    # NOTE(review): unconditional exit — all code below is dead. TODO confirm.
    sys.exit(1)

    # Input geometry and normalization constants come from the backbone.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std

    train_augmentation = model.get_augmentation()
    policies = model.get_optim_policies()
    model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # NOTE(review): best_prec1 is a local here (no `global`), so this
            # value is discarded — confirm intent.
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    normalize = GroupNormalize(input_mean, input_std)



    dataset = VideoDataset(args.root_path, train_videofolder, num_segments=8,
                   new_length=1,
                   modality="RGB",
                   image_tmpl=rgb_prefix+rgb_read_format,
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                       ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
                       normalize
                   ]))

    # Rescale an image tensor into [0, 1] for display.
    def normalize_output(img):
        img = img - img.min()
        img = img / img.max()
        return img
    data = dataset[0][0].unsqueeze_(0).cuda()
    output = model(data)

    #print(model)
    #.exit(1)

    # Plot some images
    idx = torch.randint(0, output.size(0), ())
    #pred = normalize_output(output[idx, 0])
    img = data[idx, 0]

    #fig, axarr = plt.subplots(1, 2)
    plt.imshow(img.cpu().detach().numpy())
    #axarr[1].imshow(pred.cpu().detach().numpy())

    # Visualize feature maps
    activation = {}
    # Forward hook that stashes a layer's output under `name`.
    def get_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook



    model.base_model.conv1_7x7_s2.register_forward_hook(get_activation('conv1'))
    data, _ = dataset[0]
    data.unsqueeze_(0)
    output = model(data.cuda())

    # Plot the first-conv kernels (all but the last 40 filters).
    kernels = model.base_model.conv1_7x7_s2.weight.cpu().detach()

    fig, axarr = plt.subplots(kernels.size(0)-40, figsize=(15,15))
    for idx in range(kernels.size(0)-40):
        axarr[idx].imshow(np.transpose(kernels[idx].squeeze(), (1,2,0)))


    # Plot the first three channels of each conv1 activation map.
    act = activation['conv1'].squeeze()
    fig, axarr = plt.subplots(act.size(0), figsize=(15,15))
    for idx in range(act.size(0)):
        axarr[idx].imshow(np.transpose(act[idx][:3].cpu(), (1,2,0)))

    plt.tight_layout()
    plt.show()
Esempio n. 30
0
def main():
    """Entry point: build, optionally resume/fine-tune, and train a TSM model.

    Reads every configuration value from the module-level ``parser`` and
    updates the globals ``args`` and ``best_prec1``.  Trains with SGD using
    per-layer optimizer policies, evaluates every ``args.eval_freq`` epochs,
    logs to CSV and TensorBoard, and checkpoints the best top-1 model.
    """
    global args, best_prec1
    args = parser.parse_args()

    num_class, args.train_list, args.val_list, args.root_path, prefix = dataset_config.return_dataset(
        args.dataset, args.modality)
    # Build a descriptive experiment name; it is used as the folder name for
    # logs and checkpoints, so every option that changes training is encoded.
    full_arch_name = args.arch
    if args.shift:
        full_arch_name += '_shift{}_{}'.format(args.shift_div,
                                               args.shift_place)
    if args.temporal_pool:
        full_arch_name += '_tpool'
    args.store_name = '_'.join([
        'TSM', args.dataset, args.modality, full_arch_name,
        args.consensus_type,
        'segment%d' % args.num_segments, 'e{}'.format(args.epochs)
    ])
    if args.pretrain != 'imagenet':
        args.store_name += '_{}'.format(args.pretrain)
    if args.lr_type != 'step':
        args.store_name += '_{}'.format(args.lr_type)
    if args.dense_sample:
        args.store_name += '_dense'
    if args.non_local > 0:
        args.store_name += '_nl'
    if args.suffix is not None:
        args.store_name += '_{}'.format(args.suffix)
    print('storing name: ' + args.store_name)

    check_rootfolders()

    model = TSN(num_class,
                args.num_segments,
                args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type,
                dropout=args.dropout,
                img_feature_dim=args.img_feature_dim,
                partial_bn=not args.no_partialbn,
                pretrain=args.pretrain,
                is_shift=args.shift,
                shift_div=args.shift_div,
                shift_place=args.shift_place,
                # Boost the fc learning rate only when the target dataset
                # differs from the one the tune_from checkpoint was trained on.
                fc_lr5=not (args.tune_from and args.dataset in args.tune_from),
                temporal_pool=args.temporal_pool,
                non_local=args.non_local)

    # Capture model-specific preprocessing parameters and optimizer policies
    # before DataParallel wraps the model (they live on the inner module).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    # Horizontal flip would invert direction-sensitive labels on
    # Something-Something / Jester, so it is disabled for those datasets.
    train_augmentation = model.get_augmentation(
        flip=False
        if 'something' in args.dataset or 'jester' in args.dataset else True)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.resume:
        if args.temporal_pool:  # early temporal pool so that we can load the state_dict
            make_temporal_pool(model.module.base_model, args.num_segments)
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    if args.tune_from:
        # Fine-tuning: align the checkpoint's parameter names with this
        # model's (keys may differ by a '.net' infix when shift wrappers are
        # present), then load everything that matches.
        print(("=> fine-tuning from '{}'".format(args.tune_from)))
        sd = torch.load(args.tune_from)
        sd = sd['state_dict']
        model_dict = model.state_dict()
        replace_dict = []
        for k, v in sd.items():
            if k not in model_dict and k.replace('.net', '') in model_dict:
                print('=> Load after remove .net: ', k)
                replace_dict.append((k, k.replace('.net', '')))
        for k, v in model_dict.items():
            if k not in sd and k.replace('.net', '') in sd:
                print('=> Load after adding .net: ', k)
                replace_dict.append((k.replace('.net', ''), k))

        for k, k_new in replace_dict:
            sd[k_new] = sd.pop(k)
        keys1 = set(list(sd.keys()))
        keys2 = set(list(model_dict.keys()))
        set_diff = (keys1 - keys2) | (keys2 - keys1)
        print('#### Notice: keys that failed to load: {}'.format(set_diff))
        if args.dataset not in args.tune_from:  # new dataset
            # Different label space: the classifier weights are useless.
            print('=> New dataset, do not load fc weights')
            sd = {k: v for k, v in sd.items() if 'fc' not in k}
        if args.modality == 'Flow' and 'Flow' not in args.tune_from:
            # RGB conv1 has 3 input channels, Flow needs 2*new_length; drop it.
            sd = {k: v for k, v in sd.items() if 'conv1.weight' not in k}
        model_dict.update(sd)
        model.load_state_dict(model_dict)

    if args.temporal_pool and not args.resume:
        make_temporal_pool(model.module.base_model, args.num_segments)

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff is normalized implicitly by the frame differencing.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet(
            args.root_path,
            args.train_list,
            num_segments=args.num_segments,
            new_length=data_length,
            modality=args.modality,
            image_tmpl=prefix,
            transform=torchvision.transforms.Compose([
                train_augmentation,
                # BNInception/InceptionV3 expect BGR uint8-range input,
                # hence channel roll and no /255 division.
                Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
                ToTorchFormatTensor(
                    div=(args.arch not in ['BNInception', 'InceptionV3'])),
                normalize,
            ]),
            dense_sample=args.dense_sample),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        drop_last=True)  # prevent something not % n_GPU

    val_loader = torch.utils.data.DataLoader(TSNDataSet(
        args.root_path,
        args.val_list,
        num_segments=args.num_segments,
        new_length=data_length,
        modality=args.modality,
        image_tmpl=prefix,
        random_shift=False,
        transform=torchvision.transforms.Compose([
            GroupScale(int(scale_size)),
            GroupCenterCrop(crop_size),
            Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
            ToTorchFormatTensor(
                div=(args.arch not in ['BNInception', 'InceptionV3'])),
            normalize,
        ]),
        dense_sample=args.dense_sample),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    log_training = open(
        os.path.join(args.root_log, args.store_name, 'log.csv'), 'w')
    # Persist the full argument namespace for reproducibility.
    with open(os.path.join(args.root_log, args.store_name, 'args.txt'),
              'w') as f:
        f.write(str(args))
    tf_writer = SummaryWriter(
        log_dir=os.path.join(args.root_log, args.store_name))
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_type, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, log_training,
              tf_writer)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, epoch, log_training,
                             tf_writer)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            tf_writer.add_scalar('acc/test_top1_best', best_prec1, epoch)

            output_best = 'Best Prec@1: %.3f\n' % (best_prec1)
            print(output_best)
            log_training.write(output_best + '\n')
            log_training.flush()

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best)
Esempio n. 31
0
def main():
    """Entry point: build and train a TSN model on UCF101/HMDB51/Kinetics.

    Reads every configuration value from the module-level ``parser`` and
    updates the globals ``args`` and ``best_prec1``.  Trains with SGD using
    per-layer optimizer policies, evaluates every ``args.eval_freq`` epochs,
    and checkpoints the best top-1 model.

    Raises:
        ValueError: if ``args.dataset`` or ``args.loss_type`` is unknown.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Number of action classes per supported dataset.
    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    # Capture model-specific preprocessing parameters and optimizer policies
    # before DataParallel wraps the model (they live on the inner module).
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # BUGFIX: report the checkpoint that was actually loaded
            # (args.resume); the original printed args.evaluate here.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        # RGBDiff is normalized implicitly by the frame differencing.
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5

    train_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   transform=torchvision.transforms.Compose([
                       train_augmentation,
                       # BNInception expects BGR uint8-range input, hence the
                       # channel roll and no /255 division.
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        TSNDataSet("", args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl="img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
                   random_shift=False,
                   transform=torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=args.arch == 'BNInception'),
                       ToTorchFormatTensor(div=args.arch != 'BNInception'),
                       normalize,
                   ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        criterion = torch.nn.CrossEntropyLoss().cuda()
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion, (epoch + 1) * len(train_loader))

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)