def __init__(self, opt):
        """Build the RGB and/or flow networks requested by ``opt``.

        ``opt.device`` is set to CUDA when ``opt.gpus[0] >= 0`` and to CPU
        otherwise. A stream is only created when its checkpoint option
        (``opt.rgb_model`` / ``opt.flow_model``) is a non-empty string; the
        corresponding attribute stays ``None`` otherwise. Each created
        network is loaded from its checkpoint, wrapped in ``DataParallel``,
        moved to ``opt.device`` and switched to eval mode.

        Args:
            opt: options object. Attributes read here: ``gpus``,
                ``rgb_model``, ``flow_model``, ``arch``, ``branch_info``,
                ``head_conv``, ``K``, ``flip_test``, ``ninput``,
                ``chunk_sizes`` and ``num_classes``. ``opt.device`` is
                written.
        """
        opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

        self.rgb_model = None
        self.flow_model = None

        if opt.rgb_model != '':
            print('create rgb model')
            rgb_net = create_model(opt.arch,
                                   opt.branch_info,
                                   opt.head_conv,
                                   opt.K,
                                   flip_test=opt.flip_test)
            rgb_net = load_model(rgb_net, opt.rgb_model)
            rgb_net = DataParallel(rgb_net,
                                   device_ids=opt.gpus,
                                   chunk_sizes=opt.chunk_sizes)
            self.rgb_model = rgb_net.to(opt.device)
            self.rgb_model.eval()

        if opt.flow_model != '':
            print('create flow model')
            flow_net = create_model(opt.arch,
                                    opt.branch_info,
                                    opt.head_conv,
                                    opt.K,
                                    flip_test=opt.flip_test)
            # The network is adapted with convert2flow *before* the
            # checkpoint is loaded into it.
            flow_net = convert2flow(opt.ninput, flow_net)
            flow_net = load_model(flow_net, opt.flow_model)
            flow_net = DataParallel(flow_net,
                                    device_ids=opt.gpus,
                                    chunk_sizes=opt.chunk_sizes)
            self.flow_model = flow_net.to(opt.device)
            self.flow_model.eval()

        self.num_classes = opt.num_classes
        self.opt = opt
Esempio n. 2
0
    def __init__(self, opt):
        """Build the RGB, flow and/or PA networks requested by ``opt``.

        ``opt.device`` is set to CUDA when ``opt.gpus[0] >= 0`` and to CPU
        otherwise. A stream is only created when its checkpoint option
        (``opt.rgb_model`` / ``opt.flow_model`` / ``opt.pa_model``) is a
        non-empty string; otherwise the matching attribute stays ``None``.
        Each created network is loaded from its checkpoint, wrapped in
        ``DataParallel``, moved to ``opt.device`` and put in eval mode.

        Args:
            opt: options object. Attributes read here: ``gpus``,
                ``rgb_model``, ``flow_model``, ``pa_model``, ``save_root``,
                ``arch``, ``branch_info``, ``head_conv``, ``K``,
                ``flip_test``, ``ninput``, ``pa_fuse_mode``, ``rgb_w3``,
                ``chunk_sizes`` and ``num_classes``. ``opt.device`` is
                written.
        """
        opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

        self.rgb_model = None
        self.flow_model = None
        self.pa_model = None

        if opt.rgb_model != '':
            print('create rgb model')
            rgb_net = create_model(opt.arch,
                                   opt.branch_info,
                                   opt.head_conv,
                                   opt.K,
                                   flip_test=opt.flip_test,
                                   is_pa=False)
            # The RGB checkpoint path is formed by plain string
            # concatenation with opt.save_root.
            rgb_net = load_model(rgb_net, opt.save_root + opt.rgb_model)
            rgb_net = DataParallel(rgb_net,
                                   device_ids=opt.gpus,
                                   chunk_sizes=opt.chunk_sizes)
            self.rgb_model = rgb_net.to(opt.device)
            self.rgb_model.eval()

        if opt.flow_model != '':
            print('create flow model')
            flow_net = create_model(opt.arch,
                                    opt.branch_info,
                                    opt.head_conv,
                                    opt.K,
                                    flip_test=opt.flip_test)
            flow_net = convert2flow(opt.ninput, flow_net)
            # Unlike the RGB and PA streams, the flow checkpoint path is
            # used as given (no opt.save_root prefix).
            flow_net = load_model(flow_net, opt.flow_model)
            flow_net = DataParallel(flow_net,
                                    device_ids=opt.gpus,
                                    chunk_sizes=opt.chunk_sizes)
            self.flow_model = flow_net.to(opt.device)
            self.flow_model.eval()

        if opt.pa_model != '':
            print('create PA model')
            pa_net = create_model(opt.arch,
                                  opt.branch_info,
                                  opt.head_conv,
                                  opt.K,
                                  flip_test=opt.flip_test,
                                  is_pa=True,
                                  pa_fuse_mode=opt.pa_fuse_mode,
                                  rgb_w3=opt.rgb_w3)

            # Every fuse mode except 'TDN' (explicit 'PAN' as well as the
            # single-PAN-stream default) gets the PAN conversion; 'TDN'
            # leaves the freshly created network untouched.
            if opt.pa_fuse_mode != 'TDN':
                pa_net = convert2PAN(opt.ninput, pa_net, conv_idx=1)

            pa_net = load_model(pa_net, opt.save_root + opt.pa_model)
            pa_net = DataParallel(pa_net,
                                  device_ids=opt.gpus,
                                  chunk_sizes=opt.chunk_sizes)
            self.pa_model = pa_net.to(opt.device)
            self.pa_model.eval()

        self.num_classes = opt.num_classes
        self.opt = opt

        # Accumulator used for speed measurement.
        self.total_time = 0
Esempio n. 3
0
def main(opt):
    """Run the training loop described by ``opt``.

    Per epoch this trains, writes the train metrics to the logger, saves a
    checkpoint, optionally validates (``opt.val_epoch``) and then either

    * ``opt.auto_stop``: runs inference plus ``frameAP`` on the checkpoint
      just written, keeps the best-scoring checkpoint as
      ``model_best.pth`` and, at each entry of ``opt.lr_step``, reloads that
      best checkpoint and multiplies the learning rate by 0.1; or
    * otherwise: applies the fixed step schedule
      ``opt.lr * 0.1 ** (index_of_epoch_in_lr_step + 1)``.

    Args:
        opt: options object. It is mutated: ``opt.device`` is set, ``opt.lr``
            is lowered in auto-stop mode, and ``opt.rgb_model`` /
            ``opt.flow_model`` are temporarily pointed at the latest
            checkpoint while it is being evaluated.
    """
    # Local import so this function does not depend on the file's import
    # header already providing shutil.
    import shutil

    set_seed(opt.seed)

    torch.backends.cudnn.benchmark = True
    print()
    print('dataset: ' + opt.dataset + '   task:  ' + opt.task)
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset(opt, Dataset)

    train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train'))
    epoch_train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train_epoch'))
    val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val'))
    epoch_val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val_epoch'))

    logger = Logger(opt, epoch_train_writer, epoch_val_writer)

    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    model = create_model(opt.arch, opt.branch_info, opt.head_conv, opt.K)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = opt.start_epoch

    if opt.pretrain_model == 'coco':
        model = load_coco_pretrained_model(opt, model)
    else:
        model = load_imagenet_pretrained_model(opt, model)

    if opt.load_model != '':
        model, optimizer, _, _ = load_model(model, opt.load_model, optimizer,
                                            opt.lr, opt.ucf_pretrain)

    trainer = MOCTrainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=opt.pin_memory,
                                               drop_last=True,
                                               worker_init_fn=worker_init_fn)
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=opt.pin_memory,
                                             drop_last=True,
                                             worker_init_fn=worker_init_fn)

    print('training...')
    print('GPU allocate:', opt.chunk_sizes)
    best_ap = 0
    best_epoch = 0
    # Index into opt.lr_step of the next LR drop (auto-stop mode only).
    stop_step = 0
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print('eopch is ', epoch)
        log_dict_train = trainer.train(epoch, train_loader, train_writer)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('epcho/{}'.format(k), v, epoch, 'train')
            logger.write('train: {} {:8f} | '.format(k, v))
        logger.write('\n')

        # One timestamped checkpoint per epoch only when save_all is set and
        # auto-stop is off; otherwise a single rolling 'model_last.pth'.
        if opt.save_all and not opt.auto_stop:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            model_name = 'model_[{}]_{}.pth'.format(epoch, time_str)
        else:
            model_name = 'model_last.pth'
        save_model(os.path.join(opt.save_dir, model_name), model, optimizer,
                   epoch, log_dict_train['loss'])

        # this step evaluate the model
        if opt.val_epoch:
            with torch.no_grad():
                log_dict_val = trainer.val(epoch, val_loader, val_writer)
            for k, v in log_dict_val.items():
                logger.scalar_summary('epcho/{}'.format(k), v, epoch, 'val')
                logger.write('val: {} {:8f} | '.format(k, v))
        logger.write('\n')

        if opt.auto_stop:
            # Temporarily point the inference options at the checkpoint that
            # was just written; the originals are restored below.
            tmp_rgb_model = opt.rgb_model
            tmp_flow_model = opt.flow_model
            if opt.rgb_model != '':
                opt.rgb_model = os.path.join(opt.rgb_model, model_name)
            if opt.flow_model != '':
                opt.flow_model = os.path.join(opt.flow_model, model_name)
            stream_inference(opt)
            ap = frameAP(opt, print_info=opt.print_log)
            # Remove the 'tmp' scratch directory without going through a
            # shell; a missing directory is not an error.
            shutil.rmtree('tmp', ignore_errors=True)
            if ap > best_ap:
                best_ap = ap
                best_epoch = epoch
                saved1 = os.path.join(opt.save_dir, model_name)
                saved2 = os.path.join(opt.save_dir, 'model_best.pth')
                # copyfile raises on failure (a shell 'cp' would fail
                # silently) and is safe for paths containing spaces.
                shutil.copyfile(saved1, saved2)
            if stop_step < len(
                    opt.lr_step) and epoch >= opt.lr_step[stop_step]:
                # Restart from the best checkpoint so far with a 10x smaller
                # learning rate.
                model, optimizer, _, _ = load_model(
                    model, os.path.join(opt.save_dir, 'model_best.pth'),
                    optimizer, opt.lr)
                opt.lr = opt.lr * 0.1
                logger.write('Drop LR to ' + str(opt.lr) + '\n')
                print('Drop LR to ' + str(opt.lr))
                print('load epoch is ', best_epoch)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = opt.lr
                torch.cuda.empty_cache()
                # Rebuild the trainer around the reloaded model/optimizer.
                trainer = MOCTrainer(opt, model, optimizer)
                trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
                stop_step = stop_step + 1

            opt.rgb_model = tmp_rgb_model
            opt.flow_model = tmp_flow_model

        else:
            # this step drop lr
            if epoch in opt.lr_step:
                lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
                logger.write('Drop LR to ' + str(lr) + '\n')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
    if opt.auto_stop:
        print('best epoch is ', best_epoch)

    logger.close()
    # The per-iteration writers are not handed to the Logger, so close them
    # here to flush any pending events.
    train_writer.close()
    val_writer.close()
Esempio n. 4
0
def main(opt):
    """Run the training loop for an RGB or PA model described by ``opt``.

    A PA model is trained when ``opt.pa_model`` is non-empty, otherwise a
    plain RGB model. Per epoch this trains, writes the train metrics to the
    logger, saves a checkpoint, optionally validates (``opt.val_epoch``) and
    then either

    * ``opt.auto_stop``: runs ``normal_inference`` plus ``frameAP`` on the
      checkpoint just written, keeps the best-scoring one as
      ``model_best.pth`` and, at each entry of ``opt.lr_step``, reloads a
      checkpoint and multiplies the learning rate by ``opt.lr_drop``; or
    * otherwise: applies the fixed step schedule
      ``base_lr * opt.lr_drop ** (index_of_epoch_in_lr_step + 1)``.

    When resuming (``opt.start_epoch`` at or past entries of
    ``opt.lr_step``) the drops that already happened are accounted for once:
    ``opt.lr`` is pre-scaled, and both schedules continue from the matching
    step instead of restarting from the first one.

    Args:
        opt: options object. It is mutated: ``opt.device`` is set,
            ``opt.lr`` is lowered, and ``opt.rgb_model`` /
            ``opt.flow_model`` / ``opt.pa_model`` are temporarily pointed at
            the latest checkpoint while it is being evaluated.
    """
    # Local import so this function does not depend on the file's import
    # header already providing shutil.
    import shutil

    # added to specify gpu id; the gpus arg in the provided code does not work
    torch.cuda.set_device(opt.gpus[0])

    set_seed(opt.seed)

    print('dataset: ' + opt.dataset + '   task:  ' + opt.task)
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset(opt, Dataset)

    train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train'))
    epoch_train_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'train_epoch'))
    val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val'))
    epoch_val_writer = tensorboardX.SummaryWriter(
        log_dir=os.path.join(opt.log_dir, 'val_epoch'))

    logger = Logger(opt, epoch_train_writer, epoch_val_writer)

    opt.device = torch.device('cuda')

    is_pa = opt.pa_model != ''
    model = create_model(opt.arch,
                         opt.branch_info,
                         opt.head_conv,
                         opt.K,
                         is_pa=is_pa,
                         pa_fuse_mode=opt.pa_fuse_mode,
                         rgb_w3=opt.rgb_w3)

    # TODO: Compute grad magnitude (maybe check youssef's snippet)
    # TODO: Log grad to TB

    # One optimizer parameter group per sub-module so each can carry its own
    # learning rate. When present, the PA group comes first and is the only
    # one scaled by lr_factor.
    lr_factor = 1.0
    param_groups = []
    if is_pa:
        param_groups.append({
            "params": model.pa.parameters(),
            "lr": opt.lr * lr_factor
        })
    for sub_module in (model.backbone, model.deconv_layer, model.branch):
        param_groups.append({"params": sub_module.parameters(), "lr": opt.lr})
    optimizer = torch.optim.Adam(param_groups, opt.lr)

    start_epoch = opt.start_epoch

    # Resuming: count the scheduled drops that lie at or before start_epoch
    # and pre-scale opt.lr by them. The un-scaled value is kept because the
    # fixed step schedule below is expressed relative to it.
    base_lr = opt.lr
    step_count = sum(1 for step in opt.lr_step if start_epoch >= step)
    opt.lr = base_lr * (opt.lr_drop**step_count)

    if opt.pretrain_model == 'coco':
        model = load_coco_pretrained_model(opt, model)
    elif opt.pretrain_model == 'imagenet':
        model = load_imagenet_pretrained_model(opt, model)
    else:
        model = load_custom_pretrained_model(opt, model)

    if opt.load_model != '':
        model, optimizer, _, _ = load_model(model, opt.load_model, optimizer,
                                            opt.lr, opt.ucf_pretrain)

    # NOTE: no layers are frozen; all parameters keep their default
    # requires_grad setting.

    trainer = MOCTrainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=opt.pin_memory,
                                               drop_last=True,
                                               worker_init_fn=worker_init_fn)
    val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=opt.num_workers,
                                             pin_memory=opt.pin_memory,
                                             drop_last=True,
                                             worker_init_fn=worker_init_fn)

    print('training...')
    print('GPU allocate:', opt.chunk_sizes)
    best_ap = 0
    best_epoch = 0
    # Index into opt.lr_step of the next LR drop (auto-stop mode). Starting
    # at step_count rather than 0 keeps a resumed run from re-applying drops
    # already folded into opt.lr above. Assumes opt.lr_step is ascending.
    stop_step = step_count

    # The first drop of a fresh run reloads 'model_last.pth' rather than
    # 'model_best.pth', so an early mAP peak cannot rewind training; later
    # drops (or any drop in a run resumed past the first step) use the best
    # checkpoint.
    drop_early_flag = stop_step != 0

    # Re-seed immediately before the training loop.
    set_seed(opt.seed)

    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print('eopch is ', epoch)
        log_dict_train = trainer.train(epoch, train_loader, train_writer)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('epcho/{}'.format(k), v, epoch, 'train')
            logger.write('train: {} {:8f} | '.format(k, v))
        logger.write('\n')

        # One timestamped checkpoint per epoch only when save_all is set and
        # auto-stop is off; otherwise a single rolling 'model_last.pth'.
        if opt.save_all and not opt.auto_stop:
            time_str = time.strftime('%Y-%m-%d-%H-%M')
            model_name = 'model_[{}]_{}.pth'.format(epoch, time_str)
        else:
            model_name = 'model_last.pth'
        save_model(os.path.join(opt.save_dir, model_name), model, optimizer,
                   epoch, log_dict_train['loss'])

        # this step evaluate the model
        if opt.val_epoch:
            with torch.no_grad():
                log_dict_val = trainer.val(epoch, val_loader, val_writer)
            for k, v in log_dict_val.items():
                logger.scalar_summary('epcho/{}'.format(k), v, epoch, 'val')
                logger.write('val: {} {:8f} | '.format(k, v))
        logger.write('\n')

        if opt.auto_stop:
            # Temporarily point the inference options at the checkpoint that
            # was just written; the originals are restored below.
            tmp_rgb_model = opt.rgb_model
            tmp_flow_model = opt.flow_model
            tmp_pa_model = opt.pa_model
            if opt.rgb_model != '':
                opt.rgb_model = os.path.join(opt.rgb_model, model_name)
            if opt.flow_model != '':
                opt.flow_model = os.path.join(opt.flow_model, model_name)
            if opt.pa_model != '':
                opt.pa_model = os.path.join(opt.pa_model, model_name)

            # stream_inference is difficult to handle with long-range
            # memory, so normal_inference is used instead.
            normal_inference(opt)

            ap = frameAP(opt, print_info=opt.print_log)

            print('frame mAP: {}'.format(ap))

            # Remove the 'tmp' scratch directory without going through a
            # shell; a missing directory is not an error.
            shutil.rmtree('tmp', ignore_errors=True)
            if ap > best_ap:
                best_ap = ap
                best_epoch = epoch
                saved1 = os.path.join(opt.save_dir, model_name)
                saved2 = os.path.join(opt.save_dir, 'model_best.pth')
                # copyfile raises on failure (a shell 'cp' would fail
                # silently) and is safe for paths containing spaces.
                shutil.copyfile(saved1, saved2)
            if stop_step < len(
                    opt.lr_step) and epoch >= opt.lr_step[stop_step]:

                if drop_early_flag is False:
                    # First drop: continue from the latest weights.
                    model, optimizer, _, _ = load_model(
                        model, os.path.join(opt.save_dir, 'model_last.pth'),
                        optimizer, opt.lr)
                    drop_early_flag = True
                    print('load epoch is ', epoch)

                else:
                    # Subsequent drops restart from the best mAP checkpoint.
                    model, optimizer, _, _ = load_model(
                        model, os.path.join(opt.save_dir, 'model_best.pth'),
                        optimizer, opt.lr)
                    print('load epoch is ', best_epoch)

                opt.lr = opt.lr * opt.lr_drop
                logger.write('Drop LR to ' + str(opt.lr) + '\n')

                # Group 0 gets the lr_factor-scaled rate (the PA group when a
                # PA model is trained); every other group gets opt.lr.
                for ii, param_group in enumerate(optimizer.param_groups):
                    if ii >= 1:
                        param_group['lr'] = opt.lr
                    else:
                        param_group['lr'] = opt.lr * lr_factor

                print('Drop PA LR to ' + str(opt.lr * lr_factor))
                print('Drop backbone LR to ' + str(opt.lr))
                print('Drop branch LR to ' + str(opt.lr))

                torch.cuda.empty_cache()
                # Rebuild the trainer around the reloaded model/optimizer.
                trainer = MOCTrainer(opt, model, optimizer)
                trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
                stop_step = stop_step + 1

            opt.rgb_model = tmp_rgb_model
            opt.flow_model = tmp_flow_model
            opt.pa_model = tmp_pa_model

        else:
            # this step drop lr
            if epoch in opt.lr_step:
                # Relative to the un-scaled base LR: opt.lr may already
                # include earlier drops when resuming, and using it here
                # would apply those drops twice.
                lr = base_lr * (opt.lr_drop**(opt.lr_step.index(epoch) + 1))
                logger.write('Drop LR to ' + str(lr) + '\n')

                print('Drop LR to ' + str(lr) + '\n')

                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
    if opt.auto_stop:
        print('best epoch is ', best_epoch)

    logger.close()
    # The per-iteration writers are not handed to the Logger, so close them
    # here to flush any pending events.
    train_writer.close()
    val_writer.close()