예제 #1
0
    def __init__(self,
                 datamanager,
                 model,
                 optimizer,
                 scheduler=None,
                 use_cpu=False,
                 label_smooth=True):
        super(ImageSphereEngine, self).__init__(datamanager, model, optimizer,
                                                scheduler, use_cpu)

        self.criterion = AngleLoss(num_classes=self.datamanager.num_train_pids,
                                   use_gpu=self.use_gpu,
                                   label_smooth=label_smooth)
예제 #2
0
def main(args):
    args = parser.parse_args(args)
    #global best_rank1
    best_rank1 = -np.inf
    torch.manual_seed(args.seed)
    # np.random.seed(args.seed)
    # random.seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        test_dir = args.save_dir
        if args.save_dir == 'log':
            if args.resume:
                test_dir = os.path.dirname(args.resume)
            else:
                test_dir = os.path.dirname(args.load_weights)
        sys.stdout = Logger(osp.join(test_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_imgreid_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        #T.Resize((args.height, args.width)),
        T.RandomSizedEarser(),
        T.RandomHorizontalFlip_custom(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query,
                     transform=transform_test,
                     return_path=args.draw_tsne),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery,
                     transform=transform_test,
                     return_path=args.draw_tsne),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(
        name=args.arch,
        num_classes=dataset.num_train_pids,
        loss={'xent', 'angular'} if args.use_angular else {'xent'},
        use_gpu=use_gpu)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    if not (args.use_angular):
        if args.label_smooth:
            print("Using Label Smoothing")
            criterion = CrossEntropyLabelSmooth(
                num_classes=dataset.num_train_pids, use_gpu=use_gpu)
        else:
            print("Using Normal Cross-Entropy")
            criterion = nn.CrossEntropyLoss()

        if args.jsd:
            print("Using JSD regularizer")
            criterion = (criterion, JSD_loss(dataset.num_train_pids))
        else:
            if args.confidence_penalty:
                print("Using Confidence Penalty")
            criterion = (criterion, ConfidencePenalty())
    else:
        if args.label_smooth:
            print("Using Angular Label Smoothing")
            criterion = AngularLabelSmooth(num_classes=dataset.num_train_pids,
                                           use_gpu=use_gpu)

        else:
            print("Using Angular Loss")
            criterion = AngleLoss()
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    if args.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=args.stepsize,
                                             gamma=args.gamma)

    if args.fixbase_epoch > 0:
        if hasattr(model, 'classifier') and isinstance(model.classifier,
                                                       nn.Module):
            optimizer_tmp = init_optim(args.optim,
                                       model.classifier.parameters(),
                                       args.fixbase_lr, args.weight_decay)
        else:
            print(
                "Warn: model has no attribute 'classifier' and fixbase_epoch is reset to 0"
            )
            args.fixbase_epoch = 0

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        best_rank1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      best_rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.single_folder != '':
        extract_features(model,
                         use_gpu,
                         args,
                         transform_test,
                         return_distmat=False)
        return
    if args.evaluate:
        print("Evaluate only")
        test_dir = args.save_dir
        if args.save_dir == 'log':
            if args.resume:
                test_dir = os.path.dirname(args.resume)
            else:
                test_dir = os.path.dirname(args.load_weights)
        distmat = test(model,
                       queryloader,
                       galleryloader,
                       use_gpu,
                       args,
                       writer=None,
                       epoch=-1,
                       return_distmat=True,
                       draw_tsne=args.draw_tsne,
                       tsne_clusters=args.tsne_labels)

        if args.visualize_ranks:
            visualize_ranked_results(
                distmat,
                dataset,
                save_dir=osp.join(test_dir, 'ranked_results'),
                topk=10,
            )
        return

    writer = SummaryWriter(log_dir=osp.join(args.save_dir, 'tensorboard'))
    start_time = time.time()
    train_time = 0
    best_epoch = args.start_epoch
    print("==> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train classifier for {} epochs while keeping base network frozen".
            format(args.fixbase_epoch))

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion,
                  optimizer_tmp,
                  trainloader,
                  use_gpu,
                  writer,
                  args,
                  freeze_bn=True)
            train_time += round(time.time() - start_train_time)

        del optimizer_tmp
        print("Now open all layers for training")
    best_epoch = 0
    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion, optimizer, trainloader, use_gpu, writer,
              args)
        train_time += round(time.time() - start_train_time)

        if args.scheduler:
            scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            if (epoch + 1) == args.max_epoch:
                if use_gpu:
                    state_dict = model.module.state_dict()
                else:
                    state_dict = model.state_dict()

                save_checkpoint(
                    {
                        'state_dict': state_dict,
                        'rank1': -1,
                        'epoch': epoch,
                    }, False,
                    osp.join(
                        args.save_dir, 'beforeTesting_checkpoint_ep' +
                        str(epoch + 1) + '.pth.tar'))
            print("==> Test")

            rank1 = test(model,
                         queryloader,
                         galleryloader,
                         use_gpu,
                         args,
                         writer=writer,
                         epoch=epoch)

            is_best = rank1 > best_rank1

            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    return best_rank1, best_epoch