Esempio n. 1
0
def main():
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing video data manager")
    dm = VideoDataManager(use_gpu, **video_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent', 'htri'})
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                 use_gpu=use_gpu,
                                 label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        best_rank1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      best_rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           args.pool_tracklet_features,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("=> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion_xent,
                  criterion_htri,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("=> Test")

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                rank1 = test(model, queryloader, galleryloader,
                             args.pool_tracklet_features, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, False,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    ranklogger.show_summary()
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'test.log' if args.evaluate else 'train.log'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('==========\nArgs:{}\n=========='.format(args))

    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    print('Initializing video data manager')
    dm = VideoDataManager(use_gpu, **video_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    model = nn.DataParallel(model).cuda() if use_gpu else model

    criterion = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                 use_gpu=use_gpu,
                                 label_smooth=args.label_smooth)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume,
                                                  model,
                                                  optimizer=optimizer)

    if args.evaluate:
        print('Evaluate only')

        for name in args.target_names:
            print('Evaluating {} ...'.format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           args.pool_tracklet_features,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    time_start = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    print('=> Start training')

    if args.fixbase_epoch > 0:
        print(
            'Train {} for {} epochs while keeping other layers frozen'.format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            train(epoch,
                  model,
                  criterion,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)

        print('Done. All layers are open to train for {} epochs'.format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print('=> Test')

            for name in args.target_names:
                print('Evaluating {} ...'.format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                rank1 = test(model, queryloader, galleryloader,
                             args.pool_tracklet_features, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'rank1': rank1,
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'optimizer': optimizer.state_dict(),
                }, args.save_dir)

    elapsed = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
    ranklogger.show_summary()
Esempio n. 3
0
def main():
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stderr = sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dm.num_train_pids, loss={'xent'}, use_gpu=use_gpu, args=vars(args))
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion = get_criterion(dm.num_train_pids, use_gpu, args)
    regularizer = get_regularizer(vars(args))
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=args.stepsize, gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        try:
            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            print(e)
            checkpoint = torch.load(args.load_weights, map_location={'cuda:0': 'cpu'})

        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        print(set(pretrain_dict)-set(model_dict))
        pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        state = model.state_dict()
        state.update(checkpoint['state_dict'])
        model.load_state_dict(state)
        # args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch, checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            if 'val' in testloader_dict[name]:                    
                performance = test_classification(model, testloader_dict[name]['val'], use_gpu) / 100
            else:
                queryloader = testloader_dict[name]['query'], testloader_dict[name]['query_flip']
                galleryloader = testloader_dict[name]['gallery'], testloader_dict[name]['gallery_flip']
                performance = test_reid(model, queryloader, galleryloader, use_gpu)

            if args.visualize_ranks:
                visualize_ranked_results(
                    distmat, dm.return_testdataset_by_name(name),
                    save_dir=osp.join(args.save_dir, 'ranked_results', name),
                    topk=20
                )
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("==> Start training")

    if args.fixbase_epoch > 0:
        print("Train {} for {} epochs while keeping other layers frozen".format(args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch, model, criterion, regularizer, optimizer, trainloader, use_gpu, fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    max_performance = 0

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        print(epoch)
        print(criterion)

        train(epoch, model, criterion, regularizer, optimizer, trainloader, use_gpu, fixbase=False)
        train_time += round(time.time() - start_train_time)

        if use_gpu:
            state_dict = model.module.state_dict()
        else:
            state_dict = model.state_dict()

        save_checkpoint({
            'state_dict': state_dict,
            'rank1': 0,
            'epoch': epoch,
        }, False, osp.join(args.save_dir, 'checkpoint_latest.pth.tar'))

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (epoch + 1) % args.eval_freq == 0 or (epoch + 1) == args.max_epoch:
            print("==> Test")

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                if 'val' in testloader_dict[name]:                    
                    performance = test_classification(model, testloader_dict[name]['val'], use_gpu)
                else:
                    queryloader = testloader_dict[name]['query'], testloader_dict[name]['query_flip']
                    galleryloader = testloader_dict[name]['gallery'], testloader_dict[name]['gallery_flip']
                    performance = test_reid(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, performance)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            if max_performance < performance:
                print('Save!', max_performance, performance)
                save_checkpoint({
                    'state_dict': state_dict,
                    'rank1': performance,
                    'epoch': epoch,
                }, False, osp.join(args.save_dir, 'checkpoint_best.pth.tar'))

                max_performance = performance

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
    ranklogger.show_summary()
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing MultiScale data manager")
    dm = ImageDataManager(use_gpu,
                          scales=[224, 160],
                          **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()
    # sys.exit(0)

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              input_size=args.width,
                              loss={'xent'},
                              use_gpu=use_gpu)
    print("Model size: {:.3f} M".format(count_num_param(model)))
    print(model)

    criterion = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                 use_gpu=use_gpu,
                                 label_smooth=args.label_smooth)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True, threshold=1e-04)

    if args.load_weights and check_isfile(
            args.load_weights
    ):  # load pretrained weights but ignore layers that don't match in size
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            test_set = dm.return_testdataset_by_name(name)
            rank1, mAP = test(model,
                              test_set,
                              name,
                              queryloader,
                              galleryloader,
                              use_gpu,
                              visualize=args.visualize_ranks)

        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    maplogger = RankLogger(args.source_names, args.target_names)
    train_time = 0

    # Tensorboard
    writer = SummaryWriter(log_dir=osp.join('runs', 'tensorboard'))
    print("=> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            loss, prec1 = train(epoch,
                                model,
                                criterion,
                                optimizer,
                                trainloader,
                                writer,
                                use_gpu,
                                fixbase=True)
            writer.add_scalar('train/loss', loss, epoch + 1)
            writer.add_scalar('train/prec1', prec1, epoch + 1)
            print(
                'Epoch: [{:02d}] [Average Loss:] {:.4f}\t [Average Prec.:] {:.2%}'
                .format(epoch + 1, loss, prec1))
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    args.start_epoch += args.fixbase_epoch
    args.max_epoch += args.fixbase_epoch

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        loss, prec1 = train(epoch, model, criterion, optimizer, trainloader,
                            writer, use_gpu)
        writer.add_scalar('train/loss', loss, epoch + 1)
        writer.add_scalar('train/prec1', prec1, epoch + 1)
        print(
            'Epoch: [{:02d}] [Average Loss:] {:.4f}\t [Average Prec.:] {:.2%}'.
            format(epoch + 1, loss, prec1))
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("=> Test")

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']

                test_set = dm.return_testdataset_by_name(name)

                if epoch + 1 == args.max_epoch:
                    rank1, mAP = test(model,
                                      test_set,
                                      name,
                                      queryloader,
                                      galleryloader,
                                      use_gpu,
                                      visualize=True)
                else:
                    rank1, mAP = test(model, test_set, name, queryloader,
                                      galleryloader, use_gpu)

                writer.add_scalar('test/top1', rank1, epoch + 1)
                writer.add_scalar('test/mAP', mAP, epoch + 1)

                ranklogger.write(name, epoch + 1, rank1)
                maplogger.write(name, epoch + 1, mAP)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, False,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    # save last checkpoint
    save_checkpoint(
        {
            'state_dict': state_dict,
            'rank1': rank1,
            'epoch': epoch,
        }, False,
        osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    ranklogger.show_summary()
    maplogger.show_summary()
Esempio n. 5
0
def main():
    global args, dropout_optimizer

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stderr = sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent'},
                              use_gpu=use_gpu,
                              dropout_optimizer=dropout_optimizer)
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    # criterion = WrappedCrossEntropyLoss(num_classes=dm.num_train_pids, use_gpu=use_gpu, label_smooth=args.label_smooth)
    criterion, fix_criterion, switch_criterion, htri_param_controller = get_criterions(
        dm.num_train_pids, use_gpu, args)
    regularizer, reg_param_controller = get_regularizer(args.regularizer)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        try:

            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            print(e)
            checkpoint = torch.load(args.load_weights,
                                    map_location={'cuda:0': 'cpu'})

        # dropout_optimizer.set_p(checkpoint.get('dropout_p', 0))
        # print(list(checkpoint.keys()), checkpoint['dropout_p'])

        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        state = model.state_dict()
        state.update(checkpoint['state_dict'])
        model.load_state_dict(state)
        # args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(
            model,
            device_ids=list(range(len(args.gpu_devices.split(','))))).cuda()

    extract_train_info(model, trainloader)
Esempio n. 6
0
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('==========\nArgs:{}\n=========='.format(args))

    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        print('Currently using CPU, however, GPU is highly recommended')

    print('Initializing image data manager')
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent', 'htri'})
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print('Loaded checkpoint from "{}"'.format(args.resume))
        print('- start_epoch: {}\n- rank1: {}'.format(args.start_epoch,
                                                      checkpoint['rank1']))

    model = nn.DataParallel(model).cuda() if use_gpu else model

    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    if args.evaluate:
        print('Evaluate only')

        for name in args.target_names:
            print('Evaluating {} ...'.format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print('=> Start training')

    if args.fixbase_epoch > 0:
        print(
            'Train {} for {} epochs while keeping other layers frozen'.format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion_xent,
                  criterion_htri,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print('Done. All layers are open to train for {} epochs'.format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print('=> Test')

            for name in args.target_names:
                print('Evaluating {} ...'.format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'rank1': rank1,
                    'epoch': epoch,
                }, False,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        'Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.'.
        format(elapsed, train_time))
    ranklogger.show_summary()