Example #1
	if LOSS_NAME == 'Focal':
		LOSS = FocalLoss()
	elif LOSS_NAME == 'Softmax':
		LOSS = nn.CrossEntropyLoss()
	else:
		raise NotImplementedError
	print("=" * 60)
	print(LOSS)
	print("{} Loss Generated".format(LOSS_NAME))
	print("=" * 60)

	if BACKBONE_NAME.find("IR") >= 0:
		backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(BACKBONE) # separate batch_norm parameters from the rest; skip weight decay on batch_norm parameters to improve generalization
		_, head_paras_wo_bn = separate_irse_bn_paras(HEAD)
	else:
		backbone_paras_only_bn, backbone_paras_wo_bn = separate_resnet_bn_paras(BACKBONE) # separate batch_norm parameters from the rest; skip weight decay on batch_norm parameters to improve generalization
		_, head_paras_wo_bn = separate_resnet_bn_paras(HEAD)
	OPTIMIZER = optim.SGD([{'params': backbone_paras_wo_bn + head_paras_wo_bn, 'weight_decay': WEIGHT_DECAY}, {'params': backbone_paras_only_bn}], lr = LR, momentum = MOMENTUM)
	print("=" * 60)
	print(OPTIMIZER)
	print("Optimizer Generated")
	print("=" * 60)

	# optionally resume from a checkpoint
	if BACKBONE_RESUME_ROOT and HEAD_RESUME_ROOT:
		print("=" * 60)
		if os.path.isfile(BACKBONE_RESUME_ROOT) and os.path.isfile(HEAD_RESUME_ROOT):
			print("Loading Backbone Checkpoint '{}'".format(BACKBONE_RESUME_ROOT))
			BACKBONE.load_state_dict(torch.load(BACKBONE_RESUME_ROOT))
			print("Loading Head Checkpoint '{}'".format(HEAD_RESUME_ROOT))
			HEAD.load_state_dict(torch.load(HEAD_RESUME_ROOT))
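
The snippets here call separate_irse_bn_paras / separate_resnet_bn_paras without showing them. The sketch below is a hypothetical stand-in that only illustrates the kind of split such a helper performs, i.e. collecting BatchNorm parameters separately from everything else; the name split_bn_params and the exact logic are assumptions, not the project's implementation.

import torch.nn as nn

def split_bn_params(model):
    # hypothetical stand-in for separate_*_bn_paras: BatchNorm parameters go
    # into one list (no weight decay), everything else into another
    bn_params, other_params = [], []
    for module in model.modules():
        # recurse=False so every parameter is collected exactly once
        if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
            bn_params.extend(module.parameters(recurse=False))
        else:
            other_params.extend(module.parameters(recurse=False))
    return bn_params, other_params
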
    print("{} Head Generated".format(HEAD_NAME))
    print("=" * 60)

    LOSS_DICT = {'Focal': FocalLoss(), 'Softmax': nn.CrossEntropyLoss()}
    LOSS = LOSS_DICT[LOSS_NAME]
    print("=" * 60)
    print(LOSS)
    print("{} Loss Generated".format(LOSS_NAME))
    print("=" * 60)

    if BACKBONE_NAME.find("IR") >= 0:
        backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(
            BACKBONE)  # skip weight decay for batch_norm parameters
        _, head_paras_wo_bn = separate_irse_bn_paras(HEAD)
    else:
        backbone_paras_only_bn, backbone_paras_wo_bn = separate_resnet_bn_paras(
            BACKBONE)  # skip weight decay for batch_norm parameters
        _, head_paras_wo_bn = separate_resnet_bn_paras(HEAD)
    OPTIMIZER = optim.SGD(
        [{'params': backbone_paras_wo_bn + head_paras_wo_bn,
          'weight_decay': WEIGHT_DECAY},
         {'params': backbone_paras_only_bn}],
        lr=LR,
        momentum=MOMENTUM)
    print("=" * 60)
    print(OPTIMIZER)
    print("Optimizer Generated")
    print("=" * 60)

    if MULTI_GPU:
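
The two-group call to optim.SGD above applies weight decay only to the non-BatchNorm weights; a parameter group without its own 'weight_decay' entry falls back to the optimizer default, which is 0 for SGD. A standalone illustration with a toy model (the module and values here are placeholders, not the face-recognition backbone):

import torch.nn as nn
import torch.optim as optim

toy = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
conv_params = list(toy[0].parameters())
bn_params = list(toy[1].parameters())
optimizer = optim.SGD(
    [{'params': conv_params, 'weight_decay': 5e-4},  # decayed
     {'params': bn_params}],                          # default weight_decay = 0
    lr=0.1, momentum=0.9)
print([group['weight_decay'] for group in optimizer.param_groups])  # [0.0005, 0]
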
Example #3
def main_worker(gpu, ngpus_per_node, cfg):
    cfg['GPU'] = gpu
    if gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass
    cfg['RANK'] = cfg['RANK'] * ngpus_per_node + gpu
    dist.init_process_group(backend=cfg['DIST_BACKEND'],
                            init_method=cfg["DIST_URL"],
                            world_size=cfg['WORLD_SIZE'],
                            rank=cfg['RANK'])

    # Data loading code
    batch_size = int(cfg['BATCH_SIZE'] / ngpus_per_node)
    workers = int((cfg['NUM_WORKERS'] + ngpus_per_node - 1) / ngpus_per_node)
    DATA_ROOT = cfg[
        'DATA_ROOT']  # the parent root where your train/val/test data are stored
    RECORD_DIR = cfg['RECORD_DIR']
    RGB_MEAN = cfg['RGB_MEAN']  # for normalize inputs
    RGB_STD = cfg['RGB_STD']
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])
    dataset_train = FaceDataset(DATA_ROOT, RECORD_DIR, train_transform)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset_train)
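    # DistributedSampler shards the dataset across processes and shuffles it
    # itself, which is why the DataLoader below uses
    # shuffle=(train_sampler is None), i.e. shuffle stays off when a sampler is set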
    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)
    SAMPLE_NUMS = dataset_train.get_sample_num_of_each_class()
    NUM_CLASS = len(train_loader.dataset.classes)
    print("Number of Training Classes: {}".format(NUM_CLASS))

    #======= model & loss & optimizer =======#
    BACKBONE_DICT = {
        'ResNet_50': ResNet_50,
        'ResNet_101': ResNet_101,
        'ResNet_152': ResNet_152,
        'IR_50': IR_50,
        'IR_101': IR_101,
        'IR_152': IR_152,
        'IR_SE_50': IR_SE_50,
        'IR_SE_101': IR_SE_101,
        'IR_SE_152': IR_SE_152
    }
    BACKBONE_NAME = cfg['BACKBONE_NAME']
    INPUT_SIZE = cfg['INPUT_SIZE']
    assert INPUT_SIZE == [112, 112]
    backbone = BACKBONE_DICT[BACKBONE_NAME](INPUT_SIZE)
    print("=" * 60)
    print(backbone)
    print("{} Backbone Generated".format(BACKBONE_NAME))
    print("=" * 60)
    HEAD_DICT = {'ArcFace': ArcFace, 'CurricularFace': CurricularFace}
    HEAD_NAME = cfg['HEAD_NAME']
    EMBEDDING_SIZE = cfg['EMBEDDING_SIZE']  # feature dimension
    head = HEAD_DICT[HEAD_NAME](in_features=EMBEDDING_SIZE,
                                out_features=NUM_CLASS)
    print("=" * 60)
    print(head)
    print("{} Head Generated".format(HEAD_NAME))
    print("=" * 60)

    #--------------------optimizer-----------------------------
    if BACKBONE_NAME.find("IR") >= 0:
        # separate batch_norm parameters from the rest; weight decay will be
        # skipped for batch_norm parameters to improve generalization
        backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(
            backbone)
    else:
        backbone_paras_only_bn, backbone_paras_wo_bn = separate_resnet_bn_paras(
            backbone)  # same split for the plain ResNet backbones

    LR = cfg['LR']  # initial LR
    WEIGHT_DECAY = cfg['WEIGHT_DECAY']
    MOMENTUM = cfg['MOMENTUM']
    optimizer = optim.SGD(
        [{
            'params': backbone_paras_wo_bn + list(head.parameters()),
            'weight_decay': WEIGHT_DECAY
        }, {
            'params': backbone_paras_only_bn
        }],
        lr=LR,
        momentum=MOMENTUM)
    print("=" * 60)
    print(optimizer)
    print("Optimizer Generated")
    print("=" * 60)

    # loss
    LOSS_NAME = cfg['LOSS_NAME']
    LOSS_DICT = {'Softmax': nn.CrossEntropyLoss()}
    loss = LOSS_DICT[LOSS_NAME].cuda(gpu)
    print("=" * 60)
    print(loss)
    print("{} Loss Generated".format(loss))
    print("=" * 60)

    torch.cuda.set_device(cfg['GPU'])
    backbone.cuda(cfg['GPU'])
    head.cuda(cfg['GPU'])

    #optionally resume from a checkpoint
    BACKBONE_RESUME_ROOT = cfg[
        'BACKBONE_RESUME_ROOT']  # the root to resume training from a saved checkpoint
    HEAD_RESUME_ROOT = cfg[
        'HEAD_RESUME_ROOT']  # the root to resume training from a saved checkpoint
    if BACKBONE_RESUME_ROOT:
        print("=" * 60)
        if os.path.isfile(BACKBONE_RESUME_ROOT):
            print("Loading Backbone Checkpoint '{}'".format(
                BACKBONE_RESUME_ROOT))
            loc = 'cuda:{}'.format(cfg['GPU'])
            backbone.load_state_dict(
                torch.load(BACKBONE_RESUME_ROOT, map_location=loc))
            if os.path.isfile(HEAD_RESUME_ROOT):
                print("Loading Head Checkpoint '{}'".format(HEAD_RESUME_ROOT))
                checkpoint = torch.load(HEAD_RESUME_ROOT, map_location=loc)
                cfg['START_EPOCH'] = checkpoint['EPOCH']
                head.load_state_dict(checkpoint['HEAD'])
                optimizer.load_state_dict(checkpoint['OPTIMIZER'])
        else:
            print(
                "No Checkpoint Found at '{}' or '{}'. Please check the paths, or training will start from scratch"
                .format(BACKBONE_RESUME_ROOT, HEAD_RESUME_ROOT))
        print("=" * 60)

    backbone = torch.nn.parallel.DistributedDataParallel(
        backbone, device_ids=[cfg['GPU']])
    head = torch.nn.parallel.DistributedDataParallel(head,
                                                     device_ids=[cfg['GPU']])

    # checkpoint and tensorboard dir
    MODEL_ROOT = cfg['MODEL_ROOT']  # the root to buffer your checkpoints
    LOG_ROOT = cfg['LOG_ROOT']  # the root to log your train/val status
    STAGES = cfg['STAGES']  # epoch stages to decay learning rate
    if not os.path.exists(MODEL_ROOT):
        os.makedirs(MODEL_ROOT)
    if not os.path.exists(LOG_ROOT):
        os.makedirs(LOG_ROOT)
    writer = SummaryWriter(
        LOG_ROOT)  # writer for logging intermediate results
    # train
    for epoch in range(cfg['START_EPOCH'], cfg['NUM_EPOCH']):
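        # re-seed the DistributedSampler with the epoch number so every process
        # draws a different, epoch-dependent shuffle of the training data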
        train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, cfg)

        #train for one epoch
        train(train_loader, backbone, head, loss, optimizer, epoch, cfg,
              writer)
        print("=" * 60)
        print("Save Checkpoint...")
        if cfg['RANK'] % ngpus_per_node == 0:
            torch.save(
                backbone.module.state_dict(),
                os.path.join(
                    MODEL_ROOT,
                    "Backbone_{}_Epoch_{}_Time_{}_checkpoint.pth".format(
                        BACKBONE_NAME, epoch + 1, get_time())))
            save_dict = {
                'EPOCH': epoch + 1,
                'HEAD': head.module.state_dict(),
                'OPTIMIZER': optimizer.state_dict()
            }
            torch.save(
                save_dict,
                os.path.join(
                    MODEL_ROOT,
                    "Head_{}_Epoch_{}_Time_{}_checkpoint.pth".format(
                        HEAD_NAME, epoch + 1, get_time())))
    print("=" * 60)

Example #4
    LOSS_DICT = {'Focal': FocalLoss(),
                 'Softmax': nn.CrossEntropyLoss()}
    LOSS = LOSS_DICT[LOSS_NAME]
    print("=" * 60)
    print(LOSS)
    print("{} Loss Generated".format(LOSS_NAME))
    print("=" * 60)

    if BACKBONE_NAME.find("IR") >= 0:
        # separate batch_norm parameters from the rest; weight decay will be
        # skipped for batch_norm parameters to improve generalization
        backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(
            BACKBONE)
        _, head_paras_wo_bn = separate_irse_bn_paras(HEAD)
    else:
        backbone_paras_only_bn, backbone_paras_wo_bn = separate_resnet_bn_paras(
            BACKBONE)  # same split for the plain ResNet backbones
        _, head_paras_wo_bn = separate_resnet_bn_paras(HEAD)
    OPTIMIZER = optim.SGD(
        [{'params': backbone_paras_wo_bn, 'weight_decay': WEIGHT_DECAY},
         {'params': head_paras_wo_bn, 'weight_decay': WEIGHT_DECAY},
         {'params': backbone_paras_only_bn}],  # no weight decay for batch_norm params
        lr=LR, momentum=MOMENTUM)
    print("=" * 60)
    print(OPTIMIZER)
    print("Optimizer Generated")
    print("=" * 60)

    # optionally resume from a checkpoint
    if BACKBONE_RESUME_ROOT and HEAD_RESUME_ROOT:
        print("=" * 60)
        if os.path.isfile(BACKBONE_RESUME_ROOT):
            print("Loading Backbone Checkpoint '{}'".format(BACKBONE_RESUME_ROOT))
            BACKBONE.load_state_dict(torch.load(BACKBONE_RESUME_ROOT))
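
torch.load without a map_location, as used in the resume branch above, restores tensors onto the devices they were saved from. Below is a small standalone sketch (function name and arguments are placeholders) of a CPU-mapped load that keeps the restore device-agnostic before moving the module:

import os
import torch

def maybe_resume(module, resume_path, device):
    # map_location='cpu' avoids tying the load to the GPU the checkpoint came from
    if resume_path and os.path.isfile(resume_path):
        module.load_state_dict(torch.load(resume_path, map_location='cpu'))
    return module.to(device)
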