Esempio n. 1
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNetV2()
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))

        # print(model)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)

    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_acc = train(train_loader=train_loader,
                                      model=model,
                                      metric_fc=metric_fc,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      epoch=epoch,
                                      logger=logger)

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_acc', train_acc, epoch)

        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('model/valid_acc', lfw_acc, epoch)
        writer.add_scalar('model/valid_thres', threshold, epoch)

        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)

        scheduler.step(epoch)
Esempio n. 2
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            from mobilenet_v2 import MobileNetV2
            model = MobileNetV2()
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))

        metric_fc = ArcMarginModel(args)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    model = nn.DataParallel(model)
    metric_fc = nn.DataParallel(metric_fc)

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # Decay learning rate if there is no improvement for 2 consecutive epochs, and terminate training after 10
        if epochs_since_improvement == 10:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 2 == 0:
            checkpoint = 'BEST_checkpoint.tar'
            checkpoint = torch.load(checkpoint)
            model = checkpoint['model']
            metric_fc = checkpoint['metric_fc']
            optimizer = checkpoint['optimizer']

            adjust_learning_rate(optimizer, 0.5)

        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch)
        lr = optimizer.param_groups[0]['lr']
        print('\nCurrent effective learning rate: {}\n'.format(lr))
        # print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', lr, epoch)

        if epoch % 5 == 0:
            # One epoch's validation
            megaface_acc = megaface_test(model)
            writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

            # Check if there was an improvement
            is_best = megaface_acc > best_acc
            best_acc = max(megaface_acc, best_acc)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint
            save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                            optimizer, best_acc, is_best)
Esempio n. 3
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        elif args.network == 'mr18':
            print("mr18")
            model = myResnet18()
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)

    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        scheduler.step()

        if args.full_log:
            lfw_acc, threshold = lfw_test(model)
            writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)
            full_log(epoch)

        start = datetime.now()
        # One epoch's training
        train_loss, train_top5_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger,
                                            writer=writer)

        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Train_Top5_Accuracy', train_top5_accs, epoch)

        end = datetime.now()
        delta = end - start
        print('{} seconds'.format(delta.seconds))

        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('LFW Accuracy', lfw_acc, epoch)

        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
Esempio n. 4
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            # optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
            #                             lr=args.lr, momentum=args.mom, weight_decay=args.weight_decay)
            optimizer = InsightFaceOptimizer(
                torch.optim.SGD([{
                    'params': model.parameters()
                }, {
                    'params': metric_fc.parameters()
                }],
                                lr=args.lr,
                                momentum=args.mom,
                                weight_decay=args.weight_decay))
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        print('\nCurrent effective learning rate: {}\n'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', optimizer.lr, epoch)

        # One epoch's validation
        megaface_acc = megaface_test(model)
        writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

        # Check if there was an improvement
        is_best = megaface_acc > best_acc
        best_acc = max(megaface_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
Esempio n. 5
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        model = MobileFaceNet()
        metric_fc = ArcMarginModel(args)

        optimizer = torch.optim.SGD([{
            'params': model.conv1.parameters()
        }, {
            'params': model.dw_conv.parameters()
        }, {
            'params': model.features.parameters()
        }, {
            'params': model.conv2.parameters()
        }, {
            'params': model.gdconv.parameters()
        }, {
            'params': model.conv3.parameters(),
            'weight_decay': 4e-4
        }, {
            'params': model.bn.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                    lr=args.lr,
                                    momentum=args.mom,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)

        model = nn.DataParallel(model)
        metric_fc = nn.DataParallel(metric_fc)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)

    scheduler = MultiStepLR(optimizer, milestones=[5, 10, 15, 20], gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_acc = train(train_loader=train_loader,
                                      model=model,
                                      metric_fc=metric_fc,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      epoch=epoch,
                                      logger=logger)

        lr = optimizer.param_groups[0]['lr']
        print('\nLearning rate={}\n'.format(lr))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_acc', train_acc, epoch)
        writer.add_scalar('model/learning_rate', lr, epoch)

        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('model/lfw_acc', lfw_acc, epoch)
        writer.add_scalar('model/threshold', threshold, epoch)

        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
        scheduler.step(epoch)
Esempio n. 6
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))

        if args.pretrained:
            model.load_state_dict(torch.load('insight-face-v3.pt'))

        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        nesterov=True,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma)
    else:
        criterion = nn.CrossEntropyLoss()

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    scheduler = MultiStepLR(optimizer, milestones=[8, 16, 24, 32], gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        lr = optimizer.param_groups[0]['lr']
        logger.info('\nCurrent effective learning rate: {}\n'.format(lr))
        # print('Step num: {}\n'.format(optimizer.step_num))
        writer.add_scalar('model/learning_rate', lr, epoch)

        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch)

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)

        # One epoch's validation
        megaface_acc = megaface_test(model)
        writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

        scheduler.step(epoch)

        # Check if there was an improvement
        is_best = megaface_acc > best_acc
        best_acc = max(megaface_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            logger.info("\nEpochs since last improvement: %d\n" %
                        (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best, scheduler)
Esempio n. 7
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    import socket
    exp_name = datetime.now().strftime('%Y%m%d_%H%M%S')
    writer = SummaryWriter(
        os.path.join('runs_mobileNet', exp_name + '_' + socket.gethostname()))
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        # model = MobileFaceNet()
        # metric_fc = ArcMarginModel(args)
        model = MobileFaceNet_PRELU()
        # model = MobileFaceNet_PRELU_AirFace()
        # metric_fc = ArcMarginProduct()
        metric_fc = MV_Softmax()

        # For mobilenet RELU
        # optimizer = torch.optim.SGD([{'params': model.conv1.parameters()},
        #                              {'params': model.dw_conv.parameters()},
        #                              {'params': model.features.parameters()},
        #                              {'params': model.conv2.parameters()},
        #                              {'params': model.gdconv.parameters()},
        #                              {'params': model.conv3.parameters(), 'weight_decay': 4e-4},
        #                              {'params': model.bn.parameters()},
        #                              {'params': metric_fc.parameters()}],
        #                             lr=args.lr, momentum=args.mom, weight_decay=args.weight_decay, nesterov=True)

        # FOR mobileNet PRELU
        optimizer = torch.optim.SGD([{
            'params': model.parameters(),
            'weight_decay': 5e-4
        }, {
            'params': metric_fc.parameters(),
            'weight_decay': 5e-4
        }],
                                    lr=0.04,
                                    momentum=0.9,
                                    nesterov=True)

        model = nn.DataParallel(model)
        metric_fc = nn.DataParallel(metric_fc)

    else:
        print("Training from pretrained model!: " + str(checkpoint))
        checkpoint = torch.load(checkpoint)
        # start_epoch = checkpoint['epoch'] + 1
        start_epoch = 32
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']

        metric_fc = checkpoint['metric_fc']
        # Enable to use new clasification layer
        # metric_fc = ArcMarginProduct()
        # metric_fc = nn.DataParallel(metric_fc)

        # Enable to use learning rate from pre-trained
        # optimizer = checkpoint['optimizer']
        # Enable to use new learning rate
        # optimizer = torch.optim.SGD([
        #     {'params': model.parameters(), 'weight_decay': 5e-4},
        #     {'params': metric_fc.parameters(), 'weight_decay': 5e-4}
        # ], lr=0.0004, momentum=0.9, nesterov=True)
        optimizer = torch.optim.Adam([{
            'params': model.parameters(),
            'weight_decay': 5e-4
        }, {
            'params': metric_fc.parameters(),
            'weight_decay': 5e-4
        }],
                                     lr=0.001)

    # log init
    save_dir = os.path.join('logs', 'train' + '_' + exp_name)
    if os.path.exists(save_dir):
        raise NameError('model dir exists!')
    os.makedirs(save_dir)
    logger = init_log(save_dir)
    # logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    if args.triplet:
        metric_fc = AngleLinear()
        criterion = AngleLoss()

    # Custom dataloaders
    dataset = ArcFaceDataset('train')
    print(dataset.__len__())
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    validation_split = 0.02
    split = int(np.floor(validation_split * dataset_size))
    shuffle_dataset = True
    if shuffle_dataset:
        np.random.seed(42)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=args.batch_size,
                                               num_workers=8,
                                               sampler=train_sampler,
                                               pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(dataset,
                                                    batch_size=16,
                                                    num_workers=4,
                                                    sampler=valid_sampler,
                                                    pin_memory=True)

    # scheduler = MultiStepLR(optimizer, milestones=[10, 15, 20], gamma=0.1)  # 5, 10, 15, 2
    # for i in range(0, start_epoch):
    #     print('Learning rate={}'.format(optimizer.param_groups[0]['lr']))
    #     scheduler.step()
    # scheduler = ReduceLROnPlateau(optimizer, mode='max', verbose=True)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=0.01, steps_per_epoch=len(train_loader), epochs=10)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        print('\nLearning rate={}\n'.format(optimizer.param_groups[0]['lr']))
        # One epoch's training
        train_loss, train_acc = train(train_loader=train_loader,
                                      model=model,
                                      metric_fc=metric_fc,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      epoch=epoch,
                                      logger=logger,
                                      scheduler=scheduler)
        val_acc = validate(val_loader=validation_loader,
                           model=model,
                           metric_fc=metric_fc)
        lr = optimizer.param_groups[0]['lr']
        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_acc', train_acc, epoch)
        writer.add_scalar('model/learning_rate', lr, epoch)
        writer.add_scalar('model/val_acc', val_acc, epoch)
        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('model/lfw_acc', lfw_acc, epoch)
        writer.add_scalar('model/threshold', threshold, epoch)
        logger.info('LFW Ave Accuracy: {:.4f}\t'
                    'Threshold: {:.4f}'.format(
                        np.mean(lfw_acc) * 100, threshold))
        # Check if there was an improvement
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        model.train()
        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best, train_loss)