Example #1
def train(conf):
    """Total training procedure. 
    """ 
    conf.device = torch.device('cuda:0')
    criterion = torch.nn.CrossEntropyLoss().cuda(conf.device)
    backbone_factory = BackboneFactory(conf.backbone_type, conf.backbone_conf_file)
    probe_net = backbone_factory.get_backbone()
    gallery_net = backbone_factory.get_backbone()        
    head_factory = HeadFactory(conf.head_type, conf.head_conf_file)
    prototype = head_factory.get_head().cuda(conf.device)
    probe_net = torch.nn.DataParallel(probe_net).cuda()
    gallery_net = torch.nn.DataParallel(gallery_net).cuda()
    optimizer = optim.SGD(probe_net.parameters(), lr=conf.lr, momentum=conf.momentum, weight_decay=5e-4)
    lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer, milestones=conf.milestones, gamma=0.1)
    if conf.resume:
        probe_net.load_state_dict(torch.load(conf.pretrain_model))
    # Initialize the gallery net as an exact copy of the probe net (decay 0),
    # then keep it in eval mode while BatchNorm layers still update statistics.
    moving_average(probe_net, gallery_net, 0)
    probe_net.train()
    gallery_net.eval().apply(train_BN)

    exclude_id_set = set()
    loss_meter = AverageMeter()
    for epoch in range(conf.epoches):
        data_loader = DataLoader(
            ImageDataset_SST(conf.data_root, conf.train_file, exclude_id_set),
            conf.batch_size, True, num_workers=4, drop_last=True)
        exclude_id_set = train_one_epoch(data_loader, probe_net, gallery_net, 
            prototype, optimizer, criterion, epoch, conf, loss_meter)
        lr_schedule.step()
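
Example #1 relies on two helpers, moving_average and train_BN, that the snippet never defines. A minimal sketch of what they plausibly look like, inferred from the call sites (the Semi-Siamese Training recipe); both bodies are assumptions, not code from the source:

import torch

def moving_average(probe_net, gallery_net, alpha):
    # EMA update: gallery <- alpha * gallery + (1 - alpha) * probe.
    # Called with alpha=0 above, this copies the probe weights verbatim.
    for p_probe, p_gallery in zip(probe_net.parameters(),
                                  gallery_net.parameters()):
        p_gallery.data = alpha * p_gallery.data + (1 - alpha) * p_probe.data

def train_BN(m):
    # Keep BatchNorm layers in train mode inside an eval() network so the
    # gallery branch still refreshes its running statistics.
    if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d)):
        m.train()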
Example #2
def train(conf):
    """Total training procedure.
    """
    data_loader = DataLoader(ImageDataset(conf.data_root, conf.train_file),
                             conf.batch_size,
                             True,
                             num_workers=4)
    conf.device = torch.device('cuda:0')
    criterion = torch.nn.CrossEntropyLoss().cuda(conf.device)
    backbone_factory = BackboneFactory(conf.backbone_type,
                                       conf.backbone_conf_file)
    head_factory = HeadFactory(conf.head_type, conf.head_conf_file)
    model = FaceModel(backbone_factory, head_factory)
    ori_epoch = 0
    if conf.resume:
        checkpoint = torch.load(conf.pretrain_model)
        ori_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(parameters,
                          lr=conf.lr,
                          momentum=conf.momentum,
                          weight_decay=1e-4)
    # O1 mixed precision via NVIDIA apex (requires `from apex import amp`).
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    model = torch.nn.DataParallel(model).cuda()
    lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=conf.milestones,
                                                 gamma=0.1)
    loss_meter = AverageMeter()
    model.train()
    for epoch in range(ori_epoch, conf.epoches):
        train_one_epoch(data_loader, model, optimizer, criterion, epoch,
                        loss_meter, conf)
        lr_schedule.step()
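
Because Example #2 initializes apex with opt_level "O1", the backward pass inside train_one_epoch (not shown) must route the loss through apex's loss scaler. A sketch of that step, assuming the standard apex API; variable names are illustrative:

# Inside the training loop (sketch):
outputs = model(images, labels)   # assumed FaceModel forward signature
loss = criterion(outputs, labels)
optimizer.zero_grad()
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()        # backward on the scaled loss
optimizer.step()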
Example #3
def train(args):
    """Total training procedure.
    """
    print("Use GPU: {} for training".format(args.local_rank))
    if args.local_rank == 0:
        writer = SummaryWriter(log_dir=args.tensorboardx_logdir)
        args.writer = writer
        if not os.path.exists(args.out_dir):
            os.makedirs(args.out_dir)
    dist.init_process_group(backend='nccl', init_method='env://')
    torch.cuda.set_device(args.local_rank)
    args.rank = dist.get_rank()
    args.world_size = dist.get_world_size()
    trainset = ImageDataset(args.data_root, args.train_file)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainset, shuffle=True)
    train_loader = DataLoader(dataset=trainset,
                              batch_size=args.batch_size,
                              sampler=train_sampler,
                              num_workers=0,
                              pin_memory=True,
                              drop_last=False)

    backbone_factory = BackboneFactory(args.backbone_type,
                                       args.backbone_conf_file)
    head_factory = HeadFactory(args.head_type, args.head_conf_file)
    model = FaceModel(backbone_factory, head_factory)
    model = model.to(args.local_rank)
    # Sync the initial weights from rank 0 to every worker before DDP wrapping;
    # model.train() is called once below, after the model is fully set up.
    for ps in model.parameters():
        dist.broadcast(ps, 0)
    # DDP
    model = torch.nn.parallel.DistributedDataParallel(
        module=model, broadcast_buffers=False, device_ids=[args.local_rank])
    criterion = torch.nn.CrossEntropyLoss().to(args.local_rank)
    ori_epoch = 0
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(parameters,
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=1e-4)
    lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=args.milestones,
                                                 gamma=0.1)
    loss_meter = AverageMeter()
    model.train()
    for epoch in range(ori_epoch, args.epoches):
        train_one_epoch(train_loader, model, optimizer, criterion, epoch,
                        loss_meter, args)
        lr_schedule.step()
    dist.destroy_process_group()
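
Example #3 assumes one process per GPU: args.local_rank and the env:// rendezvous variables (MASTER_ADDR, MASTER_PORT, RANK, WORLD_SIZE) are supplied by a launcher such as `python -m torch.distributed.launch --nproc_per_node=<num_gpus> train.py`. A sketch of the flag plumbing the script would need (the argparse setup is inferred from the usage above, not shown in the source):

import argparse

parser = argparse.ArgumentParser()
# Filled in automatically by torch.distributed.launch; defaults to 0 so the
# script can still run single-process without a launcher.
parser.add_argument('--local_rank', type=int, default=0)
args, _ = parser.parse_known_args()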
Example #4
def train(conf):
    """Total training procedure.
    """
    data_loader = DataLoader(ImageDataset_SST(conf.data_root, conf.train_file),
                             conf.batch_size,
                             True,
                             num_workers=4)
    conf.device = torch.device('cuda:0')
    #criterion = OnlineContrastiveLoss(margin=2.5, pair_selector=HardNegativePairSelector(cpu=False)).cuda(torch.device('cuda:0'))

    triplet_selector = FunctionNegativeTripletSelector(
        margin=2.5, negative_selection_fn=random_hard_negative, cpu=False)
    criterion = OnlineTripletLoss(
        margin=2.5, triplet_selector=triplet_selector).cuda(conf.device)
    backbone_factory = BackboneFactory(conf.backbone_type,
                                       conf.backbone_conf_file)
    model = backbone_factory.get_backbone()
    if conf.resume:
        model.load_state_dict(torch.load(conf.pretrain_model))
    model = torch.nn.DataParallel(model).cuda()
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(parameters,
                          lr=conf.lr,
                          momentum=conf.momentum,
                          weight_decay=1e-4)
    lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=conf.milestones,
                                                 gamma=0.1)
    loss_meter = AverageMeter()
    model.train()
    for epoch in range(conf.epoches):
        train_one_epoch(data_loader, model, optimizer, criterion, epoch,
                        loss_meter, conf)
        lr_schedule.step()
        if conf.evaluate:
            conf.evaluator.evaluate(model)
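
Example #4 trains a bare backbone with online triplet mining, so train_one_epoch (not shown) has to feed embeddings and identity labels to the criterion. A sketch of that call, assuming a siamese-triplet-style OnlineTripletLoss that returns the mean loss together with the number of triplets it mined; variable names are illustrative:

# Inside the training loop (sketch):
embeddings = model(images.cuda(conf.device))     # (batch, feat_dim)
loss, num_mined = criterion(embeddings, labels)  # triplets mined on the fly
optimizer.zero_grad()
loss.backward()
optimizer.step()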
Example #5
def train(conf):
    """Total training procedure.
    """
    if conf.virtual_batch:
        # Emulate one large logical batch: run mini-batches of 64 and step
        # the optimizer only every update_interval iterations.
        assert conf.batch_size % 64 == 0
        update_interval = conf.batch_size // 64
        batch_per_epoch = 64
    else:
        update_interval = 1
        batch_per_epoch = conf.batch_size
    data_loader = DataLoader(ImageDataset(conf.data_root, conf.train_file),
                             batch_per_epoch, True, num_workers=6)
    conf.device = torch.device('cuda:0')
    criterion = torch.nn.CrossEntropyLoss().cuda(conf.device)
    backbone_factory = BackboneFactory(conf.backbone_type, conf.backbone_conf_file)    
    head_factory = HeadFactory(conf.head_type, conf.head_conf_file)
    model = FaceModel(backbone_factory, head_factory)
    ori_epoch = 0
    if conf.pretrain_model != '':
        checkpoint = torch.load(conf.pretrain_model)
        model.load_state_dict(checkpoint['state_dict'])
        if conf.resume:
            ori_epoch = checkpoint['epoch'] + 1
        del checkpoint
    model = model.cuda()
    backbone_parameters = [p for n, p in model.named_parameters()
                           if 'backbone' in n and p.requires_grad]
    head_parameters = [p for n, p in model.named_parameters()
                       if 'head' in n and p.requires_grad]
    optimizer = optim.AdamW(backbone_parameters + head_parameters,
                            lr=conf.lr, weight_decay=3e-5)
    if conf.resume:
        # MultiStepLR with last_epoch >= 0 expects 'initial_lr' in each group.
        for param_group in optimizer.param_groups:
            param_group['initial_lr'] = conf.lr
    scaler = torch.cuda.amp.GradScaler()
    model = torch.nn.DataParallel(model).cuda()
    lr_schedule = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=conf.milestones, gamma=0.1,
        last_epoch=ori_epoch - 1)
    loss_meter = AverageMeter()
    model.train()
    for epoch in range(ori_epoch, conf.epoches):
        train_one_epoch(data_loader, model, optimizer, criterion, epoch,
                        loss_meter, backbone_parameters, conf, scaler,
                        update_interval)
        lr_schedule.step()
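
The update_interval computed above only matters if train_one_epoch (not shown) defers the optimizer step. A sketch of the accumulation pattern it would pair with the GradScaler created in the example; compute_loss is a hypothetical helper standing in for the forward pass:

# Inside train_one_epoch (sketch):
optimizer.zero_grad()
for step, (images, labels) in enumerate(data_loader):
    with torch.cuda.amp.autocast():
        loss = compute_loss(model, criterion, images, labels)  # hypothetical
    # Scale the loss down so the accumulated gradient matches one large batch.
    scaler.scale(loss / update_interval).backward()
    if (step + 1) % update_interval == 0:
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()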
Example #6
# parse config.
with open(args.data_conf_file) as f:
    data_conf = yaml.load(f, Loader=yaml.FullLoader)[args.test_set]
    pairs_file_path = data_conf['pairs_file_path']
    cropped_face_folder = data_conf['cropped_face_folder']
    image_list_file_path = data_conf['image_list_file_path']
# define pairs_parser_factory
pairs_parser_factory = PairsParserFactory(pairs_file_path, args.test_set)
# define dataloader
data_loader = DataLoader(CommonTestDataset(cropped_face_folder,
                                           image_list_file_path, False),
                         batch_size=args.batch_size,
                         num_workers=4,
                         shuffle=False)
# model definition
backbone_factory = BackboneFactory(args.backbone_type,
                                   args.backbone_conf_file)
model_loader = ModelLoader(backbone_factory)
feature_extractor = CommonExtractor('cuda:0')
lfw_evaluator = LFWEvaluator(data_loader, pairs_parser_factory,
                             feature_extractor)
if os.path.isdir(args.model_path):
    # Evaluate every .pt checkpoint in the directory and rank by accuracy.
    accu_list = []
    model_name_list = os.listdir(args.model_path)
    for model_name in model_name_list:
        if model_name.endswith('.pt'):
            model_path = os.path.join(args.model_path, model_name)
            model = model_loader.load_model(model_path)
            mean, std = lfw_evaluator.test(model)
            accu_list.append((os.path.basename(model_path), mean, std))
    accu_list.sort(key=accu_key, reverse=True)
else:
    # The snippet is truncated here; a plausible completion evaluates the
    # single checkpoint at args.model_path directly.
    model = model_loader.load_model(args.model_path)
    mean, std = lfw_evaluator.test(model)
    accu_list = [(os.path.basename(args.model_path), mean, std)]
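
The block sorts with accu_key, which the snippet never defines; a plausible one-liner consistent with the (name, mean, std) tuples built above:

def accu_key(elem):
    # Rank checkpoints by mean accuracy (elem is (model_name, mean, std)).
    return elem[1]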
Example #7
"""
@author: Jun Wang 
@date: 20201012 
@contact: [email protected]
"""

import sys
import torch
from thop import profile
from thop import clever_format

sys.path.append('..')
from backbone.backbone_def import BackboneFactory

backbone_type = 'MobileFaceNet'
#backbone_type = 'ResNet'
#backbone_type = 'EfficientNet'
#backbone_type = 'HRNet'
#backbone_type = 'GhostNet'
#backbone_type = 'AttentionNet'

backbone_conf_file = '../training_mode/backbone_conf.yaml'
backbone_factory = BackboneFactory(backbone_type, backbone_conf_file)
backbone = backbone_factory.get_backbone()
dummy_input = torch.randn(1, 3, 112, 112)  # NCHW: one 112x112 RGB face crop
macs, params = profile(backbone, inputs=(dummy_input, ))
macs, params = clever_format([macs, params], "%.2f")
print('backbone type: ', backbone_type)
print('Params: ', params)
print('Macs: ', macs)