def train(conf): """Total training procedure. """ conf.device = torch.device('cuda:0') criterion = torch.nn.CrossEntropyLoss().cuda(conf.device) backbone_factory = BackboneFactory(conf.backbone_type, conf.backbone_conf_file) probe_net = backbone_factory.get_backbone() gallery_net = backbone_factory.get_backbone() head_factory = HeadFactory(conf.head_type, conf.head_conf_file) prototype = head_factory.get_head().cuda(conf.device) probe_net = torch.nn.DataParallel(probe_net).cuda() gallery_net = torch.nn.DataParallel(gallery_net).cuda() optimizer = optim.SGD(probe_net.parameters(), lr=conf.lr, momentum=conf.momentum, weight_decay=5e-4) lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer, milestones=conf.milestones, gamma=0.1) if conf.resume: probe_net.load_state_dict(torch.load(args.pretrain_model)) moving_average(probe_net, gallery_net, 0) probe_net.train() gallery_net.eval().apply(train_BN) exclude_id_set = set() loss_meter = AverageMeter() for epoch in range(conf.epoches): data_loader = DataLoader( ImageDataset_SST(conf.data_root, conf.train_file, exclude_id_set), conf.batch_size, True, num_workers = 4, drop_last = True) exclude_id_set = train_one_epoch(data_loader, probe_net, gallery_net, prototype, optimizer, criterion, epoch, conf, loss_meter) lr_schedule.step()
def train(conf): """Total training procedure. """ data_loader = DataLoader(ImageDataset(conf.data_root, conf.train_file), conf.batch_size, True, num_workers=4) conf.device = torch.device('cuda:0') criterion = torch.nn.CrossEntropyLoss().cuda(conf.device) backbone_factory = BackboneFactory(conf.backbone_type, conf.backbone_conf_file) head_factory = HeadFactory(conf.head_type, conf.head_conf_file) model = FaceModel(backbone_factory, head_factory) ori_epoch = 0 if conf.resume: ori_epoch = torch.load(args.pretrain_model)['epoch'] + 1 state_dict = torch.load(args.pretrain_model)['state_dict'] model.load_state_dict(state_dict) model = model.cuda() parameters = [p for p in model.parameters() if p.requires_grad] optimizer = optim.SGD(parameters, lr=conf.lr, momentum=conf.momentum, weight_decay=1e-4) model, optimizer = amp.initialize(model, optimizer, opt_level="O1") model = torch.nn.DataParallel(model).cuda() lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer, milestones=conf.milestones, gamma=0.1) loss_meter = AverageMeter() model.train() for epoch in range(ori_epoch, conf.epoches): train_one_epoch(data_loader, model, optimizer, criterion, epoch, loss_meter, conf) lr_schedule.step()
def train(args): """Total training procedure. """ print("Use GPU: {} for training".format(args.local_rank)) if args.local_rank == 0: writer = SummaryWriter(log_dir=args.tensorboardx_logdir) args.writer = writer if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) dist.init_process_group(backend='nccl', init_method='env://') torch.cuda.set_device(args.local_rank) args.rank = dist.get_rank() #print('args.rank: ', dist.get_rank()) #print('args.get_world_size: ', dist.get_world_size()) #print('is_nccl_available: ', dist.is_nccl_available()) args.world_size = dist.get_world_size() trainset = ImageDataset(args.data_root, args.train_file) train_sampler = torch.utils.data.distributed.DistributedSampler( trainset, shuffle=True) train_loader = DataLoader(dataset=trainset, batch_size=args.batch_size, sampler=train_sampler, num_workers=0, pin_memory=True, drop_last=False) backbone_factory = BackboneFactory(args.backbone_type, args.backbone_conf_file) head_factory = HeadFactory(args.head_type, args.head_conf_file) model = FaceModel(backbone_factory, head_factory) model = model.to(args.local_rank) model.train() for ps in model.parameters(): dist.broadcast(ps, 0) # DDP model = torch.nn.parallel.DistributedDataParallel( module=model, broadcast_buffers=False, device_ids=[args.local_rank]) criterion = torch.nn.CrossEntropyLoss().to(args.local_rank) ori_epoch = 0 parameters = [p for p in model.parameters() if p.requires_grad] optimizer = optim.SGD(parameters, lr=args.lr, momentum=args.momentum, weight_decay=1e-4) lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=0.1) loss_meter = AverageMeter() model.train() for epoch in range(ori_epoch, args.epoches): train_one_epoch(train_loader, model, optimizer, criterion, epoch, loss_meter, args) lr_schedule.step() dist.destroy_process_group()
def train(conf): """Total training procedure. """ data_loader = DataLoader(ImageDataset_SST(conf.data_root, conf.train_file), conf.batch_size, True, num_workers=4) conf.device = torch.device('cuda:0') #criterion = OnlineContrastiveLoss(margin=2.5, pair_selector=HardNegativePairSelector(cpu=False)).cuda(torch.device('cuda:0')) triplet_selector = FunctionNegativeTripletSelector( margin=2.5, negative_selection_fn=random_hard_negative, cpu=False) criterion = OnlineTripletLoss(margin=2.5, triplet_selector=triplet_selector).cuda( conf.device) backbone_factory = BackboneFactory(conf.backbone_type, conf.backbone_conf_file) model = backbone_factory.get_backbone() if conf.resume: model.load_state_dict(torch.load(args.pretrain_model)) model = torch.nn.DataParallel(model).cuda() parameters = [p for p in model.parameters() if p.requires_grad] optimizer = optim.SGD(parameters, lr=conf.lr, momentum=conf.momentum, weight_decay=1e-4) lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer, milestones=conf.milestones, gamma=0.1) loss_meter = AverageMeter() model.train() for epoch in range(conf.epoches): train_one_epoch(data_loader, model, optimizer, criterion, epoch, loss_meter, conf) lr_schedule.step() if conf.evaluate: conf.evaluator.evaluate(model)
def train(conf): """Total training procedure. """ if conf.virtual_batch: assert conf.batch_size % 64 == 0 update_interval = conf.batch_size // 64 batch_per_epoch = 64 else: update_interval = 1 batch_per_epoch = conf.batch_size data_loader = DataLoader(ImageDataset(conf.data_root, conf.train_file), batch_per_epoch, True, num_workers = 6) conf.device = torch.device('cuda:0') criterion = torch.nn.CrossEntropyLoss().cuda(conf.device) backbone_factory = BackboneFactory(conf.backbone_type, conf.backbone_conf_file) head_factory = HeadFactory(conf.head_type, conf.head_conf_file) model = FaceModel(backbone_factory, head_factory) ori_epoch = 0 if conf.pretrain_model != '': state_dict = torch.load(args.pretrain_model)['state_dict'] model.load_state_dict(state_dict) if conf.resume: ori_epoch = torch.load(args.pretrain_model)['epoch'] + 1 del state_dict model = model.cuda() # parameters = [p for p in model.parameters() if p.requires_grad] backbone_parameters = [p for n, p in model.named_parameters() if ("backbone" in n) and (p.requires_grad)] head_parameters = [p for n, p in model.named_parameters() if ("head" in n) and (p.requires_grad)] optimizer = optim.AdamW(backbone_parameters + head_parameters, lr = conf.lr, weight_decay = 3e-5) if conf.resume: for param_group in optimizer.param_groups: param_group['initial_lr'] = args.lr scaler = torch.cuda.amp.GradScaler() model = torch.nn.DataParallel(model).cuda() lr_schedule = optim.lr_scheduler.MultiStepLR( optimizer, milestones = conf.milestones, gamma = 0.1, last_epoch=ori_epoch-1) loss_meter = AverageMeter() model.train() for epoch in range(ori_epoch, conf.epoches): train_one_epoch(data_loader, model, optimizer, criterion, epoch, loss_meter, backbone_parameters, conf, scaler, update_interval) lr_schedule.step()
# parse config.
with open(args.data_conf_file) as f:
    data_conf = yaml.load(f, Loader=yaml.FullLoader)[args.test_set]
pairs_file_path = data_conf['pairs_file_path']
cropped_face_folder = data_conf['cropped_face_folder']
image_list_file_path = data_conf['image_list_file_path']
# define pairs_parser_factory
pairs_parser_factory = PairsParserFactory(pairs_file_path, args.test_set)
# define dataloader
data_loader = DataLoader(
    CommonTestDataset(cropped_face_folder, image_list_file_path, False),
    batch_size=args.batch_size, num_workers=4, shuffle=False)
# model definition
backbone_factory = BackboneFactory(args.backbone_type, args.backbone_conf_file)
model_loader = ModelLoader(backbone_factory)
feature_extractor = CommonExtractor('cuda:0')
lfw_evaluator = LFWEvaluator(data_loader, pairs_parser_factory, feature_extractor)
if os.path.isdir(args.model_path):
    # Evaluate every checkpoint in the directory and rank them by accuracy.
    accu_list = []
    model_name_list = os.listdir(args.model_path)
    for model_name in model_name_list:
        if model_name.endswith('.pt'):
            model_path = os.path.join(args.model_path, model_name)
            model = model_loader.load_model(model_path)
            mean, std = lfw_evaluator.test(model)
            accu_list.append((os.path.basename(model_path), mean, std))
    accu_list.sort(key=accu_key, reverse=True)
else:
    # Evaluate a single checkpoint.
    model = model_loader.load_model(args.model_path)
    mean, std = lfw_evaluator.test(model)
    accu_list = [(os.path.basename(args.model_path), mean, std)]
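The sort above needs an accu_key function that is not defined in this excerpt. A one-line sketch, assuming each entry is (model_name, mean_accuracy, std) as built above:

def accu_key(elem):
    # Rank checkpoints by mean verification accuracy.
    return elem[1]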
""" @author: Jun Wang @date: 20201012 @contact: [email protected] """ import sys import torch from thop import profile from thop import clever_format sys.path.append('..') from backbone.backbone_def import BackboneFactory backbone_type = 'MobileFaceNet' #backbone_type = 'ResNet' #backbone_type = 'EfficientNet' #backbone_type = 'HRNet' #backbone_type = 'GhostNet' #backbone_type = 'AttentionNet' backbone_conf_file = '../training_mode/backbone_conf.yaml' backbone_factory = BackboneFactory(backbone_type, backbone_conf_file) backbone = backbone_factory.get_backbone() input = torch.randn(1, 3, 112, 112) macs, params = profile(backbone, inputs=(input, )) macs, params = clever_format([macs, params], "%.2f") print('backbone type: ', backbone_type) print('Params: ', params) print('Macs: ', macs)