#!/usr/bin/python # -*- coding:utf-8 -*- """ @author:fangpf @time: 2020/09/28 """ import torch from torch.backends import cudnn from data import cfg_re50 from models.retinaface import RetinaFace from utils.net_utils import load_model, image_process, process_face_data cfg = cfg_re50 retina_trained_model = './weights/Resnet50_Final.pth' retina_net = RetinaFace(cfg=cfg, phase='test') retina_net = load_model(retina_net, retina_trained_model, False) retina_net = retina_net.cuda(0) retina_net.eval() cudnn.benchmark = True device = torch.device("cuda") def face_detection(im): resize = 1 im, im_width, im_height, scale = image_process(im, device) loc, conf, landms = retina_net(im) result_data = process_face_data(cfg, im, im_height, im_width, loc, scale, conf, landms, resize) return result_data
# create new OrderedDict that does not contain `module.` from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda()
def main(): global args global minmum_loss args.gpu = 0 args.world_size = 1 if args.distributed: args.gpu = args.local_rank % torch.cuda.device_count() torch.cuda.set_device(args.gpu) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.world_size = torch.distributed.get_world_size() args.total_batch_size = args.world_size * args.batch_size # build dsfd network print("Building net...") model = RetinaFace(cfg=cfg) print("Printing net...") # for multi gpu if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model) model = model.cuda() # optimizer and loss function optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg['num_classes'], 0.35, True, 0, True, 7, 0.35, False) ## dataset print("loading dataset") train_dataset = WiderFaceDetection( args.training_dataset, preproc(cfg['image_size'], cfg['rgb_mean'])) train_loader = data.DataLoader(train_dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load( args.resume, map_location=lambda storage, loc: storage.cuda(args.gpu)) args.start_epoch = checkpoint['epoch'] minmum_loss = checkpoint['minmum_loss'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) print('Using the specified args:') print(args) # load PriorBox print("Load priorbox") with torch.no_grad(): priorbox = PriorBox(cfg=cfg, image_size=(cfg['image_size'], cfg['image_size'])) priors = priorbox.forward() priors = priors.cuda() print("start traing") for epoch in range(args.start_epoch, args.epochs): # train for one epoch train_loss = train(train_loader, model, priors, criterion, optimizer, epoch) if args.local_rank == 0: is_best = train_loss < minmum_loss minmum_loss = min(train_loss, minmum_loss) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': minmum_loss, 'optimizer': optimizer.state_dict(), }, is_best, epoch)
def main(): args = get_args() if not os.path.exists(args.save_folder): os.mkdir(args.save_folder) cfg = None if args.network == "mobile0.25": cfg = cfg_mnet elif args.network == "resnet50": cfg = cfg_re50 rgb_mean = (104, 117, 123) # bgr order num_classes = 2 img_dim = cfg["image_size"] num_gpu = cfg["ngpu"] batch_size = cfg["batch_size"] max_epoch = cfg["epoch"] gpu_train = cfg["gpu_train"] num_workers = args.num_workers momentum = args.momentum weight_decay = args.weight_decay initial_lr = args.lr gamma = args.gamma training_dataset = args.training_dataset save_folder = args.save_folder net = RetinaFace(cfg=cfg) print("Printing net...") print(net) if args.resume_net is not None: print("Loading resume network...") state_dict = torch.load(args.resume_net) # create new OrderedDict that does not contain `module.` from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == "module.": name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() net.train() epoch = 0 + args.resume_epoch print("Loading Dataset...") dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean)) epoch_size = math.ceil(len(dataset) / batch_size) max_iter = max_epoch * epoch_size stepvalues = (cfg["decay1"] * epoch_size, cfg["decay2"] * epoch_size) step_index = 0 if args.resume_epoch > 0: start_iter = args.resume_epoch * epoch_size else: start_iter = 0 for iteration in range(start_iter, max_iter): if iteration % epoch_size == 0: # create batch iterator batch_iterator = iter( data.DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate)) if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg["decay1"]): torch.save( net.state_dict(), save_folder + cfg["name"] + "_epoch_" + str(epoch) + ".pth") epoch += 1 load_t0 = time.time() if iteration in stepvalues: step_index += 1 lr = adjust_learning_rate(initial_lr, optimizer, gamma, epoch, step_index, iteration, epoch_size) # load train data images, targets = next(batch_iterator) images = images.cuda() targets = [anno.cuda() for anno in targets] # forward out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg["loc_weight"] * loss_l + loss_c + loss_landm loss.backward() optimizer.step() load_t1 = time.time() batch_time = load_t1 - load_t0 eta = int(batch_time * (max_iter - iteration)) print( "Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} " "|| LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}".format( epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss_l.item(), loss_c.item(), loss_landm.item(), lr, batch_time, str(datetime.timedelta(seconds=eta)), )) torch.save(net.state_dict(), save_folder + cfg["name"] + "_Final.pth")
def main(args): # dataset rgb_mean = (104, 117, 123) # bgr order dataset = MyDataset(args.txt_path, args.txt_path2, preproc(args.img_size, rgb_mean)) dataloader = DataLoader(dataset, args.bs, shuffle=True, num_workers=args.num_workers, collate_fn=detection_collate, pin_memory=True) # net and load net = RetinaFace(cfg=cfg_mnet) if args.resume_net is not None: print('Loading resume network...') state_dict = load_normal(args.resume_net) net.load_state_dict(state_dict) print('Loading success!') net = net.cuda() if torch.cuda.device_count() >= 1 and args.multi_gpu: net = torch.nn.DataParallel(net) # optimizer and loss optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) scheduler = WarmupCosineSchedule(optimizer, args.warm_epoch, args.max_epoch, len(dataloader), args.cycles) num_classes = 2 criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) # priorbox priorbox = PriorBox(cfg_mnet, image_size=(args.img_size, args.img_size)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() # save folder time_str = datetime.datetime.strftime(datetime.datetime.now(), '%y-%m-%d-%H-%M-%S') args.save_folder = os.path.join(args.save_folder, time_str) if not os.path.exists(args.save_folder): os.makedirs(args.save_folder) logger = logger_init(args.save_folder) logger.info(args) # train for i_epoch in range(args.max_epoch): net.train() for i_iter, data in enumerate(dataloader): load_t0 = time.time() images, targets = data[:2] images = images.cuda() targets = [anno.cuda() for anno in targets] # forward out = net(images) # backward optimizer.zero_grad() loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg_mnet['loc_weight'] * loss_l + loss_c + loss_landm loss.backward() optimizer.step() scheduler.step() # print info load_t1 = time.time() batch_time = load_t1 - load_t0 eta = int(batch_time * (len(dataloader) * (args.max_epoch - i_epoch) - i_iter)) logger.info('Epoch:{}/{} || Iter: {}/{} || ' 'Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || ' 'LR: {:.8f} || ' 'Batchtime: {:.4f} s || ' 'ETA: {}'.format( i_epoch + 1, args.max_epoch, i_iter + 1, len(dataloader), loss_l.item(), loss_c.item(), loss_landm.item(), optimizer.state_dict()['param_groups'][0]['lr'], batch_time, str(datetime.timedelta(seconds=eta)))) if (i_epoch + 1) % args.save_fre == 0: save_name = 'mobile0.25_' + str(i_epoch + 1) + '.pth' torch.save(net.state_dict(), os.path.join(args.save_folder, save_name))
def train(): # dataset = WiderFaceDetection( training_dataset,preproc(img_dim, rgb_mean), landmark_num) # dataset = Dataset300W(training_dataset, preproc(img_dim, rgb_mean)) dataset = Dataset300W(training_dataset, preproc(img_dim, rgb_mean), landmark_indices) dataloader = data.DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate) net = RetinaFace(cfg=cfg) print("Printing net...") print(net) if args.resume_net is not None: print('Loading resume network...') load_net(net, args.resume_net) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True net.train() epoch = 0 + args.resume_epoch print('Loading Dataset...') optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) criterion = MultiBoxLoss(num_classes, landmark_num, 0.35, True, 0, True, 7, 0.35, False) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() epoch_size = math.ceil(len(dataset) / batch_size) max_iter = max_epoch * epoch_size stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size) step_index = 0 if args.resume_epoch > 0: start_iter = args.resume_epoch * epoch_size else: start_iter = 0 for iteration in range(start_iter, max_iter): if iteration % epoch_size == 0: # create batch iterator batch_iterator = iter(dataloader) if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']): fullname = os.path.join( save_folder, cfg['name'] + '_landmark' + str(landmark_num) + '_epoch_' + str(epoch) + '.pth') torch.save(net.state_dict(), fullname) epoch += 1 load_t0 = time.time() if iteration in stepvalues: step_index += 1 lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size) # load train data images, targets = next(batch_iterator) images = images.cuda() targets = [anno.cuda() for anno in targets] # forward out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg['loc_weight'] * loss_l + loss_c + cfg[ 'landmark_weight'] * loss_landm loss.backward() optimizer.step() load_t1 = time.time() batch_time = load_t1 - load_t0 eta = int(batch_time * (max_iter - iteration)) print( 'Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}' .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss_l.item(), loss_c.item(), loss_landm.item(), lr, batch_time, str(datetime.timedelta(seconds=eta)))) fullname = os.path.join( save_folder, cfg['name'] + '_landmark' + str(landmark_num) + '_Final.pth') torch.save(net.state_dict(), fullname) print("final finished!")
def Train(self): self.setup() cfg = self.system_dict["local"]["cfg"] print(cfg) rgb_mean = (104, 117, 123) # bgr order num_classes = 2 img_dim = cfg['image_size'] num_gpu = cfg['ngpu'] batch_size = cfg['batch_size'] max_epoch = cfg['epoch'] gpu_train = cfg['gpu_train'] num_workers = self.system_dict["params"]["num_workers"] momentum = self.system_dict["params"]["momentum"] weight_decay = self.system_dict["params"]["weight_decay"] initial_lr = self.system_dict["params"]["lr"] gamma = self.system_dict["params"]["gamma"] save_folder = self.system_dict["params"]["save_folder"] print("Loading Network...") net = RetinaFace(cfg=cfg) if self.system_dict["params"]["resume_net"] is not None: print('Loading resume network...') state_dict = torch.load(self.system_dict["params"]["resume_net"]) # create new OrderedDict that does not contain `module.` from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True print("Done...") optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() net.train() epoch = 0 + self.system_dict["params"]["resume_epoch"] dataset = self.system_dict["local"]["dataset"] epoch_size = math.ceil(len(dataset) / batch_size) max_iter = max_epoch * epoch_size stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size) step_index = 0 if self.system_dict["params"]["resume_epoch"] > 0: start_iter = self.system_dict["params"]["resume_epoch"] * epoch_size else: start_iter = 0 for iteration in range(start_iter, max_iter): if iteration % epoch_size == 0: # create batch iterator batch_iterator = iter( data.DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate)) torch.save( net.state_dict(), save_folder + "/" + cfg['name'] + '_intermediate.pth') epoch += 1 load_t0 = time.time() if iteration in stepvalues: step_index += 1 lr = self.adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size, initial_lr) # load train data images, targets = next(batch_iterator) images = images.cuda() targets = [anno.cuda() for anno in targets] # forward out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm loss.backward() optimizer.step() load_t1 = time.time() batch_time = load_t1 - load_t0 eta = int(batch_time * (max_iter - iteration)) if (iteration % 50 == 0): print( 'Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}' .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss_l.item(), loss_c.item(), loss_landm.item(), lr, batch_time, str(datetime.timedelta(seconds=eta)))) torch.save(net.state_dict(), save_folder + "/" + cfg['name'] + '_Final.pth')
def train(): net = RetinaFace(cfg=cfg) logger.info("Printing net...") logger.info(net) if args.resume_net is not None: logger.info('Loading resume network...') state_dict = torch.load(args.resume_net) # create new OrderedDict that does not contain `module.` from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() net.train() epoch = 0 + args.resume_epoch logger.info('Loading Dataset...') trainset = WiderFaceDetection(training_dataset, preproc=train_preproc(img_dim, rgb_mean), mode='train') validset = WiderFaceDetection(training_dataset, preproc=valid_preproc(img_dim, rgb_mean), mode='valid') # trainset = WiderFaceDetection(training_dataset, transformers=train_transformers(img_dim), mode='train') # validset = WiderFaceDetection(training_dataset, transformers=valid_transformers(img_dim), mode='valid') trainloader = data.DataLoader(trainset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate) validloader = data.DataLoader(validset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate) logger.info(f'Totally {len(trainset)} training samples and {len(validset)} validating samples.') epoch_size = math.ceil(len(trainset) / batch_size) max_iter = max_epoch * epoch_size logger.info(f'max_epoch: {max_epoch:d} epoch_size: {epoch_size:d}, max_iter: {max_iter:d}') # optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) optimizer = optim.Adam(net.parameters(), lr=initial_lr, weight_decay=weight_decay) scheduler = _utils.get_linear_schedule_with_warmup(optimizer, int(0.1 * max_iter), max_iter) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size) step_index = 0 if args.resume_epoch > 0: start_iter = args.resume_epoch * epoch_size else: start_iter = 0 best_loss_val = float('inf') for iteration in range(start_iter, max_iter): if iteration % epoch_size == 0: # create batch iterator # batch_iterator = iter(tqdm(trainloader, total=len(trainloader))) batch_iterator = iter(trainloader) # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']): # torch.save(net.state_dict(), save_folder + cfg['name']+ '_epoch_' + str(epoch) + '.pth') epoch += 1 torch.cuda.empty_cache() if (valid_steps > 0) and (iteration > 0) and (iteration % valid_steps == 0): net.eval() # validation loss_l_val = 0. loss_c_val = 0. loss_landm_val = 0. loss_val = 0. # for val_no, (images, targets) in tqdm(enumerate(validloader), total=len(validloader)): for val_no, (images, targets) in enumerate(validloader): # load data images = images.cuda() targets = [anno.cuda() for anno in targets] # forward with torch.no_grad(): out = net(images) loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm loss_l_val += loss_l.item() loss_c_val += loss_c.item() loss_landm_val += loss_landm.item() loss_val += loss.item() loss_l_val /= len(validloader) loss_c_val /= len(validloader) loss_landm_val /= len(validloader) loss_val /= len(validloader) logger.info('[Validating] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f}' .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss_val, loss_l_val, loss_c_val, loss_landm_val)) if loss_val < best_loss_val: best_loss_val = loss_val pth = os.path.join(save_folder, cfg['name'] + '_iter_' + str(iteration) + f'_{loss_val:.4f}_' + '.pth') torch.save(net.state_dict(), pth) logger.info(f'Best validating loss: {best_loss_val:.4f}, model saved as {pth:s})') net.train() load_t0 = time.time() # if iteration in stepvalues: # step_index += 1 # lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size) # load train data images, targets = next(batch_iterator) images = images.cuda() targets = [anno.cuda() for anno in targets] # forward out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm loss.backward() optimizer.step() scheduler.step() load_t1 = time.time() batch_time = load_t1 - load_t0 eta = int(batch_time * (max_iter - iteration)) if iteration % verbose_steps == 0: logger.info('[Training] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}' .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss.item(), loss_l.item(), loss_c.item(), loss_landm.item(), scheduler.get_last_lr()[-1], batch_time, str(datetime.timedelta(seconds=eta))))
from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v teacher_net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() teacher_net = torch.nn.DataParallel(teacher_net).cuda() else: net = net.cuda() teacher_net = teacher_net.cuda() cudnn.benchmark = True optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) criterion = MultiBoxLoss2(num_classes, 0.35, True, 0, True, 7, 0.35, False, args.r_weight, args.c_weight) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda()