import torch
from torch import nn
from torchviz import make_dot
from models.retinaface import RetinaFace
from data import cfg_mnetv2, cfg_re50

net = RetinaFace(cfg=cfg_re50)
x = torch.randn(1, 3, 224, 224).requires_grad_(True)
y = net(x)
vis_graph = make_dot(y, params=dict(list(net.named_parameters()) + [('x', x)]))
vis_graph.view(filename="viz/resnet50.gv")
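Opening the default Graphviz viewer is not always convenient; since make_dot() returns a graphviz.Digraph, the same graph can also be written straight to an image file. A minimal sketch, assuming the vis_graph object from the snippet above (the output path is only an example):

# write the graph to disk instead of opening a viewer window
vis_graph.format = "png"                                  # or "pdf", "svg", ...
vis_graph.render(filename="viz/resnet50", cleanup=True)   # writes viz/resnet50.png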
# strip an eventual `module.` prefix (left over from DataParallel checkpoints);
# `state_dict` is the checkpoint dict loaded earlier in the script
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    if k[:7] == 'module.':
        name = k[7:]  # remove `module.`
    else:
        name = k
    new_state_dict[name] = v
net.load_state_dict(new_state_dict)
cudnn.benchmark = True

# okay, now we want to add new layers:
# freeze everything first, then re-initialize the detection heads
for params in net.parameters():  # set all layers to requires_grad = False
    params.requires_grad = False

net.ClassHead = net._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])  # re-initialize the heads
net.BboxHead = net._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])    # (3 FPN levels feed the heads)

for name, param in net.named_parameters():
    print(name, param.shape)

# for name, param in net.named_parameters():   # util to print layers that are now trainable
#     if param.requires_grad:
#         print(name)

Plist = []
for params in net.parameters():  # store the parameters that will be updated in Plist
    if params.requires_grad:
        Plist.append(params)

if num_gpu > 1 and gpu_train:  # now transfer net to GPU if possible
    net = torch.nn.DataParallel(net).cuda()
else:
    net = net.cuda()
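Because the whole backbone has just been frozen and only the re-created ClassHead/BboxHead modules should still require gradients, it is worth sanity-checking the split before training. A minimal sketch that only assumes the net object from the snippet above:

# count trainable vs. total parameters after freezing the backbone
total = sum(p.numel() for p in net.parameters())
trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
print('trainable parameters: {:,} / {:,} ({:.1%})'.format(trainable, total, trainable / total))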
def train(opt, train_dict, device, tb_writer=None):
    log_dir = Path(tb_writer.log_dir) if tb_writer else Path(train_dict['logdir']) / 'logs'
    wdir = str(log_dir / 'weights') + os.sep
    os.makedirs(wdir, exist_ok=True)
    last = wdir + 'last.pt'
    best = wdir + 'best.pt'
    results_file = 'results.txt'

    # Save the run settings
    with open(log_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(train_dict, f, sort_keys=False)
    with open(log_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    cuda = device.type != 'cpu'
    rank = opt.global_rank
    init_seeds(2 + rank)
    train_path = train_dict['train']
    test_path = train_dict['val']

    # Fall back to last.pt unless a pretrained checkpoint is requested and exists
    train_dict['weights'] = last if not train_dict['pretrain'] or (
        train_dict['pretrain'] and not os.path.exists(train_dict['weights'])) else train_dict['weights']

    model = RetinaFace(train_dict, phase='Train').to(device)

    pretrained = False
    if os.path.exists(train_dict['weights']):
        pretrained = True
        logger.info('Loading resume network from ====>{}'.format(train_dict['weights']))
        state_dict = torch.load(train_dict['weights'], map_location=device)

        # create a new OrderedDict that does not contain the `module.` prefix
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict['model'].items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)

    # Optimizer parameter groups
    pg0, pg1, pg2 = [], [], []
    for k, v in model.named_parameters():
        v.requires_grad = True
        if '.bias' in k:
            pg2.append(v)  # biases
        elif '.weight' in k and '.bn' not in k:
            pg1.append(v)  # apply weight decay
        else:
            pg0.append(v)  # all else

    if train_dict['adam']:
        optimizer = optim.Adam(pg0, lr=train_dict['lr0'],
                               betas=(train_dict['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0, lr=train_dict['lr0'],
                              momentum=train_dict['momentum'], nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': train_dict['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Cosine learning-rate schedule
    epochs = train_dict['epoch']
    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.8 + 0.2  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    plot_lr_scheduler(optimizer, scheduler, epochs)

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if state_dict['optimizer'] is not None:
            optimizer.load_state_dict(state_dict['optimizer'])
            best_fitness = state_dict['best_fitness']

        # Results
        if state_dict.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(state_dict['training_results'])  # write results.txt

        # Epochs
        start_epoch = state_dict['epoch'] + 1
        if epochs < start_epoch:
            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.'
                        % (train_dict['weights'], state_dict['epoch'], epochs))
            epochs += state_dict['epoch']  # finetune additional epochs
        del state_dict

    # SyncBatchNorm
    if train_dict['sync_bn'] and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # Exponential moving average
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    batch_size = train_dict['batch_size']
    image_size = train_dict['image_size']
    # dataloader, dataset = create_dataloader(train_path, image_size, batch_size, opt, hyp=train_dict, augment=True,
    #                                         rect=opt.rect, rank=rank,
    #                                         world_size=opt.world_size, workers=train_dict['workers'])
    rgb_mean = (104, 117, 123)  # bgr order
    dataset = WiderFaceDetection(train_path, preproc(image_size, rgb_mean))
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             shuffle=(sampler is None),
                                             sampler=sampler,
                                             pin_memory=True,
                                             collate_fn=detection_collate)

    # Loss and priors
    num_classes = 2  # face vs. background
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)
    priorbox = PriorBox(train_dict, image_size=(image_size, image_size))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    epoch_size = len(dataloader)
    max_iter = epochs * epoch_size

    for epoch in range(start_epoch, epochs):  # epoch ----------------------------------------------------------
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        if rank in [-1, 0]:
            pbar = tqdm(pbar)  # progress bar
        optimizer.zero_grad()

        for i, (images, targets) in pbar:  # batch -------------------------------------------------------------
            load_t0 = time.time()
            iteration = epoch * epoch_size + i
            images = images.cuda()
            targets = [anno.cuda() for anno in targets]

            with amp.autocast(enabled=cuda):
                out = model(images)
                loss_l, loss_c, loss_landm = criterion(out, priors, targets)
                loss = (train_dict['loc_weight'] * loss_l + loss_c + loss_landm) * opt.world_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if ema:
                ema.update(model)

            load_t1 = time.time()
            batch_time = load_t1 - load_t0
            eta = int(batch_time * (max_iter - iteration))
            lr = optimizer.param_groups[0]['lr']
            if rank in [-1, 0]:
                print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} '
                      '|| LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
                      .format(epoch, epochs, (iteration % epoch_size) + 1, epoch_size,
                              iteration + 1, max_iter, loss_l.item(), loss_c.item(), loss_landm.item(),
                              lr, batch_time, str(datetime.timedelta(seconds=eta))))
        scheduler.step()  # advance the cosine schedule once per epoch

    torch.save(model.state_dict(), wdir + '{}_Final.pth'.format(i))
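The final torch.save(model.state_dict(), ...) writes raw weights only, while the resume branch at the top of train() expects a checkpoint dictionary with 'model', 'optimizer', 'best_fitness', 'training_results' and 'epoch' keys. A minimal sketch of a per-epoch save that the resume code could actually read, assuming the epoch, best_fitness, ema, model, optimizer and last variables from the function above (how best_fitness is updated is left out):

# at the end of each epoch, inside the loop above:
ckpt = {
    'epoch': epoch,
    'best_fitness': best_fitness,
    'training_results': None,                            # could hold the contents of results.txt
    'model': (ema.ema if ema else model).state_dict(),   # assumes ModelEMA exposes the averaged model as .ema
    'optimizer': optimizer.state_dict(),
}
torch.save(ckpt, last)  # wdir/last.pt, picked up by the resume branch
del ckpt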