import argparse
from datetime import datetime

import torch
from torch.utils.data import DataLoader

# Repo-local helpers assumed to be in scope: Read_Data (dataset), func
# (collate_fn), cuda (move-model-to-GPU helper), WarmupMultiStepLR (scheduler).


def train_dist(model, config, step, x, pre_model_file, model_file=None):
    """Multi-GPU (DDP) trainer. `step` resumes from that iteration; `x` scales
    the 60k/80k decay points and the 90k stopping point of the schedule."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument("--local_rank", type=int, default=0)
    args = parser.parse_args()
    local_rank = args.local_rank
    print('******************* local_rank', local_rank)
    torch.cuda.set_device(local_rank)
    torch.distributed.init_process_group(backend="nccl", init_method="env://")
    assert torch.distributed.is_initialized()

    batch_size = config.gpus * config.batch_size_per_GPU
    print('--------batch_size--------', batch_size)

    model = model(config)
    print(model)
    model.eval()  # keep dropout/BN in inference mode; the backbone is partly frozen

    # Load ImageNet-pretrained weights, keeping only keys the detector shares.
    model_dic = model.state_dict()
    pretrained_dict = torch.load(pre_model_file, map_location='cpu')
    a = pretrained_dict['classifier.0.weight']
    b = pretrained_dict['classifier.0.bias']
    c = pretrained_dict['classifier.3.weight']
    d = pretrained_dict['classifier.3.bias']
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dic}
    print(len(pretrained_dict))
    model_dic.update(pretrained_dict)
    print(list(model_dic.keys()))
    # Optionally seed the Fast R-CNN head with the VGG classifier weights:
    # model_dic['fast.fast_head.0.weight'] = a
    # model_dic['fast.fast_head.0.bias'] = b
    # model_dic['fast.fast_head.2.weight'] = c
    # model_dic['fast.fast_head.2.bias'] = d
    model.load_state_dict(model_dic)

    if step > 0:  # resume from a checkpoint
        model.load_state_dict(torch.load(model_file, map_location='cpu'))
        print(model_file)
    else:
        print(pre_model_file)

    # Freeze the first 8 parameter tensors (the early VGG conv blocks).
    parameters = list(model.parameters())
    for i in range(8):
        parameters[i].requires_grad = False

    model = torch.nn.parallel.DistributedDataParallel(
        model.cuda(),
        device_ids=[local_rank],
        output_device=local_rank,
        # this should be removed if we update BatchNorm stats
        broadcast_buffers=False,
    )
    train_params = list(model.parameters())[8:]

    # Split weights from biases so biases get no weight decay and a scaled lr.
    bias_p = []
    weight_p = []
    for name, p in model.named_parameters():
        if 'bias' in name:
            bias_p.append(p)
        else:
            weight_p.append(p)
    print(len(weight_p), len(bias_p))

    # When resuming, re-apply the step decays the schedule has already passed
    # (fixed: the original compared `lr` instead of `step`).
    lr = config.lr * config.batch_size_per_GPU
    if step >= 60000 * x:
        lr = lr / 10
    if step >= 80000 * x:
        lr = lr / 10
    print('lr ******************', lr)
    opt = torch.optim.SGD(
        [{'params': weight_p, 'weight_decay': config.weight_decay, 'lr': lr},
         {'params': bias_p, 'lr': lr * config.bias_lr_factor}],
        momentum=0.9,
    )

    epochs = 10000
    flag = False
    dataset = Read_Data(config)
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    dataloader = DataLoader(dataset, batch_size=config.batch_size_per_GPU,
                            sampler=train_sampler, collate_fn=func,
                            drop_last=True, pin_memory=True)
    for epoch in range(epochs):
        train_sampler.set_epoch(epoch)  # reshuffle shards each epoch
        for imgs, bboxes, num_b, num_H, num_W in dataloader:
            loss = model(imgs, bboxes, num_b, num_H, num_W)
            loss = loss / imgs.shape[0]
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(train_params, 35, norm_type=2)
            opt.step()
            # torch.cuda.empty_cache()
            if step % 20 == 0 and local_rank == 0:
                print(datetime.now(), 'loss:%.4f' % loss,
                      opt.param_groups[0]['lr'], step)
            step += 1
            if step == int(60000 * x) or step == int(80000 * x):
                for param_group in opt.param_groups:
                    param_group['lr'] = param_group['lr'] / 10
                    print('***************************', param_group['lr'], local_rank)
            if ((step <= 10000 and step % 1000 == 0)
                    or step % 5000 == 0 or step == 1) and local_rank == 0:
                torch.save(model.module.state_dict(),
                           './models/vgg16_cascade_%dx_%d_1_%d.pth' % (x, step, local_rank))
            if step >= 90010 * x:
                flag = True
                break
        if flag:
            break
    if local_rank == 0:
        torch.save(model.module.state_dict(),
                   './models/vgg16_cascade_%dx_final_1_%d.pth' % (x, local_rank))
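# Usage sketch (an assumption, not repo code): launch one process per GPU with
# torch.distributed.launch, which supplies the --local_rank flag that
# train_dist parses. `CascadeNet` and `Config` below are hypothetical
# placeholders for the repo's model class and config object.
#
#   python -m torch.distributed.launch --nproc_per_node=2 train_dist.py
#
# if __name__ == '__main__':
#     train_dist(CascadeNet, Config(), step=0, x=1,
#                pre_model_file='./models/vgg16_pretrained.pth')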
def train(model, config, step, x, pre_model_file, model_file=None):
    """Single-GPU trainer for the VGG-16 cascade model."""
    model = model(config)
    model.eval()

    # Load ImageNet-pretrained VGG-16 weights and reuse its classifier
    # layers to initialise the Fast R-CNN head.
    model_dic = model.state_dict()
    pretrained_dict = torch.load(pre_model_file, map_location='cpu')
    a = pretrained_dict['classifier.0.weight']
    b = pretrained_dict['classifier.0.bias']
    c = pretrained_dict['classifier.3.weight']
    d = pretrained_dict['classifier.3.bias']
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dic}
    print(len(pretrained_dict))
    model_dic.update(pretrained_dict)
    print(list(model_dic.keys()))
    model_dic['fast.fast_head.0.weight'] = a
    model_dic['fast.fast_head.0.bias'] = b
    model_dic['fast.fast_head.2.weight'] = c
    model_dic['fast.fast_head.2.bias'] = d
    model.load_state_dict(model_dic)

    if step > 0:  # resume from a checkpoint
        model.load_state_dict(torch.load(model_file, map_location='cpu'))
        print(model_file)
    else:
        print(pre_model_file)

    # Freeze the first 8 parameter tensors (the early VGG conv blocks).
    train_params = list(model.parameters())
    for p in train_params[:8]:
        p.requires_grad = False
    cuda(model)
    train_params = list(model.parameters())[8:]

    # When resuming, re-apply the step decays the schedule has already passed.
    lr = config.lr * config.batch_size_per_GPU
    if step >= 60000 * x:
        lr = lr / 10
    if step >= 80000 * x:
        lr = lr / 10
    print('lr ******************', lr)
    print('weight_decay ******************', config.weight_decay)

    # Toggle between two parameter groupings; the active branch splits
    # weights vs. biases, the dead branch would additionally exempt 1-D
    # (BN-style) weights from weight decay.
    if True:
        bias_p = []
        weight_p = []
        print(len(train_params))
        for name, p in model.named_parameters():
            if 'bias' in name:
                bias_p.append(p)
            else:
                weight_p.append(p)
        print(len(weight_p), len(bias_p))
        opt = torch.optim.SGD(
            [{'params': weight_p, 'weight_decay': config.weight_decay, 'lr': lr},
             {'params': bias_p, 'lr': lr * config.bias_lr_factor}],
            momentum=0.9,
        )
    else:
        bias_p = []
        weight_p = []
        bn_weight_p = []
        print(len(train_params))
        for name, p in model.named_parameters():
            print(name, p.shape)
            if len(p.shape) == 1:
                if 'bias' in name:
                    bias_p.append(p)
                else:
                    bn_weight_p.append(p)
            else:
                weight_p.append(p)
        print(len(weight_p), len(bias_p), len(bn_weight_p))
        opt = torch.optim.SGD(
            [{'params': weight_p, 'weight_decay': config.weight_decay, 'lr': lr},
             {'params': bn_weight_p, 'lr': lr},
             {'params': bias_p, 'lr': lr * config.bias_lr_factor}],
            momentum=0.9,
        )

    dataset = Read_Data(config)
    dataloader = DataLoader(dataset, batch_size=config.batch_size_per_GPU,
                            collate_fn=func, shuffle=True, drop_last=True,
                            pin_memory=True, num_workers=6)
    epochs = 10000
    flag = False
    print('start: step=', step)
    for epoch in range(epochs):
        for imgs, bboxes, num_b, num_H, num_W in dataloader:
            loss = model(imgs, bboxes, num_b, num_H, num_W)
            loss = loss / imgs.shape[0]
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(train_params, 10, norm_type=2)
            opt.step()
            if step % 20 == 0:
                # model.a/b/c/d are the per-branch losses the forward pass
                # stores on the model.
                print(datetime.now(), 'loss:%.4f' % loss,
                      'rpn_cls_loss:%.4f' % model.a, 'rpn_box_loss:%.4f' % model.b,
                      'fast_cls_loss:%.4f' % model.c, 'fast_box_loss:%.4f' % model.d,
                      model.fast_num, model.fast_num_P,
                      opt.param_groups[0]['lr'], step)
            step += 1
            if step == int(60000 * x) or step == int(80000 * x):
                for param_group in opt.param_groups:
                    param_group['lr'] = param_group['lr'] / 10
                    print('*******************************************', param_group['lr'])
            if (step <= 10000 and step % 1000 == 0) or step % 5000 == 0 or step == 1:
                torch.save(model.state_dict(), './models/vgg16_cascade_%d_2.pth' % step)
            if step >= 90010 * x:
                flag = True
                break
        if flag:
            break
    torch.save(model.state_dict(), './models/vgg16_cascade_final_2.pth')
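# Usage sketch (hypothetical names): a fresh run vs. resuming. For a resume,
# `step` and `model_file` must refer to the same checkpoint iteration.
#
# train(CascadeNet, Config(), step=0, x=1,
#       pre_model_file='./models/vgg16_pretrained.pth')
# train(CascadeNet, Config(), step=60000, x=1,
#       pre_model_file='./models/vgg16_pretrained.pth',
#       model_file='./models/vgg16_cascade_60000_2.pth')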
def train(model, config, step, x, pre_model_file, model_file=None):
    """Single-GPU trainer for the FPN (ResNet-50) model, using
    WarmupMultiStepLR for the decay schedule."""
    dataset = Read_Data(config)
    dataloader = DataLoader(dataset, batch_size=config.batch_size_per_GPU,
                            collate_fn=func, shuffle=True, drop_last=True,
                            pin_memory=True)
    model = model(config)
    print(model)
    model.eval()

    # The backbone checkpoint has bare layer names; prefix them with
    # 'features.' to match the detector's state dict.
    model_dic = model.state_dict()
    pretrained_dict = torch.load(pre_model_file, map_location='cpu')
    pretrained_dict = {'features.' + k: v for k, v in pretrained_dict.items()
                       if 'features.' + k in model_dic}
    print('*******', len(pretrained_dict))
    model_dic.update(pretrained_dict)
    model.load_state_dict(model_dic)

    if step > 0:  # resume from a checkpoint
        model.load_state_dict(torch.load(model_file, map_location='cpu'))
        print(model_file)
    else:
        print(pre_model_file)
    cuda(model)

    train_params = list(model.parameters())
    bias_p = []
    weight_p = []
    for name, p in model.named_parameters():
        if 'bias' in name:
            bias_p.append(p)
        else:
            weight_p.append(p)
    print(len(weight_p), len(bias_p))

    # When resuming, re-apply the decays already passed (fixed: the original
    # compared `lr` instead of `step`).
    lr = config.lr * config.batch_size_per_GPU
    if step >= 60000 * x:
        lr = lr / 10
    if step >= 80000 * x:
        lr = lr / 10
    print('lr ******************', lr)
    opt = torch.optim.SGD(
        [{'params': weight_p, 'weight_decay': config.weight_decay, 'lr': lr},
         {'params': bias_p, 'lr': lr * config.bias_lr_factor}],
        momentum=0.9,
    )
    scheduler = WarmupMultiStepLR(opt, [60000 * x, 80000 * x],
                                  warmup_factor=1 / 3, warmup_iters=500)

    epochs = 10000
    flag = False
    print('start: step=', step)
    for epoch in range(epochs):
        for imgs, bboxes, num_b, num_H, num_W in dataloader:
            loss = model(imgs, bboxes, num_b, num_H, num_W)
            loss = loss / imgs.shape[0]
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(train_params, 10, norm_type=2)
            opt.step()
            scheduler.step()
            if step % 20 == 0:
                print(datetime.now(), 'loss:%.4f' % loss,
                      'rpn_cls_loss:%.4f' % model.a, 'rpn_box_loss:%.4f' % model.b,
                      'fast_cls_loss:%.4f' % model.c, 'fast_box_loss:%.4f' % model.d,
                      model.fast_num, model.fast_num_P,
                      opt.param_groups[0]['lr'], step)
            step += 1
            # Manual decay superseded by the scheduler above:
            # if step == int(60000 * x) or step == int(80000 * x):
            #     for param_group in opt.param_groups:
            #         param_group['lr'] = param_group['lr'] / 10
            #         print('*********************************', param_group['lr'])
            if (step <= 10000 and step % 1000 == 0) or step % 5000 == 0 or step == 1:
                torch.save(model.state_dict(), './models/FPN_50_%d_1.pth' % step)
            if step >= 90010 * x:
                flag = True
                break
        if flag:
            break
    torch.save(model.state_dict(), './models/FPN_50_final_1.pth')
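# WarmupMultiStepLR is referenced above but not defined here. A minimal sketch,
# assuming it matches the scheduler of the same name in maskrcnn-benchmark
# (linear warmup, then a /10 decay at each milestone):

from bisect import bisect_right


class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, milestones, gamma=0.1,
                 warmup_factor=1.0 / 3, warmup_iters=500, last_epoch=-1):
        self.milestones = sorted(milestones)
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        warmup = 1.0
        if self.last_epoch < self.warmup_iters:
            # ramp linearly from warmup_factor up to 1 over warmup_iters steps
            alpha = self.last_epoch / self.warmup_iters
            warmup = self.warmup_factor * (1 - alpha) + alpha
        return [base_lr * warmup
                * self.gamma ** bisect_right(self.milestones, self.last_epoch)
                for base_lr in self.base_lrs]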