def train():
    ''' get the dataset and dataloader '''
    print(args.dataset)
    if args.dataset == 'COCO':
        if not os.path.exists(COCO_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = coco
        dataset = COCODetection(root=COCO_ROOT,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS),
                                filename='train.txt')
    elif args.dataset == 'VOC':
        if not os.path.exists(VOC_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = voc
        dataset = VOCDetection(root=VOC_ROOT,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         mean=cfg['mean'],
                                                         std=cfg['std']))
        print(len(dataset))
    elif args.dataset == 'LAJI':
        if not os.path.exists(LAJI_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = laji_se_resnext101_32x4d
        dataset = LAJIDetection(root=LAJI_ROOT,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          mean=cfg['mean'],
                                                          std=cfg['std']))
        print(len(dataset))

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # build the net and, if resuming, load weights
    ssd_net = build_ssd('train', size=cfg['min_dim'], cfg=cfg)
    # debugging aid: list the trainable parameters
    # for name, param in ssd_net.named_parameters():
    #     if param.requires_grad:
    #         print(name)

    start_iter = args.start_iter
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        data_tmp = torch.load(args.resume)
        # drop the "module." prefix that DataParallel adds to state-dict keys;
        # str.lstrip strips a *character set*, not a prefix, and would mangle
        # any key that happens to start with one of those letters
        data_tmp = {k[len("module."):] if k.startswith("module.") else k: v
                    for k, v in data_tmp.items()}
        ssd_net.load_state_dict(data_tmp)
        # checkpoints are named {net_name}_{epoch}_{step}.pth, so the
        # second-to-last underscore field is the epoch to resume from
        start_iter = int(args.resume.split("/")[-1].split("_")[-2])
        print("start_iter is {}".format(start_iter))

    if args.cuda:
        net = ssd_net.cuda()
        # net = torch.nn.DataParallel(net)
    net.train()

    # optimizer
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # optimizer = torch.optim.AdamW(net.parameters(), args.lr)

    # loss: SmoothL1 / IoU / GIoU / DIoU / CIoU
    print(cfg['losstype'])
    criterion = MultiBoxLoss(cfg=cfg, overlap_thresh=0.5,
                             prior_for_matching=True, bkg_label=0,
                             neg_mining=True, neg_pos=3, neg_overlap=0.5,
                             encode_target=False, use_gpu=args.cuda,
                             loss_name=cfg['losstype'])

    project_name = "_".join([args.net_name, args.config])
    pth_path = os.path.join(args.save_path, project_name)
    log_path = os.path.join(pth_path, 'tensorboard')
    os.makedirs(pth_path, exist_ok=True)
    os.makedirs(log_path, exist_ok=True)
    writer = SummaryWriter(
        log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name, epoch_size)

    iteration = args.start_iter
    step_index = 0
    loc_loss = 0
    conf_loss = 0
    step = 0
    num_iter_per_epoch = len(data_loader)

    for epoch in range(start_iter, args.max_epoch):
        progress_bar = tqdm(data_loader)
        for ii, batch_iterator in enumerate(progress_bar):
            iteration += 1
            if step in cfg['lr_steps']:
                step_index += 1
            adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                 iteration, num_iter_per_epoch)

            # load train data
            images, targets = batch_iterator
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            t0 = time.time()
            out = net(images, 'train')
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            # 'weight' (the localization-loss weight) is assumed to be defined
            # at module level, e.g. from args; it is not set in this function
            loss = weight * loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            progress_bar.set_description(
                'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. '
                'Reg loss: {:.5f}. Total loss: {:.5f}'.format(
                    step, epoch, args.max_epoch, ii + 1, num_iter_per_epoch,
                    loss_c.item(), loss_l.item(), loss.item()))
            writer.add_scalars('Loss', {'train': loss.item()}, step)
            writer.add_scalars('Regression_loss', {'train': loss_l.item()}, step)
            writer.add_scalars('Classification_loss', {'train': loss_c.item()}, step)
            current_lr = optimizer.param_groups[0]['lr']
            writer.add_scalar('learning_rate', current_lr, step)
            step += 1

            if step != 0 and step % 4000 == 0:
                torch.save(
                    net.state_dict(),
                    os.path.join(pth_path, f'{args.net_name}_{epoch}_{step}.pth'))

        loc_loss = 0
        conf_loss = 0

    torch.save(net.state_dict(),
               os.path.join(pth_path, f'{args.net_name}_{epoch}_{step}.pth'))
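# The loop above calls adjust_learning_rate(optimizer, args.gamma, epoch,
# step_index, iteration, num_iter_per_epoch) on every iteration, but the
# helper is not defined in this excerpt. A minimal sketch of what the
# signature suggests -- step decay plus a linear warmup; the one-epoch
# warmup length and the use of args.lr as the base rate are assumptions,
# not taken from the source:
def adjust_learning_rate(optimizer, gamma, epoch, step_index,
                         iteration, num_iter_per_epoch):
    if epoch == 0:
        # linear warmup from ~0 to args.lr over the first epoch
        lr = args.lr * (iteration + 1) / num_iter_per_epoch
    else:
        # step decay: shrink by gamma each time a cfg['lr_steps'] milestone passes
        lr = args.lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr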
def train():
    ''' get the dataset and dataloader '''
    print(args.dataset)
    if args.dataset == 'COCO':
        if not os.path.exists(COCO_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = coco
        dataset = COCODetection(root=COCO_ROOT,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS),
                                filename='train.txt')
    elif args.dataset == 'VOC':
        if not os.path.exists(VOC_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = voc
        dataset = VOCDetection(root=VOC_ROOT,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         mean=cfg['mean'],
                                                         std=cfg['std']))
        print(len(dataset))
    elif args.dataset == 'CRACK':
        if not os.path.exists(CRACK_ROOT):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = crack
        dataset = CRACKDetection(root=CRACK_ROOT,
                                 transform=SSDAugmentation(cfg['min_dim'],
                                                           mean=cfg['mean'],
                                                           std=cfg['std']))

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # build the net and, if resuming, load weights
    ssd_net = build_ssd('train', size=cfg['min_dim'], cfg=cfg)
    # debugging aid: list the trainable parameters
    # for name, param in ssd_net.named_parameters():
    #     if param.requires_grad:
    #         print(name)

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_state_dict(torch.load(args.resume))

    if args.cuda:
        net = ssd_net.cuda()
    net.train()

    # optimizer
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # loss: SmoothL1 / IoU / GIoU / DIoU / CIoU
    print(cfg['losstype'])
    criterion = MultiBoxLoss(cfg=cfg, overlap_thresh=0.5,
                             prior_for_matching=True, bkg_label=0,
                             neg_mining=True, neg_pos=3, neg_overlap=0.5,
                             encode_target=False, use_gpu=args.cuda,
                             loss_name=cfg['losstype'])

    if args.visdom:
        import visdom
        viz = visdom.Visdom(env=cfg['work_name'])
        vis_title = 'SSD on ' + args.dataset
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot(viz, 'Iteration', 'Loss',
                                    vis_title, vis_legend)
        epoch_plot = create_vis_plot(viz, 'Epoch', 'Loss',
                                     vis_title + " epoch loss", vis_legend)
        # epoch_acc = create_acc_plot(viz, 'Epoch', 'acc',
        #                             args.dataset + " Acc", ["Acc"])

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name, epoch_size)

    iteration = args.start_iter
    step_index = 0
    loc_loss = 0
    conf_loss = 0

    for epoch in range(args.max_epoch):
        for ii, batch_iterator in tqdm(enumerate(data_loader)):
            iteration += 1
            if iteration in cfg['lr_steps']:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)

            # load train data
            images, targets = batch_iterator
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            t0 = time.time()
            out = net(images, 'train')
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            # 'weight' (the localization-loss weight) is assumed to be defined
            # at module level, e.g. args.weight; it is not set in this function
            loss = weight * loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            if iteration % 10 == 0:
                print('timer: %.4f sec.' % (t1 - t0))
                print('iter ' + repr(iteration) + ' || Loss: %.4f ||'
                      % (loss.item()), end=' ')

            if args.visdom and iteration > 20 and iteration % 10 == 0:
                update_vis_plot(viz, iteration, loss_l.item(), loss_c.item(),
                                iter_plot, epoch_plot, 'append')

        if epoch % 10 == 0 and epoch > 60:
            print('Saving state, iter:', iteration)
            save_folder = os.path.join(args.work_dir, cfg['work_name'])
            os.makedirs(save_folder, exist_ok=True)
            torch.save(net.state_dict(),
                       os.path.join(save_folder, 'ssd' + repr(epoch) + '_.pth'))

        if args.visdom:
            update_vis_plot(viz, epoch, loc_loss, conf_loss,
                            epoch_plot, epoch_plot, 'append', epoch_size)
        loc_loss = 0
        conf_loss = 0

    torch.save(net.state_dict(),
               os.path.join(args.work_dir, cfg['work_name'],
                            'ssd' + repr(epoch) + str(args.weight) + '_.pth'))
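# create_vis_plot and update_vis_plot are used above but not defined in this
# excerpt. Sketches adapted from the stock ssd.pytorch visdom helpers, with
# viz passed explicitly to match the call sites above -- an approximation,
# not the authors' exact code:
def create_vis_plot(viz, xlabel, ylabel, title, legend):
    # one line chart with three series: loc, conf, total
    return viz.line(X=torch.zeros((1,)).cpu(),
                    Y=torch.zeros((1, 3)).cpu(),
                    opts=dict(xlabel=xlabel, ylabel=ylabel,
                              title=title, legend=legend))

def update_vis_plot(viz, iteration, loc, conf, window1, window2,
                    update_type, epoch_size=1):
    # append the losses to an existing window; epoch_size > 1 turns the
    # accumulated per-epoch sums into per-batch means
    viz.line(X=torch.ones((1, 3)).cpu() * iteration,
             Y=torch.Tensor([loc, conf, loc + conf]).unsqueeze(0).cpu() / epoch_size,
             win=window1,
             update=update_type)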
if __name__ == '__main__':
    print('Loading Dataset...')
    (show_classes, num_classes, dataset,
     epoch_size, max_iter, testset) = load_dataset()

    print('Loading Network...')
    from models.detector import Detector
    model = Detector(args.size, num_classes, args.backbone, args.neck)
    model.train()
    model.cuda()
    num_param = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Total params: {:e}'.format(num_param))

    print('Preparing Optimizer & AnchorBoxes...')
    optimizer = optim.SGD(tencent_trick(model), lr=args.lr,
                          momentum=0.9, weight_decay=0.0005)
    criterion = MultiBoxLoss(num_classes, mutual_guide=args.mutual_guide)
    priorbox = PriorBox(args.base_anchor_size, args.size)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    if args.trained_model is not None:
        print('loading weights from', args.trained_model)
        state_dict = torch.load(args.trained_model)
        model.load_state_dict(state_dict, strict=True)
    else:
        print('Training {}-{} on {} with {} images'.format(
            args.neck, args.backbone, dataset.name, len(dataset)))

    os.makedirs(args.save_folder, exist_ok=True)
    epoch = 0
    timer = Timer()
    for iteration in range(max_iter):
        ...  # loop body not included in this excerpt
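# tencent_trick(model) above builds the SGD parameter groups but is not
# defined in this excerpt. The name usually refers to applying weight decay
# only to convolution/linear weights, and exempting biases and BatchNorm
# parameters; a sketch under that assumption:
def tencent_trick(model):
    decay, no_decay = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        # 1-D parameters are BatchNorm weights/biases and layer biases
        if len(param.shape) == 1 or name.endswith('.bias'):
            no_decay.append(param)
        else:
            decay.append(param)
    return [{'params': no_decay, 'weight_decay': 0.0},
            {'params': decay}]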
def train():
    ''' get the dataset and dataloader '''
    if args.dataset == 'PB':
        # error out if either the image dir or the annotation dir is missing
        # (the original used `or`, which only fired when both were absent)
        if not (os.path.exists(args.image_path)
                and os.path.exists(args.anno_path)):
            parser.error('Must specify dataset_root if specifying dataset')
        cfg = pb512 if args.min_dim == 512 else pb300
        dataset = PBDetection(image_path=args.image_path,
                              anno_path=args.anno_path,
                              transform=SSDAugmentation(cfg['min_dim'],
                                                        mean=cfg['mean'],
                                                        std=cfg['std']))

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # build the net and, if resuming, load weights
    ssd_net = build_ssd('train', size=cfg['min_dim'], cfg=cfg)
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_state_dict(torch.load(args.resume))

    if args.cuda:
        net = ssd_net.cuda()
    net.train()

    # optimizer
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # loss
    # print(cfg['losstype'])
    criterion = MultiBoxLoss(cfg=cfg, overlap_thresh=0.5,
                             prior_for_matching=True, bkg_label=0,
                             neg_mining=True, neg_pos=3, neg_overlap=0.5,
                             encode_target=False, weight=args.loss_weights,
                             use_gpu=args.cuda, loss_name=cfg['losstype'])

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD {} on: {}, epoch size: {}'.format(
        args.min_dim, dataset.name, epoch_size))

    iteration = args.start_iter
    step_index = 0
    loc_loss = 0
    conf_loss = 0
    save_folder = os.path.join(args.work_dir, cfg['work_name'])
    os.makedirs(save_folder, exist_ok=True)
    Lossc, Lossl, Loss = [], [], []

    for epoch in range(args.max_epoch):
        print('\nEpoch : {:0>3d}'.format(epoch + 1))
        for _, batch_iterator in tqdm(enumerate(data_loader)):
            iteration += 1
            if iteration in cfg['lr_steps']:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)

            # load train data
            images, targets = batch_iterator
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            t0 = time.time()
            out = net(images, 'train')
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            # 'weight' (the localization-loss weight) is assumed to be defined
            # at module level, e.g. args.weight; it is not set in this function
            loss = weight * loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            if iteration % 10 == 0:
                print('timer: {:.4f} sec.'.format(t1 - t0))
                print('iter {} Loss: {:.4f} '.format(iteration, loss.item()),
                      end=' ')
                Loss.append(loss.item())
                # the running loc sum goes in the localization log and the conf
                # sum in the classification log (the original appended them the
                # other way round)
                Lossl.append(loc_loss)
                Lossc.append(conf_loss)

        if epoch % 10 == 0 and epoch > 60:
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       os.path.join(save_folder, 'ssd' + repr(epoch) + '.pth'))
        loc_loss = 0
        conf_loss = 0

    torch.save(net.state_dict(),
               os.path.join(save_folder,
                            'ssd' + repr(epoch) + str(args.weight) + '.pth'))
    with open(os.path.join(save_folder, 'lossc.json'), 'w', encoding='utf-8') as obj:
        json.dump(Lossc, obj, ensure_ascii=False)
    with open(os.path.join(save_folder, 'lossl.json'), 'w', encoding='utf-8') as obj:
        json.dump(Lossl, obj, ensure_ascii=False)
    with open(os.path.join(save_folder, 'loss.json'), 'w', encoding='utf-8') as obj:
        json.dump(Loss, obj, ensure_ascii=False)
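# The three train() scripts above all hand detection_collate to their
# DataLoader, and the second and fourth scripts call the three-argument
# adjust_learning_rate; neither helper appears in this excerpt. Sketches of
# the stock ssd.pytorch versions these scripts appear to build on -- close
# to, but not verified against, the authors' code:
def detection_collate(batch):
    # images are a fixed size and can be stacked into one tensor; each image
    # has a different number of boxes, so targets stay a list of tensors
    imgs, targets = [], []
    for sample in batch:
        imgs.append(sample[0])
        targets.append(torch.FloatTensor(sample[1]))
    return torch.stack(imgs, 0), targets

def adjust_learning_rate(optimizer, gamma, step):
    # plain step decay: lr = args.lr * gamma ** step
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr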