def train():
    # dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean), landmark_num)
    # dataset = Dataset300W(training_dataset, preproc(img_dim, rgb_mean))
    dataset = Dataset300W(training_dataset, preproc(img_dim, rgb_mean), landmark_indices)
    dataloader = data.DataLoader(dataset, batch_size, shuffle=True,
                                 num_workers=num_workers, collate_fn=detection_collate)

    net = RetinaFace(cfg=cfg)
    print("Printing net...")
    print(net)

    if args.resume_net is not None:
        print('Loading resume network...')
        load_net(net, args.resume_net)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()
    cudnn.benchmark = True

    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    optimizer = optim.SGD(net.parameters(), lr=initial_lr,
                          momentum=momentum, weight_decay=weight_decay)
    criterion = MultiBoxLoss(num_classes, landmark_num, 0.35, True, 0, True, 7, 0.35, False)

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    epoch_size = math.ceil(len(dataset) / batch_size)
    max_iter = max_epoch * epoch_size

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(dataloader)
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
                fullname = os.path.join(
                    save_folder,
                    cfg['name'] + '_landmark' + str(landmark_num) + '_epoch_' + str(epoch) + '.pth')
                torch.save(net.state_dict(), fullname)
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + cfg['landmark_weight'] * loss_landm
        loss.backward()
        optimizer.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || '
              'Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || '
              'Batchtime: {:.4f} s || ETA: {}'.format(
                  epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                  iteration + 1, max_iter, loss_l.item(), loss_c.item(),
                  loss_landm.item(), lr, batch_time,
                  str(datetime.timedelta(seconds=eta))))

    fullname = os.path.join(
        save_folder, cfg['name'] + '_landmark' + str(landmark_num) + '_Final.pth')
    torch.save(net.state_dict(), fullname)
    print("Training finished; final model saved.")
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)
        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()
        # net = net.cuda()
        # criterion = criterion.cuda()

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            loss_avgs[avg].reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma ** step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())
                losses = {k: v.mean() for k, v in losses.items()}  # Mean here because Dataparallel

                loss = sum([losses[k] for k in losses])  # original
                # loss = sum([losses[k] for k in losses]) + losses['S'] * 10  # Huan

                # Backprop
                loss.backward()  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 100 == 0:
                    eta_str = str(datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0]
                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f')
                          % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam the weights folder
        SavePath.remove_interrupt(args.save_folder)
        yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
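# The loop above relies on two small helpers, set_lr and MovingAverage, that
# are not shown in this excerpt. Minimal sketches consistent with their call
# sites follow; treat the exact bodies as assumptions, not the project's code.
from collections import deque

def set_lr(optimizer, new_lr):
    # Apply the same learning rate to every parameter group.
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

class MovingAverage:
    """Windowed running mean, as implied by the time_avg / loss_avgs usage."""
    def __init__(self, max_window_size=1000):
        self.window = deque(maxlen=max_window_size)

    def add(self, elem):
        self.window.append(elem)

    def get_avg(self):
        return sum(self.window) / max(len(self.window), 1)

    def reset(self):
        self.window.clear()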
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when an epoch ends
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % loss.item(), end=' ')

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_' + args.dataset + '_' + repr(iteration) + '.pth')

    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
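# For reference, adjust_learning_rate and weights_init are defined elsewhere
# in this script. Minimal sketches consistent with how they are called here
# (step decay by gamma ** step; xavier init for the new conv layers); the
# exact bodies are assumptions.
import torch.nn as nn
import torch.nn.init as init

def adjust_learning_rate(optimizer, gamma, step):
    # Decay the initial rate to args.lr * (gamma ** step) at each lr_step.
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def weights_init(m):
    # Xavier-initialize newly added conv layers and zero their biases.
    if isinstance(m, nn.Conv2d):
        init.xavier_uniform_(m.weight.data)
        m.bias.data.zero_()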
def train(net, cfg, resume_epoch):
    torch.set_grad_enabled(True)

    rgb_mean = (104, 117, 123)  # bgr order
    num_classes = 2
    img_dim = cfg['image_size']
    batch_size = cfg['batch_size']
    max_epoch = cfg['epoch']
    # gpu_train = cfg['gpu_train']
    num_workers = 4
    momentum = 0.9
    weight_decay = 5e-4
    initial_lr = 1e-3
    gamma = 0.1
    training_dataset = './data/widerface/train/label.txt'

    # cudnn.benchmark = True
    optimizer = optim.SGD(net.parameters(), lr=initial_lr,
                          momentum=momentum, weight_decay=weight_decay)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    epoch = resume_epoch
    print('Loading Dataset...')

    dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean))

    epoch_size = math.ceil(len(dataset) / batch_size)
    max_iter = max_epoch * epoch_size

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if resume_epoch > 0:
        start_iter = resume_epoch * epoch_size
    else:
        start_iter = 0

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset, batch_size, shuffle=True,
                                num_workers=num_workers,
                                collate_fn=detection_collate))
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.to(device)
        targets = [anno.to(device) for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || '
              'Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || '
              'Batchtime: {:.4f} s || ETA: {}'.format(
                  epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                  iteration + 1, max_iter, loss_l.item(), loss_c.item(),
                  loss_landm.item(), lr, batch_time,
                  str(datetime.timedelta(seconds=eta))))
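# A plausible implementation of the six-argument adjust_learning_rate used in
# the loop above (optional linear warmup for the first epochs, then step decay
# by gamma ** step_index). The warmup length and floor are assumptions; only
# the signature and the returned lr are taken from the call site.
def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size):
    warmup_epoch = -1  # set to e.g. 5 to enable linear warmup
    if epoch <= warmup_epoch:
        lr = 1e-6 + (initial_lr - 1e-6) * iteration / (epoch_size * warmup_epoch)
    else:
        lr = initial_lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr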
    # Strip the 'module.' prefix that DataParallel adds to parameter names
    for k, v in state_dict.items():
        head = k[:7]
        if head == 'module.':
            name = k[7:]  # remove `module.`
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()
    cudnn.benchmark = True

    optimizer = optim.SGD(net.parameters(), lr=initial_lr,
                          momentum=momentum, weight_decay=weight_decay)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()


def test(testdata):
    net.eval()
    print("Testing ...")
    loss_sum = 0
    testLoader = data.DataLoader(testdata, batch_size=1, num_workers=0,
                                 collate_fn=detection_collate)
    for i, bat in enumerate(testLoader):
        images, targets = bat
        images = images.cuda()
def train():
    cfg = voc
    voc_dataset = VOCDetection(root=dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))

    net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    # cuda parse
    net.to(device)

    # weight
    if resume:
        print('Resuming training, loading {}...'.format(resume))
        net.load_weights(resume)
    else:
        vgg_weights = torch.load(os.path.join(save_folder, basenet))
        # print(vgg_weights)
        # print(vgg_weights['features'])
        # for k in list(vgg_weights.keys()):
        #     _, new_key = k.split(".", 1)
        #     print("wzdebug: ", k, new_key)
        #     # print(vgg_weights[k])
        #     vgg_weights[new_key] = vgg_weights.pop(k)
        print('Loading base network...')
        net.vgg.load_state_dict(vgg_weights)

    optimizer = optim.Adam(net.parameters(), lr=learning_rate,
                           weight_decay=weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, True)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(voc_dataset) // batch_size
    step_index = 0

    data_loader = data.DataLoader(voc_dataset, batch_size,
                                  num_workers=num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(start_iter, cfg['max_iter']):
        # if iteration != 0 and (iteration % epoch_size == 0):
        #     # update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
        #     #                 'append', epoch_size)
        #     # reset epoch loss counters

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, gamma, step_index)

        # load train data; restart the iterator when an epoch ends
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        images = images.to(device)
        with torch.no_grad():
            targets = [ann.to(device) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        # clip gradients after backward(), once they exist
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=2, norm_type=2)
        optimizer.step()
        t1 = time.time()

        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration != 0 and (iteration % epoch_size == 0):
            epoch += 1
            writer.add_scalar('loss', (loc_loss + conf_loss) / epoch_size, epoch)
            print("epoch: ", epoch,
                  "Loss: %.4f" % ((loc_loss + conf_loss) / epoch_size))
            loc_loss = 0
            conf_loss = 0

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % loss.item(), end=' ')
            # if args.visdom:
            #     update_vis_plot(iteration, loss_l.item(), loss_c.item(),
            #                     iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 4000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       save_folder + save_semi_folder + 'ssd300_VOC_' + repr(iteration) + '.pth')

    torch.save(net.state_dict(), save_folder + 'VOC' + '.pth')
    net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        if args.multigpu:
            net = torch.nn.DataParallel(s3fd_net)
        net = net.cuda()
        cudnn.benchmark = True

    if not args.resume:
        print('Initializing weights...')
        s3fd_net.extras.apply(s3fd_net.weights_init)
        s3fd_net.loc.apply(s3fd_net.weights_init)
        s3fd_net.conf.apply(s3fd_net.weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg, args.dataset, args.cuda)
    print('Loading wider dataset...')


save_folder = None
writer = None


def train():
    global save_folder
    global writer
    prefix = time.strftime("%Y-%m-%d-%H:%M:%S")
    save_folder = "train/models_{}".format(prefix)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    logging.basicConfig()
def train():
    global REGULARIZATION_LOSS_WEIGHT, PRIOR_LOSS_WEIGHT, NMS_CONF_THRE

    if args.dataset == 'COCO':
        cfg = coco
        dataset = COCODetection(root=args.data_root,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        cfg = voc
        dataset = VOCDetection(root=args.data_root,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))

    ssd_net = build_bidet_ssd('train', cfg['min_dim'], cfg['num_classes'],
                              nms_conf_thre=NMS_CONF_THRE)
    net = ssd_net

    if args.cuda:
        cudnn.benchmark = True

    opt_state_dict = None
    if args.resume:
        print('Resuming training, loading {}...'.format(args.weight_path))
        try:
            ssd_net.load_state_dict(torch.load(args.weight_path))
        except:  # checkpoint
            print('Extracting from checkpoint')
            ckp = torch.load(args.weight_path, map_location='cpu')
            ssd_net.load_state_dict(ckp['weight'])
            opt_state_dict = ckp['opt']
    else:
        if args.basenet.lower() != 'none':
            vgg_weights = torch.load(args.basenet)
            print('Loading base network...')
            ssd_net.vgg.layers.load_state_dict(vgg_weights, strict=True)

    if args.cuda:
        net = nn.DataParallel(ssd_net).cuda()

    if args.opt.lower() == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                              lr=args.lr, momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.opt.lower() == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                               lr=args.lr, weight_decay=args.weight_decay)
    else:
        exit(-1)

    if opt_state_dict is not None:
        print('Load optimizer state dict!')
        optimizer.load_state_dict(opt_state_dict)
        if get_lr(optimizer) != args.lr:
            adjust_learning_rate(optimizer, args.lr)

    optimizer.zero_grad()

    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()

    # loss counters
    loss_count = 0.  # for prior loss
    loc_loss_save = 0.
    conf_loss_save = 0.
    reg_loss_save = 0.
    prior_loss_save = 0.
    loss_l = 0.
    loss_c = 0.
    loss_r = 0.
    loss_p = 0.

    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True, drop_last=True)

    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        t0 = time.time()
        lr = get_lr(optimizer)

        if iteration % epoch_size == 0 and iteration != 0:
            # reset epoch loss counters
            epoch += 1

        if iteration in cfg['lr_steps']:
            # add our BiDet loss after the first lr decay
            if step_index == 0:
                args.reg_weight = 0.1
                args.prior_weight = 0.2
                REGULARIZATION_LOSS_WEIGHT = args.reg_weight
                PRIOR_LOSS_WEIGHT = args.prior_weight
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
            print("decay lr")

        # load train data; restart the iterator when an epoch ends
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        with torch.no_grad():
            if args.cuda:
                images = images.float().cuda()
                targets = [ann.cuda() for ann in targets]
            else:
                images = images.float()

        batch_size = images.size(0)
        if PRIOR_LOSS_WEIGHT != 0.:
            gt_class = [targets[batch_idx][:, -1] for batch_idx in range(batch_size)]

        # forward
        out = net(images)
        loc_data, conf_data, priors, feature_map = out

        # sample loc data from predicted miu and sigma
        normal_dist = torch.randn(loc_data.size(0), loc_data.size(1), 4).float().cuda()
        log_sigma_2 = loc_data[:, :, :4]
        miu = loc_data[:, :, 4:]
        sigma = torch.exp(log_sigma_2 / 2.)
        sample_loc_data = normal_dist * sigma * args.sigma + miu
        loc_data = sample_loc_data
        out = (loc_data, conf_data, priors)

        # BP
        loss_l, loss_c = criterion(out, targets)
        loss_temp = loss_l + loss_c

        # COCO dataset bug, maybe due to wrong annotations?
        if loss_temp.item() == float("Inf"):
            print('inf loss error!')
            # the following code is to clear GPU memory for feature_map
            # I don't know other better ways to do so except for BP the loss
            loss_temp.backward()
            net.zero_grad()
            optimizer.zero_grad()
            torch.cuda.empty_cache()
            continue

        if PRIOR_LOSS_WEIGHT != 0.:
            loss_count = 0.
            detect_result = net.module.detect_prior.forward(
                loc_data,                       # localization preds
                net.module.softmax(conf_data),  # confidence preds
                priors,                         # default boxes
                gt_class
            )  # [batch, classes, top_k, 5 (score, (y1, x1, y2, x2))]
            num_classes = detect_result.size(1)

            # skip j = 0, because it's the background class
            for j in range(1, num_classes):
                all_dets = detect_result[:, j, :, :]  # [batch, top_k, 5]
                all_mask = all_dets[:, :, :1].gt(0.).expand_as(all_dets)  # [batch, top_k, 5]

                for batch_idx in range(batch_size):
                    # skip non-existed class
                    if not (gt_class[batch_idx] == j - 1).any():
                        continue
                    dets = torch.masked_select(all_dets[batch_idx],
                                               all_mask[batch_idx]).view(-1, 5)  # [num, 5]
                    if dets.size(0) == 0:
                        continue
                    # if pred num == gt num, skip
                    if dets.size(0) <= ((gt_class[batch_idx] == j - 1).sum().detach().cpu().item()):
                        continue

                    scores = dets[:, 0]  # [num]
                    scores_sum = scores.sum().item()  # no grad
                    scores = scores / scores_sum  # normalization
                    log_scores = log_func(scores)
                    gt_num = (gt_class[batch_idx] == j - 1).sum().detach().cpu().item()
                    loss_p += (-1. * log_scores.sum() / float(gt_num))
                    loss_count += 1.

            loss_p /= (loss_count + 1e-6)
            loss_p *= PRIOR_LOSS_WEIGHT

        # Calculate regularization loss on feature maps
        # directly use L2 loss here
        if REGULARIZATION_LOSS_WEIGHT != 0.:
            f_num = len(feature_map)
            loss_r = 0.
            for f_m in feature_map:
                loss_r += (f_m ** 2).mean()
            loss_r *= REGULARIZATION_LOSS_WEIGHT
            loss_r /= float(f_num)

        loss = loss_l + loss_c + loss_r + loss_p

        # COCO dataset bug, maybe due to wrong annotations?
        if loss.item() == float("Inf"):
            print('inf loss error!')
            # the following code is to clear GPU memory for feature_map
            # I don't know other better ways to do so except for BP the loss
            loss.backward()
            net.zero_grad()
            optimizer.zero_grad()
            torch.cuda.empty_cache()
            continue

        # compute gradient and do optimizer step
        loss.backward()
        # clip gradient because binary net training is very unstable
        if args.clip_grad:
            grad_norm = get_grad_norm(net)
            nn.utils.clip_grad_norm_(net.parameters(), GRADIENT_CLIP_NORM)
        optimizer.step()
        optimizer.zero_grad()

        loss_l = loss_l.detach().cpu().item()
        loss_c = loss_c.detach().cpu().item()
        if REGULARIZATION_LOSS_WEIGHT != 0.:
            loss_r = loss_r.detach().cpu().item()
        if PRIOR_LOSS_WEIGHT != 0.:
            loss_p = loss_p.detach().cpu().item()

        loc_loss_save += loss_l
        conf_loss_save += loss_c
        reg_loss_save += loss_r
        prior_loss_save += loss_p

        t1 = time.time()

        if iteration % 100 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter:', iteration, 'loss:', round(loss.detach().cpu().item(), 4))
            print('conf_loss:', round(loss_c, 4), 'loc_loss:', round(loss_l, 4),
                  'reg_loss:', round(loss_r, 4), 'prior_loss:', round(loss_p, 4),
                  'lr:', lr)
            if args.clip_grad:
                print('gradient norm:', grad_norm)
            torch.cuda.empty_cache()

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            loss_save = loc_loss_save + conf_loss_save + reg_loss_save + prior_loss_save
            checkpoint = {
                'weight': net.module.state_dict(),
                'opt': optimizer.state_dict()
            }
            torch.save(
                checkpoint,
                logs_dir + '/model_' + str(iteration) +
                '_loc_' + str(round(loc_loss_save / 5000., 4)) +
                '_conf_' + str(round(conf_loss_save / 5000., 4)) +
                '_reg_' + str(round(reg_loss_save / 5000., 4)) +
                '_prior_' + str(round(prior_loss_save / 5000., 4)) +
                '_loss_' + str(round(loss_save / 5000., 4)) +
                '_lr_' + str(round(args.lr * (args.gamma ** step_index), 6)) + '.pth')
            loc_loss_save = 0.
            conf_loss_save = 0.
            reg_loss_save = 0.
            prior_loss_save = 0.

        # reset per-iteration losses (loss_p accumulates inside the prior loop)
        loss_l = 0.
        loss_c = 0.
        loss_r = 0.
        loss_p = 0.
        loss_count = 0.

    torch.save(net.module.state_dict(), logs_dir + '/' + args.dataset + '_final.pth')
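# The loop above calls get_lr(optimizer) and get_grad_norm(net), which are
# defined elsewhere. Minimal sketches consistent with the call sites; treat
# the bodies as assumptions, not the repository's exact code.
def get_lr(optimizer):
    # All param groups share one rate here, so read it from the first.
    return optimizer.param_groups[0]['lr']

def get_grad_norm(net):
    # Global L2 norm over all parameter gradients, for logging.
    total = 0.
    for p in net.parameters():
        if p.grad is not None:
            total += p.grad.data.norm(2).item() ** 2
    return total ** 0.5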
def train():
    init_log('global', logging.INFO)
    logger = logging.getLogger("global")

    if args.img_dim == 300:
        cfg = (FEDet_VOC_300, FEDet_COCO_300)[args.dataset == 'COCO']
    else:
        cfg = (FEDet_VOC_512, FEDet_COCO_512)[args.dataset == 'COCO']

    if args.use_dataAug:
        train_transform = SSDAugmentation(cfg['min_dim'], MEANS)
    else:
        train_transform = Augmentation(cfg['min_dim'], MEANS)

    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            logger.warning("WARNING: Using default COCO dataset_root because " +
                           "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        dataset = COCODetection(root=args.dataset_root,
                                image_sets=[("2017", "train")],
                                transform=train_transform,
                                target_transform=COCOAnnotationTransform(),
                                aux=args.use_aux)
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        args.dataset_root = VOC_ROOT
        dataset = VOCDetection(root=args.dataset_root,
                               image_sets=[('2007', 'trainval'), ('2012', 'trainval')],
                               transform=train_transform,
                               aux=args.use_aux)

    if not os.path.exists(args.save_folder):
        os.makedirs(args.save_folder)

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    if args.arch == 'FEDet':
        build_net = build_fedet(cfg, 'train', cfg['min_dim'], cfg['num_classes'])
    else:
        logger.error('architecture error!!!')
        return

    net = build_net
    logger.info(net)
    logger.info('---------config-----------')
    logger.info(cfg)

    if args.cuda:
        net = torch.nn.DataParallel(build_net)
        cudnn.benchmark = True

    if args.resume:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        build_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.pretrained_model + args.basenet)
        logger.info('Loading base network...')
        build_net.vgg.load_state_dict(vgg_weights)

    if not args.resume:
        logger.info('Initializing weights...')

        def weights_init(m):
            for key in m.state_dict():
                if key.split('.')[-1] == 'weight':
                    if 'conv' in key:
                        init.kaiming_normal_(m.state_dict()[key], mode='fan_out')
                    if 'bn' in key:
                        m.state_dict()[key][...] = 1
                elif key.split('.')[-1] == 'bias':
                    m.state_dict()[key][...] = 0

        # initialize newly added layers' weights
        build_net.extras.apply(weights_init)
        build_net.loc.apply(weights_init)
        build_net.conf.apply(weights_init)

    if args.cuda:
        net.cuda()
        cudnn.benchmark = True

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion1 = MultiBoxLoss(cfg, 0.5, True, 0, True, 3, 0.5, False, args.cuda)
    criterion2 = nn.BCELoss(size_average=True).cuda()

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    ssm_loss = 0  # SSM loss counter
    epoch = 0
    logger.info('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    logger.info('Training FEDet on: %s' % dataset.name)
    logger.info('Training images size: %d' % len(dataset))
    logger.info('Using the specified args:')
    logger.info(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot(viz, 'Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot(viz, 'Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate_fedet if args.use_aux else detection_collate,
                                  pin_memory=True)

    start_training_time = time.time()
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if iteration != 0 and (iteration % epoch_size == 0):
            epoch += 1

        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(viz, epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            ssm_loss = 0

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when an epoch ends
        try:
            if args.use_aux:
                images, targets, aux_targets = next(batch_iterator)
            else:
                images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            if args.use_aux:
                images, targets, aux_targets = next(batch_iterator)
            else:
                images, targets = next(batch_iterator)

        if images.size(0) < args.batch_size:
            continue

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda()) for ann in targets]
            if args.use_aux:
                aux_targets = Variable(aux_targets.cuda())
        else:
            images = Variable(images)
            targets = [Variable(ann) for ann in targets]
            if args.use_aux:
                aux_targets = Variable(aux_targets)

        # forward
        t0 = time.time()
        assert images.size(2) == args.img_dim and images.size(3) == args.img_dim
        out = net(images)

        # backprop
        optimizer.zero_grad()
        if args.use_aux:
            loss_loc, loss_cls = criterion1(out[2:], targets)
            loss_ssm1 = criterion2(out[0], aux_targets)
            loss_ssm2 = criterion2(out[1], aux_targets)
            loss = loss_loc + loss_cls + loss_ssm1.double() + loss_ssm2.double()
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss = loss_loc.item()
            conf_loss = loss_cls.item()
            ssm_loss = loss_ssm1.item() + loss_ssm2.item()
        else:
            loss_loc, loss_cls = criterion1(out, targets)
            loss = loss_loc + loss_cls
            loss.backward()
            optimizer.step()
            t1 = time.time()
            loc_loss = loss_loc.item()
            conf_loss = loss_cls.item()
            ssm_loss = 0

        if iteration % 10 == 0:
            logger.info('iter ' + repr(iteration) + '/' + str(cfg['max_iter']) +
                        ' || epoch: ' + str(epoch + 1) +
                        ' || LR: ' + repr(optimizer.param_groups[0]['lr']) +
                        ' || total loss: %.4f || loc Loss: %.4f || conf Loss: %.4f || SSM loss: %.4f || '
                        % (loss.item(), loc_loss, conf_loss, ssm_loss) +
                        'timer: %.4f sec.' % (t1 - t0))

        if args.visdom:
            update_vis_plot(viz, iteration, loss_loc.item(), loss_cls.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 10000 == 0:
            logger.info('Saving state, iter: %d' % iteration)
            ckpt_path = os.path.join(args.save_folder,
                                     args.arch + str(args.img_dim) + '_' +
                                     str(args.dataset) + '_' + str(iteration) + '.pth')
            torch.save(build_net.state_dict(), ckpt_path)

    torch.save(build_net.state_dict(), os.path.join(args.save_folder, 'models.pth'))

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time : {} ".format(total_time_str))
def train():
    if args.dataset == 'TEXTBOXES':
        cfg = carplate
        if args.input_size == 512:
            cfg = change_cfg_for_ssd512(cfg)
        dataset = CARPLATEDetection(root=args.dataset_root,
                                    transform=SSDAugmentation(cfg['min_dim'], MEANS),
                                    dataset_name='trainval')
        from data import CARPLATE_CLASSES as labelmap
        eval_dataset = CARPLATEDetection(
            root=args.dataset_root,
            transform=BaseTransform(args.input_size, MEANS),
            target_transform=CARPLATEAnnotationTransform(keep_difficult=True),
            dataset_name='test')

    if args.visdom:
        import visdom
        global viz
        viz = visdom.Visdom()

    ssd_net = build_tb('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    # summary
    summary(net, input_size=(3, int(cfg['min_dim']), int(cfg['min_dim'])))

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load('weights/' + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    # optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
    #                       weight_decay=args.weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(0.9, 0.999),
                           weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    lr = args.lr
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            epoch += 1
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0

        if iteration in cfg['lr_steps']:
            step_index += 1
            lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                      iteration, epoch_size)

        # load train data; restart the iterator when an epoch ends
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            with torch.no_grad():
                targets = [Variable(ann.cuda()) for ann in targets]
        else:
            images = Variable(images)
            with torch.no_grad():
                targets = [Variable(ann) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 100 == 0:
            log.l.info('''
                Timer: {:.5f} sec.\t LR: {}.\t Iter: {}.\t Loss_l: {:.5f}.\t Loss_c: {:.5f}.\t Loss: {:.5f}.
                '''.format((t1 - t0), lr, iteration, loss_l.item(), loss_c.item(),
                           loss_l.item() + loss_c.item()))

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       'weights/' + args.save_folder + 'ssd' + str(args.input_size) +
                       '_' + repr(iteration) + '.pth')

            # load net for evaluation
            num_classes = len(labelmap) + 1  # +1 for background
            eval_net = build_tb('test', args.input_size, num_classes)  # initialize SSD
            eval_net.load_state_dict(
                torch.load('weights/' + args.save_folder + 'ssd' +
                           str(args.input_size) + '_' + repr(iteration) + '.pth'))
            eval_net.eval()
            print('Finished loading model!')
            if args.cuda:
                eval_net = eval_net.cuda()
                cudnn.benchmark = True
            # evaluation begin
            eval_results.test_net(args.eval_save_folder, args.obj_type,
                                  args.dataset_root, 'test', labelmap, eval_net,
                                  args.cuda, eval_dataset,
                                  BaseTransform(eval_net.size, MEANS),
                                  args.top_k, args.input_size,
                                  thresh=args.confidence_threshold)

    torch.save(ssd_net.state_dict(),
               'weights/' + args.save_folder + '' + args.dataset +
               str(args.input_size) + '.pth')

    # load net for evaluation for the final model
    num_classes = len(labelmap) + 1  # +1 for background
    eval_net = build_tb('test', args.input_size, num_classes)  # initialize SSD
    eval_net.load_state_dict(
        torch.load('weights/' + args.save_folder + '' + args.dataset +
                   str(args.input_size) + '.pth'))
    eval_net.eval()
    print('Finished loading model!')
    if args.cuda:
        eval_net = eval_net.cuda()
        cudnn.benchmark = True
    # evaluation begin
    eval_results.test_net(args.eval_save_folder, args.obj_type,
                          args.dataset_root, 'test', labelmap, eval_net,
                          args.cuda, eval_dataset,
                          BaseTransform(eval_net.size, MEANS),
                          args.top_k, args.input_size,
                          thresh=args.confidence_threshold)
def train():
    cfg = voc  # voc is a dict holding the network's configuration
    dataset = VOCDetection(          # dataset class for VOC data
        root=args.dataset_root,      # root directory of the dataset
        transform=SSDAugmentation(cfg['min_dim'], MEANS))
        # image preprocessing (input size and mean); the class default is None,
        # and the trailing MEANS could arguably be dropped

    if args.visdom:  # visualization tooling, safe to ignore
        import visdom
        viz = visdom.Visdom()

    # build_ssd is a function in ssd.py; arguments: phase ('train' or 'test'),
    # input image size, and number of classes. It returns an instance of
    # class SSD(nn.Module), so ssd_net has everything SSD inherits from
    # nn.Module plus the author's additions. SSD defines the base network
    # (VGG16 with the fully connected layers replaced), the extras (the extra
    # multi-scale feature maps added by the paper's authors), and the head,
    # which convolves the six selected feature maps to produce per-default-box
    # class confidences and box coordinates.
    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net  # at this point only __init__ has run, not forward()

    if args.cuda:  # whether to run the model on multiple GPUs
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        # resume defaults to None, i.e. not resuming from a checkpoint
        # (ideally a checkpoint would also store the optimizer state:
        #  model_state_dict, optimizer_state_dict, epoch)
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        # otherwise load the pretrained VGG base weights from the weights folder
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        # only the vgg module of ssd_net gets pretrained weights; the extras,
        # feature-fusion and CBAM modules are not pretrained
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:  # move the model to the GPU
        net = net.cuda()

    if not args.resume:
        # when not resuming, the remaining extras/loc/conf layers are
        # initialized with the xavier method
        print('Initializing weights...')
        ssd_net.extras.apply(weights_init)  # xavier init for weights and biases
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    # optimizer: net.parameters() are the network weights; lr, momentum and
    # weight decay come from the command line
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # loss function (the key part): MultiBoxLoss is an nn.Module-style class
    # combining loss_c and loss_l; criterion is its instance
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size  # batches per epoch
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)  # print the chosen arguments

    step_index = 0

    # visualization (visdom defaults to False)
    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(
        dataset, args.batch_size,
        num_workers=args.num_workers,  # default changed to 0 here
        shuffle=True,
        collate_fn=detection_collate,  # merges batch_size samples into one batch
        pin_memory=True)

    batch_iterator = iter(data_loader)  # batch iterator
    for iteration in range(args.start_iter, cfg['max_iter']):  # iterate up to max_iter
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            # never reached here since args.visdom stays False
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:  # decay the learning rate at the given steps
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when an epoch ends
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)
        # images:  [batch_size, 3, 300, 300]
        # targets: [batch_size, num_objects, 5]
        # num_objects is the number of ground truths in an image;
        # 5 = four box coordinates plus one label

        if args.cuda:  # move the data to cuda
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]

        # forward
        t0 = time.time()
        # out is the tuple returned by net's forward(): [loc, conf, priors]
        out = net(images)

        # backprop: zero the optimizer's gradients
        optimizer.zero_grad()

        # criterion is an nn.Module; this calls its forward pass, which is
        # worth reading closely since it contains the hard negative mining.
        # --- the training loss ---
        # argument 1: the network output out: [loc, conf, priors]
        # argument 2: targets: the ground-truth boxes and labels
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c  # total loss
        loss.backward()
        optimizer.step()
        t1 = time.time()

        # the two counters below appear to be unused
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % loss.item(),
                  end=' ')

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        # checkpoint frequency; while experimenting it helps to save more
        # often, e.g. every 500 iterations
        if iteration != 0 and iteration % 2000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_VOC_' + repr(iteration) + '.pth')

    # final save: only the parameters, not the whole model
    torch.save(ssd_net.state_dict(), args.save_folder + '' + args.dataset + '.pth')
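# Nearly every loop in this file passes detection_collate to its DataLoader.
# A sketch consistent with its use here: because each image has a variable
# number of ground-truth boxes, the default collate cannot stack the targets,
# so images are stacked into one tensor while targets stay a list. The exact
# body is an assumption.
def detection_collate(batch):
    targets = []
    imgs = []
    for sample in batch:
        imgs.append(sample[0])
        targets.append(torch.FloatTensor(sample[1]))
    return torch.stack(imgs, 0), targets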
def train():
    iteration = 0
    start_epoch = 0
    step_index = 0
    per_epoch_size = len(train_dataset) // args.batch_size

    pyramidbox = build_net('train', cfg.NUM_CLASSES)
    net = pyramidbox

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        state_dict = torch.load(args.resume)
        net.load_state_dict(state_dict)
        # iteration = int(args.resume[args.resume.rfind('pyramidbox_') + len('pyramidbox_'):-4])
        # start_epoch = iteration // per_epoch_size
        # print('Start iteration: %d (epoch %d)' % (iteration, start_epoch))
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Load base network....')
        net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        torch.backends.cudnn.benchmark = True
        if args.multigpu:
            net = torch.nn.DataParallel(pyramidbox)
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        pyramidbox.extras.apply(pyramidbox.weights_init)
        pyramidbox.lfpn_topdown.apply(pyramidbox.weights_init)
        pyramidbox.lfpn_later.apply(pyramidbox.weights_init)
        pyramidbox.cpm.apply(pyramidbox.weights_init)
        pyramidbox.loc_layers.apply(pyramidbox.weights_init)
        pyramidbox.conf_layers.apply(pyramidbox.weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    criterion1 = MultiBoxLoss(cfg, args.cuda)
    criterion2 = MultiBoxLoss(cfg, args.cuda, use_head_loss=True)
    print('Loading wider dataset...')
    print('Using the specified args:')
    print(args)

    # catch up the lr schedule when resuming past decay steps
    for step in cfg.LR_STEPS:
        if iteration > step:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

    net.train()
    for epoch in range(start_epoch, cfg.EPOCHES):
        losses = 0
        for batch_idx, (images, face_targets, head_targets) in enumerate(train_loader):
            with torch.no_grad():
                if args.cuda:
                    images = Variable(images.cuda())
                    face_targets = [Variable(ann.cuda()) for ann in face_targets]
                    head_targets = [Variable(ann.cuda()) for ann in head_targets]
                else:
                    images = Variable(images)
                    face_targets = [Variable(ann) for ann in face_targets]
                    head_targets = [Variable(ann) for ann in head_targets]

            if iteration in cfg.LR_STEPS:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)

            t0 = time.time()
            out = net(images)

            # backprop
            optimizer.zero_grad()
            face_loss_l, face_loss_c = criterion1(out, face_targets)
            head_loss_l, head_loss_c = criterion2(out, head_targets)

            loss = face_loss_l + face_loss_c + head_loss_l + head_loss_c
            losses += loss.item()
            loss.backward()
            optimizer.step()
            t1 = time.time()

            face_loss = (face_loss_l + face_loss_c).item()
            head_loss = (head_loss_l + head_loss_c).item()

            if iteration % 10 == 0:
                loss_ = losses / (batch_idx + 1)
                print('Timer: {:.4f} sec.'.format(t1 - t0))
                print('epoch ' + repr(epoch) + ' iter ' + repr(iteration) +
                      ' || Loss:%.4f' % loss_)
                print('->> face Loss: {:.4f} || head loss : {:.4f}'.format(
                    face_loss, head_loss))
                print('->> lr: {}'.format(optimizer.param_groups[0]['lr']))

            if iteration != 0 and iteration % 5000 == 0:
                print('Saving state, iter:', iteration)
                file = 'pyramidbox_' + repr(iteration) + '.pth'
                torch.save(pyramidbox.state_dict(),
                           os.path.join(args.save_folder, file))
            iteration += 1

        logger.add_scalar('data/loss', loss, epoch)
        logger.add_scalar('data/face_loss', face_loss, epoch)
        logger.add_scalar('data/head_loss', head_loss, epoch)
        val(epoch, net, pyramidbox, criterion1, criterion2)
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        # cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        # cfg = vocd512
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))

    ssd_net = build_ssd('train', cfg, args.use_pred_module)
    net = ssd_net
    print(net)

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        resnext_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.resnext.load_state_dict(resnext_weights, strict=False)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, cfg, args.cuda, loss_type=args.loss_type)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    # print(total_samples, batch_size, steps_per_epoch)  # 16551 32 518

    for epoch in range(args.start_epoch, cfg['max_epoch']):
        if epoch in cfg['lr_steps']:
            step_index += 1

        for iteration, (images, targets) in enumerate(data_loader):
            # to make burnin work, the lr is adjusted every iteration
            lr = adjust_learning_rate(optimizer, args.gamma, step_index, epoch,
                                      iteration, steps_per_epoch)

            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            t0 = time.time()
            out = net(images)

            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()

            if iteration % 10 == 0:
                print('iter {0:3d}/{1} || Loss: {2:6.4f} || lr: {3:.6f}|| {4:.4f} sec'
                      .format(iteration, len(data_loader), loss.item(), lr, (t1 - t0)))

        print('Saving state, epoch:', epoch)
        torch.save(ssd_net.state_dict(),
                   args.save_folder + args.weight_prefix + repr(epoch) + '.pth')
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset_train = COCODetection(root=args.dataset_root,
                                      transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        # if args.dataset_root == COCO_ROOT:
        #     parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset_train = VOCDetection(root=args.dataset_root,
                                     image_sets=[('2012', 'trainval')],
                                     transform=SSDAugmentation(cfg['min_dim'], MEANS))
        dataset_val = VOCDetection(root=args.dataset_root,
                                   image_sets=[('2007', 'val')],
                                   transform=SSDAugmentation(cfg['min_dim'], MEANS))
        dataset_test = VOCDetection(root=args.dataset_root,
                                    image_sets=[('2007', 'test')],
                                    transform=SSDAugmentation(cfg['min_dim'], MEANS))

    # Take a subsample of the datasets
    frac = args.subset_size
    data_sizes = np.array([len(dataset_train), len(dataset_val), len(dataset_test)])
    data_sizes_sub = (data_sizes * frac).astype(int)
    np.random.seed(10)
    data_indices = [np.random.choice(data_sizes[i], data_sizes_sub[i]) for i in range(3)]
    print("Train/Val/Test split " + str(data_sizes_sub[0]) + ':' +
          str(data_sizes_sub[1]) + ':' + str(data_sizes_sub[2]))

    if args.arch == 'ssd':
        ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
        net = ssd_net
    elif args.arch == 'box_ssd':
        ssd_net = build_ssd_box2('train', cfg['min_dim'], cfg['num_classes'])
        net = ssd_net
    elif args.arch == 'full_box_ssd':
        ssd_net = build_ssd_full_box('train', cfg['min_dim'], cfg['num_classes'])
        net = ssd_net
    else:
        raise ValueError("Incorrect Architecture chosen")

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    # if args.resume:
    #     print('Resuming training, loading {}...'.format(args.resume))
    #     ssd_net.load_weights(args.resume)
    # else:  # args.basenet == False
    #     vgg_weights = torch.load(args.save_folder + args.basenet)
    #     print('Loading base network...')
    #     ssd_net.vgg.load_state_dict(vgg_weights)

    device = torch.device('cuda' if args.cuda else 'cpu')
    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    train_loss = []
    valid_loss = []
    train_loss_iter = []
    epoch_time = []

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    loss_val = 0
    print('Loading the dataset...')

    # epoch size relative to the subsampled dataset
    epoch_size = int(len(dataset_train) * frac) // args.batch_size
    print('Training ' + args.arch + ' on:', dataset_train.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = args.arch + ' on ' + dataset_train.name + ' LR=' \
            + str(args.lr) + ' WD=' + str(args.weight_decay)
        vis_legend_train = ['Loc Loss Train', 'Conf Loss Train', 'Total Loss Train']
        vis_legend_trainval = ['Train Loss', 'Val Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend_train)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend_train)
        loss_plot = viz.line(Y=torch.tensor([[0, 0]]).zero_(),
                             opts=dict(title='Loss Tracker',
                                       legend=['Training Loss', 'Validation Loss'],
                                       xlabel='Iteration', ylabel='Loss',
                                       show_legend=True))

    data_loader_train = data.DataLoader(dataset_train, args.batch_size,
                                        num_workers=args.num_workers,
                                        sampler=SubsetRandomSampler(data_indices[0]),
                                        shuffle=False, collate_fn=detection_collate,
                                        pin_memory=True)
    data_loader_val = data.DataLoader(dataset_val, args.batch_size,
                                      num_workers=args.num_workers,
                                      sampler=SubsetRandomSampler(data_indices[1]),
                                      shuffle=False, collate_fn=detection_collate,
                                      pin_memory=True)
    # data_loader_test = data.DataLoader(dataset_test, args.batch_size,
    #                                    num_workers=args.num_workers,
    #                                    sampler=SubsetRandomSampler(data_indices[2]),
    #                                    shuffle=False, collate_fn=detection_collate,
    #                                    pin_memory=True)

    # import pdb; pdb.set_trace()
    # mean = 0
    # count = 0
    # for t, (img, y) in enumerate(data_loader_train):
    #     mean += img.mean([0, 2, 3])
    #     print(img.mean([0, 2, 3]))
    #     count += 1
    #     if t % 10 == 0:
    #         print(mean / count)
    # mean = mean / count
    # print(mean)
    # breakpoint()

    # create batch iterator
    batch_iterator = iter(data_loader_train)
    epoch_start = time.time()
    for iteration in range(args.start_iter, cfg['max_iter'] // 10):  # //10 here to end early
        if iteration != 0 and (iteration % epoch_size == 0):
            epoch += 1
            print("Training Epoch number " + str(epoch))
            epoch_start = time.time()
            if args.visdom:
                update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                                'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when an epoch ends
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader_train)
            images, targets = next(batch_iterator)

        images = images.to(device)
        targets = [ann.to(device) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        train_loss_iter.append(loss.item())

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Training Loss: %.4f ||' % loss.item(),
                  end=' ')

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        # if iteration != 0 and iteration % 5000 == 0:
        #     print('Saving state, iter:', iteration)
        #     torch.save(ssd_net.state_dict(),
        #                'weights/ssd300_COCO_' + repr(iteration) + '.pth')

        # calculate val loss once per epoch
        if iteration != 0 and (iteration % epoch_size == 0):
            net.eval()
            loss_val = 0
            with torch.no_grad():
                for t, (images, targets) in enumerate(data_loader_val):
                    images = images.to(device)
                    targets = [ann.to(device) for ann in targets]
                    out = net(images)
                    loss_l, loss_c = criterion(out, targets)
                    loss_val += loss_l + loss_c
            loss_val /= len(data_loader_val)
            print('iter ' + repr(iteration) + ' || Val Loss: %.4f ||' % loss_val.item(),
                  end=' ')
            if args.visdom:
                viz_tracker(loss_plot,
                            torch.tensor([[loss.item(), loss_val.item()]]),
                            torch.tensor([[iteration - 1, iteration - 1]]))

            # reset epoch loss counters
            loss_l = 0
            loss_c = 0
            train_loss.append(loss.item())
            valid_loss.append(loss_val.item())

            # epoch time
            epoch_end = time.time()
            epoch_time.append(epoch_end - epoch_start)
            print("Epoch " + str(epoch) + " took " +
                  str(int(epoch_end - epoch_start)) + " secs to train")

            suffix = args.arch + '_lr' + str(args.lr) + '_wd' + str(args.weight_decay) \
                + '_sub' + str(args.subset_size)

            # save the accumulated arrays and a checkpoint every epoch
            with open('pkl_files/' + suffix + 'train_loss.pkl', 'wb') as handle:
                pickle.dump(train_loss, handle, protocol=pickle.HIGHEST_PROTOCOL)
            with open('pkl_files/' + suffix + 'valid_loss.pkl', 'wb') as handle:
                pickle.dump(valid_loss, handle, protocol=pickle.HIGHEST_PROTOCOL)
            with open('pkl_files/' + suffix + 'epoch_time.pkl', 'wb') as handle:
                pickle.dump(epoch_time, handle, protocol=pickle.HIGHEST_PROTOCOL)
            with open('pkl_files/' + suffix + 'train_loss_iter.pkl', 'wb') as handle:
                pickle.dump(train_loss_iter, handle, protocol=pickle.HIGHEST_PROTOCOL)

            state = {
                'epoch': epoch,
                'state_dict': ssd_net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state,
                       args.save_folder + '' + str(args.subset_size) +
                       args.dataset + suffix + '.pth')

            # return to training mode after validation
            net.train()
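# viz_tracker appends a (train, val) loss pair to the visdom line plot created
# above. A minimal sketch, assuming the global visdom client `viz` created
# earlier in the script; the exact body is an assumption.
def viz_tracker(plot, value, num):
    # Append one point per series to the existing line window.
    viz.line(X=num, Y=value, win=plot, update='append')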
def main(parser_data):
    device = torch.device(
        parser_data.device if torch.cuda.is_available() else "cpu")
    print(device)

    VOC_root = parser_data.data_path
    batch_sizes = parser_data.batch_size
    Cuda = True

    model = build_ssd("train", Config["num_classes"])

    # Load the pretrained model
    print('Loading weights into state dict...')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(
        "/home/zdst/Ryan/pytorch-ssd-self/save_weights/ssd_weights.pth",
        map_location=device)
    # mapping to this device used to error here; loading onto CPU worked
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')

    net = model.train()
    if Cuda:
        net = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        net = net.cuda()

    annotation_path = '2012_train.txt'
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_train = len(lines)

    train_dataset = VOC2012DataSet(lines[:num_train],
                                   (Config["min_dim"], Config["min_dim"]))
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    batch_size=batch_sizes,
                                                    shuffle=True,
                                                    pin_memory=True,
                                                    drop_last=True,
                                                    num_workers=4,
                                                    collate_fn=collate_fn)

    # If a checkpoint path from a previous run is given, resume training from it
    # if parser_data.resume != "":
    #     print('Loading weights into state dict...')
    #     model_dict = model.state_dict()
    #     pretrained_dict = torch.load(parser_data.resume, map_location="cpu")
    #     pretrained_dict = {k: v for k, v in pretrained_dict.items()
    #                        if np.shape(model_dict[k]) == np.shape(v)}
    #     model_dict.update(pretrained_dict)
    #     model.load_state_dict(model_dict)
    #     print('Finished!')

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9,
                                weight_decay=0.0005)
    # optimizer = torch.optim.Adam(params, lr=0.0005)

    one_epoch_sizes = num_train // batch_sizes
    criterion = MultiBoxLoss(Config['num_classes'], 0.5, True, 0, True, 3,
                             0.5, False, Cuda)

    for epoch in range(parser_data.start_epoch, parser_data.epochs):
        if epoch % 2 == 0:
            adjust_learning_rate(optimizer, 0.0005, 0.9, epoch)
        loc_loss = 0
        conf_loss = 0
        for iteration, dataset in enumerate(train_data_loader):
            if iteration >= one_epoch_sizes:
                break
            images, targets = dataset[0], dataset[1]
            with torch.no_grad():
                if Cuda:
                    images = torch.from_numpy(images).float().cuda()
                    targets = [torch.from_numpy(ann).float().cuda()
                               for ann in targets]
                else:
                    images = torch.from_numpy(images).float()
                    targets = [torch.from_numpy(ann).float()
                               for ann in targets]

            # forward pass
            out = net(images)
            # zero the gradients
            optimizer.zero_grad()
            # compute the loss
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            # backward pass
            loss.backward()
            optimizer.step()
            # accumulate the epoch losses
            loc_loss += loss_l.item()
            conf_loss += loss_c.item()

            print('\nEpoch:' + str(epoch + 1) + '/' + str(parser_data.epochs))
            print('iter:' + str(iteration) + '/' + str(one_epoch_sizes) +
                  '|| Loc_loss:%.4f || Conf_Loss:%.4f ||' %
                  (loc_loss / (iteration + 1), conf_loss / (iteration + 1)),
                  end=" ")

        print('Saving state, iter:', str(epoch + 1))
        torch.save(
            model.state_dict(),
            'save_weights/Epoch%d-Loc%.4f-Conf%.4f.pth' %
            ((epoch + 1), loc_loss / (iteration + 1),
             conf_loss / (iteration + 1)))
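# The DataLoader above needs a custom collate_fn because each image carries a
# different number of ground-truth boxes, so the default stacking fails. A
# minimal sketch of such a function is given below; the name and the
# numpy-based types are assumptions inferred from the torch.from_numpy calls
# above, not the repo's actual implementation.
def collate_fn(batch):
    """Stack images into one array; keep per-image target arrays in a list."""
    images = np.array([sample[0] for sample in batch])
    targets = [sample[1] for sample in batch]
    return images, targets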
                    default=60000, type=int,
                    help='How long the class model shall be optimized')
parser.add_argument('--classes', default=['tvmonitor'], nargs='+',
                    help='The class that shall be recognized in the image')
args = parser.parse_args()

if not os.path.exists(args.save_folder):
    os.mkdir(args.save_folder)

lr = args.lr
torch.set_default_tensor_type('torch.cuda.FloatTensor')
criterion = MultiBoxLoss(21, 0.5, True, 0, True, 3, 0.5, False, True)

net = build_ssd('train', 300, 21)  # initialize SSD
net.load_state_dict(torch.load(args.trained_model))
for param in net.parameters():
    param.requires_grad = False
net.eval()

for category in args.classes:
    category_index = VOC_CLASSES.index(category)
    print('New category: ' + category + ' (' + str(category_index) + ')')
    if args.refine == '':
        input = Variable(torch.zeros(1, 3, 300, 300), requires_grad=True)
    else:
        im = np.swapaxes(cv2.imread(args.refine), 0, 2).astype('f')
        input = Variable((torch.from_numpy(np.expand_dims(im, axis=0)).cuda()),
net.load_state_dict(new_state_dict)
if args.ngpu > 1:
    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
if args.cuda:
    net.cuda()
    cudnn.benchmark = True

optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                      weight_decay=args.weight_decay)
# optimizer = optim.RMSprop(net.parameters(), lr=args.lr, alpha=0.9, eps=1e-08,
#                           momentum=args.momentum, weight_decay=args.weight_decay)

criterion = [MultiBoxLoss(2 if C_agnostic else num_classes, 0.5, True, 0,
                          True, 3, 0.5, False),
             MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)]
priorbox = PriorBox(cfg)
with torch.no_grad():  # 'volatile=True' was removed from PyTorch; no_grad is the modern equivalent
    priors = priorbox.forward()


def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets, preproc(
def train(): if args.dataset == 'COCO': if args.dataset_root == VOC_ROOT: if not os.path.exists(COCO_ROOT): parser.error('Must specify dataset_root if specifying dataset') print("WARNING: Using default COCO dataset_root because " + "--dataset_root was not specified.") args.dataset_root = COCO_ROOT cfg = coco dataset = COCODetection(root=args.dataset_root, transform=SSDAugmentation( cfg['min_dim'], MEANS)) elif args.dataset == 'VOC': if args.dataset_root == COCO_ROOT: parser.error('Must specify dataset if specifying dataset_root') cfg = voc transform = SSDAugmentation(cfg['min_dim'], MEANS) if not args.no_transform else None dataset = VOCDetection(root=args.dataset_root, transform=transform) elif args.dataset == 'okutama': if args.dataset_root != OKUTAMA_ROOT: parser.error('Please specify Okutama dataset root.') if args.min_dim == 300: cfg = okutama_300_cfg elif args.min_dim == 512: cfg = okutama_512_cfg dataset = OkutamaDetection(dataset_root=args.dataset_root, transform=SSDAugmentation( cfg['min_dim'], MEANS)) if args.visdom: import visdom viz = visdom.Visdom() ssd_net = build_ssd('train', cfg['min_dim'], cfg) net = ssd_net if args.cuda: net = torch.nn.DataParallel(ssd_net) cudnn.benchmark = True if args.resume: print('Resuming training, loading {}...'.format(args.resume)) ssd_net.load_weights(args.resume) else: vgg_weights = torch.load(args.save_folder + args.basenet) print('Loading base network...') ssd_net.vgg.load_state_dict(vgg_weights) if args.cuda: net = net.cuda() if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method ssd_net.extras.apply(weights_init) ssd_net.loc.apply(weights_init) ssd_net.conf.apply(weights_init) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, cfg, args.cuda) net.train() # loss counters loc_loss = 0 conf_loss = 0 epoch = args.start_epoch print('Loading the dataset...') epoch_size = len(dataset) // args.batch_size print('Training SSD on:', dataset.name) print('Using the specified args:') print(args) with open(log_path, 'a+') as l: l.write('~~~~~~~~~~~~~~~~~~~~ Epoch {} ~~~~~~~~~~~~~~~~~~~~ \n'.format( epoch)) step_index = 0 if args.visdom: vis_title = 'SSD.PyTorch on ' + dataset.name vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss'] iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend) epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend) data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) # create batch iterator batch_iterator = iter(data_loader) for iteration in range(args.start_iter, cfg['max_iter']): if args.visdom and iteration != 0 and (iteration % epoch_size == 0): update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None, 'append', epoch_size) # reset epoch loss counters loc_loss = 0 conf_loss = 0 epoch += 1 if iteration != 0 and (iteration % epoch_size == 0): epoch += 1 with open(log_path) as l: past_losses = l.readlines() num_lines = epoch_size // args.log_every epoch_lines = past_losses[-num_lines:] epoch_losses = [float(line.split('||')[-1].split(':')[-1].strip()) \ for line in epoch_lines \ if not line.startswith('~')] avg_epoch_loss = sum(epoch_losses) / len(epoch_losses) with open(log_path, 'a+') as l: l.write('~ Avg epoch loss: {:4f} \n'.format(avg_epoch_loss)) l.write( '~~~~~~~~~~~~~~~~~~~~ Epoch {} ~~~~~~~~~~~~~~~~~~~~ 
\n'. format(epoch)) if iteration in cfg['lr_steps']: step_index += 1 adjust_learning_rate(optimizer, args.gamma, step_index) # load train data try: images, targets, img_orig, targ_orig = next(batch_iterator) except StopIteration: batch_iterator = iter(data_loader) images, targets, img_orig, targ_orig = next(batch_iterator) # print('new batch') # for q, (img, trg) in enumerate(zip(img_orig, targ_orig)): # print(len(trg)) # for t in trg: # pts = (t[0], t[1]), (t[2], t[3]) # tl, br = [(int(pt[0] / 1), int(pt[1] / 1)) \ # for pt in pts] # 7.2 for 300, 4.2188, # cv.rectangle(img, tl, br, (0, 0, 255), 2) # cv.imwrite(os.path.join('test_imgs', str(q) + '.jpg'), img) # exit(0) if args.cuda: images = Variable(images.cuda()) targets = [Variable(ann.cuda(), volatile=True) for ann in targets] else: images = Variable(images) targets = [Variable(ann, volatile=True) for ann in targets] # forward t0 = time.time() out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c = criterion(out, targets) loss = loss_l + loss_c loss.backward() optimizer.step() t1 = time.time() loc_loss += loss_l.item() conf_loss += loss_c.item() if iteration % args.log_every == 0: time_elapsed = t1 - t0 print('iter: {} || '.format(iteration) + 'timer: {:4f} || '.format(time_elapsed) + 'loss_l: {:4f} || '.format(loss_l) + 'loss_c: {:4f} || '.format(loss_c) + 'total loss: {:4f} \n'.format(loss)) with open(log_path, 'a+') as l: l.write('iter: {} || '.format(iteration) + 'timer: {:4f} || '.format(time_elapsed) + 'loss_l: {:4f} || '.format(loss_l) + 'loss_c: {:4f} || '.format(loss_c) + 'total loss: {:4f} \n'.format(loss)) if args.visdom: update_vis_plot(iteration, loss_l.item(), loss_c.item(), iter_plot, epoch_plot, 'append') if iteration != 0 and iteration % 5000 == 0: print('Saving state, iter:', iteration) torch.save( ssd_net.state_dict(), 'weights/{}_'.format(args.name) + repr(iteration) + '.pth') torch.save(ssd_net.state_dict(), args.save_folder + '' + args.dataset + '.pth')
def main():
    args.cfg = v2
    args.train_sets = 'train'
    args.means = (104, 117, 123)
    num_classes = len(CLASSES) + 1
    args.num_classes = num_classes
    args.stepvalues = [int(val) for val in args.stepvalues.split(',')]
    args.loss_reset_step = 30
    args.eval_step = 10000
    args.print_step = 10

    # Define the experiment name; it is used for the save directory and the visdom ENV
    args.exp_name = 'CONV-SSD-{}-{}-bs-{}-{}-lr-{:05d}'.format(
        args.dataset, args.input_type, args.batch_size,
        args.basenet[:-14], int(args.lr * 100000))

    args.save_root += args.dataset + '/'
    args.save_root = args.save_root + 'cache/' + args.exp_name + '/'

    if not os.path.isdir(args.save_root):
        os.makedirs(args.save_root)

    net = build_ssd(300, args.num_classes)

    if args.input_type == 'fastOF':
        print('Download the pretrained Brox-flow model weights and place them at:::=> ',
              args.data_root + 'ucf24/train_data/brox_wieghts.pth')
        pretrained_weights = args.data_root + 'ucf24/train_data/brox_wieghts.pth'
        print('Loading base network...')
        net.load_state_dict(torch.load(pretrained_weights))
    else:
        vgg_weights = torch.load(args.data_root + 'ucf24/train_data/' + args.basenet)
        print('Loading base network...')
        net.vgg.load_state_dict(vgg_weights)

    args.data_root += args.dataset + '/'

    if args.cuda:
        net = net.cuda()

    def xavier(param):
        init.xavier_uniform_(param)  # in-place variant; init.xavier_uniform is deprecated

    def weights_init(m):
        if isinstance(m, nn.Conv2d):
            xavier(m.weight.data)
            m.bias.data.zero_()

    print('Initializing weights for extra layers and HEADs...')
    # initialize newly added layers' weights with the xavier method
    net.extras.apply(weights_init)
    net.loc.apply(weights_init)
    net.conf.apply(weights_init)

    parameter_dict = dict(net.named_parameters())  # network parameters as a dict keyed by name
    params = []

    # Train bias parameters at twice the learning rate and without weight decay
    for name, param in parameter_dict.items():
        if name.find('bias') > -1:
            print(name, 'layer parameters will be trained @ {}'.format(args.lr * 2))
            params += [{'params': [param], 'lr': args.lr * 2, 'weight_decay': 0}]
        else:
            print(name, 'layer parameters will be trained @ {}'.format(args.lr))
            params += [{'params': [param], 'lr': args.lr,
                        'weight_decay': args.weight_decay}]

    optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(args.num_classes, 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)
    scheduler = LogLR(optimizer, lr_milestones=args.lr_milestones,
                      total_epoch=args.total_epoch)
    # scheduler = ExponentialLR(optimizer, gamma=args.gamma, last_epoch=args.last_epoch)
    # scheduler = MultiStepLR(optimizer, lr_milestones=args.lr_milestones)
    train(args, net, optimizer, criterion, scheduler)
def train(): if args.dataset == 'COCO': if args.dataset_root == VOC_ROOT: if not os.path.exists(COCO_ROOT): parser.error('Must specify dataset_root if specifying dataset') print("WARNING: Using default COCO dataset_root because " + "--dataset_root was not specified.") args.dataset_root = COCO_ROOT cfg = coco dataset = COCODetection(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS)) elif args.dataset == 'VOC': #if args.dataset_root == COCO_ROOT: #parser.error('Must specify dataset if specifying dataset_root') cfg = voc dataset = VOCDetection(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS)) ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes']) net = ssd_net if args.cuda: net = torch.nn.DataParallel(ssd_net) cudnn.benchmark = True if args.resume: print('Resuming training, loading {}...'.format(args.resume)) ssd_net.load_weights(args.resume) else: vgg_weights = torch.load(args.basenet) print('Loading base network...') ssd_net.vgg.load_state_dict(vgg_weights) if args.cuda: net = net.cuda() if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method ssd_net.extras.apply(weights_init) ssd_net.loc.apply(weights_init) ssd_net.conf.apply(weights_init) ssd_net.aux_loc.apply(weights_init) ssd_net.aux_conf.apply(weights_init) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) aux_criterion1 = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) aux_criterion2 = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) aux_criterion3 = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) aux_criterion4 = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) net.train() # loss counters loc_loss = 0 conf_loss = 0 avg_loss = 0 loss_total = 0 loss_aux = 0 epoch = 0 print('Loading the dataset...') epoch_size = len(dataset) // args.batch_size print('Training SSD on:', dataset.name) print('Using the specified args:') print(args) step_index = 0 if args.visdom: vis_title = 'SSD.PyTorch on ' + dataset.name vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss'] iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend) epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend) # data_loader = data.DataLoader(dataset, args.batch_size, # num_workers=args.num_workers, # shuffle=True, collate_fn=detection_collate, # pin_memory=True) data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=Group_detection_collate, pin_memory=True) # create batch iterator batch_iterator = iter(data_loader) for iteration in range(args.start_iter, cfg['max_iter']): if iteration % epoch_size == 0: # reset epoch loss counters # loc_loss = 0 # conf_loss = 0 # loss_aux = 0 # loss_total = 0 epoch += 1 if iteration in cfg['lr_steps']: step_index += 1 adjust_learning_rate(optimizer, args.gamma, step_index) # load train data try: images, targets,gt1,gt2,gt3,gt4 = next(batch_iterator) except StopIteration: # Start a new iteration batch_iterator = iter(data_loader) images, targets, gt1, gt2, gt3, gt4 = next(batch_iterator) if args.cuda: images = Variable(images.cuda()) targets = [Variable(ann.cuda(), volatile=True) for ann in targets] gt1 = [Variable(ann.cuda(), volatile=True) for ann in gt1] gt2 = [Variable(ann.cuda(), 
volatile=True) for ann in gt2] gt3 = [Variable(ann.cuda(), volatile=True) for ann in gt3] gt4 = [Variable(ann.cuda(), volatile=True) for ann in gt4] else: images = Variable(images) targets = [Variable(ann, volatile=True) for ann in targets] gt1 = [Variable(ann, volatile=True) for ann in gt1] gt2 = [Variable(ann, volatile=True) for ann in gt2] gt3 = [Variable(ann, volatile=True) for ann in gt3] gt4 = [Variable(ann, volatile=True) for ann in gt4] # forward t0 = time.time() out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c = criterion((out[:3]), targets) #aux_l, aux_c = aux_criterion((out[2],out[3],out[4]), targets) #loss = (loss_l + loss_c)*0.6 + (aux_l+ aux_c)*0.4 aux1_l, aux1_c = aux_criterion1((out[3], out[4], out[2]), gt1) aux2_l, aux2_c = aux_criterion2((out[5], out[6], out[2]), gt2) aux3_l, aux3_c = aux_criterion3((out[7], out[8], out[2]), gt3) aux4_l, aux4_c = aux_criterion4((out[9], out[10], out[2]), gt4) aux_loss = aux1_l + aux1_c + aux2_l + aux2_c + aux3_l + aux3_c + aux4_l + aux4_c loss = loss_l + loss_c total_loss = loss + aux_loss * 0.25 total_loss.backward() optimizer.step() t1 = time.time() loc_loss += loss_l conf_loss += loss_c avg_loss += loss loss_aux += aux_loss loss_total += total_loss if iteration % 100 == 0: if iteration != 0: loc_loss /= 100.0 conf_loss /= 100.0 avg_loss /= 100.0 loss_aux /= 100.0 loss_total /= 100.0 # aux_l_loss /= 100.0 # aux_c_loss /= 100.0 #print('timer: %.4f sec.' % (t1 - t0)) #print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ') load_t1 = time.time() now = time.localtime(load_t1) for param_group in optimizer.param_groups: lr = param_group['lr'] print('[%s-%2s-%2s %2s:%2s:%2s] ' % (now.tm_year, now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec) + 'iter ' + '%6d'%(iteration) + ' || L: %2.4f C: %2.4f Loss: %2.4f ||' % (loc_loss, conf_loss, avg_loss) + ' || AUX Loss: %2.4f ||' % (loss_aux) + ' || Total Loss: %.4f ||' % (loss_total) +' LR: %.5f' % (lr)) loc_loss = 0 conf_loss = 0 avg_loss = 0 loss_total = 0 loss_aux = 0 # aux_l_loss = 0 # aux_c_loss = 0 if args.visdom: update_vis_plot(iteration, loss_l.data[0], loss_c.data[0], iter_plot, epoch_plot, 'append') if iteration != 0 and iteration % 5000 == 0: #if iteration % 5000 == 0: print('Saving state, iter:', iteration) torch.save(ssd_net.state_dict(), args.save_folder + '/ssd300_VOC_' + repr(iteration) + '.pth') torch.save(ssd_net.state_dict(), args.save_folder + '' + args.dataset + '.pth')
def train(): if args.dataset == 'COCO': if args.dataset_root == VOC_ROOT: if not os.path.exists(COCO_ROOT): parser.error('Must specify dataset_root if specifying dataset') print("WARNING: Using default COCO dataset_root because " + "--dataset_root was not specified.") args.dataset_root = COCO_ROOT cfg = coco dataset = COCODetection(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS)) elif args.dataset == 'VOC300': if args.dataset_root == COCO_ROOT: parser.error('Must specify dataset if specifying dataset_root') cfg = voc300 dataset = VOCDetection(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS)) elif args.dataset == 'VOC512': if args.dataset_root == COCO_ROOT: parser.error('Must specify dataset if specifying dataset_root') cfg = voc512 dataset = VOCDetection(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS)) if args.visdom: import visdom viz = visdom.Visdom() finish_flag = True while(finish_flag): ssd_net1 = build_ssd_con('train', cfg['min_dim'], cfg['num_classes'], flip=True) #ssd_net2 = build_ssd_con('train', cfg['min_dim'], cfg['num_classes'], flip=True) net_s = ssd_net1 net_t = ssd_net1 if args.cuda: #net = ssd_net net_s = torch.nn.DataParallel(net_s) net_t = torch.nn.DataParallel(net_t) cudnn.benchmark = True if args.resume: print('Resuming training, loading {}...'.format(args.resume)) net_s.load_weights(args.resume) net_t.load_weights(args.resume) #ssd_net1 = nn.DataParallel(ssd_net1)#, device_ids=[6,7]) #ssd_net2 = nn.DataParallel(ssd_net2)#, device_ids=[6,7]) #ssd_net1.load_state_dict(torch.load(args.resume)) #ssd_net2.load_state_dict(torch.load(args.resume)) else: vgg_weights = torch.load(args.save_folder + args.basenet) print('Loading base network...') ssd_net1.vgg.load_state_dict(vgg_weights) #ssd_net2.vgg.load_state_dict(vgg_weights) if args.cuda: net_s = net_s.cuda() net_t = net_t.cuda() if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method ssd_net1.extras.apply(weights_init) #ssd_net2.extras.apply(weights_init) ssd_net1.loc.apply(weights_init) #ssd_net2.loc.apply(weights_init) ssd_net1.conf.apply(weights_init) #ssd_net2.conf.apply(weights_init) net_t.module = copy.deepcopy(net_s.module) vat = VAT() #t_model optimizer = optim.SGD(net_s.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) conf_consistency_criterion = torch.nn.KLDivLoss(size_average=False, reduce=False).cuda() net_s.train() net_t.train() # loss counters loc_loss = 0 conf_loss = 0 epoch = 0 supervised_flag = 1 print('Loading the dataset...') step_index = 0 if args.visdom: vis_title = 'SSD.PyTorch on ' + dataset.name vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss'] iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend) epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend) total_un_iter_num = 0 supervised_batch = args.batch_size #unsupervised_batch = args.batch_size - supervised_batch #data_shuffle = 0 if(args.start_iter==0): supervised_dataset = VOCDetection_con_init(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS)) else: supervised_flag = 0 supervised_dataset = VOCDetection_con(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS))#,shuffle_flag=data_shuffle) #data_shuffle = 1 supervised_data_loader = data.DataLoader(supervised_dataset, supervised_batch, num_workers=args.num_workers, 
shuffle=False, collate_fn=detection_collate, pin_memory=True, drop_last=True) batch_iterator = iter(supervised_data_loader) for iteration in range(args.start_iter, cfg['max_iter'] + 50000 ): # 17W if args.visdom and iteration != 0 and (iteration % epoch_size == 0): update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None, 'append', epoch_size) # reset epoch loss counters loc_loss = 0 conf_loss = 0 epoch += 1 if iteration in cfg['lr_steps']: step_index += 1 adjust_learning_rate(optimizer, args.gamma, step_index) try: images, targets, semis = next(batch_iterator) except StopIteration: supervised_flag = 0 supervised_dataset = VOCDetection_con(root=args.dataset_root, transform=SSDAugmentation(cfg['min_dim'], MEANS))#, shuffle_flag=data_shuffle) supervised_data_loader = data.DataLoader(supervised_dataset, supervised_batch, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True, drop_last=True) batch_iterator = iter(supervised_data_loader) images, targets, semis = next(batch_iterator) # images [32,3,300,300] tensor # targets [1*32] list if args.cuda: images = Variable(images.cuda()) targets = [Variable(ann.cuda(), volatile=True) for ann in targets] else: images = Variable(images) targets = [Variable(ann, volatile=True) for ann in targets] #vat x_hat = vat(images, targets, semis, net_s, iteration).detach() #images = x_hat # forward t0 = time.time() isflip_s = True #False True True isflip_t = True #True True False out, _, conf, _, loc = net_s(images, images.clone(), isflip=isflip_s) _, _, conf_flip, _, loc_flip = net_t(images, x_hat.clone(), isflip=isflip_t) sup_image_binary_index = np.zeros([len(semis),1]) for super_image in range(len(semis)): if(int(semis[super_image])==1): sup_image_binary_index[super_image] = 1 else: sup_image_binary_index[super_image] = 0 if(int(semis[len(semis)-1-super_image])==0): del targets[len(semis)-1-super_image] sup_image_index = np.where(sup_image_binary_index == 1)[0] unsup_image_index = np.where(sup_image_binary_index == 0)[0] loc_data, conf_data, priors = out if (len(sup_image_index) != 0): loc_data = loc_data[sup_image_index,:,:] conf_data = conf_data[sup_image_index,:,:] output = ( loc_data, conf_data, priors ) # backprop loss_l = Variable(torch.cuda.FloatTensor([0])) loss_c = Variable(torch.cuda.FloatTensor([0])) if(len(sup_image_index)!=0): try: loss_l, loss_c = criterion(output, targets) except: break print('--------------') sampling = True if(sampling is True): conf_class = conf[:,:,1:].clone() background_score = conf[:, :, 0].clone() each_val, each_index = torch.max(conf_class, dim=2) mask_val = each_val > background_score mask_val = mask_val.data mask_conf_index = mask_val.unsqueeze(2).expand_as(conf) mask_loc_index = mask_val.unsqueeze(2).expand_as(loc) conf_mask_sample = conf.clone() loc_mask_sample = loc.clone() conf_sampled = conf_mask_sample[mask_conf_index].view(-1, 21) loc_sampled = loc_mask_sample[mask_loc_index].view(-1, 4) conf_mask_sample_flip = conf_flip.detach().clone() loc_mask_sample_flip = loc_flip.detach().clone() conf_sampled_flip = conf_mask_sample_flip[mask_conf_index].view(-1, 21) loc_sampled_flip = loc_mask_sample_flip[mask_loc_index].view(-1, 4) if(mask_val.sum()>0): ## JSD conf_sampled_flip = conf_sampled_flip + 1e-7 conf_sampled = conf_sampled + 1e-7 consistency_conf_loss_a = conf_consistency_criterion(conf_sampled.log(), conf_sampled_flip.detach()).sum(-1).mean() consistency_conf_loss_b = conf_consistency_criterion(conf_sampled_flip.log(), conf_sampled.detach()).sum(-1).mean() 
consistency_conf_loss = consistency_conf_loss_a + consistency_conf_loss_b ## LOC LOSS if isflip_s == isflip_t: consistency_loc_loss_x = torch.mean(torch.pow(loc_sampled[:, 0] - loc_sampled_flip[:, 0], exponent=2)) else: consistency_loc_loss_x = torch.mean(torch.pow(loc_sampled[:, 0] + loc_sampled_flip[:, 0], exponent=2)) consistency_loc_loss_y = torch.mean(torch.pow(loc_sampled[:, 1] - loc_sampled_flip[:, 1], exponent=2)) consistency_loc_loss_w = torch.mean(torch.pow(loc_sampled[:, 2] - loc_sampled_flip[:, 2], exponent=2)) consistency_loc_loss_h = torch.mean(torch.pow(loc_sampled[:, 3] - loc_sampled_flip[:, 3], exponent=2)) consistency_loc_loss = torch.div( consistency_loc_loss_x + consistency_loc_loss_y + consistency_loc_loss_w + consistency_loc_loss_h, 4) else: consistency_conf_loss = Variable(torch.cuda.FloatTensor([0])) consistency_loc_loss = Variable(torch.cuda.FloatTensor([0])) consistency_loss = torch.div(consistency_conf_loss,2) + consistency_loc_loss ramp_weight = rampweight(iteration) consistency_loss = torch.mul(consistency_loss, ramp_weight) if(supervised_flag ==1): loss = loss_l + loss_c + consistency_loss else: if(len(sup_image_index)==0): loss = consistency_loss else: loss = loss_l + loss_c + consistency_loss if(loss.data>0): optimizer.zero_grad() loss.backward() optimizer.step() t1 = time.time() ##EMA ema_factor = min(1 - 1 / (iteration+1), 0.99) for emp_p, p in zip(net_t.parameters(), net_s.parameters()): emp_p.data = ema_factor * emp_p.data + (1 - ema_factor) * p.data if(len(sup_image_index)==0): loss_l.data = Variable(torch.cuda.FloatTensor([0])) loss_c.data = Variable(torch.cuda.FloatTensor([0])) else: loc_loss += loss_l.data # [0] conf_loss += loss_c.data # [0] if iteration % 10 == 0: print('timer: %.4f sec.' % (t1 - t0)) print('iter ' + repr(iteration) + ' || Loss: %.4f || consistency_loss : %.4f ||' % (loss.data, consistency_loss.data), end=' ') print('loss: %.4f , loss_c: %.4f , loss_l: %.4f , loss_con: %.4f, lr : %.4f, super_len : %d\n' % (loss.data, loss_c.data, loss_l.data, consistency_loss.data,float(optimizer.param_groups[0]['lr']),len(sup_image_index))) print("ramp_weight:%.4f" %(ramp_weight)) if(float(loss)>100): break if args.visdom: update_vis_plot(iteration, loss_l.data, loss_c.data, iter_plot, epoch_plot, 'append') if iteration != 0 and (iteration+1) % 10000 == 0: print('Saving state, iter:', iteration) torch.save(net_s.state_dict(), 'weights/ssd300_VAT11_change_' + repr(iteration+1) + '.pth') torch.save(net_t.state_dict(), 'weights/ssd300_VAT11t_change_' + repr(iteration+1) + '.pth') torch.save(net_s.state_dict(), args.save_folder + '' + args.dataset + '.pth') print('-------------------------------\n') print(loss.data) print('-------------------------------') if((iteration +1) ==cfg['max_iter'] + 50000): # 17W finish_flag = False
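# rampweight(iteration) above scales the consistency loss over the course of
# training but is not defined in this snippet. Below is a sketch of the common
# mean-teacher-style sigmoid ramp-up; the cutoff values are assumptions, not
# necessarily the schedule this repo actually uses.
def rampweight(iteration, ramp_up_end=32000):
    """Ramp the consistency weight from ~0 to 1 with exp(-5 * (1 - t)^2)."""
    if iteration < ramp_up_end:
        t = iteration / ramp_up_end
        return math.exp(-5.0 * (1.0 - t) ** 2)
    return 1.0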
args.num_workers, cfg.FACE.SRC_VAL_FILE, 'transfer', test=True) else: val_loader_ss_tsf = None DataList = {'Checkpoint': [], 'l_pal2': [], 'c_pal2': [], 'total_2': []} trainer = Trainer(cfg, args.cuda) trainer.eval() if args.cuda: trainer = trainer.cuda() criterion = MultiBoxLoss(cfg) def val(checkpoint): trainer.load_state_dict(torch.load(checkpoint)) if args.multigpu: trainer_final = torch.nn.DataParallel(trainer) else: trainer_final = trainer step = 0 total_loss_l_pal2 = 0 total_loss_c_pal2 = 0 total_loss_ss = 0
net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True # INITIALIZE OPTIMIZER optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) # INITIALIZE LOSS criterion = MultiBoxLoss(num_classes=2, overlap_thresh=0.35, prior_for_matching=True, bkg_label=0, neg_mining=True, neg_pos=7, neg_overlap=0.35, encode_target=False) # INITIALIZE PRIORBOX/ANCHORS priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) # (640,640) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() # priors.shape == torch.Size([16800, 4]) # num of anchors == 16800
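# Where the 16800 above comes from, assuming the usual RetinaFace setup
# (strides 8/16/32 on a 640x640 input, 2 anchor sizes per location); this is
# arithmetic for reference, not code from the repo:
#   (640/8)**2 * 2 + (640/16)**2 * 2 + (640/32)**2 * 2
#   = 80*80*2 + 40*40*2 + 20*20*2
#   = 12800 + 3200 + 800
#   = 16800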
def main(): global my_dict, keys, k_len, arr, xxx, args, log_file, best_prec1 parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training') parser.add_argument('--version', default='v2', help='conv11_2(v2) or pool6(v1) as last layer') parser.add_argument('--basenet', default='vgg16_reducedfc.pth', help='pretrained base model') parser.add_argument('--dataset', default='ucf24', help='pretrained base model') parser.add_argument('--ssd_dim', default=300, type=int, help='Input Size for SSD') # only support 300 now parser.add_argument('--modality', default='rgb', type=str, help='INput tyep default rgb options are [rgb,brox,fastOF]') parser.add_argument('--jaccard_threshold', default=0.5, type=float, help='Min Jaccard index for matching') parser.add_argument('--batch_size', default=1, type=int, help='Batch size for training') parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading') parser.add_argument('--max_iter', default=120000, type=int, help='Number of training iterations') parser.add_argument('--man_seed', default=123, type=int, help='manualseed for reproduction') parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model') parser.add_argument('--ngpu', default=1, type=str2bool, help='Use cuda to train model') parser.add_argument('--lr', '--learning-rate', default=0.0005, type=float, help='initial learning rate') parser.add_argument('--momentum', default=0.9, type=float, help='momentum') parser.add_argument('--stepvalues', default='70000,90000', type=str, help='iter number when learning rate to be dropped') parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay for SGD') parser.add_argument('--gamma', default=0.2, type=float, help='Gamma update for SGD') parser.add_argument('--log_iters', default=True, type=bool, help='Print the loss at each iteration') parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization') parser.add_argument('--data_root', default='/data4/lilin/my_code/realtime/', help='Location of VOC root directory') parser.add_argument('--save_root', default='/data4/lilin/my_code/realtime/realtime-lstm/saveucf24/', help='Location to save checkpoint models') parser.add_argument('--iou_thresh', default=0.5, type=float, help='Evaluation threshold') parser.add_argument('--conf_thresh', default=0.01, type=float, help='Confidence threshold for evaluation') parser.add_argument('--nms_thresh', default=0.45, type=float, help='NMS threshold') parser.add_argument('--topk', default=50, type=int, help='topk for evaluation') parser.add_argument('--clip_gradient', default=40, type=float, help='gradients clip') parser.add_argument('--resume', default="/data4/lilin/my_code/realtime/realtime-lstm/saveucf24/ucf101_CONV-SSD-ucf24-rgb-bs-32-vgg16-lr-00050_train_ssd_conv_lstm_01-06_epoch_11_checkpoint.pth.tar", type=str, help='Resume from checkpoint') parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') parser.add_argument('--epochs', default=35, type=int, metavar='N', help='number of total epochs to run') parser.add_argument('--eval_freq', default=2, type=int, metavar='N', help='evaluation frequency (default: 5)') parser.add_argument('--snapshot_pref', type=str, default="ucf101_vgg16_ssd300_") parser.add_argument('--lr_milestones', default=[-2, -5], type=float, help='initial learning rate') parser.add_argument('--arch', type=str, default="VGG16") parser.add_argument('--Finetune_SSD', default=False, 
type=str) parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set') print(__file__) file_name = (__file__).split('/')[-1] file_name = file_name.split('.')[0] print(file_name) ## Parse arguments args = parser.parse_args() ## set random seeds np.random.seed(args.man_seed) torch.manual_seed(args.man_seed) if args.cuda: torch.cuda.manual_seed_all(args.man_seed) if args.cuda and torch.cuda.is_available(): torch.set_default_tensor_type('torch.cuda.FloatTensor') else: torch.set_default_tensor_type('torch.FloatTensor') args.cfg = v2 args.train_sets = 'train' args.means = (104, 117, 123) num_classes = len(CLASSES) + 1 args.num_classes = num_classes args.stepvalues = [int(val) for val in args.stepvalues.split(',')] args.loss_reset_step = 30 args.eval_step = 10000 args.print_step = 10 args.data_root += args.dataset + '/' ## Define the experiment Name will used to same directory args.snapshot_pref = ('ucf101_CONV-SSD-{}-{}-bs-{}-{}-lr-{:05d}').format(args.dataset, args.modality, args.batch_size, args.basenet[:-14], int(args.lr*100000)) + '_' + file_name + '_' + day print (args.snapshot_pref) if not os.path.isdir(args.save_root): os.makedirs(args.save_root) net = build_ssd(300, args.num_classes) if args.Finetune_SSD is True: print ("load snapshot") pretrained_weights = "/data4/lilin/my_code/realtime/ucf24/rgb-ssd300_ucf24_120000.pth" pretrained_dict = torch.load(pretrained_weights) model_dict = net.state_dict() # 1. filter out unnecessary keys pretrained_dict_2 = {k: v for k, v in pretrained_dict.items() if k in model_dict } # 2. overwrite entries in the existing state dict # pretrained_dict_2['vgg.25.bias'] = pretrained_dict['vgg.24.bias'] # pretrained_dict_2['vgg.25.weight'] = pretrained_dict['vgg.24.weight'] # pretrained_dict_2['vgg.27.bias'] = pretrained_dict['vgg.26.bias'] # pretrained_dict_2['vgg.27.weight'] = pretrained_dict['vgg.26.weight'] # pretrained_dict_2['vgg.29.bias'] = pretrained_dict['vgg.28.bias'] # pretrained_dict_2['vgg.29.weight'] = pretrained_dict['vgg.28.weight'] # pretrained_dict_2['vgg.32.bias'] = pretrained_dict['vgg.31.bias'] # pretrained_dict_2['vgg.32.weight'] = pretrained_dict['vgg.31.weight'] # pretrained_dict_2['vgg.34.bias'] = pretrained_dict['vgg.33.bias'] # pretrained_dict_2['vgg.34.weight'] = pretrained_dict['vgg.33.weight'] model_dict.update(pretrained_dict_2) # 3. 
load the new state dict
    net.load_state_dict(model_dict)  # the original comment promised this step but never performed it
elif args.resume:
    if os.path.isfile(args.resume):
        print(("=> loading checkpoint '{}'".format(args.resume)))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        xxx = checkpoint['state_dict']
        net.load_state_dict(checkpoint['state_dict'])
        print(("=> loaded checkpoint '{}' (epoch {})"
               .format(args.resume, checkpoint['epoch'])))  # was args.evaluate, which printed the wrong field
    else:
        print(("=> no checkpoint found at '{}'".format(args.resume)))
elif args.modality == 'fastOF':
    print('Download the pretrained Brox-flow model weights and place them at:::=> ',
          args.data_root + 'train_data/brox_wieghts.pth')
    pretrained_weights = args.data_root + 'train_data/brox_wieghts.pth'
    print('Loading base network...')
    net.load_state_dict(torch.load(pretrained_weights))
else:
    vgg_weights = torch.load(args.data_root + 'train_data/' + args.basenet)
    print('Loading base network...')
    net.vgg.load_state_dict(vgg_weights)

if args.cuda:
    net = net.cuda()


def xavier(param):
    init.xavier_uniform_(param)  # in-place variant; init.xavier_uniform is deprecated


def weights_init(m):
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        m.bias.data.zero_()


print('Initializing weights for extra layers and HEADs...')
# initialize newly added layers' weights with the xavier method
# if args.Finetune_SSD is False:
#     net.extras.apply(weights_init)
#     net.loc.apply(weights_init)
#     net.conf.apply(weights_init)

parameter_dict = dict(net.named_parameters())  # network parameters as a dict keyed by name
params = []

# Train bias parameters at twice the learning rate and without weight decay
for name, param in parameter_dict.items():
    if name.find('bias') > -1:
        print(name, 'layer parameters will be trained @ {}'.format(args.lr * 2))
        params += [{'params': [param], 'lr': args.lr * 2, 'weight_decay': 0}]
    else:
        print(name, 'layer parameters will be trained @ {}'.format(args.lr))
        params += [{'params': [param], 'lr': args.lr,
                    'weight_decay': args.weight_decay}]

optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                      weight_decay=args.weight_decay)
criterion = MultiBoxLoss(args.num_classes, 0.5, True, 0, True, 3, 0.5, False,
                         args.cuda)
scheduler = MultiStepLR(optimizer, milestones=args.stepvalues, gamma=args.gamma)
# scheduler = LogLR(optimizer, lr_milestones=args.lr_milestones, total_epoch=args.epochs)

print('Loading Dataset...')
train_dataset = UCF24Detection(args.data_root, args.train_sets,
                               SSDAugmentation(args.ssd_dim, args.means),
                               AnnotationTransform(), input_type=args.modality)
val_dataset = UCF24Detection(args.data_root, 'test',
                             BaseTransform(args.ssd_dim, args.means),
                             AnnotationTransform(), input_type=args.modality,
                             full_test=False)
train_data_loader = data.DataLoader(train_dataset, args.batch_size,
                                    num_workers=args.num_workers,
                                    shuffle=False, collate_fn=detection_collate,
                                    pin_memory=True)
val_data_loader = data.DataLoader(val_dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=False, collate_fn=detection_collate,
                                  pin_memory=True)
print("train epoch_size: ", len(train_data_loader))
print('Training SSD on', train_dataset.name)

my_dict = copy.deepcopy(train_data_loader.dataset.train_vid_frame)
keys = list(my_dict.keys())
k_len = len(keys)
arr = np.arange(k_len)
xxx = copy.deepcopy(train_data_loader.dataset.ids)

log_file = open(args.save_root + args.snapshot_pref + "_training_" + day + ".log", "w", 1)
log_file.write(args.snapshot_pref + '\n')
for arg in vars(args):
    print(arg, getattr(args, arg))
    log_file.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
log_file.write(str(net))

torch.cuda.synchronize()
for epoch in range(args.start_epoch, args.epochs):
    train(train_data_loader, net, criterion, optimizer, scheduler, epoch)

    print('Saving state, epoch:', epoch)
    save_checkpoint({
        'epoch': epoch + 1,
        'arch': args.arch,
        'state_dict': net.state_dict(),
        'best_prec1': best_prec1,
    }, epoch=epoch)

    #### log lr ###
    # scheduler.step()

    # evaluate on validation set
    if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
        torch.cuda.synchronize()
        tvs = time.perf_counter()
        mAP, ap_all, ap_strs = validate(args, net, val_data_loader,
                                        val_dataset, epoch,
                                        iou_thresh=args.iou_thresh)
        # remember best prec@1 and save checkpoint
        is_best = mAP > best_prec1
        best_prec1 = max(mAP, best_prec1)
        print('Saving state, epoch:', epoch)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, epoch)

        for ap_str in ap_strs:
            print(ap_str)
            log_file.write(ap_str + '\n')
        ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
        print(ptr_str)
        log_file.write(ptr_str)

        torch.cuda.synchronize()
        t0 = time.perf_counter()
        prt_str = '\nValidation TIME::: {:0.3f}\n\n'.format(t0 - tvs)
        print(prt_str)
        log_file.write(prt_str)  # was ptr_str, which re-logged the mAP line instead of the timing

log_file.close()
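# save_checkpoint is defined elsewhere in this repo; below is a minimal sketch
# of the usual pattern, with a signature inferred from the two call sites
# above (state dict, optional is_best flag, epoch). The filename scheme is an
# assumption, not the repo's actual naming.
import shutil

def save_checkpoint(state, is_best=False, epoch=0):
    """Write the checkpoint dict; keep a separate copy of the best model."""
    filename = args.save_root + args.snapshot_pref + '_epoch_{}.pth.tar'.format(epoch)
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename,
                        args.save_root + args.snapshot_pref + '_best.pth.tar')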
def train():
    per_epoch_size = len(train_dataset) // args.batch_size
    start_epoch = 0
    iteration = 0
    step_index = 0

    basenet = basenet_factory(args.model)
    dsfd_net = build_net('train', cfg.NUM_CLASSES, args.model)
    net = dsfd_net

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        start_epoch = net.load_weights(args.resume)
        iteration = start_epoch * per_epoch_size
    else:
        base_weights = torch.load(args.save_folder + basenet)
        print('Load base network {}'.format(args.save_folder + basenet))
        if args.model == 'vgg':
            net.vgg.load_state_dict(base_weights)
        else:
            new_base_weights = OrderedDict()
            print("base weight len: ", len(base_weights))
            if len(base_weights) >= 267:
                # the pretrained model includes the fc layer; keep only the backbone weights
                i = 0
                for k, v in base_weights.items():
                    if i < 265:
                        i = i + 1
                        new_base_weights.update({k: v})
            else:
                new_base_weights = base_weights
            net.resnet.load_state_dict(new_base_weights)

    if args.cuda:
        if args.multigpu:
            net = torch.nn.DataParallel(dsfd_net)
        net = net.cuda()
        cudnn.benchmark = True  # was misspelled 'benckmark', which silently set a useless attribute

    if not args.resume:
        print('Initializing weights...')
        dsfd_net.extras.apply(dsfd_net.weights_init)
        dsfd_net.fpn_topdown.apply(dsfd_net.weights_init)
        dsfd_net.fpn_latlayer.apply(dsfd_net.weights_init)
        dsfd_net.fpn_fem.apply(dsfd_net.weights_init)
        dsfd_net.loc_pal1.apply(dsfd_net.weights_init)
        dsfd_net.conf_pal1.apply(dsfd_net.weights_init)
        dsfd_net.loc_pal2.apply(dsfd_net.weights_init)
        dsfd_net.conf_pal2.apply(dsfd_net.weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg, args.cuda)
    print('Loading wider dataset...')
    print('Using the specified args:')
    print(args)

    for step in cfg.LR_STEPS:
        if iteration > step:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

    net.train()
    for epoch in range(start_epoch, cfg.EPOCHES):
        losses = 0
        for batch_idx, (images, targets) in enumerate(train_loader):
            if args.cuda:
                images = images.cuda()
                targets = [ann.cuda() for ann in targets]

            if iteration in cfg.LR_STEPS:
                step_index += 1
                adjust_learning_rate(optimizer, args.gamma, step_index)

            t0 = time.time()
            out = net(images)
            # backprop: the first progressive anchor loss (PAL1) uses the first
            # three heads, the second (PAL2) uses the rest
            optimizer.zero_grad()
            loss_l_pal1, loss_c_pal1 = criterion(out[:3], targets)
            loss_l_pal2, loss_c_pal2 = criterion(out[3:], targets)
            loss = loss_l_pal1 + loss_c_pal1 + loss_l_pal2 + loss_c_pal2
            loss.backward()
            optimizer.step()
            t1 = time.time()
            losses += loss.item()  # .data[0] indexing fails on 0-dim tensors in modern PyTorch

            if iteration % 10 == 0:
                tloss = losses / (batch_idx + 1)
                print('Timer: %.4f' % (t1 - t0))
                print('epoch:' + repr(epoch) + ' || iter:' + repr(iteration) +
                      ' || Loss:%.4f' % (tloss))
                print('->> pal1 conf loss:{:.4f} || pal1 loc loss:{:.4f}'.format(
                    loss_c_pal1.item(), loss_l_pal1.item()))
                print('->> pal2 conf loss:{:.4f} || pal2 loc loss:{:.4f}'.format(
                    loss_c_pal2.item(), loss_l_pal2.item()))
                print('->>lr:{}'.format(optimizer.param_groups[0]['lr']))

            if iteration != 0 and iteration % 5000 == 0:
                print('Saving state, iter:', iteration)
                file = 'dsfd_' + repr(iteration) + '.pth'
                torch.save(dsfd_net.state_dict(),
                           os.path.join(save_folder, file))
            iteration += 1

        val(epoch, net, dsfd_net, criterion)
        if iteration == cfg.MAX_STEPS:
            break
def train(): if args.dataset == 'COCO': if args.dataset_root == VOC_ROOT: if not os.path.exists(COCO_ROOT): parser.error('Must specify dataset_root if specifying dataset') print("WARNING: Using default COCO dataset_root because " + "--dataset_root was not specified.") args.dataset_root = COCO_ROOT cfg = coco dataset = COCODetection(root=args.dataset_root, transform=SSDAugmentation( cfg['min_dim'], MEANS)) elif args.dataset == 'VOC': if args.dataset_root == COCO_ROOT: parser.error('Must specify dataset if specifying dataset_root') cfg = voc dataset = VOCDetection(root=args.dataset_root, transform=SSDAugmentation( cfg['min_dim'], MEANS)) if args.visdom: import visdom viz = visdom.Visdom() # ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes']) net = EfficientDet(num_class=cfg['num_classes']) if args.cuda: net = torch.nn.DataParallel(net) cudnn.benchmark = True # if args.resume: # print('Resuming training, loading {}...'.format(args.resume)) # ssd_net.load_weights(args.resume) # else: # vgg_weights = torch.load(args.save_folder + args.basenet) # print('Loading base network...') # ssd_net.vgg.load_state_dict(vgg_weights) if args.cuda: net = net.cuda() optimizer = optim.AdamW(net.parameters(), lr=args.lr) criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) net.train() # loss counters loc_loss = 0 conf_loss = 0 epoch = 0 print('Loading the dataset...') epoch_size = len(dataset) // args.batch_size print('Training SSD on:', dataset.name) print('Using the specified args:') print(args) step_index = 0 if args.visdom: vis_title = 'SSD.PyTorch on ' + dataset.name vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss'] iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend) epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend) data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) iteration = 0 for epoch in range(args.num_epoch): for idx, (images, targets) in enumerate(data_loader): if iteration in cfg['lr_steps']: step_index += 1 adjust_learning_rate(optimizer, args.gamma, step_index) if args.cuda: images = Variable(images.cuda()) targets = [ Variable(ann.cuda(), volatile=True) for ann in targets ] else: images = Variable(images) targets = [Variable(ann, volatile=True) for ann in targets] # forward t0 = time.time() out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c = criterion(out, targets) loss = loss_l + loss_c loss.backward() optimizer.step() t1 = time.time() loc_loss += loss_l conf_loss += loss_c if iteration % 10 == 0: print('timer: %.4f sec.' % (t1 - t0)) print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss), end=' ') if iteration != 0 and iteration % 5000 == 0: print('Saving state, iter:', iteration) torch.save(net.state_dict(), 'weights/Effi' + repr(idx) + '.pth') iteration += 1 torch.save(net.state_dict(), args.save_folder + '' + args.dataset + '.pth')
def train(): if not os.path.exists(args.save_folder): os.mkdir(args.save_folder) dataset = COCODetection(image_path=cfg.dataset.train_images, info_file=cfg.dataset.train_info, transform=SSDAugmentation(MEANS)) if args.validation_epoch > 0: setup_eval() val_dataset = COCODetection(image_path=cfg.dataset.valid_images, info_file=cfg.dataset.valid_info, transform=BaseTransform(MEANS)) # Parallel wraps the underlying module, but when saving and loading we don't want that yolact_net = Yolact() net = yolact_net net.train() if args.log: log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()), overwrite=(args.resume is None), log_gpu_stats=args.log_gpu) # I don't use the timer during training (I use a different timing method). # Apparently there's a race condition with multiple GPUs, so disable it just to be safe. timer.disable_all() # Both of these can set args.resume to None, so do them before the check if args.resume == 'interrupt': args.resume = SavePath.get_interrupt(args.save_folder) elif args.resume == 'latest': args.resume = SavePath.get_latest(args.save_folder, cfg.name) if args.resume is not None: print('Resuming training, loading {}...'.format(args.resume)) yolact_net.load_weights(args.resume) if args.start_iter == -1: args.start_iter = SavePath.from_str(args.resume).iteration else: print('Initializing weights...') yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.decay) criterion = MultiBoxLoss(num_classes=cfg.num_classes, pos_threshold=cfg.positive_iou_threshold, neg_threshold=cfg.negative_iou_threshold, negpos_ratio=cfg.ohem_negpos_ratio) if args.batch_alloc is not None: args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')] if sum(args.batch_alloc) != args.batch_size: print( 'Error: Batch allocation (%s) does not sum to batch size (%s).' % (args.batch_alloc, args.batch_size)) exit(-1) net = CustomDataParallel(NetLoss(net, criterion)) if args.cuda: net = net.cuda() # Initialize everything if not cfg.freeze_bn: yolact_net.freeze_bn() # Freeze bn so we don't kill our means yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda()) if not cfg.freeze_bn: yolact_net.freeze_bn(True) # loss counters loc_loss = 0 conf_loss = 0 iteration = max(args.start_iter, 0) last_time = time.time() epoch_size = len(dataset) // args.batch_size num_epochs = math.ceil(cfg.max_iter / epoch_size) # Which learning rate adjustment step are we on? 
lr' = lr * gamma ^ step_index step_index = 0 data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) save_path = lambda epoch, iteration, best=False: SavePath( cfg.name + '_best_' if best else '', epoch, iteration).get_path(root=args.save_folder) time_avg = MovingAverage() global loss_types # Forms the print order loss_avgs = {k: MovingAverage(100) for k in loss_types} print('Begin training!') print() # try-except so you can use ctrl+c to save early and stop training try: best_map = 0.0 for epoch in range(num_epochs): # Resume from start_iter if (epoch + 1) * epoch_size < iteration: continue for datum in data_loader: # Stop if we've reached an epoch if we're resuming from start_iter if iteration == (epoch + 1) * epoch_size: break # Stop at the configured number of iterations even if mid-epoch if iteration == cfg.max_iter: break # Change a config setting if we've reached the specified iteration changed = False for change in cfg.delayed_settings: if iteration >= change[0]: changed = True cfg.replace(change[1]) # Reset the loss averages because things might have changed for avg in loss_avgs: avg.reset() # If a config setting was changed, remove it from the list so we don't keep checking if changed: cfg.delayed_settings = [ x for x in cfg.delayed_settings if x[0] > iteration ] # Warm up by linearly interpolating the learning rate from some smaller value if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until: set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init) # Adjust the learning rate at the given iterations, but also if we resume from past that iteration while step_index < len( cfg.lr_steps ) and iteration >= cfg.lr_steps[step_index]: step_index += 1 set_lr(optimizer, args.lr * (args.gamma**step_index)) # Zero the grad to get ready to compute gradients optimizer.zero_grad() # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss) losses = net(datum) losses = {k: (v).mean() for k, v in losses.items() } # Mean here because Dataparallel loss = sum([losses[k] for k in losses]) # no_inf_mean removes some components from the loss, so make sure to backward through all of it # all_loss = sum([v.mean() for v in losses.values()]) # Backprop loss.backward( ) # Do this to free up vram even if loss is not finite if torch.isfinite(loss).item(): optimizer.step() # Add the loss to the moving average for bookkeeping for k in losses: loss_avgs[k].add(losses[k].item()) cur_time = time.time() elapsed = cur_time - last_time last_time = cur_time # Exclude graph setup from the timing information if iteration != args.start_iter: time_avg.add(elapsed) if iteration % 10 == 0: eta_str = str( datetime.timedelta(seconds=(cfg.max_iter - iteration) * time_avg.get_avg())).split('.')[0] total = sum([loss_avgs[k].get_avg() for k in losses]) loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], []) print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f') % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True) if args.log: precision = 5 loss_info = { k: round(losses[k].item(), precision) for k in losses } loss_info['T'] = round(losses[k].item(), precision) if args.log_gpu: log.log_gpu_stats = (iteration % 10 == 0 ) # nvidia-smi is sloooow log.log('train', loss=loss_info, epoch=epoch, iter=iteration, lr=round(cur_lr, 10), elapsed=elapsed) log.log_gpu_stats 
= args.log_gpu iteration += 1 if iteration % args.save_interval == 0 and iteration != args.start_iter: if args.keep_latest: latest = SavePath.get_latest(args.save_folder, cfg.name) print('Saving state, iter:', iteration) yolact_net.save_weights(save_path(epoch, iteration)) if args.keep_latest and latest is not None: if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval: print('Deleting old save...') os.remove(latest) # This is done per epoch if args.validation_epoch > 0: if epoch % args.validation_epoch == 0 and epoch > 0: mask_map = compute_validation_map( epoch, iteration, yolact_net, val_dataset, log if args.log else None) if mask_map > best_map: print('Best map raise from {} to {}'.format( best_map, mask_map)) yolact_net.save_weights( save_path(epoch, iteration, True)) best_map = mask_map else: print('map: {} (Best map {})'.format( mask_map, best_map)) # Compute validation mAP after training is finished compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None) except KeyboardInterrupt: if args.interrupt: print('Stopping early. Saving network...') # Delete previous copy of the interrupted network so we don't spam the weights folder SavePath.remove_interrupt(args.save_folder) yolact_net.save_weights( save_path(epoch, repr(iteration) + '_interrupt')) exit() yolact_net.save_weights(save_path(epoch, iteration))
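# set_lr and MovingAverage are used by the training loop above but defined
# elsewhere in this repo. Minimal sketches consistent with the call sites are
# given below; treat them as reference implementations under those
# assumptions, not the verbatim originals.
def set_lr(optimizer, new_lr):
    """Apply new_lr to every param group; track it globally for logging."""
    global cur_lr
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    cur_lr = new_lr


class MovingAverage:
    """Keep a sliding window of recent values and report their mean
    (used above to smooth per-iteration timing for the ETA estimate)."""

    def __init__(self, max_window_size=1000):
        self.max_window_size = max_window_size
        self.window = []

    def add(self, elem):
        self.window.append(elem)
        if len(self.window) > self.max_window_size:
            self.window.pop(0)

    def get_avg(self):
        return sum(self.window) / max(len(self.window), 1)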
m.bias.data.zero_() if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method if net._extras: net._extras.apply(weights_init) net._loc.apply(weights_init) net._conf.apply(weights_init) optimizer = optim.SGD(parallel_net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg, num_classes, image_size, 0.35, True, 0, True, 3, 0.5, False, args.cuda) def train(): parallel_net.train() # loss counters # loc_loss = 0 # epoch # conf_loss = 0 # epoch = 0: print('Loading Dataset...') dataset = GetDataset(args.voc_root, SSDAugmentation(image_size, means, type=args.img_type), AnnotationTransform(), type=args.img_type)
elif args.optimizer.lower() == 'adam':
    # args.lr = 1e-3
    optimizer = optim.Adam(params=curParams)
elif args.optimizer.lower() == 'rmsprop':
    # args.lr = 1e-2
    optimizer = optim.RMSprop(params=curParams)
elif args.optimizer.lower() in ['yellowfin', 'yf']:
    optimizer = YFOptimizer(curParams, lr=args.lr, mu=0.0,
                            weight_decay=weight_decay, clip_thresh=2.0,
                            curv_win_width=20)
else:
    raise Exception('Unsupported optimizer type encountered: ' + args.optimizer)

criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False,
                         args.cuda)


def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')
if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method ssd_net.extras.apply(weights_init) ssd_net.loc.apply(weights_init) ssd_net.conf.apply(weights_init) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False, variance=[0.1, 0.2], use_gpu=args.cuda) def train(): net.train() # loss counters loc_loss = 0 # epoch conf_loss = 0 epoch = 0 print('Loading Dataset...') dataset = NexarDetection(root, train_sets, SSDAugmentation(ssd_dim, means),