def train():
    """Run the SSD training loop over the configured dataset.

    Relies on module-level globals: `net`, `optimizer`, `criterion`, `priors`,
    `args`, `viz`, `log_file_path`, `save_folder`, `batch_size` and the
    preprocessing/dataset constructors.  Trains for `args.max_epoch` epochs,
    steps the LR at dataset-specific iteration milestones, optionally plots
    losses to visdom, logs to a text file, and checkpoints periodically plus
    once at the end.  Returns None.
    """
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    # Resuming: epoch counter starts at the epoch the checkpoint was saved at.
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR-decay milestones, expressed in iterations (epochs * iters/epoch).
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    # Tuple-indexing by a bool selects the COCO schedule when True.
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot (per-iteration and per-epoch windows)
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(xlabel='Epoch',
                                       ylabel='Loss',
                                       title='Epoch SSD Training Loss',
                                       legend=['Loc Loss', 'Conf Loss',
                                               'Loss']))
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    # Appended (not truncated), so resumed runs extend the same log file.
    log_file = open(log_file_path, 'a')
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator (a fresh DataLoader each epoch reshuffles)
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            if epoch % args.save_frequency == 0 and epoch > 0:
                torch.save(
                    net.state_dict(),
                    save_folder + args.version + '_' + args.dataset +
                    '_epoches_' + repr(epoch) + '.pth')
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
            if args.visdom:
                # Epoch-averaged losses plotted only at LR-step iterations.
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss, loc_loss + conf_loss
                                    ]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append')
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        if args.cuda:
            images = Variable(images.cuda())
            # NOTE(review): volatile=True on targets is legacy PyTorch<=0.3
            # autograd API; targets need no gradients, so this is intentional.
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        # .data[0] is the PyTorch<=0.3 scalar accessor (.item() in >=0.4).
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.data[0], loss_c.data[0]) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
            log_file.write('Epoch:' + repr(epoch) + ' || epochiter: ' +
                           repr(iteration % epoch_size) + '/' +
                           repr(epoch_size) + '|| Totel iter ' +
                           repr(iteration) + ' || L: %.4f C: %.4f||' %
                           (loss_l.data[0], loss_c.data[0]) +
                           'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                           'LR: %.8f' % (lr) + '\n')
        if args.visdom and args.send_images_to_visdom:
            random_batch_index = np.random.randint(images.size(0))
            viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot,
                     update='append')
            if iteration == 0:
                # Re-initialize the epoch window on the very first iteration.
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor(
                             [loc_loss, conf_loss,
                              loc_loss + conf_loss]).unsqueeze(0).cpu(),
                         win=epoch_lot,
                         update=True)
    log_file.close()
    torch.save(
        net.state_dict(),
        save_folder + 'Final_' + args.version + '_' + args.dataset + '.pth')
def main():
    """Evaluate a trained DSSD text detector on one dataset.

    Parses CLI args, builds the chosen dataset/config, loads the network from
    `--resume`, runs inference image by image via `eval_img`, rescales the
    detected polygon boxes back to the original image size, and writes one
    `res_<name>.txt` per image under `<out>/<epoch>/res/`.  Output format
    depends on the dataset (axis-aligned for ic13, polygon+score for coco,
    raw polygon otherwise).
    """
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Testing')
    parser.add_argument('--resume',
                        dest='resume',
                        help='initialize with pretrained model weights',
                        default='./weights/ic15_90_15.pth',
                        type=str)
    parser.add_argument('--version',
                        dest='version',
                        help='512x512, 768x768, 768x1280, 1280x1280',
                        default='768x1280',
                        type=str)
    parser.add_argument('--dataset',
                        dest='dataset',
                        help='ic15, ic13, td500, coco',
                        default='ic15',
                        type=str)
    parser.add_argument('--works',
                        dest='num_workers',
                        help='num_workers to load data',
                        default=1,
                        type=int)
    parser.add_argument('--test_batch_size',
                        dest='test_batch_size',
                        help='train_batch_size',
                        default=1,
                        type=int)
    parser.add_argument('--out',
                        dest='out',
                        help='output file dir',
                        default='./outputs_eval/ic15/',
                        type=str)
    parser.add_argument('--log_file_dir',
                        dest='log_file_dir',
                        help='log_file_dir',
                        default='./logs/',
                        type=str)
    parser.add_argument('--ssd_dim', default=512, type=int, help='ssd dim')
    #parser.add_argument('--root', default='../../DataSets/text_detect/',type=str, help='Location of data root directory')
    parser.add_argument('--ic_root',
                        default='../data/ocr/detection/',
                        type=str,
                        help='Location of data root directory')
    # parser.add_argument('--ic_root', default='/home/lvpengyuan/research/text/',type=str, help='Location of data root directory')
    parser.add_argument('--td_root',
                        default='/home/lpy/Datasets/TD&&TR/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--coco_root',
                        default='/home/lpy/Datasets/coco-text/',
                        type=str,
                        help='Location of data root direction')
    args = parser.parse_args()
    cuda = torch.cuda.is_available()
    ## setup logger: one timestamped log file per run
    if os.path.exists(args.log_file_dir) == False:
        os.mkdir(args.log_file_dir)
    log_file_path = args.log_file_dir + 'eval_' + time.strftime(
        '%Y%m%d_%H%M%S') + '.log'
    setup_logger(log_file_path)
    # Pick the network config matching the requested input resolution.
    if args.version == '512x512':
        cfg = cfg_512x512
    elif args.version == '768x768':
        cfg = cfg_768x768
    elif args.version == '1280x1280':
        cfg = cfg_1280x1280
    elif args.version == '768x1280':
        cfg = cfg_768x1280
    else:
        exit()
    ssd_dim = args.ssd_dim
    means = (104, 117, 123)
    # Dataset + loader; shuffle=False keeps output order aligned with files.
    if args.dataset == 'ic15':
        dataset = ICDARDetection(args.ic_root, 'val', None, None, '15',
                                 dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    elif args.dataset == 'ic13':
        dataset = ICDARDetection(args.ic_root, 'val', None, None, '13',
                                 dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    elif args.dataset == 'td500':
        dataset = TD500Detection(args.td_root, 'val', None, None, aug=False,
                                 dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    elif args.dataset == 'coco':
        dataset = COCODetection(args.coco_root, 'test', dim=cfg['min_dim'])
        data_loader = data.DataLoader(dataset,
                                      args.test_batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False,
                                      pin_memory=True)
    else:
        exit()
    logging.info('dataset initialize done.')
    ## setup mode
    net = build_dssd('test', cfg, ssd_dim, 2)
    logging.info('loading {}...'.format(args.resume))
    net.load_weights(args.resume)
    # Rotated position-sensitive RoI pooling used during box refinement.
    rpsroi_pool = RPSRoIPool(2, 2, 1, 2, 1)
    if cuda:
        net = net.cuda()
        rpsroi_pool = rpsroi_pool.cuda()
    net.eval()
    rpsroi_pool.eval()
    # Result directories are keyed by the checkpoint's epoch suffix
    # (e.g. 'ic15_90_15.pth' -> '15').
    if os.path.exists(args.out) == False:
        os.makedirs(args.out)
    save_dir = args.out + '/' + args.resume.strip().split('_')[-1].split(
        '.')[0] + '/'
    if os.path.exists(save_dir) == False:
        os.mkdir(save_dir)
    seg_dir = save_dir + 'seg/'
    box_dir = save_dir + 'box/'
    res_dir = save_dir + 'res/'
    if os.path.exists(seg_dir) == False:
        os.mkdir(seg_dir)
        os.mkdir(box_dir)
        os.mkdir(res_dir)
    logging.info('eval begin')
    for i, sample in enumerate(data_loader, 0):
        img, image_name, ori_h, ori_w = sample
        # print(image_name)
        if i % 100 == 0:
            print(i, len(data_loader))
        # h/w are the network-input size; ori_h/ori_w the original image size.
        h, w = img.size(2), img.size(3)
        if cuda:
            img = img.cuda()
        img = Variable(img)
        out, seg_pred, seg_map = net(img)
        save_name = image_name[0].split('/')[-1].split('.')[0]
        candidate_box = eval_img(out, seg_pred, seg_map, rpsroi_pool, img,
                                 save_name, seg_dir, box_dir, vis=True)
        # format output
        if args.dataset == 'coco':
            # coco-text image names end in a numeric id; strip leading zeros.
            save_name = save_name.strip().split('_')[-1]
            save_name = str(int(save_name))
        res_name = res_dir + '/' + 'res_' + save_name + '.txt'
        fp = open(res_name, 'w')
        for box in candidate_box:
            # box layout appears to be [x1,y1,x2,y2,...,score]: even indices
            # are x coords, odd are y, last element the score — TODO confirm
            # against eval_img.
            temp_x = []
            temp_y = []
            temp = []
            for j in range(len(box) - 1):
                if j % 2 == 0:
                    temp_x.append(int(box[j] * ori_w[0] / w))
                    temp.append(str(int(box[j] * ori_w[0] / w)))
                else:
                    temp_y.append(int(box[j] * ori_h[0] / h))
                    temp.append(str(int(box[j] * ori_h[0] / h)))
            if args.dataset == 'ic13':
                # ic13 expects axis-aligned xmin,ymin,xmax,ymax
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y))
                ]) + '\n')
            elif args.dataset == 'coco':
                # coco output adds the confidence score
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y)),
                    str(box[-1])
                ]) + '\n')
            else:
                # polygon datasets (ic15/td500) keep all vertex coordinates
                fp.write(','.join(temp) + '\n')
        fp.close()
    logging.info('evaluate done')
# Module-level training setup: detector, optimizer, loss, priors, datasets.
# These globals are consumed by the train() function defined below.
detector = Detect(num_classes, 0, cfg)
optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.weight_decay)
criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
priorbox = PriorBox(cfg)
priors = Variable(priorbox.forward())
# dataset
print('Loading Dataset...')
if args.dataset == 'VOC':
    testset = VOCDetection(
        VOCroot, [('2007', 'test')], None, AnnotationTransform())
    train_dataset = VOCDetection(VOCroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p), AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(
        COCOroot, [('2017', 'val')], None)
    #testset = COCODetection(COCOroot, [('2017', 'test-dev')], None)
    train_dataset = COCODetection(COCOroot, train_sets, preproc(
        img_dim, rgb_means, rgb_std, p))
else:
    print('Only VOC and COCO are supported now!')
    exit()


def train():
    # NOTE(review): this definition appears truncated at the end of this
    # source chunk — the training loop body is not visible here.
    net.train()
    # loss counters
    epoch = 0
    if args.resume_net:
        epoch = 0 + args.resume_epoch
    epoch_size = len(train_dataset) // args.batch_size
# optimizer = optim.RMSprop(net.parameters(), lr=args.lr,alpha = 0.9, eps=1e-08, # momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False) priorbox = PriorBox(cfg) priors = Variable(priorbox.forward(), volatile=True) # dataset print('Loading Dataset...') if args.dataset == 'VOC': testset = VOCDetection(VOCroot, [('2007', 'test')], None, AnnotationTransform()) train_dataset = VOCDetection( VOCroot, train_sets, preproc(img_dim, rgb_means, p=p, rgb_std=rgb_std), AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection(COCOroot, [('2014', 'valminusminival')], None) train_dataset = COCODetection( COCOroot, train_sets, preproc(img_dim, rgb_means, p=p, rgb_std=rgb_std)) else: print('Only VOC and COCO are supported now!') exit() def train(): net.train() # loss counters loc_loss = 0 # epoch conf_loss = 0 epoch = 0 if args.resume_net:
def train():
    """Train the detector, tracking a per-epoch average-loss history.

    Uses module globals (`net`, `optimizer`, `criterion`, `priors`, `args`,
    `batch_size`, dataset roots).  At every epoch boundary it records the
    average (loc+conf) loss of the previous epoch, prints the last few
    averages, saves a checkpoint every 10 epochs, and saves a final model
    when the loop ends.  Returns None.
    """
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR-decay milestones in iterations; COCO decays on a different schedule.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (100 * epoch_size, 135 * epoch_size, 170 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
        # Fast-forward step_index past milestones already crossed at resume,
        # so the resumed LR matches where training left off.
        for sv in stepvalues:
            if start_iter > sv:
                step_index += 1
                continue
            else:
                break
    else:
        start_iter = 0
    lr = args.lr
    avg_loss_list = []
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator (fresh shuffle each epoch)
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            # Average loss of the just-finished epoch (0 on the first pass,
            # since the counters start at zero).
            avg_loss = (loc_loss + conf_loss) / epoch_size
            avg_loss_list.append(avg_loss)
            print("avg_loss_list:")
            if len(avg_loss_list) <= 5:
                print(avg_loss_list)
            else:
                print(avg_loss_list[-5:])
            loc_loss = 0
            conf_loss = 0
            # NOTE(review): with no `epoch > 0` guard this also saves an
            # (untrained) checkpoint at epoch 0 — confirm that is intended.
            if (epoch % 10 == 0):
                torch.save(
                    net.state_dict(),
                    args.save_folder + args.version + '_' + args.dataset +
                    '_epoches_' + repr(epoch) + '.pth')
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        out = net(images)
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        # if epoch > args.warm_epoch:
        #     updateBN()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print(
                'Epoch:' + repr(epoch) + ' || epochiter: ' +
                repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                '|| Totel iter ' + repr(iteration) +
                ' || L: %.4f C: %.4f S: %.4f||' %
                (loss_l.item(), loss_c.item(),
                 loss_l.item() + loss_c.item()) +
                'Batch time: %.4f ||' % (load_t1 - load_t0) +
                'LR: %.7f' % (lr))
    torch.save(
        net.state_dict(),
        args.save_folder + 'Final_' + args.version + '_' + args.dataset +
        '.pth')
def train():
    """Train a two-stage (coarse + refine) detector.

    Stage 0 (`criterion[0]`, `out[0]`) matches against the fixed priors —
    optionally class-agnostic when the global `C_agnostic` is set; stage 1
    (`criterion[1]`, `out[1]`) matches against anchors refined by decoding
    stage-0 regression outputs.  Progress is printed and mirrored to the
    module-level `f_writer` log.  Checkpoints are saved periodically and once
    at the end; `f_writer` is closed on completion.  Returns None.
    """
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    f_writer.write('Loading Dataset...\n')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets, preproc(
            img_dim, rgb_means, p), AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets, preproc(
            img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR-decay milestones in iterations.
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    f_writer.write('Training' + args.version + 'on' + dataset.name + '\n')
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    # Index 0 = first (anchor-refine) stage, index 1 = second (final) stage.
    loss = [None] * 2
    loss_l = [None] * 2
    loss_c = [None] * 2
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset, batch_size, shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            # Save every 40 epochs, and more often (every 10) late in training.
            if (epoch % 40 == 0 and epoch > 0) or (epoch % 10 == 0
                                                   and epoch > 200):
                torch.save(
                    net.state_dict(),
                    args.save_folder + args.version + '_' + args.dataset +
                    '_epoches_' + repr(epoch) +
                    '_refine_agnostic_{}.pth.{}'.format(
                        C_agnostic, args.extra))
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data: targets[1] are the raw annotations; targets[0]
        # are the stage-0 labels (class-agnostic copies when C_agnostic).
        targets = [None] * 2
        images, targets[1] = next(batch_iterator)
        targets[0] = [None] * len(targets[1])
        if C_agnostic:
            for i in range(len(targets[1])):
                targets[0][i] = targets[1][i].clone()
                # Collapse all foreground labels (>=1) to a single class.
                targets[0][i][:, 4] = targets[0][i][:, 4].ge(1)
        else:
            targets[0] = targets[1]
        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        if args.cuda:
            images = Variable(images.cuda())
            # volatile=True: legacy PyTorch<=0.3 "no grad needed" flag.
            targets[0] = [
                Variable(anno.cuda(), volatile=True) for anno in targets[0]
            ]
            targets[1] = [
                Variable(anno.cuda(), volatile=True) for anno in targets[1]
            ]
        else:
            images = Variable(images)
            targets[0] = [
                Variable(anno, volatile=True) for anno in targets[0]
            ]
            targets[1] = [
                Variable(anno, volatile=True) for anno in targets[1]
            ]
        # forward
        t0 = time.time()
        out = net(images)
        ### calculation refined anchors
        # loc_data = Variable(out[0][0].data.clone(), volatile=True)
        # .data.clone() detaches stage-0 regression output from the graph.
        loc_data = out[0][0].data.clone()
        conf_data = Variable(out[0][1].data.clone(), volatile=True)
        ## decode and clamp: refined priors for the second matching stage
        r_priors = decode(loc_data, priors.data, cfg['variance'])
        if args.bp_anchors:
            # Allow gradients to flow back through the refined anchors.
            r_priors = Variable(r_priors, requires_grad=True)
        else:
            r_priors = Variable(r_priors, volatile=True)
        # for i in range(loc_data.size(0)):
        #     z = box_utils.decode(loc_data.data[i,:,:], priors.data, cfg['variance'])
        # # loc_data[i,:,:].clamp_(0,1)
        # backprop: both stage losses summed into one backward pass.
        optimizer.zero_grad()
        loss_l[0], loss_c[0], pass_index = criterion[0](out[0], priors,
                                                        targets[0])
        loss[0] = loss_l[0] + loss_c[0]
        # pass_index forwards stage-0's positive matches to stage 1.
        loss_l[1], loss_c[1], _ = criterion[1](out[1], r_priors, targets[1],
                                               pass_index)
        loss[1] = loss_l[1] + loss_c[1]
        loss_total = loss[0] + loss[1]
        loss_total.backward()
        optimizer.step()
        t1 = time.time()
        # loc_loss += loss_l.data[0]
        # conf_loss += loss_c.data[0]
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L1: %.4f C1: %.4f||' %
                  (loss_l[0].data[0], loss_c[0].data[0]) +
                  ' || L2: %.4f C2: %.4f||' %
                  (loss_l[1].data[0], loss_c[1].data[0]) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
            f_writer.write('Epoch:' + repr(epoch) + ' || epochiter: ' +
                           repr(iteration % epoch_size) + '/' +
                           repr(epoch_size) + '|| Totel iter ' +
                           repr(iteration) + ' || L1: %.4f C1: %.4f||' %
                           (loss_l[0].data[0], loss_c[0].data[0]) +
                           ' || L2: %.4f C2: %.4f||' %
                           (loss_l[1].data[0], loss_c[1].data[0]) +
                           'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                           'LR: %.8f' % (lr) + '\n')
    torch.save(
        net.state_dict(),
        args.save_folder + 'Final_' + args.version + '_' + args.dataset +
        '_refine_agnostic_{}.pth.{}'.format(C_agnostic, args.extra))
    f_writer.write('training finished!\n')
    f_writer.close()
# NOTE(review): the next statement is the tail of a function whose definition
# lies outside this chunk (presumably the evaluation routine that fills
# `all_boxes`); its original indentation is not visible here.
dataset.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # load net
    num_classes = len(labelmap) + 1  # +1 for background
    # net = build_refinedet('test', int(args.input_size), num_classes)  # initialize SSD
    # Class count hard-coded to 81 (COCO's 80 classes + background).
    net = build_refinedet('test', int(args.input_size), 81)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    dataset = COCODetection(
        root=COCO_ROOT,
        image_set="val2017",
        transform=BaseTransform(320, dataset_mean),
    )
    # NOTE(review): rmtree fails if ./result does not exist — confirm the
    # directory is guaranteed to be present before each run.
    shutil.rmtree("./result")
    os.mkdir("./result")
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    # NOTE(review): this call is truncated at the end of this source chunk —
    # the remaining arguments are outside the visible region.
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, dataset_mean), args.top_k,
def Train(self, epochs=200, log_iters=True, output_weights_dir="weights", saved_epoch_interval=10):
    """Build an RFB-Net from `self.system_dict` settings and train it on COCO.

    Args:
        epochs: total number of training epochs (stored as `max_epoch`).
        log_iters: stored in params; logging flag.
        output_weights_dir: directory for checkpoint files (created if absent).
        saved_epoch_interval: print a progress line every N iterations.

    Side effects: creates the weights dir, loads base/resume weights, trains,
    writes an 'intermediate' checkpoint every epoch and a 'Final_*' checkpoint
    at the end.  Returns None.
    """
    self.system_dict["params"]["max_epoch"] = epochs
    self.system_dict["params"]["log_iters"] = log_iters
    self.system_dict["params"]["save_folder"] = output_weights_dir
    if not os.path.exists(self.system_dict["params"]["save_folder"]):
        os.mkdir(self.system_dict["params"]["save_folder"])
    if (self.system_dict["params"]["size"] == 300):
        cfg = COCO_300
    else:
        cfg = COCO_512
    # Select the backbone implementation; imported lazily by version name.
    if self.system_dict["params"]["version"] == 'RFB_vgg':
        from models.RFB_Net_vgg import build_net
    elif self.system_dict["params"]["version"] == 'RFB_E_vgg':
        from models.RFB_Net_E_vgg import build_net
    elif self.system_dict["params"]["version"] == 'RFB_mobile':
        from models.RFB_Net_mobile import build_net
        cfg = COCO_mobile_300
    else:
        print('Unkown version!')
    img_dim = (300, 512)[self.system_dict["params"]["size"] == 512]
    # Mobile variant uses MobileNet's RGB means and a different aug prob.
    rgb_means = ((104, 117, 123), (
        103.94, 116.78,
        123.68))[self.system_dict["params"]["version"] == 'RFB_mobile']
    p = (0.6, 0.2)[self.system_dict["params"]["version"] == 'RFB_mobile']
    # Class count derived from the dataset's classes.txt (+1 background).
    f = open(
        self.system_dict["dataset"]["train"]["root_dir"] + "/" +
        self.system_dict["dataset"]["train"]["coco_dir"] +
        "/annotations/classes.txt", 'r')
    lines = f.readlines()
    if (lines[-1] == ""):
        num_classes = len(lines) - 1
    else:
        num_classes = len(lines) + 1
    batch_size = self.system_dict["params"]["batch_size"]
    weight_decay = self.system_dict["params"]["weight_decay"]
    gamma = self.system_dict["params"]["gamma"]
    momentum = self.system_dict["params"]["momentum"]
    self.system_dict["local"]["net"] = build_net('train', img_dim,
                                                 num_classes)
    if self.system_dict["params"]["resume_net"] == None:
        # Fresh run: load ImageNet-pretrained base, re-init the new heads.
        base_weights = torch.load(self.system_dict["params"]["basenet"])
        print('Loading base network...')
        self.system_dict["local"]["net"].base.load_state_dict(base_weights)

        def xavier(param):
            # Helper kept for parity with upstream; not applied below.
            init.xavier_uniform(param)

        def weights_init(m):
            # Kaiming-init conv weights, unit-init BN scales, zero biases.
            for key in m.state_dict():
                if key.split('.')[-1] == 'weight':
                    if 'conv' in key:
                        init.kaiming_normal_(m.state_dict()[key],
                                             mode='fan_out')
                    if 'bn' in key:
                        m.state_dict()[key][...] = 1
                elif key.split('.')[-1] == 'bias':
                    m.state_dict()[key][...] = 0

        print('Initializing weights...')
        # initialize newly added layers' weights with kaiming_normal method
        self.system_dict["local"]["net"].extras.apply(weights_init)
        self.system_dict["local"]["net"].loc.apply(weights_init)
        self.system_dict["local"]["net"].conf.apply(weights_init)
        self.system_dict["local"]["net"].Norm.apply(weights_init)
        if self.system_dict["params"]["version"] == 'RFB_E_vgg':
            self.system_dict["local"]["net"].reduce.apply(weights_init)
            self.system_dict["local"]["net"].up_reduce.apply(weights_init)
    else:
        # load resume network
        print('Loading resume network...')
        state_dict = torch.load(self.system_dict["params"]["resume_net"])
        # create new OrderedDict that does not contain `module.`
        # (checkpoints saved from DataParallel prefix every key).
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        self.system_dict["local"]["net"].load_state_dict(new_state_dict)
    if self.system_dict["params"]["ngpu"] > 1:
        self.system_dict["local"]["net"] = torch.nn.DataParallel(
            self.system_dict["local"]["net"],
            device_ids=list(range(self.system_dict["params"]["ngpu"])))
    if self.system_dict["params"]["cuda"]:
        self.system_dict["local"]["net"].cuda()
        cudnn.benchmark = True
    optimizer = optim.SGD(
        self.system_dict["local"]["net"].parameters(),
        lr=self.system_dict["params"]["lr"],
        momentum=self.system_dict["params"]["momentum"],
        weight_decay=self.system_dict["params"]["weight_decay"])
    #optimizer = optim.RMSprop(self.system_dict["local"]["net"].parameters(), lr=self.system_dict["params"]["lr"], alpha = 0.9, eps=1e-08,
    #                      momentum=self.system_dict["params"]["momentum"], weight_decay=self.system_dict["params"]["weight_decay"])
    criterion = MultiBoxLoss(num_classes, 0.5, True, 0, True, 3, 0.5, False)
    priorbox = PriorBox(cfg)
    # Priors are constant; compute them outside the autograd graph.
    with torch.no_grad():
        priors = priorbox.forward()
        if self.system_dict["params"]["cuda"]:
            priors = priors.cuda()
    self.system_dict["local"]["net"].train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + self.system_dict["params"]["resume_epoch"]
    print('Loading Dataset...')
    # Stale annotation cache from a previous run would shadow new data.
    if (os.path.isdir("coco_cache")):
        os.system("rm -r coco_cache")
    dataset = COCODetection(
        self.system_dict["dataset"]["train"]["root_dir"],
        self.system_dict["dataset"]["train"]["coco_dir"],
        self.system_dict["dataset"]["train"]["set_dir"],
        preproc(img_dim, rgb_means, p))
    epoch_size = len(dataset) // self.system_dict["params"]["batch_size"]
    max_iter = self.system_dict["params"]["max_epoch"] * epoch_size
    # LR-decay milestones in iterations.
    stepvalues = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    print('Training', self.system_dict["params"]["version"], 'on',
          dataset.name)
    step_index = 0
    if self.system_dict["params"]["resume_epoch"] > 0:
        start_iter = self.system_dict["params"]["resume_epoch"] * epoch_size
    else:
        start_iter = 0
    lr = self.system_dict["params"]["lr"]
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator (fresh shuffle each epoch)
            batch_iterator = iter(
                data.DataLoader(
                    dataset,
                    batch_size,
                    shuffle=True,
                    num_workers=self.system_dict["params"]["num_workers"],
                    collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            # A single rolling 'intermediate' checkpoint, overwritten each
            # epoch.
            torch.save(
                self.system_dict["local"]["net"].state_dict(),
                self.system_dict["params"]["save_folder"] + "/" +
                self.system_dict["params"]["version"] + '_' +
                self.system_dict["params"]["dataset"] + '_epoches_' +
                'intermediate' + '.pth')
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = self.adjust_learning_rate(optimizer,
                                       self.system_dict["params"]["gamma"],
                                       epoch, step_index, iteration,
                                       epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        #print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets]))
        if self.system_dict["params"]["cuda"]:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = self.system_dict["local"]["net"](images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % saved_epoch_interval == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Current iter ' + repr(iteration) + '|| Total iter ' +
                  repr(max_iter) + ' || L: %.4f C: %.4f||' %
                  (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
    torch.save(
        self.system_dict["local"]["net"].state_dict(),
        self.system_dict["params"]["save_folder"] + "/" + 'Final_' +
        self.system_dict["params"]["version"] + '_' +
        self.system_dict["params"]["dataset"] + '.pth')
def main():
    """Multi-scale DSSD text-detection evaluation with polygon NMS.

    Runs inference once per config in `cfgs` (multi-scale; currently only
    768x768 is enabled), accumulating per-image boxes rescaled to original
    image coordinates, then applies `ploy_nms` across all scales, optionally
    saves visualizations, and writes per-image `res_*.txt` files in the
    dataset-specific format.
    """
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Testing')
    parser.add_argument('--resume',
                        dest='resume',
                        help='initialize with pretrained model weights',
                        default='./weights/ic13_60.pth',
                        type=str)
    parser.add_argument('--version',
                        dest='version',
                        help='512x512, 768x768, 768x1280, 1280x1280',
                        default='768x768',
                        type=str)
    parser.add_argument('--dataset',
                        dest='dataset',
                        help='ic15, ic13, td500, coco, mlt',
                        default='ic13',
                        type=str)
    parser.add_argument('--works',
                        dest='num_workers',
                        help='num_workers to load data',
                        default=1,
                        type=int)
    parser.add_argument('--test_batch_size',
                        dest='test_batch_size',
                        help='train_batch_size',
                        default=1,
                        type=int)
    parser.add_argument('--out',
                        dest='out',
                        help='output file dir',
                        default='./outputs/imgs/ic13/',
                        type=str)
    parser.add_argument('--log_file_dir',
                        dest='log_file_dir',
                        help='log_file_dir',
                        default='./logs/',
                        type=str)
    parser.add_argument('--ssd_dim', default=512, type=int, help='ssd dim')
    parser.add_argument('--ic_root',
                        default='../data/ocr/detection/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--td_root',
                        default='/home/lpy/Datasets/TD&&TR/',
                        type=str,
                        help='Location of data root directory')
    parser.add_argument('--coco_root',
                        default='/home/lpy/Datasets/coco-text/',
                        type=str,
                        help='Location of data root direction')
    parser.add_argument('--mlt_root',
                        default='/home/lpy/Datasets/MLT_test/',
                        type=str,
                        help='Location of data root direction')
    # NOTE(review): type=bool on argparse does not parse 'False' as False —
    # any non-empty string is truthy; confirm this flag is only used as a
    # default.
    parser.add_argument('--vis',
                        default=True,
                        type=bool,
                        help='Vis the bounding box')
    args = parser.parse_args()
    cuda = torch.cuda.is_available()
    ## setup logger
    if os.path.exists(args.log_file_dir) == False:
        os.mkdir(args.log_file_dir)
    log_file_path = args.log_file_dir + 'eval_' + time.strftime(
        '%Y%m%d_%H%M%S') + '.log'
    setup_logger(log_file_path)
    ##versions = ['512x512', '768x768', '768x1280', '1280x1280']
    # Only one scale currently enabled; add entries for multi-scale testing.
    versions = ['768x768']
    cfgs = []
    print(args.dataset)
    if '512x512' in versions:
        cfgs.append(cfg_512x512)
    if '768x768' in versions:
        cfgs.append(cfg_768x768)
    if '768x1280' in versions:
        cfgs.append(cfg_768x1280)
    if '1280x1280' in versions:
        cfgs.append(cfg_1280x1280)
    # Hard-coded test-set sizes used to pre-allocate per-image box lists.
    if args.dataset == 'ic15':
        test_nums = 500
    elif args.dataset == 'ic13':
        test_nums = 233
    elif args.dataset == 'td500':
        test_nums = 200
    elif args.dataset == 'coco':
        test_nums = 10000
    elif args.dataset == 'mlt':
        test_nums = 9000
    else:
        exit()
    # boxes[i] accumulates detections for image i across all scales.
    boxes = []
    for i in range(test_nums):
        boxes.append([])
    ssd_dim = args.ssd_dim
    means = (104, 117, 123)
    rpsroi_pool = RPSRoIPool(2, 2, 1, 2, 1)
    rpsroi_pool = rpsroi_pool.cuda()
    rpsroi_pool.eval()
    # Output dirs keyed by the checkpoint's epoch suffix.
    if os.path.exists(args.out) == False:
        os.makedirs(args.out)
    save_dir = args.out + '/' + args.resume.strip().split('_')[-1].split(
        '.')[0] + '/'
    if os.path.exists(save_dir) == False:
        os.mkdir(save_dir)
    seg_dir = save_dir + 'seg/'
    box_dir = save_dir + 'box/'
    res_dir = save_dir + 'res/'
    if os.path.exists(seg_dir) == False:
        os.mkdir(seg_dir)
        os.mkdir(box_dir)
        os.mkdir(res_dir)
    logging.info('eval begin')
    # Pass 1: forward every image at every scale, collecting raw boxes.
    for cfg in cfgs:
        if args.dataset == 'ic15':
            dataset = ICDARDetection(args.ic_root, 'val', None, None, '15',
                                     dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'ic13':
            dataset = ICDARDetection(args.ic_root, 'val', None, None, '13',
                                     dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'td500':
            dataset = TD500Detection(args.td_root, 'val', None, None,
                                     aug=False, dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'coco':
            dataset = COCODetection(args.coco_root, 'val',
                                    dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        elif args.dataset == 'mlt':
            dataset = MLTDetection(args.mlt_root, 'test', dim=cfg['min_dim'])
            data_loader = data.DataLoader(dataset,
                                          args.test_batch_size,
                                          num_workers=args.num_workers,
                                          shuffle=False,
                                          pin_memory=True)
        else:
            exit()
        logging.info('dataset initialize done.')
        ## setup mode: a fresh network per scale config
        logging.info('loading {}...'.format(args.resume))
        net = build_dssd('test', cfg, ssd_dim, 2).cuda()
        net.load_weights(args.resume)
        net.eval()
        logging.info('begin')
        for i, sample in enumerate(data_loader, 0):
            img, image_name, ori_h, ori_w = sample
            # print(image_name)
            if i % 100 == 0:
                print(i, len(data_loader))
            h, w = img.size(2), img.size(3)
            if cuda:
                img = img.cuda()
            img = Variable(img)
            out, seg_pred, seg_map = net(img)
            candidate_boxes = eval_img(out, seg_pred, seg_map, rpsroi_pool,
                                       img)
            # Rescale polygon coords (even=x, odd=y) to original image size;
            # last element of each box is its score — TODO confirm against
            # eval_img.
            temp_boxes = []
            for box in candidate_boxes:
                temp_box = []
                for k in range(len(box) - 1):
                    if k % 2 == 0:
                        temp_box.append(int(box[k] * ori_w[0] / w))
                    else:
                        temp_box.append(int(box[k] * ori_h[0] / h))
                temp_box.append(box[-1])
                temp_boxes.append(temp_box)
            boxes[i] = boxes[i] + temp_boxes
        logging.info('forward done')
    # Pass 2: cross-scale polygon NMS and result writing.  Reuses the last
    # data_loader only for image names/sizes (shuffle=False keeps order).
    for i, sample in enumerate(data_loader, 0):
        img, image_name, ori_h, ori_w = sample
        save_name = image_name[0].split('/')[-1].split('.')[0]
        temp_boxes = boxes[i]
        keep = ploy_nms(temp_boxes, 0.3)
        keep_box = []
        for j, item in enumerate(temp_boxes):
            if j in keep:
                keep_box.append(item)
        if args.vis == True:
            box_img = show_box(img, keep_box, ori_h, ori_w)
            box_img.save(box_dir + '/' + save_name + '.jpg')
        # format output
        if args.dataset == 'coco':
            save_name = save_name.strip().split('_')[-1]
            save_name = str(int(save_name))
        if args.dataset == 'mlt':
            # MLT image names carry a 3-char prefix that is stripped.
            save_name = save_name[3:]
        res_name = res_dir + '/' + 'res_' + save_name + '.txt'
        fp = open(res_name, 'w')
        for box in keep_box:
            temp_x = []
            temp_y = []
            temp = []
            for j in range(len(box) - 1):
                if j % 2 == 0:
                    temp_x.append(box[j])
                    temp.append(str(box[j]))
                else:
                    temp_y.append(box[j])
                    temp.append(str(box[j]))
            if args.dataset == 'ic13':
                # ic13: axis-aligned xmin,ymin,xmax,ymax
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y))
                ]) + '\n')
            elif args.dataset == 'coco':
                #fp.write(','.join([temp[0], temp[1], temp[4], temp[5], box[-1]]) + '\n')
                fp.write(','.join([
                    str(min(temp_x)),
                    str(min(temp_y)),
                    str(max(temp_x)),
                    str(max(temp_y)),
                    str(box[-1])
                ]) + '\n')
            elif args.dataset == 'mlt':
                # mlt: polygon vertices plus confidence
                fp.write(','.join(temp + [str(box[-1])]) + '\n')
            else:
                fp.write(','.join(temp) + '\n')
        fp.close()
    logging.info('evaluate done')
def train():
    """Train `net` on the Logo or COCO dataset.

    Relies on module-level state: `net`, `args`, `optimizer`, `batch_size`,
    `collate_minibatch`, `adjust_learning_rate`, dataset roots, etc.
    The model computes its own losses and returns them in a dict.
    """
    net.train()
    # loss counters (reset at each epoch boundary)
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch  # continue epoch numbering when resuming
    print('Loading Dataset...')
    if args.dataset == 'Logo':
        dataset = LogoDetection(Logoroot, train_sets,
                                preproc(img_dim, rgb_means, p),
                                AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size  # iterations per epoch
    max_iter = args.max_epoch * epoch_size
    # LR decay milestones expressed in iterations (identical for both sets)
    stepvalues_Logo = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_Logo, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator (fresh shuffled loader every epoch)
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=collate_minibatch))
            loc_loss = 0
            conf_loss = 0
            # NOTE(review): the second clause is redundant — `epoch % 5 == 0`
            # with epoch > 200 is already covered by the first; presumably one
            # side was meant to use a different save frequency. Confirm intent.
            if (epoch % 5 == 0 and epoch > 0) or (epoch % 5 == 0
                                                  and epoch > 200):
                torch.save(
                    net.state_dict(), args.save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        # called every iteration: also implements warm-up via `iteration`
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        samples = next(batch_iterator)
        # import pdb;pdb.set_trace()
        # from IPython import embed; embed()
        if args.cuda:
            # samples['image'] = Variable(samples['image'])
            for key in samples:
                if key != 'target':
                    # roidb is a list of ndarrays with inconsistent length
                    samples[key] = list(map(Variable, samples[key]))
            # targets = [Variable(anno.cuda()) for anno in targets]
        else:
            # NOTE(review): `images` and `targets` are undefined in this
            # branch — CPU training would raise NameError. Confirm whether
            # args.cuda=False was ever meant to be supported here.
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        # out = net(images,targets)
        # samples = {'images':images,'targets':targets}
        # backprop
        optimizer.zero_grad()
        return_dict = net(**samples)  # model returns its own loss terms
        loss_l = return_dict['loss_l'].mean()
        loss_c = return_dict['loss_c'].mean()
        # loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
    # final checkpoint after the last iteration
    torch.save(
        net.state_dict(),
        args.save_folder + 'Final_' + args.version + '_' + args.dataset +
        '.pth')
head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() print('Finished loading model!') # load data if args.dataset == 'VOC': dataset = VOCDetection(args.voc_root, [('0712', "2007_test")], None, AnnotationTransform()) elif args.dataset == 'COCO': dataset = COCODetection(COCOroot, [('2014', 'minival')], None, COCOAnnotationTransform()) #COCOroot, [('2015', 'test-dev')], None) if args.cuda: net = net.cuda() cudnn.benchmark = True # evaluation top_k = 200 save_folder = os.path.join(args.save_folder, args.dataset) if args.version == "drf_refine_vgg": detector = Detect(num_classes, 0, cfg, use_arm=True) else: detector = Detect(num_classes, 0, cfg) test_net(save_folder, net, detector,
weight_decay = 0.0005 gamma = 0.1 momentum = 0.9 dataset_name = args.dataset if dataset_name[0] == "V": cfg = (VOC_300, VOC_512)[args.size == '512'] train_dataset = VOCDetection(VOCroot, datasets_dict[dataset_name], SSDAugmentation(img_dim, bgr_means), AnnotationTransform(), dataset_name) # train_dataset = VOCDetection(VOCroot, datasets_dict[dataset_name], preproc(img_dim, bgr_means, p), AnnotationTransform()) test_dataset = VOCDetection(VOCroot, datasets_dict["VOC2007"], None, AnnotationTransform(), dataset_name) elif dataset_name[0] == "C": train_dataset = COCODetection(COCOroot, datasets_dict[dataset_name], SSDAugmentation(img_dim, bgr_means), COCOAnnotationTransform(), dataset_name) test_dataset = COCODetection(COCOroot, datasets_dict["COCOval"], None, COCOAnnotationTransform(), dataset_name) cfg = (COCO_300, COCO_512)[args.size == '512'] else: print('Unkown dataset!') if args.version == "ssd_vgg": from models.ssd.vgg_net import build_ssd print("ssd vgg") elif args.version == "ssd_res": from models.ssd.res_net import build_ssd print("ssd resnet") elif args.version == "drf_ssd_vgg": from models.drfssd.vgg_drfnet import build_ssd
if __name__ == '__main__': if args.detection: num_classes = 81 # +1 background prior = 'VOC_' + str(args.ssd_dim) if 'RefineDet' in args.backbone and args.ssd_dim == 512: prior += '_RefineDet' elif 'RFB' in args.backbone and args.ssd_dim == 300: prior += '_RFB' cfg = mb_cfg[prior] dataset_mean = (104, 117, 123) ssd_dim = args.ssd_dim dataset = COCODetection(COCOroot, year=args.year, image_sets=[ args.set_file_name, ], transform=BaseTransform(ssd_dim, dataset_mean), phase='test') if 'MobNet' in args.backbone: if args.deform: from model.dualrefinedet_mobilenet import build_net net = build_net('test', size=ssd_dim, num_classes=num_classes, def_groups=args.deform, multihead=args.multihead) else: from model.refinedet_mobilenet import build_net net = build_net('test',
def train(model, resume=False):
    """Few-shot detection training loop built on detectron2-style utilities.

    Args:
        model: the detector to train (may be wrapped in (Distributed)DataParallel).
        resume: when True, resume optimizer/scheduler/iteration state from the
            last checkpoint instead of just loading weights.

    Uses module-level config/state: `args`, `num_classes`, `priors`,
    `logger`, dataset roots, etc.
    """
    model.train()
    optimizer = build_optimizer(args, model)
    scheduler = build_lr_scheduler(args, optimizer)
    checkpointer = DetectionCheckpointer(
        model, args, optimizer=optimizer, scheduler=scheduler
    )
    criterion = MultiBoxLoss_combined(num_classes, overlap_threshold, True, 0,
                                      True, 3, 0.5, False)
    # phase 1 starts from the base network; phase 2 from the phase-1 weights.
    # resume_or_load returns the stored metadata; -1 + 1 = 0 on a fresh start.
    start_iter = (
        checkpointer.resume_or_load(
            args.basenet if args.phase == 1 else args.load_file,
            resume=resume).get("iteration", -1) + 1
    )
    max_iter = args.max_iter
    periodic_checkpointer = PeriodicCheckpointer(
        checkpointer, args.checkpoint_period, max_iter=max_iter
    )
    writers = (
        [
            CommonMetricPrinter(max_iter),
            TensorboardXWriter(args.save_folder),
        ]
    )
    if args.dataset == 'VOC':
        dataset = VOCDetection(
            args, VOCroot, train_sets, preproc(img_dim, rgb_means, p),
            AnnotationTransform(0 if args.setting == 'transfer' else args.split))
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        raise ValueError(f"Unknown dataset: {args.dataset}")
    if args.phase == 2 and args.method == 'ours':
        # warm-up pass over the data to initialize the reweighting parameters,
        # then enable mixup with Beta(1.5, 1.5) mixing coefficients.
        sampler = TrainingSampler(len(dataset))
        data_loader = torch.utils.data.DataLoader(
            dataset,
            args.batch_size,
            sampler=sampler,
            num_workers=args.num_workers,
            collate_fn=detection_collate,
        )
        # initialize the OBJ(Target) parameters
        init_reweight(args, model, data_loader)
        dataset.set_mixup(np.random.beta, 1.5, 1.5)
        logger.info('Fine tuning on ' + str(args.shot) + '-shot task')
    # infinite-stream loader used by the main loop below
    sampler = TrainingSampler(len(dataset))
    data_loader = iter(torch.utils.data.DataLoader(
        dataset,
        args.batch_size,
        sampler=sampler,
        num_workers=args.num_workers,
        collate_fn=detection_collate,
    ))
    assert model.training, 'Model.train() must be True during training.'
    logger.info("Starting training from iteration {}".format(start_iter))
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=args.gamma, last_epoch=epoch - 1)
    with EventStorage(start_iter) as storage:
        for iteration in range(start_iter, max_iter):
            iteration = iteration + 1  # 1-based for logging/checkpointing
            storage.step()
            # disable mixup for the last `no_mixup_iter` iterations and
            # rebuild the loader so the change takes effect
            if args.phase == 2 and args.method == 'ours' and \
                    iteration == (args.max_iter - args.no_mixup_iter):
                dataset.set_mixup(None)
                data_loader = iter(torch.utils.data.DataLoader(
                    dataset,
                    args.batch_size,
                    sampler=sampler,
                    num_workers=args.num_workers,
                    collate_fn=detection_collate,
                ))
            data, targets = next(data_loader)
            # storage.put_image('image', vis_tensorboard(data))
            output = model(data)
            loss_dict = criterion(output, priors, targets)
            losses = sum(loss for loss in loss_dict.values())
            # assert torch.isfinite(losses).all(), loss_dict
            storage.put_scalars(total_loss=losses, **loss_dict)
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            if args.phase == 2 and args.method == 'ours':
                # keep the reweighting vectors normalized after each update;
                # unwrap DataParallel to reach the real module
                if isinstance(model, (DistributedDataParallel, DataParallel)):
                    model.module.normalize()
                else:
                    model.normalize()
            storage.put_scalar("lr", optimizer.param_groups[-1]["lr"],
                               smoothing_hint=False)
            scheduler.step()
            # skip the first few iterations so startup noise is not logged
            if iteration - start_iter > 5 and (iteration % 20 == 0
                                               or iteration == max_iter):
                for writer in writers:
                    writer.write()
            periodic_checkpointer.step(iteration)
def main(args):
    """Train an SSD-MobileNetV3 detector on a COCO-format dataset.

    Builds the dataset/dataloader and model, creates an SGD optimizer with
    biases in their own parameter group (following the original Caffe SSD
    setup), then runs the epoch loop with a ReduceLROnPlateau scheduler and
    TensorBoard logging. Checkpoints every 10 epochs and at the end.

    Args:
        args: parsed CLI namespace (image_root, annotation, image_size,
            save_folder, pretrain_model_path, learning_rate, momentum,
            weight_decay, threshold, neg_pos_ratio, epochs).
    """
    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE(review): target_transform is passed as the class itself, not an
    # instance (contrast the commented-out customAnnotationTransform() call
    # this replaced) — confirm COCODetection expects a class here.
    dataset = COCODetection(root=args.image_root,
                            annotation_json=args.annotation,
                            transform=BaseTransform(img_size=args.image_size),
                            target_transform=COCOAnnotationTransform)
    dataloader = DataLoader(dataset=dataset,
                            batch_size=4,
                            shuffle=True,
                            collate_fn=detection_collate)
    n_classes = dataset.get_class_number() + 1  # +1 for background
    print("Detect class number: {}".format(n_classes))
    ## write category id to label name map
    dataset.get_class_map()

    model = mobilenetv3(n_classes=n_classes)
    ssd = ssd_mobilenetv3(model, n_classes)
    if args.pretrain_model_path:
        ssd.load_state_dict(torch.load(args.pretrain_model_path))

    # Initialize the optimizer, with biases in a dedicated parameter group,
    # as in the original Caffe repo.
    biases = list()
    not_biases = list()
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)
    optimizer = torch.optim.SGD(params=[{
        'params': biases,
        'lr': args.learning_rate
    }, {
        'params': not_biases
    }],
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    ssd = ssd.to(device)
    criterion = MultiBoxLossV3(ssd.priors_cxcy, args.threshold,
                               args.neg_pos_ratio).to(device)
    print(f"epochs: {args.epochs}")
    # Reset every group's LR to the configured value. (The previous code
    # looped over param_groups but only ever rewrote group 1, leaving the
    # loop variable unused.)
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.learning_rate
    print(f"learning rate. The new LR is {optimizer.param_groups[1]['lr']}")
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.1,
                                  patience=15,
                                  verbose=True,
                                  threshold=0.00001,
                                  threshold_mode='rel',
                                  cooldown=0,
                                  min_lr=0,
                                  eps=1e-08)
    n_train = min(len(dataset), 5000)  # progress-bar total only
    global_step = 0
    writer = SummaryWriter()
    for epoch in range(args.epochs):
        mean_loss = 0  # running SUM of batch losses for this epoch
        ssd.train()
        mean_count = 0
        with tqdm(total=n_train,
                  desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in dataloader:
                img = img.to(device)
                # annotations are [x1, y1, x2, y2, label] per object
                boxes = [anno.to(device)[:, :-1] for anno in target]
                labels = [anno.to(device)[:, -1] for anno in target]
                prediction_location_loss, prediction_confidence_loss = ssd(img)
                loss = criterion(prediction_location_loss,
                                 prediction_confidence_loss, boxes, labels)
                pbar.set_postfix(**{"loss ": float(loss)})
                mean_loss += float(loss)
                mean_count += 1
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                pbar.update(img.shape[0])
        # NOTE(review): the scheduler monitors the summed (not mean) loss;
        # consistent across epochs since the dataset size is fixed.
        scheduler.step(mean_loss)
        writer.add_scalar('Train/Loss', float(mean_loss / mean_count),
                          global_step)
        global_step += 1
        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)
    writer.close()
def train():
    """Train `net` (optionally with an ARM refinement branch) on VOC or COCO.

    Uses module-level state: `net`, `ssd_net`, `args`, `optimizer`,
    `criterion`, `arm_criterion`, `priors`, `use_refine`, `device`,
    `collate_fn`, `max_epoch`, `viz`, etc.
    """
    net.train()
    epoch = args.start_iter  # epoch counter, seeded from --start_iter
    if args.dataset_name == 'COCO':
        dataset = COCODetection(COCOroot, year='trainval2014',
                                image_sets=train_sets,
                                transform=data_transform(ssd_dim, means),
                                phase='train')
    else:
        dataset = VOCDetection(data_root, train_sets,
                               data_transform(ssd_dim, means),
                               AnnotationTransform(
                                   dataset_name=args.dataset_name),
                               dataset_name=args.dataset_name,
                               set_file_name=set_filename)
    epoch_size = len(dataset) // args.batch_size  # iterations per epoch
    # LR decay milestones converted from epochs to iterations
    drop_step = [s * epoch_size for s in args.step_list]
    max_iter = max_epoch * epoch_size
    logging.info('Loading Dataset:' + args.dataset_name +
                 ' dataset size: ' + str(len(dataset)))
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        y_dim = 3
        legend = ['Loss', 'Loc Loss', 'Conf Loss', ]
        if use_refine:
            y_dim += 1
            legend += ['Arm Loc Loss', ]
        lot = viz.line(
            X=torch.zeros((1,)),
            Y=torch.zeros((1, y_dim)),
            opts=dict(
                xlabel='Iteration',
                ylabel='Loss',
                title=args.save_folder.split('/')[-1],
                legend=legend,
            )
        )
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=collate_fn,
                                  pin_memory=True)
    # +10 lets the loop run slightly past the nominal last iteration
    for iteration in range(epoch * epoch_size, max_iter + 10):
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator (fresh epoch)
            batch_iterator = iter(data_loader)
            if epoch % args.save_interval == 0:
                logging.info('Saving state, epoch: ' + str(epoch))
                torch.save(
                    ssd_net.state_dict(),
                    os.path.join(args.save_folder, args.model_name +
                                 str(ssd_dim) + '_' + args.dataset_name +
                                 '_' + repr(epoch) + '.pth'))
            epoch += 1
        t0 = time.time()
        if iteration in drop_step:
            step_index = drop_step.index(iteration) + 1
            # NOTE(review): the LR is only adjusted at milestone iterations
            # here, yet adjust_learning_rate takes `iteration`/`epoch_size`
            # (warm-up style args) — confirm it was not meant to be called
            # every iteration.
            adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                 iteration, epoch_size)
            # adjust_learning_rate(optimizer, args.gamma)
        collected_data = next(batch_iterator)
        with torch.no_grad():
            images, targets = collected_data[:2]
            images = images.to(device)
            targets = [anno.to(device) for anno in targets]
        # forward; `loss` starts as a zero tensor the branch losses add into
        loss = torch.tensor(0., requires_grad=True).to(device)
        out = net(images)
        # backward
        optimizer.zero_grad()
        if use_refine:
            # ARM branch: binary localization loss on the first output pair,
            # then ODM losses conditioned on the ARM predictions
            loss_arm_l = arm_criterion(out[0], priors, targets)
            loss_l, loss_c = criterion(out[2:], priors, targets,
                                       arm_data=out[:2])
            loss += args.loss_coe[0] * loss_arm_l
        else:
            loss_l, loss_c = criterion(out, priors, targets)
        loss += args.loss_coe[0] * loss_l + args.loss_coe[1] * loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        if iteration % 10 == 0:
            if use_refine:
                logging.info(
                    'Epoch:' + repr(epoch) + ', epochiter: ' +
                    repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                    ', total_iter ' + repr(iteration) +
                    ' || loss: %.4f, Loss_l: %.4f, loss_c: %.4f, loss_arm_l: %.4f, lr: %.5f || Timer: %.4f sec.'
                    % (loss, loss_l, loss_c, loss_arm_l,
                       optimizer.param_groups[0]['lr'], t1 - t0))
            else:
                logging.info(
                    'Epoch:' + repr(epoch) + ', epochiter: ' +
                    repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                    ', total_iter ' + repr(iteration) +
                    ' || loss: %.4f, Loss_l: %.4f, loss_c: %.4f, lr: %.5f || Timer: %.4f sec.'
                    % (loss, loss_l, loss_c,
                       optimizer.param_groups[0]['lr'], t1 - t0))
        if args.visdom:
            y_dis = [loss.cpu(), args.loss_coe[0] * loss_l.cpu(),
                     args.loss_coe[1] * loss_c.cpu()]
            if iteration == 1000:
                # re-initialize the visdom plot after warm-up noise
                lot = viz.line(
                    X=torch.zeros((1,)),
                    Y=torch.zeros((1, y_dim)),
                    opts=dict(
                        xlabel='Iteration',
                        ylabel='Loss',
                        title=args.save_folder.split('/')[-1],
                        legend=legend,
                    )
                )
            if use_refine:
                y_dis += [args.loss_coe[0] * loss_arm_l.cpu(), ]
            # update = 'append' if iteration
            viz.line(
                X=torch.ones((1, y_dim)) * iteration,
                Y=torch.FloatTensor(y_dis).unsqueeze(0),
                win=lot,
                update='append',
                opts=dict(
                    xlabel='Iteration',
                    ylabel='Loss',
                    title=args.save_folder.split('/')[-1],
                    legend=legend, )
            )
    # final checkpoint, named by the last iteration reached
    torch.save(
        ssd_net.state_dict(),
        os.path.join(args.save_folder, args.model_name + str(ssd_dim) + '_' +
                     args.dataset_name + '_' + repr(iteration) + '.pth'))
    print('Complet Training. Saving state, iter:', iteration)
# loss counters loc_loss = 0 # epoch conf_loss = 0 epoch = 0 + args.resume_epoch print('Loading Dataset...') <<<<<<< HEAD VOCroot = "/home/sqy/disk/ydata/Det_datasets/VOC_Tank" dataset = VOCDetection(VOCroot, train_sets, preproc( img_dim, rgb_means, p), AnnotationTransform()) ======= if args.dataset == 'VOC': dataset = VOCDetection(VOCroot, train_sets, preproc( img_dim, rgb_means, p), AnnotationTransform()) elif args.dataset == 'COCO': dataset = COCODetection(COCOroot, train_sets, preproc( img_dim, rgb_means, p)) else: print('Only VOC and COCO are supported now!') return >>>>>>> 6544e535e60c169d1904751184fb44cdf61ff894 epoch_size = len(dataset) // args.batch_size max_iter = args.max_epoch * epoch_size stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size) <<<<<<< HEAD stepvalues = (stepvalues_VOC) ======= stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size) stepvalues = (stepvalues_VOC,stepvalues_COCO)[args.dataset=='COCO'] >>>>>>> 6544e535e60c169d1904751184fb44cdf61ff894
def load_data(args): if args.data_type.lower() == "regresion" or args.data_type.lower( ) == "recognation": if osp.exists(args.train_file) and osp.exists(args.test_file): train_dataset = ClassDataset( root=args.root, file_list=args.train_file, data_type=args.data_type.lower(), gray=args.gray, num_classes=args.num_classes, transform=ClassAugmentation(gray=args.gray, parse_type='train'), ) val_dataset = ClassDataset( root=args.root, file_list=args.test_file, data_type=args.data_type.lower(), gray=args.gray, num_classes=args.num_classes, transform=ClassAugmentation(gray=args.gray, parse_type='val'), ) else: train_dataset = datasets.ImageFolder(osp.join(args.root, 'train'), transform=ClassAugmentation( gray=args.gray, parse_type='train')) val_dataset = datasets.ImageFolder(osp.join(args.root, 'val'), transform=ClassAugmentation( gray=args.gray, parse_type='val')) # drop_last = True/False 是否扔掉最后不足一个batch的数据,batch=100,最后剩36个数据,是否扔掉,看drop_last train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.workers, shuffle=True, pin_memory=True, drop_last=False) val_loader = DataLoader(val_dataset, batch_size=args.val_batch_size, num_workers=args.workers, shuffle=False, pin_memory=True) elif args.data_type.lower() == "detector": if args.dataset_type == 'COCO': train_dataset = COCODetection(root=args.dataset_root, transform=None, mosaic=False) if args.dataset_type == 'VOC': train_dataset = VOCDetection(root=args.dataset_root, transform=None, mosaic=False) train_loader = None val_loader = None else: raise Exception(f"This project not support {args.data_type} type!!!") return train_loader, val_loader
name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() print('Finished loading model!') print(net) # load data if args.dataset == 'VOC': testset = VOCDetection(VOCroot, [('2007', 'test')], None, AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection( #COCOroot, [('2014', 'minival')], None) COCOroot, [('2015', 'test-dev')], None) else: print('Only VOC and COCO dataset are supported now!') if args.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() # evaluation #top_k = (300, 200)[args.dataset == 'COCO'] top_k = 200 detector = Detect(num_classes, 0, cfg) save_folder = os.path.join(args.save_folder, args.dataset) rgb_means = (104, 117, 123)
def train():
    """Train `net` on VOC or COCO, periodically saving and validating.

    Uses module-level state: `net`, `args`, `optimizer`, `criterion`,
    `priors`, `cfg`, `num_classes`, dataset roots, etc. Every saved
    checkpoint (past the resume epoch) is reloaded into a fresh ValNet
    and evaluated with `val_net`.
    """
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0 + args.resume_epoch  # continue epoch numbering when resuming
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        # alpha > 0 enables mixup augmentation (float-tolerant comparison)
        if args.alpha - 0.0 > 1e-5:
            dataset = VOCDetection(VOCroot, train_sets,
                                   preproc_mixup(img_dim, rgb_means, p),
                                   AnnotationTransform(),
                                   random_erasing=args.random_erasing,
                                   mixup_alpha=args.alpha)
        else:
            dataset = VOCDetection(VOCroot, train_sets,
                                   preproc(img_dim, rgb_means, p),
                                   AnnotationTransform(),
                                   random_erasing=args.random_erasing)
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size  # iterations per epoch
    max_iter = args.max_epoch * epoch_size
    # LR decay milestones in iterations
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (100 * epoch_size, 135 * epoch_size, 170 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
        # fast-forward step_index past milestones already passed at resume
        for sv in stepvalues:
            if start_iter > sv:
                step_index += 1
                continue
            else:
                break
    else:
        start_iter = 0
    lr = args.lr
    avg_loss_list = []  # per-epoch average total loss, for monitoring
    flag = True
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator (fresh shuffled loader every epoch)
            batch_iterator = iter(
                data.DataLoader(dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            # NOTE(review): at the very first epoch this appends 0 (losses
            # not yet accumulated)
            avg_loss = (loc_loss + conf_loss) / epoch_size
            avg_loss_list.append(avg_loss)
            print("avg_loss_list:")
            if len(avg_loss_list) <= 5:
                print(avg_loss_list)
            else:
                print(avg_loss_list[-5:])
            loc_loss = 0
            conf_loss = 0
            # save every 10 epochs up to 150, every 5 up to 200, then every
            # epoch
            if (epoch <= 150 and epoch % 10 == 0) or (
                    150 < epoch < 200 and epoch % 5 == 0) or (epoch > 200):
                torch.save(
                    net.state_dict(), args.save_folder + args.version + '_' +
                    args.dataset + '_epoches_' + repr(epoch) + '.pth')
                # validate the checkpoint we just wrote (skip the resume
                # epoch: that checkpoint was produced by an earlier run)
                if (epoch != args.resume_epoch):
                    #if(epoch):
                    ValNet = build_net(img_dim, num_classes, args.norm,
                                       args.vgg_bn)
                    val_state_dict = torch.load(
                        args.save_folder + args.version + '_' + args.dataset +
                        '_epoches_' + repr(epoch) + '.pth')
                    from collections import OrderedDict
                    new_state_dict = OrderedDict()
                    # strip the DataParallel 'module.' prefix if present
                    for k, v in val_state_dict.items():
                        head = k[:7]
                        if head == 'module.':
                            name = k[7:]
                        else:
                            name = k
                        new_state_dict[name] = v
                    ValNet.load_state_dict(new_state_dict)
                    ValNet.eval()
                    print('Finished loading ' + args.version + '_' +
                          args.dataset + '_epoches_' + repr(epoch) +
                          '.pth model!')
                    if args.dataset == 'VOC':
                        testset = VOCDetection(VOCroot, [('2007', 'test')],
                                               None, AnnotationTransform())
                    elif args.dataset == 'COCO':
                        testset = COCODetection(COCOroot,
                                                [('2014', 'minival')], None)
                    if args.cuda:
                        ValNet = ValNet.cuda()
                        cudnn.benchmark = True
                    else:
                        ValNet = ValNet.cpu()
                    top_k = 200
                    detector = Detect(num_classes, 0, cfg, GIOU=args.giou)
                    save_val_folder = os.path.join(args.save_val_folder,
                                                   args.dataset)
                    val_transform = BaseTransform(ValNet.size, rgb_means,
                                                  (2, 0, 1))
                    val_net(priors, save_val_folder, testset, num_classes,
                            ValNet, detector, val_transform, top_k, 0.01,
                            args.cuda, args.vgg_bn)
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        # called every iteration: also implements warm-up
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        images, targets = next(batch_iterator)  # no mixup
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # fh = net.base[22].register_forward_hook(get_features_hook)
        # bh = net.base[22].register_backward_hook(get_grads_hook)
        out = net(images, vgg_bn=args.vgg_bn)
        optimizer.zero_grad()
        loss_l, loss_c, = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        # fh.remove()
        # bh.remove()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f S: %.4f||' %
                  (loss_l.item(), loss_c.item(),
                   loss_l.item() + loss_c.item()) +
                  'Batch time: %.4f ||' % (load_t1 - load_t0) +
                  'LR: %.7f' % (lr))
    # final checkpoint
    torch.save(
        net.state_dict(),
        args.save_folder + 'Final_' + args.version + '_' + args.dataset +
        '.pth')
def train():
    """Train SSD on VOC or COCO (configuration from module-level `args`).

    Builds the dataset, network, SGD optimizer and MultiBox loss, then runs
    the fixed-iteration training loop with optional visdom plotting and
    periodic checkpointing.
    """
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))
    print(VOC_ROOT)
    print(COCO_ROOT)
    viz = None
    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net
    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)
    if args.cuda:
        net = net.cuda()
    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)
    net.train()
    # epoch loss counters (reported to the visdom epoch plot)
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)
    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title,
                                    vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when the epoch is exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        # BUG FIX: the old code did `loss_l += loss_l.item()` and
        # `loss_c += loss_c.item()`, which doubled the loss tensors reported
        # to visdom and never updated loc_loss/conf_loss, so the epoch plot
        # always showed zero. Accumulate into the epoch counters instead.
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' %
                  (loss.item()), end=' ')

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_COCO_' + repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
if module_path not in sys.path: sys.path.append(module_path) from utils.utils import color_list, vis_detections import torch from torch.autograd import Variable import numpy as np import cv2 from data import COCODetection, COCO_ROOT, COCOAnnotationTransform from models.refinedetlite import build_refinedet from data import COCO_CLASSES as labels if torch.cuda.is_available(): torch.set_default_tensor_type('torch.cuda.FloatTensor') net = build_refinedet('test', 320, 81) # initialize SSD net.load_weights('../weights/RefineDetLiteCOCO/RefineDet320_COCO_138000.pth') testset = COCODetection(COCO_ROOT, "val2017", None, COCOAnnotationTransform()) img_id = 121 image = testset.pull_image(img_id) x = cv2.resize(image, (320, 320)).astype(np.float32) x -= (104.0, 117.0, 123.0) x = x.astype(np.float32) x = x[:, :, ::-1].copy() x = torch.from_numpy(x).permute(2, 0, 1) xx = Variable(x.unsqueeze(0)) # wrap tensor in Variable if torch.cuda.is_available(): xx = xx.cuda()
def train():
    """Run the full RFB-style training loop for `net`.

    Reads module-level configuration and state (`args`, `net`, `optimizer`,
    `criterion`, `priors`, `batch_size`, dataset roots). Checkpoints
    periodically and writes a final model at the end.
    """
    net.train()
    # per-epoch loss accumulators
    loc_loss = 0
    conf_loss = 0
    epoch = 0 + args.resume_epoch  # continue epoch numbering when resuming
    print('Loading Dataset...')
    if args.dataset == 'VOC':
        dataset = VOCDetection(VOCroot, train_sets,
                               preproc(img_dim, rgb_means, p),
                               AnnotationTransform())
    elif args.dataset == 'COCO':
        dataset = COCODetection(COCOroot, train_sets,
                                preproc(img_dim, rgb_means, p))
    else:
        print('Only VOC and COCO are supported now!')
        return
    epoch_size = len(dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # learning-rate decay milestones, expressed in iterations
    if args.dataset == 'COCO':
        stepvalues = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    else:
        stepvalues = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    print('Training', args.version, 'on', dataset.name)
    step_index = 0
    start_iter = args.resume_epoch * epoch_size if args.resume_epoch > 0 else 0
    lr = args.lr
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # new epoch: build a fresh shuffled batch iterator
            loader = data.DataLoader(dataset,
                                     batch_size,
                                     shuffle=True,
                                     num_workers=args.num_workers,
                                     collate_fn=detection_collate)
            batch_iterator = iter(loader)
            loc_loss = 0
            conf_loss = 0
            # every 2 epochs early on; every 5 epochs after epoch 200
            save_now = (epoch % 2 == 0 and epoch > 0) or (epoch % 5 == 0
                                                          and epoch > 200)
            if save_now:
                ckpt_path = (args.save_folder + args.version + '_' +
                             args.dataset + '_epoches_' + repr(epoch) +
                             '.pth')
                torch.save(net.state_dict(), ckpt_path)
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        # called every iteration: also handles warm-up
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()
        load_t1 = time.time()
        if iteration % 10 == 0:
            status = ('Epoch:' + repr(epoch) + ' || epochiter: ' +
                      repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                      '|| Totel iter ' + repr(iteration) +
                      ' || L: %.4f C: %.4f||' %
                      (loss_l.item(), loss_c.item()) +
                      'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                      'LR: %.8f' % (lr))
            print(status)
    final_path = (args.save_folder + 'Final_' + args.version + '_' +
                  args.dataset + '.pth')
    torch.save(net.state_dict(), final_path)
def train(): net.train() # loss counters loc_loss = 0 # epoch conf_loss = 0 epoch = 0 + args.resume_epoch # finetune方式地训练 print('Loading Dataset...') # 加载训练、验证集,preproc类可以参照data_augment.py函数,与SSD数据增强方式一致 if args.dataset == 'VOC': dataset = VOCDetection(VOCroot, train_sets, preproc(img_dim, rgb_means, p), AnnotationTransform()) elif args.dataset == 'COCO': dataset = COCODetection(COCOroot, train_sets, preproc(img_dim, rgb_means, p)) else: print('Only VOC and COCO are supported now!') return epoch_size = len(dataset) // args.batch_size # 每个epoch内需要处理的iter次数 max_iter = args.max_epoch * epoch_size # 总iter次数,max_epoch*epoch_size # learning rate调整的节点 stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size) stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size) stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO'] print('Training', args.version, 'on', dataset.name) step_index = 0 # 是否需要finetune if args.resume_epoch > 0: start_iter = args.resume_epoch * epoch_size else: start_iter = 0 lr = args.lr for iteration in range(start_iter, max_iter): # 共需迭代的次数,是否finetune间有差异,同时也对应到了epoch次数 if iteration % epoch_size == 0: # create batch iterator 新一轮epoch加载数据,把全部数据又重新加载了,下面的next(batch_iterator)再逐batch_size地取数据 batch_iterator = iter( data.DataLoader(dataset, batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=detection_collate)) # detection_collate逐batch_size地取出图像 + 标签 loc_loss = 0 conf_loss = 0 if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200): torch.save(net.state_dict(), args.save_folder + args.version + '_' + args.dataset + '_epoches_' + repr(epoch) + '.pth') # 模型保存 epoch += 1 load_t0 = time.time() # 以下操作就是针对lr的调整,warming up操作 if iteration in stepvalues: step_index += 1 lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index, iteration, epoch_size) # load train data,batch_iterator一次性加载了数据,next操作就逐个batch_size地取出数据了 images, targets = next(batch_iterator) # 可以对应到detection_collate函数 
#print(np.sum([torch.sum(anno[:,-1] == 2) for anno in targets])) # 对应cuda操作 if args.cuda: images = Variable(images.cuda()) targets = [Variable(anno.cuda()) for anno in targets] else: images = Variable(images) targets = [Variable(anno) for anno in targets] # forward t0 = time.time() out = net(images) # batch_size图像批操作,直接forward得到结果 # backprop optimizer.zero_grad( ) # Clears the gradients of all optimized,本batch_size内来一波 loss_l, loss_c = criterion( out, priors, targets) # 对应到MultiBoxLoss,可以参照multibox_loss.py loss = loss_l + loss_c # 这里设置的loc loss、cls loss权重系数为1:1 loss.backward() # loss bp反向传播 optimizer.step() t1 = time.time() loc_loss += loss_l.item() # 累加batch_size内的loss conf_loss += loss_c.item() load_t1 = time.time() if iteration % 10 == 0: print('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size) + '|| Totel iter ' + repr(iteration) + ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) + 'Batch time: %.4f sec. ||' % (load_t1 - load_t0) + 'LR: %.8f' % (lr)) # 最终保存的模型 torch.save( net.state_dict(), args.save_folder + 'Final_' + args.version + '_' + args.dataset + '.pth')
return ( testset.evaluate_detections(all_boxes, save_folder),total_detect_time,total_nms_time,4951/(total_nms_time+total_detect_time),4951/(total_detect_time) ) if __name__ == '__main__': # load net #torch.cuda.set_device(args.device) img_dim = (300,512)[args.size=='512'] num_classes = (21, 81)[args.dataset == 'COCO'] net = build_ssd('test', img_dim, num_classes) # initialize detector if args.dataset == 'VOC': testset = VOCDetection( VOCroot, [('2007', 'test')], None, AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection( COCOroot, [('2014', 'minival')], None) #COCOroot, [('2015', 'test-dev')], None) else: print('Only VOC and COCO dataset are supported now!') top_k = 200 detector = Detect(num_classes,0,cfg) save_folder = os.path.join(args.save_folder,args.dataset) rgb_means = ((104, 117, 123),(103.94,116.78,123.68))[args.version == 'RFB_mobile'] start_iter = 100000 end_iter = 154000 step = 2000 best_ap = 0 best_iter = 100000 output_file = open('detect_summ.txt','w')
def train(args):
    """Train a MobileNetV2/V3 SSD-lite detector on a COCO-format dataset.

    Builds train/val datasets from ``args``, constructs the requested
    backbone + SSD head, optionally loads pretrained (PyTorch or converted
    TF) weights, then runs ``args.epochs`` epochs of training with a
    per-epoch validation pass.  Losses are logged to TensorBoard via
    ``SummaryWriter`` and checkpoints are written every 10 epochs.
    """
    # Checkpoints go into a timestamped subfolder of args.save_folder.
    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)
    # n_classes = [20, 80][args.dataset == 'COCO']
    # n_classes = 91

    # Both image folders, or an annotation json, are required.
    if not ((args.train_image_folder and args.val_image_folder) or args.annotation):
        print("train/val image folder and annotation should not be None")
        return

    # Training set with SSD-style augmentation; validation set with the
    # plain resize/normalize BaseTransform.
    train_dataset = COCODetection(
        root=args.root,
        image_set=args.train_image_folder,
        annotation_json=args.annotation,
        transform=SSDAugmentation(img_size=args.image_size),
        # transform = BaseTransform(img_size = args.image_size),
        target_transform=COCOAnnotationTransform())
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  collate_fn=detection_collate)
    val_dataset = COCODetection(
        root=args.root,
        image_set=args.val_image_folder,
        annotation_json=args.annotation,
        transform=BaseTransform(img_size=args.image_size),
        target_transform=COCOAnnotationTransform())

    n_classes = train_dataset.get_class_size() + 1  # +1 for the background class
    if args.class_map_path:
        train_dataset.get_class_map(args.class_map_path)

    # Build the requested backbone and its SSD-lite head.
    if args.model == "mobilenetv2":
        model = MobileNetv2(
            n_classes=n_classes,
            width_mult=args.width_mult,
            round_nearest=8,
            dropout_ratio=args.dropout_ratio,
            use_batch_norm=True,
        )
        ssd = create_mobilenetv2_ssd_lite(model,
                                          n_classes,
                                          width_mult=args.width_mult,
                                          use_batch_norm=True)
    elif args.model == "mobilenetv3":
        model = MobileNetv3(model_mode=args.model_mode,
                            n_classes=n_classes,
                            width_mult=args.width_mult,
                            dropout_ratio=args.dropout_ratio)
        ssd = create_mobilenetv3_ssd_lite(model,
                                          n_classes,
                                          model_mode=args.model_mode)
    else:
        print("model structure only accept mobilenetv2 or mobilenetv3")
        return
    print("builded ssd module")

    if GPU:
        import torch.backends.cudnn as cudnn
        model.cuda()
        ssd.cuda()
        cudnn.benchmark = True

    # Load pretrained weights: either a native PyTorch state dict, or
    # weights converted from a TensorFlow model.
    if args.pretrain_model:
        ssd.load_state_dict(
            torch.load(args.pretrain_model,
                       map_location=torch.device('cpu')))
    elif args.pretrain_tfmodel and args.pretrain_tfmodel_weight_list:
        ssd_state_dict = ssd.state_dict()
        tf_weights_dict = load_tf_weights(args, ssd_state_dict)
        ssd.load_state_dict(tf_weights_dict)

    optimizer = optim.Adam(ssd.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(n_classes,
                             overlap_thresh=args.overlap_threshold,
                             prior_for_matching=True,
                             bkg_label=0,
                             neg_mining=True,
                             neg_pos=args.neg_pos_ratio,
                             neg_overlap=0.5,
                             encode_target=False)

    # Default (prior/anchor) boxes are fixed, so generate them without grads.
    with torch.no_grad():
        if args.model == "mobilenetv2":
            prior_box = PriorBox(MOBILEV2_300)
        elif args.model == "mobilenetv3":
            prior_box = PriorBox(MOBILEV3_300)
        priors = Variable(prior_box.forward())
        print("created default bbox")

    # Cap the number of images seen per epoch.
    n_train = min(train_dataset.__len__(), 5000)
    n_val = min(val_dataset.__len__(), 1000)
    global_step = 0
    val_global_step = 0
    writer = SummaryWriter(log_dir=args.summary_path)

    for epoch in range(args.epochs):
        mean_loss_conf = 0
        mean_loss_loc = 0
        inference_count = 0
        ssd.train()
        # ---- training pass ----
        with tqdm(total=n_train,
                  desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in train_dataloader:
                if GPU:
                    img = Variable(img.cuda())
                    target = [Variable(anno.cuda()) for anno in target]
                else:
                    img = Variable(img)
                    target = [Variable(anno) for anno in target]
                optimizer.zero_grad()
                inference = ssd(img)
                loss_loc, loss_conf = criterion(inference, priors, target)
                writer.add_scalar('Train/location_loss', float(loss_loc),
                                  global_step)
                writer.add_scalar('Train/confidence_loss', float(loss_conf),
                                  global_step)
                pbar.set_postfix(
                    **{
                        "location loss": float(loss_loc),
                        "confidence loss": float(loss_conf)
                    })
                mean_loss_loc += float(loss_loc)
                mean_loss_conf += float(loss_conf)
                total_loss = loss_loc + loss_conf
                total_loss.backward()
                # # clip gradient
                # # clip_grad_norm_(net.parameters(), 0.1)
                optimizer.step()
                pbar.update(img.shape[0])
                global_step += 1
                inference_count += img.shape[0]
                # Stop the epoch early once n_train images have been seen.
                if inference_count > n_train:
                    break
            pbar.set_postfix(
                **{
                    "location loss": float(mean_loss_loc / n_train),
                    "confidence loss": float(mean_loss_conf / n_train)
                })

        # ---- validation pass ----
        # NOTE(review): this loop runs without torch.no_grad(), so it builds
        # autograd graphs it never uses — confirm and wrap if unintended.
        ssd.eval()
        val_mean_loss_loc = 0
        val_mean_loss_conf = 0
        with tqdm(total=n_val, desc="Validation", unit="img") as vpbar:
            for i in range(n_val):
                img = val_dataset.get_image(i)
                img = cv2.resize(img, (args.image_size, args.image_size))
                # NOTE(review): height/width are read AFTER the resize, so
                # both always equal args.image_size; if get_annotation
                # expects the original image size, this is a bug — verify.
                height, width, _ = img.shape
                target = val_dataset.get_annotation(i, width, height)
                if GPU:
                    # HWC uint8 -> NCHW float32 batch of one image.
                    img = torch.from_numpy(
                        np.expand_dims(img.transpose(2, 0, 1),
                                       0)).to(dtype=torch.float32).cuda()
                    target = torch.FloatTensor(target).unsqueeze(0).cuda()
                else:
                    img = torch.from_numpy(
                        np.expand_dims(img.transpose(2, 0, 1),
                                       0)).to(dtype=torch.float32)
                    target = torch.FloatTensor(target).unsqueeze(0)
                inference = ssd(img)
                loss_loc, loss_conf = criterion(inference, priors, target)
                val_mean_loss_loc += float(loss_loc)
                val_mean_loss_conf += float(loss_conf)
                vpbar.set_postfix(
                    **{
                        'location loss': float(loss_loc),
                        'confidnece loss': float(loss_conf)
                    })
                vpbar.update(1)
            vpbar.set_postfix(
                **{
                    'location loss': float(val_mean_loss_loc / n_val),
                    'confidnece loss': float(val_mean_loss_conf / n_val)
                })
        writer.add_scalar('Test/location_loss',
                          float(val_mean_loss_loc / n_val), val_global_step)
        writer.add_scalar('Test/confidence_loss',
                          float(val_mean_loss_conf / n_val), val_global_step)
        val_global_step += 1

        # Checkpoint every 10 epochs and at the final epoch.
        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)
    writer.close()
anno["bbox"] = list(box) anno["score"] = scores output.append(anno) #print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, # num_images, detect_time)) print('time: ', sum_time/num_images, 'fps: ', sum_fps/num_images) print('sum: ', net.detect.count, 'mean: ', net.detect.count/num_images) print('writing detections') output_path = os.path.join(args.root_path, 'result/result.json') with open(output_path, 'w') as f: json.dump(output, f) if __name__ == '__main__': # load net net = build_ssd('test', ssd_dim, num_classes) # initialize SSD net.load_state_dict(torch.load(args.trained_model)) net.eval() print('Finished loading model!') # load data dataset = COCODetection(args.root_path, img_sets, BaseTransform(ssd_dim, dataset_mean), target_transform=False) if args.cuda: net = net.cuda() cudnn.benchmark = True # evaluation test_net(net, args.cuda, dataset)
with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating detections') # with open(det_file, 'rb') as f: # all_boxes = pickle.load(f) # print('LOADED') dataset.evaluate_detections(all_boxes, save_folder) if __name__ == '__main__': # load net num_classes = len(labelmap) + 1 # +1 for background net = build_ssd('test', cfg, args.use_pred_module) # initialize SSD net.load_state_dict(torch.load(args.trained_model)) net.eval() print('Finished loading model!') print(net) # load data dataset = COCODetection(args.dataset_root, image_set='minival2014', transform=BaseTransform(cfg['min_dim'], MEANS), target_transform=COCOAnnotationTransform()) if args.cuda: net = net.cuda() cudnn.benchmark = True # evaluation test_net(args.save_folder, net, args.cuda, dataset, BaseTransform(net.size, MEANS), args.top_k, 512, thresh=args.confidence_threshold)
# --- YOLACT evaluation entry: device setup, optional resume, model load ---
if args.cuda:
    cudnn.fastest = True
    # Make freshly created tensors default to CUDA.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

# Resume mode: re-compute mAP from previously pickled AP data and exit
# without running any inference.
if args.resume and not args.display:
    with open(args.ap_data_file, 'rb') as f:
        ap_data = pickle.load(f)
    calc_map(ap_data)
    exit()

# Full-dataset evaluation only when no single image/video/folder was given;
# otherwise evaluate() handles the media inputs with dataset=None.
if args.image is None and args.video is None and args.images is None:
    dataset = COCODetection(cfg.dataset.valid_images, cfg.dataset.valid_info,
                            transform=BaseTransform(), has_gt=cfg.dataset.has_gt)
    prep_coco_cats()
else:
    dataset = None

print('Loading model...', end='')
net = Yolact()
net.load_weights(args.trained_model)
net.eval()
print(' Done.')

if args.cuda:
    net = net.cuda()

evaluate(net, dataset)
# target_size = 1024 cfg = coco_refinedet[args.input_size] target_size = cfg['min_dim'] num_classes = cfg['num_classes'] objectness_threshold = 0.01 args.nms_threshold = 0.49 # nms # args.nms_threshold = 0.45 # softnms args.confidence_threshold = 0.01 args.top_k = 1000 args.keep_top_k = 500 args.vis_thres = 0.3 # args.multi_scale_test = True # load data dataset = COCODetection(COCOroot, ['val2017'], None, dataset_name='coco2017') # dataset = COCODetection(COCOroot, ['test2017'], None, dataset_name='coco2017') # load net torch.set_grad_enabled(False) load_to_cpu = not args.cuda cudnn.benchmark = True device = torch.device('cuda' if args.cuda else 'cpu') detect = Detect_RefineDet(num_classes, int(args.input_size), 0, objectness_threshold, confidence_threshold=args.confidence_threshold, nms_threshold=args.nms_threshold, top_k=args.top_k, keep_top_k=args.keep_top_k) net = build_refinedet('test', int(args.input_size), num_classes, backbone_dict) # test multi models, to filter out the best model. # start_epoch = 10; step = 10 start_epoch = 200; step = 5 ToBeTested = [] ToBeTested = [prefix + f'/RefineDet{args.input_size}_COCO_epoches_{epoch}.pth' for epoch in range(start_epoch, 300, step)]