def __init__(self, data_root, img_size, device, transform, labelmap, set_type='test', year='2007', display=False):
    """Store the evaluation settings and build the VOC paths and dataset.

    Args:
        data_root: Directory that contains the ``VOC<year>`` folder.
        img_size: Indexable size; only ``img_size[0]`` is passed to the dataset.
        device: Stored unchanged on the instance.
        transform: Transform handed to ``VOCDetection``.
        labelmap: Stored unchanged on the instance.
        set_type: Image-set name; also the stem of the ``ImageSets/Main`` file.
        year: VOC release year; selects the ``VOC<year>`` folder and image set.
        display: Stored unchanged on the instance.
    """
    self.data_root = data_root
    self.img_size = img_size
    self.device = device
    self.transform = transform
    self.labelmap = labelmap
    self.set_type = set_type
    self.year = year
    self.display = display

    # path
    # Every location is derived from ``year`` so a non-default year does not
    # mix folders; os.path.join keeps the result valid when data_root has no
    # trailing separator.
    self.devkit_path = os.path.join(data_root, 'VOC' + year)
    self.annopath = os.path.join(self.devkit_path, 'Annotations', '%s.xml')
    self.imgpath = os.path.join(self.devkit_path, 'JPEGImages', '%s.jpg')
    self.imgsetpath = os.path.join(self.devkit_path, 'ImageSets', 'Main', set_type + '.txt')
    self.output_dir = self.get_output_dir('voc_eval/', self.set_type)

    # dataset
    self.dataset = VOCDetection(root=data_root,
                                img_size=img_size[0],
                                image_sets=[(year, set_type)],
                                transform=transform)
def main(args):
    """Evaluate a trained SSD300 model on the VOC2007 test split.

    Args:
        args: Namespace providing ``data_root``, ``trained_model``, ``cuda``
            and ``save_path``.
    """
    img_dim = 300
    set_type = 'test'
    use_voc_07_ap_metric = True

    data_iter = VOCDetection(args.data_root, [('2007', set_type)],
                             BaseTransform(img_dim, (104, 117, 123)),
                             AnnotationTransform())
    print('Using data iterator "{}"'.format(data_iter.__class__.__name__))

    num_classes = data_iter.num_classes()
    net = build_ssd('test', img_dim, num_classes)  # initialize SSD

    # Without CUDA, remap the checkpoint tensors to the CPU so that weights
    # saved from a GPU run can still be deserialized.
    map_location = None if args.cuda else 'cpu'
    net.load_state_dict(torch.load(args.trained_model, map_location=map_location))
    net.eval()
    print('Finished loading model! {} Cuda'.format(
        'Using' if args.cuda else 'No'))

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    eval_ssd(data_iter, net, args.save_path, cuda=args.cuda,
             use_voc_07=use_voc_07_ap_metric)
def demo(img_id=0):
    """Run SSD512 on one VOC2012 val image and plot detections with matplotlib.

    Args:
        img_id: Index of the image pulled from the dataset.
    """
    net = build_ssd('test', 512, 21)  # initialize SSD
    print(net)
    net.load_weights(
        '/media/sunwl/Datum/Projects/GraduationProject/SSD_VHR_512/weights/ssd512_voc_resume_95000.pth'
    )
    testset = VOCDetection(VOCroot, [('2012', 'val')], None,
                           AnnotationTransform())
    image = testset.pull_image(img_id)
    # image = cv2.imread('demos/04.png')
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # View the sampled input image before transform
    plt.figure(figsize=(10, 10))
    plt.imshow(rgb_image)

    # Resize, subtract the per-channel means, reverse the channel axis and
    # move channels first to obtain a (1, 3, 512, 512) network input.
    x = cv2.resize(rgb_image, (512, 512)).astype(np.float32)
    x -= (104.0, 117.0, 123.0)
    x = x[:, :, ::-1].copy()
    x = torch.from_numpy(x).permute(2, 0, 1)
    xx = Variable(x.unsqueeze(0))  # wrap tensor in Variable
    if torch.cuda.is_available():
        xx = xx.cuda()
    y = net(xx)

    plt.figure(figsize=(10, 10))
    colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
    plt.imshow(rgb_image.astype(np.uint8))  # plot the image for matplotlib
    currentAxis = plt.gca()

    detections = y.data
    # scale each detection back up to the image
    scale = torch.Tensor(rgb_image.shape[1::-1]).repeat(2)
    num_candidates = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Stop at the last candidate as well: if every candidate of a class
        # scores >= 0.5 the index would otherwise run past the tensor.
        while j < num_candidates and detections[0, i, j, 0] >= 0.5:
            score = detections[0, i, j, 0]
            label_name = labels[i - 1]
            display_txt = '%s: %.2f' % (label_name, score)
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            coords = (pt[0], pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
            color = colors[i]
            currentAxis.add_patch(
                plt.Rectangle(*coords, fill=False, edgecolor=color,
                              linewidth=2))
            currentAxis.text(pt[0], pt[1], display_txt,
                             bbox={'facecolor': color, 'alpha': 0.5})
            j += 1
    plt.show()
def demo_cv2(img_id=0):
    """Run the multi-scale detector on one VOC2012 val image and show it with OpenCV.

    Two windows are opened ('original' and 'demo'); the call blocks until a
    key is pressed.

    Args:
        img_id: Index of the image pulled from the dataset.
    """
    net = build_msc('test', 21)  # initialize SSD
    print(net)
    net.load_weights(
        '/media/sunwl/Datum/Projects/GraduationProject/Multi_Scale_CNN_512/weights/v2_voc.pth'
    )
    # Pass an AnnotationTransform instance, as the sibling demo() does.
    testset = VOCDetection(VOCroot, [('2012', 'val')], None,
                           AnnotationTransform())
    image = testset.pull_image(img_id)
    # image = cv2.imread('demos/047.jpg')
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    x = cv2.resize(rgb_image, (512, 512)).astype(np.float32)
    x -= (104.0, 117.0, 123.0)
    x = x[:, :, ::-1].copy()
    x = torch.from_numpy(x).permute(2, 0, 1)
    xx = Variable(x.unsqueeze(0))  # wrap tensor in Variable
    if torch.cuda.is_available():
        xx = xx.cuda()
    y = net(xx)

    colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
    detections = y.data
    # scale each detection back up to the image
    scale = torch.Tensor(rgb_image.shape[1::-1]).repeat(2)
    # Same channel swap as before, spelled with the constant that states the
    # actual direction of the conversion.
    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    im2show = np.copy(bgr_image)

    num_candidates = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a class whose candidates all score >= 0.5 cannot index
        # past the end of the tensor.
        while j < num_candidates and detections[0, i, j, 0] >= 0.5:
            score = detections[0, i, j, 0]
            label_name = labels[i - 1]
            display_txt = '%s: %.2f' % (label_name, score)
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            color = [int(c * 255) for c in colors[i][:3]]
            # OpenCV drawing functions expect integer pixel coordinates.
            left, top, right, bottom = (int(v) for v in pt[:4])
            cv2.rectangle(im2show, (left, top), (right, bottom), color,
                          thickness=2)
            cv2.putText(im2show, display_txt, (left, top - 3),
                        cv2.FONT_HERSHEY_PLAIN, 1.0, color, thickness=1)
            j += 1

    cv2.imshow('original', bgr_image)
    cv2.imshow('demo', im2show)
    # cv2.imwrite(os.path.join('/media/sunwl/Datum/Projects/GraduationProject/Multi_Scale_CNN_512', "outputs",
    #                          "{:03d}.jpg".format(img_id)), im2show)
    cv2.waitKey(0)
def draw_anchor(ImgPath, AnnoPath, save_path):
    """Draw the annotated boxes and object names of the first five test images.

    For each of the first five entries of the VOC2007 test set the matching
    ``<id>.png`` image and ``test<id>.xml`` annotation are read, every
    ``bndbox`` is drawn together with its object name, and the picture is
    written into ``save_path`` under the file name stored in the annotation.

    Args:
        ImgPath: Prefix prepended to ``<id>.png`` (no separator is inserted).
        AnnoPath: Prefix prepended to ``test<id>.xml`` (no separator is inserted).
        save_path: Output directory, joined to the file name with ``/``.
    """
    def _int_tag(node, tag):
        # Integer text of the first <tag> element found below ``node``.
        return int(node.getElementsByTagName(tag)[0].childNodes[0].data)

    # load data
    testset = VOCDetection(args.voc_root, [('2007', 'test')], None,
                           VOCAnnotationTransform())
    imagelist = os.listdir(ImgPath)  # listing is not used further below
    cnt = 5
    for idx in range(cnt):
        image, annotation = testset.pull_anno(idx)
        imgfile = ImgPath + image + '.png'
        xmlfile = AnnoPath + 'test' + image + '.xml'

        # open the xml document and take its root element
        collection = xml.dom.minidom.parse(xmlfile).documentElement
        # read the picture
        img = cv.imread(imgfile)

        filename = collection.getElementsByTagName("filename")[0].childNodes[0].data
        print(filename)

        # every <object> element describes one annotated instance
        for obj in collection.getElementsByTagName("object"):
            # all <name> elements below this object
            namelist = obj.getElementsByTagName('name')
            for name_idx, box in enumerate(obj.getElementsByTagName('bndbox')):
                x1 = _int_tag(box, 'xmin')
                y1 = _int_tag(box, 'ymin')
                # mind the coordinates; check whether they need converting
                x2 = _int_tag(box, 'xmax')
                y2 = _int_tag(box, 'ymax')
                cv.rectangle(img, (x1, y1), (x2, y2), (0, 165, 255),
                             thickness=2)
                # the name paired with this box
                objectname = namelist[name_idx].childNodes[0].data
                cv.putText(img, objectname, (x1, y1),
                           cv.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255),
                           thickness=1)

        # cv.imshow(filename, img)  # needs an X display (e.g. Xmanager)
        cv.imwrite(save_path + '/' + filename, img)  # save picture
def test_model(trained_model):
    """Load a detector checkpoint and evaluate it on the configured dataset.

    Reads ``args.dataset`` ('VOC', 'VOC2012' or 'COCO'), ``args.size``,
    ``args.cuda``, ``args.version`` and ``args.save_folder``. For any other
    dataset name a message is printed and nothing is evaluated.

    Args:
        trained_model: Path of the checkpoint given to ``torch.load``.
    """
    # Reject unsupported datasets before the model is loaded, instead of
    # failing later on an unbound ``testset``.
    if args.dataset not in ('VOC', 'VOC2012', 'COCO'):
        print('Only VOC and COCO dataset are supported now!')
        return

    # load net
    img_dim = 512 if args.size == '512' else 300
    num_classes = 81 if args.dataset == 'COCO' else 21
    net = build_net('test', img_dim, num_classes)  # initialize detector

    # Remap to CPU when CUDA is not requested so GPU-saved weights still load.
    state_dict = torch.load(trained_model,
                            map_location=None if args.cuda else 'cpu')

    # create new OrderedDict that does not contain `module.`
    from collections import OrderedDict
    new_state_dict = OrderedDict(
        (key[7:] if key.startswith('module.') else key, value)
        for key, value in state_dict.items())
    net.load_state_dict(new_state_dict)
    net.eval()
    print('Finished loading model!')

    # load data
    if args.dataset == 'VOC':
        testset = VOCDetection(VOCroot, [('2007', 'test')], None,
                               AnnotationTransform())
    elif args.dataset == 'VOC2012':
        testset = VOCDetection(VOCroot, [('2012', 'test')], None,
                               AnnotationTransform())
    else:  # 'COCO'
        testset = COCODetection(COCOroot, [('2014', 'minival')], None)
        # COCOroot, [('2015', 'test-dev')], None)

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    else:
        net = net.cpu()

    # evaluation
    top_k = 200
    detector = Detect(num_classes, 0, cfg)
    save_folder = os.path.join(args.save_folder, args.dataset)
    if args.version == 'RFB_mobile':
        rgb_means = (103.94, 116.78, 123.68)
    else:
        rgb_means = (104, 117, 123)
    test_net(save_folder, net, detector, args.cuda, testset,
             BaseTransform(net.size, rgb_means, (2, 0, 1)),
             top_k, thresh=0.01)
def create_dataset(opts, phase=None):
    """Build the detection dataset selected by ``opts.dataset``.

    Args:
        opts: Options providing ``dataset`` ('voc' or 'coco'), ``phase`` and
            ``ssd_dim``.
        phase: Label used only in the summary print; falls back to
            ``opts.phase`` when ``None``.

    Returns:
        The constructed dataset object.

    Raises:
        NameError: If ``opts.dataset`` is neither 'voc' nor 'coco'.
    """
    means = (104, 117, 123)
    name = opts.dataset
    home = os.path.expanduser("~")
    training = opts.phase == 'train'
    # augmentation while training, plain base transform otherwise
    DataAug = SSDAugmentation if training else BaseTransform

    if name == 'voc':
        print('Loading Dataset...')
        if training:
            sets = [('2007', 'trainval'), ('2012', 'trainval')]
        else:
            sets = [('2007', 'test')]
        data_root = os.path.join(home, "data/VOCdevkit/")
        from data import VOCDetection
        dataset = VOCDetection(data_root, sets,
                               DataAug(opts.ssd_dim, means),
                               AnnotationTransform())
    elif name == 'coco':
        data_root = os.path.join(home, 'dataset/coco')
        from data import COCODetection
        dataset = COCODetection(root=data_root, phase=opts.phase,
                                transform=DataAug(opts.ssd_dim, means))
    else:
        raise NameError('Unknown dataset')

    show_phase = phase if phase is not None else opts.phase
    print('{:s} on {:s}'.format(show_phase.upper(), dataset.name))
    return dataset
def test():
    """Load the CenterNet checkpoint and run it over the VOC2007 test images.

    Reads ``args.cuda``, ``args.voc_root``, ``args.version``,
    ``args.trained_model`` and ``args.visual_threshold``. Exits when
    ``args.version`` is not 'centernet'.
    """
    # get device
    if args.cuda:
        print('use cuda')
        cudnn.benchmark = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # load net
    num_classes = len(VOC_CLASSES)
    testset = VOCDetection(args.voc_root, [('2007', 'test')], None,
                           VOCAnnotationTransform())
    cfg = config.voc_cfg

    if args.version == 'centernet':
        from models.centernet import CenterNet
        net = CenterNet(device, input_size=cfg['min_dim'],
                        num_classes=num_classes)
    else:
        # Without this branch an unsupported version fails on an unbound
        # ``net`` a few lines further down.
        print('Unknown Version !!!')
        exit()

    net.load_state_dict(torch.load(args.trained_model, map_location=device))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net, device, testset,
             BaseTransform(net.input_size,
                           mean=(0.406, 0.456, 0.485),
                           std=(0.225, 0.224, 0.229)),
             thresh=args.visual_threshold)
def test():
    """Load a YOLOv2/YOLOv3 checkpoint and run it over the VOC2007 test images.

    Reads ``args.voc_root``, ``args.version`` ('yolo_v2' or 'yolo_v3'),
    ``args.trained_model`` and ``args.visual_threshold``. Exits on any other
    version.
    """
    # get device
    device = get_device(0)

    # load net
    num_classes = len(VOC_CLASSES)
    testset = VOCDetection(args.voc_root, [('2007', 'test')], None,
                           VOCAnnotationTransform())
    mean = config.MEANS
    cfg = config.voc_ab

    if args.version == 'yolo_v2':
        net = myYOLOv2(device, input_size=cfg['min_dim'],
                       num_classes=num_classes, trainable=False,
                       anchor_size=config.ANCHOR_SIZE)
        print('Let us test yolo-v2 on the VOC0712 dataset ......')
    elif args.version == 'yolo_v3':
        from models.yolo_v3 import myYOLOv3
        net = myYOLOv3(device, input_size=cfg['min_dim'],
                       num_classes=num_classes, trainable=False,
                       anchor_size=config.MULTI_ANCHOR_SIZE)
    else:
        # An unsupported version would otherwise fail on an unbound ``net``.
        print('Unknown Version !!!')
        exit()

    # Deserialize onto the CPU and let ``net.to(device)`` do the placement;
    # a fixed 'cuda' target cannot be satisfied on a host without a GPU.
    net.load_state_dict(torch.load(args.trained_model, map_location='cpu'))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net, device, testset, BaseTransform(net.input_size, mean),
             thresh=args.visual_threshold)
def main(trained_model):
    """Load an SSD300 checkpoint and run the evaluation routine.

    Reads ``args.cuda``, ``args.voc_root``, ``args.save_folder``,
    ``args.top_k`` and ``args.confidence_threshold``.

    Args:
        trained_model: Path of the checkpoint given to ``torch.load``.
    """
    # load net
    num_classes = len(labelmap) + 1  # +1 for background
    net = build_ssd('test', 300, num_classes)  # initialize SSD

    # The model is moved to the GPU only when args.cuda asks for it (see
    # below); the checkpoint is remapped to the CPU otherwise so the same
    # file loads on a host without CUDA.
    net.load_state_dict(
        torch.load(trained_model, map_location=None if args.cuda else 'cpu'))
    net.eval()
    print('Finished loading model!')

    # load data
    dataset = VOCDetection(args.voc_root, [('2007', set_type)],
                           BaseTransform(300, dataset_mean),
                           VOCAnnotationTransform())
    # NOTE(review): the VOC dataset built above is immediately replaced by the
    # module-level name ``COCO``; this looks like leftover debugging - confirm
    # which dataset is meant to be evaluated.
    dataset = COCO

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # evaluation
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, dataset_mean), args.top_k, 300,
             thresh=args.confidence_threshold)
def read_gt(voc_dir):
    """Collect the VOC2007 test annotations grouped by class slot and image.

    Args:
        voc_dir: Dataset root handed to ``VOCDetection``.

    Returns:
        A pair ``(gt_bbox, num_images)``. ``gt_bbox`` holds
        ``len(labelmap) + 1`` lists of ``num_images`` lists; an annotation
        ``a`` of image ``i`` contributes ``a[:4]`` to ``gt_bbox[a[4] + 1][i]``.
    """
    dataset = VOCDetection(voc_dir, [('2007', 'test')],
                           BaseTransform(300, (104, 117, 123)),
                           VOCAnnotationTransform())
    num_images = len(dataset)
    num_slots = len(labelmap) + 1

    gt_bbox = []
    for _ in range(num_slots):
        gt_bbox.append([[] for _ in range(num_images)])

    for img_idx in range(len(dataset)):
        _, annotations = dataset.pull_anno(img_idx)
        for anno in annotations:
            # the fifth value selects the slot (shifted by one), the first
            # four are stored
            slot = anno[4] + 1
            gt_bbox[slot][img_idx].append(anno[:4])

    return gt_bbox, num_images
def test():
    """Load the FCOS-LITE checkpoint and run it over the VOC2007 test images.

    Reads ``args.voc_root``, ``args.version``, ``args.trained_model`` and
    ``args.visual_threshold``. Exits when ``args.version`` is not 'fcos_lite'.
    """
    # get device
    device = get_device(0)

    # load net
    num_classes = len(VOC_CLASSES)
    testset = VOCDetection(args.voc_root, [('2007', 'test')], None,
                           VOCAnnotationTransform())
    mean = config.MEANS
    cfg = config.voc_ab

    if args.version == 'fcos_lite':
        from models.fcos_lite import FCOS_LITE
        net = FCOS_LITE(device, input_size=cfg['min_dim'],
                        num_classes=num_classes, trainable=False)
        print('Let us test FCOS-LITE on the VOC0712 dataset ......')
    else:
        # An unsupported version would otherwise fail on an unbound ``net``.
        print('Unknown Version !!!')
        exit()

    # Deserialize onto the CPU and let ``net.to(device)`` do the placement;
    # a fixed 'cuda' target cannot be satisfied on a host without a GPU.
    net.load_state_dict(torch.load(args.trained_model, map_location='cpu'))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net, device, testset, BaseTransform(net.input_size, mean),
             thresh=args.visual_threshold)
def train(net):
    """Train ``net`` for 1000 epochs and save the resulting weights.

    Uses the module-level ``device``, ``optimizer`` and ``criterion``. After
    every epoch the mean classification loss, mean localisation loss and the
    epoch duration are printed.

    Args:
        net: Model to optimise; its state dict is written to
            'Final_FaceBoxes.pth'.
    """
    net.train()

    # The prior boxes are generated once, outside of autograd.
    with torch.no_grad():
        priors = PriorBox().forward()
        priors = priors.to(device)

    loader = DataLoader(VOCDetection(), batch_size=2,
                        collate_fn=detection_collate, num_workers=12)

    for epoch in range(1000):
        conf_losses = []
        loc_losses = []
        started = time.time()

        if epoch > 500:
            adjust_learning_rate(optimizer, 1e-4)

        for images, targets in loader:
            images = images.to(device)
            targets = [anno.to(device) for anno in targets]

            out = net(images)
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, priors, targets)
            # the localisation term is weighted twice as much
            loss = 2 * loss_l + loss_c
            loss.backward()
            optimizer.step()

            conf_losses.append(loss_c.item())
            loc_losses.append(loss_l.item())

        finished = time.time()
        print(f'{np.mean(conf_losses)}, {np.mean(loc_losses)} time:{finished-started}')

    torch.save(net.state_dict(), 'Final_FaceBoxes.pth')
def DatasetSync(dataset='VOC', split='training'):
    """Return the training dataset object registered under ``dataset``.

    Args:
        dataset: One of 'VOC', 'kitti', 'COCO' or 'tme'.
        split: Split name; only forwarded to the KITTI loader.

    Returns:
        The constructed dataset. A name that matches none of the known
        datasets is handed back unchanged.
    """
    if dataset == 'VOC':
        voc_sets = [('2007', 'trainval'), ('2012', 'trainval')]
        # DataRoot=os.path.join(args.data_root,'VOCdevkit')
        return VOCDetection(args.data_root, voc_sets,
                            SSDAugmentation(args.dim, means),
                            AnnotationTransform())

    if dataset == 'kitti':
        kitti_root = os.path.join(args.data_root, 'kitti')
        return KittiLoader(kitti_root, split=split, img_size=(1000, 300),
                           transforms=SSDAugmentation((1000, 300), means),
                           target_transform=AnnotationTransform_kitti())

    if dataset == 'COCO':
        return COCODetection(root=COCO_ROOT,
                             transform=SSDAugmentation(args.dim, means))

    if dataset == 'tme':
        tme_sets = ['train_mix_cut_bot']
        return TMEDetection('/home/kiminhan/datasets/', tme_sets,
                            SSDAugmentation(args.dim, means),
                            AnnotationTransform())

    return dataset
def test():
    """Load the YOLO checkpoint and run it over the VOC2007 test images.

    Reads ``args.cuda``, ``args.input_size``, ``args.version``,
    ``args.trained_model`` and ``args.visual_threshold``. Exits when
    ``args.version`` is not 'yolo'.
    """
    if not args.cuda:
        device = torch.device("cpu")
    else:
        print('use cuda')
        cudnn.benchmark = True
        device = torch.device("cuda")

    # load net
    side = args.input_size
    input_size = [side, side]
    num_classes = 20
    testset = VOCDetection(VOC_ROOT, img_size=None,
                           image_sets=[('2007', 'test')], transform=None)

    # build model
    if args.version != 'yolo':
        print('Unknown Version !!!')
        exit()

    from models.yolo import myYOLO
    net = myYOLO(device, input_size=input_size, num_classes=num_classes,
                 trainable=False)
    print('Let us test yolo on the VOC0712 dataset ......')

    weights = torch.load(args.trained_model, map_location=device)
    net.load_state_dict(weights)
    net.eval()
    print('Finished loading model!')
    net = net.to(device)

    # evaluation
    test_net(net, device, testset, BaseTransform(net.input_size),
             thresh=args.visual_threshold)
def evaluate(model, save_folder, cuda, top_k, im_size=320, thresh=0.001, dataset_mean=((104, 117, 123))):
    """Switch ``model`` to test mode and score it on the VOC 'valid' phase.

    Args:
        model: Network to evaluate; its ``phase`` attribute is set to 'test'.
        save_folder: Forwarded to ``eval_net``.
        cuda: Forwarded to ``eval_net``.
        top_k: Forwarded to ``eval_net``.
        im_size: Size used for both transforms and forwarded to ``eval_net``.
        thresh: Threshold forwarded to ``eval_net``.
        dataset_mean: Mean passed to ``BaseTransform``.

    Returns:
        Whatever ``eval_net`` returns.
    """
    model.phase = 'test'
    model.eval()

    valid_set = VOCDetection(args.voc_root,
                             BaseTransform(im_size, dataset_mean),
                             VOCAnnotationTransform(),
                             phase='valid')
    eval_transform = BaseTransform(im_size, dataset_mean)
    return eval_net(save_folder, model, cuda, valid_set, eval_transform,
                    top_k, im_size, thresh=thresh)
def test():
    """Load one of the YOLO variants and run it over the VOC2007 test images.

    ``args.version`` selects the model: 'yolo_v2', 'yolo_v3', 'slim_yolo_v2'
    or 'tiny_yolo_v3'; any other value exits. Also reads ``args.cuda``,
    ``args.voc_root``, ``args.trained_model`` and ``args.visual_threshold``.
    """
    # get device
    if args.cuda:
        print('use cuda')
        cudnn.benchmark = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # load net
    num_classes = len(VOC_CLASSES)
    testset = VOCDetection(args.voc_root, [('2007', 'test')], None,
                           VOCAnnotationTransform())
    cfg = config.voc_ab

    if args.version == 'yolo_v2':
        from models.yolo_v2 import myYOLOv2
        net = myYOLOv2(device, input_size=cfg['min_dim'],
                       num_classes=num_classes,
                       anchor_size=config.ANCHOR_SIZE)
        print('Let us test yolo-v2 on the VOC0712 dataset ......')
    elif args.version == 'yolo_v3':
        from models.yolo_v3 import myYOLOv3
        net = myYOLOv3(device, input_size=cfg['min_dim'],
                       num_classes=num_classes,
                       anchor_size=config.MULTI_ANCHOR_SIZE)
    elif args.version == 'slim_yolo_v2':
        from models.slim_yolo_v2 import SlimYOLOv2
        net = SlimYOLOv2(device, input_size=cfg['min_dim'],
                         num_classes=num_classes,
                         anchor_size=config.ANCHOR_SIZE)
        print('Let us test slim-yolo-v2 on the VOC0712 dataset ......')
    elif args.version == 'tiny_yolo_v3':
        from models.tiny_yolo_v3 import YOLOv3tiny
        net = YOLOv3tiny(device, input_size=cfg['min_dim'],
                         num_classes=num_classes,
                         anchor_size=config.TINY_MULTI_ANCHOR_SIZE)
        print('Let us test tiny-yolo-v3 on the VOC0712 dataset ......')
    else:
        # An unsupported version would otherwise fail on an unbound ``net``.
        print('Unknown Version !!!')
        exit()

    net.load_state_dict(torch.load(args.trained_model, map_location=device))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net, device, testset,
             BaseTransform(net.input_size,
                           mean=(0.406, 0.456, 0.485),
                           std=(0.225, 0.224, 0.229)),
             thresh=args.visual_threshold)
def test():
    """Load the YOLO checkpoint and run it over the selected test set.

    ``args.dataset`` selects 'COCO_val', 'COCO_test-dev' or 'VOC';
    ``args.version`` must be 'yolo'. Any other value of either option exits.
    Also reads ``args.cuda``, ``args.dataset_root``, ``args.debug``,
    ``args.trained_model`` and ``args.visual_threshold``.
    """
    # get device
    if args.cuda:
        cudnn.benchmark = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # load net
    num_classes = 80
    if args.dataset == 'COCO_val':
        cfg = config.coco_af
        input_size = cfg['min_dim']
        testset = COCODataset(data_dir=args.dataset_root,
                              json_file='instances_val2017.json',
                              name='val2017',
                              img_size=cfg['min_dim'][0],
                              debug=args.debug)
    elif args.dataset == 'COCO_test-dev':
        cfg = config.coco_af
        input_size = cfg['min_dim']
        testset = COCODataset(data_dir=args.dataset_root,
                              json_file='image_info_test-dev2017.json',
                              name='test2017',
                              img_size=cfg['min_dim'][0],
                              debug=args.debug)
    elif args.dataset == 'VOC':
        cfg = config.voc_af
        input_size = cfg['min_dim']
        testset = VOCDetection(VOC_ROOT, [('2007', 'test')], None,
                               VOCAnnotationTransform())
    else:
        # Mirrors the version check below; otherwise ``input_size`` and
        # ``testset`` stay unbound for an unsupported dataset.
        print('Unknown Dataset !!!')
        exit()

    # build model
    if args.version == 'yolo':
        from models.yolo import myYOLO
        net = myYOLO(device, input_size=input_size, num_classes=num_classes,
                     trainable=False)
        print('Let us test YOLO on the %s dataset ......' % (args.dataset))
    else:
        print('Unknown Version !!!')
        exit()

    net.load_state_dict(torch.load(args.trained_model, map_location=device))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net, device, testset,
             BaseTransform(net.input_size,
                           mean=(0.406, 0.456, 0.485),
                           std=(0.225, 0.224, 0.229)),
             thresh=args.visual_threshold)
def train():
    """Run the S3FD training loop from ``args.resume_epoch`` to ``args.max_epoch``.

    Works on module-level state: ``net``, ``optimizer``, ``criterion``,
    ``priors``, ``cfg``, ``img_dim``, ``rgb_means``, ``writer`` and ``args``.
    A checkpoint is written every 10th epoch (every 5th after epoch 200) and
    once more when training ends.
    """
    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    dataset = VOCDetection(args.training_dataset,
                           preproc_s3fd(img_dim, rgb_means, cfg['max_expand_ratio']),
                           AnnotationTransform())

    epoch_size = math.ceil(len(dataset) / args.batch_size)
    max_iter = args.max_epoch * epoch_size

    stepvalues = (200 * epoch_size, 250 * epoch_size)
    start_iter = args.resume_epoch * epoch_size if args.resume_epoch > 0 else 0
    # On resume, start from the number of decay steps already passed; a step
    # that coincides with start_iter is still counted inside the loop.
    step_index = sum(1 for step in stepvalues if step < start_iter)

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            # The batch size is taken from args, like epoch_size above, so
            # both always describe the same batches.
            batch_iterator = iter(data.DataLoader(dataset, args.batch_size,
                                                  shuffle=True,
                                                  num_workers=args.num_workers,
                                                  collate_fn=detection_collate,
                                                  pin_memory=True))
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200):
                torch.save(net.state_dict(),
                           args.save_folder + 'S3FD_{}_epoch_'.format(args.net) + repr(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + cfg['conf_weight'] * loss_c
        loss.backward()
        optimizer.step()
        load_t1 = time.time()

        print('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size) +
              '|| Totel iter ' + repr(iteration) +
              ' || L: %.4f C: %.4f||' % (loss_l.item(), cfg['conf_weight'] * loss_c.item()) +
              'Batch time: %.4f sec. ||' % (load_t1 - load_t0) + 'LR: %.8f' % (lr))

        if writer is not None:
            writer.add_scalar('train/loss_l', loss_l.item(), iteration)
            writer.add_scalar('train/loss_c', cfg['conf_weight'] * loss_c.item(), iteration)
            writer.add_scalar('train/lr', lr, iteration)

    torch.save(net.state_dict(),
               args.save_folder + 'Final_{}_S3FD.pth'.format(args.net))
def DatasetSync(dataset='VOC', split='training'):
    """Return the VOC-format training dataset, or echo back any other name.

    Args:
        dataset: Dataset name; only 'VOC' builds a dataset object.
        split: Accepted but not used by this function.

    Returns:
        A ``VOCDetection`` built from ``args.data_root`` and the module-level
        ``train_sets`` when ``dataset`` is 'VOC'; otherwise ``dataset`` itself.
    """
    if dataset != 'VOC':
        return dataset

    return VOCDetection(args.data_root, train_sets,
                        transform=SSDAugmentation(args.dim, means),
                        target_transform=AnnotationTransform_caltech(),
                        target_vis_transform=AnnotationTransform_vis())
def train():
    """Run the training loop from ``args.resume_epoch`` to ``max_epoch``.

    Works on module-level state: ``net``, ``optimizer``, ``criterion``,
    ``priors``, ``cfg``, ``device``, ``training_dataset``, ``img_dim``,
    ``rgb_mean``, ``batch_size``, ``max_epoch``, ``num_workers``, ``gamma``,
    ``save_folder`` and ``args``. A checkpoint is written every 10th epoch
    (every 5th after epoch 200) and once more when training ends.
    """
    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    dataset = VOCDetection(training_dataset, preproc(img_dim, rgb_mean),
                           AnnotationTransform())
    # dataset = AFLW(training_dataset, npy_file, preproc_(img_dim, rgb_mean))

    epoch_size = math.ceil(len(dataset) / batch_size)
    max_iter = max_epoch * epoch_size

    stepvalues = (200 * epoch_size, 250 * epoch_size)
    start_iter = args.resume_epoch * epoch_size if args.resume_epoch > 0 else 0
    # On resume, start from the number of decay steps already passed; a step
    # that coincides with start_iter is still counted inside the loop.
    step_index = sum(1 for step in stepvalues if step < start_iter)

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(data.DataLoader(dataset, batch_size,
                                                  shuffle=True,
                                                  num_workers=num_workers,
                                                  collate_fn=detection_collate))
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > 200):
                torch.save(net.state_dict(),
                           save_folder + '{}_'.format(args.save_name) + str(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.to(device)
        targets = [anno.to(device) for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c
        loss.backward()
        optimizer.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        # Only the two losses the criterion returns are reported; the former
        # third field referenced ``loss_f``, which is never assigned here.
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || L: {:.4f} C: {:.4f} || LR: {:.4f} || Batchtime: {:.4f} s || ETA: {}'.format(
            epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
            iteration + 1, max_iter, loss_l.item(), loss_c.item(), lr,
            batch_time, str(datetime.timedelta(seconds=eta))))

    # NOTE(review): the final checkpoint name carries no file extension,
    # unlike the per-epoch checkpoints above - confirm this is intended.
    torch.save(net.state_dict(),
               save_folder + 'Final_{}_'.format(args.save_name))
def train():
    """Run the face-detector training loop from ``args.resume_epoch`` to ``args.max_epoch``.

    Works on module-level state: ``net``, ``optimizer``, ``criterion``,
    ``priors``, ``device``, ``img_dim``, ``rgb_means`` and ``args``. A
    checkpoint is written at the start of every epoch after the first and
    once more when training ends.
    """
    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    dataset = VOCDetection(args.training_dataset, preproc(img_dim, rgb_means),
                           AnnotationTransform())

    epoch_size = math.ceil(len(dataset) / args.batch_size)
    max_iter = args.max_epoch * epoch_size

    stepvalues = (200 * epoch_size, 250 * epoch_size)
    start_iter = args.resume_epoch * epoch_size if args.resume_epoch > 0 else 0
    # On resume, start from the number of decay steps already passed; a step
    # that coincides with start_iter is still counted inside the loop.
    step_index = sum(1 for step in stepvalues if step < start_iter)

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            # The batch size is taken from args, like epoch_size above, so
            # both always describe the same batches.
            batch_iterator = iter(data.DataLoader(dataset, args.batch_size,
                                                  shuffle=True,
                                                  num_workers=args.num_workers,
                                                  collate_fn=detection_collate))
            # The previous ``epoch % 1 == 0`` tests were always true, so the
            # condition is simply "not the very first epoch".
            if epoch > 0:
                torch.save(net.state_dict(),
                           args.save_folder + 'Face_epoch_' + repr(epoch) + '.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.to(device)
        targets = [anno.to(device) for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        # unweighted sum; cfg['loc_weight'] is deliberately not applied here
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        load_t1 = time.time()

        print('Epoch:' + repr(epoch) + ' || epochiter: ' + repr(iteration % epoch_size) + '/' + repr(epoch_size) +
              '|| Totel iter ' + repr(iteration) +
              ' || L: %.4f C: %.4f||' % (loss_l.item(), loss_c.item()) +
              'Batch time: %.4f sec. ||' % (load_t1 - load_t0) + 'LR: %.8f' % (lr))

    torch.save(net.state_dict(), args.save_folder + 'Final.pth')
def test_voc():
    """Load an SSD300 checkpoint and run it on random images of the test set.

    With ``args.use_custom`` the custom class list and the ('2019', 'test')
    image set are used; otherwise the VOC classes and ('2007', 'test'). Also
    reads ``args.cuda``, ``args.trained_model``, ``args.voc_root`` and
    ``args.visual_threshold``.
    """
    # load net
    class_names = CUSTOM_CLASSES if args.use_custom else VOC_CLASSES
    num_classes = len(class_names) + 1  # +1 background
    net = build_ssd('test', 300, num_classes)  # initialize SSD

    # map the checkpoint onto the device that will be used
    load_device = torch.device('cuda') if args.cuda else torch.device('cpu')
    net.load_state_dict(torch.load(args.trained_model, map_location=load_device))
    net.eval()
    print('Finished loading model!')

    # load data
    if args.use_custom:
        custom_class_to_ind = {
            name: index for index, name in enumerate(CUSTOM_CLASSES)}
        testset = VOCDetection(
            root=args.voc_root,
            image_sets=[('2019', 'test')],
            dataset_name='VOC2019',
            transform=BaseTransform(300, MEANS),
            target_transform=VOCAnnotationTransform(
                class_to_ind=custom_class_to_ind))
    else:
        testset = VOCDetection(
            root=args.voc_root,
            image_sets=[('2007', 'test')],
            dataset_name='VOC0712',
            transform=BaseTransform(300, MEANS),
            target_transform=VOCAnnotationTransform())

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # evaluation
    test_random_img(net, args.cuda, testset, BaseTransform(300, MEANS),
                    thresh=args.visual_threshold)
def main(args):
    """Train SSD on a VOC-format dataset and save a checkpoint after every epoch.

    Args:
        args: Namespace providing ``gpus``, ``input_size``, ``data_root``,
            ``train_set``, ``batch_size``, ``workers``, ``checkpoint``,
            ``pretrain``, ``num_classes``, ``neg_pos_ratio``, ``lr``,
            ``momentum``, ``weight_decay``, ``epochs`` and ``checkpoint_dir``.
            ``args.start_epoch`` is overwritten by this function.
    """
    if args.gpus is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        print('Using {} GPUs'.format(args.gpus))

    # data pipeline
    transform_steps = [
        Resize(args.input_size),
        ToTensor(),
        Norm(mean=(123, 117, 104)),
    ]
    train_transform = Compose(transform_steps)
    trainset = VOCDetection(args.data_root, args.train_set,
                            transform=train_transform, do_norm=True)
    train_loader = torch.utils.data.DataLoader(
        trainset,
        shuffle=True,
        batch_size=args.batch_size,
        num_workers=args.workers,
        collate_fn=detection_collate)

    # model; pretrained weights are only used when not resuming a checkpoint
    model = build_ssd(cfg)
    if not args.checkpoint and args.pretrain:
        print('load pretrain model: {}'.format(args.pretrain))
        model.load_weight(args.pretrain)
    if args.gpus:
        model = torch.nn.DataParallel(model).cuda()

    criterion = multibox_loss.MultiboxLoss(args.num_classes, args.neg_pos_ratio)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # optionally resume model, optimizer and epoch counter
    args.start_epoch = 0
    if args.checkpoint:
        print('=> loading checkpoint from {}...'.format(args.checkpoint))
        resumed = torch.load(args.checkpoint)
        args.start_epoch = resumed['epoch']
        model.load_state_dict(resumed['model'])
        optimizer.load_state_dict(resumed['optimizer'])

    for epoch in range(args.start_epoch, args.epochs):
        train(train_loader, model, criterion, optimizer, epoch, args)

        # save checkpoint
        next_epoch = epoch + 1
        state = {
            'epoch': next_epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        os.makedirs(args.checkpoint_dir, exist_ok=True)
        checkpoint_name = 'checkpoint_epoch_{:04d}.pth.tar'.format(next_epoch)
        torch.save(state, os.path.join(args.checkpoint_dir, checkpoint_name))
def load_dataset():
    """Build the train and test datasets selected by ``args.dataset``.

    Returns:
        A tuple ``(show_classes, num_classes, dataset, epoch_size, max_iter,
        testset)``. ``epoch_size`` is ``len(dataset) // args.batch_size`` and
        ``max_iter`` is 250 epochs' worth of iterations for VOC, 140 for COCO.

    Raises:
        NotImplementedError: If ``args.dataset`` is neither 'VOC' nor 'COCO'.
    """
    name = args.dataset
    if name == 'VOC':
        from data import VOCroot, VOCDetection, VOC_CLASSES
        show_classes = VOC_CLASSES
        total_epochs = 250
        train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
        dataset = VOCDetection(VOCroot, train_sets, preproc(args.size),
                               AnnotationTransform(),
                               dataset_name='VOC0712trainval')
        testset = VOCDetection(VOCroot, [('2007', 'test')], None)
    elif name == 'COCO':
        from data import COCOroot, COCODetection, COCO_CLASSES
        show_classes = COCO_CLASSES
        total_epochs = 140
        train_sets = [('2017', 'train')]
        dataset = COCODetection(COCOroot, train_sets, preproc(args.size))
        testset = COCODetection(COCOroot, [('2017', 'val')], None)
    else:
        raise NotImplementedError('Unkown dataset {}!'.format(args.dataset))

    num_classes = len(show_classes)
    epoch_size = len(dataset) // args.batch_size
    max_iter = total_epochs * epoch_size
    return (show_classes, num_classes, dataset, epoch_size, max_iter, testset)
def test_voc():
    """Load an SSD300 checkpoint and run it over the VOC2007 test images.

    Reads ``args.trained_model``, ``args.voc_root``, ``args.cuda``,
    ``args.save_folder`` and ``args.visual_threshold``.
    """
    # load net
    num_classes = len(VOC_CLASSES) + 1
    net = build_ssd('test', 300, num_classes)

    # Without CUDA, remap the checkpoint tensors to the CPU so that weights
    # saved from a GPU run can still be deserialized.
    map_location = None if args.cuda else 'cpu'
    net.load_state_dict(torch.load(args.trained_model, map_location=map_location))
    net.eval()
    print('Finished loading model!')

    # load data
    testset = VOCDetection(args.voc_root, [('2007', 'test')], None,
                           VOCAnnotationTransform())

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # evaluation
    test_net(args.save_folder, net, args.cuda, testset,
             BaseTransform(net.size, (104, 117, 123)),
             thresh=args.visual_threshold)
def test():
    """Load a COCO-anchored YOLO variant and run it over the chosen test set.

    ``args.dataset`` selects 'COCO' or 'VOC'; ``args.version`` selects
    'yolo_v2', 'yolo_v3', 'slim_yolo_v2' or 'tiny_yolo_v3'. Any other value of
    either option exits. Also reads ``args.cuda``, ``args.dataset_root``,
    ``args.debug``, ``args.trained_model`` and ``args.visual_threshold``.
    """
    # get device
    if args.cuda:
        print('use cuda')
        cudnn.benchmark = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # load net
    num_classes = 80
    if args.dataset == 'COCO':
        cfg = config.coco_ab
        testset = COCODataset(data_dir=args.dataset_root,
                              json_file='instances_val2017.json',
                              name='val2017',
                              img_size=cfg['min_dim'][0],
                              debug=args.debug)
    elif args.dataset == 'VOC':
        cfg = config.voc_ab
        testset = VOCDetection(VOC_ROOT, [('2007', 'test')], None,
                               VOCAnnotationTransform())
    else:
        # ``cfg`` and ``testset`` would otherwise stay unbound.
        print('Unknown Dataset !!!')
        exit()

    if args.version == 'yolo_v2':
        from models.yolo_v2 import myYOLOv2
        net = myYOLOv2(device, input_size=cfg['min_dim'],
                       num_classes=num_classes,
                       anchor_size=config.ANCHOR_SIZE_COCO)
        print('Let us test yolo-v2 on the MSCOCO dataset ......')
    elif args.version == 'yolo_v3':
        from models.yolo_v3 import myYOLOv3
        net = myYOLOv3(device, input_size=cfg['min_dim'],
                       num_classes=num_classes,
                       anchor_size=config.MULTI_ANCHOR_SIZE_COCO)
    elif args.version == 'slim_yolo_v2':
        from models.slim_yolo_v2 import SlimYOLOv2
        net = SlimYOLOv2(device, input_size=cfg['min_dim'],
                         num_classes=num_classes,
                         anchor_size=config.ANCHOR_SIZE_COCO)
    elif args.version == 'tiny_yolo_v3':
        from models.tiny_yolo_v3 import YOLOv3tiny
        net = YOLOv3tiny(device, input_size=cfg['min_dim'],
                         num_classes=num_classes,
                         anchor_size=config.TINY_MULTI_ANCHOR_SIZE_COCO)
    else:
        # ``net`` would otherwise stay unbound.
        print('Unknown Version !!!')
        exit()

    # Load onto the device selected above; a fixed 'cuda' target contradicts
    # the CPU branch and cannot be satisfied on a host without a GPU.
    net.load_state_dict(torch.load(args.trained_model, map_location=device))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net, device, testset,
             BaseTransform(net.input_size,
                           mean=(0.406, 0.456, 0.485),
                           std=(0.225, 0.224, 0.229)),
             thresh=args.visual_threshold)
def DatasetSync(dataset='voc', split='training'):
    """Return the training dataset object registered under ``dataset``.

    Args:
        dataset: 'voc' or 'kitti'.
        split: Split name; only forwarded to the KITTI loader.

    Returns:
        The constructed dataset. A name that matches neither known dataset is
        handed back unchanged.
    """
    if dataset == 'voc':
        voc_root = os.path.join(args.data_root, 'VOCdevkit')
        return VOCDetection(voc_root, train_sets,
                            SSDAugmentation(args.dim, means),
                            AnnotationTransform())

    if dataset == 'kitti':
        kitti_root = os.path.join(args.data_root, 'kitti')
        return KittiLoader(kitti_root, split=split, img_size=(1000, 300),
                           transforms=SSDAugmentation((1000, 300), means),
                           target_transform=AnnotationTransform_kitti())

    return dataset
def test():
    """Load a YOLO variant and run it over the chosen test set.

    ``args.dataset`` selects 'COCO' or 'VOC'; ``args.version`` selects
    'yolo_v2', 'yolo_v3', 'tiny_yolo_v2' or 'tiny_yolo_v3'. Any other value of
    either option exits. Also reads ``args.dataset_root``, ``args.debug``,
    ``args.trained_model`` and ``args.visual_threshold``.
    """
    # get device
    device = get_device(0)

    # load net
    num_classes = 80
    anchor_size = config.ANCHOR_SIZE_COCO
    if args.dataset == 'COCO':
        cfg = config.coco_ab
        testset = COCODataset(data_dir=args.dataset_root,
                              json_file='instances_val2017.json',
                              name='val2017',
                              img_size=cfg['min_dim'][0],
                              debug=args.debug)
        mean = config.MEANS
    elif args.dataset == 'VOC':
        cfg = config.voc_ab
        testset = VOCDetection(VOC_ROOT, [('2007', 'test')], None,
                               VOCAnnotationTransform())
        mean = config.MEANS
    else:
        # ``cfg``, ``testset`` and ``mean`` would otherwise stay unbound.
        print('Unknown Dataset !!!')
        exit()

    if args.version == 'yolo_v2':
        from models.yolo_v2 import myYOLOv2
        net = myYOLOv2(device, input_size=cfg['min_dim'],
                       num_classes=num_classes, trainable=False,
                       anchor_size=anchor_size)
        print('Let us test yolo-v2 on the MSCOCO dataset ......')
    elif args.version == 'yolo_v3':
        from models.yolo_v3 import myYOLOv3
        net = myYOLOv3(device, input_size=cfg['min_dim'],
                       num_classes=num_classes, trainable=False,
                       anchor_size=anchor_size)
    elif args.version == 'tiny_yolo_v2':
        from models.tiny_yolo_v2 import YOLOv2tiny
        net = YOLOv2tiny(device, input_size=cfg['min_dim'],
                         num_classes=num_classes, trainable=False,
                         anchor_size=config.ANCHOR_SIZE)
    elif args.version == 'tiny_yolo_v3':
        from models.tiny_yolo_v3 import YOLOv3tiny
        net = YOLOv3tiny(device, input_size=cfg['min_dim'],
                         num_classes=num_classes, trainable=False,
                         anchor_size=config.MULTI_ANCHOR_SIZE)
    else:
        # ``net`` would otherwise stay unbound.
        print('Unknown Version !!!')
        exit()

    # Deserialize onto the CPU and let ``net.to(device)`` do the placement;
    # a fixed 'cuda' target cannot be satisfied on a host without a GPU.
    net.load_state_dict(torch.load(args.trained_model, map_location='cpu'))
    net.to(device).eval()
    print('Finished loading model!')

    # evaluation
    test_net(net, device, testset, BaseTransform(net.input_size, mean),
             thresh=args.visual_threshold)
def test_voc():
    """Build SSD300 and run it over the 'test' image set of the configured dataset.

    Reads ``opt.DATASETS.ROOT``, ``opt.DATASETS.MEANS`` and ``opt.DEVICE``
    from the configuration, and ``args.save_folder``, ``args.cuda`` and
    ``args.visual_threshold`` from the command line.
    """
    net = build_ssd('test', 300, len(VOC_CLASSES) + 1)
    # NOTE(review): no checkpoint is loaded in this function before the
    # message below is printed - confirm that build_ssd restores the weights.
    net.eval()
    print('Finished loading model!')

    dataset_transform = BaseTransform(300, opt.DATASETS.MEANS)
    testset = VOCDetection(opt.DATASETS.ROOT, ['test'], dataset_transform,
                           VOCAnnotationTransform())

    if opt.DEVICE:
        net = net.cuda()
        cudnn.benchmark = True

    # evaluation
    eval_transform = BaseTransform(net.size, (104, 117, 123))
    test_net(args.save_folder, net, args.cuda, testset, eval_transform,
             thresh=args.visual_threshold)