def draw_anchor(ImgPath, AnnoPath, save_path):
    """Draw annotated boxes from XML files onto the first few test images.

    For (at most) the first five entries of the VOC 2007 ``test`` split rooted
    at ``args.voc_root``, the image ``ImgPath + <id> + '.png'`` and the
    annotation ``AnnoPath + 'test' + <id> + '.xml'`` are loaded.  Every
    ``<bndbox>`` found under every ``<object>`` is drawn as a rectangle with
    the matching ``<name>`` text, and the result is written to
    ``save_path + '/' + <filename>`` where ``<filename>`` is read from the XML.

    Args:
        ImgPath: Prefix of the image files.  It is concatenated as a plain
            string, so it must already end with a path separator.
        AnnoPath: Prefix of the annotation files (same concatenation rule).
        save_path: Directory the annotated images are written to.

    Images that OpenCV cannot read are reported on stdout and skipped.
    """

    def _coord(box, tag):
        # Accept both "12" and "12.0": int() alone rejects the float form.
        text = box.getElementsByTagName(tag)[0].childNodes[0].data
        return int(float(text))

    # load data
    testset = VOCDetection(args.voc_root, [('2007', 'test')], None,
                           VOCAnnotationTransform())
    # Only a handful of samples are visualised; never index past the split.
    cnt = min(5, len(testset))
    for i in range(cnt):
        # The first value returned by pull_anno is used as the file stem.
        image, _ = testset.pull_anno(i)
        imgfile = ImgPath + image + '.png'
        xmlfile = AnnoPath + 'test' + image + '.xml'

        # Open the XML document and take its root element.
        DOMTree = xml.dom.minidom.parse(xmlfile)
        collection = DOMTree.documentElement

        # Read the image; cv.imread returns None instead of raising when the
        # file is missing or unreadable, so check before drawing on it.
        img = cv.imread(imgfile)
        if img is None:
            print('Could not read image: ' + imgfile)
            continue

        filenamelist = collection.getElementsByTagName("filename")
        filename = filenamelist[0].childNodes[0].data
        print(filename)

        # Walk every <object> element of the annotation.
        for objects in collection.getElementsByTagName("object"):
            # All <name> elements below this object, in document order.
            namelist = objects.getElementsByTagName('name')
            bndbox = objects.getElementsByTagName('bndbox')
            # The n-th <bndbox> is labelled with the n-th <name>.
            for name_idx, box in enumerate(bndbox):
                # NOTE: coordinates are used as stored in the XML; check
                # whether they need converting for your data.
                x1 = _coord(box, 'xmin')
                y1 = _coord(box, 'ymin')
                x2 = _coord(box, 'xmax')
                y2 = _coord(box, 'ymax')
                cv.rectangle(img, (x1, y1), (x2, y2), (0, 165, 255),
                             thickness=2)
                objectname = namelist[name_idx].childNodes[0].data
                cv.putText(img, objectname, (x1, y1),
                           cv.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255),
                           thickness=1)

        # Save the annotated picture once all boxes have been drawn.
        # NOTE(review): the original nesting of this call was lost in the
        # whitespace-collapsed source; once per image is assumed — confirm.
        cv.imwrite(save_path + '/' + filename, img)
def read_gt(voc_dir):
    """Collect the annotations of the VOC 2007 test split, grouped by label and image.

    Args:
        voc_dir: Root directory handed to ``VOCDetection``.

    Returns:
        A tuple ``(gt_bbox, num_images)``.  ``gt_bbox`` is a nested list with
        ``len(labelmap) + 1`` outer entries, each holding one list per image;
        ``gt_bbox[k + 1][i]`` contains the first four values of every
        annotation of image ``i`` whose fifth value is ``k``.
        (Presumably the entries are ``[xmin, ymin, xmax, ymax, label]`` —
        confirm against ``VOCAnnotationTransform``.)
    """
    split = 'test'
    mean_values = (104, 117, 123)
    dataset = VOCDetection(
        voc_dir,
        [('2007', split)],
        BaseTransform(300, mean_values),
        VOCAnnotationTransform(),
    )
    num_images = len(dataset)

    # One bucket per (label slot, image); slot 0 is never written to here.
    slot_count = len(labelmap) + 1
    gt_bbox = []
    for _ in range(slot_count):
        gt_bbox.append([[] for _ in range(num_images)])

    for img_idx in range(num_images):
        _, annotations = dataset.pull_anno(img_idx)
        for entry in annotations:
            slot = entry[4] + 1
            gt_bbox[slot][img_idx].append(entry[:4])

    return gt_bbox, num_images
if __name__ == '__main__':
    # Script entry: gather, per class, the image id and box area of every
    # annotation in the dataset.
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source — confirm against the original file.
    dataset = VOCDetection(
        args.voc_root, [('2007', set_type)],
        BaseTransform(300, dataset_mean),  # resize to 300*300 and - mean
        VOCAnnotationTransform(keep_difficult=True))
    # cls * id/size
    # +1 for total scale division
    num_cls = len(labelmap) + 1
    # One list per class plus a final list that receives every annotation.
    dets_id = [[] for i in range(num_cls)]
    dets_size = [[] for i in range(num_cls)]
    print('pulling annotation...')
    for i in range(len(dataset)):
        imgid, gt = dataset.pull_anno(i)
        for j, det in enumerate(gt):
            # Box area: (det[2] - det[0]) * (det[3] - det[1]).
            det_size = (det[2] - det[0]) * (det[3] - det[1])
            # The last value of an annotation is used as the class index.
            cls = det[-1]
            dets_id[cls].append(imgid)
            dets_size[cls].append(det_size)
            # The last list accumulates all annotations regardless of class.
            dets_id[-1].append(imgid)
            dets_size[-1].append(det_size)
        # Refresh the progress line on every 100th image.
        if i % 100 == 0:
            print('\rProgress: {}%'.format(int(100.0 * i / len(dataset))), end='')
    print()
    # extra-small (XS: bottom 10%)
    # small (S: next 20%)
    # medium (M: next 40%)
dataset = COCODataset(data_dir=coco_root, img_size=size, transform=BaseTransform([size, size])) boxes = [] print("The dataset size: ", len(dataset)) print("Loading the dataset ...") for i in range(len(dataset)): if i % 5000 == 0: print('Loading datat [%d / %d]' % (i + 1, len(dataset))) if dataset == 'coco': # For COCO img, _ = dataset.pull_image(i) w, h = img.shape[1], img.shape[0] annotation = dataset.pull_anno(i) elif dataset == 'voc': # For VOC img = dataset.pull_image(i) w, h = img.shape[1], img.shape[0] _, annotation = dataset.pull_anno(i) # prepare bbox datas for box_and_label in annotation: box = box_and_label[:-1] xmin, ymin, xmax, ymax = box bw = (xmax - xmin) / w * size bh = (ymax - ymin) / h * size # check bbox if bw < 1.0 or bh < 1.0:
show_threshold = 0.05 net = build_ssd('test', 300, 2) net.load_state_dict(torch.load('weights/ssd300_0712_115000.pth')) testset = VOCDetection(VOCroot, [('2007', 'test')], None, AnnotationTransform()) transform = BaseTransform(net.size, (104, 117, 123)) net.eval() net.cuda() cudnn.benchmark = True print('Finished loading model!') num_images = len(testset) for i in range(num_images): img = testset.pull_image(i) img_id, annotation = testset.pull_anno(i) gtloc = tuple(np.array(annotation[0][:-1]).astype('int32')) x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1) x = Variable(x.unsqueeze(0)) x = x.cuda() y = net(x) detections = y.data scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) im2show = np.copy(img) for i in range(1, detections.size(1)): j = 0 while detections[0, i, j, 0] >= show_threshold: score = detections[0, i, j, 0] label_name = labelmap[i - 1] pt = (detections[0, i, j, 1:] *
def ssd_detect(limit_detection, dataset):
    """Run the detector over a VOC 2007 image set and save annotated plots.

    For every image of the chosen set, the module-level ``net`` is applied
    to a 300x300 mean-subtracted copy.  Every detection whose score is at
    least ``limit_detection`` is drawn with its class name and score, the
    boxes returned by ``pull_anno`` are overlaid in a fixed colour, and the
    figure is saved as ``result/data/<img_id>.jpg``.

    Args:
        limit_detection: Minimum score a detection needs in order to be drawn.
        dataset: Name of the image set passed to ``VOCDetection``
            (e.g. ``'test'``, ``'val'``, ``'train'``).
    """
    # Imported once instead of on every loop iteration.
    from data import VOC_CLASSES as labels

    # here we specify year (07 or 12) and dataset ('test', 'val', 'train')
    testset = VOCDetection(VOC_ROOT, [('2007', dataset)], None,
                           VOCAnnotationTransform())
    # The colour table does not depend on the image, so build it once.
    colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
    num_images = len(testset)

    for img_id in range(num_images):
        if img_id % 100 == 1:
            print(img_id, '/', num_images)

        image = testset.pull_image(img_id)
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Network input: resize, subtract the per-channel means, then reverse
        # the channel order (same preprocessing order as before).
        x = cv2.resize(image, (300, 300)).astype(np.float32)
        x -= (104.0, 117.0, 123.0)
        x = x[:, :, ::-1].copy()
        x = torch.from_numpy(x).permute(2, 0, 1)
        xx = Variable(x.unsqueeze(0))  # wrap tensor in Variable
        if torch.cuda.is_available():
            xx = xx.cuda()
        y = net(xx)

        plt.figure(figsize=(10, 10))
        # plot the image for matplotlib
        currentAxis = plt.gca()
        detections = y.data
        # scale each detection back up to the image
        scale = torch.Tensor(rgb_image.shape[1::-1]).repeat(2)

        slots = detections.size(2)
        # Start at 1: the label is looked up as labels[i - 1], so index 0
        # would wrap around to the last class name.
        # NOTE(review): presumably index 0 is the background class — confirm.
        for i in range(1, detections.size(1)):
            j = 0
            # Bound j so the scan cannot run past the last detection slot
            # when every slot of a class passes the threshold.
            while j < slots and detections[0, i, j, 0] >= limit_detection:
                score = detections[0, i, j, 0]
                label_name = labels[i - 1]
                display_txt = '%s: %.2f' % (label_name, score)
                # Move to the CPU before scaling so the multiply works
                # whatever device the network output lives on.
                pt = (detections[0, i, j, 1:].cpu() * scale).numpy()
                coords = (pt[0], pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
                color = colors[i]
                currentAxis.add_patch(
                    plt.Rectangle(*coords, fill=False, edgecolor=color,
                                  linewidth=2))
                currentAxis.text(pt[0], pt[1], display_txt,
                                 bbox={'facecolor': color, 'alpha': 0.5})
                j += 1

        # Overlay the annotated (ground-truth) boxes.
        _, gts = testset.pull_anno(img_id)
        for gt in gts:
            coords = (gt[0], gt[1]), gt[2] - gt[0] + 1, gt[3] - gt[1] + 1
            currentAxis.add_patch(
                plt.Rectangle(*coords, fill=False, edgecolor=colors[15],
                              linewidth=2))

        plt.imshow(rgb_image)
        plt.savefig('result/data/' + str(img_id) + '.jpg')
        plt.close()