def test_nms_device_and_dtypes_cpu():
    """Check that CPU NMS preserves the input dtype for arrays and tensors.

    CommandLine:
        xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_cpu
    """
    iou_thr = 0.6
    base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
                          [49.3, 32.9, 51.0, 35.3, 0.9],
                          [35.3, 11.5, 39.9, 14.5, 0.4],
                          [35.2, 11.7, 39.7, 15.7, 0.3]])

    # CPU can handle float32 and float64, both as ndarray and as tensor.
    # The converters are applied in the same order as the original
    # hand-unrolled cases.
    converters = (
        lambda arr: arr.astype(np.float32),
        torch.FloatTensor,
        lambda arr: arr.astype(np.float64),
        torch.DoubleTensor,
    )
    for convert in converters:
        dets = convert(base_dets)
        suppressed, keep_inds = nms(dets, iou_thr)
        assert dets.dtype == suppressed.dtype
        assert len(keep_inds) == len(suppressed) == 2
def test_nms_device_and_dtypes_gpu():
    """Check that GPU NMS preserves float32 inputs on every visible device.

    CommandLine:
        xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_gpu
    """
    if not torch.cuda.is_available():
        import pytest
        pytest.skip('test requires GPU and torch+cuda')

    iou_thr = 0.6
    base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
                          [49.3, 32.9, 51.0, 35.3, 0.9],
                          [35.3, 11.5, 39.9, 14.5, 0.4],
                          [35.2, 11.7, 39.7, 15.7, 0.3]])

    num_devices = torch.cuda.device_count()
    for device_id in range(num_devices):
        print(f'Run NMS on device_id = {device_id!r}')

        # GPU can handle float32 but not float64.
        # ndarray input: the target device is passed to nms explicitly.
        array_dets = base_dets.astype(np.float32)
        kept, keep_inds = nms(array_dets, iou_thr, device_id)
        assert array_dets.dtype == kept.dtype
        assert len(keep_inds) == len(kept) == 2

        # Tensor input: the tensor already lives on the target device.
        tensor_dets = torch.FloatTensor(base_dets).to(device_id)
        kept, keep_inds = nms(tensor_dets, iou_thr)
        assert tensor_dets.dtype == kept.dtype
        assert len(keep_inds) == len(kept) == 2
def test_nms_match():
    """Exercise ``nms_match`` on empty, ndarray, tensor and malformed input."""
    iou_thr = 0.6

    # Empty input yields no groups.
    no_dets = np.array([])
    assert len(nms_match(no_dets, iou_thr)) == 0

    # Non-empty ndarray input: groups are ndarrays and each group's first
    # index matches an index kept by plain NMS.
    np_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
                        [49.3, 32.9, 51.0, 35.3, 0.9],
                        [35.3, 11.5, 39.9, 14.5, 0.4],
                        [35.2, 11.7, 39.7, 15.7, 0.3]])
    np_groups = nms_match(np_dets, iou_thr)
    assert isinstance(np_groups[0], np.ndarray)
    assert len(np_groups) == 2
    _, nms_keep_inds = nms(np_dets, iou_thr)[:2]
    group_heads = {group[0].item() for group in np_groups}
    assert group_heads == set(nms_keep_inds.tolist())

    # Non-empty tensor input: same grouping, but returned as tensors.
    tensor_dets = torch.from_numpy(np_dets)
    tensor_groups = nms_match(tensor_dets, iou_thr)
    assert isinstance(tensor_groups[0], torch.Tensor)
    for pos, tensor_group in enumerate(tensor_groups):
        assert np.equal(tensor_group.numpy(), np_groups[pos]).all()

    # Input of the wrong shape is rejected with an assertion.
    malformed_dets = np.zeros((2, 3))
    with pytest.raises(AssertionError):
        nms_match(malformed_dets, iou_thr)
def combine_result(result, result_det, thre_nms):
    """Merge two per-index detection sets and de-duplicate them with NMS.

    For every index ``i``:

    1. If ``result_det[i]`` is non-empty, its score column (column 4) is
       rewritten **in place** as ``max(0, log(s + 0.25) - log(1 - s + 0.25))``.
    2. ``result[i]`` and ``result_det[i]`` are stacked row-wise and passed to
       ``nms`` with threshold ``thre_nms``; the first element of the ``nms``
       return value is kept.
    3. Rows whose score (column 4) is not strictly positive are dropped.

    Args:
        result: Sequence of 2-D arrays whose column 4 holds the score.
        result_det: Sequence of 2-D arrays of the same length and column
            layout as ``result``. Its score columns are modified in place.
        thre_nms: Threshold forwarded to ``nms``.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``object`` with one filtered
        detection array per index.
    """
    n = len(result)
    # ``np.object`` was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin ``object`` is the supported spelling and behaves identically.
    output = np.empty((n, ), dtype=object)
    for i in range(n):
        if result_det[i].shape[0] > 0:
            scores = result_det[i][:, 4]
            # Log-ratio of the score against its complement (both offset by
            # 0.25), clipped at zero.
            temp_lr = np.log(scores + 0.25) - np.log(1 - scores + 0.25)
            temp_lr[temp_lr < 0] = 0
            result_det[i][:, 4] = temp_lr
        stacked = np.vstack((result[i], result_det[i]))
        kept = nms(stacked, thre_nms, device_id=None)[0]
        # Entries clipped to zero above are discarded here.
        output[i] = kept[kept[:, 4] > 0, :]
    return output
def test_nms_device_and_dtypes_cpu():
    """Check CPU NMS output values and dtype for float32/float64 inputs.

    CommandLine:
        xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_cpu
    """
    iou_thr = 0.6
    base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.1],
                          [49.3, 32.9, 51.0, 35.3, 0.05],
                          [35.3, 11.5, 39.9, 14.5, 0.9],
                          [35.2, 11.7, 39.7, 15.7, 0.3]])
    base_expected_suppressed = np.array([[35.3, 11.5, 39.9, 14.5, 0.9],
                                         [49.1, 32.4, 51.0, 35.9, 0.1]])

    # CPU can handle float32 and float64. Each case pairs the conversion
    # applied to both input and expectation with the matching equality check.
    cases = (
        (lambda arr: arr.astype(np.float32), np.array_equal),
        (torch.FloatTensor, torch.equal),
        (lambda arr: arr.astype(np.float64), np.array_equal),
        (torch.DoubleTensor, torch.equal),
    )
    for convert, is_same in cases:
        dets = convert(base_dets)
        expected_suppressed = convert(base_expected_suppressed)
        suppressed, _ = nms(dets, iou_thr)
        assert dets.dtype == suppressed.dtype
        assert is_same(suppressed, expected_suppressed)
def nms(ann, mode, thresh, out_file=None, CLASS_NUM=18):
    """Apply per-class NMS to every entry of an annotation dict, in place.

    For each entry of ``ann`` and each class index below ``CLASS_NUM``, the
    class's ``'bbox'`` and ``'vis'`` lists are filtered down to the indices
    selected by the chosen NMS routine. Classes with no boxes are skipped.

    Args:
        ann: Mapping from a name to a dict with ``'bbox'`` and ``'vis'``
            entries, each indexable by class. Column 4 of a class's bbox
            array is used as the score in ``"poly"`` mode.
        mode: ``"rec"`` to run ``nms_wrapper.nms`` on the bbox array, or
            ``"poly"`` to run ``poly_nms`` on the ``'vis'`` values reshaped
            to 8 integers per row with the bbox score appended.
        thresh: Threshold forwarded to the NMS routine.
        out_file: If not ``None``, the filtered ``ann`` is written there
            with ``mmcv.dump``.
        CLASS_NUM: Number of class slots to process per entry.

    Returns:
        The same ``ann`` object, modified in place.

    Raises:
        ValueError: If ``mode`` is neither ``"rec"`` nor ``"poly"`` and a
            non-empty class is encountered. Previously this surfaced as an
            unbound ``inds`` or silently reused the indices computed for
            the preceding class.
    """
    for info in ann.values():
        for cls in range(CLASS_NUM):
            bbox = np.array(info['bbox'][cls], np.float32)
            vis = np.array(info['vis'][cls])
            if len(bbox) <= 0:
                continue
            if mode == "rec":
                _, inds = nms_wrapper.nms(bbox, thresh)
            elif mode == "poly":
                # 8 integer values per row from 'vis', plus the bbox score
                # as the last column.
                polys = np.array(vis.reshape(-1, 8), np.int32)
                dets = np.c_[polys, bbox[:, 4]]
                inds = poly_nms(dets, thresh)
            else:
                raise ValueError(
                    f'unsupported mode {mode!r}; expected "rec" or "poly"')
            info['bbox'][cls] = bbox[inds]
            info['vis'][cls] = vis[inds]
    if out_file is not None:
        mmcv.dump(ann, out_file)
    return ann
def main():
    """Run tiled plus full-image detection over a dataset and dump the results.

    Each image (with its template image) is split into a 2x2 grid of tiles.
    Every tile is run through ``inference_detector``, tile detections are
    shifted back into full-image coordinates and accumulated per class index.
    A full-image pass is then merged in, small full-image boxes are removed,
    and ``nms`` is applied per class index. The per-image lists are collected
    and written to ``eval/result.pkl`` with ``mmcv.dump``.
    """
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.run_gpu
    model = init_detector(args.config, args.model, device=torch.device('cuda', args.gpu))
    results = []
    dataset = MyDataset('../../data/ImageSets/Main/val.txt', '../../data/defect')
    dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
    count = len(dataloader)
    # NOTE(review): `start` is never read in this function.
    start = time.time()
    for image_index, data in enumerate(dataloader):
        iter_start = time.time()
        # batch_size is 1, so element [0] of each field is the single sample.
        image_dir, img, template_img = data[0][0], data[1][0], data[2][0]
        img, template_img = img.numpy(), template_img.numpy()
        height, width, _ = img.shape
        sub_height, sub_width = int(height / 2), int(width / 2)
        # One accumulated detection array per entry of the detector output.
        dets = []
        for row in range(2):
            for col in range(2):
                height_start = row * sub_height
                height_end = (row + 1) * sub_height
                width_start = col * sub_width
                width_end = (col + 1) * sub_width
                sub_im = img[height_start:height_end, width_start:width_end, :]
                sub_template = template_img[height_start:height_end, width_start:width_end, :]
                sub_dets = inference_detector(model, sub_im, sub_template, 0.1)
                # Shift tile-local box coordinates into full-image coordinates
                # (columns 0/2 by the x offset, columns 1/3 by the y offset).
                for index, sub_det in enumerate(sub_dets):
                    for sub_item in sub_det:
                        sub_item[0] += width_start
                        sub_item[1] += height_start
                        sub_item[2] += width_start
                        sub_item[3] += height_start
                # First tile seeds the per-class lists; later tiles append rows.
                if len(dets) == 0:
                    for sub_det in sub_dets:
                        dets.append(sub_det)
                else:
                    for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
                        dets[index] = np.concatenate((det, sub_det), axis=0)
        # Full-image pass, merged with the accumulated tile detections.
        sub_dets = inference_detector(model, img, template_img, 0.1)
        for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
            # Drop full-image boxes narrower or shorter than 48.
            need_delete_index = []
            for temp_index, temp in enumerate(sub_det):
                if temp[2] - temp[0] < 48 or temp[3] - temp[1] < 48:
                    need_delete_index.append(temp_index)
            if len(need_delete_index) > 0:
                sub_det = np.delete(sub_det, need_delete_index, axis=0)
            dets[index] = np.concatenate((det, sub_det), axis=0)
            # Index 12 only keeps boxes covering at least 90% of both image
            # width and height.
            # NOTE(review): the meaning of class index 12 is not visible here.
            if index == 12:
                need_delete_index = []
                temp_det = dets[index]
                for t_index, t in enumerate(temp_det):
                    if t[2] - t[0] < width * 0.9 or t[3] - t[1] < height * 0.9:
                        need_delete_index.append(t_index)
                dets[index] = np.delete(dets[index], need_delete_index, axis=0)
            # Only element [0] of the nms result is kept
            # (presumably the surviving detections - confirm against nms).
            dets[index] = nms(dets[index], 0.15)[0]
        results.append(dets)
        iter_end = time.time()
        print("\r" + "{}/{}, use time = {}".format(image_index, count, iter_end - iter_start), end="", flush=True)
    mmcv.dump(results, 'eval/result.pkl')
def main():
    """Run sliding-window detection over a directory of images and export results.

    Every file in ``args.image_dir`` is read, cut into patches of
    ``args.patch_size`` placed with ``args.strides``, and each patch is run
    through the model. Patch detections are shifted into full-image
    coordinates, concatenated and filtered with ``nms``. Per image, a JSON
    file of ``{x, y, w, h, p}`` records is written to ``args.result_dir`` and
    VOC-style text lines are appended to ``args.voc_res_file``. When
    ``args.vis`` is set, a visualisation with detections and ground-truth
    boxes is written to ``args.vis_image_dir``.
    """
    args = parse_args()
    model = init_detector(args.config, args.checkpoint, device=torch.device('cuda', args.device))
    cfg = model.cfg
    # build the data pipeline; the first configured step is skipped
    # (presumably the image-loading step, since images are read below - confirm)
    test_pipeline = cfg.data.test.pipeline[1:]
    test_pipeline = Compose(test_pipeline)
    device = next(model.parameters()).device  # model device
    image_names = os.listdir(args.image_dir)
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)
    # The VOC result file is opened in append mode below, so start clean.
    if os.path.exists(args.voc_res_file):
        os.remove(args.voc_res_file)
    if args.vis:
        if not os.path.exists(args.vis_image_dir):
            os.makedirs(args.vis_image_dir)
    print("Begin to predict mask: ", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    for image_name in image_names:
        image_path = os.path.join(args.image_dir, image_name)
        img = mmcv.imread(image_path)
        height, width, _ = img.shape
        image_shape = (width, height)
        strides = args.strides
        patch_size = args.patch_size
        x_num, y_num = calc_split_num(image_shape, patch_size, strides)
        # print(image_name)
        result_all = []
        for i in range(x_num):
            for j in range(y_num):
                # The last patch in each direction is anchored to the image
                # border instead of following the stride.
                x = strides[0] * i if i < x_num - 1 else image_shape[0] - args.patch_size[0]
                y = strides[1] * j if j < y_num - 1 else image_shape[1] - args.patch_size[1]
                crop_img = img[y:y + patch_size[1], x:x + patch_size[0], :].copy()
                data = dict(filename=image_name,
                            img=crop_img,
                            img_shape=crop_img.shape,
                            ori_shape=img.shape)
                data = test_pipeline(data)
                data = scatter(collate([data], samples_per_gpu=1), [device])[0]
                # forward the model
                with torch.no_grad():
                    result = model(return_loss=False, rescale=True, **data)[0]
                # Shift patch-local boxes into full-image coordinates.
                result[:, 0] += x
                result[:, 1] += y
                result[:, 2] += x
                result[:, 3] += y
                result_all.append(result)
        # import pdb;pdb.set_trace()
        # NOTE(review): np.concatenate raises on an empty list, i.e. when
        # calc_split_num yields zero patches.
        result_all = np.concatenate(result_all, axis=0)
        nms_result, _ = nms(result_all, 0.5, device_id=args.device)
        # nms_result = result_all
        if args.vis:
            out_file = os.path.join(args.vis_image_dir, image_name)
            # out_file=None here: the image is written below, after the
            # ground-truth boxes have been drawn on it.
            vis_img = show_result(img, [nms_result],
                                  model.CLASSES,
                                  score_thr=args.score_thr,
                                  wait_time=0,
                                  show=False,
                                  out_file=None)
            # NOTE(review): str.replace swaps every 'png' substring in the
            # name, not only the extension - confirm names cannot contain it.
            ann_file = os.path.join(args.ann_dir, image_name.replace('png', 'json'))
            gt_bboxes = load_annotation(ann_file)
            for gt_bbox in gt_bboxes:
                xmin, ymin, xmax, ymax = gt_bbox
                cv2.rectangle(vis_img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 1)
            cv2.imwrite(out_file, vis_img)
        voc_format = '{} {:.4f} {} {} {} {}'
        pos_all = []
        voc_all = []
        for i in range(nms_result.shape[0]):
            x = int(nms_result[i, 0])
            y = int(nms_result[i, 1])
            # Width and height are clamped to at least 1.
            w = max(int(nms_result[i, 2] - nms_result[i, 0]), 1)
            h = max(int(nms_result[i, 3] - nms_result[i, 1]), 1)
            p = float(nms_result[i, 4])
            pos = {'x': x, 'y': y, 'w': w, 'h': h, 'p': p}
            pos_all.append(pos)
            xmin = x
            ymin = y
            xmax = int(nms_result[i, 2])
            ymax = int(nms_result[i, 3])
            voc_str = voc_format.format(
                os.path.splitext(image_name)[0], p, xmin, ymin, xmax, ymax)
            voc_all.append(voc_str)
        with open(
                os.path.join(args.result_dir,
                             image_name.replace('png', 'json')), 'w') as f:
            json.dump(pos_all, f)
        with open(args.voc_res_file, 'a') as f:
            for voc_str in voc_all:
                f.write(voc_str + '\n')
        print("Finish predict mask: ", image_name, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
def main(anno_save_dir):
    """Detect on images under a fixed root and write the boxes as XML annotations.

    Each image is processed as a 2x2 grid of tiles plus one full-image pass.
    Detections are merged per class index and filtered with ``nms``. Every
    surviving box that is at least 10 wide and 10 high is passed to
    ``create_xml``.

    Args:
        anno_save_dir: Forwarded to ``create_xml`` as its first argument
            (presumably the output directory for the XML files - confirm).
    """
    args = parse_args()
    # NOTE(review): the visible GPU is hard-coded to '6'.
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    model = init_detector(args.config, args.model, device=torch.device('cuda', args.gpu))
    root_dir = 'data/zip/zip/normal'
    #root_dir = "/tcdata/guangdong1_round2_testA_20190924"
    # NOTE(review): `results` is never appended to, so the JSON written at
    # the end of this function always contains an empty list.
    results = []
    dataset = MyDataset(root_dir)
    dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
    count = len(dataloader)
    start = time.time()
    for image_index, data in enumerate(dataloader):
        # batch_size is 1, so element [0] of each field is the single sample.
        image_dir, img = data[0][0], data[1][0]
        img = img.numpy()
        height, width, _ = img.shape
        sub_height, sub_width = int(height / 2), int(width / 2)
        # One accumulated detection array per entry of the detector output.
        dets = []
        for row in range(2):
            for col in range(2):
                height_start = row * sub_height
                height_end = (row + 1) * sub_height
                width_start = col * sub_width
                width_end = (col + 1) * sub_width
                sub_im = img[height_start:height_end, width_start:width_end, :]
                sub_dets = inference_detector(model, sub_im, 0.15)
                # Shift tile-local box coordinates into full-image coordinates.
                for index, sub_det in enumerate(sub_dets):
                    for sub_item in sub_det:
                        sub_item[0] += width_start
                        sub_item[1] += height_start
                        sub_item[2] += width_start
                        sub_item[3] += height_start
                # First tile seeds the per-class lists; later tiles append rows.
                if len(dets) == 0:
                    for sub_det in sub_dets:
                        dets.append(sub_det)
                else:
                    for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
                        dets[index] = np.concatenate((det, sub_det), axis=0)
        # Full-image pass, merged with the tile detections and then filtered.
        sub_dets = inference_detector(model, img, 0.15)
        for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
            dets[index] = np.concatenate((det, sub_det), axis=0)
            # The whole nms return value is stored; element [0] is read below.
            dets[index] = nms(dets[index], 0.15)
        for index, bboxes in enumerate(dets):
            bboxes = bboxes[0]
            for bbox in bboxes:
                if len(bbox) == 0:
                    continue
                # NOTE(review): `score` and `cls_type` are computed but unused.
                score = bbox[4].item()
                cls_type = get_cls_type(index)
                new_bbox = [
                    round(bbox[0].item(), 2),
                    round(bbox[1].item(), 2),
                    round(bbox[2].item(), 2),
                    round(bbox[3].item(), 2)
                ]
                # Skip boxes narrower or shorter than 10.
                if new_bbox[2] - new_bbox[0] < 10 or new_bbox[3] - new_bbox[1] < 10:
                    continue
                # The literal "16" is passed for every box, whatever its index.
                create_xml(anno_save_dir, root_dir, image_dir + '.jpg', "16", new_bbox)
        print("\r" + "{}/{}".format(image_index, count), end="", flush=True)
    print("use time = {}".format(time.time() - start))
    with open('../result.json', 'w') as fp:
        json.dump(results, fp, indent=4, separators=(',', ': '))
def main():
    """Run tiled plus full-image detection with templates and write a JSON submission.

    Each image (with its template image) is processed as a 2x2 grid of tiles
    plus one full-image pass. Tile scores are divided by 8, small full-image
    boxes are removed, detections are merged per class index and filtered
    with ``nms``. Boxes for class indices below 15 are written as
    ``{name, category, bbox, score}`` records to ``../../submit/result.json``.
    """
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    model = init_detector(args.config, args.model, device=torch.device('cuda', args.gpu))
    #root_dir = "/tcdata/guangdong1_round2_testB_20191024"
    root_dir = args.data
    results = []
    dataset = MyDataset(root_dir)
    dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
    count = len(dataloader)
    start = time.time()
    for image_index, data in enumerate(dataloader):
        # batch_size is 1, so element [0] of each field is the single sample.
        image_dir, img, template_img = data[0][0], data[1][0], data[2][0]
        img, template_img = img.numpy(), template_img.numpy()
        height, width, _ = img.shape
        sub_height, sub_width = int(height / 2), int(width / 2)
        # One accumulated detection array per entry of the detector output.
        dets = []
        for row in range(2):
            for col in range(2):
                height_start = row * sub_height
                height_end = (row + 1) * sub_height
                width_start = col * sub_width
                width_end = (col + 1) * sub_width
                sub_im = img[height_start:height_end, width_start:width_end, :]
                sub_template = template_img[height_start:height_end, width_start:width_end, :]
                sub_dets = inference_detector(model, sub_im, sub_template, 0.065)
                for index, sub_det in enumerate(sub_dets):
                    for sub_item in sub_det:
                        # Shift tile-local coordinates into full-image
                        # coordinates.
                        sub_item[0] += width_start
                        sub_item[1] += height_start
                        sub_item[2] += width_start
                        sub_item[3] += height_start
                        # Tile scores are scaled down by 8 (presumably so
                        # full-image detections win in NMS - confirm).
                        sub_item[4] /= 8
                # First tile seeds the per-class lists; later tiles append rows.
                if len(dets) == 0:
                    for sub_det in sub_dets:
                        dets.append(sub_det)
                else:
                    for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
                        dets[index] = np.concatenate((det, sub_det), axis=0)
        # Full-image pass, merged with the accumulated tile detections.
        sub_dets = inference_detector(model, img, template_img, 0.065)
        for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
            # Drop full-image boxes narrower or shorter than 48.
            need_delete_index = []
            for temp_index, temp in enumerate(sub_det):
                if temp[2] - temp[0] < 48 or temp[3] - temp[1] < 48:
                    need_delete_index.append(temp_index)
            if len(need_delete_index) > 0:
                sub_det = np.delete(sub_det, need_delete_index, axis=0)
            dets[index] = np.concatenate((det, sub_det), axis=0)
            # Index 12 only keeps boxes covering at least 90% of both image
            # width and height.
            # NOTE(review): the meaning of class index 12 is not visible here.
            if index == 12:
                need_delete_index = []
                temp_det = dets[index]
                for t_index, t in enumerate(temp_det):
                    if t[2] - t[0] < width * 0.9 or t[3] - t[1] < height * 0.9:
                        need_delete_index.append(t_index)
                dets[index] = np.delete(dets[index], need_delete_index, axis=0)
            # The whole nms return value is stored; element [0] is read below.
            dets[index] = nms(dets[index], 0.15)
        for index, bboxes in enumerate(dets):
            # Only the first 15 class indices are exported.
            if index >= 15:
                continue
            bboxes = bboxes[0]
            for bbox in bboxes:
                if len(bbox) == 0:
                    continue
                score = bbox[4].item()
                cls_type = get_cls_type(index)
                new_bbox = [
                    round(bbox[0].item(), 2),
                    round(bbox[1].item(), 2),
                    round(bbox[2].item(), 2),
                    round(bbox[3].item(), 2)
                ]
                name = image_dir + '.jpg'
                result = {
                    'name': name,
                    'category': cls_type,
                    'bbox': new_bbox,
                    'score': score
                }
                results.append(result)
        print("\r" + "{}/{}".format(image_index, count), end="", flush=True)
    print("use time = {}".format(time.time() - start))
    with open('../../submit/result.json', 'w') as fp:
        json.dump(results, fp, indent=4, separators=(',', ': '))
def nms_update(result, thres):
    """Replace every entry of ``result`` with ``nms(entry, thres)``, in place.

    Args:
        result: Indexable, sized collection of detection arrays.
        thres: Threshold forwarded to ``nms``.

    Returns:
        The same ``result`` object, each entry overwritten with whatever
        ``nms`` returned for it.
    """
    for idx, dets in enumerate(result):
        result[idx] = nms(dets, thres)
    return result
def main():
    """Visual debugging run: detect on every tenth image directory and save overlays.

    For each selected directory under ``data/normal`` the image is processed
    as a 2x2 grid of tiles plus one full-image pass. Annotated copies of each
    tile, the full-image pass and the final result are written under
    ``vis/<image_index>/``. Detections are merged per class index and
    filtered with ``nms``. Boxes for class indices below 15 are collected
    and written to ``../result.json``.
    """
    args = parse_args()
    # NOTE(review): the visible GPU is hard-coded to '6'.
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    model = init_detector(args.config, args.model, device=torch.device('cuda', args.gpu))
    show_source = False
    vis_dir = 'vis'
    if not os.path.exists(vis_dir):
        os.mkdir(vis_dir)
    anno_dir = 'data/Annotations'
    root_dir = 'data/normal'
    #root_dir = "/tcdata/guangdong1_round2_testA_20190924"
    results = []
    #dataset = MyDataset(root_dir)
    #dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
    #count = len(dataloader)
    image_dirs = os.listdir(root_dir)
    count = len(image_dirs)
    start = time.time()
    #for image_index, data in enumerate(dataloader):
    for image_index, image_dir in enumerate(image_dirs):
        if not os.path.isdir(os.path.join(root_dir, image_dir)):
            continue
        # Only every tenth directory entry is processed.
        if image_index % 10 != 0:
            continue
        result_dir = os.path.join(vis_dir, "{}".format(image_index))
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)
        #image_dir, img = data[0][0], data[1][0]
        #img = img.numpy()
        # NOTE(review): `image_files` is never read.
        image_files = os.listdir(os.path.join(root_dir, image_dir))
        image_path = os.path.join(root_dir, image_dir, image_dir + '.jpg')
        img = cv2.imread(image_path)
        # NOTE(review): block extent inferred - `source_im`/`template_im` are
        # only defined here, so everything up to the two imwrite calls is
        # assumed to belong to this branch. With show_source hard-coded to
        # False the branch never runs.
        if show_source:
            source_path = os.path.join(root_dir, image_dir, image_dir + '.jpg')
            source_im = cv2.imread(source_path)
            style = image_dir.split('_')[0]
            template_path = os.path.join(root_dir, image_dir, "template_{}.jpg".format(style))
            template_im = cv2.imread(template_path)
            anno_path = os.path.join(anno_dir, image_dir + '.xml')
            if not os.path.exists(anno_path):
                continue
            root = ET.parse(anno_path)
            objs = root.findall('object')
            # Draw every annotated object on both the source and the template.
            for obj in objs:
                cls = obj.find('name').text
                bndbox = obj.find('bndbox')
                xmin = int(float(bndbox.find('xmin').text))
                ymin = int(float(bndbox.find('ymin').text))
                xmax = int(float(bndbox.find('xmax').text))
                ymax = int(float(bndbox.find('ymax').text))
                #print("cls = {}, xmin = {}, xmax = {}, ymin = {}, ymax = {}".format(cls, xmin, xmax, ymin, ymax))
                cv2.rectangle(source_im, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                cv2.putText(source_im, cls, (xmin + 40, ymin + 40), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255))
                cv2.rectangle(template_im, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                cv2.putText(template_im, cls, (xmin + 40, ymin + 40), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255))
            cv2.imwrite("{}/{}_source.jpg".format(result_dir, image_index), source_im)
            cv2.imwrite("{}/{}_template.jpg".format(result_dir, image_index), template_im)
        iter_start = time.time()
        height, width, _ = img.shape
        sub_height, sub_width = int(height / 2), int(width / 2)
        # One accumulated detection array per entry of the detector output.
        dets = []
        for row in range(2):
            for col in range(2):
                height_start = row * sub_height
                height_end = (row + 1) * sub_height
                width_start = col * sub_width
                width_end = (col + 1) * sub_width
                # Copy the tile so the debug drawing below does not touch img.
                sub_im = img[height_start:height_end, width_start:width_end, :].copy()
                sub_dets = inference_detector(model, sub_im, 0.15)
                message = ""
                # Draw the tile's detections (still in tile coordinates) and
                # build a per-class count summary.
                for index, sub_det in enumerate(sub_dets):
                    for temp_box in sub_det:
                        score = temp_box[4]
                        show_message = "{}:{}".format(index + 1, score)
                        cv2.rectangle(sub_im,
                                      (int(temp_box[0]), int(temp_box[1])),
                                      (int(temp_box[2]), int(temp_box[3])),
                                      (255, 0, 0), 2)
                        cv2.putText(
                            sub_im, show_message,
                            (int(temp_box[0]) + 40, int(temp_box[1] + 40)),
                            cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255))
                    message += ", {}:{}".format(index, sub_det.shape[0])
                cv2.imwrite(
                    "{}/{}_{}_{}.jpg".format(result_dir, image_index, row, col),
                    sub_im)
                print("row:{}, col:{}{}".format(row, col, message))
                for index, sub_det in enumerate(sub_dets):
                    for sub_item in sub_det:
                        # Shift tile-local coordinates into full-image
                        # coordinates.
                        sub_item[0] += width_start
                        sub_item[1] += height_start
                        sub_item[2] += width_start
                        sub_item[3] += height_start
                        # Tile scores are scaled down by 8 (presumably so
                        # full-image detections win in NMS - confirm).
                        sub_item[4] /= 8
                # First tile seeds the per-class lists; later tiles append rows.
                if len(dets) == 0:
                    for sub_det in sub_dets:
                        dets.append(sub_det)
                else:
                    for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
                        dets[index] = np.concatenate((det, sub_det), axis=0)
        # Full-image pass, drawn on a separate copy of the image.
        temp_img = img.copy()
        sub_dets = inference_detector(model, img, 0.05)
        message1 = ""  # per-class counts from the full-image pass
        message2 = ""  # per-class counts after merging with tile detections
        message3 = ""  # per-class counts after nms
        for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
            for temp_box in sub_det:
                score = temp_box[4]
                show_message = "{}:{}".format(index + 1, score)
                cv2.rectangle(temp_img,
                              (int(temp_box[0]), int(temp_box[1])),
                              (int(temp_box[2]), int(temp_box[3])),
                              (255, 0, 0), 2)
                cv2.putText(temp_img, show_message,
                            (int(temp_box[0]) + 40, int(temp_box[1] + 40)),
                            cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255))
            message1 += ", {}:{}".format(index, sub_det.shape[0])
            dets[index] = np.concatenate((det, sub_det), axis=0)
            message2 += ", {}:{}".format(index, dets[index].shape[0])
            # The whole nms return value is stored; element [0] is read below.
            dets[index] = nms(dets[index], 0.01)
            message3 += ", {}:{}".format(index, dets[index][0].shape[0])
        cv2.imwrite("{}/{}_full_image.jpg".format(result_dir, image_index), temp_img)
        print("image ", message1, '\n', "cat ", message2, '\n', "nms ", message3)
        for index, bboxes in enumerate(dets):
            # Only the first 15 class indices are exported.
            if index >= 15:
                continue
            bboxes = bboxes[0]
            for bbox in bboxes:
                if len(bbox) == 0:
                    continue
                score = bbox[4].item()
                cls_type = get_cls_type(index)
                new_bbox = [
                    round(bbox[0].item(), 2),
                    round(bbox[1].item(), 2),
                    round(bbox[2].item(), 2),
                    round(bbox[3].item(), 2)
                ]
                # Final detections are drawn directly onto the loaded image.
                cv2.rectangle(img, (int(new_bbox[0]), int(new_bbox[1])),
                              (int(new_bbox[2]), int(new_bbox[3])),
                              (255, 0, 0), 2)
                message = "{}:{}".format(cls_type, score)
                cv2.putText(img, message,
                            (int(new_bbox[0]) + 40, int(new_bbox[1] + 40)),
                            cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255))
                #print("class type = {}, bbox = {}, score = {}".format(cls_type, bbox, score))
                name = image_dir + '.jpg'
                result = {
                    'name': name,
                    'category': cls_type,
                    'bbox': new_bbox,
                    'score': score
                }
                results.append(result)
        iter_end = time.time()
        cv2.imwrite("{}/{}.jpg".format(result_dir, image_index), img)
        print("{}/{}, use time = {}".format(image_index, count, iter_end - iter_start))
        #print("\r"+"{}/{}, use time = {}".format(image_index, count, iter_end-iter_start), end="", flush=True)
    print("total use time = {}".format(time.time() - start))
    with open('../result.json', 'w') as fp:
        json.dump(results, fp, indent=4, separators=(',', ': '))
def main():
    """Run tiled plus full-image detection on every 500th sample and write JSON.

    Each selected image is processed as a 2x2 grid of tiles plus one
    full-image pass. Detections are merged per class index and filtered with
    ``nms``. Boxes for class indices below 15 are written as
    ``{name, category, bbox, score}`` records to ``../result.json``.
    """
    args = parse_args()
    # NOTE(review): the visible GPU is hard-coded to '6'.
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    model = init_detector(args.config, args.model, device=torch.device('cuda', args.gpu))
    # NOTE(review): method name is spelled `set_socre_thr`; it must match the
    # model class definition, which is not visible here.
    model.set_socre_thr()
    root_dir = 'data/defect'
    #root_dir = "/tcdata/guangdong1_round2_testA_20190924"
    results = []
    dataset = MyDataset(root_dir)
    dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, num_workers=1)
    count = len(dataloader)
    start = time.time()
    for image_index, data in enumerate(dataloader):
        # Only every 500th sample is processed.
        if image_index % 500 != 0:
            continue
        # batch_size is 1, so element [0] of each field is the single sample.
        image_dir, img = data[0][0], data[1][0]
        img = img.numpy()
        height, width, _ = img.shape
        sub_height, sub_width = int(height / 2), int(width / 2)
        # One accumulated detection array per entry of the detector output.
        dets = []
        for row in range(2):
            for col in range(2):
                height_start = row * sub_height
                height_end = (row + 1) * sub_height
                width_start = col * sub_width
                width_end = (col + 1) * sub_width
                sub_im = img[height_start:height_end, width_start:width_end, :]
                sub_dets = inference_detector(model, sub_im)
                # Shift tile-local box coordinates into full-image coordinates.
                for index, sub_det in enumerate(sub_dets):
                    for sub_item in sub_det:
                        sub_item[0] += width_start
                        sub_item[1] += height_start
                        sub_item[2] += width_start
                        sub_item[3] += height_start
                # First tile seeds the per-class lists; later tiles append rows.
                if len(dets) == 0:
                    for sub_det in sub_dets:
                        dets.append(sub_det)
                else:
                    for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
                        dets[index] = np.concatenate((det, sub_det), axis=0)
        # Full-image pass, merged with the tile detections and then filtered.
        sub_dets = inference_detector(model, img)
        for index, (det, sub_det) in enumerate(zip(dets, sub_dets)):
            dets[index] = np.concatenate((det, sub_det), axis=0)
            # The whole nms return value is stored; element [0] is read below.
            dets[index] = nms(dets[index], 0.3)
        for index, bboxes in enumerate(dets):
            # Only the first 15 class indices are exported.
            if index >= 15:
                continue
            bboxes = bboxes[0]
            for bbox in bboxes:
                if len(bbox) == 0:
                    continue
                score = bbox[4].item()
                cls_type = get_cls_type(index)
                new_bbox = [
                    round(bbox[0].item(), 2),
                    round(bbox[1].item(), 2),
                    round(bbox[2].item(), 2),
                    round(bbox[3].item(), 2)
                ]
                #cv2.rectangle(img, (int(new_bbox[0]), int(new_bbox[1])), (int(new_bbox[2]), int(new_bbox[3])), (255, 0, 0), 2)
                #cv2.putText(img, str(cls_type), (int(new_bbox[0])+10, int(new_bbox[1]+10)), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 255, 0))
                #print("class type = {}, bbox = {}, score = {}".format(cls_type, bbox, score))
                name = image_dir + '.jpg'
                result = {
                    'name': name,
                    'category': cls_type,
                    'bbox': new_bbox,
                    'score': score
                }
                results.append(result)
        print("\r" + "{}/{}".format(image_index, count), end="", flush=True)
    print("use time = {}".format(time.time() - start))
    with open('../result.json', 'w') as fp:
        json.dump(results, fp, indent=4, separators=(',', ': '))