def evaluate(self, output, batch):
    """Write per-instance masks and a score file for Cityscapes evaluation.

    For each detected instance the binary mask is saved as
    ``<instance_dir>/<img_id>/instanceN.png`` and a line of the form
    ``<relative mask path> <label> <score>`` is written to
    ``<txt_dir>/<img_id>.txt`` — the layout consumed by the Cityscapes
    instance-segmentation evaluation scripts.
    """
    detection = output['detection']
    score = detection[:, 4].detach().cpu().numpy()
    label = detection[:, 5].detach().cpu().numpy().astype(int)
    # map the network's contiguous class ids back to Cityscapes label ids
    label = snake_cityscapes_utils.continuous_label_to_cityscapes_label(label)
    py = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
    h, w = batch['inp'].size(2), batch['inp'].size(3)
    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()
    # map contours from the network input crop back to original image coords
    trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
    py = [data_utils.affine_transform(py_, trans_output_inv) for py_ in py]
    ori_h, ori_w = 1024, 2048  # fixed Cityscapes image resolution
    mask = snake_eval_utils.poly_to_mask(py, label, ori_h, ori_w)
    img_id = batch['meta']['img_id'][0]
    instance_dir = os.path.join(self.instance_dir, img_id)
    # bugfix: os.makedirs replaces os.system('mkdir -p ...') — portable,
    # no shell quoting/injection issues, idempotent via exist_ok
    os.makedirs(instance_dir, exist_ok=True)
    self.anns.append(batch['meta']['ann'][0])
    txt_path = os.path.join(self.txt_dir, '{}.txt'.format(img_id))
    with open(txt_path, 'w') as f:
        for i in range(len(label)):
            instance_path = os.path.join(instance_dir, 'instance' + str(i) + '.png')
            cv2.imwrite(instance_path, mask[i])
            # bugfix: the original wrote '..\mask' — '\m' is not an escape,
            # so a literal backslash leaked into the score file, producing
            # broken mixed-separator paths on non-Windows systems
            instance_path = os.path.join('../mask', img_id, 'instance' + str(i) + '.png')
            f.write('{} {} {}\n'.format(instance_path, label[i], score[i]))
def pvnet_transform(img, box):
    """Crop ``img`` to the network input around ``box`` and normalize it.

    Returns ``(orig_img, inp, center, scale)`` where ``orig_img`` is the
    uint8 masked crop, ``inp`` is the normalized CHW float tensor, and
    ``center``/``scale`` describe the crop for inverse mapping.
    """
    # crop geometry: box center and its longest side (padded by scale_ratio)
    ctr = np.array([(box[0] + box[2]) / 2., (box[1] + box[3]) / 2.], dtype=np.float32)
    side = max(box[2] - box[0], box[3] - box[1]) * tless_config.scale_ratio
    input_w, input_h = tless_pvnet_utils.input_scale
    trans_input = data_utils.get_affine_transform(ctr, side, 0, [input_w, input_h])
    warped = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # warp the box into the crop, enlarge it, and zero out everything outside
    roi = data_utils.affine_transform(np.array(box).reshape(-1, 2), trans_input)
    roi = magnify_box(roi, tless_config.box_ratio, input_h, input_w)
    masked = np.zeros_like(warped)
    rows = slice(roi[0, 1], roi[1, 1] + 1)
    cols = slice(roi[0, 0], roi[1, 0] + 1)
    masked[rows, cols] = warped[rows, cols]

    orig_img = masked.copy()
    # normalize the image
    inp = masked.astype(np.float32) / 255.
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)
    return orig_img, inp, ctr, side
def evaluate(self, output, batch):
    """Accumulate one batch of snake predictions as COCO segmentation results."""
    dets = output['detection']
    scores = dets[:, 4].detach().cpu().numpy()
    labels = dets[:, 5].detach().cpu().numpy().astype(int)
    polys = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
    if len(polys) == 0:
        return

    img_id = int(batch['meta']['img_id'][0])
    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()
    h, w = batch['inp'].size(2), batch['inp'].size(3)
    # inverse transform: network output space -> original image space
    inv_trans = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)

    img_info = self.coco.loadImgs(img_id)[0]
    ori_h, ori_w = img_info['height'], img_info['width']
    polys = [data_utils.affine_transform(p, inv_trans) for p in polys]
    rles = snake_eval_utils.coco_poly_to_rle(polys, ori_h, ori_w)

    self.results.extend(
        {
            'image_id': img_id,
            'category_id': self.contiguous_category_id_to_json_id[labels[i]],
            'segmentation': rle,
            'score': float('{:.2f}'.format(scores[i]))
        }
        for i, rle in enumerate(rles)
    )
    self.img_ids.append(img_id)
def evaluate(self, output, batch):
    """Accumulate one batch of detections as COCO bbox (xywh) results."""
    dets = output['detection']
    # drop a possible leading batch dimension
    dets = dets[0] if dets.dim() == 3 else dets
    boxes = dets[:, :4].detach().cpu().numpy() * snake_config.down_ratio
    scores = dets[:, 4].detach().cpu().numpy()
    labels = dets[:, 5].detach().cpu().numpy().astype(int)

    img_id = int(batch['meta']['img_id'][0])
    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()
    if len(boxes) == 0:
        return

    h, w = batch['inp'].size(2), batch['inp'].size(3)
    # inverse transform: network output space -> original image space
    inv_trans = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
    img_info = self.coco.loadImgs(img_id)[0]
    ori_h, ori_w = img_info['height'], img_info['width']

    batch_results = []
    for i, raw_box in enumerate(boxes):
        xyxy = data_utils.affine_transform(raw_box.reshape(-1, 2), inv_trans).ravel()
        # convert [x1, y1, x2, y2] -> COCO [x, y, w, h]
        xyxy[2] -= xyxy[0]
        xyxy[3] -= xyxy[1]
        bbox = [float('{:.2f}'.format(v)) for v in xyxy]
        batch_results.append({
            'image_id': img_id,
            'category_id': self.contiguous_category_id_to_json_id[labels[i]],
            'bbox': bbox,
            'score': float('{:.2f}'.format(scores[i]))
        })
    self.results.extend(batch_results)
    self.img_ids.append(img_id)
def evaluate(self, output, batch):
    """Accumulate pose metrics (ADD(-S)/ADI and 5cm-5deg) for one image.

    Runs PnP (optionally uncertainty-weighted) per detected instance, then
    optionally refines with ICP using the depth image, and appends the
    per-image metric values to the running lists on ``self``.
    """
    img_id = int(batch['meta']['img_id'])
    self.img_ids.append(img_id)
    img_data = self.coco.loadImgs(int(img_id))[0]
    depth_path = img_data['depth_path']
    ann_ids = self.coco.getAnnIds(imgIds=img_id, catIds=self.obj_id)
    annos = self.coco.loadAnns(ann_ids)
    # 3D keypoints: FPS-sampled surface points plus object center (same for all annos)
    kpt_3d = np.concatenate([annos[0]['fps_3d'], [annos[0]['center_3d']]], axis=0)
    corner_3d = np.array(annos[0]['corner_3d'])  # NOTE(review): unused here
    K = np.array(annos[0]['K'])
    pose_gt = [np.array(anno['pose']) for anno in annos]
    kpt_2d = output['kpt_2d'].detach().cpu().numpy()
    centers = batch['meta']['center']
    scales = batch['meta']['scale']
    boxes = batch['meta']['box']  # NOTE(review): unused here
    h, w = batch['inp'].size(2), batch['inp'].size(3)
    pose_preds = []
    pose_preds_icp = []
    # one crop (center/scale) per detected instance
    for i in range(len(centers)):
        center = centers[i].detach().cpu().numpy()
        scale = scales[i].detach().cpu().numpy()
        kpt_2d_ = kpt_2d[i]
        # map predicted keypoints from the input crop back to image coords
        trans_inv = data_utils.get_affine_transform(center[0], scale[0], 0, [w, h], inv=1)
        kpt_2d_ = data_utils.affine_transform(kpt_2d_, trans_inv)
        if cfg.test.un_pnp:
            # uncertainty-weighted PnP using the predicted keypoint covariances
            var = output['var'][i].detach().cpu().numpy()
            pose_pred = self.uncertainty_pnp(kpt_3d, kpt_2d_, var, K)
        else:
            pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d_, K)
        pose_preds.append(pose_pred)
        if cfg.test.icp:
            # warp the predicted segmentation back to full-image resolution
            # and refine the pose against the depth map with ICP
            seg = torch.argmax(output['seg'][i], dim=0).detach().cpu().numpy()
            seg = seg.astype(np.uint8)
            seg = cv2.warpAffine(seg, trans_inv, (self.width, self.height), flags=cv2.INTER_NEAREST)
            pose_pred_icp = self.icp_refine(pose_pred.copy(), depth_path, seg.copy(), K.copy())
            pose_preds_icp.append(pose_pred_icp)
    if cfg.test.icp:
        self.icp_adi.append(self.adi_metric(pose_preds_icp, pose_gt))
        self.icp_cmd5.append(self.cm_degree_5_metric(pose_preds_icp, pose_gt))
        self.pose_icp_per_id.append(pose_preds_icp)
    self.adi.append(self.adi_metric(pose_preds, pose_gt))
    self.cmd5.append(self.cm_degree_5_metric(pose_preds, pose_gt))
    self.pose_per_id.append(pose_preds)
def transform_bbox(bboxes, trans_output, h, w):
    """Warp a list of [x1, y1, x2, y2] boxes into the output space.

    Coordinates are clipped to ``[0, w-1] x [0, h-1]``; boxes that collapse
    to <= 1 pixel in either dimension are replaced by an empty list so the
    caller can keep index alignment with the inputs.
    """
    warped_boxes = []
    for raw in bboxes:
        pts = data_utils.affine_transform(np.array(raw).reshape(-1, 2), trans_output)
        pts[:, 0] = pts[:, 0].clip(0, w - 1)
        pts[:, 1] = pts[:, 1].clip(0, h - 1)
        flat = pts.ravel().tolist()
        degenerate = flat[2] - flat[0] <= 1 or flat[3] - flat[1] <= 1
        warped_boxes.append([] if degenerate else flat)
    return warped_boxes
def transform_polys(polys, trans_output, output_h, output_w):
    """Warp polygons into the output space, clipping at all four borders.

    Polygons that end up empty or with at most two distinct vertices after
    clipping are dropped.
    """
    # (axis, limit, out-of-bounds predicate) for left, right, top, bottom
    border_specs = (
        (0, 0, lambda a, b: a < b),
        (0, output_w, lambda a, b: a >= b),
        (1, 0, lambda a, b: a < b),
        (1, output_h, lambda a, b: a >= b),
    )
    kept = []
    for poly in polys:
        warped = data_utils.affine_transform(poly, trans_output)
        for axis, limit, beyond in border_specs:
            warped = handle_break_point(warped, axis, limit, beyond)
        if len(warped) == 0:
            continue
        # degenerate contour: fewer than three distinct vertices
        if len(np.unique(warped, axis=0)) <= 2:
            continue
        kept.append(warped)
    return kept
def __getitem__(self, index):
    """Return one training sample: input image, instance mask and vertex field.

    bugfix: the original body started with ``index = 0``, a debugging
    leftover that discarded the requested index so every sample was built
    from the first image id, defeating shuffling. The override is removed.
    """
    img_id = self.img_ids[index]
    img, kpt_2d, mask = self.read_data(img_id)
    img, kpt_2d, mask, bbox = self.get_training_img(img, kpt_2d, mask)
    # augment crops/jitters the image around bbox and returns the input
    # transform so annotations can be warped consistently
    orig_img, inp, trans_input, center, scale, inp_hw = \
        tless_pvnet_utils.augment(img, bbox, 'train')
    kpt_2d = data_utils.affine_transform(kpt_2d, trans_input)
    mask = cv2.warpAffine(mask, trans_input, (inp_hw[1], inp_hw[0]), flags=cv2.INTER_NEAREST)
    # per-pixel unit vectors pointing at each keypoint, channels-first
    vertex = pvnet_data_utils.compute_vertex(mask, kpt_2d).transpose(2, 0, 1)
    ret = {'inp': inp, 'mask': mask.astype(np.uint8), 'vertex': vertex}
    # visualize_utils.visualize_ann(orig_img, kpt_2d, mask, False)
    return ret
def visualize(self, output, batch, id=0):
    """Show the image with GT (green) and predicted (blue) 3D box wireframes."""
    img = batch['img'][0].detach().cpu().numpy()
    center = output['center'][0]
    scale = output['scale'][0]
    h, w = tless_pvnet_utils.input_scale
    # map predicted keypoints from the input crop back to image coords
    inv_trans = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
    kpt_pred = output['kpt_2d'].detach().cpu().numpy()
    kpt_2d = data_utils.affine_transform(kpt_pred, inv_trans)[0]

    img_id = int(batch['img_id'][0])
    anno = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))[0]
    kpt_3d = np.concatenate([anno['fps_3d'], [anno['center_3d']]], axis=0)
    K = np.array(anno['K'])
    pose_gt = np.array(anno['pose'])
    pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d, K)
    corner_3d = np.array(anno['corner_3d'])

    _, ax = plt.subplots(1)
    ax.imshow(img)
    # two edge rings that together trace all 12 cuboid edges
    ring_a = [0, 1, 3, 2, 0, 4, 6, 2]
    ring_b = [5, 4, 6, 7, 5, 1, 3, 7]
    for pose, color in ((pose_gt, 'g'), (pose_pred, 'b')):
        corners_2d = pvnet_pose_utils.project(corner_3d, K, pose)
        for ring in (ring_a, ring_b):
            ax.add_patch(
                patches.Polygon(xy=corners_2d[ring], fill=False, linewidth=1, edgecolor=color))
    plt.show()
def read_data(self, img_id):
    """Load one TLESS sample and apply a random rotation plus color jitter.

    Returns the augmented image, rotated keypoints and rotated mask.
    """
    anno = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))[0]
    path = self.coco.loadImgs(int(img_id))[0]['file_name']
    inp = cv2.imread(path)
    kpt_2d = np.concatenate([anno['fps_2d'], [anno['center_2d']]], axis=0)
    mask = pvnet_data_utils.read_tless_mask(anno['mask_path'])

    # rotate image and mask by the same random angle; the returned matrix
    # from the mask rotation is reused to rotate the keypoints
    angle = np.random.uniform() * 360
    inp, _ = tless_train_utils.rotate_image(inp, angle, get_rot=True)
    if np.random.uniform() < 0.8:
        inp = tless_train_utils.color_jitter.augment_image(inp)
    mask, rot_mat = tless_train_utils.rotate_image(mask, angle, get_rot=True)
    kpt_2d = data_utils.affine_transform(kpt_2d, rot_mat)

    return inp, kpt_2d, mask
def read_data(self, img_id, index):
    """Load one TLESS sample with a deterministic (index-driven) rotation.

    Returns the augmented image, rotated keypoints, rotated mask, and the
    tight bounding box of the rotated mask as [x1, y1, x2, y2].
    """
    anno = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))[0]
    path = self.coco.loadImgs(int(img_id))[0]['file_name']
    inp = cv2.imread(path)
    kpt_2d = np.concatenate([anno['fps_2d'], [anno['center_2d']]], axis=0)
    mask = pvnet_data_utils.read_tless_mask(anno['type'], anno['mask_path'])

    # rotation angle comes from the sample index (not random)
    angle = get_rot(index)
    inp, _ = tless_train_utils.rotate_image(inp, angle, get_rot=True)
    if np.random.uniform() < 0.8:
        # reseed imgaug per call so workers don't repeat jitter sequences
        imgaug.seed(int(round(time.time() * 1000) % (2**16)))
        inp = tless_train_utils.color_jitter.augment_image(inp)
    mask, rot_mat = tless_train_utils.rotate_image(mask, angle, get_rot=True)
    kpt_2d = data_utils.affine_transform(kpt_2d, rot_mat)

    x, y, bw, bh = cv2.boundingRect(mask)
    bbox = [x, y, x + bw - 1, y + bh - 1]
    return inp, kpt_2d, mask, bbox
def _crop(img, box, trans_output_inv, output):
    """Crop ``img`` around the (inverse-mapped) box and normalize it.

    Returns ``[inp, center, scale]`` where ``inp`` is a 1xCxHxW CUDA tensor.
    """
    # box is given in output-map coords; map it back to image coords first
    corners = data_utils.affine_transform(box.reshape(-1, 2), trans_output_inv).ravel()
    center = np.array([(corners[0] + corners[2]) / 2, (corners[1] + corners[3]) / 2])
    scale = max(corners[2] - corners[0], corners[3] - corners[1]) * tless_config.scale_ratio

    # NOTE(review): input_scale is unpacked as (h, w) here but as (w, h)
    # elsewhere in this file — harmless only if the input is square; confirm.
    input_h, input_w = tless_pvnet_utils.input_scale
    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    patch = cv2.warpAffine(img.astype(np.uint8).copy(), trans_input,
                           (input_w, input_h), flags=cv2.INTER_LINEAR)

    patch = (patch.astype(np.float32) / 255. - tless_config.mean) / tless_config.std
    patch = patch.transpose(2, 0, 1)
    inp = torch.Tensor(patch).cuda().float()[None]
    return [inp, center, scale]
def transform_bbox(bbox, trans_output, output_h, output_w):
    """Warp a single [x1, y1, x2, y2] box and clamp it to the output bounds."""
    warped = data_utils.affine_transform(bbox.reshape(-1, 2), trans_output).ravel()
    # clamp x-coordinates and y-coordinates separately
    warped[[0, 2]] = warped[[0, 2]].clip(0, output_w - 1)
    warped[[1, 3]] = warped[[1, 3]].clip(0, output_h - 1)
    return warped
def inference():
    """Run text-detection inference over the dataset and dump per-image results.

    Loads the trained network, iterates the test Dataset, post-processes the
    contour outputs (optional heatmap rescoring, optional polygon
    classification, score sorting, polygon NMS), writes one detection file
    per image in the dataset-specific format, and finally saves per-image
    timing to 'infer_time.npy'.

    NOTE(review): this variant carries several debugging leftovers — `if 0:`
    blocks, a hard `cfg.poly_cls_branch = False` override, and a `continue`
    that makes the whole visualization tail unreachable.
    """
    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, resume=cfg.resume, epoch=cfg.test.epoch)
    network.eval()
    # snapshot the config used for this run next to the results
    with open(os.path.join(cfg.results_dir, 'cfg.json'), 'w') as fid:
        json.dump(cfg, fid)
    dataset = Dataset()
    visualizer = make_visualizer(cfg)  # only used by the commented-out vis-v2 path below
    infer_time_lst = []  # [net_time, post_process_time] per image
    for batch in tqdm.tqdm(dataset):
        batch['inp'] = torch.FloatTensor(batch['inp'])[None].cuda()
        net_time_s = time.time()
        with torch.no_grad():
            output = network(batch['inp'], batch)
        net_used_time = time.time() - net_time_s
        org_img = batch['org_img']
        rz_img = batch['rz_img']
        rz_ratio = batch['rz_ratio']  # resize factor between org_img and network input
        img_name = batch['image_name']
        center = batch['meta']['center']
        scale = batch['meta']['scale']
        h, w = batch['inp'].size(2), batch['inp'].size(3)
        if DEBUG:
            print('------------------img_name={}-------------------------'.format(img_name))
            print('org_img.shape:', org_img.shape)
            print('rz_img.shape:', rz_img.shape)
            print('input-size:({}, {})'.format(h, w))
        if cfg.rescore_map_flag:
            # re-score polygons with the predicted rescore heatmap and keep
            # only those above rs_thresh
            rs_thresh = 0.6
            detections = output['detection'].detach().cpu().numpy()
            polys = output['py'][-1].detach().cpu().numpy()
            rs_hm = torch.sigmoid(output['rs_hm']).detach().cpu().numpy()
            if 0:  # debug
                print('output.keys:', output.keys())
            rescores = rescoring_polygons(polys, rs_hm)
            conf_keep = np.where(rescores > rs_thresh)[0]
            detections = detections[conf_keep]
            pys = [polys[k] * snake_config.down_ratio for k in conf_keep]
            rescores = rescores[conf_keep]
            # save the rescore heatmap for inspection
            rs_hm_path = os.path.join(cfg.vis_dir, (img_name[:-4] + '_rs.png'))
            import matplotlib.pyplot as plt
            plt.imshow(rs_hm[0, 0, ...])
            plt.savefig(rs_hm_path)
            # NOTE(review): this branch never defines ex_pts /
            # final_contour_feat that the code below relies on — presumably
            # it was only exercised with the debug dump; confirm before use.
            if 0:  # debug dump; terminates the process
                print('detections.shape:', detections.shape)
                print('pys.num:', len(pys))
                print('rs_hm.shape:', rs_hm.shape)
                x = rs_hm[0, 0, ...]
                import matplotlib.pyplot as plt
                plt.imshow(x)
                for k in range(len(pys)):
                    plt.plot(pys[k][:, 0], pys[k][:, 1])
                plt.savefig('{}.png'.format(img_name[:-4]))
                plt.close()
                np.save('rs_hm.npy', x)
                np.save('pys.npy', np.array(pys))
                exit()
        else:
            # plain path: take boxes/contours from the detection head and
            # rescale them from the downsampled output map
            detections = output['detection'].detach().cpu().numpy()
            detections[:, :4] = detections[:, :4] * snake_config.down_ratio
            bboxes = detections[:, :4]
            scores = detections[:, 4]
            labels = detections[:, 5].astype(int)
            ex_pts = output['ex'].detach().cpu().numpy()
            ex_pts = ex_pts * snake_config.down_ratio
            #pys = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
            iter_ply_output_lst = [x.detach().cpu().numpy() * snake_config.down_ratio for x in output['py']]
            pys = iter_ply_output_lst[-1]  # contours from the last evolution iteration
            if cfg.vis_intermediate_output != 'none':
                # replace the final contours with an intermediate stage for visualization
                if cfg.vis_intermediate_output == 'htp':
                    # detection boxes rendered as 4-point polygons
                    xmin, ymin, xmax, ymax = bboxes[:, 0::4], bboxes[:, 1::4], bboxes[:, 2::4], bboxes[:, 3::4]
                    pys = np.hstack((xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin))
                    pys = pys.reshape(pys.shape[0], 4, 2)
                elif cfg.vis_intermediate_output == 'otp':
                    pys = ex_pts  # extreme points
                elif cfg.vis_intermediate_output == 'clm_1':
                    pys = iter_ply_output_lst[0]
                elif cfg.vis_intermediate_output == 'clm_2':
                    pys = iter_ply_output_lst[1]
                else:
                    raise ValueError('Not supported type:', cfg.vis_intermediate_output)
        # NOTE(review): hard override — permanently disables the polygon
        # classification branch below; looks like a debugging leftover.
        cfg.poly_cls_branch = False
        final_contour_feat = output['final_feat'].detach().cpu().numpy()
        if cfg.poly_cls_branch:
            # keep only polygons classified as text above the confidence threshold
            pys_cls = output['py_cls'][-1].detach().cpu().numpy()
            text_poly_scores = pys_cls[:, 1]
            rem_ids = np.where(text_poly_scores > cfg.poly_conf_thresh)[0]
            detections = detections[rem_ids]
            pys = pys[rem_ids]
            text_poly_scores = text_poly_scores[rem_ids]
            ex_pts = ex_pts[rem_ids]
            final_contour_feat = final_contour_feat[rem_ids]
            if DEBUG:
                print('py_cls_scores:', text_poly_scores)
        if DEBUG:
            print('dets_num:', len(pys))
        if len(pys) == 0:
            all_boundaries, poly_scores = [], []
        else:
            # map contours, boxes and extreme points back to original image coords
            trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
            all_boundaries = [data_utils.affine_transform(py_, trans_output_inv) for py_ in pys]
            bboxes_tmp = [data_utils.affine_transform(det[:4].reshape(-1, 2), trans_output_inv).flatten() for det in detections]
            ex_pts_tmp = [data_utils.affine_transform(ep, trans_output_inv) for ep in ex_pts]
            detections = np.hstack((np.array(bboxes_tmp), detections[:, 4:]))
            ex_pts = np.array(ex_pts_tmp)
        pp_time_s = time.time()
        #sorting detections by scores
        if cfg.poly_cls_branch:
            detections, ex_points, all_boundaries, final_contour_feat, poly_scores \
                = sorting_det_results(detections, ex_pts, all_boundaries, final_contour_feat, text_poly_scores)
        else:
            detections, ex_points, all_boundaries = sorting_det_results(detections, ex_pts, all_boundaries)
        if len(all_boundaries) != 0:
            # undo the pre-network resize so outputs are in original image scale
            detections[:, :4] /= rz_ratio
            ex_points /= rz_ratio
            all_boundaries = [poly / rz_ratio for poly in all_boundaries]
        if 0:  # debug: compare boundaries before/after NMS; terminates the process
            import matplotlib.pyplot as plt
            nms_polygons, rem_inds = snake_poly_utils.poly_nms(all_boundaries)
            print('nms_polygons.num:', len(nms_polygons))
            plt.subplot(1, 2, 1)
            plt = plot_poly(org_img, all_boundaries, scores=scores)
            plt.subplot(1, 2, 2)
            plt = plot_poly(org_img, nms_polygons)
            plt.savefig('a.png')
            exit()
        #nms
        all_boundaries, rem_inds = snake_poly_utils.poly_nms(all_boundaries)
        detections = detections[rem_inds]
        ex_points = ex_points[rem_inds]
        final_contour_feat = final_contour_feat[rem_inds]
        if cfg.poly_cls_branch:
            poly_scores = poly_scores[rem_inds]
        pp_used_time = time.time() - pp_time_s
        infer_time_lst.append([net_used_time, pp_used_time])
        if DEBUG:
            print('infer_time:', [net_used_time, pp_used_time])
        if 0:  # debug visualization of intermediate results
            vis_tmp_results(org_img, detections, ex_points, all_boundaries, final_contour_feat, poly_scores, output, indx=img_name[:-4])
        #--------------------------------saving results-------------------------------#
        # per-dataset file naming / writer conventions
        if cfg.testing_set == 'mlt':
            det_file = os.path.join(cfg.det_dir, ('res_' + img_name[3:-4] + '.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        elif cfg.testing_set == 'ic15':
            det_file = os.path.join(cfg.det_dir, ('res_' + img_name[:-4] + '.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        elif cfg.testing_set == 'msra':
            det_file = os.path.join(cfg.det_dir, ('res_' + img_name[:-4] + '.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        else:
            #for arbitrary-shape datasets, e.g., CTW,TOT,ART
            det_file = os.path.join(cfg.det_dir, (img_name[:-4] + '.txt'))
            saving_det_results(det_file, all_boundaries, img=org_img)
        # NOTE(review): everything below this `continue` is unreachable dead
        # code — the visualization tail appears deliberately disabled.
        continue
        #------------------------visualizing results---------------------------------#
        ## ~~~~~~ vis-v0 ~~~~~~~ ##
        vis_file = os.path.join(cfg.vis_dir, (img_name[:-4] + '.png'))
        if cfg.testing_set == 'ctw':
            gt_file = os.path.join(cfg.gts_dir, (img_name[:-4] + '.txt'))
            gt_polys = load_ctw_gt_label(gt_file)
        elif cfg.testing_set == 'tot':
            gt_file = os.path.join(cfg.gts_dir, ('poly_gt_' + img_name[:-4] + '.mat'))
            gt_polys = load_tot_gt_label(gt_file)
        elif cfg.testing_set == 'art':
            gt_polys = None
        elif cfg.testing_set == 'msra':
            gt_file = os.path.join(cfg.gts_dir, ('gt_' + img_name[:-4] + '.txt'))
            gt_polys = load_msra_gt_label(gt_file)
        else:
            raise ValueError('Not supported dataset ({}) for visualizing'.format(cfg.testing_set))
        plt = vis_dets_gts(org_img, all_boundaries, gt_polys)
        plt.savefig(vis_file, dpi=600, format='png')
        plt.close()
        ### ~~~~~~~~~ vis-v1 ~~~~~~~~~~~ ###
        # if cfg.poly_cls_branch:
        #     visualizing_det_results(org_img,all_boundaries,vis_file, scores=detections[:,4],poly_scores=poly_scores)
        # else:
        #     visualizing_det_results(org_img,all_boundaries,vis_file, scores=detections[:,4])
        ## vis-v2
        #hm_vis_dir = os.path.join(cfg.vis_dir, ('../vis_hm_on_img_dir'))
        #if not os.path.exists(hm_vis_dir):
        #    os.makedirs(hm_vis_dir)
        #visualizer.visualize(output, batch, os.path.join(hm_vis_dir,(img_name[:-4]+'.png')))
    np.save('infer_time.npy', np.array(infer_time_lst))
def visualize(self, output, batch):
    """Plot predicted (blue) and ground-truth (green) 3D box wireframes.

    Per detected instance, keypoints are mapped back to image coordinates,
    a pose is recovered with PnP, and the object's 3D corners are projected
    and drawn over the image. Ground-truth poses from all annotations are
    drawn for comparison.
    """
    img_id = int(batch['meta']['img_id'])
    img_data = self.coco.loadImgs(int(img_id))[0]
    path = img_data['file_name']
    depth_path = img_data['depth_path']
    img = np.array(Image.open(path))
    ann_ids = self.coco.getAnnIds(imgIds=img_id, catIds=self.obj_id)
    annos = self.coco.loadAnns(ann_ids)
    # 3D keypoints: FPS-sampled surface points plus object center (first anno)
    kpt_3d = np.concatenate([annos[0]['fps_3d'], [annos[0]['center_3d']]], axis=0)
    corner_3d = np.array(annos[0]['corner_3d'])
    K = np.array(annos[0]['K'])
    kpt_2d = output['kpt_2d'].detach().cpu().numpy()
    centers = batch['meta']['center']
    scales = batch['meta']['scale']
    boxes = batch['meta']['box']  # only used by the commented-out box plot below
    h, w = batch['inp'].size(2), batch['inp'].size(3)
    kpt_2ds = []
    segs = []
    for i in range(len(centers)):
        center = centers[i].detach().cpu().numpy()
        scale = scales[i].detach().cpu().numpy()
        kpt_2d_ = kpt_2d[i]
        # map predictions from the network input crop back to image coords
        trans_inv = data_utils.get_affine_transform(center[0], scale[0], 0, [w, h], inv=1)
        kpt_2d_ = data_utils.affine_transform(kpt_2d_, trans_inv)
        kpt_2ds.append(kpt_2d_)
        seg = torch.argmax(output['seg'][i], dim=0).detach().cpu().numpy()
        seg = seg.astype(np.uint8)
        # NOTE(review): (720, 540) hard-codes the full image size — confirm
        # it matches this dataset's resolution
        seg = cv2.warpAffine(seg, trans_inv, (720, 540), flags=cv2.INTER_NEAREST)
        segs.append(seg)
    _, ax = plt.subplots(1)
    ax.imshow(img)
    # for i in range(len(boxes)):
    #     x_min, y_min, x_max, y_max = boxes[i].view(-1).numpy()
    #     ax.plot([x_min, x_min, x_max, x_max, x_min], [y_min, y_max, y_max, y_min, y_min])
    depth = np.array(Image.open(depth_path)).astype(np.float32)
    for i, kpt_2d in enumerate(kpt_2ds):
        pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d, K)
        mask = segs[i]
        box = cv2.boundingRect(mask.astype(np.uint8))
        # back-project the mask's box center using the mean observed depth
        x, y = box[0] + box[2] / 2., box[1] + box[3] / 2.
        # NOTE(review): /10000. implies a 0.1mm depth unit — confirm
        z = np.mean(depth[mask != 0] / 10000.)
        x = ((x - K[0, 2]) * z) / float(K[0, 0])
        y = ((y - K[1, 2]) * z) / float(K[1, 1])
        # computed but unused while the translation override stays commented out
        center = [x, y, z]
        # pose_pred[:, 3] = center
        corner_2d_pred = pvnet_pose_utils.project(corner_3d, K, pose_pred)
        # predicted cuboid: two edge rings covering all 12 edges (blue)
        ax.add_patch(
            patches.Polygon(xy=corner_2d_pred[[0, 1, 3, 2, 0, 4, 6, 2]], fill=False, linewidth=1, edgecolor='b'))
        ax.add_patch(
            patches.Polygon(xy=corner_2d_pred[[5, 4, 6, 7, 5, 1, 3, 7]], fill=False, linewidth=1, edgecolor='b'))
    for anno in annos:
        pose_gt = np.array(anno['pose'])
        corner_2d_gt = pvnet_pose_utils.project(corner_3d, K, pose_gt)
        # ground-truth cuboid (green)
        ax.add_patch(
            patches.Polygon(xy=corner_2d_gt[[0, 1, 3, 2, 0, 4, 6, 2]], fill=False, linewidth=1, edgecolor='g'))
        ax.add_patch(
            patches.Polygon(xy=corner_2d_gt[[5, 4, 6, 7, 5, 1, 3, 7]], fill=False, linewidth=1, edgecolor='g'))
    plt.show()
def inference():
    """Run text-detection inference and write one detection file per image.

    Cleaner variant of the pipeline: network forward pass, rescale outputs
    from the downsampled map, optional polygon-classification filtering,
    inverse-transform to original image coordinates, score sorting, polygon
    NMS (RLE-based or geometric), and result saving. Per-image timings are
    collected in ``infer_time_lst``.
    """
    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, resume=cfg.resume, epoch=cfg.test.epoch)
    network.eval()
    # snapshot the config used for this run next to the results
    with open(os.path.join(cfg.results_dir, 'cfg.json'), 'w') as fid:
        json.dump(cfg, fid)
    dataset = Dataset()
    visualizer = make_visualizer(cfg)  # NOTE(review): unused in this function
    infer_time_lst = []  # [net_time, post_process_time] per image
    for batch in tqdm.tqdm(dataset):
        batch['inp'] = torch.FloatTensor(batch['inp'])[None].cuda()
        net_time_s = time.time()
        with torch.no_grad():
            output = network(batch['inp'], batch)
        net_used_time = time.time() - net_time_s
        org_img = batch['org_img']
        rz_img = batch['rz_img']
        rz_ratio = batch['rz_ratio']  # resize factor between org_img and network input
        img_name = batch['image_name']
        center = batch['meta']['center']
        scale = batch['meta']['scale']
        h, w = batch['inp'].size(2), batch['inp'].size(3)
        # rescale boxes from the downsampled output map
        detections = output['detection'].detach().cpu().numpy()
        detections[:, :4] = detections[:, :4] * snake_config.down_ratio
        bboxes = detections[:, :4]
        scores = detections[:, 4]
        labels = detections[:, 5].astype(int)
        ex_pts = output['ex'].detach().cpu().numpy()
        ex_pts = ex_pts * snake_config.down_ratio
        #pys = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
        iter_ply_output_lst = [
            x.detach().cpu().numpy() * snake_config.down_ratio
            for x in output['py']
        ]
        pys = iter_ply_output_lst[-1]  # contours from the last evolution iteration
        final_contour_feat = output['final_feat'].detach().cpu().numpy()
        if cfg.poly_cls_branch:
            # keep only polygons classified as text above the confidence threshold
            pys_cls = output['py_cls'][-1].detach().cpu().numpy()
            text_poly_scores = pys_cls[:, 1]
            rem_ids = np.where(text_poly_scores > cfg.poly_conf_thresh)[0]
            detections = detections[rem_ids]
            pys = pys[rem_ids]
            text_poly_scores = text_poly_scores[rem_ids]
            ex_pts = ex_pts[rem_ids]
            final_contour_feat = final_contour_feat[rem_ids]
        if len(pys) == 0:
            all_boundaries, poly_scores = [], []
        else:
            # map contours, boxes and extreme points back to original image coords
            trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
            all_boundaries = [
                data_utils.affine_transform(py_, trans_output_inv)
                for py_ in pys
            ]
            bboxes_tmp = [
                data_utils.affine_transform(det[:4].reshape(-1, 2), trans_output_inv).flatten()
                for det in detections
            ]
            ex_pts_tmp = [
                data_utils.affine_transform(ep, trans_output_inv)
                for ep in ex_pts
            ]
            detections = np.hstack((np.array(bboxes_tmp), detections[:, 4:]))
            ex_pts = np.array(ex_pts_tmp)
        pp_time_s = time.time()
        #sorting detections by scores
        if cfg.poly_cls_branch:
            detections, ex_points, all_boundaries, final_contour_feat, poly_scores \
                = sorting_det_results(detections, ex_pts, all_boundaries, final_contour_feat, text_poly_scores)
        else:
            detections, ex_points, all_boundaries = sorting_det_results(
                detections, ex_pts, all_boundaries)
        if cfg.rle_nms:
            # rasterized (RLE) polygon NMS on the score-sorted boundaries
            tmp_polys = all_boundaries.copy()
            #all_boundaries, rem_inds = snake_poly_utils.poly_nms(tmp_polys)
            rem_inds = poly_rle_nms(tmp_polys, detections[:, -1], (h, w), nms_thresh=0.3)
            all_boundaries = [all_boundaries[idx] for idx in rem_inds]
        else:
            #nms
            all_boundaries, rem_inds = snake_poly_utils.poly_nms(
                all_boundaries)
        # filter the parallel arrays with the surviving indices
        detections = detections[rem_inds]
        ex_points = ex_points[rem_inds]
        final_contour_feat = final_contour_feat[rem_inds]
        if cfg.poly_cls_branch:
            poly_scores = poly_scores[rem_inds]
        pp_used_time = time.time() - pp_time_s
        infer_time_lst.append([net_used_time, pp_used_time])
        if len(all_boundaries) != 0:
            # undo the pre-network resize so outputs are in original image scale
            detections[:, :4] /= rz_ratio
            ex_points /= rz_ratio
            all_boundaries = [poly / rz_ratio for poly in all_boundaries]
        #--------------------------------saving results-------------------------------#
        det_file = os.path.join(cfg.det_dir, (img_name[:-4] + '.txt'))
        saving_det_results(det_file, all_boundaries, img=org_img)