def augment(img, split):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    if split != 'train':
        center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        scale = max(width, height) * 1.0
        scale = np.array([scale, scale])

    # pad the input size up to the next multiple of 32 so it matches the
    # network's total downsampling stride
    x = 32
    input_w = int((width / 1. + x - 1) // x * x)
    input_h = int((height / 1. + x - 1) // x * x)

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // tless_config.down_ratio, input_w // tless_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0, [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, center, scale, inp_out_hw
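# A minimal, self-contained sketch (added for illustration, not from the original
# repo) of the size rounding used above: int((size + x - 1) // x * x) with x = 32
# pads each dimension up to the next multiple of 32, so the warped input stays
# divisible by the network's total downsampling stride.
def round_up_to_stride(size, stride=32):
    return int((size + stride - 1) // stride * stride)

assert round_up_to_stride(500) == 512  # padded up
assert round_up_to_stride(512) == 512  # already aligned, unchanged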
def augment(img, split, _data_rng, _eig_val, _eig_vec, mean, std, polys=None):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    # random crop and flip augmentation
    flipped = False
    if split == 'train':
        scale = scale * np.random.uniform(0.6, 1.4)
        x, y = center
        w_border = data_utils.get_border(width / 4, scale[0]) + 1
        h_border = data_utils.get_border(height / 4, scale[0]) + 1
        center[0] = np.random.randint(low=max(x - w_border, 0), high=min(x + w_border, width - 1))
        center[1] = np.random.randint(low=max(y - h_border, 0), high=min(y + h_border, height - 1))

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_h, input_w = snake_config.voc_input_h, snake_config.voc_input_w
    if split != 'train':
        center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        scale = max(width, height) * 1.0
        scale = np.array([scale, scale])
        x = 32
        input_w, input_h = 512, 512
        # input_w, input_h = (width + x - 1) // x * x, (height + x - 1) // x * x

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)
        # blur_aug(inp)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // snake_config.down_ratio, input_w // snake_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0, [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, flipped, center, scale, inp_out_hw
def augment(img, split, _data_rng, _eig_val, _eig_vec, mean, std, polys):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = snake_config.scale
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    # random crop and flip augmentation
    flipped = False
    if split == 'train':
        scale = scale * np.random.uniform(0.4, 1.6)
        seed = np.random.randint(0, len(polys))
        index = np.random.randint(0, len(polys[seed]))
        poly = polys[seed][index]['poly']
        center[0], center[1] = poly[np.random.randint(len(poly))]
        border = scale[0] // 2 if scale[0] < width else width - scale[0] // 2
        center[0] = np.clip(center[0], a_min=border, a_max=width - border)
        border = scale[1] // 2 if scale[1] < height else height - scale[1] // 2
        center[1] = np.clip(center[1], a_min=border, a_max=height - border)

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_w, input_h = snake_config.input_w, snake_config.input_h
    if split != 'train':
        center = np.array([width // 2, height // 2])
        scale = np.array([width, height])
        # input_w, input_h = width, height
        input_w, input_h = int((width / 0.85 + 31) // 32 * 32), int((height / 0.85 + 31) // 32 * 32)

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)
        # data_utils.blur_aug(inp)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // snake_config.down_ratio, input_w // snake_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0, [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, flipped, center, scale, inp_out_hw
def augment(img, split):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        scale = np.array([scale, scale], dtype=np.float32)

    if split == 'train':
        scale = scale * np.random.uniform(0.6, 1.4)
        center = np.array([0, 0])
        border = scale[0] // 2 if scale[0] < width else width - scale[0] // 2
        border_r = max(width - border, border + 1)
        center[0] = np.random.randint(border, border_r)
        border = scale[1] // 2 if scale[1] < height else height - scale[1] // 2
        border_r = max(height - border, border + 1)
        center[1] = np.random.randint(border, border_r)

    input_w, input_h = input_scale
    if split != 'train':
        center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        scale = max(width, height) * 1.0
        scale = np.array([scale, scale])
        x = 32
        input_w, input_h = (width + x - 1) // x * x, (height + x - 1) // x * x

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        data_utils.color_aug(_data_rng, inp, _eig_val, _eig_vec)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    output_h, output_w = input_h // tless_config.down_ratio, input_w // tless_config.down_ratio
    trans_output = data_utils.get_affine_transform(center, scale, 0, [output_w, output_h])
    inp_out_hw = (input_h, input_w, output_h, output_w)

    return orig_img, inp, trans_input, trans_output, center, scale, inp_out_hw
def __getitem__(self, index):
    img = self.imgs[index]
    img_id = os.path.basename(img).replace('_leftImg8bit.png', '')
    img = cv2.imread(img)

    width, height = 2048, 1024
    center = np.array([width // 2, height // 2])
    scale = np.array([width, height])
    # input_w, input_h = width, height
    input_w, input_h = int((width / 0.85 + 31) // 32 * 32), int((height / 0.85 + 31) // 32 * 32)

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = self.normalize_image(inp)

    ret = {'inp': inp}
    meta = {'center': center, 'scale': scale, 'test': '', 'img_id': img_id, 'ann': ''}
    ret.update({'meta': meta})

    return ret
def __getitem__(self, index):
    ann = self.anns[index]
    path, img_id = self.process_info(ann)
    img = cv2.imread(path)

    width, height = img.shape[1], img.shape[0]
    center = np.array([width // 2, height // 2])
    scale = np.array([width, height])
    # round each input dimension up via a bit trick: (w | (x - 1)) + 1 is the
    # smallest multiple of x = 32 strictly greater than w
    x = 32
    input_w = (int(width / 1.) | (x - 1)) + 1
    input_h = (int(height / 1.) | (x - 1)) + 1

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = self.normalize_image(inp)

    ret = {'inp': inp}
    meta = {'center': center, 'scale': scale, 'test': '', 'img_id': img_id, 'ann': ''}
    ret.update({'meta': meta})

    return ret
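# A hedged note on the bit trick above: (w | (x - 1)) + 1 with x = 32 yields the
# smallest multiple of 32 STRICTLY greater than w, so a dimension that is already
# a multiple of 32 is still padded by one extra stride (512 -> 544). That differs
# from the (w + 31) // 32 * 32 rounding used by the other loaders (512 -> 512).
assert (500 | 31) + 1 == 512
assert (512 | 31) + 1 == 544  # strictly-greater behavior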
def evaluate(self, output, batch):
    detection = output['detection']
    score = detection[:, 4].detach().cpu().numpy()
    label = detection[:, 5].detach().cpu().numpy().astype(int)
    label = snake_cityscapes_utils.continuous_label_to_cityscapes_label(label)
    py = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio

    h, w = batch['inp'].size(2), batch['inp'].size(3)
    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()
    trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
    py = [data_utils.affine_transform(py_, trans_output_inv) for py_ in py]

    ori_h, ori_w = 1024, 2048
    mask = snake_eval_utils.poly_to_mask(py, label, ori_h, ori_w)

    img_id = batch['meta']['img_id'][0]
    instance_dir = os.path.join(self.instance_dir, img_id)
    os.system('mkdir -p {}'.format(instance_dir))

    self.anns.append(batch['meta']['ann'][0])

    txt_path = os.path.join(self.txt_dir, '{}.txt'.format(img_id))
    with open(txt_path, 'w') as f:
        for i in range(len(label)):
            instance_path = os.path.join(instance_dir, 'instance' + str(i) + '.png')
            cv2.imwrite(instance_path, mask[i])
            # reference the mask relative to the txt directory; the original
            # used a non-portable '..\mask' literal here
            instance_path = os.path.join('..', 'mask', img_id, 'instance' + str(i) + '.png')
            f.write('{} {} {}\n'.format(instance_path, label[i], score[i]))
def evaluate(self, output, batch):
    detection = output['detection']
    score = detection[:, 4].detach().cpu().numpy()
    label = detection[:, 5].detach().cpu().numpy().astype(int)
    py = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio

    if len(py) == 0:
        return

    img_id = int(batch['meta']['img_id'][0])
    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()

    h, w = batch['inp'].size(2), batch['inp'].size(3)
    trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
    img = self.coco.loadImgs(img_id)[0]
    ori_h, ori_w = img['height'], img['width']
    py = [data_utils.affine_transform(py_, trans_output_inv) for py_ in py]
    rles = snake_eval_utils.coco_poly_to_rle(py, ori_h, ori_w)

    coco_dets = []
    for i in range(len(rles)):
        detection = {
            'image_id': img_id,
            'category_id': self.contiguous_category_id_to_json_id[label[i]],
            'segmentation': rles[i],
            'score': float('{:.2f}'.format(score[i]))
        }
        coco_dets.append(detection)

    self.results.extend(coco_dets)
    self.img_ids.append(img_id)
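# A minimal sketch, assuming pycocotools, of how the self.results / self.img_ids
# accumulated above are typically consumed once evaluation is done. The file name
# and method placement are illustrative, not taken from the original class.
import json
from pycocotools.cocoeval import COCOeval

def summarize(self):
    json.dump(self.results, open('results.json', 'w'))
    coco_dets = self.coco.loadRes('results.json')
    coco_eval = COCOeval(self.coco, coco_dets, 'segm')  # 'bbox' for the box evaluator below
    coco_eval.params.imgIds = self.img_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()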
def __getitem__(self, index):
    img_path = self.imgs[index]
    img_name = os.path.basename(img_path)
    org_img = cv2.imread(img_path)

    if not cfg.test.target_scale:
        img = org_img.copy()
        rz_ratio = 1
    else:
        img, rz_ratio = self.resize(org_img, cfg.test.target_scale[0], cfg.test.target_scale[1])

    width, height = img.shape[1], img.shape[0]
    center = np.array([width // 2, height // 2])
    scale = np.array([width, height])
    x = 32
    input_w = (int(width / 1.) | (x - 1)) + 1
    input_h = (int(height / 1.) | (x - 1)) + 1

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
    inp = self.normalize_image(inp)

    ret = {'inp': inp}
    meta = {'center': center, 'scale': scale, 'test': '', 'ann': ''}
    ret.update({'meta': meta})
    ret.update({'org_img': org_img})
    ret.update({'rz_img': img})
    ret.update({'rz_ratio': rz_ratio})
    ret.update({'image_name': img_name})

    return ret
def crop(img, detection, batch, output):
    img = img[0].detach().cpu().numpy()
    fx_config.max_det = 1
    fx_config.ct_score = 0
    fx_config.down_ratio = 4

    box = output['detection'][0, :fx_config.max_det, :4]
    score = output['detection'][0, :fx_config.max_det, 4]
    box = box[score > fx_config.ct_score]
    box = box.detach().cpu().numpy() * fx_config.down_ratio

    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()
    h, w = batch['inp'].size(2), batch['inp'].size(3)
    trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)

    init = [_crop(img, box_, trans_output_inv, output) for box_ in box]
    if len(init) == 0:
        output.update({'inp': [], 'center': [], 'scale': []})
        return []

    inp, center, scale = list(zip(*init))
    inp = torch.cat(inp, dim=0)
    output.update({'inp': inp, 'center': center, 'scale': scale})

    return inp
def pvnet_transform(img, box):
    center = np.array([(box[0] + box[2]) / 2., (box[1] + box[3]) / 2.], dtype=np.float32)
    scale = max(box[2] - box[0], box[3] - box[1]) * tless_config.scale_ratio
    input_w, input_h = tless_pvnet_utils.input_scale

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # zero out everything outside the (slightly magnified) box
    box = np.array(box).reshape(-1, 2)
    box = data_utils.affine_transform(box, trans_input)
    box = magnify_box(box, tless_config.box_ratio, input_h, input_w)
    new_img = np.zeros_like(inp)
    new_img[box[0, 1]:box[1, 1] + 1, box[0, 0]:box[1, 0] + 1] = \
        inp[box[0, 1]:box[1, 1] + 1, box[0, 0]:box[1, 0] + 1]
    inp = new_img

    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    return orig_img, inp, center, scale
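# A self-contained sketch of the crop / map-back pattern these transforms rely on:
# points warped by a 2x3 affine can be recovered with its inverse, which is what
# the inv=1 calls to get_affine_transform provide elsewhere in this file.
# apply_affine is an illustrative stand-in for data_utils.affine_transform.
import cv2
import numpy as np

def apply_affine(pts, trans):
    # pts: (N, 2) array, trans: (2, 3) affine matrix
    pts_h = np.hstack([pts, np.ones((len(pts), 1))])
    return pts_h @ trans.T

trans = np.float64([[0.5, 0., 10.], [0., 0.5, 20.]])  # toy crop transform
pts = np.float64([[100., 200.], [300., 400.]])
back = apply_affine(apply_affine(pts, trans), cv2.invertAffineTransform(trans))
assert np.allclose(back, pts)  # round trip recovers the original coordinates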
def evaluate(self, output, batch):
    detection = output['detection']
    detection = detection[0] if detection.dim() == 3 else detection
    box = detection[:, :4].detach().cpu().numpy() * snake_config.down_ratio
    score = detection[:, 4].detach().cpu().numpy()
    label = detection[:, 5].detach().cpu().numpy().astype(int)

    img_id = int(batch['meta']['img_id'][0])
    center = batch['meta']['center'][0].detach().cpu().numpy()
    scale = batch['meta']['scale'][0].detach().cpu().numpy()

    if len(box) == 0:
        return

    h, w = batch['inp'].size(2), batch['inp'].size(3)
    trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
    img = self.coco.loadImgs(img_id)[0]
    ori_h, ori_w = img['height'], img['width']

    coco_dets = []
    for i in range(len(label)):
        box_ = data_utils.affine_transform(box[i].reshape(-1, 2), trans_output_inv).ravel()
        box_[2] -= box_[0]
        box_[3] -= box_[1]
        box_ = list(map(lambda x: float('{:.2f}'.format(x)), box_))
        detection = {
            'image_id': img_id,
            'category_id': self.contiguous_category_id_to_json_id[label[i]],
            'bbox': box_,
            'score': float('{:.2f}'.format(score[i]))
        }
        coco_dets.append(detection)

    self.results.extend(coco_dets)
    self.img_ids.append(img_id)
def evaluate(self, output, batch):
    img_id = int(batch['meta']['img_id'])
    self.img_ids.append(img_id)

    img_data = self.coco.loadImgs(int(img_id))[0]
    depth_path = img_data['depth_path']

    ann_ids = self.coco.getAnnIds(imgIds=img_id, catIds=self.obj_id)
    annos = self.coco.loadAnns(ann_ids)
    kpt_3d = np.concatenate([annos[0]['fps_3d'], [annos[0]['center_3d']]], axis=0)
    corner_3d = np.array(annos[0]['corner_3d'])
    K = np.array(annos[0]['K'])
    pose_gt = [np.array(anno['pose']) for anno in annos]

    kpt_2d = output['kpt_2d'].detach().cpu().numpy()
    centers = batch['meta']['center']
    scales = batch['meta']['scale']
    boxes = batch['meta']['box']
    h, w = batch['inp'].size(2), batch['inp'].size(3)

    pose_preds = []
    pose_preds_icp = []
    for i in range(len(centers)):
        center = centers[i].detach().cpu().numpy()
        scale = scales[i].detach().cpu().numpy()
        kpt_2d_ = kpt_2d[i]
        trans_inv = data_utils.get_affine_transform(center[0], scale[0], 0, [w, h], inv=1)
        kpt_2d_ = data_utils.affine_transform(kpt_2d_, trans_inv)

        if cfg.test.un_pnp:
            var = output['var'][i].detach().cpu().numpy()
            pose_pred = self.uncertainty_pnp(kpt_3d, kpt_2d_, var, K)
        else:
            pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d_, K)
        pose_preds.append(pose_pred)

        if cfg.test.icp:
            seg = torch.argmax(output['seg'][i], dim=0).detach().cpu().numpy()
            seg = seg.astype(np.uint8)
            seg = cv2.warpAffine(seg, trans_inv, (self.width, self.height), flags=cv2.INTER_NEAREST)
            pose_pred_icp = self.icp_refine(pose_pred.copy(), depth_path, seg.copy(), K.copy())
            pose_preds_icp.append(pose_pred_icp)

    if cfg.test.icp:
        self.icp_adi.append(self.adi_metric(pose_preds_icp, pose_gt))
        self.icp_cmd5.append(self.cm_degree_5_metric(pose_preds_icp, pose_gt))
        self.pose_icp_per_id.append(pose_preds_icp)

    self.adi.append(self.adi_metric(pose_preds, pose_gt))
    self.cmd5.append(self.cm_degree_5_metric(pose_preds, pose_gt))
    self.pose_per_id.append(pose_preds)
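# Hedged sketch of the 5cm/5deg criterion that cm_degree_5_metric above presumably
# implements (this is the standard definition; the repo's version may differ in
# details such as multi-instance matching): a predicted pose counts as correct
# when translation error < 5 cm and rotation error < 5 degrees. Poses here are
# 3x4 [R|t] matrices with t in meters.
import numpy as np

def cm_degree_5_sketch(pose_pred, pose_gt):
    trans_err_cm = np.linalg.norm(pose_pred[:, 3] - pose_gt[:, 3]) * 100
    rot = pose_pred[:, :3] @ pose_gt[:, :3].T
    angle_err_deg = np.rad2deg(np.arccos(np.clip((np.trace(rot) - 1) / 2, -1, 1)))
    return trans_err_cm < 5 and angle_err_deg < 5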
def augment(img, split, down_ratio, _data_rng, _eig_val, _eig_vec, mean, std):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = max(img.shape[0], img.shape[1]) * 1.0

    # random crop and flip augmentation
    flipped = False
    if split == 'train':
        scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
        w_border = get_border(128, img.shape[1])
        h_border = get_border(128, img.shape[0])
        center[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
        center[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_h, input_w = (512, 512)
    trans_input = get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        color_aug(_data_rng, inp, _eig_val, _eig_vec)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    # resize output
    output_h = input_h // down_ratio
    output_w = input_w // down_ratio
    trans_output = get_affine_transform(center, scale, 0, [output_w, output_h])

    return orig_img, inp, trans_input, trans_output, input_h, input_w, output_h, output_w, flipped
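# If `flipped` is set above, annotations must be mirrored the same way as
# img[:, ::-1, :]: a pixel at column x ends up at column width - 1 - x, which is
# exactly the center[0] = width - center[0] - 1 update. A quick self-contained
# sanity check of that identity (illustrative, not from the original repo):
import numpy as np

img = np.arange(12).reshape(3, 4, 1)
x = 1
assert (img[:, ::-1, :][:, img.shape[1] - 1 - x, :] == img[:, x, :]).all()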
def pvnet_transform(img, box):
    center = np.array([(box[0] + box[2]) / 2., (box[1] + box[3]) / 2.], dtype=np.float32)
    scale = np.array([box[2] - box[0], box[3] - box[1]], dtype=np.float32) * 1.2
    input_w, input_h = tless_pvnet_utils.input_scale

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    return orig_img, inp, center, scale
def visualize(self, output, batch, id=0):
    img = batch['img'][0].detach().cpu().numpy()
    center = output['center'][0]
    scale = output['scale'][0]

    h, w = tless_pvnet_utils.input_scale
    trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
    kpt = output['kpt_2d'].detach().cpu().numpy()
    kpt_2d = data_utils.affine_transform(kpt, trans_output_inv)[0]

    img_id = int(batch['img_id'][0])
    anno = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))[0]
    kpt_3d = np.concatenate([anno['fps_3d'], [anno['center_3d']]], axis=0)
    K = np.array(anno['K'])
    pose_gt = np.array(anno['pose'])
    pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d, K)

    corner_3d = np.array(anno['corner_3d'])
    corner_2d_gt = pvnet_pose_utils.project(corner_3d, K, pose_gt)
    corner_2d_pred = pvnet_pose_utils.project(corner_3d, K, pose_pred)

    _, ax = plt.subplots(1)
    ax.imshow(img)
    ax.add_patch(patches.Polygon(xy=corner_2d_gt[[0, 1, 3, 2, 0, 4, 6, 2]], fill=False, linewidth=1, edgecolor='g'))
    ax.add_patch(patches.Polygon(xy=corner_2d_gt[[5, 4, 6, 7, 5, 1, 3, 7]], fill=False, linewidth=1, edgecolor='g'))
    ax.add_patch(patches.Polygon(xy=corner_2d_pred[[0, 1, 3, 2, 0, 4, 6, 2]], fill=False, linewidth=1, edgecolor='b'))
    ax.add_patch(patches.Polygon(xy=corner_2d_pred[[5, 4, 6, 7, 5, 1, 3, 7]], fill=False, linewidth=1, edgecolor='b'))
    plt.show()
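# A hedged sketch (an assumption about what pvnet_pose_utils.pnp wraps; the repo
# may implement it differently) of the PnP step used above: recover a 3x4 pose
# [R|t] from 3D keypoints, their 2D projections, and the intrinsics K via OpenCV.
import cv2
import numpy as np

def pnp_sketch(kpt_3d, kpt_2d, K):
    dist_coeffs = np.zeros((4, 1))  # keypoints are assumed already undistorted
    _, rvec, tvec = cv2.solvePnP(kpt_3d.astype(np.float64),
                                 kpt_2d.astype(np.float64).reshape(-1, 1, 2),
                                 K.astype(np.float64), dist_coeffs,
                                 flags=cv2.SOLVEPNP_EPNP)
    R, _ = cv2.Rodrigues(rvec)  # rotation vector -> 3x3 rotation matrix
    return np.hstack([R, tvec])  # 3x4 pose matrix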
def _crop(img, box, trans_output_inv, output):
    box = data_utils.affine_transform(box.reshape(-1, 2), trans_output_inv).ravel()
    center = np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2])
    scale = max(box[2] - box[0], box[3] - box[1]) * tless_config.scale_ratio
    input_h, input_w = tless_pvnet_utils.input_scale

    trans_input = data_utils.get_affine_transform(center, scale, 0, [input_w, input_h])
    img = img.astype(np.uint8).copy()
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    inp = (inp.astype(np.float32) / 255.)
    inp = (inp - tless_config.mean) / tless_config.std
    inp = inp.transpose(2, 0, 1)
    inp = torch.Tensor(inp).cuda().float()[None]

    init = [inp, center, scale]
    return init
def augment(img, split, down_ratio, _data_rng, _eig_val, _eig_vec, mean, std, polys, boxes=None, label=None):
    # resize input
    height, width = img.shape[0], img.shape[1]
    center = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
    scale = 800

    # random crop and flip augmentation
    flipped = False
    if cfg.small_num > 0:
        img, polys, boxes, label = small_aug(img, polys, boxes, label, cfg.small_num)
    if split == 'train':
        scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
        seed = np.random.randint(0, len(polys))
        index = np.random.randint(0, len(polys[seed]))
        x = polys[seed][index]['bbox'][0] + (polys[seed][index]['bbox'][2] - 1) / 2
        y = polys[seed][index]['bbox'][1] + (polys[seed][index]['bbox'][3] - 1) / 2
        w_border = get_border(200, scale)
        h_border = get_border(200, scale)
        if (w_border == 0) or (h_border == 0):
            center[0] = x
            center[1] = y
        else:
            center[0] = np.random.randint(low=max(x - w_border, 0), high=min(x + w_border, width - 1))
            center[1] = np.random.randint(low=max(y - h_border, 0), high=min(y + h_border, height - 1))

        # flip augmentation
        if np.random.random() < 0.5:
            flipped = True
            img = img[:, ::-1, :]
            center[0] = width - center[0] - 1

    input_h, input_w = (800, 800)
    if split == 'val':
        center = np.array([1024, 512])
        scale = [2048, 1024]
        input_h, input_w = (1024, 2048)

    trans_input = get_affine_transform(center, scale, 0, [input_w, input_h])
    inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

    # color augmentation
    orig_img = inp.copy()
    inp = (inp.astype(np.float32) / 255.)
    if split == 'train':
        color_aug(_data_rng, inp, _eig_val, _eig_vec)
        # blur_aug(inp)

    # normalize the image
    inp = (inp - mean) / std
    inp = inp.transpose(2, 0, 1)

    # resize output
    output_h = input_h // down_ratio
    output_w = input_w // down_ratio
    trans_output = get_affine_transform(center, scale, 0, [output_w, output_h])

    return orig_img, inp, trans_input, trans_output, input_h, input_w, output_h, output_w, flipped, center, scale, \
        polys, boxes, label
def inference():
    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, resume=cfg.resume, epoch=cfg.test.epoch)
    network.eval()

    with open(os.path.join(cfg.results_dir, 'cfg.json'), 'w') as fid:
        json.dump(cfg, fid)

    dataset = Dataset()
    visualizer = make_visualizer(cfg)
    infer_time_lst = []
    for batch in tqdm.tqdm(dataset):
        batch['inp'] = torch.FloatTensor(batch['inp'])[None].cuda()
        net_time_s = time.time()
        with torch.no_grad():
            output = network(batch['inp'], batch)
        net_used_time = time.time() - net_time_s

        org_img = batch['org_img']
        rz_img = batch['rz_img']
        rz_ratio = batch['rz_ratio']
        img_name = batch['image_name']
        center = batch['meta']['center']
        scale = batch['meta']['scale']
        h, w = batch['inp'].size(2), batch['inp'].size(3)
        if DEBUG:
            print('------------------img_name={}-------------------------'.format(img_name))
            print('org_img.shape:', org_img.shape)
            print('rz_img.shape:', rz_img.shape)
            print('input-size:({}, {})'.format(h, w))

        if cfg.rescore_map_flag:
            rs_thresh = 0.6
            detections = output['detection'].detach().cpu().numpy()
            polys = output['py'][-1].detach().cpu().numpy()
            rs_hm = torch.sigmoid(output['rs_hm']).detach().cpu().numpy()
            rescores = rescoring_polygons(polys, rs_hm)
            conf_keep = np.where(rescores > rs_thresh)[0]
            detections = detections[conf_keep]
            pys = [polys[k] * snake_config.down_ratio for k in conf_keep]
            rescores = rescores[conf_keep]
            rs_hm_path = os.path.join(cfg.vis_dir, (img_name[:-4] + '_rs.png'))
            import matplotlib.pyplot as plt
            plt.imshow(rs_hm[0, 0, ...])
            plt.savefig(rs_hm_path)
        else:
            detections = output['detection'].detach().cpu().numpy()
            detections[:, :4] = detections[:, :4] * snake_config.down_ratio
            bboxes = detections[:, :4]
            scores = detections[:, 4]
            labels = detections[:, 5].astype(int)
            ex_pts = output['ex'].detach().cpu().numpy()
            ex_pts = ex_pts * snake_config.down_ratio
            # pys = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
            iter_ply_output_lst = [x.detach().cpu().numpy() * snake_config.down_ratio for x in output['py']]
            pys = iter_ply_output_lst[-1]

        if cfg.vis_intermediate_output != 'none':
            if cfg.vis_intermediate_output == 'htp':
                xmin, ymin, xmax, ymax = bboxes[:, 0::4], bboxes[:, 1::4], bboxes[:, 2::4], bboxes[:, 3::4]
                pys = np.hstack((xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin))
                pys = pys.reshape(pys.shape[0], 4, 2)
            elif cfg.vis_intermediate_output == 'otp':
                pys = ex_pts
            elif cfg.vis_intermediate_output == 'clm_1':
                pys = iter_ply_output_lst[0]
            elif cfg.vis_intermediate_output == 'clm_2':
                pys = iter_ply_output_lst[1]
            else:
                raise ValueError('Not supported type:', cfg.vis_intermediate_output)
            # the polygon classification branch is force-disabled when
            # visualizing intermediate outputs
            cfg.poly_cls_branch = False

        final_contour_feat = output['final_feat'].detach().cpu().numpy()
        if cfg.poly_cls_branch:
            pys_cls = output['py_cls'][-1].detach().cpu().numpy()
            text_poly_scores = pys_cls[:, 1]
            rem_ids = np.where(text_poly_scores > cfg.poly_conf_thresh)[0]
            detections = detections[rem_ids]
            pys = pys[rem_ids]
            text_poly_scores = text_poly_scores[rem_ids]
            ex_pts = ex_pts[rem_ids]
            final_contour_feat = final_contour_feat[rem_ids]
            if DEBUG:
                print('py_cls_scores:', text_poly_scores)
        if DEBUG:
            print('dets_num:', len(pys))

        if len(pys) == 0:
            all_boundaries, poly_scores = [], []
        else:
            trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
            all_boundaries = [data_utils.affine_transform(py_, trans_output_inv) for py_ in pys]
            bboxes_tmp = [data_utils.affine_transform(det[:4].reshape(-1, 2), trans_output_inv).flatten()
                          for det in detections]
            ex_pts_tmp = [data_utils.affine_transform(ep, trans_output_inv) for ep in ex_pts]
            detections = np.hstack((np.array(bboxes_tmp), detections[:, 4:]))
            ex_pts = np.array(ex_pts_tmp)

            pp_time_s = time.time()
            # sort detections by score
            if cfg.poly_cls_branch:
                detections, ex_points, all_boundaries, final_contour_feat, poly_scores \
                    = sorting_det_results(detections, ex_pts, all_boundaries, final_contour_feat, text_poly_scores)
            else:
                detections, ex_points, all_boundaries = sorting_det_results(detections, ex_pts, all_boundaries)

            if len(all_boundaries) != 0:
                detections[:, :4] /= rz_ratio
                ex_points /= rz_ratio
                all_boundaries = [poly / rz_ratio for poly in all_boundaries]

            # nms
            all_boundaries, rem_inds = snake_poly_utils.poly_nms(all_boundaries)
            detections = detections[rem_inds]
            ex_points = ex_points[rem_inds]
            final_contour_feat = final_contour_feat[rem_inds]
            if cfg.poly_cls_branch:
                poly_scores = poly_scores[rem_inds]
            pp_used_time = time.time() - pp_time_s
            infer_time_lst.append([net_used_time, pp_used_time])
            if DEBUG:
                print('infer_time:', [net_used_time, pp_used_time])

        # -------------------------------- saving results -------------------------------- #
        if cfg.testing_set == 'mlt':
            det_file = os.path.join(cfg.det_dir, ('res_' + img_name[3:-4] + '.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        elif cfg.testing_set == 'ic15':
            det_file = os.path.join(cfg.det_dir, ('res_' + img_name[:-4] + '.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        elif cfg.testing_set == 'msra':
            det_file = os.path.join(cfg.det_dir, ('res_' + img_name[:-4] + '.txt'))
            saving_mot_det_results(det_file, all_boundaries, testing_set=cfg.testing_set, img=org_img)
        else:
            # for arbitrary-shape datasets, e.g., CTW, TOT, ART
            det_file = os.path.join(cfg.det_dir, (img_name[:-4] + '.txt'))
            saving_det_results(det_file, all_boundaries, img=org_img)
        continue  # visualization below is intentionally skipped

        # ------------------------ visualizing results ------------------------ #
        vis_file = os.path.join(cfg.vis_dir, (img_name[:-4] + '.png'))
        if cfg.testing_set == 'ctw':
            gt_file = os.path.join(cfg.gts_dir, (img_name[:-4] + '.txt'))
            gt_polys = load_ctw_gt_label(gt_file)
        elif cfg.testing_set == 'tot':
            gt_file = os.path.join(cfg.gts_dir, ('poly_gt_' + img_name[:-4] + '.mat'))
            gt_polys = load_tot_gt_label(gt_file)
        elif cfg.testing_set == 'art':
            gt_polys = None
        elif cfg.testing_set == 'msra':
            gt_file = os.path.join(cfg.gts_dir, ('gt_' + img_name[:-4] + '.txt'))
            gt_polys = load_msra_gt_label(gt_file)
        else:
            raise ValueError('Not supported dataset ({}) for visualizing'.format(cfg.testing_set))
        plt = vis_dets_gts(org_img, all_boundaries, gt_polys)
        plt.savefig(vis_file, dpi=600, format='png')
        plt.close()

    np.save('infer_time.npy', np.array(infer_time_lst))
def inference():
    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, resume=cfg.resume, epoch=cfg.test.epoch)
    network.eval()

    with open(os.path.join(cfg.results_dir, 'cfg.json'), 'w') as fid:
        json.dump(cfg, fid)

    dataset = Dataset()
    visualizer = make_visualizer(cfg)
    infer_time_lst = []
    for batch in tqdm.tqdm(dataset):
        batch['inp'] = torch.FloatTensor(batch['inp'])[None].cuda()
        net_time_s = time.time()
        with torch.no_grad():
            output = network(batch['inp'], batch)
        net_used_time = time.time() - net_time_s

        org_img = batch['org_img']
        rz_img = batch['rz_img']
        rz_ratio = batch['rz_ratio']
        img_name = batch['image_name']
        center = batch['meta']['center']
        scale = batch['meta']['scale']
        h, w = batch['inp'].size(2), batch['inp'].size(3)

        detections = output['detection'].detach().cpu().numpy()
        detections[:, :4] = detections[:, :4] * snake_config.down_ratio
        bboxes = detections[:, :4]
        scores = detections[:, 4]
        labels = detections[:, 5].astype(int)
        ex_pts = output['ex'].detach().cpu().numpy()
        ex_pts = ex_pts * snake_config.down_ratio
        # pys = output['py'][-1].detach().cpu().numpy() * snake_config.down_ratio
        iter_ply_output_lst = [x.detach().cpu().numpy() * snake_config.down_ratio for x in output['py']]
        pys = iter_ply_output_lst[-1]

        final_contour_feat = output['final_feat'].detach().cpu().numpy()
        if cfg.poly_cls_branch:
            pys_cls = output['py_cls'][-1].detach().cpu().numpy()
            text_poly_scores = pys_cls[:, 1]
            rem_ids = np.where(text_poly_scores > cfg.poly_conf_thresh)[0]
            detections = detections[rem_ids]
            pys = pys[rem_ids]
            text_poly_scores = text_poly_scores[rem_ids]
            ex_pts = ex_pts[rem_ids]
            final_contour_feat = final_contour_feat[rem_ids]

        if len(pys) == 0:
            all_boundaries, poly_scores = [], []
        else:
            trans_output_inv = data_utils.get_affine_transform(center, scale, 0, [w, h], inv=1)
            all_boundaries = [data_utils.affine_transform(py_, trans_output_inv) for py_ in pys]
            bboxes_tmp = [data_utils.affine_transform(det[:4].reshape(-1, 2), trans_output_inv).flatten()
                          for det in detections]
            ex_pts_tmp = [data_utils.affine_transform(ep, trans_output_inv) for ep in ex_pts]
            detections = np.hstack((np.array(bboxes_tmp), detections[:, 4:]))
            ex_pts = np.array(ex_pts_tmp)

            pp_time_s = time.time()
            # sort detections by score
            if cfg.poly_cls_branch:
                detections, ex_points, all_boundaries, final_contour_feat, poly_scores \
                    = sorting_det_results(detections, ex_pts, all_boundaries, final_contour_feat, text_poly_scores)
            else:
                detections, ex_points, all_boundaries = sorting_det_results(detections, ex_pts, all_boundaries)

            if cfg.rle_nms:
                tmp_polys = all_boundaries.copy()
                # all_boundaries, rem_inds = snake_poly_utils.poly_nms(tmp_polys)
                rem_inds = poly_rle_nms(tmp_polys, detections[:, -1], (h, w), nms_thresh=0.3)
                all_boundaries = [all_boundaries[idx] for idx in rem_inds]
            else:
                # nms
                all_boundaries, rem_inds = snake_poly_utils.poly_nms(all_boundaries)

            detections = detections[rem_inds]
            ex_points = ex_points[rem_inds]
            final_contour_feat = final_contour_feat[rem_inds]
            if cfg.poly_cls_branch:
                poly_scores = poly_scores[rem_inds]
            pp_used_time = time.time() - pp_time_s
            infer_time_lst.append([net_used_time, pp_used_time])

            if len(all_boundaries) != 0:
                detections[:, :4] /= rz_ratio
                ex_points /= rz_ratio
                all_boundaries = [poly / rz_ratio for poly in all_boundaries]

        # -------------------------------- saving results -------------------------------- #
        det_file = os.path.join(cfg.det_dir, (img_name[:-4] + '.txt'))
        saving_det_results(det_file, all_boundaries, img=org_img)
def visualize(self, output, batch):
    img_id = int(batch['meta']['img_id'])
    img_data = self.coco.loadImgs(int(img_id))[0]
    path = img_data['file_name']
    depth_path = img_data['depth_path']
    img = np.array(Image.open(path))

    ann_ids = self.coco.getAnnIds(imgIds=img_id, catIds=self.obj_id)
    annos = self.coco.loadAnns(ann_ids)
    kpt_3d = np.concatenate([annos[0]['fps_3d'], [annos[0]['center_3d']]], axis=0)
    corner_3d = np.array(annos[0]['corner_3d'])
    K = np.array(annos[0]['K'])

    kpt_2d = output['kpt_2d'].detach().cpu().numpy()
    centers = batch['meta']['center']
    scales = batch['meta']['scale']
    boxes = batch['meta']['box']
    h, w = batch['inp'].size(2), batch['inp'].size(3)

    kpt_2ds = []
    segs = []
    for i in range(len(centers)):
        center = centers[i].detach().cpu().numpy()
        scale = scales[i].detach().cpu().numpy()
        kpt_2d_ = kpt_2d[i]
        trans_inv = data_utils.get_affine_transform(center[0], scale[0], 0, [w, h], inv=1)
        kpt_2d_ = data_utils.affine_transform(kpt_2d_, trans_inv)
        kpt_2ds.append(kpt_2d_)

        seg = torch.argmax(output['seg'][i], dim=0).detach().cpu().numpy()
        seg = seg.astype(np.uint8)
        seg = cv2.warpAffine(seg, trans_inv, (720, 540), flags=cv2.INTER_NEAREST)
        segs.append(seg)

    _, ax = plt.subplots(1)
    ax.imshow(img)
    # for i in range(len(boxes)):
    #     x_min, y_min, x_max, y_max = boxes[i].view(-1).numpy()
    #     ax.plot([x_min, x_min, x_max, x_max, x_min], [y_min, y_max, y_max, y_min, y_min])

    depth = np.array(Image.open(depth_path)).astype(np.float32)
    for i, kpt_2d in enumerate(kpt_2ds):
        pose_pred = pvnet_pose_utils.pnp(kpt_3d, kpt_2d, K)
        mask = segs[i]
        box = cv2.boundingRect(mask.astype(np.uint8))
        x, y = box[0] + box[2] / 2., box[1] + box[3] / 2.
        z = np.mean(depth[mask != 0] / 10000.)
        x = ((x - K[0, 2]) * z) / float(K[0, 0])
        y = ((y - K[1, 2]) * z) / float(K[1, 1])
        center = [x, y, z]
        # pose_pred[:, 3] = center
        corner_2d_pred = pvnet_pose_utils.project(corner_3d, K, pose_pred)
        ax.add_patch(patches.Polygon(xy=corner_2d_pred[[0, 1, 3, 2, 0, 4, 6, 2]], fill=False, linewidth=1, edgecolor='b'))
        ax.add_patch(patches.Polygon(xy=corner_2d_pred[[5, 4, 6, 7, 5, 1, 3, 7]], fill=False, linewidth=1, edgecolor='b'))

    for anno in annos:
        pose_gt = np.array(anno['pose'])
        corner_2d_gt = pvnet_pose_utils.project(corner_3d, K, pose_gt)
        ax.add_patch(patches.Polygon(xy=corner_2d_gt[[0, 1, 3, 2, 0, 4, 6, 2]], fill=False, linewidth=1, edgecolor='g'))
        ax.add_patch(patches.Polygon(xy=corner_2d_gt[[5, 4, 6, 7, 5, 1, 3, 7]], fill=False, linewidth=1, edgecolor='g'))

    plt.show()