def lane_detection(ori_image, mean, std, input_size, nnet, point=True):
    image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB)
    height, width = image.shape[0:2]
    images = np.zeros((1, 3, input_size[0], input_size[1]), dtype=np.float32)
    masks = np.ones((1, 1, input_size[0], input_size[1]), dtype=np.float32)
    orig_target_sizes = torch.tensor(input_size).unsqueeze(0).cuda()
    pad_image = image.copy()
    pad_mask = np.zeros((height, width, 1), dtype=np.float32)
    resized_image = cv2.resize(pad_image, (input_size[1], input_size[0]))
    resized_mask = cv2.resize(pad_mask, (input_size[1], input_size[0]))
    masks[0][0] = resized_mask.squeeze()
    resized_image = resized_image / 255.
    normalize_(resized_image, mean, std)
    resized_image = resized_image.transpose(2, 0, 1)
    images[0] = resized_image
    images = torch.from_numpy(images).cuda(non_blocking=True)
    masks = torch.from_numpy(masks).cuda(non_blocking=True)

    torch.cuda.synchronize(0)  # 0 is the GPU id
    outputs, _ = nnet.test([images, masks])
    torch.cuda.synchronize(0)  # 0 is the GPU id

    results = PostProcess(outputs, orig_target_sizes)
    pred = results[0].cpu().numpy()
    img = pad_image
    img_h, img_w, _ = img.shape
    pred = pred[pred[:, 0].astype(int) == 1]
    # overlay = np.zeros_like(img, np.uint8)
    overlay_rgb = img.copy()
    point_xy = []
    for i, lane in enumerate(pred):
        lane = lane[1:]  # remove conf
        lower, upper = lane[0], lane[1]
        lane = lane[2:]  # remove upper, lower positions
        # generate points from the polynomial
        ys = np.linspace(lower, upper, num=100)
        points = np.zeros((len(ys), 2), dtype=np.int32)
        points[:, 1] = (ys * img_h).astype(int)
        points[:, 0] = ((lane[0] / (ys - lane[1]) ** 2
                         + lane[2] / (ys - lane[1])
                         + lane[3]
                         + lane[4] * ys
                         - lane[5]) * img_w).astype(int)
        points = points[(points[:, 0] > 0) & (points[:, 0] < img_w)]
        point_xy.append(points)
        if point:
            for xxx, yyy in points:
                # cv2.circle(overlay, (xxx, yyy), 1, color=WHITE, thickness=1)
                cv2.circle(overlay_rgb, (xxx, yyy), 1, color=GREEN, thickness=1)
        else:
            for current_point, next_point in zip(points[:-1], points[1:]):
                # overlay = cv2.line(overlay, tuple(current_point), tuple(next_point), color=WHITE, thickness=1)
                overlay_rgb = cv2.line(overlay_rgb, tuple(current_point), tuple(next_point),
                                       color=GREEN, thickness=1)
    return overlay_rgb, point_xy
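# Hypothetical usage sketch for `lane_detection` above. The input size and the
# mean/std values below are placeholders, not values taken from this repo;
# `nnet` is whatever NetworkFactory-like object exposes `.test(...)`, and
# `GREEN` is the color constant the function expects.
import cv2
import numpy as np

def demo_lane_detection(nnet, image_path="demo.jpg"):
    image = cv2.imread(image_path)
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)  # assumed
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)   # assumed
    overlay, lanes = lane_detection(image, mean, std, (360, 640), nnet, point=False)
    cv2.imwrite("overlay.jpg", overlay)
    return lanes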
def image_preprocess(db, cfg_file, db_inds, scales, result_dir, debug, no_flip, im_queue):
    num_images = db_inds.size
    for ind in range(0, num_images):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            if 'DLA' in cfg_file:
                inp_height = (new_height | 31) + 1
                inp_width = (new_width | 31) + 1
            else:
                inp_height = new_height | 127
                inp_width = new_width | 127
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            if 'DLA' in cfg_file:
                out_height, out_width = inp_height // 4, inp_width // 4
            else:
                out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            if not no_flip:
                images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            im_queue.put([images, ratios, borders, sizes, out_width, image_id])
    time.sleep(num_images * 10)
def __getitem__(self, idx):
    hr_img = cv2.imread(self.hr_lists[idx]).astype(np.float32)
    lr_img = cv2.imread(self.lr_lists[idx]).astype(np.float32)
    file_name = os.path.basename(self.hr_lists[idx]).split('.')[0]
    if self.need_patch:
        patch_pair = get_patch(hr_img, lr_img, self.patch_size, self.scale)
    else:
        patch_pair = {'hr_patch': hr_img, 'lr_patch': lr_img}
    # augment the dataset
    if self.aug:
        patch_pair = augment(patch_pair)
    # normalization
    # if self.normalization == 0:
    #     pass
    # elif self.normalization == 1:
    #     patch_pair['hr_patch'] = patch_pair['hr_patch'] / 255.0
    #     patch_pair['lr_patch'] = patch_pair['lr_patch'] / 255.0
    # else:
    #     raise NotImplementedError
    patch_pair['hr_patch'] = normalize_(patch_pair['hr_patch'], type=self.normalization)
    patch_pair['lr_patch'] = normalize_(patch_pair['lr_patch'], type=self.normalization)
    patch_pair['hr_patch'] = np.transpose(patch_pair['hr_patch'], (2, 0, 1)).astype(np.float32)
    patch_pair['lr_patch'] = np.transpose(patch_pair['lr_patch'], (2, 0, 1)).astype(np.float32)
    patch_pair['hr_patch'] = torch.from_numpy(patch_pair['hr_patch'])
    patch_pair['lr_patch'] = torch.from_numpy(patch_pair['lr_patch'])
    return {
        'hr': patch_pair['hr_patch'],
        'lr': patch_pair['lr_patch'],
        'fn': file_name
    }
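# The `normalize_(img, type=...)` helper called above is defined elsewhere.
# A minimal sketch consistent with the commented-out branches (type 0 keeps
# raw 0-255 floats, type 1 scales into [0, 1]) could look like the following;
# this is an assumption about its behavior, and it is distinct from the
# in-place `normalize_(image, mean, std)` used by the detection code below.
import numpy as np

def normalize_sketch(img, type=0):
    if type == 0:
        return img
    elif type == 1:
        return np.float32(img) / 255.0
    else:
        raise NotImplementedError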
def compute_gn_loss(self, f_t, fb, ub, train_or_val):
    '''
    f_t: target features F_a(ua)
    fb: feature map b, BxCxHxW
    ub: pos matches of ua in b
    '''
    # compute start point and its feature
    ub = ub.to(device)
    B, N, _ = ub.shape
    # uniformly sample a perturbation from the interval [-1, 1]
    xs = torch.FloatTensor(ub.shape).uniform_(-1, 1).to(device) + ub
    f_s = extract_features(fb, xs)
    # compute residual
    f_t = normalize_(f_t)
    f_s = normalize_(f_s)
    r = f_s - f_t
    # compute Jacobian
    f_s_gx, f_s_gy = np_gradient_filter(fb)
    J_xs_x = extract_features(f_s_gx, xs)
    J_xs_y = extract_features(f_s_gy, xs)
    J = torch.stack([J_xs_x, J_xs_y], dim=-1)
    # compute Hessian
    eps = 1e-9  # for invertibility
    H = (J.transpose(1, 2) @ J + eps * batched_eye_like(J, J.shape[2]))
    b = J.transpose(1, 2) @ r[..., None]
    miu = xs.reshape(B * N, 2, 1) - torch.inverse(H) @ b
    # first error term
    e1 = 0.5 * ((ub.reshape(B * N, 2, 1) - miu).transpose(1, 2)).type(torch.float32) @ H @ \
         (ub.reshape(B * N, 2, 1) - miu).type(torch.float32)
    e1 = torch.sum(e1)
    # second error term
    det_H = torch.clamp(torch.det(H), min=1e-16)
    log_det = torch.log(det_H).to(device)
    e2 = B * N * torch.log(torch.tensor(2 * np.pi)).to(device) - 0.5 * log_det.sum(-1).to(device)
    # e = e1 + 2 * e2 / 7
    e = self.e1_lamda * e1 + self.e2_lamda * e2
    return e, e1, e2
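# `extract_features`, `batched_eye_like`, and `np_gradient_filter` above are
# assumed helpers not shown in this section. A minimal sketch of the first
# two, assuming `pts` holds (x, y) pixel coordinates and that features are
# flattened to (B*N, C) so the (B*N, 2, 2) Hessian algebra above works out;
# the real helpers may differ in layout.
import torch
import torch.nn.functional as F

def batched_eye_like(x, n):
    # one n-by-n identity matrix per batch element, matching x's dtype/device
    return torch.eye(n, dtype=x.dtype, device=x.device).expand(x.shape[0], n, n)

def extract_features(fmap, pts):
    # fmap: B x C x H x W, pts: B x N x 2 in pixel coordinates
    B, C, H, W = fmap.shape
    grid = pts.clone()
    grid[..., 0] = 2.0 * pts[..., 0] / (W - 1) - 1.0  # x -> [-1, 1]
    grid[..., 1] = 2.0 * pts[..., 1] / (H - 1) - 1.0  # y -> [-1, 1]
    sampled = F.grid_sample(fmap, grid.unsqueeze(2), align_corners=True)  # B x C x N x 1
    return sampled.squeeze(-1).permute(0, 2, 1).reshape(-1, C)            # (B*N) x C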
def get_input_image(args):
    img = cv2.imread(args.test_file)
    file_name = os.path.basename(args.test_file).split('.')[0]
    # if args.normalization == 0:
    #     pass
    # elif args.normalization == 1:
    #     img = np.float32(img) / 255.0
    # else:
    #     raise NotImplementedError
    img = normalize_(img, type=args.normalization)
    img = np.transpose(img, (2, 0, 1)).astype(np.float32)
    img = torch.from_numpy(img)
    img = img.unsqueeze(0)
    return {'img': img, 'fn': file_name}
def kp_detection_image(image, db: LV, nnet: NetworkFactory, debug=False,
                       decode_func=kp_decode, db_ind=None, debug_dir=None):
    """Run detection on a single image.

    :param image: image loaded with cv2.imread
    :param db:
    :param nnet:
    :param debug:
    :param decode_func:
    :param db_ind:
    :param debug_dir:
    :return: {[1-5] -> (num detections in that class, 5)}, with columns
             tl_xs, tl_ys, br_xs, br_ys, scores
    """
    if debug and (db_ind is None or debug_dir is None):
        raise ValueError(
            "db_ind and debug_dir should be specified when debug is turned on")
    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    height, width = image.shape[0:2]
    detections = []
    center_points = []
    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])
        # the bitwise OR with 127 pads each side so that (size + 1) is a
        # multiple of 128, large enough to cover the scaled image
        inp_height = new_height | 127
        inp_width = new_width | 127
        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)
        # (inp_height + 1) and (inp_width + 1) are guaranteed divisible by 4
        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width
        # resize by the current scale first
        resized_image = cv2.resize(image, (new_width, new_height))
        # then crop around the center of the scaled image to [inp_height, inp_width];
        # since inp_height/inp_width are always larger than new_height/new_width,
        # this effectively pads the image with black borders around the center
        resized_image, border, offset = crop_image(
            resized_image, new_center, [inp_height, inp_width])
        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)
        # resized_image is (H, W, C); transpose to (C, H, W) for PyTorch
        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        # this size is the extent of actual content; resized_image itself is
        # [inp_height, inp_width]
        sizes[0] = [int(height * scale), int(width * scale)]
        # output-to-input ratio
        ratios[0] = [height_ratio, width_ratio]
        # stack the image with its horizontally flipped copy
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        # dets: (batch, 2 * num_dets, 8)
        # center: (batch, 2 * K, 4)
        dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
        dets = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        # map the detections from the flipped copy back onto the original image
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)      # (1, 2 * num_dets, 8)
        center = center.reshape(1, -1, 4)  # (1, 2 * K, 4)
        # remove boxes that are invalid in the original image
        _rescale_dets(dets, ratios, borders, sizes)
        center[..., [0]] /= ratios[:, 1][:, None, None]
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
        np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
        # map back to original-image coordinates
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale
        # center points are only used at scale == 1
        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    # merge detections from all scales
    detections = np.concatenate(detections, axis=1)      # (1, 2 * num_dets * len(scales), 8)
    center_points = np.concatenate(center_points, axis=1)  # (1, 2 * K, 4)
    classes = detections[..., -1]
    classes = classes[0]              # (2 * num_dets * len(scales),)
    detections = detections[0]        # (2 * num_dets * len(scales), 8)
    center_points = center_points[0]  # (2 * K, 4)

    # collect all valid candidate boxes
    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]                      # (num valid, 8)
    box_width = valid_detections[:, 2] - valid_detections[:, 0]   # (num valid,)
    box_height = valid_detections[:, 3] - valid_detections[:, 1]  # (num valid,)
    # small vs. large candidate boxes
    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)
    s_detections = valid_detections[s_ind]  # (num small, 8)
    l_detections = valid_detections[l_ind]  # (num large, 8)

    # small boxes: check whether the central region contains a center point;
    # one same-class center point in the central region suffices, and the
    # highest-scoring one is used
    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
    s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1
    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]
    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
         (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0)
    s_detections[:, 4][ind_s_new_score] = \
        (s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3

    # large boxes: same central-region check
    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5
    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1
    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]
    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
    ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
         (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
    l_detections[:, 4][ind_l_new_score] = \
        (l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3

    # merge large- and small-box results and sort by score
    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]
    classes = detections[..., -1]
    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    ret = {}
    for j in range(categories):
        keep_inds = (classes == j)
        ret[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(ret[j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(ret[j + 1], Nt=nms_threshold, method=nms_algorithm)
        ret[j + 1] = ret[j + 1][:, 0:5]

    scores = np.hstack([ret[j][:, -1] for j in range(1, categories + 1)])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (ret[j][:, -1] >= thresh)
            ret[j] = ret[j][keep_inds]

    if debug:
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        im = image[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        fig = ax.imshow(im, aspect='equal')
        plt.axis('off')
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        # bboxes = {}
        for j in range(1, categories + 1):
            keep_inds = (ret[j][:, -1] >= 0.4)  # score threshold used for drawing
            cat_name = db.class_name(j)
            for bbox in ret[j][keep_inds]:
                score = bbox[4]
                bbox = bbox[0:4].astype(np.int32)
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                # if (xmax - xmin) * (ymax - ymin) > 5184:
                ax.add_patch(
                    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor=colours[j - 1], linewidth=4.0))
                ax.text(xmin + 1, ymin - 3, '{} {:.3f}'.format(cat_name, score),
                        bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5),
                        fontsize=15, color='white', weight='bold')
        # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
        debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
        # plt.savefig(debug_file1)
        plt.savefig(debug_file2, bbox_inches='tight', pad_inches=0)
        plt.close()
        # cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
        # also save the ground-truth image for comparison
        db.display(db_ind, os.path.join(debug_dir, "{}_gt.jpg".format(db_ind)), show=False)
    return ret
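# The small/large-box rescoring above keeps a box only if a same-class center
# keypoint falls inside its central region (the middle third for small boxes,
# the middle fifth for large ones). A toy single-box illustration of the
# small-box rule; all values below are made up:
import numpy as np

def center_region_check_demo():
    box = np.array([0., 0., 90., 90., 0.8])  # x1, y1, x2, y2, score
    center = np.array([45., 45., 0., 0.9])   # cx, cy, class, score
    left, right = (2 * box[0] + box[2]) / 3, (box[0] + 2 * box[2]) / 3
    top, bottom = (2 * box[1] + box[3]) / 3, (box[1] + 2 * box[3]) / 3
    if left < center[0] < right and top < center[1] < bottom:
        # blend the box score with the center score, exactly as above
        box[4] = (box[4] * 2 + center[3]) / 3
    return box[4]  # 0.8333... for these numbers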
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    result_json = os.path.join(result_dir, "results.json")
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        # build two extra channels holding normalized x/y coordinates
        image_xy = np.zeros((image.shape[0], image.shape[1], 2), dtype=np.float32)
        x_mark = np.arange(image.shape[1], dtype=np.float32) / image.shape[1]
        for i in range(image.shape[0]):
            image_xy[i, :, 0] = x_mark
        y_mark = np.arange(image.shape[0], dtype=np.float32) / image.shape[0]
        for i in range(image.shape[1]):
            image_xy[:, i, 1] = y_mark
        height, width = image.shape[0:2]
        detections = []
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            inp_height = new_height | 127
            inp_width = new_width | 127
            images = np.zeros((1, 5, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image_xy = cv2.resize(image_xy, (new_width, new_height))
            resized_image, border, offset = crop_image(
                resized_image, new_center, [inp_height, inp_width])
            resized_image_xy, border, offset = crop_image(
                resized_image_xy, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            images[0, 0:3] = resized_image.transpose((2, 0, 1))
            images[0, 3:5] = resized_image_xy.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            images = torch.from_numpy(images)
            dets, dets_tl, dets_br, flag = decode_func(
                nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            if not flag:
                print("error when trying to test %s" % image_file)
                continue
            dets = dets.reshape(1, -1, 8)
            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)
        if len(detections) == 0:
            continue
        detections = np.concatenate(detections, axis=1)
        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                nms.soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                                   method=nms_algorithm, weight_exp=weight_exp)
            else:
                nms.soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]
        scores = np.hstack([top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if debug:
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] > 0.5)
                cat_name = db.class_name(j)
                cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color = np.random.random((3, )) * 0.6 + 0.4
                color = color * 255
                color = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        cv2.rectangle(image, (bbox[0], bbox[1] + 2),
                                      (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                                      color, -1)
                        cv2.putText(image, cat_name, (bbox[0], bbox[1] + cat_size[1] + 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
                    else:
                        cv2.rectangle(image, (bbox[0], bbox[1] - cat_size[1] - 2),
                                      (bbox[0] + cat_size[0], bbox[1] - 2), color, -1)
                        cv2.putText(image, cat_name, (bbox[0], bbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
                    cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))

    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    image_ids = [db.image_ids(ind) for ind in db_inds]
    with open(result_json, "r") as f:
        result_json = json.load(f)
    for cls_type in range(1, categories + 1):
        db.evaluate(result_json, [cls_type], image_ids)
    return 0
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    aggr_weight = db.configs["aggr_weight"]
    scores_thresh = db.configs["scores_thresh"]
    center_thresh = db.configs["center_thresh"]
    suppres_ghost = db.configs["suppres_ghost"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    cluster_radius = db.configs["cluster_radius"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]
        detections = []
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            inp_height = new_height | 127
            inp_width = new_width | 127
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center,
                                                       [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = decode_func(nnet, images, K, aggr_weight=aggr_weight,
                               scores_thresh=scores_thresh, center_thresh=center_thresh,
                               kernel=nms_kernel, debug=debug)
            dets = dets.reshape(2, -1, 14)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets[1, :, [5, 7, 9, 11]] = out_width - dets[1, :, [5, 7, 9, 11]]
            dets[1, :, [7, 8, 11, 12]] = dets[1, :, [11, 12, 7, 8]].copy()
            dets = dets.reshape(1, -1, 14)
            _rescale_dets(dets, ratios, borders, sizes)
            _rescale_ex_pts(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            dets[:, :, 5:13] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        keep_inds = (detections[:, 4] > 0)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds].astype(np.float32)
            soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
        scores = np.hstack(
            [top_bboxes[image_id][j][:, 4] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        for j in range(1, categories + 1):
            keep = []
            i = 0
            for bbox in top_bboxes[image_id][j]:
                sc = bbox[4]
                ex = bbox[5:13].astype(np.int32).reshape(4, 2)
                feature_val = feature(ex)
                if feature_val > cluster_radius:
                    keep.append(i)
                i = i + 1
            top_bboxes[image_id][j] = np.delete(top_bboxes[image_id][j], keep, axis=0)

        if suppres_ghost:
            for j in range(1, categories + 1):
                n = len(top_bboxes[image_id][j])
                for k in range(n):
                    inside_score = 0
                    if top_bboxes[image_id][j][k, 4] > 0.2:
                        for t in range(n):
                            if _box_inside(top_bboxes[image_id][j][t],
                                           top_bboxes[image_id][j][k]):
                                inside_score += top_bboxes[image_id][j][t, 4]
                        if inside_score > top_bboxes[image_id][j][k, 4] * 3:
                            top_bboxes[image_id][j][k, 4] /= 2

        if debug:
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] > 0.3)
                cat_name = db.class_name(j)
                cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color = np.random.random((3, )) * 0.6 + 0.4
                color = color * 255
                color = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    sc = bbox[4]
                    bbox = bbox[0:4].astype(np.int32)
                    txt = '{}{:.0f}'.format(cat_name, sc * 10)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        cv2.rectangle(image, (bbox[0], bbox[1] + 2),
                                      (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                                      color, -1)
                        cv2.putText(image, txt, (bbox[0], bbox[1] + cat_size[1] + 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0),
                                    thickness=1, lineType=cv2.LINE_AA)
                    else:
                        cv2.rectangle(image, (bbox[0], bbox[1] - cat_size[1] - 2),
                                      (bbox[0] + cat_size[0], bbox[1] - 2), color, -1)
                        cv2.putText(image, txt, (bbox[0], bbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0),
                                    thickness=1, lineType=cv2.LINE_AA)
                    cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            cv2.imwrite(debug_file, image)
            cv2.imshow('out', image)
            cv2.waitKey()

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
def kp_detection(db, nnet, image_root, debug=False, evaluator=None):
    input_size = db.configs["input_size"]  # [h w]
    image_dir = os.path.join(image_root, "images")
    result_dir = os.path.join(image_root, "detections")
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    image_names = os.listdir(image_dir)
    num_images = len(image_names)

    postprocessors = {'bbox': PostProcess()}

    for ind in tqdm(range(0, num_images), ncols=67, desc="locating kps"):
        image_file = os.path.join(image_dir, image_names[ind])
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]

        images = np.zeros((1, 3, input_size[0], input_size[1]), dtype=np.float32)
        masks = np.ones((1, 1, input_size[0], input_size[1]), dtype=np.float32)
        orig_target_sizes = torch.tensor(input_size).unsqueeze(0).cuda()
        pad_image = image.copy()
        pad_mask = np.zeros((height, width, 1), dtype=np.float32)
        resized_image = cv2.resize(pad_image, (input_size[1], input_size[0]))
        resized_mask = cv2.resize(pad_mask, (input_size[1], input_size[0]))
        masks[0][0] = resized_mask.squeeze()
        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)
        resized_image = resized_image.transpose(2, 0, 1)
        images[0] = resized_image
        images = torch.from_numpy(images).cuda(non_blocking=True)
        masks = torch.from_numpy(masks).cuda(non_blocking=True)

        torch.cuda.synchronize(0)  # 0 is the GPU id
        t0 = time.time()
        outputs, weights = nnet.test([images, masks])
        torch.cuda.synchronize(0)  # 0 is the GPU id
        t = time.time() - t0

        results = postprocessors['bbox'](outputs, orig_target_sizes)
        if evaluator is not None:
            evaluator.add_prediction(ind, results.cpu().numpy(), t)

        if debug:
            pred = results[0].cpu().numpy()
            img = pad_image
            img_h, img_w, _ = img.shape
            pred = pred[pred[:, 0].astype(int) == 1]
            overlay = img.copy()
            color = (0, 255, 0)
            for i, lane in enumerate(pred):
                lane = lane[1:]  # remove conf
                lower, upper = lane[0], lane[1]
                lane = lane[2:]  # remove upper, lower positions
                # generate points from the polynomial
                ys = np.linspace(lower, upper, num=100)
                points = np.zeros((len(ys), 2), dtype=np.int32)
                points[:, 1] = (ys * img_h).astype(int)
                points[:, 0] = ((lane[0] / (ys - lane[1]) ** 2
                                 + lane[2] / (ys - lane[1])
                                 + lane[3]
                                 + lane[4] * ys
                                 - lane[5]) * img_w).astype(int)
                points = points[(points[:, 0] > 0) & (points[:, 0] < img_w)]
                # draw lane with a polyline on the overlay
                for current_point, next_point in zip(points[:-1], points[1:]):
                    overlay = cv2.line(overlay, tuple(current_point), tuple(next_point),
                                       color=color, thickness=15)
                # draw lane ID
                if len(points) > 0:
                    cv2.putText(img, str(i), tuple(points[0]),
                                fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                                color=color, thickness=3)
            # add lanes overlay
            w = 0.6
            img = ((1. - w) * img + w * overlay).astype(np.uint8)
            cv2.imwrite(os.path.join(result_dir, image_names[ind][:-4] + '.jpg'), img)
    return 0
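# Both `lane_detection` above and the debug branch here evaluate the same
# curve x(y) = (a / (y - b)^2 + c / (y - b) + d + e*y - f) * img_w. The same
# logic factored out as a standalone helper (the function name is ours, not
# the repo's):
import numpy as np

def lane_points(lane, img_h, img_w, lower, upper, num=100):
    ys = np.linspace(lower, upper, num=num)
    points = np.zeros((len(ys), 2), dtype=np.int32)
    points[:, 1] = (ys * img_h).astype(int)
    points[:, 0] = ((lane[0] / (ys - lane[1]) ** 2 + lane[2] / (ys - lane[1])
                     + lane[3] + lane[4] * ys - lane[5]) * img_w).astype(int)
    # keep only points that land inside the image
    return points[(points[:, 0] > 0) & (points[:, 0] < img_w)]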
def inference(db, nnet, image, decode_func=kp_decode):
    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    height, width = image.shape[0:2]
    detections, center_points = [], []
    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])
        inp_height = new_height | 127
        inp_width = new_width | 127
        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)
        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width
        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(resized_image, new_center,
                                                   [inp_height, inp_width])
        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)
        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
        dets = dets.reshape(2, -1, 8)      # bboxes, scores, tl_scores, br_scores, clses
        center = center.reshape(2, -1, 4)  # ct_xs, ct_ys, ct_clses, ct_scores
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]  # flip
        center[1, :, [0]] = out_width - center[1, :, [0]]    # horizontal flip
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)
        _rescale_dets(dets, ratios, borders, sizes)
        center[..., [0]] /= ratios[:, 1][:, None, None]  # remap to origin image
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
        np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale  # remap to origin image
        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)
    center_points = np.concatenate(center_points, axis=1)
    classes = detections[..., -1]
    classes = classes[0]
    detections = detections[0]
    center_points = center_points[0]

    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]
    box_width = valid_detections[:, 2] - valid_detections[:, 0]
    box_height = valid_detections[:, 3] - valid_detections[:, 1]
    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)
    s_detections = valid_detections[s_ind]
    l_detections = valid_detections[l_ind]

    # trisection
    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3   # x + (y - x) / 3
    s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3  # x + 2(y - x) / 3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1
    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]
    # located in center region
    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    # same classes
    ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
         (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0)
    # select the boxes whose center region contains a center point
    s_detections[:, 4][ind_s_new_score] = (
        s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3

    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5
    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1
    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]
    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
    ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
         (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
    l_detections[:, 4][ind_l_new_score] = (
        l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3

    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]  # re-sort by the new scores
    classes = detections[..., -1]

    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    # soft_nms
    top_bboxes = {}
    for j in range(categories):
        keep_inds = (classes == j)
        top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[j + 1], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm)
        top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5]

    scores = np.hstack([top_bboxes[j][:, -1] for j in range(1, categories + 1)])
    # select boxes
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (top_bboxes[j][:, -1] >= thresh)
            top_bboxes[j] = top_bboxes[j][keep_inds]
    return top_bboxes
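# Hypothetical driver for `inference` above. The image path and score
# threshold are placeholders; `db` and `nnet` come from the surrounding test
# harness. `top_bboxes` maps class id -> (N, 5) arrays of x1, y1, x2, y2, score.
import cv2

def run_inference_demo(db, nnet, image_path="demo.jpg", score_thresh=0.4):
    image = cv2.imread(image_path)
    top_bboxes = inference(db, nnet, image)
    for cls_id, dets in top_bboxes.items():
        for x1, y1, x2, y2, score in dets:
            if score >= score_thresh:
                print(cls_id, int(x1), int(y1), int(x2), int(y2), round(float(score), 3))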
def kp_detection(db, k_ind):
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size
    input_size = db.configs["input_size"]
    lighting = db.configs["lighting"]
    rand_color = db.configs["rand_color"]

    images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)  # b, 3, H, W
    masks = np.zeros((batch_size, 1, input_size[0], input_size[1]), dtype=np.float32)   # b, 1, H, W
    gt_lanes = []

    db_size = db.db_inds.size  # 3268 | 2782
    for b_ind in range(batch_size):
        if k_ind == 0:
            db.shuffle_inds()
        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading ground truth
        item = db.detections(db_ind)  # all in the raw coordinate
        img = cv2.imread(item['path'])
        mask = np.ones((1, img.shape[0], img.shape[1], 1), dtype=bool)
        label = item['label']
        transform = True
        if transform:
            line_strings = db.lane_to_linestrings(item['old_anno']['lanes'])
            line_strings = LineStringsOnImage(line_strings, shape=img.shape)
            img, line_strings, mask = db.transform(image=img, line_strings=line_strings,
                                                   segmentation_maps=mask)
            line_strings.clip_out_of_image_()
            new_anno = {'path': item['path'], 'lanes': db.linestrings_to_lanes(line_strings)}
            new_anno['categories'] = item['categories']
            label = db._transform_annotation(new_anno,
                                             img_wh=(input_size[1], input_size[0]))['label']

        # clip polys
        tgt_ids = label[:, 0]
        label = label[tgt_ids > 0]

        # make lower the same
        label[:, 1][label[:, 1] < 0] = 1
        label[:, 1][...] = np.min(label[:, 1])

        label = np.stack([label] * batch_size, axis=0)
        gt_lanes.append(torch.from_numpy(label.astype(np.float32)))

        img = (img / 255.).astype(np.float32)
        if rand_color:
            color_jittering_(data_rng, img)
        if lighting:
            lighting_(data_rng, img, 0.1, db.eig_val, db.eig_vec)
        normalize_(img, db.mean, db.std)
        images[b_ind] = img.transpose((2, 0, 1))
        masks[b_ind] = np.logical_not(mask[:, :, :, 0])

    images = torch.from_numpy(images)
    masks = torch.from_numpy(masks)

    return {
        "xs": [images, masks],
        "ys": [images, *gt_lanes]
    }, k_ind
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    NT = 20  # NT: number of test images in debug mode
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)  # create the directory
    if db.split != "trainval":
        # outside debug mode, run detection on every image in the dataset
        db_inds = db.db_inds[:NT] if debug else db.db_inds
    else:
        # in debug mode, only the first NT images are used
        db_inds = db.db_inds[:NT] if debug else db.db_inds[:5000]
    num_images = db_inds.size  # number of images to test

    K = db.configs["top_k"]  # detections kept per image
    ae_threshold = db.configs["ae_threshold"]  # embedding distance threshold for grouping corners
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}  # records the top-k detection boxes
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        # fetch the image
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]

        # record detections and center points
        detections = []
        center_points = []
        for scale in scales:
            # preprocessing for the current scale
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            inp_height = new_height | 127  # pad so the input covers the scaled frame
            inp_width = new_width | 127
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center,
                                                       [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)

            # decode detections
            dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)
            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
            np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale
            if scale == 1:
                center_points.append(center)  # only keep center points at the original scale
            detections.append(dets)  # detection results

        # consolidate this image's detections
        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)
        classes = detections[..., -1]  # class information
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]
        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]
        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)
        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
        s_temp_score = copy.copy(s_detections[:, 4])  # score of each bbox
        s_detections[:, 4] = -1
        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]
        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
             (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0)
        s_detections[:, 4][ind_s_new_score] = \
            (s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5
        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1
        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]
        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
             (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
        l_detections[:, 4][ind_l_new_score] = \
            (l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        # NMS
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]
        scores = np.hstack([top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        # debug mode
        if debug:
            image_file = db.image_file(db_ind)
            _, filename0 = os.path.split(image_file)    # split out the file name
            img_name0, _ = os.path.splitext(filename0)  # file name without extension
            # file handle for recording box locations
            FileTXT = open(debug_dir + "/" + img_name0 + ".txt", mode="a")
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)
            # bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4)
                cat_name = db.class_name(j)
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    xmin = bbox[0]
                    ymin = bbox[1]
                    xmax = bbox[2]
                    ymax = bbox[3]
                    # bbox location and size info
                    FileTXT.write(str(1) + ' ' + str(int(xmin)) + ' ' + str(int(ymin)) + ' '
                                  + str(int(xmax)) + ' ' + str(int(ymax)) + ' ' + str(1))
                    FileTXT.write('\n')
                    # draw the box
                    ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                               fill=False, edgecolor=colours[j - 1],
                                               linewidth=4.0))
                    ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name),
                            bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5),
                            fontsize=15, color='white', weight='bold')
            FileTXT.close()
            # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))  # PDF version
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(img_name0))  # jpg version
            # plt.savefig(debug_file1)
            plt.savefig(debug_file2)  # save the figure
            plt.close()

    result_json = os.path.join(result_dir, "results.json")  # store detections as JSON
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)  # evaluate
    return 0
def kp_detection(db, nnet, result_dir, debug=True, decode_func=kp_decode):
    db_inds = db.db_inds[:10] if debug else db.db_inds
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # Paths
        result_path = result_dir + "/{}".format(image_id[:-4])
        result_json = os.path.join(result_path, "results.json")
        result_debug = os.path.join(result_path, "{}.jpg".format(db_ind))
        if pexists(result_json):
            continue
        # Create dirs
        Path(result_path).mkdir(parents=True, exist_ok=True)

        height, width = image.shape[0:2]
        detections = []
        center_points = []
        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            inp_height = new_height | 127
            inp_width = new_width | 127
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center,
                                                       [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)
            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
            np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale
            if scale == 1:
                center_points.append(center)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)
        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]
        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]
        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)
        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
        s_temp_score = copy.copy(s_detections[:, 4])
        s_detections[:, 4] = -1
        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]
        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
             (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5
        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1
        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]
        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
             (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold,
                               method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]
        scores = np.hstack([top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        detections = db.parse_detections(top_bboxes[image_id])
        # if no valid detections
        if len(detections) == 0:
            # shutil.rmtree(Path(result_dir + "/{}".format(image_id[:-4])))
            continue
        else:
            # Save JSON
            with open(result_json, "w") as f:
                json.dump(detections, f)

        # Save also images with labels
        if debug:
            # Get image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]
            # Create matplotlib fig
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)
            for x in detections:
                bbox = x["bbox"]
                # Get corner points from width and height
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                ax.add_patch(
                    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor=colours[j - 1], linewidth=4.0))
                ax.text(xmin + 1, ymin - 3, '{:s}'.format(x["category_id"]),
                        bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5),
                        fontsize=15, color='white', weight='bold')
            plt.savefig(result_debug)
            plt.close()
    return 0
def kp_detection(db, nnet, result_dir, debug=False, evaluator=None, repeat=1, isEncAttn=False, isDecAttn=False): if db.split != "train": db_inds = db.db_inds if debug else db.db_inds else: db_inds = db.db_inds[:100] if debug else db.db_inds num_images = db_inds.size multi_scales = db.configs["test_scales"] input_size = db.configs["input_size"] # [h w] postprocessors = {'curves': PostProcess()} for ind in tqdm(range(0, num_images), ncols=67, desc="locating kps"): db_ind = db_inds[ind] # image_id = db.image_ids(db_ind) image_file = db.image_file(db_ind) image = cv2.imread(image_file) raw_img = image.copy() raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB) height, width = image.shape[0:2] # item = db.detections(db_ind) # all in the raw coordinate for scale in multi_scales: images = np.zeros((1, 3, input_size[0], input_size[1]), dtype=np.float32) masks = np.ones((1, 1, input_size[0], input_size[1]), dtype=np.float32) orig_target_sizes = torch.tensor(input_size).unsqueeze(0).cuda() pad_image = image.copy() pad_mask = np.zeros((height, width, 1), dtype=np.float32) resized_image = cv2.resize(pad_image, (input_size[1], input_size[0])) resized_mask = cv2.resize(pad_mask, (input_size[1], input_size[0])) masks[0][0] = resized_mask.squeeze() resized_image = resized_image / 255. normalize_(resized_image, db.mean, db.std) resized_image = resized_image.transpose(2, 0, 1) images[0] = resized_image images = torch.from_numpy(images).cuda(non_blocking=True) masks = torch.from_numpy(masks).cuda(non_blocking=True) # seeking better FPS performance images = images.repeat(repeat, 1, 1, 1).cuda(non_blocking=True) masks = masks.repeat(repeat, 1, 1, 1).cuda(non_blocking=True) # below codes are used for drawing attention maps conv_features, enc_attn_weights, dec_attn_weights = [], [], [] if isDecAttn or isEncAttn: hooks = [ nnet.model.module.layer4[-1].register_forward_hook( lambda self, input, output: conv_features.append(output)), nnet.model.module.transformer.encoder.layers[-1].self_attn.register_forward_hook( lambda self, input, output: enc_attn_weights.append(output[1])), nnet.model.module.transformer.decoder.layers[-1].multihead_attn.register_forward_hook( lambda self, input, output: dec_attn_weights.append(output[1])) ] torch.cuda.synchronize(0) # 0 is the GPU id t0 = time.time() outputs, weights = nnet.test([images, masks]) torch.cuda.synchronize(0) # 0 is the GPU id t = time.time() - t0 # below codes are used for drawing attention maps if isDecAttn or isEncAttn: for hook in hooks: hook.remove() conv_features = conv_features[0] enc_attn_weights = enc_attn_weights[0] dec_attn_weights = dec_attn_weights[0] results = postprocessors['curves'](outputs, orig_target_sizes) if evaluator is not None: evaluator.add_prediction(ind, results.cpu().numpy(), t / repeat) if debug: img_lst = image_file.split('/') lane_debug_dir = os.path.join(result_dir, "lane_debug") if not os.path.exists(lane_debug_dir): os.makedirs(lane_debug_dir) # # Draw dec attn if isDecAttn: h, w = conv_features.shape[-2:] keep = results[0, :, 0].cpu() == 1. 
fig, axs = plt.subplots(ncols=keep.nonzero().shape[0] + 1, nrows=2, figsize=(44, 14)) # print(keep.nonzero().shape[0], image_file) # colors = COLORS * 100 for idx, ax_i in zip(keep.nonzero(), axs.T): ax = ax_i[0] ax.imshow(dec_attn_weights[0, idx].view(h, w).cpu()) ax.axis('off') ax.set_title('query id: [{}]'.format(idx)) ax = ax_i[1] preds = db.draw_annotation(ind, pred=results[0][idx].cpu().numpy(), cls_pred=None, img=raw_img) ax.imshow(preds) ax.axis('off') fig.tight_layout() img_path = os.path.join(lane_debug_dir, 'decAttn_{}_{}_{}.jpg'.format( img_lst[-3], img_lst[-2], os.path.basename(image_file[:-4]))) plt.savefig(img_path) plt.close(fig) # # Draw enc attn if isEncAttn: img_dir = os.path.join(lane_debug_dir, '{}_{}_{}'.format( img_lst[-3], img_lst[-2], os.path.basename(image_file[:-4]))) if not os.path.exists(img_dir): os.makedirs(img_dir) f_map = conv_features # print('encoder attention: {}'.format(enc_attn_weights[0].shape)) # print('feature map: {}'.format(f_map.shape)) shape = f_map.shape[-2:] image_height, image_width, _ = raw_img.shape sattn = enc_attn_weights[0].reshape(shape + shape).cpu() _, label, _ = db.__getitem__(ind) # 4, 115 # print(db.max_points) # 56 for i, lane in enumerate(label): if lane[0] == 0: # Skip invalid lanes continue lane = lane[3:] # remove conf, upper and lower positions xs = lane[:len(lane) // 2] ys = lane[len(lane) // 2:] ys = ys[xs >= 0] xs = xs[xs >= 0] # norm_idxs = zip(ys, xs) idxs = np.stack([ys * image_height, xs * image_width], axis=-1) attn_idxs = np.stack([ys * shape[0], xs * shape[1]], axis=-1) for idx_o, idx, num in zip(idxs, attn_idxs, range(xs.shape[0])): fig, axs = plt.subplots(ncols=1, nrows=2, figsize=(20, 14)) ax_i = axs.T ax = ax_i[0] ax.imshow(sattn[..., int(idx[0]), int(idx[1])], cmap='cividis', interpolation='nearest') ax.axis('off') ax.set_title('{}'.format(idx_o.astype(int))) ax = ax_i[1] ax.imshow(raw_img) ax.add_patch(plt.Circle((int(idx_o[1]), int(idx_o[0])), color='r', radius=16)) ax.axis('off') fig.tight_layout() img_path = os.path.join(img_dir, 'encAttn_lane{}_{}_{}.jpg'.format( i, num, idx_o.astype(int))) plt.savefig(img_path) plt.close(fig) if not isEncAttn and not isDecAttn: preds = db.draw_annotation(ind, pred=results[0].cpu().numpy(), cls_pred=None, img=image) cv2.imwrite(os.path.join(lane_debug_dir, img_lst[-3] + '_' + img_lst[-2] + '_' + os.path.basename(image_file[:-4]) + '.jpg'), preds) if not debug: exp_name = 'tusimple' evaluator.exp_name = exp_name eval_str, _ = evaluator.eval(label='{}'.format(os.path.basename(exp_name))) print(eval_str) return 0
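# --- illustrative sketch (not part of the original sources) -------------------
# The isEncAttn/isDecAttn path above captures attention with forward hooks. A
# minimal, self-contained version of the pattern (the real module paths depend
# on the network definition; the hook target here is a bare nn.MultiheadAttention
# for demonstration). nn.MultiheadAttention returns (attn_output, attn_weights),
# hence the output[1] indexing used above.
import torch
import torch.nn as nn

attn_maps = []
mha = nn.MultiheadAttention(embed_dim=8, num_heads=2)
hook = mha.register_forward_hook(lambda mod, inp, out: attn_maps.append(out[1]))
x = torch.randn(4, 1, 8)     # (seq_len, batch, embed_dim)
mha(x, x, x)                 # self-attention forward pass fires the hook
hook.remove()                # always remove hooks after use
assert attn_maps[0].shape == (1, 4, 4)   # (batch, query, key)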
def test(db, split, testiter, debug=False, suffix=None): result_dir = system_configs.result_dir result_dir = os.path.join(result_dir, str(testiter), split) class_name = [] for i in range(1, len(db._coco.cats)): # if db._coco.cats[i] is None: # continue # else: ind = db._cat_ids[i] class_name.append(db._coco.cats[ind]['name']) if suffix is not None: result_dir = os.path.join(result_dir, suffix) make_dirs([result_dir]) test_iter = system_configs.max_iter if testiter is None else testiter print("loading parameters at iteration: {}".format(test_iter)) print("building neural network...") nnet = NetworkFactory(db) print("loading parameters...") nnet.load_params(test_iter) # test_file = "test.{}".format(db.data) # testing = importlib.import_module(test_file).testing nnet.cuda() nnet.eval_mode() debug_dir = os.path.join(result_dir, "debug") if not os.path.exists(debug_dir): os.makedirs(debug_dir) if db.split != "trainval": db_inds = db.db_inds[:100] if debug else db.db_inds else: db_inds = db.db_inds[:100] if debug else db.db_inds[:5000] K = db.configs["top_k"] ae_threshold = db.configs["ae_threshold"] nms_kernel = db.configs["nms_kernel"] scales = db.configs["test_scales"] weight_exp = db.configs["weight_exp"] merge_bbox = db.configs["merge_bbox"] categories = db.configs["categories"] nms_threshold = db.configs["nms_threshold"] max_per_image = db.configs["max_per_image"] nms_algorithm = { "nms": 0, "linear_soft_nms": 1, "exp_soft_nms": 2 }[db.configs["nms_algorithm"]] img_name = os.listdir(db._image_dir) for i in range(0, len(img_name)): top_bboxes = {} # for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): db_ind = i + 1 # image_id = db.image_ids(db_ind) image_id = img_name[i] image_file = db._image_dir + '/' + img_name[i] image = cv2.imread(image_file) height, width = image.shape[0:2] detections = [] for scale in scales: new_height = int(height * scale) new_width = int(width * scale) new_center = np.array([new_height // 2, new_width // 2]) inp_height = new_height | 127 inp_width = new_width | 127 images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) ratios = np.zeros((1, 2), dtype=np.float32) borders = np.zeros((1, 4), dtype=np.float32) sizes = np.zeros((1, 2), dtype=np.float32) out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 height_ratio = out_height / inp_height width_ratio = out_width / inp_width resized_image = cv2.resize(image, (new_width, new_height)) resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) resized_image = resized_image / 255. 
            normalize_(resized_image, db.mean, db.std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = kp_decode(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets = dets.reshape(1, -1, 8)
            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)
        detections = np.concatenate(detections, axis=1)
        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]
        scores = np.hstack([top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]
        # result_json = os.path.join(result_dir, "results.json")
        detections = db.convert_to_list(top_bboxes)
        print('demo for {}'.format(image_id))
        img = cv2.imread(image_file)
        box = []
        if detections is not None:
            # use det_ind here: reusing i would shadow the outer image-loop variable
            for det_ind in range(len(detections)):
                name = db._coco.cats[detections[det_ind][1]]['name']  # db._coco.cats[ind]['name']
                confi = detections[det_ind][-1]
                if confi < 0.3:
                    continue
                for j in range(0, 4):
                    box.append(detections[det_ind][j + 2])
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 255), 1)
                # cv2.putText(img, name[0] + ' ' + '{:.3f}'.format(confi), (int(box[0]), int(box[1] - 10)),
                #             cv2.FONT_ITALIC, 1, (0, 0, 255), 1)
                box.clear()
        cv2.imshow('Detecting image...', img)
        # timer.total_time = 0
        if cv2.waitKey(3000) & 0xFF == ord('q'):
            break
        print(detections)
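# --- illustrative sketch (not part of the original sources) -------------------
# The demo loop above accumulates coordinates in a shared `box` list and empties
# it by hand. A sketch of the same filtering/drawing step with per-detection
# unpacking; the row layout [image_id, class_id, x1, y1, x2, y2, score] is
# inferred from the indexing above:
import cv2
import numpy as np

def draw_detections(img, detections, conf_thresh=0.3):
    for det in detections:
        score = det[-1]
        if score < conf_thresh:
            continue  # skip low-confidence boxes, as in the demo above
        x1, y1, x2, y2 = (int(v) for v in det[2:6])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 255), 1)
    return img

canvas = np.zeros((100, 100, 3), dtype=np.uint8)
draw_detections(canvas, [[0, 1, 10, 10, 60, 60, 0.9]])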
def kp_detection(db, k_ind, data_aug, debug): data_rng = system_configs.data_rng batch_size = system_configs.batch_size categories = db.configs["categories"] input_size = db.configs["input_size"] output_size = db.configs["output_sizes"][0] border = db.configs["border"] lighting = db.configs["lighting"] rand_crop = db.configs["rand_crop"] rand_color = db.configs["rand_color"] rand_scales = db.configs["rand_scales"] gaussian_bump = db.configs["gaussian_bump"] gaussian_iou = db.configs["gaussian_iou"] gaussian_rad = db.configs["gaussian_radius"] max_tag_len = 128 # allocating memory images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) tl_heatmaps = np.zeros( (batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) br_heatmaps = np.zeros( (batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8) tag_lens = np.zeros((batch_size, ), dtype=np.int32) db_size = db.db_inds.size for b_ind in range(batch_size): if not debug and k_ind == 0: db.shuffle_inds() db_ind = db.db_inds[k_ind] k_ind = (k_ind + 1) % db_size # reading image image_file = db.image_file(db_ind) #print(image_file) image = cv2.imread(image_file) # reading detections detections = db.detections(db_ind) # cropping an image randomly if not debug and rand_crop: image, detections = random_crop(image, detections, rand_scales, input_size, border=border) else: image, detections = _full_image_crop(image, detections) #print("Image_size") #print(image.shape) image, detections = _resize_image(image, detections, input_size) detections = _clip_detections(image, detections) width_ratio = output_size[1] / input_size[1] height_ratio = output_size[0] / input_size[0] if not debug: image = image.astype(np.float32) / 255. 
if rand_color: color_jittering_(data_rng, image) if lighting: lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec) normalize_(image, db.mean, db.std) images[b_ind] = image.transpose((2, 0, 1)) for ind, detection in enumerate(detections): category = int(detection[-1]) - 1 #print("Category: %d" %category) #print("Detections: %d" % len(detections)) xtl, ytl = detection[0], detection[1] xbr, ybr = detection[2], detection[3] fxtl = (xtl * width_ratio) fytl = (ytl * height_ratio) fxbr = (xbr * width_ratio) fybr = (ybr * height_ratio) xtl = int(fxtl) ytl = int(fytl) xbr = int(fxbr) ybr = int(fybr) if gaussian_bump: width = detection[2] - detection[0] height = detection[3] - detection[1] width = math.ceil(width * width_ratio) height = math.ceil(height * height_ratio) if gaussian_rad == -1: radius = gaussian_radius((height, width), gaussian_iou) radius = max(0, int(radius)) else: radius = gaussian_rad draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius) draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius) else: tl_heatmaps[b_ind, category, ytl, xtl] = 1 br_heatmaps[b_ind, category, ybr, xbr] = 1 tag_ind = tag_lens[b_ind] tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl] br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr] tag_lens[b_ind] += 1 if tag_lens[b_ind] >= max_tag_len - 1: print("Too many targets, skip!") print(tag_lens[b_ind]) print(image_file) break #print("Pre_tag_ing:%d" %tag_ind) for b_ind in range(batch_size): tag_len = tag_lens[b_ind] tag_masks[b_ind, :tag_len] = 1 images = torch.from_numpy(images) tl_heatmaps = torch.from_numpy(tl_heatmaps) br_heatmaps = torch.from_numpy(br_heatmaps) tl_regrs = torch.from_numpy(tl_regrs) br_regrs = torch.from_numpy(br_regrs) tag_masks = torch.from_numpy(tag_masks) return { "xs": [images], "ys": [tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs] }, k_ind
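# --- illustrative sketch (not part of the original sources) -------------------
# A minimal stand-in for draw_gaussian as used by the sampler above: it paints
# an unnormalized 2-D gaussian (peak value 1) of the given radius onto the
# heatmap around an (x, y) center, taking the elementwise max so nearby objects
# do not overwrite each other. Details (e.g. the sigma divisor) may differ from
# the repo's own utility.
import numpy as np

def draw_gaussian_sketch(heatmap, center, radius, sigma_div=6):
    diameter = 2 * radius + 1
    sigma = diameter / sigma_div
    y, x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    g = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    cx, cy = center
    h, w = heatmap.shape
    l, r = min(cx, radius), min(w - cx, radius + 1)
    t, b = min(cy, radius), min(h - cy, radius + 1)
    patch = heatmap[cy - t:cy + b, cx - l:cx + r]  # view into the heatmap
    np.maximum(patch, g[radius - t:radius + b, radius - l:radius + r], out=patch)

hm = np.zeros((128, 128), dtype=np.float32)
draw_gaussian_sketch(hm, (64, 40), radius=10)
assert hm[40, 64] == 1.0  # peak lands exactly on the keypoint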
def kp_detection(self, image, db, result_dir, debug=False): K = db.configs["top_k"] ae_threshold = db.configs["ae_threshold"] nms_kernel = db.configs["nms_kernel"] weight_exp = db.configs["weight_exp"] merge_bbox = db.configs["merge_bbox"] categories = db.configs["categories"] nms_threshold = db.configs["nms_threshold"] max_per_image = db.configs["max_per_image"] nms_algorithm = { "nms": 0, "linear_soft_nms": 1, "exp_soft_nms": 2 }[db.configs["nms_algorithm"]] top_bboxes = {} if True: #db_ind = db_inds[ind] image_id = 0 height, width = image.shape[0:2] detections = [] center_points = [] if True: scale = 1 new_height = int(height * scale) new_width = int(width * scale) new_center = np.array([new_height // 2, new_width // 2]) inp_height = new_height | 127 inp_width = new_width | 127 images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) ratios = np.zeros((1, 2), dtype=np.float32) borders = np.zeros((1, 4), dtype=np.float32) sizes = np.zeros((1, 2), dtype=np.float32) out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 height_ratio = out_height / inp_height width_ratio = out_width / inp_width resized_image = cv2.resize(image, (new_width, new_height)) resized_image, border, offset = crop_image( resized_image, new_center, [inp_height, inp_width]) resized_image = resized_image / 255. normalize_(resized_image, db.mean, db.std) images[0] = resized_image.transpose((2, 0, 1)) borders[0] = border sizes[0] = [int(height * scale), int(width * scale)] ratios[0] = [height_ratio, width_ratio] images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) images = torch.from_numpy(images) dets, center = self.kp_decode(images, K, ae_threshold=ae_threshold, kernel=nms_kernel) dets = dets.reshape(2, -1, 8) center = center.reshape(2, -1, 4) dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] center[1, :, [0]] = out_width - center[1, :, [0]] dets = dets.reshape(1, -1, 8) center = center.reshape(1, -1, 4) self._rescale_dets(dets, ratios, borders, sizes) center[..., [0]] /= ratios[:, 1][:, None, None] center[..., [1]] /= ratios[:, 0][:, None, None] center[..., [0]] -= borders[:, 2][:, None, None] center[..., [1]] -= borders[:, 0][:, None, None] np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]]) np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]]) dets[:, :, 0:4] /= scale center[:, :, 0:2] /= scale if scale == 1: center_points.append(center) detections.append(dets) detections = np.concatenate(detections, axis=1) center_points = np.concatenate(center_points, axis=1) classes = detections[..., -1] classes = classes[0] detections = detections[0] center_points = center_points[0] valid_ind = detections[:, 4] > -1 valid_detections = detections[valid_ind] box_width = valid_detections[:, 2] - valid_detections[:, 0] box_height = valid_detections[:, 3] - valid_detections[:, 1] s_ind = (box_width * box_height <= 22500) l_ind = (box_width * box_height > 22500) s_detections = valid_detections[s_ind] l_detections = valid_detections[l_ind] s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3 s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3 s_temp_score = copy.copy(s_detections[:, 4]) s_detections[:, 4] = -1 center_x = center_points[:, 0][:, np.newaxis] center_y = center_points[:, 1][:, np.newaxis] s_left_x = s_left_x[np.newaxis, :] s_right_x = s_right_x[np.newaxis, :] s_top_y = s_top_y[np.newaxis, :] 
s_bottom_y = s_bottom_y[np.newaxis, :] ind_lx = (center_x - s_left_x) > 0 ind_rx = (center_x - s_right_x) < 0 ind_ty = (center_y - s_top_y) > 0 ind_by = (center_y - s_bottom_y) < 0 ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0 ind_s_new_score = np.max( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 index_s_new_score = np.argmax( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0) s_detections[:, 4][ind_s_new_score] = ( s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3 l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5 l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5 l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5 l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5 l_temp_score = copy.copy(l_detections[:, 4]) l_detections[:, 4] = -1 center_x = center_points[:, 0][:, np.newaxis] center_y = center_points[:, 1][:, np.newaxis] l_left_x = l_left_x[np.newaxis, :] l_right_x = l_right_x[np.newaxis, :] l_top_y = l_top_y[np.newaxis, :] l_bottom_y = l_bottom_y[np.newaxis, :] ind_lx = (center_x - l_left_x) > 0 ind_rx = (center_x - l_right_x) < 0 ind_ty = (center_y - l_top_y) > 0 ind_by = (center_y - l_bottom_y) < 0 ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0 ind_l_new_score = np.max( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 index_l_new_score = np.argmax( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0) l_detections[:, 4][ind_l_new_score] = ( l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3 detections = np.concatenate([l_detections, s_detections], axis=0) detections = detections[np.argsort(-detections[:, 4])] classes = detections[..., -1] keep_inds = (detections[:, 4] > -1) detections = detections[keep_inds] classes = classes[keep_inds] top_bboxes[image_id] = {} for j in range(categories): keep_inds = (classes == j) top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype( np.float32) if merge_bbox: soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) else: soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm) top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5] scores = np.hstack([ top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1) ]) if len(scores) > max_per_image: kth = len(scores) - max_per_image thresh = np.partition(scores, kth)[kth] for j in range(1, categories + 1): keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh) top_bboxes[image_id][j] = top_bboxes[image_id][j][ keep_inds] return top_bboxes[image_id] return 0
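# --- illustrative sketch (not part of the original sources) -------------------
# The s_/l_ blocks above implement CenterNet-style center rescoring: a box
# survives only if a same-class center keypoint falls inside its central region
# (the middle 1/3 for small boxes, 1/5 for large ones), and its score is then
# mixed with that center's score. A compact version of the same vectorized test:
import numpy as np

def center_region_rescore(dets, centers, n):
    # dets: (K, 8) rows [x1, y1, x2, y2, score, ..., cls]
    # centers: (M, 4) rows [x, y, cls, score]; n = 3 (small) or 5 (large)
    old = dets[:, 4].copy()
    dets[:, 4] = -1
    if len(centers) == 0:
        return dets
    a, b = (n + 1) // 2, (n - 1) // 2          # n=3 -> (2,1); n=5 -> (3,2)
    lx = (a * dets[:, 0] + b * dets[:, 2]) / n
    rx = (b * dets[:, 0] + a * dets[:, 2]) / n
    ty = (a * dets[:, 1] + b * dets[:, 3]) / n
    by = (b * dets[:, 1] + a * dets[:, 3]) / n
    cx, cy = centers[:, 0:1], centers[:, 1:2]  # (M, 1) for broadcasting
    inside = ((cx > lx) & (cx < rx) & (cy > ty) & (cy < by)
              & (centers[:, 2:3] == dets[:, -1]))   # (M, K)
    hit = inside.any(axis=0)                   # box has a matching center
    first = inside.argmax(axis=0)              # index of that center per box
    dets[hit, 4] = (2 * old[hit] + centers[first[hit], 3]) / 3
    return dets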
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode): point_json_tl = os.path.join(result_dir, "points_tl.json") point_json_br = os.path.join(result_dir, "points_br.json") debug_dir = os.path.join(result_dir, "debug") if not os.path.exists(debug_dir): os.makedirs(debug_dir) db_inds = db.db_inds num_images = db_inds.size K = db.configs["top_k"] ae_threshold = db.configs["ae_threshold"] nms_kernel = db.configs["nms_kernel"] scales = db.configs["test_scales"] weight_exp = db.configs["weight_exp"] merge_bbox = db.configs["merge_bbox"] categories = db.configs["categories"] nms_threshold = db.configs["nms_threshold"] max_per_image = db.configs["max_per_image"] nms_algorithm = { "nms": 0, "linear_soft_nms": 1, "exp_soft_nms": 2 }[db.configs["nms_algorithm"]] time_backbones = 0 time_psns = 0 if True: top_points_tl = {} top_points_br = {} for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): db_ind = db_inds[ind] image_id = db.image_ids(db_ind) image_file = db.image_file(db_ind) #print(image_file) image = cv2.imread(image_file) height, width = image.shape[0:2] detections_point_tl = [] detections_point_br = [] scale = 1.0 new_height = int(height * scale) new_width = int(width * scale) new_center = np.array([new_height // 2, new_width // 2]) inp_height = new_height | 127 inp_width = new_width | 127 images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) ratios = np.zeros((1, 2), dtype=np.float32) borders = np.zeros((1, 4), dtype=np.float32) sizes = np.zeros((1, 2), dtype=np.float32) out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 height_ratio = out_height / inp_height width_ratio = out_width / inp_width resized_image = cv2.resize(image, (new_width, new_height)) resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) resized_image = resized_image / 255. 
normalize_(resized_image, db.mean, db.std) images[0] = resized_image.transpose((2, 0, 1)) borders[0] = border sizes[0] = [int(height * scale), int(width * scale)] ratios[0] = [height_ratio, width_ratio] images = torch.from_numpy(images) dets_tl, dets_br, time_backbone, time_psn, flag = decode_func( nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel) time_backbones += time_backbone time_psns += time_psn #print('b time:%f' % (time_backbones / float(ind + 1))) #print('p time:%f' % (time_psns / float(ind + 1))) #print(0) #print(dets_tl) if not flag: print("error when try to test %s" % image_file) continue #print(dets_tl.shape) _rescale_points(dets_tl, ratios, borders, sizes) _rescale_points(dets_br, ratios, borders, sizes) detections_point_tl.append(dets_tl) detections_point_br.append(dets_br) detections_point_tl = np.concatenate(detections_point_tl, axis=1) detections_point_br = np.concatenate(detections_point_br, axis=1) #print('1') #print(detections_point.shape) classes_p_tl = detections_point_tl[:, 0, 1] classes_p_br = detections_point_br[:, 0, 1] #print('2') #print(classes_p.shape) # reject detections with negative scores keep_inds_p = (detections_point_tl[:, 0, 0] > 0) detections_point_tl = detections_point_tl[keep_inds_p, 0] classes_p_tl = classes_p_tl[keep_inds_p] keep_inds_p = (detections_point_br[:, 0, 0] > 0) detections_point_br = detections_point_br[keep_inds_p, 0] classes_p_br = classes_p_br[keep_inds_p] #print('3') #print(detections_point.shape) top_points_tl[image_id] = {} top_points_br[image_id] = {} for j in range(categories): keep_inds_p = (classes_p_tl == j) top_points_tl[image_id][ j + 1] = detections_point_tl[keep_inds_p].astype( np.float32) keep_inds_p = (classes_p_br == j) top_points_br[image_id][ j + 1] = detections_point_br[keep_inds_p].astype( np.float32) #print(top_points[image_id][j + 1][0]) scores = np.hstack([ top_points_tl[image_id][j][:, 0] for j in range(1, categories + 1) ]) if len(scores) > max_per_image: kth = len(scores) - max_per_image thresh = np.partition(scores, kth)[kth] for j in range(1, categories + 1): keep_inds = (top_points_tl[image_id][j][:, 0] >= thresh) top_points_tl[image_id][j] = top_points_tl[image_id][j][ keep_inds] scores = np.hstack([ top_points_br[image_id][j][:, 0] for j in range(1, categories + 1) ]) if len(scores) > max_per_image: kth = len(scores) - max_per_image thresh = np.partition(scores, kth)[kth] for j in range(1, categories + 1): keep_inds = (top_points_br[image_id][j][:, 0] >= thresh) top_points_br[image_id][j] = top_points_br[image_id][j][ keep_inds] if debug: image_file = db.image_file(db_ind) image = cv2.imread(image_file) detections_point_tl = db.convert_to_coco_points_pure(top_points_tl) detections_point_br = db.convert_to_coco_points_pure(top_points_br) with open(point_json_tl, "w") as f: json.dump(detections_point_tl, f) with open(point_json_br, "w") as f: json.dump(detections_point_br, f) ''' image_ids = [db.image_ids(ind) for ind in db_inds] with open(result_json, "r") as f: result_json = json.load(f) for cls_type in range(1, categories+1): db.evaluate(result_json, [cls_type], image_ids) ''' return 0
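# --- illustrative sketch (not part of the original sources) -------------------
# Both the tl and br branches above bucket detections by class with 1-based
# keys: class j in the 0-based network output is stored under j + 1, matching
# COCO-style category ids. The pattern in isolation:
import numpy as np

def group_by_class(dets, classes, num_categories):
    return {j + 1: dets[classes == j].astype(np.float32)
            for j in range(num_categories)}

dets = np.array([[0.9, 0.0], [0.7, 1.0], [0.8, 0.0]])
buckets = group_by_class(dets, dets[:, 1], num_categories=2)
assert len(buckets[1]) == 2 and len(buckets[2]) == 1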
def kp_detection(db, k_ind, data_aug, debug):
    ################################################################
    # kp_detection: takes the whole dataset as input,
    # loads a batch of images and annotations from it, and
    # builds the corresponding heatmaps and regression targets.
    ################################################################
    # call chain: train.py --> train() --> init_parallel_jobs --> for each thread: prefetch_data --> sample_data --> kp_detection
    # input: in training, db is an MSCOCO instance and the dataset is trainval2014;
    #        in validation, db is an MSCOCO instance and the dataset is minival2014
    # k_ind is 0 on the first call, then advances inside kp_detection via k_ind = (k_ind + 1) % db_size
    # data_aug is True when training and False when validating
    # debug is set in the sample_data method; it is False in both cases
    data_rng = system_configs.data_rng  # see config.py: data_rng = np.random.RandomState(123)
    batch_size = system_configs.batch_size  # see CenterNet-104.py: batch_size = 48
    # this is checked in the COCO class; the db_config content is listed below,
    # "db": {
    #     "rand_scale_min": 0.6,
    #     "rand_scale_max": 1.4,
    #     "rand_scale_step": 0.1,
    #     "rand_scales": null,
    #
    #     "rand_crop": true,
    #     "rand_color": true,
    #
    #     "border": 128,
    #     "gaussian_bump": true,
    #
    #     "input_size": [511, 511],
    #     "output_sizes": [[128, 128]],
    #
    #     "test_scales": [1],
    #
    #     "top_k": 70,
    #     "categories": 80,
    #     "kp_categories": 1,
    #     "ae_threshold": 0.5,
    #     "nms_threshold": 0.5,
    #
    #     "max_per_image": 100
    # }
    # the parameters above come from CenterNet-104.py; any parameter that
    # can't be found in CenterNet-104 falls back to db/detection.py
    categories = db.configs["categories"]       # 80
    input_size = db.configs["input_size"]       # [511, 511]
    output_size = db.configs["output_sizes"][0] # [128, 128]
    border = db.configs["border"]               # 128
    lighting = db.configs["lighting"]           # from detection.py: lighting = true
    rand_crop = db.configs["rand_crop"]         # true
    rand_color = db.configs["rand_color"]       # true
    rand_scales = db.configs["rand_scales"]
    # check CenterNet-104.json:
    # "rand_scale_min": 0.6,
    # "rand_scale_max": 1.4,
    # "rand_scale_step": 0.1,
    # "rand_scales": null,
    # and check detection.py:
    # if self._configs["rand_scales"] is None:
    #     self._configs["rand_scales"] = np.arange(
    #         self._configs["rand_scale_min"],
    #         self._configs["rand_scale_max"],
    #         self._configs["rand_scale_step"]
    #     )
    # so here rand_scales = np.arange(0.6, 1.4, 0.1), i.e. 0.6, 0.7, 0.8, ..., 1.4
    gaussian_bump = db.configs["gaussian_bump"]  # from detection.py: true
    gaussian_iou = db.configs["gaussian_iou"]    # from detection.py: 0.7
    gaussian_rad = db.configs["gaussian_radius"] # from detection.py: -1
    max_tag_len = 128
    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)  # 48, 3, 511, 511
    tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)  # 48, 80, 128, 128
    br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)  # 48, 80, 128, 128
    ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)  # 48, 80, 128, 128
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)  # 48, 128, 2
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)  # 48, 128, 2
    ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)  # 48, 128, 2
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)  # (48,)
    db_size = db.db_inds.size
    # back in db/coco.py, db.db_inds is
    # self._db_inds = np.arange(len(self._image_ids))
    # so db_size is the number of images in the dataset, e.g. 10000 images gives db_size = 10000
    for b_ind in range(batch_size):  # iterate images one by one
        if not debug and k_ind == 0:
            db.shuffle_inds()
            # when this is called, debug is always False whether training or validating,
            # and k_ind is 0 only once, on the first call to kp_detection;
            # the shuffle_inds() method is written in base.py
        db_ind = db.db_inds[k_ind]  # db_inds are shuffled on the first iteration, then indexed using k_ind
        k_ind = (k_ind + 1) % db_size
        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        # reading detections
        detections = db.detections(db_ind)
        # db is an MSCOCO instance, and MSCOCO.detections is written in db/coco.py;
        # in train.py, MSCOCO is initialized and MSCOCO._detections is filled with
        # all annotation information. db.detections(db_ind) takes db_ind, the id of
        # an image, and uses it to fetch that image's annotations, so detections is
        # the label information of a single image
        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales, input_size, border=border)
            # the image is cropped and the detections (bounding boxes) are updated at the same time
        else:
            image, detections = _full_image_crop(image, detections)
        image, detections = _resize_image(image, detections, input_size)
        # resize the image and the detections to another shape at the same time;
        # there is a risk that the detections end up outside the image boundaries,
        detections = _clip_detections(image, detections)
        # so clipping the detections here removes that risk and keeps
        # all the detections within the boundaries
        width_ratio = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]
        # input size and output size can be found in CenterNet-104.json:
        # input size = 511, 511
        # output size = 128, 128
        # so width_ratio = 128 / 511 = 0.2505 (output over input)
        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
            if lighting:
                lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))  # make the image channel-first
        for ind, detection in enumerate(detections):
            # all these operations are for one single image; since the code below
            # applies the scale to the detections, the detections must be absolute
            # pixel coordinates, not values normalized to the (0, 1) range
            category = int(detection[-1]) - 1
            #category = 0
            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct, yct = (detection[2] + detection[0])/2., (detection[3]+detection[1])/2.
            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)
            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)
            xct = int(fxct)
            yct = int(fyct)
            if gaussian_bump:  # CenterNet-104 sets this to true
                width = detection[2] - detection[0]   # original value
                height = detection[3] - detection[1]
                width = math.ceil(width * width_ratio)    # scaled to output-map coordinates
                height = math.ceil(height * height_ratio)
                if gaussian_rad == -1:  # -1 means compute the radius automatically, matching the CenterNet-104 setting
                    radius = gaussian_radius((height, width), gaussian_iou)  # gaussian_iou = 0.7
                    radius = max(0, int(radius))
                    # e.g. for a 50 x 80 bounding box the radius is about 17
                else:
                    radius = gaussian_rad
                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius, delte = 5)
                # all three heatmaps start as zeros with shape 48, 80, 128, 128;
                # tl_heatmaps[b_ind, category] is 128 x 128, and the top-left corner,
                # bottom-right corner and center keypoint each get their own heatmap.
                # about the (misspelled) delte parameter: top-left and bottom-right
                # both use the default 6, so why is the center heatmap set to 5?
                # in draw_gaussian, sigma = diameter / delte, so a smaller delte gives
                # a larger sigma and a wider bump; the values around the center
                # keypoint are therefore kept higher than around the two corners.
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1
                # the if-branch paints a gaussian bump; the else-branch marks only a single peak pixel
            tag_ind = tag_lens[b_ind]
            # tag_lens has shape (batch_size,) and b_ind is the image index within
            # the batch; tag_lens stores how many detections each image has, as the
            # loop a few lines below confirms
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
            # all three regression arrays are 3-dimensional: (b_ind, tag_ind, 2).
            # for example, in one batch we have 48 images and each image has a
            # different number of detections: maybe the first image has 4 and the
            # second has 15, but the forward pass needs tensors of a fixed shape,
            # which is why these arrays are pre-allocated at a fixed size:
            # ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            # these 3 arrays pair with the three regression arrays above: they store
            # the integer part of the output-scale coordinates as flat indices, while
            # the regression arrays store the fractional remainders.
            # ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
            tag_lens[b_ind] += 1
    for b_ind in range(batch_size):  # for each image in the batch
        tag_len = tag_lens[b_ind]    # how many detections the image has
        tag_masks[b_ind, :tag_len] = 1
        # tag_masks first appears at the beginning of this method:
        # tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8) is how it is initialized
    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    ct_regrs = torch.from_numpy(ct_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)
    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs]
    }, k_ind
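# --- illustrative sketch (not part of the original sources) -------------------
# The tag arrays above store heatmap locations as flat row-major indices
# (y * output_width + x); the matching decode is a divmod. In isolation:
def encode_index(x, y, width):
    return y * width + x

def decode_index(ind, width):
    y, x = divmod(ind, width)
    return x, y

# round-trips on the 128-wide output map used above
assert encode_index(*decode_index(5000, 128), 128) == 5000
assert decode_index(encode_index(8, 39, 128), 128) == (8, 39)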
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode): debug_dir = os.path.join(result_dir, "debug") if not os.path.exists(debug_dir): os.makedirs(debug_dir) partial_num = 3000 db_inds = db.db_inds[:partial_num] if debug else db.db_inds K = db.configs["top_k"] ae_threshold = db.configs["ae_threshold"] nms_kernel = db.configs["nms_kernel"] scales = db.configs["test_scales"] weight_exp = db.configs["weight_exp"] merge_bbox = db.configs["merge_bbox"] nms_threshold = db.configs["nms_threshold"] max_per_image = db.configs["max_per_image"] nms_algorithm = { "nms": 0, "linear_soft_nms": 1, "exp_soft_nms": 2 }[db.configs["nms_algorithm"]] seq_length = db.configs["max_query_len"] bert_model = db.configs["bert_model"] textdim = 768 if bert_model == 'bert-base-uncased' else 1024 top_bboxes = {} best_bboxes = {} for ind in tqdm(range(db_inds.size), ncols=80, desc="locating kps"): db_ind = db_inds[ind] image_file = db.images[db_ind][0] image, bert_feature, gt_detections, phrase = db.detections_with_phrase( db_ind) height, width = image.shape[0:2] detections = [] center_points = [] tl_hms = [] br_hms = [] ct_hms = [] for scale in scales: new_height = int(height * scale) new_width = int(width * scale) new_center = np.array([new_height // 2, new_width // 2]) inp_height = new_height | 127 inp_width = new_width | 127 images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) bert_features = np.zeros((1, textdim), dtype=np.float32) ratios = np.zeros((1, 2), dtype=np.float32) borders = np.zeros((1, 4), dtype=np.float32) sizes = np.zeros((1, 2), dtype=np.float32) bert_features[0] = bert_feature out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 height_ratio = out_height / inp_height width_ratio = out_width / inp_width resized_image = cv2.resize(image, (new_width, new_height)) resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) resized_image = resized_image / 255. 
normalize_(resized_image, db.mean, db.std) images[0] = resized_image.transpose((2, 0, 1)) borders[0] = border sizes[0] = [int(height * scale), int(width * scale)] ratios[0] = [height_ratio, width_ratio] # Flip to perform detection twice images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) bert_features = np.concatenate((bert_features, bert_features), axis=0) images = torch.from_numpy(images) bert_features = torch.from_numpy(bert_features) dets, center, heatmaps = decode_func(nnet, [images, bert_features], K, ae_threshold=ae_threshold, kernel=nms_kernel) dets = dets.reshape(2, -1, 8) center = center.reshape(2, -1, 4) dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] center[1, :, [0]] = out_width - center[1, :, [0]] dets = dets.reshape(1, -1, 8) center = center.reshape(1, -1, 4) tl_hm, br_hm, ct_hm = heatmaps _rescale_dets(dets, ratios, borders, sizes) center[..., [0]] /= ratios[:, 1][:, None, None] center[..., [1]] /= ratios[:, 0][:, None, None] center[..., [0]] -= borders[:, 2][:, None, None] center[..., [1]] -= borders[:, 0][:, None, None] np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]]) np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]]) dets[:, :, 0:4] /= scale center[:, :, 0:2] /= scale if scale == 1: center_points.append(center) tl_hms.append(tl_hm) br_hms.append(br_hm) ct_hms.append(ct_hm) detections.append(dets) detections = np.concatenate(detections, axis=1) center_points = np.concatenate(center_points, axis=1) tl_hms = np.concatenate(tl_hms, axis=1) br_hms = np.concatenate(br_hms, axis=1) ct_hms = np.concatenate(ct_hms, axis=1) classes = detections[..., -1] classes = classes[0] detections = detections[0] center_points = center_points[0] tl_hms = tl_hms[0] br_hms = br_hms[0] ct_hms = ct_hms[0] valid_ind = detections[:, 4] > -1 valid_detections = detections[valid_ind] box_width = valid_detections[:, 2] - valid_detections[:, 0] box_height = valid_detections[:, 3] - valid_detections[:, 1] s_ind = (box_width * box_height <= 22500) l_ind = (box_width * box_height > 22500) s_detections = valid_detections[s_ind] l_detections = valid_detections[l_ind] s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3 s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3 s_temp_score = copy.copy(s_detections[:, 4]) s_detections[:, 4] = -1 center_x = center_points[:, 0][:, np.newaxis] center_y = center_points[:, 1][:, np.newaxis] s_left_x = s_left_x[np.newaxis, :] s_right_x = s_right_x[np.newaxis, :] s_top_y = s_top_y[np.newaxis, :] s_bottom_y = s_bottom_y[np.newaxis, :] ind_lx = (center_x - s_left_x) > 0 ind_rx = (center_x - s_right_x) < 0 ind_ty = (center_y - s_top_y) > 0 ind_by = (center_y - s_bottom_y) < 0 ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0 ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 index_s_new_score = np.argmax( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0) s_detections[:, 4][ind_s_new_score] = ( s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3 l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5 l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5 l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5 l_bottom_y = (2 * l_detections[:, 1] + 3 * 
l_detections[:, 3]) / 5 l_temp_score = copy.copy(l_detections[:, 4]) l_detections[:, 4] = -1 center_x = center_points[:, 0][:, np.newaxis] center_y = center_points[:, 1][:, np.newaxis] l_left_x = l_left_x[np.newaxis, :] l_right_x = l_right_x[np.newaxis, :] l_top_y = l_top_y[np.newaxis, :] l_bottom_y = l_bottom_y[np.newaxis, :] ind_lx = (center_x - l_left_x) > 0 ind_rx = (center_x - l_right_x) < 0 ind_ty = (center_y - l_top_y) > 0 ind_by = (center_y - l_bottom_y) < 0 ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0 ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 index_l_new_score = np.argmax( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0) l_detections[:, 4][ind_l_new_score] = ( l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3 detections = np.concatenate([l_detections, s_detections], axis=0) detections = detections[np.argsort(-detections[:, 4])] classes = detections[..., -1] # reject detections with negative scores keep_inds = (detections[:, 4] > -1) detections = detections[keep_inds] classes = classes[keep_inds] top_bboxes[db_ind] = {} top_bboxes[db_ind] = detections[:, 0:7].astype(np.float32) if merge_bbox: soft_nms_merge(top_bboxes[db_ind], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) else: soft_nms(top_bboxes[db_ind], Nt=nms_threshold, method=nms_algorithm) top_bboxes[db_ind] = top_bboxes[db_ind][:, 0:5] scores = top_bboxes[db_ind][:, -1] if scores is not None and len(scores) > 0: best_bboxes[db_ind] = top_bboxes[db_ind][np.argmax(scores)] else: best_bboxes[db_ind] = None if len(scores) > max_per_image: kth = len(scores) - max_per_image thresh = np.partition(scores, kth)[kth] keep_inds = (top_bboxes[db_ind][:, -1] >= thresh) top_bboxes[db_ind] = top_bboxes[db_ind][keep_inds] if debug: image_file = db.image_file(db_ind) image = cv2.imread(image_file) im = image[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(28, 12)) ax = plt.subplot(152) fig = ax.imshow(im, aspect='equal') plt.axis('off') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) if best_bboxes[db_ind] is not None: bbox = best_bboxes[db_ind].astype(np.int32) xmin = bbox[0] ymin = bbox[1] xmax = bbox[2] ymax = bbox[3] ax.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor='red', linewidth=5.0)) ax.text(xmin + 1, ymin - 3, 'prediction', bbox=dict(facecolor='red', ec='black', lw=2, alpha=0.5), fontsize=15, color='white', weight='bold') ax = plt.subplot(151) fig = ax.imshow(im, aspect='equal') plt.axis('off') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) bbox = gt_detections[0].astype(np.int32) xmin = bbox[0] ymin = bbox[1] xmax = bbox[2] ymax = bbox[3] ax.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor='red', linewidth=5.0)) ax.text(xmin + 1, ymin - 3, phrase, bbox=dict(facecolor='red', ec='black', lw=2, alpha=0.5), fontsize=15, color='white', weight='bold') ax = plt.subplot(153) ax.imshow(tl_hms[0], cmap='jet') plt.axis('off') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) ax = plt.subplot(154) ax.imshow(br_hms[0], cmap='jet') plt.axis('off') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) ax = plt.subplot(155) ax.imshow(ct_hms[0], cmap='jet') plt.axis('off') fig.axes.get_xaxis().set_visible(False) 
fig.axes.get_yaxis().set_visible(False) # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind)) debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind)) # plt.savefig(debug_file1) plt.savefig(debug_file2) plt.close() result_json = os.path.join(result_dir, "results.json") detections = db.convert_to_json(top_bboxes) with open(result_json, "w") as f: json.dump(detections, f) db.evaluate(best_bboxes) return 0
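# --- illustrative sketch (not part of the original sources) -------------------
# best_bboxes above keeps a single highest-scoring box per image, or None when
# nothing survived NMS. The selection with its empty guard, in isolation:
import numpy as np

def pick_best(boxes):
    # boxes: (N, 5) rows [x1, y1, x2, y2, score]
    if boxes is None or len(boxes) == 0:
        return None
    return boxes[np.argmax(boxes[:, -1])]

assert pick_best(np.empty((0, 5))) is None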
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode): debug_dir = os.path.join(result_dir, "debug") if not os.path.exists(debug_dir): os.makedirs(debug_dir) if db.split != "trainval": db_inds = db.db_inds[:100] if debug else db.db_inds else: db_inds = db.db_inds[:100] if debug else db.db_inds[:5000] num_images = db_inds.size K = db.configs["top_k"] ae_threshold = db.configs["ae_threshold"] nms_kernel = db.configs["nms_kernel"] scales = db.configs["test_scales"] weight_exp = db.configs["weight_exp"] merge_bbox = db.configs["merge_bbox"] categories = db.configs["categories"] nms_threshold = db.configs["nms_threshold"] max_per_image = db.configs["max_per_image"] nms_algorithm = { "nms": 0, "linear_soft_nms": 1, "exp_soft_nms": 2 }[db.configs["nms_algorithm"]] top_bboxes = {} for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): db_ind = db_inds[ind] image_id = db.image_ids(db_ind) image_file = db.image_file(db_ind) image = cv2.imread(image_file) height, width = image.shape[0:2] detections = [] center_points = [] for scale in scales: new_height = int(height * scale) new_width = int(width * scale) new_center = np.array([new_height // 2, new_width // 2]) inp_height = new_height | 127 inp_width = new_width | 127 images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) ratios = np.zeros((1, 2), dtype=np.float32) borders = np.zeros((1, 4), dtype=np.float32) sizes = np.zeros((1, 2), dtype=np.float32) out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 height_ratio = out_height / inp_height width_ratio = out_width / inp_width resized_image = cv2.resize(image, (new_width, new_height)) resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) resized_image = resized_image / 255. 
normalize_(resized_image, db.mean, db.std) images[0] = resized_image.transpose((2, 0, 1)) borders[0] = border sizes[0] = [int(height * scale), int(width * scale)] ratios[0] = [height_ratio, width_ratio] images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) images = torch.from_numpy(images) dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel) dets = dets.reshape(2, -1, 8) center = center.reshape(2, -1, 4) dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] center[1, :, [0]] = out_width - center[1, :, [0]] dets = dets.reshape(1, -1, 8) center = center.reshape(1, -1, 4) _rescale_dets(dets, ratios, borders, sizes) center[..., [0]] /= ratios[:, 1][:, None, None] center[..., [1]] /= ratios[:, 0][:, None, None] center[..., [0]] -= borders[:, 2][:, None, None] center[..., [1]] -= borders[:, 0][:, None, None] np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]]) np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]]) dets[:, :, 0:4] /= scale center[:, :, 0:2] /= scale if scale == 1: center_points.append(center) detections.append(dets) detections = np.concatenate(detections, axis=1) center_points = np.concatenate(center_points, axis=1) classes = detections[..., -1] classes = classes[0] detections = detections[0] center_points = center_points[0] valid_ind = detections[:, 4] > -1 valid_detections = detections[valid_ind] box_width = valid_detections[:, 2] - valid_detections[:, 0] box_height = valid_detections[:, 3] - valid_detections[:, 1] s_ind = (box_width * box_height <= 22500) l_ind = (box_width * box_height > 22500) s_detections = valid_detections[s_ind] l_detections = valid_detections[l_ind] s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3 s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3 s_temp_score = copy.copy(s_detections[:, 4]) s_detections[:, 4] = -1 center_x = center_points[:, 0][:, np.newaxis] center_y = center_points[:, 1][:, np.newaxis] s_left_x = s_left_x[np.newaxis, :] s_right_x = s_right_x[np.newaxis, :] s_top_y = s_top_y[np.newaxis, :] s_bottom_y = s_bottom_y[np.newaxis, :] ind_lx = (center_x - s_left_x) > 0 ind_rx = (center_x - s_right_x) < 0 ind_ty = (center_y - s_top_y) > 0 ind_by = (center_y - s_bottom_y) < 0 ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0 ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 index_s_new_score = np.argmax( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0) s_detections[:, 4][ind_s_new_score] = ( s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3 l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5 l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5 l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5 l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5 l_temp_score = copy.copy(l_detections[:, 4]) l_detections[:, 4] = -1 center_x = center_points[:, 0][:, np.newaxis] center_y = center_points[:, 1][:, np.newaxis] l_left_x = l_left_x[np.newaxis, :] l_right_x = l_right_x[np.newaxis, :] l_top_y = l_top_y[np.newaxis, :] l_bottom_y = l_bottom_y[np.newaxis, :] ind_lx = (center_x - l_left_x) > 0 ind_rx = (center_x - l_right_x) < 0 ind_ty = (center_y - l_top_y) > 0 ind_by = (center_y - 
l_bottom_y) < 0 ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0 ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 index_l_new_score = np.argmax( ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0) l_detections[:, 4][ind_l_new_score] = ( l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3 detections = np.concatenate([l_detections, s_detections], axis=0) detections = detections[np.argsort(-detections[:, 4])] classes = detections[..., -1] #for i in range(detections.shape[0]): # box_width = detections[i,2]-detections[i,0] # box_height = detections[i,3]-detections[i,1] # if box_width*box_height<=22500 and detections[i,4]!=-1: # left_x = (2*detections[i,0]+1*detections[i,2])/3 # right_x = (1*detections[i,0]+2*detections[i,2])/3 # top_y = (2*detections[i,1]+1*detections[i,3])/3 # bottom_y = (1*detections[i,1]+2*detections[i,3])/3 # temp_score = copy.copy(detections[i,4]) # detections[i,4] = -1 # for j in range(center_points.shape[0]): # if (classes[i] == center_points[j,2])and \ # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \ # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)): # detections[i,4] = (temp_score*2 + center_points[j,3])/3 # break # elif box_width*box_height > 22500 and detections[i,4]!=-1: # left_x = (3*detections[i,0]+2*detections[i,2])/5 # right_x = (2*detections[i,0]+3*detections[i,2])/5 # top_y = (3*detections[i,1]+2*detections[i,3])/5 # bottom_y = (2*detections[i,1]+3*detections[i,3])/5 # temp_score = copy.copy(detections[i,4]) # detections[i,4] = -1 # for j in range(center_points.shape[0]): # if (classes[i] == center_points[j,2])and \ # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \ # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)): # detections[i,4] = (temp_score*2 + center_points[j,3])/3 # break # reject detections with negative scores keep_inds = (detections[:, 4] > -1) detections = detections[keep_inds] classes = classes[keep_inds] top_bboxes[image_id] = {} for j in range(categories): keep_inds = (classes == j) top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype( np.float32) if merge_bbox: soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) else: soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm) top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5] scores = np.hstack( [top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)]) if len(scores) > max_per_image: kth = len(scores) - max_per_image thresh = np.partition(scores, kth)[kth] for j in range(1, categories + 1): keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh) top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds] if debug: image_file = db.image_file(db_ind) image = cv2.imread(image_file) im = image[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) fig = ax.imshow(im, aspect='equal') plt.axis('off') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) #bboxes = {} for j in range(1, categories + 1): keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4) cat_name = db.class_name(j) for bbox in top_bboxes[image_id][j][keep_inds]: bbox = bbox[0:4].astype(np.int32) xmin = bbox[0] ymin = bbox[1] xmax = bbox[2] ymax = bbox[3] #if (xmax - xmin) * (ymax - ymin) > 5184: ax.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, 
ymax - ymin, fill=False, edgecolor=colours[j - 1], linewidth=4.0)) ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name), bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5), fontsize=15, color='white', weight='bold') debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind)) debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind)) plt.savefig(debug_file1) plt.savefig(debug_file2) plt.close() #cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) result_json = os.path.join(result_dir, "results.json") detections = db.convert_to_coco(top_bboxes) with open(result_json, "w") as f: json.dump(detections, f) cls_ids = list(range(1, categories + 1)) image_ids = [db.image_ids(ind) for ind in db_inds] db.evaluate(result_json, cls_ids, image_ids) return 0
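# --- illustrative sketch (not part of the original sources) -------------------
# The decode path above runs the network on the image and its horizontal flip
# in one batch, then mirrors the flipped boxes back before pooling both
# candidate sets. The coordinate flip in isolation (x1 and x2 swap because
# mirroring reverses left and right):
import numpy as np

def merge_flipped(dets, out_width):
    # dets: (2, K, 8); row 0 from the original image, row 1 from the flip
    dets = dets.copy()
    dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
    return dets.reshape(1, -1, 8)

d = np.zeros((2, 1, 8))
d[1, 0, 0:4] = [10, 5, 30, 20]          # box detected on the flipped image
m = merge_flipped(d, out_width=100)
assert list(m[0, 1, 0:4]) == [70, 5, 90, 20]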
def kp_detection(db, k_ind, data_aug, debug):
    data_rng   = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories  = db.configs["categories"]
    input_size  = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border        = db.configs["border"]
    lighting      = db.configs["lighting"]
    rand_crop     = db.configs["rand_crop"]
    rand_color    = db.configs["rand_color"]
    rand_scales   = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou  = db.configs["gaussian_iou"]
    gaussian_rad  = db.configs["gaussian_radius"]

    max_tag_len = 128

    # allocating memory
    images      = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)
    t_heatmaps  = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    l_heatmaps  = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    b_heatmaps  = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    r_heatmaps  = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    t_regrs     = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    l_regrs     = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    b_regrs     = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    r_regrs     = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    t_tags      = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    l_tags      = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    b_tags      = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    r_tags      = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks   = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens    = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind  = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image      = cv2.imread(image_file)

        # reading detections and their four extreme points
        detections, extreme_pts = db.detections(db_ind)

        # cropping an image randomly
        if rand_crop:
            image, detections, extreme_pts = random_crop_pts(
                image, detections, extreme_pts,
                rand_scales, input_size, border=border)
        else:
            # full-image cropping is not supported for extreme points
            assert 0
            # image, detections = _full_image_crop(image, detections)

        image, detections, extreme_pts = _resize_image_pts(
            image, detections, extreme_pts, input_size)
        detections, extreme_pts = _clip_detections_pts(
            image, detections, extreme_pts)

        width_ratio  = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]

        # flipping an image randomly
        if np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
            extreme_pts[:, :, 0]  = width - extreme_pts[:, :, 0] - 1
            # a horizontal flip swaps the left and right extreme points
            extreme_pts[:, 1, :], extreme_pts[:, 3, :] = \
                extreme_pts[:, 3, :].copy(), extreme_pts[:, 1, :].copy()

        image = image.astype(np.float32) / 255.
        if not debug:
            if rand_color:
                color_jittering_(data_rng, image)
            if lighting:
                lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        # normalize unconditionally: the debug branch below un-normalizes
        # with db.std / db.mean before visualizing
        normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))

        for ind, detection in enumerate(detections):
            category   = int(detection[-1]) - 1
            extreme_pt = extreme_pts[ind]

            # top, left, bottom, right extreme points and the box center
            xt, yt = extreme_pt[0, 0], extreme_pt[0, 1]
            xl, yl = extreme_pt[1, 0], extreme_pt[1, 1]
            xb, yb = extreme_pt[2, 0], extreme_pt[2, 1]
            xr, yr = extreme_pt[3, 0], extreme_pt[3, 1]
            xct = (xl + xr) / 2
            yct = (yt + yb) / 2

            # map to output resolution, keeping the fractional coordinates
            # for the offset-regression targets below
            fxt  = (xt * width_ratio)
            fyt  = (yt * height_ratio)
            fxl  = (xl * width_ratio)
            fyl  = (yl * height_ratio)
            fxb  = (xb * width_ratio)
            fyb  = (yb * height_ratio)
            fxr  = (xr * width_ratio)
            fyr  = (yr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            xt  = int(fxt)
            yt  = int(fyt)
            xl  = int(fxl)
            yl  = int(fyl)
            xb  = int(fxb)
            yb  = int(fyb)
            xr  = int(fxr)
            yr  = int(fyr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump:
                width  = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width  = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(t_heatmaps[b_ind, category], [xt, yt], radius)
                draw_gaussian(l_heatmaps[b_ind, category], [xl, yl], radius)
                draw_gaussian(b_heatmaps[b_ind, category], [xb, yb], radius)
                draw_gaussian(r_heatmaps[b_ind, category], [xr, yr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius)
            else:
                t_heatmaps[b_ind, category, yt, xt] = 1
                l_heatmaps[b_ind, category, yl, xl] = 1
                b_heatmaps[b_ind, category, yb, xb] = 1
                r_heatmaps[b_ind, category, yr, xr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1

            tag_ind = tag_lens[b_ind]
            t_regrs[b_ind, tag_ind, :] = [fxt - xt, fyt - yt]
            l_regrs[b_ind, tag_ind, :] = [fxl - xl, fyl - yl]
            b_regrs[b_ind, tag_ind, :] = [fxb - xb, fyb - yb]
            r_regrs[b_ind, tag_ind, :] = [fxr - xr, fyr - yr]
            t_tags[b_ind, tag_ind]  = yt * output_size[1] + xt
            l_tags[b_ind, tag_ind]  = yl * output_size[1] + xl
            b_tags[b_ind, tag_ind]  = yb * output_size[1] + xb
            r_tags[b_ind, tag_ind]  = yr * output_size[1] + xr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    if debug:
        debugger = Debugger(num_classes=80)
        t_hm  = debugger.gen_colormap(t_heatmaps[0])
        l_hm  = debugger.gen_colormap(l_heatmaps[0])
        b_hm  = debugger.gen_colormap(b_heatmaps[0])
        r_hm  = debugger.gen_colormap(r_heatmaps[0])
        ct_hm = debugger.gen_colormap(ct_heatmaps[0])
        img   = images[0] * db.std.reshape(3, 1, 1) + db.mean.reshape(3, 1, 1)
        img   = (img * 255).astype(np.uint8).transpose(1, 2, 0)
        debugger.add_blend_img(img, t_hm, 't_hm')
        debugger.add_blend_img(img, l_hm, 'l_hm')
        debugger.add_blend_img(img, b_hm, 'b_hm')
        debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(
            img, np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm)),
            'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
        debugger.show_all_imgs(pause=True)

    images      = torch.from_numpy(images)
    t_heatmaps  = torch.from_numpy(t_heatmaps)
    l_heatmaps  = torch.from_numpy(l_heatmaps)
    b_heatmaps  = torch.from_numpy(b_heatmaps)
    r_heatmaps  = torch.from_numpy(r_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    t_regrs     = torch.from_numpy(t_regrs)
    l_regrs     = torch.from_numpy(l_regrs)
    b_regrs     = torch.from_numpy(b_regrs)
    r_regrs     = torch.from_numpy(r_regrs)
    t_tags      = torch.from_numpy(t_tags)
    l_tags      = torch.from_numpy(l_tags)
    b_tags      = torch.from_numpy(b_tags)
    r_tags      = torch.from_numpy(r_tags)
    ct_tags     = torch.from_numpy(ct_tags)
    tag_masks   = torch.from_numpy(tag_masks)

    return {
        "xs": [images, t_tags, l_tags, b_tags, r_tags, ct_tags],
        "ys": [
            t_heatmaps, l_heatmaps, b_heatmaps, r_heatmaps, ct_heatmaps,
            tag_masks, t_regrs, l_regrs, b_regrs, r_regrs
        ]
    }, k_ind
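# For reference, a minimal sketch of the heatmap-drawing helpers used above.
# These definitions are assumptions modeled on the common CornerNet-style
# utilities, not taken verbatim from this codebase; the `delte` parameter
# controls the gaussian sharpness and matches the `delte=5` call site in the
# corner/center sampler further below.
def gaussian2D(shape, sigma=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

def draw_gaussian(heatmap, center, radius, delte=6):
    # splat a gaussian bump of the given radius onto the heatmap, keeping
    # the element-wise maximum where bumps overlap
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / delte)

    x, y = center
    height, width = heatmap.shape[0:2]

    # clip the bump at the heatmap borders
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap  = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom,
                               radius - left:radius + right]
    np.maximum(masked_heatmap, masked_gaussian, out=masked_heatmap)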
images  = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
ratios  = np.zeros((1, 2), dtype=np.float32)
borders = np.zeros((1, 4), dtype=np.float32)
sizes   = np.zeros((1, 2), dtype=np.float32)

out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
height_ratio = out_height / inp_height
width_ratio  = out_width / inp_width

resized_image = cv2.resize(image, (new_width, new_height))
resized_image, border, offset = crop_image(
    resized_image, new_center, [inp_height, inp_width])

resized_image = resized_image / 255.
normalize_(resized_image, mean, std)

images[0]  = resized_image.transpose((2, 0, 1))
borders[0] = border
sizes[0]   = [int(height * scale), int(width * scale)]
ratios[0]  = [height_ratio, width_ratio]

# test-time flip augmentation: append the horizontally flipped image
images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
images = torch.from_numpy(images)

dets = kp_decode(nnet, images, K,
                 aggr_weight=aggr_weight,
                 scores_thresh=scores_thresh,
                 center_thresh=center_thresh,
                 kernel=nms_kernel)
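# A minimal sketch of the `kp_decode` wrapper assumed by the call above. The
# keyword set simply mirrors the call site and `nnet.test` is this framework's
# inference entry point, so treat the exact signature as an assumption rather
# than the codebase's definitive implementation.
def kp_decode(nnet, images, K, aggr_weight=0.1, scores_thresh=0.1,
              center_thresh=0.1, kernel=3):
    # run the network and move the decoded detections back to the host
    detections = nnet.test(
        [images], kernel=kernel, aggr_weight=aggr_weight,
        scores_thresh=scores_thresh, center_thresh=center_thresh, K=K)
    return detections.data.cpu().numpy()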
def apply_detection(image, nnet, scales, decode_func, top_k, avg, std,
                    categories, merge_bbox, max_per_image=100,
                    ae_threshold=0.5, nms_kernel=3, nms_algorithm=2,
                    nms_threshold=0.45, weight_exp=1):
    height, width = image.shape[0:2]

    detections    = []
    center_points = []
    for scale in scales:
        new_height = int(height * scale)
        new_width  = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # x | 127 pads x up to the next value of the form 128k - 1,
        # e.g. 511 | 127 == 511 and 512 | 127 == 639
        inp_height = new_height | 127
        inp_width  = new_width | 127

        images  = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios  = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes   = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio  = out_width / inp_width

        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(
            resized_image, new_center, [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, avg, std)

        images[0]  = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0]   = [int(height * scale), int(width * scale)]
        ratios[0]  = [height_ratio, width_ratio]

        # test-time flip augmentation: append the horizontally flipped image
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)

        # do detection
        dets, center = decode_func(nnet, images, top_k,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)

        # post processing: undo the flip for the second image in the pair
        dets   = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]]  = out_width - center[1, :, [0]]
        dets   = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        # map detections and center points back to original image coordinates
        _rescale_dets(dets, ratios, borders, sizes)
        center[..., [0]] /= ratios[:, 1][:, None, None]
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None],
                out=center[..., [0]])
        np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None],
                out=center[..., [1]])
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections    = np.concatenate(detections, axis=1)
    center_points = np.concatenate(center_points, axis=1)

    classes       = detections[..., -1]
    classes       = classes[0]
    detections    = detections[0]
    center_points = center_points[0]

    valid_ind        = detections[:, 4] > -1
    valid_detections = detections[valid_ind]

    # split detections into small and large boxes (area threshold 150 x 150)
    box_width  = valid_detections[:, 2] - valid_detections[:, 0]
    box_height = valid_detections[:, 3] - valid_detections[:, 1]

    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)

    s_detections = valid_detections[s_ind]
    l_detections = valid_detections[l_ind]

    # for small boxes, the central region is the middle third of the box
    s_left_x   = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
    s_right_x  = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
    s_top_y    = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1

    center_x   = center_points[:, 0][:, np.newaxis]
    center_y   = center_points[:, 1][:, np.newaxis]
    s_left_x   = s_left_x[np.newaxis, :]
    s_right_x  = s_right_x[np.newaxis, :]
    s_top_y    = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]

    # keep a small box only if a same-class center point falls inside its
    # central region; the commented-out loop below is the scalar equivalent
    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) &
                              (ind_ty + 0) & (ind_by + 0) &
                              (ind_cls + 0)), axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_s_new_score], axis=0)
    s_detections[:, 4][ind_s_new_score] = (
        s_temp_score[ind_s_new_score] * 2 +
        center_points[index_s_new_score, 3]) / 3

    # for large boxes, the central region shrinks to the middle fifth
    l_left_x   = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x  = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y    = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1

    center_x   = center_points[:, 0][:, np.newaxis]
    center_y   = center_points[:, 1][:, np.newaxis]
    l_left_x   = l_left_x[np.newaxis, :]
    l_right_x  = l_right_x[np.newaxis, :]
    l_top_y    = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]

    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               l_detections[:, -1][np.newaxis, :]) == 0
    ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) &
                              (ind_ty + 0) & (ind_by + 0) &
                              (ind_cls + 0)), axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_l_new_score], axis=0)
    l_detections[:, 4][ind_l_new_score] = (
        l_temp_score[ind_l_new_score] * 2 +
        center_points[index_l_new_score, 3]) / 3

    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]
    classes = detections[..., -1]

    # scalar equivalent of the vectorized center-region check above:
    # for i in range(detections.shape[0]):
    #     box_width  = detections[i, 2] - detections[i, 0]
    #     box_height = detections[i, 3] - detections[i, 1]
    #     if box_width * box_height <= 22500 and detections[i, 4] != -1:
    #         left_x   = (2 * detections[i, 0] + 1 * detections[i, 2]) / 3
    #         right_x  = (1 * detections[i, 0] + 2 * detections[i, 2]) / 3
    #         top_y    = (2 * detections[i, 1] + 1 * detections[i, 3]) / 3
    #         bottom_y = (1 * detections[i, 1] + 2 * detections[i, 3]) / 3
    #         temp_score = copy.copy(detections[i, 4])
    #         detections[i, 4] = -1
    #         for j in range(center_points.shape[0]):
    #             if (classes[i] == center_points[j, 2]) and \
    #                (center_points[j, 0] > left_x and center_points[j, 0] < right_x) and \
    #                (center_points[j, 1] > top_y and center_points[j, 1] < bottom_y):
    #                 detections[i, 4] = (temp_score * 2 + center_points[j, 3]) / 3
    #                 break
    #     elif box_width * box_height > 22500 and detections[i, 4] != -1:
    #         left_x   = (3 * detections[i, 0] + 2 * detections[i, 2]) / 5
    #         right_x  = (2 * detections[i, 0] + 3 * detections[i, 2]) / 5
    #         top_y    = (3 * detections[i, 1] + 2 * detections[i, 3]) / 5
    #         bottom_y = (2 * detections[i, 1] + 3 * detections[i, 3]) / 5
    #         temp_score = copy.copy(detections[i, 4])
    #         detections[i, 4] = -1
    #         for j in range(center_points.shape[0]):
    #             if (classes[i] == center_points[j, 2]) and \
    #                (center_points[j, 0] > left_x and center_points[j, 0] < right_x) and \
    #                (center_points[j, 1] > top_y and center_points[j, 1] < bottom_y):
    #                 detections[i, 4] = (temp_score * 2 + center_points[j, 3]) / 3
    #                 break

    # reject detections with negative scores
    keep_inds  = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes    = classes[keep_inds]

    final_bboxes = {}
    for j in range(categories):
        keep_inds = (classes == j)
        final_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(final_bboxes[j + 1], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(final_bboxes[j + 1], Nt=nms_threshold,
                     method=nms_algorithm)
        final_bboxes[j + 1] = final_bboxes[j + 1][:, 0:5]

    # keep at most max_per_image detections across all categories
    scores = np.hstack(
        [final_bboxes[j][:, -1] for j in range(1, categories + 1)])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (final_bboxes[j][:, -1] >= thresh)
            final_bboxes[j] = final_bboxes[j][keep_inds]

    return final_bboxes
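# `_rescale_dets` is called above but not defined in this listing. A plausible
# sketch, assuming it mirrors the inline rescaling that `apply_detection`
# applies to the center points (output grid -> input pixels -> un-crop ->
# clip); treat the exact body as an assumption:
def _rescale_dets(detections, ratios, borders, sizes):
    # views into the box coordinates, so in-place ops update `detections`
    xs, ys = detections[..., 0:4:2], detections[..., 1:4:2]
    xs /= ratios[:, 1][:, None, None]
    ys /= ratios[:, 0][:, None, None]
    xs -= borders[:, 2][:, None, None]
    ys -= borders[:, 0][:, None, None]
    np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs)
    np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys)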
def kp_detection(db, k_ind, data_aug, debug):
    data_rng   = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories  = db.configs["categories"]
    input_size  = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border        = db.configs["border"]
    lighting      = db.configs["lighting"]
    rand_crop     = db.configs["rand_crop"]
    rand_color    = db.configs["rand_color"]
    rand_scales   = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou  = db.configs["gaussian_iou"]
    gaussian_rad  = db.configs["gaussian_radius"]

    max_tag_len = 500

    # allocating memory
    images      = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)
    tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    tl_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    ct_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks   = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens    = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind  = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image      = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales,
                                            input_size, border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        width_ratio  = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
            if lighting:
                lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))

        for ind, detection in enumerate(detections):
            category = int(detection[-1]) - 1

            # top-left and bottom-right corners plus the box center
            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct, yct = ((detection[2] + detection[0]) / 2.,
                        (detection[3] + detection[1]) / 2.)

            # map to output resolution, keeping the fractional coordinates
            # for the offset-regression targets below
            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump:
                width  = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width  = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
                # the center heatmap uses a sharper gaussian (delte=5)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct],
                              radius, delte=5)
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1

            tag_ind = tag_lens[b_ind]
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    images      = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs    = torch.from_numpy(tl_regrs)
    br_regrs    = torch.from_numpy(br_regrs)
    ct_regrs    = torch.from_numpy(ct_regrs)
    tl_tags     = torch.from_numpy(tl_tags)
    br_tags     = torch.from_numpy(br_tags)
    ct_tags     = torch.from_numpy(ct_tags)
    tag_masks   = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [
            tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks,
            tl_regrs, br_regrs, ct_regrs
        ]
    }, k_ind
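# The tl/br/ct tag arrays above encode each keypoint location as a flat index
# into the output_size[0] x output_size[1] feature map, so per-keypoint
# features can be gathered from the flattened map during training. A tiny
# self-contained illustration (the values are made up):
out_h, out_w = 128, 128
y, x = 37, 91
flat_index = y * out_w + x          # encoding, as in `ytl * output_size[1] + xtl`
yy, xx = divmod(flat_index, out_w)  # decoding
assert (yy, xx) == (y, x)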
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K             = db.configs["top_k"]
    ae_threshold  = db.configs["ae_threshold"]
    nms_kernel    = db.configs["nms_kernel"]
    scales        = db.configs["test_scales"]
    weight_exp    = db.configs["weight_exp"]
    merge_bbox    = db.configs["merge_bbox"]
    categories    = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    max_height = 600
    max_width  = 1000

    detections = []
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]

        image_file = db.image_file(db_ind)
        ori_image  = cv2.imread(image_file)
        ori_height, ori_width = ori_image.shape[0:2]
        print("image name: %s, width: %d, height: %d"
              % (image_file, ori_width, ori_height))

        # resize so the image fits within max_height x max_width
        height = min(max_height, ori_height)
        width  = min(max_width, ori_width)
        input_image = cv2.resize(ori_image, (width, height))

        inp_height = max_height | 127
        inp_width  = max_width | 127
        input_image_full = cv2.resize(ori_image, (inp_width, inp_height))

        images = np.zeros((2, 3, inp_height, inp_width), dtype=np.float32)

        input_image = input_image / 255.
        normalize_(input_image, db.mean, db.std)
        input_image_full = input_image_full / 255.
        normalize_(input_image_full, db.mean, db.std)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4

        # image 0: aspect-preserving resize padded into the input canvas;
        # image 1: full-frame resize that fills the canvas
        images[0, :, 0:height, 0:width] = input_image.transpose((2, 0, 1))
        images[1] = input_image_full.transpose((2, 0, 1))
        images = torch.from_numpy(images)

        detections_tl, detections_br, flag = decode_func(
            nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)

        # map keypoints of image 0 from output coordinates back to the
        # original image
        ratio_height = inp_height / out_height * ori_height / height
        ratio_width  = inp_width / out_width * ori_width / width
        detections_tl[3, 0] *= ratio_width
        detections_tl[4, 0] *= ratio_height
        detections_br[3, 0] *= ratio_width
        detections_br[4, 0] *= ratio_height

        # same for image 1, the full-frame resize
        ratio_height = inp_height / out_height * ori_height / inp_height
        ratio_width  = inp_width / out_width * ori_width / inp_width
        detections_tl[3, 1] *= ratio_width
        detections_tl[4, 1] *= ratio_height
        detections_br[3, 1] *= ratio_width
        detections_br[4, 1] *= ratio_height

        if flag:
            detections.append([detections_tl, detections_br])

    result_pickle = os.path.join(result_dir, "results_points.pickle")
    with open(result_pickle, "wb") as f:
        pickle.dump(detections, f)

    return 0
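# Hedged usage sketch: loading the keypoint results pickled above. The
# [detections_tl, detections_br] pair layout follows the append call in
# kp_detection; `load_point_results` is a hypothetical helper name and
# `result_dir` is whatever directory was passed to kp_detection.
import os
import pickle

def load_point_results(result_dir):
    result_pickle = os.path.join(result_dir, "results_points.pickle")
    with open(result_pickle, "rb") as f:
        detections = pickle.load(f)
    # each entry is a [detections_tl, detections_br] pair for one image
    return detections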