def draw_detections_3D(image, detections, cam, model_map):
    """Draws 6D detections onto a resized image

    Parameters
    ----------
    image: Numpy array, normalized to [0-1]
    detections: A list of detections for this image, coming from SSD.detect()
        in the form [l, t, r, b, name, confidence, 6D_pose0, ..., 6D_poseN]
    cam: Intrinsics for rendering
    model_map: Mapping of model name to Model3D instance {'obj': model3D}
    """
    if not detections:
        return np.copy(image)

    ren = Renderer((image.shape[1], image.shape[0]), cam)
    ren.clear()
    out = np.copy(image)
    for det in detections:
        model = model_map[det[4]]
        for pose in det[6:]:
            ren.draw_model(model, pose)
            ren.draw_boundingbox(model, pose)
    col, dep = ren.finish()

    # Copy the rendering over into the scene
    mask = np.dstack((dep, dep, dep)) > 0
    out[mask] = col[mask]
    return out
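
# The depth buffer drives the compositing above: wherever the renderer wrote
# depth, the rendered color replaces the scene pixel. A tiny self-contained
# demo of that masking on toy arrays (illustrative only, not repo API):
import numpy as np

def _demo_depth_composite():
    background = np.zeros((4, 4, 3), np.float32)   # black scene
    rendering = np.ones((4, 4, 3), np.float32)     # white rendering
    depth = np.zeros((4, 4), np.float32)
    depth[1:3, 1:3] = 0.5                          # object covers a 2x2 patch
    mask = np.dstack((depth, depth, depth)) > 0    # broadcast mask to 3 channels
    out = np.copy(background)
    out[mask] = rendering[mask]                    # composite only where depth > 0
    return out                                     # white 2x2 patch on black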
def precompute_projections(dataset, views, cam, model_map, models):
    """Precomputes the projection information needed for 6D pose construction"""
    save_path = f"output/{dataset}"  # where the rendered images are saved
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    w, h = 640, 480
    ren = Renderer((w, h), cam)
    for model_name in models:
        count = 0  # image counter
        scene_camera = {}
        scene_gt = {}
        model_path = os.path.join(save_path, model_name[-6:])
        rgb_dir = os.path.join(model_path, 'rgb')
        depth_dir = os.path.join(model_path, 'mask')
        ensure_dir(rgb_dir)
        ensure_dir(depth_dir)
        for i in tqdm(range(len(views))):
            pose = create_pose(views[i], angle_deg=0)
            pose[:3, 3] = [0, 0, 0.5]  # zr = 0.5

            model = model_map[model_name]  # look up the model to render
            ren.clear()
            ren.draw_model(model, pose)
            # ren.draw_boundingbox(model, pose)
            col, dep = ren.finish()  # depth scale factor is 0.001
            col *= 255
            dep[dep > 0] = 255

            # cam2d = project(model.fps, pose, cam)
            # ys, xs = np.nonzero(dep > 0)
            # xs_min, ys_min = xs.min(), ys.min()
            # xs_max, ys_max = xs.max(), ys.max()

            scene_camera[str(i)] = {
                'cam_K': cam.flatten().tolist(),
                'depth_scale': 1,
                'view_level': 4,
            }
            scene_gt[str(i)] = [{
                'cam_R_m2c': pose[:3, :3].flatten().tolist(),
                'cam_t_m2c': pose[:3, 3].flatten().tolist(),
                'obj_id': int(model_name[-2:])
            }]

            # Draw the FPS (farthest point sampling) keypoints
            # for i_p, point in enumerate(cam2d):
            #     color = (0, 255, 0) if i_p == 0 else (0, 0, 255)
            #     col = cv2.circle(col, (point[0], point[1]), 3, color, thickness=-1)

            cv2.imwrite(os.path.join(rgb_dir, str(count).zfill(6) + '.png'), col)
            cv2.imwrite(os.path.join(depth_dir, str(count).zfill(6) + '_000000' + '.png'), dep)
            count += 1

        save_json(os.path.join(model_path, 'scene_camera.json'), scene_camera)
        save_json(os.path.join(model_path, 'scene_gt.json'), scene_gt)
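
# The function above relies on ensure_dir/save_json helpers defined elsewhere
# in the repo. If your copy does not ship them, minimal stand-ins could look
# like this (a sketch of the assumed behavior, not the repo's own code):
import json
import os

def ensure_dir(path):
    """Create `path` (and any missing parents) if it does not exist yet."""
    if not os.path.exists(path):
        os.makedirs(path)

def save_json(path, content):
    """Write `content` as BOP-style JSON with a stable key order."""
    with open(path, 'w') as f:
        json.dump(content, f, indent=2, sort_keys=True)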
def verify_6D_poses(detections, model_map, cam, image):
    """For one image, select for each detection the best pose from the 6D pool

    # Arguments
        detections: List of predictions for one image. Each prediction is:
            [xmin, ymin, xmax, ymax, label, confidence, (pose00), ..., (poseNM)]
            where poseXX is a 4x4 matrix
        model_map: Mapping of model name to Model3D instance {'obj': model3D}
        cam: Intrinsics to use for backprojection
        image: The scene color image

    # Returns
        filtered: List of predictions for one image. Each prediction has the form:
            [xmin, ymin, xmax, ymax, label, confidence, pose]
            where pose is a 4x4 matrix
    """

    def compute_grads_and_mags(color):
        gray = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)
        grads = np.dstack((cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=5),
                           cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=5)))
        mags = np.sqrt(np.sum(grads ** 2, axis=2)) + 0.001  # To avoid div/0
        grads /= np.dstack((mags, mags))
        mask = mags < 5  # Suppress weak gradients
        mags[mask] = 0
        grads[np.dstack((mask, mask))] = 0
        return grads, mags

    scene_grads, scene_mags = compute_grads_and_mags(image)
    scene_grads = np.reshape(scene_grads, (-1, 2))
    # cv2.imshow('mags', scene_mags)

    ren = Renderer((image.shape[1], image.shape[0]), cam)
    filtered = []
    for det in detections:
        model = model_map[det[4]]
        scores = []
        for pose in det[6:]:
            ren.clear()
            ren.draw_model(model, pose)
            ren_grads, ren_mags = compute_grads_and_mags(ren.finish()[0])
            ren_grads = np.reshape(ren_grads, (-1, 2))
            dot = np.sum(np.abs(ren_grads[:, 0] * scene_grads[:, 0] +
                                ren_grads[:, 1] * scene_grads[:, 1]))
            num_edge_px = np.sum(ren_mags > 0)  # avoid shadowing the built-in 'sum'
            scores.append(dot / num_edge_px)

        new_det = det[:6]
        new_det.append(det[6 + np.argmax(np.asarray(scores))])  # Keep only the best-scoring pose
        filtered.append(new_det)

    return filtered
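
# The per-pose score above is the mean absolute gradient alignment over the
# rendered edge pixels. An isolated, self-contained version of that scoring,
# with a divide-by-zero guard added (an illustrative refactor, not repo API):
import numpy as np

def gradient_alignment_score(ren_grads, ren_mags, scene_grads):
    """Mean |<g_render, g_scene>| over pixels where the rendering has edges.

    ren_grads/scene_grads: (H*W, 2) unit gradients; ren_mags: (H, W) magnitudes.
    """
    dot = np.sum(np.abs(ren_grads[:, 0] * scene_grads[:, 0] +
                        ren_grads[:, 1] * scene_grads[:, 1]))
    num_edge_px = np.sum(ren_mags > 0)
    return dot / max(int(num_edge_px), 1)  # empty rendering scores 0 instead of NaN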
def precompute_projections(views, inplanes, cam, model3D):
    """Precomputes the projection information needed for 6D pose construction

    # Arguments
        views: List of 3D viewpoint positions
        inplanes: List of inplane angles in degrees
        cam: Intrinsics to use for translation estimation
        model3D: Model3D instance

    # Returns
        data: a 3D list with precomputed entities with shape
            (views, inplanes, (4x4 pose matrix, 3))
    """
    w, h = 640, 480
    ren = Renderer((w, h), cam)
    data = []
    if model3D.vertices is None:
        return data

    for v in tqdm(range(len(views))):
        data.append([])
        for i in inplanes:
            pose = create_pose(views[v], angle_deg=i)
            pose[:3, 3] = [0, 0, 0.5]  # zr = 0.5

            # Render object and extract tight 2D bbox and projected 2D centroid
            ren.clear()
            ren.draw_model(model3D, pose)
            box = np.argwhere(ren.finish()[1])  # Deduce bbox from depth rendering
            box = [box.min(0)[1], box.min(0)[0],
                   box.max(0)[1] + 1, box.max(0)[0] + 1]

            centroid = np.matmul(pose[:3, :3], model3D.centroid) + pose[:3, 3]
            centroid_x = cam[0, 2] + centroid[0] * cam[0, 0] / centroid[2]
            centroid_y = cam[1, 2] + centroid[1] * cam[1, 1] / centroid[2]

            # Compute 2D centroid position in normalized, box-local reference frame
            box_w, box_h = (box[2] - box[0]), (box[3] - box[1])
            norm_centroid_x = (centroid_x - box[0]) / box_w
            norm_centroid_y = (centroid_y - box[1]) / box_h

            # Compute normalized diagonal box length
            lr = np.sqrt((box_w / w) ** 2 + (box_h / h) ** 2)
            data[-1].append((pose, [norm_centroid_x, norm_centroid_y, lr]))

    return data
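
# At inference, the precomputed (norm_centroid_x, norm_centroid_y, lr) entries
# let a detected 2D box be lifted to a full 6D pose: depth follows from the
# ratio of box diagonals, and x/y from backprojecting the centroid. A sketch
# of that construction, following the SSD-6D scheme; the repo's own consumer
# function may differ in names and details:
import numpy as np

def build_6D_pose(det_box, view_pose, projection, cam, render_z=0.5, w=640, h=480):
    """det_box: [xmin, ymin, xmax, ymax] in pixels;
    view_pose: 4x4 pose stored in `data`; projection: its (ncx, ncy, lr) entry."""
    norm_cx, norm_cy, lr_render = projection
    box_w, box_h = det_box[2] - det_box[0], det_box[3] - det_box[1]
    lr_det = np.sqrt((box_w / w) ** 2 + (box_h / h) ** 2)

    # Depth from the ratio of normalized box diagonals (object rendered at render_z)
    z = render_z * (lr_render / lr_det)

    # Centroid pixel inside the detected box, then backproject with the intrinsics
    cx = det_box[0] + norm_cx * box_w
    cy = det_box[1] + norm_cy * box_h
    x = (cx - cam[0, 2]) * z / cam[0, 0]
    y = (cy - cam[1, 2]) * z / cam[1, 1]

    pose = view_pose.copy()
    pose[:3, 3] = [x, y, z]
    return pose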
# Perturb the ground-truth pose, then refine it back via contour alignment
_, gt_pose, _ = frame.gt[0]
perturbed_pose = perturb_pose(gt_pose, max_rot_pert, max_trans_pert)

refinable = Refinable(model=bench.models[str(int(obj))], label=0, hypo_pose=perturbed_pose,
                      metric_crop_shape=croppings[dataset_name]['obj_{:02d}'.format(int(obj))],
                      input_col=col)

for i in range(iterations):
    refinable.input_col = col.copy()

    start = timer()  # 'timer' is assumed to be timeit.default_timer, imported elsewhere
    refiner.iterative_contour_alignment(refinable=refinable, max_iterations=1)
    end = timer()

    # Rendering of results
    ren.clear()
    ren.draw_background(col)
    ren.draw_boundingbox(refinable.model, refinable.hypo_pose)
    ren.draw_model(refinable.model, refinable.hypo_pose, ambient=0.5, specular=0,
                   shininess=100, light_col=[1, 1, 1], light=[0, 0, -1])
    render_col, _ = ren.finish()
    render_col = render_col.copy()

    cv2.imshow("Input Image", col)

    # Draw FPS (frames per second) in top left corner
    fps = "FPS: " + str(int(1 / (end - start)))
    cv2.rectangle(render_col, (0, 0), (133, 40), (1., 1., 1.), -1)
    cv2.putText(render_col, fps, (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
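    # The fragment builds 'render_col' but never displays it; a plausible
    # completion of the loop body (an assumption about the surrounding demo
    # script, not the repo's own code):
    cv2.imshow("Refined Pose", render_col)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to abort the refinement loop
        break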