def show_lidar_with_boxes(pc_velo, objects, calib, img_fov=False, img_width=None, img_height=None, fig=None): ''' Show all LiDAR points. Draw 3d box in LiDAR point cloud (in velo coord system) ''' if not fig: fig = mlab.figure(figure="KITTI_POINT_CLOUD", bgcolor=(0, 0, 0), fgcolor=None, engine=None, size=(1250, 550)) if img_fov: pc_velo = get_lidar_in_image_fov(pc_velo, calib, 0, 0, img_width, img_height) draw_lidar(pc_velo, fig1=fig) for obj in objects: if obj.type == 'DontCare': continue # Draw 3d bounding box box3d_pts_2d, box3d_pts_3d = kitti_data_utils.compute_box_3d( obj, calib.P) box3d_pts_3d_velo = calib.project_rect_to_velo(box3d_pts_3d) # Draw heading arrow ori3d_pts_2d, ori3d_pts_3d = kitti_data_utils.compute_orientation_3d( obj, calib.P) ori3d_pts_3d_velo = calib.project_rect_to_velo(ori3d_pts_3d) x1, y1, z1 = ori3d_pts_3d_velo[0, :] x2, y2, z2 = ori3d_pts_3d_velo[1, :] draw_gt_boxes3d([box3d_pts_3d_velo], fig=fig, color=(0, 1, 1), line_width=2, draw_text=False) mlab.plot3d([x1, x2], [y1, y2], [z1, z2], color=(0.5, 0.5, 0.5), tube_radius=None, line_width=1, figure=fig) mlab.view(distance=90)
def show_image_with_boxes(img, objects, calib, show3d=False): ''' Show image with 2D bounding boxes ''' img2 = np.copy(img) # for 3d bbox for obj in objects: if obj.type == 'DontCare': continue # cv2.rectangle(img2, (int(obj.xmin),int(obj.ymin)), # (int(obj.xmax),int(obj.ymax)), (0,255,0), 2) box3d_pts_2d, box3d_pts_3d = kitti_data_utils.compute_box_3d( obj, calib.P) if box3d_pts_2d is not None: img2 = kitti_data_utils.draw_projected_box3d( img2, box3d_pts_2d, cnf.colors[obj.cls_id]) if show3d: cv2.imshow("img", img2) return img2
def draw_predictions(img, detections, colors, K, num_classes=3, show_3dbox=False): for j in range(num_classes): if len(detections[j] > 0): for det in detections[j]: # (scores-0:1, xs-1:2, ys-2:3, wh-3:5, bboxes-5:9, ver_coor-9:25, rot-25:26, depth-26:27, dim-27:30) _score = det[0] _x, _y, _wh, _bbox, _ver_coor = int(det[1]), int( det[2]), det[3:5], det[5:9], det[9:25] _rot, _depth, _dim = det[25], det[26], det[27:30] _bbox = np.array(_bbox, dtype=np.int) pad_y = (384 - img.shape[0]) // 2 pad_x = (1280 - img.shape[1]) // 2 _bbox = [ _bbox[0] - pad_x, _bbox[1] - pad_y, _bbox[2] - pad_x, _bbox[3] - pad_y ] _loc = image_to_project([_x - pad_x, _bbox[-1]], _depth, K[:3, :3]) # cent_3d = np.array(np.dot(K[:3,:3], _loc), np.int) # img = cv2.circle(img, tuple(cent_3d[:2]), 3, (0, 255, 0), -1) # _loc = np.array([1.84, 1.47, 8.41]) # _dim = np.array([1.89, 0.48, 1.20]) bbox_3d = compute_box_3d(_dim, _loc, _rot) bbox_3d = project_to_image(bbox_3d, K) # import pdb;pdb.set_trace() img = cv2.rectangle(img, (_bbox[0], _bbox[1]), (_bbox[2], _bbox[3]), colors[-j - 1], 2) if show_3dbox: _ver_coor = np.array(bbox_3d, dtype=np.int).reshape(-1, 2) img = draw_box_3d(img, _ver_coor, color=colors[j]) # print('_depth: {:.2f}n, _dim: {}, _rot: {:.2f} radian'.format(_depth, _dim, _rot)) return img
def client_recv(self, client, address): while True: # read message from socket # client_msg_0\x00\x00\x00\x00\x00... msg = client.recv(1024).decode("utf-8") msg = msg.rstrip("\x00") if msg == '': return if msg == "EOF": return elif msg == "quit_client": client.close() # self.sock.close() print("> client exit...") return elif msg == "quit_server": client.close() self.sock.close() print("> server exit...") sys.exit(0) else: print("> -------", time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())), "-------") print("> receive the msg from client : {0}".format(msg)) print('> inference for {0}'.format(msg)) if(self.need_create_window): # NOTE ObjSLAM cv2.namedWindow("YOLO", flags=cv2.WINDOW_GUI_NORMAL) self.need_create_window = False # Inference with torch.no_grad(): # img_paths, imgs_bev = self.test_dataloader_iter.next() img_paths, imgs_bev = self.test_dataset[int(msg)] img_paths = [img_paths] imgs_bev = torch.from_numpy( np.expand_dims(imgs_bev, axis=0)) input_imgs = imgs_bev.to( device=self.configs.device).float() outputs = self.model(input_imgs) detections = post_processing_v2( outputs, conf_thresh=self.configs.conf_thresh, nms_thresh=self.configs.nms_thresh) img_detections = [] # Stores detections for each image index img_detections.extend(detections) img_bev = imgs_bev.squeeze() * 255 img_bev = img_bev.permute(1, 2, 0).numpy().astype(np.uint8) img_bev = cv2.resize( img_bev, (self.configs.img_size, self.configs.img_size)) for detections in img_detections: if detections is None: continue # Rescale boxes to original image detections = rescale_boxes( detections, self.configs.img_size, img_bev.shape[:2]) for x, y, w, l, im, re, *_, cls_pred in detections: yaw = np.arctan2(im, re) # Draw rotated box kitti_bev_utils.drawRotatedBox( img_bev, x, y, w, l, yaw, cnf.colors[int(cls_pred)]) img_rgb = cv2.imread(img_paths[0]) calib = kitti_data_utils.Calibration(img_paths[0].replace( ".png", ".txt").replace("image_2", "calib")) objects_pred = predictions_to_kitti_format( img_detections, calib, img_rgb.shape, self.configs.img_size) # NOTE: 输出json的代码 frame_object_list = [] for i in objects_pred: frame_object = dict() frame_object['type'] = i.type frame_object['center'] = i.t frame_object['length'] = i.l frame_object['width'] = i.w frame_object['height'] = i.h frame_object['theta'] = i.ry box3d_pts_2d, _ = kitti_data_utils.compute_box_3d( i, calib.P) if box3d_pts_2d is None: frame_object['box3d_pts_2d'] = box3d_pts_2d elif box3d_pts_2d.size == 16: frame_object['box3d_pts_2d'] = box3d_pts_2d else: frame_object['box3d_pts_2d'] = box3d_pts_2d[:8, :] frame_object_list.append(frame_object) result = json.dumps(frame_object_list, cls=NumpyEncoder) img_bev = cv2.flip(cv2.flip(img_bev, 0), 1) scale = 1.5 cv2.resizeWindow("YOLO", width=int(img_bev.shape[1] * scale), height=int(img_bev.shape[0] * scale)) cv2.imshow('YOLO', img_bev) cv2.waitKey(10) self.batch_idx += 1 if len(result) > self.configs.max_length: print("> WARNING: STRING IS TOO LONG! (MAX_LENGTH {0})".format( self.configs.max_length)) client.send(result.encode(encoding='utf-8')) print("> send the responce back to client, string length: {0}".format( len(result))) return
def predictions_to_kitti_format(img_detections, calib, img_shape_2d, img_size, RGB_Map=None): predictions = [] for detections in img_detections: if detections is None: continue # Rescale boxes to original image for x, y, z, h, w, l, im, re, *_, cls_pred in detections: predictions.append([ cls_pred, x / img_size, y / img_size, z, h / img_size, w / img_size, l / img_size, im, re ]) predictions = kitti_bev_utils.inverse_yolo_target(np.array(predictions), cnf.boundary) if predictions.shape[0]: predictions[:, 1:] = transformation.lidar_to_camera_box( predictions[:, 1:], calib.V2C, calib.R0, calib.P) objects_new = [] corners3d = [] for index, l in enumerate(predictions): if l[0] == 0: str = "Car" elif l[0] == 1: str = "Pedestrian" elif l[0] == 2: str = "Cyclist" else: str = "Ignore" line = '%s -1 -1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0' % str obj = kitti_data_utils.Object3d(line) obj.t = l[1:4] obj.h, obj.w, obj.l = l[4:7] obj.ry = np.arctan2(math.sin(l[7]), math.cos(l[7])) _, corners_3d = kitti_data_utils.compute_box_3d(obj, calib.P) corners3d.append(corners_3d) objects_new.append(obj) if len(corners3d) > 0: corners3d = np.array(corners3d) img_boxes, _ = calib.corners3d_to_img_boxes(corners3d) img_boxes[:, 0] = np.clip(img_boxes[:, 0], 0, img_shape_2d[1] - 1) img_boxes[:, 1] = np.clip(img_boxes[:, 1], 0, img_shape_2d[0] - 1) img_boxes[:, 2] = np.clip(img_boxes[:, 2], 0, img_shape_2d[1] - 1) img_boxes[:, 3] = np.clip(img_boxes[:, 3], 0, img_shape_2d[0] - 1) img_boxes_w = img_boxes[:, 2] - img_boxes[:, 0] img_boxes_h = img_boxes[:, 3] - img_boxes[:, 1] box_valid_mask = np.logical_and(img_boxes_w < img_shape_2d[1] * 0.8, img_boxes_h < img_shape_2d[0] * 0.8) for i, obj in enumerate(objects_new): x, z, ry = obj.t[0], obj.t[2], obj.ry beta = np.arctan2(z, x) alpha = -np.sign(beta) * np.pi / 2 + beta + ry obj.alpha = alpha obj.box2d = img_boxes[i, :] if RGB_Map is not None: labels, noObjectLabels = kitti_bev_utils.read_labels_for_bevbox( objects_new) if not noObjectLabels: labels[:, 1:] = transformation.camera_to_lidar_box( labels[:, 1:], calib.V2C, calib.R0, calib.P) # convert rect cam to velo cord target = kitti_bev_utils.build_yolo_target(labels) kitti_bev_utils.draw_box_in_bev(RGB_Map, target) return objects_new