def get_IOU(img_original,
            bboxes,
            segms,
            six_dof,
            car_id2name,
            car_model_dict,
            unique_car_mode,
            camera_matrix,
            crop_top=1480):
    """Append a mask-vs-rendered-mesh IoU score to every bounding box.

    For each detection the predicted car model is posed with the predicted
    rotation and translation, projected with ``camera_matrix`` and filled
    into a binary mask.  The IoU between that mask and the decoded
    instance mask is written as one extra trailing column of the box.

    Args:
        img_original (ndarray): Full 3-D image; only its shape is used.
        bboxes (ndarray): 2-D array with one row per detection.
        segms (sequence): Per-detection encoded masks; each entry is passed
            to ``maskUtils.decode``.
        six_dof (dict): Must provide ``'quaternion_pred'``,
            ``'car_cls_score_pred'`` and ``'trans_pred_world'``.
        car_id2name (mapping): Car id -> object exposing ``.name``.
        car_model_dict (mapping): Car name -> dict with ``'vertices'`` and
            ``'faces'`` (face indices are shifted by -1 before use).
        unique_car_mode (sequence): Class index -> car id.
        camera_matrix (ndarray): Matrix applied to the 3xN camera-frame
            points (full-image intrinsics; the crop offset is removed
            afterwards).
        crop_top (int): Number of top image rows that are cropped away.
            Defaults to 1480, the value previously hard-coded.

    Returns:
        ndarray: Array of shape ``(n, bboxes.shape[1] + 1)`` with the dtype
        of ``bboxes``; the last column is the IoU (0 when both masks are
        empty).
    """
    # Only the spatial size of the cropped image is needed, so no copy.
    mask_shape = img_original[crop_top:, :, :].shape[:-1]
    bboxes_with_IOU = np.zeros((bboxes.shape[0], bboxes.shape[1] + 1),
                               dtype=bboxes.dtype)

    quaternion_pred = six_dof['quaternion_pred']
    trans_pred_world = six_dof['trans_pred_world']
    euler_angles = np.array(
        [quaternion_to_euler_angle(q) for q in quaternion_pred])
    car_labels = np.argmax(six_dof['car_cls_score_pred'], axis=1)
    car_names = [car_id2name[unique_car_mode[x]].name for x in car_labels]

    for bbox_idx, box in enumerate(bboxes):
        # ``np.bool`` was removed in NumPy 1.24; the builtin is equivalent.
        mask_pred = maskUtils.decode(segms[bbox_idx]).astype(bool)
        mask_mesh = np.zeros(mask_shape)

        car_model = car_model_dict[car_names[bbox_idx]]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1

        # The predicted angles are re-ordered and negated before building
        # the rotation, exactly as everywhere else in this file.
        ea = euler_angles[bbox_idx]
        Rt = np.eye(4)
        Rt[:3, 3] = trans_pred_world[bbox_idx]
        Rt[:3, :3] = euler_to_Rot(-ea[1], -ea[0], -ea[2]).T
        Rt = Rt[:3, :]

        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices
        img_cor_points = np.dot(camera_matrix, np.dot(Rt, P.T)).T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]

        # Truncate to integer pixels first, then move into the cropped
        # frame (same order of operations as before).
        pixels = img_cor_points[:, :2].astype(np.int32)
        pixels[:, 1] -= crop_top
        for tri in triangles:
            cv2.drawContours(mask_mesh, np.int32([pixels[tri[:3]]]), 0, 1,
                             -1)

        intersection_area = np.sum(mask_pred * mask_mesh)
        union_area = np.sum(np.logical_or(mask_pred, mask_mesh))
        # Two empty masks have no overlap; avoid a NaN from 0 / 0.
        iou_score = intersection_area / union_area if union_area > 0 else 0.0

        bboxes_with_IOU[bbox_idx, :-1] = box
        bboxes_with_IOU[bbox_idx, -1] = iou_score

    return bboxes_with_IOU
def visual_PnP(img, PnP_pred, camera_matrix, vertices, triangles):
    """Draw a wireframe of one mesh for every pose in ``PnP_pred``.

    Args:
        img (ndarray): Image that is drawn on in place.
        PnP_pred (sequence): Indexable collection of poses; every entry
            provides the keys ``'x'``, ``'y'``, ``'z'``, ``'yaw'``,
            ``'pitch'`` and ``'roll'``.
        camera_matrix (ndarray): Matrix applied to the 3xN camera-frame
            points.
        vertices (ndarray): 2-D array of mesh vertices, one row per vertex.
        triangles (iterable): Faces; the first three entries of each face
            index into ``vertices``.

    Returns:
        ndarray: The same ``img`` object, with one randomly coloured
        wireframe per pose.
    """
    for pose_idx in range(len(PnP_pred)):
        pose = PnP_pred[pose_idx]

        # 3x4 extrinsic matrix [R^T | t] for this pose.
        extrinsic = np.eye(4)
        extrinsic[:3, 3] = (pose['x'], pose['y'], pose['z'])
        extrinsic[:3, :3] = euler_to_Rot(pose['yaw'], pose['pitch'],
                                         pose['roll']).T
        extrinsic = extrinsic[:3, :]

        # Homogeneous vertices, projected and divided by depth.
        homogeneous = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        homogeneous[:, :-1] = vertices
        projected = np.dot(camera_matrix,
                           np.dot(extrinsic, homogeneous.T)).T
        projected[:, 0] /= projected[:, 2]
        projected[:, 1] /= projected[:, 2]

        random_color = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        color_tuple = tuple(int(channel) for channel in random_color[0])

        pixels = projected[:, :2].astype(np.int32)
        for face in triangles:
            outline = pixels[[face[0], face[1], face[2]]]
            cv2.polylines(img, np.int32([outline]), 1, color=color_tuple)

    return img
def get_iou_score(bbox_idx, car_model_dict, camera_matrix, class_names,
                  mask_all_pred, mask_all_mesh, mask_all_pred_area,
                  euler_angle, t):
    """Score how well a posed car mesh overlaps a predicted mask.

    The mesh of ``class_names[bbox_idx]`` is posed with
    ``euler_angle[bbox_idx]`` and translation ``t``, projected with
    ``camera_matrix`` and filled into a copy of ``mask_all_mesh``.

    Args:
        bbox_idx (int): Index into ``class_names`` and ``euler_angle``.
        car_model_dict (mapping): Car name -> dict with ``'vertices'`` and
            ``'faces'`` (face indices are shifted by -1 before use).
        camera_matrix (ndarray): Matrix applied to the 3xN camera-frame
            points.
        class_names (sequence): Car model name per detection.
        mask_all_pred (ndarray): Predicted mask.
        mask_all_mesh (ndarray): Canvas the mesh is drawn on; it is copied,
            not modified.
        mask_all_pred_area (number): Denominator of the first score.
        euler_angle (sequence): Per-detection angle triplets.
        t (array-like): Translation of this detection (not indexed by
            ``bbox_idx``).

    Returns:
        tuple: ``(intersection / mask_all_pred_area, intersection / union)``.
    """
    model = car_model_dict[class_names[bbox_idx]]
    mesh_vertices = np.array(model['vertices'])
    mesh_vertices[:, 1] = -mesh_vertices[:, 1]
    faces = np.array(model['faces']) - 1

    # Angles are re-ordered and negated before building the rotation.
    angles = euler_angle[bbox_idx]
    extrinsic = np.eye(4)
    extrinsic[:3, 3] = t
    extrinsic[:3, :3] = euler_to_Rot(-angles[1], -angles[0], -angles[2]).T
    extrinsic = extrinsic[:3, :]

    homogeneous = np.ones(
        (mesh_vertices.shape[0], mesh_vertices.shape[1] + 1))
    homogeneous[:, :-1] = mesh_vertices
    projected = np.dot(camera_matrix, np.dot(extrinsic, homogeneous.T)).T
    projected[:, 0] /= projected[:, 2]
    projected[:, 1] /= projected[:, 2]

    # Integer pixels first, then the shift into the cropped image frame.
    pixels = projected[:, :2].astype(np.int32)
    pixels[:, 1] -= 1480

    rendered = mask_all_mesh.copy()
    for face in faces:
        corners = pixels[[face[0], face[1], face[2]]]
        cv2.drawContours(rendered, np.int32([corners]), 0, 1, -1)

    overlap = np.sum(mask_all_pred * rendered)
    combined = np.sum(np.logical_or(mask_all_pred, rendered))
    return overlap / mask_all_pred_area, overlap / combined
def finetune_RT(output,
                dataset,
                loss_grayscale_light=0.05,
                loss_grayscale_RT=0.05,
                loss_IoU=0.9,
                num_epochs=50,
                draw_flag=True,
                lr=0.05,
                conf_thresh=0.8,
                tmp_save_dir='/data/Kaggle/wudi_data/tmp_output/',
                fix_rot=True,
                num_car_for_light_rendering=2):
    """Refine the predicted pose of every confident car in one image.

    Each car whose box confidence exceeds ``conf_thresh`` is handed to
    ``get_updated_RT`` together with its mesh, initial pose, decoded mask
    and masked grayscale crop.  The refined translation (and, when
    ``fix_rot`` is False, rotation) is written back into ``output`` and the
    result is dumped to ``<tmp_save_dir>/<image name>.pkl``.

    Args:
        output (sequence): ``(bboxes, segms, six_dof)`` of one image.
            ``six_dof`` must provide ``'car_cls_score_pred'``,
            ``'quaternion_pred'``, ``'trans_pred_world'`` and
            ``'file_name'``.  **Mutated in place.**
        dataset: Object exposing ``camera_matrix``, ``unique_car_mode`` and
            ``car_model_dict``.
        loss_grayscale_light (float): Currently unused.
        loss_grayscale_RT (float): Currently unused.
        loss_IoU (float): Forwarded to ``get_updated_RT`` as ``loss_RT``.
        num_epochs (int): Forwarded to ``get_updated_RT``.
        draw_flag (bool): When True a per-car ``.gif`` path inside
            ``tmp_save_dir`` is forwarded to ``get_updated_RT``.
        lr (float): Forwarded to ``get_updated_RT``.
        conf_thresh (float): Cars with a lower box score are skipped.
        tmp_save_dir (str): Output directory; created if missing.
        fix_rot (bool): When True the rotation is kept as predicted.
        num_car_for_light_rendering (int): Currently unused; the lighting
            stage it belonged to never fed the optimisation.

    Returns:
        None.  Results are available through the mutated ``output`` and the
        dumped pickle.
    """
    CAR_IDX = 2
    crop_top = 1480  # only the bottom part of the image is processed

    camera_matrix = dataset.camera_matrix.copy()
    camera_matrix[1, 2] -= crop_top

    bboxes, segms, six_dof = output[0], output[1], output[2]
    quaternion_pred = six_dof['quaternion_pred']
    trans_pred_world = six_dof['trans_pred_world']
    car_labels = np.argmax(six_dof['car_cls_score_pred'], axis=1)
    car_names = [
        car_id2name[dataset.unique_car_mode[x]].name for x in car_labels
    ]
    euler_angles = np.array(
        [quaternion_to_euler_angle(x) for x in quaternion_pred])
    conf_list = bboxes[CAR_IDX][:, -1] > conf_thresh

    # The directory must exist *before* the optimisation, which may write
    # GIFs into it; makedirs also copes with missing parents and races.
    os.makedirs(tmp_save_dir, exist_ok=True)
    # splitext instead of [:-4] so extensions of any length are handled.
    file_stem = os.path.splitext(os.path.basename(six_dof['file_name']))[0]

    rgb_image = imread(six_dof['file_name'])
    grayscale_crop = color.rgb2gray(rgb_image)[crop_top:, :]

    vertices_img = []
    faces_img = []
    Rotation_Matrix_img = []
    T_img = []
    euler_angles_img = []
    mask_img = []
    max_vertices = 0
    # Meshes have 4999-5000 faces; every mesh is truncated to the common
    # minimum so that the face arrays share one shape.
    min_faces = 4999
    for car_idx in range(len(quaternion_pred)):
        # The predicted instance mask serves as the target mask.
        mask = maskUtils.decode(segms[CAR_IDX][car_idx])

        car_model = dataset.car_model_dict[car_names[car_idx]]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        faces = np.array(car_model['faces']) - 1

        ea = euler_angles[car_idx]
        yaw, pitch, roll = -ea[1], -ea[0], -ea[2]

        vertices_img.append(vertices)
        faces_img.append(faces)
        max_vertices = max(vertices.shape[0], max_vertices)
        min_faces = min(faces.shape[0], min_faces)
        Rotation_Matrix_img.append(euler_to_Rot(yaw, pitch, roll).T)
        T_img.append(trans_pred_world[car_idx])
        euler_angles_img.append(np.array([yaw, pitch, roll]))
        mask_img.append(mask)

    # np.stack raises on an empty list, so skip refinement when the image
    # has no detections at all.
    if vertices_img:
        Rotation_Matrix_img = np.stack(Rotation_Matrix_img)
        T_img = np.stack(T_img)
        euler_angles_img = np.stack(euler_angles_img)
        mask_img = np.stack(mask_img)

        # Vertices are zero-padded and faces truncated to common shapes.
        vertices_img_all = np.zeros((len(vertices_img), max_vertices, 3))
        faces_img_all = np.zeros((len(faces_img), min_faces, 3))
        for i, (vertices, faces) in enumerate(zip(vertices_img, faces_img)):
            vertices_img_all[i, :vertices.shape[0], :] = vertices
            faces_img_all[i, :, :] = faces[:min_faces, :]

        for i, is_confident in enumerate(conf_list):
            if not is_confident:
                continue

            output_gif = None
            if draw_flag:
                output_gif = os.path.join(
                    tmp_save_dir, file_stem + '_' + str(i) + '.gif')

            T_update, ea_update = get_updated_RT(
                vertices=vertices_img_all[None, i],
                faces=faces_img_all[None, i],
                Rotation_Matrix=Rotation_Matrix_img[None, i],
                T=T_img[None, i],
                euler_angle=euler_angles_img[i],
                mask_full_size=mask_img[None, i],
                masked_grayscale_img=mask_img[i] * grayscale_crop,
                camera_matrix=camera_matrix,
                image_size=(3384, 2710 - 1480),
                loss_RT=loss_IoU,
                num_epochs=num_epochs,
                draw_flag=draw_flag,
                output_gif=output_gif,
                lr=lr,
                fix_rot=fix_rot)

            # Undo the re-ordering/negation applied when building the pose.
            angles = euler_angles_img[i] if fix_rot else ea_update
            six_dof['trans_pred_world'][i] = T_update
            euler_angles[i] = (-angles[1], -angles[0], -angles[2])

    if not fix_rot:
        six_dof['euler_angle'] = euler_angles

    mmcv.dump(output, os.path.join(tmp_save_dir, file_stem + '.pkl'))
    return
def load_anno_idx(
        self,
        idx,
        img_concat,
        train,
        draw_dir='/data/home/yyj/code/kaggle/new_code/Kaggle_PKU_Baidu/data/pku_data/crop_visualization/crop_mesh'
):
    """Render the annotated car meshes of one sample over three images.

    Every annotated car of row ``idx`` is posed, projected with
    ``self.camera_matrix`` and filled into a mask.  The accumulated mask is
    blended onto each of the three input images, and the results are
    written to ``draw_dir`` as ``<ImageId>_1.jpg`` .. ``<ImageId>_3.jpg``.

    Rendering follows
    https://www.kaggle.com/ebouteillon/augmented-reality ; ``car_id2name``
    comes from
    https://github.com/ApolloScapeAuto/dataset-api/blob/master/car_instance/car_models.py

    Args:
        idx (int): Positional row index into ``train``.
        img_concat (sequence): Three images; the first one defines the mask
            shape.
        train: Table with ``'PredictionString'`` and ``'ImageId'`` columns
            supporting ``.iloc``.
        draw_dir (str): Directory the blended images are written to.

    Returns:
        None.
    """
    img1, img2, img3 = img_concat
    alpha = 0.8  # weight of the mask in the blend
    mask_all = np.zeros(img1.shape)

    gt = self._str2coords(train['PredictionString'].iloc[idx])
    for gt_pred in gt:
        car_model = self.car_model_dict[car_id2name[gt_pred['id']].name]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1

        # Yaw and pitch are exchanged and all angles negated before the
        # rotation matrix is built.
        Rt = np.eye(4)
        Rt[:3, 3] = np.array([gt_pred['x'], gt_pred['y'], gt_pred['z']])
        Rt[:3, :3] = euler_to_Rot(-gt_pred['pitch'], -gt_pred['yaw'],
                                  -gt_pred['roll']).T
        Rt = Rt[:3, :]

        # Project the 3D vertices onto the image plane.
        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices
        img_cor_points = np.dot(self.camera_matrix, np.dot(Rt, P.T)).T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]
        pixels = img_cor_points[:, :2].astype(np.int32)

        mask_seg = np.zeros(img1.shape, dtype=np.uint8)
        for tri in triangles:
            cv2.drawContours(mask_seg, np.int32([pixels[tri[:3]]]), 0,
                             (0, 0, 255), -1)
        mask_all += mask_seg

    # Rescale to 0..255; skipped for an all-zero mask to avoid 0 / 0.
    peak = mask_all.max()
    if peak > 0:
        mask_all = mask_all * 255 / peak
    overlay = mask_all.astype(np.uint8)

    image_id = train['ImageId'].iloc[idx]
    for suffix, image in (('_1.jpg', img1), ('_2.jpg', img2),
                          ('_3.jpg', img3)):
        # Use the returned array: a preallocated ``dst`` is silently
        # ignored by OpenCV when its dtype does not match the inputs.
        merged = cv2.addWeighted(image.astype(np.uint8), 1.0, overlay,
                                 alpha, 0)
        imwrite(merged, os.path.join(draw_dir, image_id + suffix))
def imdraw_det_bboxes(img,
                      bboxes,
                      class_names,
                      car_model_dict,
                      camera_matrix,
                      trans_pred_world,
                      euler_angle,
                      color_lists,
                      score_thr=0,
                      bbox_color='green',
                      text_color='green',
                      thickness=1,
                      font_scale=0.5,
                      crop_top=1480):
    """Draw boxes, labels and posed car-mesh wireframes on an image.

    Args:
        img (ndarray): Full image; it is copied, not modified.
        bboxes (ndarray): Boxes, shaped (n, 4) or (n, 5) with the score in
            the last column.  Coordinates refer to the cropped image.
        class_names (sequence[str]): Car model name of every box.
        car_model_dict (mapping): Car name -> dict with ``'vertices'`` and
            ``'faces'`` (face indices are shifted by -1 before use).
        camera_matrix (ndarray): Matrix applied to the 3xN camera-frame
            points (full-image frame; ``crop_top`` is subtracted after
            projection).
        trans_pred_world (ndarray): Translation of every box, (n, 3).
        euler_angle (ndarray): Angle triplet of every box, (n, 3).
        color_lists (sequence): Per-box colours; ``color_lists[i][0]`` is
            used for the mesh of box ``i``.
        score_thr (float): Boxes with a score not above this are dropped.
        bbox_color (str or tuple or :obj:`Color`): Colour of box lines.
        text_color (str or tuple or :obj:`Color`): Colour of label text.
        thickness (int): Thickness of box lines.
        font_scale (float): Font scale of label text.
        crop_top (int): Number of top image rows that are left untouched.
            Defaults to 1480, the value previously hard-coded.

    Returns:
        ndarray: Copy of ``img`` with the drawings in rows ``crop_top:``.
    """
    assert bboxes.ndim == 2
    assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5
    img_original = img.copy()
    # View into the copy: everything drawn on ``img`` lands in
    # ``img_original`` directly.
    img = img_original[crop_top:, :, :]

    if score_thr > 0:
        assert bboxes.shape[1] == 5
        num_dets = len(bboxes)
        inds = bboxes[:, -1] > score_thr
        bboxes = bboxes[inds, :]
        trans_pred_world = trans_pred_world[inds, :]
        euler_angle = euler_angle[inds, :]
        # Keep names and colours aligned with the surviving boxes.  A
        # length that already differs is left alone, so callers that
        # pre-filtered these sequences keep working.
        if len(class_names) == num_dets:
            class_names = [
                name for name, keep in zip(class_names, inds) if keep
            ]
        if len(color_lists) == num_dets:
            color_lists = [
                item for item, keep in zip(color_lists, inds) if keep
            ]
    assert len(bboxes) == len(trans_pred_world) == len(euler_angle)

    bbox_color = color_val(bbox_color)
    text_color = color_val(text_color)

    for bbox_idx, bbox in enumerate(bboxes):
        car_name = class_names[bbox_idx]
        bbox_int = bbox.astype(np.int32)
        left_top = (int(bbox_int[0]), int(bbox_int[1]))
        right_bottom = (int(bbox_int[2]), int(bbox_int[3]))
        cv2.rectangle(img, left_top, right_bottom, bbox_color,
                      thickness=thickness)

        label_text = car_name
        if len(bbox) > 4:
            label_text += '|{:.02f}'.format(bbox[-1])
        cv2.putText(img, label_text, (left_top[0], left_top[1] - 2),
                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)

        # Pose and project the mesh of this detection.
        car_model = car_model_dict[car_name]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1

        ea = euler_angle[bbox_idx]
        Rt = np.eye(4)
        Rt[:3, 3] = trans_pred_world[bbox_idx]
        Rt[:3, :3] = euler_to_Rot(-ea[1], -ea[0], -ea[2]).T
        Rt = Rt[:3, :]

        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices
        img_cor_points = np.dot(camera_matrix, np.dot(Rt, P.T)).T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]

        # Integer pixels first, then the shift into the cropped frame.
        pixels = img_cor_points[:, :2].astype(np.int32)
        pixels[:, 1] -= crop_top

        color_mesh = np.int32(color_lists[bbox_idx][0])
        color_tuple = tuple(int(x) for x in color_mesh)
        for tri in triangles:
            cv2.polylines(img, np.int32([pixels[tri[:3]]]), 1,
                          color=color_tuple)

    return img_original
def draw_box_mesh_kaggle_pku(
        img_original,
        bboxes,
        segms,
        class_names,
        car_model_dict,
        camera_matrix,
        trans_pred_world,
        euler_angle,
        score_thr=0.8,
        thickness=1,
        transparency=0.5,
        font_scale=0.8,
        crop_top=1480,
):
    """Draw boxes and mesh wireframes and label each with its mask IoU.

    For every kept detection the posed car mesh is projected, drawn as a
    wireframe and filled into a mask.  The IoU of that mask with the
    decoded instance mask is written next to the box together with the
    box score.

    Args:
        img_original (ndarray): Full image; it is not modified.
        bboxes (ndarray): One row per detection, score in the last column.
            Coordinates refer to the cropped image.
        segms (sequence): Per-detection encoded masks; each entry is passed
            to ``maskUtils.decode``.
        class_names (sequence[str]): Car model name of every detection
            (list or ndarray).
        car_model_dict (mapping): Car name -> dict with ``'vertices'`` and
            ``'faces'`` (face indices are shifted by -1 before use).
        camera_matrix (ndarray): Matrix applied to the 3xN camera-frame
            points (full-image frame; ``crop_top`` is subtracted after
            projection).
        trans_pred_world (ndarray): Translation per detection, (n, 3).
        euler_angle (ndarray): Angle triplet per detection, (n, 3).
        score_thr (float): Detections with a score not above this are
            dropped; no filtering when it is not positive.
        thickness (int): Thickness of the box lines.
        transparency (float): Currently unused.
        font_scale (float): Font scale of the label text.
        crop_top (int): Number of top image rows that are left untouched.
            Defaults to 1480, the value previously hard-coded.

    Returns:
        tuple: ``(image, iou_flag)`` where ``image`` is a copy of
        ``img_original`` with drawings in rows ``crop_top:`` and
        ``iou_flag`` is True when any drawn detection has an IoU below 0.5.
    """
    img = img_original[crop_top:, :, :].copy()
    mask_shape = img.shape[:-1]
    iou_flag = False

    if score_thr > 0:
        inds = bboxes[:, -1] > score_thr
        bboxes = bboxes[inds, :]
        segms = [segm for segm, keep in zip(segms, inds) if keep]
        trans_pred_world = trans_pred_world[inds, :]
        euler_angle = euler_angle[inds, :]
        # asarray so that a plain list of names can be mask-indexed too.
        class_names = np.asarray(class_names)[inds]

    for bbox_idx, bbox in enumerate(bboxes):
        color_ndarray = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        color = tuple(int(c) for c in color_ndarray[0])

        # ``np.bool`` was removed in NumPy 1.24; the builtin is equivalent.
        mask_pred = maskUtils.decode(segms[bbox_idx]).astype(bool)
        mask_mesh = np.zeros(mask_shape)

        bbox_int = bbox.astype(np.int32)
        left_top = (int(bbox_int[0]), int(bbox_int[1]))
        right_bottom = (int(bbox_int[2]), int(bbox_int[3]))

        # Pose and project the mesh of this detection.
        car_model = car_model_dict[class_names[bbox_idx]]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1

        ea = euler_angle[bbox_idx]
        Rt = np.eye(4)
        Rt[:3, 3] = trans_pred_world[bbox_idx]
        Rt[:3, :3] = euler_to_Rot(-ea[1], -ea[0], -ea[2]).T
        Rt = Rt[:3, :]

        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices
        img_cor_points = np.dot(camera_matrix, np.dot(Rt, P.T)).T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]

        # Integer pixels first, then the shift into the cropped frame.
        pixels = img_cor_points[:, :2].astype(np.int32)
        pixels[:, 1] -= crop_top
        for tri in triangles:
            contour = np.int32([pixels[tri[:3]]])
            cv2.polylines(img, contour, 1, color, thickness=1)
            cv2.drawContours(mask_mesh, contour, 0, 1, -1)

        intersection_area = np.sum(mask_pred * mask_mesh)
        union_area = np.sum(np.logical_or(mask_pred, mask_mesh))
        # Two empty masks have no overlap; avoid a NaN from 0 / 0.
        if union_area > 0:
            iou_score = round(intersection_area / union_area, 3)
        else:
            iou_score = 0.0

        cls_score = bbox[-1]
        if iou_score < 0.5:
            print('iou_score', iou_score, cls_score)
            iou_flag = True

        label_text_t = str(iou_score) + ' ' + str(cls_score)
        cv2.rectangle(img, left_top, right_bottom, color,
                      thickness=thickness)
        cv2.putText(img, label_text_t, (left_top[0], left_top[1] - 2),
                    cv2.FONT_ITALIC, font_scale, color)

    im_combime = img_original.copy()
    im_combime[crop_top:, :, :] = img
    return im_combime, iou_flag