def write_submission(outputs, args, dataset, conf_thresh=0.15, filter_mask=False, horizontal_flip=False):
    """Write the per-image car predictions to a submission CSV file.

    Args:
        outputs: iterable of per-image results. ``output[0][CAR_IDX]`` holds
            the car bounding boxes (last column is the confidence) and
            ``output[2]`` is a dict providing ``'file_name'``,
            ``'quaternion_pred'`` and ``'trans_pred_world'``.
        args: object whose ``out`` attribute is the ``.pkl`` result path; the
            CSV name is derived from it.
        dataset: object providing ``img_prefix`` (also forwarded to the mask
            filter).
        conf_thresh: cars whose confidence is not above this value are dropped.
        filter_mask: if True, additionally drop cars rejected by
            ``filter_igore_masked_using_RT``.
        horizontal_flip: if True (and ``filter_mask`` is False), tag the file
            name with ``_horizontal_flip``.

    Returns:
        The path of the CSV file that was written.
    """
    img_prefix = dataset.img_prefix

    # The output file name encodes the run configuration.
    submission = args.out.replace('.pkl', '')
    submission += '_' + img_prefix.split('/')[-1]
    submission += '_conf_' + str(conf_thresh)
    if filter_mask:
        submission += '_filter_mask.csv'
    elif horizontal_flip:
        submission += '_horizontal_flip'
    # NOTE(review): the suffix below is appended unconditionally, so the
    # filter_mask variant ends up with '.csv' twice in its name. Kept as is
    # because downstream tooling may depend on the exact file names.
    submission += '_refined_test_cwx114_10_0.05.csv'

    predictions = {}
    CAR_IDX = 2  # this is the coco car class
    for output in tqdm(outputs):
        file_name = os.path.basename(output[2]["file_name"])
        ImageId = ".".join(file_name.split(".")[:-1])

        car_bboxes = output[0][CAR_IDX]
        if not len(car_bboxes):
            # No car detected: the image still needs a (blank) row.
            predictions[ImageId] = ""
            continue

        conf = car_bboxes[:, -1]  # last bbox column is the confidence
        keep = conf > conf_thresh
        if filter_mask:
            # This filtering step takes about 2 seconds per iteration.
            idx_keep_mask = filter_igore_masked_using_RT(ImageId, output[2], img_prefix, dataset)
            # A car must pass both tests; logical_and guarantees a boolean
            # mask even if the filter returns 0/1 integers.
            keep = np.logical_and(keep, idx_keep_mask)

        # The stored 'euler_angle' entry is not trusted (NMR has problems
        # saving it), so angles are always re-derived from the quaternions.
        eular_angle = np.array([quaternion_to_euler_angle(x) for x in output[2]['quaternion_pred']])
        translation = output[2]['trans_pred_world']
        coords = np.hstack((eular_angle[keep], translation[keep], conf[keep, None]))
        predictions[ImageId] = coords2str(coords)

    df = pd.DataFrame(data={
        'ImageId': list(predictions.keys()),
        'PredictionString': list(predictions.values()),
    })
    print("Writing submission csv file to: %s" % submission)
    df.to_csv(submission, index=False)
    return submission
def get_IOU(img_original, bboxes, segms, six_dof, car_id2name, car_model_dict, unique_car_mode, camera_matrix,
            crop_top=1480):
    """Append to each bbox the IoU between its predicted mask and its projected car mesh.

    For every detection the predicted car model is posed with the predicted
    rotation/translation, projected with ``camera_matrix`` and rasterised; the
    resulting silhouette is compared with the decoded segmentation mask.

    Args:
        img_original: full-size image array (H, W, C); only its shape is used.
        bboxes: 2-D array with one row per detection.
        segms: per-detection encoded masks, decodable by ``maskUtils.decode``.
            They are expected to have the shape of the cropped image.
        six_dof: dict with ``'quaternion_pred'``, ``'car_cls_score_pred'`` and
            ``'trans_pred_world'``.
        car_id2name: mapping from car id to an object with a ``name`` attribute.
        car_model_dict: mapping car name -> dict with ``'vertices'`` and
            ``'faces'``.
        unique_car_mode: maps a predicted class index to a car id.
        camera_matrix: camera matrix used for the projection.
        crop_top: number of image rows cut off at the top before masks were
            predicted (default 1480, the value previously hard-coded).

    Returns:
        Array of shape ``(len(bboxes), bboxes.shape[1] + 1)`` with the dtype of
        ``bboxes``; the last column is the IoU (0 when both masks are empty).
    """
    # Only the geometry of the cropped image is needed, so no copy is made.
    mask_shape = img_original[crop_top:, :, :].shape[:-1]

    # One extra column per row holds the IoU score.
    bboxes_with_IOU = np.zeros((bboxes.shape[0], bboxes.shape[1] + 1)).astype(bboxes.dtype)

    euler_angles = np.array([quaternion_to_euler_angle(x) for x in six_dof['quaternion_pred']])
    trans_pred_world = six_dof['trans_pred_world']
    car_labels = np.argmax(six_dof['car_cls_score_pred'], axis=1)
    car_names = [car_id2name[unique_car_mode[x]].name for x in car_labels]

    for bbox_idx, box in enumerate(bboxes):
        # Predicted mask. `np.bool` no longer exists in recent NumPy, so the
        # builtin `bool` is used.
        mask_pred = maskUtils.decode(segms[bbox_idx]).astype(bool)
        mask_mesh = np.zeros(mask_shape)

        car_model = car_model_dict[car_names[bbox_idx]]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1  # shift face indices down by one

        # Angles are permuted/negated to the convention euler_to_Rot expects.
        ea = euler_angles[bbox_idx]
        yaw, pitch, roll = -ea[1], -ea[0], -ea[2]
        Rt = np.eye(4)
        Rt[:3, 3] = trans_pred_world[bbox_idx]
        Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T
        Rt = Rt[:3, :]

        # Homogeneous vertex coordinates, one column per vertex.
        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices
        P = P.T

        img_cor_points = np.dot(camera_matrix, np.dot(Rt, P)).T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]

        for tri in triangles:
            coord = np.array([
                img_cor_points[tri[0]][:2],
                img_cor_points[tri[1]][:2],
                img_cor_points[tri[2]][:2],
            ], dtype=np.int32)
            coord[:, 1] -= crop_top  # move into the cropped image frame
            cv2.drawContours(mask_mesh, np.int32([coord]), 0, 1, -1)

        intersection_area = np.sum(mask_pred * mask_mesh)
        union_area = np.sum(np.logical_or(mask_pred, mask_mesh))
        # Both masks empty would give 0/0 = NaN; report no overlap instead.
        iou_score = intersection_area / union_area if union_area > 0 else 0.0
        bboxes_with_IOU[bbox_idx] = np.append(box, iou_score)

    return bboxes_with_IOU
def evaluate(self, runner, results):
    """Compute the validation mAP of ``results`` and publish it to the runner's log buffer.

    Predictions are converted to the submission string format, matched
    against the ground truth read from ``self.ann_file`` by the external
    ``match`` function for ten settings (indices 0-9), and the resulting
    recall-weighted average precisions are averaged.

    Args:
        runner: training runner; ``runner.log_buffer.output`` receives the
            value under ``'mAP/<dataset_name>'`` and ``ready`` is set to True.
        results: iterable of per-image outputs. ``output[0][CAR_IDX]`` are the
            car bboxes (last column is the confidence); ``output[2]`` is a
            dict with ``'file_name'``, ``'quaternion_pred'`` and
            ``'trans_pred_world'``.
    """
    CAR_IDX = 2  # this is the coco car class
    predictions = {}
    for output in results:
        file_name = os.path.basename(output[2]["file_name"])
        ImageId = ".".join(file_name.split(".")[:-1])

        car_bboxes = output[0][CAR_IDX]
        if not len(car_bboxes):
            # No detections: stacking the empty arrays below would fail, so
            # emit an empty prediction string as write_submission does.
            # NOTE(review): assumes `match` accepts an empty PredictionString.
            predictions[ImageId] = ""
            continue

        conf = car_bboxes[:, -1]  # last bbox column is the confidence
        idx = conf > self.conf_thresh

        euler_angle = np.array([quaternion_to_euler_angle(x) for x in output[2]['quaternion_pred']])
        translation = output[2]['trans_pred_world']
        coords = np.hstack((euler_angle[idx], translation[idx], conf[idx, None]))
        predictions[ImageId] = coords2str(coords)

    pred_df = pd.DataFrame(data={
        'ImageId': list(predictions.keys()),
        'PredictionString': list(predictions.values()),
    })

    gt_df = pd.read_csv(self.ann_file)
    expanded_train_df = expand_df(gt_df, ['model_type', 'pitch', 'yaw', 'roll', 'x', 'y', 'z'])
    num_cars_gt = len(expanded_train_df)  # total number of ground-truth cars

    ap_list = []
    max_workers = 10
    p = Pool(processes=max_workers)
    try:
        # Each work item is a 1-tuple wrapping (index, gt_df, pred_df); this
        # is the argument layout `match` has always been called with.
        for result_flg, scores in p.imap(match, zip([(i, gt_df, pred_df) for i in range(10)])):
            n_tp = np.sum(result_flg)
            if n_tp > 0:
                recall = n_tp / num_cars_gt
                ap = average_precision_score(result_flg, scores) * recall
            else:
                ap = 0
            ap_list.append(ap)
    finally:
        # Always release the worker processes, even if matching fails.
        p.close()
        p.join()

    mean_ap = np.mean(ap_list)
    print('{} Valid 400 images mAP is: {}'.format(self.dataset_name, mean_ap))
    key = 'mAP/{}'.format(self.dataset_name)
    runner.log_buffer.output[key] = mean_ap
    runner.log_buffer.ready = True
def restore_xyz_withIOU_single(self, idx, output_origin, car_cls_coco=2):
    """Return a refined deep copy of one image's prediction.

    The rotation is refined first by ``refine_yaw_and_roll``; the translation
    is then refined by ``restore_x_y_from_z_withIOU``. ``output_origin`` itself
    is left untouched.

    Args:
        idx: index of the image; only printed for progress tracking.
        output_origin: per-image result ``(bboxes, segms, six_dof)``.
        car_cls_coco: index of the car class inside ``bboxes`` / ``segms``.

    Returns:
        The copied output with ``'quaternion_pred'`` (only when the rotation
        refinement reports success) and ``'trans_pred_world'`` replaced.
    """
    output = copy.deepcopy(output_origin)
    print('idx', idx)

    six_dof = output[2]
    car_bboxes = output[0][car_cls_coco]
    car_segms = output[1][car_cls_coco]

    # The image is looked up by base name inside the test image folder.
    image_path = os.path.join(self.test_image_folder, os.path.basename(six_dof['file_name']))
    image = imread(image_path)

    quaternion_pred = six_dof['quaternion_pred']
    trans_pred_world = six_dof['trans_pred_world'].copy()
    euler_angle = np.array([quaternion_to_euler_angle(q) for q in quaternion_pred])

    # Predicted class index -> kaggle car id -> car model name.
    car_labels = np.argmax(six_dof['car_cls_score_pred'], axis=1)
    kaggle_car_labels = [self.unique_car_mode[label] for label in car_labels]
    car_names = np.array([car_id2name[car_id].name for car_id in kaggle_car_labels])

    # Every per-car container must describe the same number of cars.
    lengths = {
        len(car_bboxes),
        len(car_segms),
        len(kaggle_car_labels),
        len(trans_pred_world),
        len(euler_angle),
        len(car_names),
    }
    assert len(lengths) == 1

    quaternion_semisphere_refined, flag = refine_yaw_and_roll(
        image, car_bboxes, car_segms, car_names, euler_angle, quaternion_pred,
        trans_pred_world, self.car_model_dict, self.camera_matrix)
    if flag:
        # Adopt the refined rotation and recompute the angles derived from it.
        six_dof['quaternion_pred'] = quaternion_semisphere_refined
        euler_angle = np.array([quaternion_to_euler_angle(q) for q in six_dof['quaternion_pred']])

    six_dof['trans_pred_world'] = restore_x_y_from_z_withIOU(
        image, car_bboxes, car_segms, car_names, euler_angle,
        trans_pred_world, self.car_model_dict, self.camera_matrix)
    return output
def format_return_data(output, conf_thresh=0.8):
    """Pack the confident car detections of one image into a single array.

    Args:
        output: per-image result. ``output[0][CAR_IDX]`` are the car bboxes
            (last column is the confidence); ``output[2]`` is a dict with
            ``'quaternion_pred'`` and ``'trans_pred_world'``.
        conf_thresh: detections whose confidence is not above this value are
            dropped (default 0.8, the value previously hard-coded).

    Returns:
        2-D array with one row per kept car: the bbox columns, followed by the
        euler angles, followed by the translation. When the image has no car
        detection an empty array with zero rows is returned (previously this
        case failed because the result was never assigned).
    """
    CAR_IDX = 2  # this is the coco car class
    car_bboxes = output[0][CAR_IDX]

    if not len(car_bboxes):
        boxes = np.asarray(car_bboxes)
        # Assumes 5 bbox columns (x1, y1, x2, y2, score) when the width cannot
        # be read from the array, plus 3 angle and 3 translation columns
        # - TODO confirm against the detector output.
        n_box_cols = boxes.shape[1] if boxes.ndim == 2 else 5
        return np.zeros((0, n_box_cols + 6))

    conf = car_bboxes[:, -1]  # last bbox column is the confidence
    idx = conf > conf_thresh
    eular_angle = np.array([quaternion_to_euler_angle(x) for x in output[2]['quaternion_pred']])
    translation = output[2]['trans_pred_world']
    return np.hstack((car_bboxes[idx], eular_angle[idx], translation[idx]))
def write_submission(outputs,
                     submission='Nov20-18-24-45-epoch_50.csv',
                     data_root='/data/Kaggle/pku-autonomous-driving/'):
    """Write validation predictions to a submission-style CSV file.

    The i-th output is paired with the i-th image id listed in
    ``<data_root>/validation.txt``. The confidence of each car is the maximum
    softmax probability of its car-class scores.

    Args:
        outputs: sequence of per-image results; ``output[2]`` is a dict with
            ``'car_cls_score_pred'``, ``'quaternion_pred'`` and
            ``'trans_pred_world'``.
        submission: path of the CSV file to write (defaults to the name that
            was previously hard-coded).
        data_root: directory containing ``validation.txt`` (defaults to the
            path that was previously hard-coded).
    """
    import os

    import numpy as np
    import pandas as pd
    from scipy.special import softmax
    from mmdet.datasets.kaggle_pku_utils import quaternion_to_euler_angle

    # Read the image ids with a context manager so the file is closed.
    with open(os.path.join(data_root, 'validation.txt')) as id_file:
        ImageId = [line.strip() for line in id_file]

    predictions = {}
    for idx, output in enumerate(outputs):
        if idx >= len(ImageId):
            # Surplus outputs have no image id; they used to be dropped
            # silently by a bare `except`, now the mismatch is reported.
            print('More outputs than image ids (%d); ignoring the remaining outputs' % len(ImageId))
            break
        conf = np.max(softmax(output[2]['car_cls_score_pred'], axis=1), axis=1)
        euler_angle = np.array([quaternion_to_euler_angle(x) for x in output[2]['quaternion_pred']])
        translation = output[2]['trans_pred_world']
        coords = np.hstack((euler_angle, translation, conf[:, None]))
        predictions[ImageId[idx]] = coords2str(coords)

    df = pd.DataFrame(data={
        'ImageId': list(predictions.keys()),
        'PredictionString': list(predictions.values()),
    })
    print('df', df)
    df.to_csv(submission, index=False)
def finetune_RT(
        output,
        dataset,
        loss_grayscale_light=0.05,
        loss_grayscale_RT=0.05,
        loss_IoU=0.9,
        num_epochs=50,
        draw_flag=True,
        lr=0.05,
        conf_thresh=0.8,
        tmp_save_dir='/data/Kaggle/wudi_data/tmp_output/',
        fix_rot=True,
        num_car_for_light_rendering=2):
    """Fine-tune the pose of every confident car in one image and dump the result.

    Each car whose bbox confidence exceeds ``conf_thresh`` is optimised
    individually by ``get_updated_RT``. The refined translation (and, unless
    ``fix_rot`` is set, the refined rotation) is written back into ``output``,
    which is then saved as ``<tmp_save_dir>/<image name>.pkl``.

    Note that ``output`` is modified in place.

    :param output: per-image result ``(bboxes, segms, six_dof)``
    :param dataset: provides ``camera_matrix``, ``unique_car_mode`` and
        ``car_model_dict``
    :param loss_grayscale_light: currently unused; kept for interface
        compatibility
    :param loss_grayscale_RT: currently unused; kept for interface
        compatibility
    :param loss_IoU: forwarded to ``get_updated_RT`` as ``loss_RT``
    :param num_epochs: number of optimisation epochs per car
    :param draw_flag: if True, a ``.gif`` path per car is passed to
        ``get_updated_RT``
    :param lr: learning rate forwarded to ``get_updated_RT``
    :param conf_thresh: confidence threshold on the bboxes; cars at or below
        it are skipped to save time
    :param tmp_save_dir: directory for the ``.gif`` and ``.pkl`` files; it is
        created if missing
    :param fix_rot: if True the rotation is not learned and the predicted
        euler angles are kept
    :param num_car_for_light_rendering: currently unused; kept for interface
        compatibility (the lighting estimation step is not performed)
    :return: None; the updated output is dumped with ``mmcv.dump``
    """
    CAR_IDX = 2

    camera_matrix = dataset.camera_matrix.copy()
    camera_matrix[1, 2] -= 1480  # Because we have only bottom half

    # First we collect all the car instances info. in an image
    bboxes, segms, six_dof = output[0], output[1], output[2]
    car_cls_score_pred = six_dof['car_cls_score_pred']
    quaternion_pred = six_dof['quaternion_pred']
    trans_pred_world = six_dof['trans_pred_world']
    euler_angles = np.array([quaternion_to_euler_angle(x) for x in quaternion_pred])

    # Output locations. The directory is created up front (including missing
    # parents) so it exists before anything is written into it.
    file_stem = six_dof['file_name'].split('/')[-1][:-4]
    os.makedirs(tmp_save_dir, exist_ok=True)
    output_name = tmp_save_dir + '/' + file_stem + '.pkl'

    if len(quaternion_pred) == 0:
        # No car in this image: nothing to refine (stacking empty lists below
        # would raise), so dump the prediction unchanged.
        if not fix_rot:
            six_dof['euler_angle'] = euler_angles
        mmcv.dump(output, output_name)
        return

    car_labels = np.argmax(car_cls_score_pred, axis=1)
    kaggle_car_labels = [dataset.unique_car_mode[x] for x in car_labels]
    car_names = [car_id2name[x].name for x in kaggle_car_labels]

    conf = bboxes[CAR_IDX][:, -1]  # last bbox column is the confidence
    conf_list = conf > conf_thresh

    rgb_image = imread(six_dof['file_name'])
    # convert the rgb image to grayscale
    grayscale_image = color.rgb2gray(rgb_image)

    vertices_img = []
    faces_img = []
    max_vertices = 0
    # There are in total 4999-5000 faces; every mesh is truncated to the
    # smallest face count so they can share one array. Not rendering one face
    # for some cars should be alright.
    min_faces = 4999
    Rotation_Matrix_img = []
    T_img = []
    euler_angles_img = []
    mask_img = []

    for car_idx in range(len(quaternion_pred)):
        # The HTC predicted mask serves as the ground-truth mask
        mask = maskUtils.decode(segms[CAR_IDX][car_idx])

        # Get car mesh --> vertices and faces
        car_model = dataset.car_model_dict[car_names[car_idx]]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        faces = np.array(car_model['faces']) - 1

        # Get prediction of Rotation Matrix and Translation
        ea = euler_angles[car_idx]
        yaw, pitch, roll = -ea[1], -ea[0], -ea[2]
        Rotation_Matrix = euler_to_Rot(yaw, pitch, roll).T

        vertices_img.append(vertices)
        max_vertices = max(vertices.shape[0], max_vertices)
        faces_img.append(faces)
        min_faces = min(faces.shape[0], min_faces)
        Rotation_Matrix_img.append(Rotation_Matrix)
        T_img.append(trans_pred_world[car_idx])
        euler_angles_img.append(np.array([yaw, pitch, roll]))
        mask_img.append(mask)

    Rotation_Matrix_img = np.stack(Rotation_Matrix_img)
    T_img = np.stack(T_img)
    euler_angles_img = np.stack(euler_angles_img)
    mask_img = np.stack(mask_img)

    # Vertex counts differ per car: pad the vertices with zeros up to the
    # largest mesh and truncate the faces to the smallest one.
    vertices_img_all = np.zeros((len(vertices_img), max_vertices, 3))
    faces_img_all = np.zeros((len(faces_img), min_faces, 3))
    for i in range(len(vertices_img)):
        vertices_img_all[i, :vertices_img[i].shape[0], :] = vertices_img[i]
        faces_img_all[i, :, :] = faces_img[i][:min_faces, :]

    # Now we start to fine tune R, T, one confident car at a time
    output_gif = None
    for i, true_flag in enumerate(conf_list):
        if not true_flag:
            continue
        if draw_flag:
            output_gif = tmp_save_dir + '/' + file_stem + '_' + str(i) + '.gif'

        # Grayscale image restricted to this car's mask (bottom half only)
        masked_grayscale_car = mask_img[i] * grayscale_image[1480:, :]

        T_update, ea_update = get_updated_RT(
            vertices=vertices_img_all[None, i],
            faces=faces_img_all[None, i],
            Rotation_Matrix=Rotation_Matrix_img[None, i],
            T=T_img[None, i],
            euler_angle=euler_angles_img[i],
            mask_full_size=mask_img[None, i],
            masked_grayscale_img=masked_grayscale_car,
            camera_matrix=camera_matrix,
            image_size=(3384, 2710 - 1480),
            loss_RT=loss_IoU,
            num_epochs=num_epochs,
            draw_flag=draw_flag,
            output_gif=output_gif,
            lr=lr,
            fix_rot=fix_rot)

        if fix_rot:
            # Rotation was not learned: undo the permutation/negation applied
            # above, which restores the predicted euler angles.
            R_update = -euler_angles_img[i][1], -euler_angles_img[i][0], -euler_angles_img[i][2]
        else:
            # Map the learned angles back to the prediction's convention.
            R_update = -ea_update[1], -ea_update[0], -ea_update[2]

        six_dof['trans_pred_world'][i] = T_update
        euler_angles[i] = R_update

    if not fix_rot:
        six_dof['euler_angle'] = euler_angles

    mmcv.dump(output, output_name)
    return
def write_pose_to_json(out_pkl, output_dir, thresh=0.9, ignored_mask_binary=None, iou_ignore_threshold=None):
    """Write one JSON file of car poses per image of a pickled result list.

    For every image the cars whose bbox score is at least ``thresh`` are
    written, ordered from the largest to the smallest bbox area, to
    ``<output_dir>/<image name>.json``. Images without such a car get an empty
    list.

    Relies on the module-level names ``CAR_IDX`` and ``unique_car_mode``.

    Args:
        out_pkl: path of the result file loaded with ``mmcv.load``.
        output_dir: directory receiving the JSON files.
        thresh: minimum bbox score for a car to be written.
        ignored_mask_binary: binary mask of ignored regions; only used when
            ``iou_ignore_threshold`` is set.
        iou_ignore_threshold: if set (truthy), a car is dropped when the
            fraction of its bbox covered by ``ignored_mask_binary`` exceeds
            this value.

    Returns:
        True once all images have been processed.
    """
    outputs = mmcv.load(out_pkl)

    for output in tqdm(outputs):
        # First we collect all the car instances info. in an image
        bboxes, segms, six_dof = output[0], output[1], output[2]
        boxes = bboxes[CAR_IDX]
        im_name = six_dof['file_name'].split('/')[-1][:-4]
        json_file = os.path.join(output_dir, im_name + '.json')

        car_list = []
        if boxes is None or boxes.shape[0] == 0 or max(boxes[:, -1]) < thresh:
            with open(json_file, 'w') as outfile:
                json.dump(car_list, outfile, indent=4)
            # Nothing to export for this image; without this `continue` a
            # missing `boxes` would fail on the indexing below.
            continue

        car_labels = np.argmax(six_dof['car_cls_score_pred'], axis=1)
        trans_pred_world = six_dof['trans_pred_world']
        euler_angles = np.array([quaternion_to_euler_angle(x) for x in six_dof['quaternion_pred']])

        # From largest to smallest order
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        sorted_inds = np.argsort(-areas)

        for i in sorted_inds:
            score = boxes[i, -1]
            if score < thresh:
                continue

            car_info = dict()
            car_info["car_id"] = int(unique_car_mode[car_labels[i]])
            car_info["pose"] = [float(x) for x in euler_angles[i]] + [float(x) for x in trans_pred_world[i]]
            # We use rectangle area
            car_info["area"] = int(areas[i])
            car_info["score"] = float(score)

            if iou_ignore_threshold:
                # Fraction of the bbox rectangle that lies in ignored regions
                masks = np.zeros_like(ignored_mask_binary)
                masks[int(boxes[i][1]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][2])] = 1
                iou_mask = masks * ignored_mask_binary
                iou = np.sum(iou_mask) / int(areas[i])
                if iou <= iou_ignore_threshold:
                    car_list.append(car_info)
                else:
                    print('This mask has been ignored')
            else:
                car_list.append(car_info)

        with open(json_file, 'w') as outfile:
            json.dump(car_list, outfile, indent=4)

    return True
def load_anno_idx(
        self,
        idx,
        img_concat,
        train,
        draw_dir='/data/home/yyj/code/kaggle/new_code/Kaggle_PKU_Baidu/data/pku_data/crop_visualization/crop_mesh'
):
    """Overlay the ground-truth car meshes of one sample on three images and save them.

    Every annotated car of row ``idx`` is rendered (see
    https://www.kaggle.com/ebouteillon/augmented-reality) into a shared mask,
    which is blended onto each of the three images. The results are written
    to ``<draw_dir>/<ImageId>_1.jpg``, ``_2.jpg`` and ``_3.jpg``.

    Args:
        idx: row index into ``train``.
        img_concat: exactly three images; the mask is drawn with the shape of
            the first one.
        train: DataFrame with ``'PredictionString'`` and ``'ImageId'`` columns.
        draw_dir: directory receiving the visualisations.
    """
    img1, img2, img3 = img_concat
    alpha = 0.8  # transparency
    mask_all = np.zeros(img1.shape)

    gt = self._str2coords(train['PredictionString'].iloc[idx])
    for gt_pred in gt:
        # car_id2name is from:
        # https://github.com/ApolloScapeAuto/dataset-api/blob/master/car_instance/car_models.py
        car_name = car_id2name[gt_pred['id']].name
        vertices = np.array(self.car_model_dict[car_name]['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(self.car_model_dict[car_name]['faces']) - 1

        # Pitch and yaw are exchanged (and all angles negated) before the
        # rotation matrix is built.
        yaw, pitch, roll = -gt_pred['pitch'], -gt_pred['yaw'], -gt_pred['roll']
        Rt = np.eye(4)
        Rt[:3, 3] = np.array([gt_pred['x'], gt_pred['y'], gt_pred['z']])
        Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T
        Rt = Rt[:3, :]

        # project 3D points to 2d image plane
        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices
        P = P.T
        img_cor_points = np.dot(self.camera_matrix, np.dot(Rt, P)).T
        img_cor_points[:, 0] /= img_cor_points[:, 2]
        img_cor_points[:, 1] /= img_cor_points[:, 2]

        # Rasterise every mesh triangle to obtain this car's mask
        mask_seg = np.zeros(img1.shape, dtype=np.uint8)
        for tri in triangles:
            coord = np.array([
                img_cor_points[tri[0]][:2],
                img_cor_points[tri[1]][:2],
                img_cor_points[tri[2]][:2],
            ], dtype=np.int32)
            cv2.drawContours(mask_seg, np.int32([coord]), 0, (0, 0, 255), -1)
        mask_all += mask_seg

    # Stretch the accumulated mask to 0..255. With nothing drawn the maximum
    # is 0 and dividing would produce NaN, so the mask is left all-zero.
    mask_peak = mask_all.max()
    if mask_peak > 0:
        mask_all = mask_all * 255 / mask_peak
    overlay = mask_all.astype(np.uint8)

    image_id = train['ImageId'].iloc[idx]
    for suffix, img in zip(('_1.jpg', '_2.jpg', '_3.jpg'), (img1, img2, img3)):
        # Use the returned array rather than relying on in-place output.
        merged_image = cv2.addWeighted(img.astype(np.uint8), 1.0, overlay, alpha, 0)
        imwrite(merged_image, os.path.join(draw_dir, image_id + suffix))