def _fancy_deep_learning(frame):
    """Creates a list of prediction Object protos from a frame's ground-truth labels."""
    o_list = []
    for camera_labels in frame.camera_labels:
        if camera_labels.name != 1:  # Only use the front camera.
            continue
        for gt_label in camera_labels.labels:
            o = metrics_pb2.Object()
            # The following 3 fields are used to uniquely identify the frame
            # the prediction is made for.
            o.context_name = frame.context.name
            # The frame timestamp for the prediction. See Frame::timestamp_micros
            # in dataset.proto.
            o.frame_timestamp_micros = frame.timestamp_micros
            # This is only needed for 2D detection or tracking tasks.
            # Set it to the camera name the prediction is for.
            o.camera_name = camera_labels.name

            # Populate box and score.
            box = label_pb2.Label.Box()
            box.center_x = gt_label.box.center_x
            box.center_y = gt_label.box.center_y
            box.length = gt_label.box.length
            box.width = gt_label.box.width
            o.object.box.CopyFrom(box)
            # The score must be within [0.0, 1.0]. It is better to filter out
            # boxes with small scores to speed up metrics computation.
            o.score = 0.9
            # Use the correct type.
            o.object.type = gt_label.type
            o_list.append(o)
    return o_list
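# `_fancy_deep_learning` returns a list instead of writing a file. A minimal
# sketch of the missing serialization step (the output path, helper name, and
# `frames` iterable are assumptions, not from the source):
from waymo_open_dataset.protos import metrics_pb2

def _write_fancy_preds(frames, out_path='/tmp/fancy_preds.bin'):
    # Collect per-frame object lists into one Objects proto and write it out.
    objects = metrics_pb2.Objects()
    for frame in frames:  # assumed iterable of dataset_pb2.Frame
        objects.objects.extend(_fancy_deep_learning(frame))
    with open(out_path, 'wb') as f:
        f.write(objects.SerializeToString())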
def _load_meta(self, frame, camera_id):
    o = metrics_pb2.Object()
    o.camera_name = camera_id
    o.context_name = frame.context.name
    o.frame_timestamp_micros = frame.timestamp_micros
    return o
def createsubmisioncameraobject(objects, boxes, pred_cls, scores, context_name,
                                frame_timestamp_micros, camera_name):
    total_boxes = len(boxes)
    for i in range(total_boxes):
        label = pred_cls[i]
        # bbox is (center_x, center_y, width, height) in image coordinates,
        # e.g. [1246.5217, 750.64905, 113.49747, 103.9653].
        bbox = boxes[i]
        score = scores[i]
        o = metrics_pb2.Object()
        o.context_name = context_name  # frame.context.name
        o.frame_timestamp_micros = int(frame_timestamp_micros)
        o.camera_name = camera_name  # e.g. dataset_pb2.CameraName.FRONT
        o.score = score
        # Populate the box.
        box = label_pb2.Label.Box()
        box.center_x = bbox[0]
        box.center_y = bbox[1]
        box.width = bbox[2]
        box.length = bbox[3]
        o.object.box.CopyFrom(box)
        o.object.detection_difficulty_level = label_pb2.Label.LEVEL_1
        o.object.num_lidar_points_in_box = 100
        o.object.type = INSTANCEindex_pb2[label]
        assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
        objects.objects.append(o)
def format_tracking_result(box, context_name, timestamp_micros):
    """Convert a Bbox3D into a metrics_pb2.Object so that it can be
    serialized in the Waymo Open Dataset format.

    Args:
        box (Bbox3D): box converted from a tracklet's State
        context_name (str): name of the context
        timestamp_micros (int): timestamp of the frame where this box appears

    Returns:
        metrics_pb2.Object
    """
    assert box.frame == 'ego_vehicle', \
        "box must be in 'ego_vehicle' frame, while right now it is in {}".format(box.frame)
    o = metrics_pb2.Object()
    o.context_name = context_name
    o.frame_timestamp_micros = timestamp_micros
    # Populate box & score.
    waymo_box = label_pb2.Label.Box()
    waymo_box.center_x = box.center[0]
    waymo_box.center_y = box.center[1]
    waymo_box.center_z = box.center[2]
    waymo_box.length = box.l
    waymo_box.width = box.w
    waymo_box.height = box.h
    waymo_box.heading = box.yaw
    o.object.box.CopyFrom(waymo_box)
    o.score = box.score
    o.object.id = '{}-{}'.format(box.obj_type, box.id)
    o.object.type = waymo_tracking_names[box.obj_type]
    return o
def create_waymo_sumbit(waymo_annos, out_path):
    print('Creating Waymo submission...')
    objects = metrics_pb2.Objects()
    for token in tqdm(waymo_annos):
        for waymo_anno in waymo_annos[token]:
            o = metrics_pb2.Object()
            o.context_name = waymo_anno['context_name']
            o.frame_timestamp_micros = waymo_anno['frame_timestamp_micros']
            box = label_pb2.Label.Box()
            box.center_x = waymo_anno['ego_box_3d'][0]
            box.center_y = waymo_anno['ego_box_3d'][1]
            box.center_z = waymo_anno['ego_box_3d'][2]
            box.length = waymo_anno['dimension'][2]
            box.width = waymo_anno['dimension'][1]
            box.height = waymo_anno['dimension'][0]
            box.heading = waymo_anno['ego_heading']
            o.object.box.CopyFrom(box)
            o.score = waymo_anno['score']
            o.object.id = str(waymo_anno['object_id'])
            o.object.type = waymo_anno['cat']
            o.object.num_lidar_points_in_box = 100
            objects.objects.append(o)
    # Write objects to a file.
    with open(out_path, 'wb') as f:
        f.write(objects.SerializeToString())
def create_object(frame_features, x_max, x_min, y_max, y_min, label,
                  difficulty=None, score=None):
    obj = metrics_pb2.Object()

    def create_label(img_width, img_height, x_max, x_min, y_max, y_min,
                     label_type, difficulty=None):
        lab = label_pb2.Label()
        lab.box.center_x = (x_max + x_min) / 2 * img_width
        lab.box.center_y = (y_max + y_min) / 2 * img_height
        lab.box.length = (x_max - x_min) * img_width
        lab.box.width = (y_max - y_min) * img_height
        lab.type = 4 if label_type == 3 else label_type  # Revert cyclist label to 4.
        if difficulty is not None:
            lab.detection_difficulty_level = 1 if difficulty == 0 else difficulty
        return lab

    label = create_label(frame_features['image/width'],
                         frame_features['image/height'], x_max, x_min, y_max,
                         y_min, label, difficulty)
    obj.object.MergeFrom(label)
    if score:
        obj.score = score
    obj.context_name = frame_features["image/context_name"]
    obj.frame_timestamp_micros = frame_features["image/frame_timestamp_micros"]
    obj.camera_name = frame_features["image/camera_name"]
    return obj
def tfRecordToBin(data_file_name, data_type):
    if data_type == 'vehicle':
        data_type_label = label_pb2.Label.TYPE_VEHICLE
    elif data_type == 'pedestrian':
        data_type_label = label_pb2.Label.TYPE_PEDESTRIAN
    elif data_type == 'cyclist':
        data_type_label = label_pb2.Label.TYPE_CYCLIST
    else:
        print("Usage: python tfRecordDataToLabel.py data.tfrecord vehicle")
        sys.exit(1)
    # Strip the '.tfrecord' suffix and name the output after the object type.
    result_file_name = data_file_name[:-9] + "_" + data_type + "_gt" + '.bin'
    objs = metrics_pb2.Objects()
    dataset = tf.data.TFRecordDataset(data_file_name, compression_type='')
    for data in dataset:
        frame = dataset_pb2.Frame()
        frame.ParseFromString(bytearray(data.numpy()))
        for frame_obj in frame.laser_labels:
            if not frame_obj.type == data_type_label:
                continue
            obj = metrics_pb2.Object()
            obj.object.box.CopyFrom(frame_obj.box)
            obj.object.type = frame_obj.type
            obj.object.id = frame_obj.id
            obj.context_name = frame.context.name
            obj.frame_timestamp_micros = frame.timestamp_micros
            objs.objects.append(obj)
    with open(result_file_name, 'wb') as f:
        f.write(objs.SerializeToString())
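# Usage sketch for tfRecordToBin (the segment filename is a placeholder):
# tfRecordToBin('segment-XXXX_with_camera_labels.tfrecord', 'vehicle')
# writes segment-XXXX_with_camera_labels_vehicle_gt.bin containing only the
# TYPE_VEHICLE laser labels.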
def anns2proto(self, outfile_prefix):
    anns = self.coco.anns
    ground_truths = metrics_pb2.Objects()
    for _, ann in anns.items():
        obj = metrics_pb2.Object()
        img_info = self.data_infos[ann['image_id']]
        x1, y1, w, h = ann['bbox']
        cx = x1 + w / 2
        cy = y1 + h / 2
        lab = label_pb2.Label()
        lab.box.center_x = cx
        lab.box.center_y = cy
        lab.box.length = w
        lab.box.width = h
        lab.type = 4 if ann['category_id'] == 3 else ann['category_id']
        lab.detection_difficulty_level = 1 if ann['det_difficult'] == 0 else ann['det_difficult']
        obj.object.MergeFrom(lab)
        obj.context_name = img_info["context_name"]
        obj.frame_timestamp_micros = img_info["timestamp_micros"]
        obj.camera_name = img_info["camera_id"]
        ground_truths.objects.append(obj)
    with open(outfile_prefix + "_gt.bin", 'wb') as f:
        f.write(ground_truths.SerializeToString())
def createsubmisionobject(objects, boxes, pred_cls, scores, context_name,
                          frame_timestamp_micros):
    total_boxes = len(boxes)
    for i in range(total_boxes):
        label = pred_cls[i]
        # bbox is [(x_min, y_min), (x_max, y_max)] in image coordinates.
        bbox = boxes[i]
        score = scores[i]
        o = metrics_pb2.Object()
        o.context_name = context_name  # frame.context.name
        o.frame_timestamp_micros = int(frame_timestamp_micros)  # frame.timestamp_micros
        o.camera_name = dataset_pb2.CameraName.FRONT
        o.score = score
        # Populate the box from the two-corner representation.
        box = label_pb2.Label.Box()
        box.length = bbox[1][0] - bbox[0][0]
        box.width = bbox[1][1] - bbox[0][1]
        box.center_x = bbox[0][0] + box.length * 0.5
        box.center_y = bbox[0][1] + box.width * 0.5
        o.object.box.CopyFrom(box)
        o.object.detection_difficulty_level = label_pb2.Label.LEVEL_1
        o.object.num_lidar_points_in_box = 100
        o.object.type = INSTANCE_pb2[label]  # INSTANCE_CATEGORY_NAMES.index(label)
        print(f'Object type label: {label}, {INSTANCE_pb2[label]}, '
              f'{INSTANCE_CATEGORY_NAMES.index(label)}')
        assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
        objects.objects.append(o)
def _create_gt_detection(infos, tracking=True):
    """Creates a ground-truth prediction objects file for local evaluation."""
    from waymo_open_dataset import label_pb2
    from waymo_open_dataset.protos import metrics_pb2

    objects = metrics_pb2.Objects()
    for idx in tqdm(range(len(infos))):
        info = infos[idx]
        obj = get_obj(info['path'])
        annos = obj['objects']
        num_points_in_gt = np.array([ann['num_points'] for ann in annos])
        box3d = np.array([ann['box'] for ann in annos])
        if len(box3d) == 0:
            continue
        names = np.array([TYPE_LIST[ann['label']] for ann in annos])
        # Keep x, y, z, length, width, height, and heading.
        box3d = box3d[:, [0, 1, 2, 3, 4, 5, -1]]
        for i in range(box3d.shape[0]):
            if num_points_in_gt[i] == 0:
                continue
            if names[i] == 'UNKNOWN':
                continue
            det = box3d[i]
            score = 1.0
            label = names[i]
            o = metrics_pb2.Object()
            o.context_name = obj['scene_name']
            o.frame_timestamp_micros = int(obj['frame_name'].split("_")[-1])
            # Populate box and score.
            box = label_pb2.Label.Box()
            box.center_x = det[0]
            box.center_y = det[1]
            box.center_z = det[2]
            box.length = det[3]
            box.width = det[4]
            box.height = det[5]
            box.heading = det[-1]
            o.object.box.CopyFrom(box)
            o.score = score
            # Use the correct type.
            o.object.type = CAT_NAME_TO_ID[label]
            o.object.num_lidar_points_in_box = num_points_in_gt[i]
            o.object.id = annos[i]['name']
            objects.objects.append(o)
    # Write objects to a file.
    with open(os.path.join(args.result_path, 'gt_preds.bin'), 'wb') as f:
        f.write(objects.SerializeToString())
def create_result(pose_v, theta_v, trackers, tracking_name, scene_token,
                  current_sample_token):
    """Creates a prediction objects file."""
    objects = metrics_pb2.Objects()
    o = metrics_pb2.Object()
    # The following fields uniquely identify the frame the prediction belongs
    # to. They must match the values provided in the raw data; otherwise the
    # prediction is counted as a false positive.
    o.context_name = scene_token
    # The frame timestamp for the prediction. See Frame::timestamp_micros in
    # dataset.proto.
    o.frame_timestamp_micros = current_sample_token
    # The camera name is only needed for 2D detection or tracking tasks.
    # o.camera_name = dataset_pb2.CameraName.FRONT

    # Populate box and score.
    box = label_pb2.Label.Box()
    box.center_x = pose_v[0]
    box.center_y = pose_v[1]
    box.center_z = pose_v[2]
    box.length = trackers[2]
    box.width = trackers[1]
    box.height = trackers[0]
    box.heading = 0
    o.object.box.CopyFrom(box)
    # The score must be within [0.0, 1.0]. It is better to filter out boxes
    # with small scores to speed up metrics computation.
    o.score = 0.5
    # For tracking, the id must be set and unique for each tracked sequence.
    o.object.id = str(int(trackers[7]))
    # Use the correct type.
    if tracking_name == 1:
        o.object.type = label_pb2.Label.TYPE_VEHICLE
    elif tracking_name == 2:
        o.object.type = label_pb2.Label.TYPE_PEDESTRIAN
    elif tracking_name == 4:
        o.object.type = label_pb2.Label.TYPE_CYCLIST
    objects.objects.append(o)
    # Add more objects here. A reasonable detector should limit the number of
    # boxes predicted per frame (around 400); a huge number of boxes slows
    # down metrics computation.

    # Write objects to a file.
    with open('/home/shk642/waymo/waymo-od/waymo-dataset-viewer/tmp/pred00.bin',
              'wb') as f:
        f.write(objects.SerializeToString())
def dump_detection_output(self, idx: Union[int, tuple],
                          detections: Target3DArray,
                          fout: RawIOBase) -> None:
    '''
    :param detections: detection result
    :param idx: auxiliary information for output; contains the context name
        and timestamp
    :param fout: output file-like object (or path)
    '''
    try:
        from waymo_open_dataset import label_pb2
        from waymo_open_dataset.protos import metrics_pb2
    except ImportError:
        _logger.error(
            "Cannot find waymo_open_dataset, install the package at "
            "https://github.com/waymo-research/waymo-open-dataset, output "
            "will be skipped now.")
        return

    label_map = {
        WaymoObjectClass.Unknown: label_pb2.Label.TYPE_UNKNOWN,
        WaymoObjectClass.Vehicle: label_pb2.Label.TYPE_VEHICLE,
        WaymoObjectClass.Pedestrian: label_pb2.Label.TYPE_PEDESTRIAN,
        WaymoObjectClass.Sign: label_pb2.Label.TYPE_SIGN,
        WaymoObjectClass.Cyclist: label_pb2.Label.TYPE_CYCLIST
    }

    waymo_array = metrics_pb2.Objects()
    for target in detections:
        waymo_target = metrics_pb2.Object()

        # Convert box parameters.
        box = label_pb2.Label.Box()
        box.center_x = target.position[0]
        box.center_y = target.position[1]
        box.center_z = target.position[2]
        box.length = target.dimension[0]
        box.width = target.dimension[1]
        box.height = target.dimension[2]
        box.heading = target.yaw
        waymo_target.object.box.CopyFrom(box)

        # Convert the label.
        waymo_target.object.type = label_map[target.tag_top]
        waymo_target.score = target.tag_top_score

        # The name of the sequence is the context.
        waymo_target.context_name = idx[0]
        waymo_target.frame_timestamp_micros = int(self.timestamp(idx) * 1e6)
        waymo_array.objects.append(waymo_target)

    bindata = waymo_array.SerializeToString()
    if isinstance(fout, (str, Path)):
        Path(fout).write_bytes(bindata)
    else:
        fout.write(bindata)
def _create_pd_detection(detections, infos, result_path, tracking=False):
    """Creates a prediction objects file."""
    assert tracking is False, "Not Supported Yet"
    from waymo_open_dataset import label_pb2
    from waymo_open_dataset.protos import metrics_pb2

    objects = metrics_pb2.Objects()
    for token, detection in tqdm(detections.items()):
        info = infos[token]
        obj = get_obj(info['path'])

        box3d = detection["box3d_lidar"].detach().cpu().numpy()
        scores = detection["scores"].detach().cpu().numpy()
        labels = detection["label_preds"].detach().cpu().numpy()

        # Convert the heading convention.
        box3d[:, -1] = -box3d[:, -1] - np.pi / 2

        if box3d.shape[1] > 7:  # Drop velocity.
            box3d = box3d[:, [0, 1, 2, 3, 4, 5, -1]]

        for i in range(box3d.shape[0]):
            det = box3d[i]
            score = scores[i]
            label = labels[i]

            o = metrics_pb2.Object()
            o.context_name = obj['scene_name']
            o.frame_timestamp_micros = int(obj['frame_name'].split("_")[-1])

            # Populate box and score.
            box = label_pb2.Label.Box()
            box.center_x = det[0]
            box.center_y = det[1]
            box.center_z = det[2]
            box.length = det[3]
            box.width = det[4]
            box.height = det[5]
            box.heading = det[-1]
            o.object.box.CopyFrom(box)
            o.score = score
            # Use the correct type.
            o.object.type = label_to_type(label)  # int(label) + 1
            objects.objects.append(o)

    # Write objects to a file.
    with open(os.path.join(result_path, 'my_preds.bin'), 'wb') as f:
        f.write(objects.SerializeToString())
def parse_one_object(instance_idx):
    """Parse one instance in KITTI format and convert it to an `Object` proto.

    Args:
        instance_idx (int): Index of the instance to be converted.

    Returns:
        :obj:`Object`: Predicted instance as a Waymo dataset Object proto.
    """
    cls = kitti_result['name'][instance_idx]
    length = round(kitti_result['dimensions'][instance_idx, 0], 4)
    height = round(kitti_result['dimensions'][instance_idx, 1], 4)
    width = round(kitti_result['dimensions'][instance_idx, 2], 4)
    x = round(kitti_result['location'][instance_idx, 0], 4)
    y = round(kitti_result['location'][instance_idx, 1], 4)
    z = round(kitti_result['location'][instance_idx, 2], 4)
    rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)
    score = round(kitti_result['score'][instance_idx], 4)

    # y points downwards; move the box origin from the bottom center (KITTI)
    # to the true center (Waymo).
    y -= height / 2
    # Frame transformation: KITTI -> Waymo.
    x, y, z = self.transform(T_k2w, x, y, z)

    # Different heading conventions; normalize to [-pi, pi].
    heading = -(rotation_y + np.pi / 2)
    while heading < -np.pi:
        heading += 2 * np.pi
    while heading > np.pi:
        heading -= 2 * np.pi

    box = label_pb2.Label.Box()
    box.center_x = x
    box.center_y = y
    box.center_z = z
    box.length = length
    box.width = width
    box.height = height
    box.heading = heading

    o = metrics_pb2.Object()
    o.object.box.CopyFrom(box)
    o.object.type = self.k2w_cls_map[cls]
    o.score = score
    o.context_name = context_name
    o.frame_timestamp_micros = frame_timestamp_micros
    return o
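# The two while-loops above wrap the heading into [-pi, pi]. An equivalent
# closed-form version, shown as a minimal sketch (the helper name
# `normalize_heading` is ours, not from the source; the two forms can differ
# only at the +/-pi boundary, where both values denote the same angle):
import numpy as np

def normalize_heading(heading):
    # Wrap any finite angle into [-pi, pi).
    return (heading + np.pi) % (2 * np.pi) - np.pi

for h in [-7.0, -3.2, 0.0, 3.2, 7.0]:
    w = normalize_heading(h)
    assert -np.pi <= w < np.pi
    # w and h denote the same angle modulo 2*pi.
    r = (w - h) % (2 * np.pi)
    assert min(r, 2 * np.pi - r) < 1e-9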
def convert(obj, context_name, frame_timestamp_micros):
    o = metrics_pb2.Object()
    o.object.box.CopyFrom(obj.box)
    o.object.type = obj.type
    o.score = 1.0
    # Needed for ground-truth generation.
    o.object.num_lidar_points_in_box = obj.num_lidar_points_in_box
    # For identification of the frame.
    o.context_name = context_name
    o.frame_timestamp_micros = frame_timestamp_micros
    return o
def _create_pd_file_example():
    """Creates a prediction objects file."""
    objects = metrics_pb2.Objects()

    o = metrics_pb2.Object()
    # The following 3 fields are used to uniquely identify the frame a
    # prediction is made for. Make sure you set them to values exactly the
    # same as what we provided in the raw data. Otherwise your prediction is
    # considered as a false positive.
    o.context_name = ('context_name for the prediction. See Frame::context::name '
                      'in dataset.proto.')
    # The frame timestamp for the prediction. See Frame::timestamp_micros in
    # dataset.proto.
    invalid_ts = -1
    o.frame_timestamp_micros = invalid_ts
    # This is only needed for 2D detection or tracking tasks.
    # Set it to the camera name the prediction is for.
    o.camera_name = dataset_pb2.CameraName.FRONT

    # Populate box and score.
    box = label_pb2.Label.Box()
    box.center_x = 0
    box.center_y = 0
    box.center_z = 0
    box.length = 0
    box.width = 0
    box.height = 0
    box.heading = 0
    o.object.box.CopyFrom(box)
    # The score must be within [0.0, 1.0]. It is better to filter out boxes
    # with small scores to speed up metrics computation.
    o.score = 0.5
    # For tracking, this must be set and it must be unique for each tracked
    # sequence.
    o.object.id = 'unique object tracking ID'
    # Use the correct type.
    o.object.type = label_pb2.Label.TYPE_PEDESTRIAN

    objects.objects.append(o)
    # Add more objects here. Note that a reasonable detector should limit its
    # maximum number of boxes predicted per frame. A reasonable value is
    # around 400. A huge number of boxes can slow down metrics computation.

    # Write objects to a file.
    with open('/tmp/your_preds.bin', 'wb') as f:
        f.write(objects.SerializeToString())
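# Once written, a predictions .bin like the one above can be loaded back for
# inspection; a minimal sketch, assuming the path used by the function:
from waymo_open_dataset.protos import metrics_pb2

with open('/tmp/your_preds.bin', 'rb') as f:
    loaded = metrics_pb2.Objects.FromString(f.read())
for o in loaded.objects:
    print(o.context_name, o.frame_timestamp_micros, o.score, o.object.type)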
def create_gt_obj(frame, label, view):
    o = metrics_pb2.Object()
    o.context_name = frame.context.name
    o.frame_timestamp_micros = frame.timestamp_micros
    o.camera_name = view
    # Copy only the 2D fields of the label's box. (The original copied
    # label.box directly, leaving the locally built box unused.)
    box = label_pb2.Label.Box()
    box.center_x, box.center_y = label.box.center_x, label.box.center_y
    box.length, box.width = label.box.length, label.box.width
    o.object.box.CopyFrom(box)
    o.object.type = label.type
    return o
def parse_one_object(line):
    attrs = line.split()

    cls = attrs[0]
    height = float(attrs[8])
    width = float(attrs[9])
    length = float(attrs[10])
    x = float(attrs[11])
    y = float(attrs[12])
    z = float(attrs[13])
    rotation_y = float(attrs[14])
    score = float(attrs[15])

    # y points downwards; move the box origin from the bottom center (KITTI)
    # to the true center (Waymo).
    y -= height / 2
    # Frame transformation: KITTI -> Waymo.
    x, y, z = self.transform(T_k2w, x, y, z)

    # Different heading conventions; normalize to [-pi, pi].
    heading = -(rotation_y + np.pi / 2)
    while heading < -np.pi:
        heading += 2 * np.pi
    while heading > np.pi:
        heading -= 2 * np.pi

    # Populate the box.
    box = label_pb2.Label.Box()
    box.center_x = x
    box.center_y = y
    box.center_z = z
    box.length = length
    box.width = width
    box.height = height
    box.heading = heading

    o = metrics_pb2.Object()
    o.object.box.CopyFrom(box)
    o.object.type = self.k2w_cls_map[cls]
    o.score = score
    # For identification of the frame.
    o.context_name = context_name
    o.frame_timestamp_micros = frame_timestamp_micros
    return o
def _create_pd_file_example(path, json_data, objects):
    """Creates prediction objects from a KITTI-format result file."""
    kitti_file = open(path)
    for line in kitti_file.readlines():
        line = line.strip('\n').split()
        if line[0] == 'unknown' or line[0] == 'Sign':
            continue
        if line[15] == '0':
            continue
        o = metrics_pb2.Object()
        o.context_name = json_data["context_name"]
        o.frame_timestamp_micros = json_data["frame_timestamp_micros"]

        box = label_pb2.Label.Box()
        box.center_x = float(line[11])
        box.center_y = float(line[12])
        box.center_z = float(line[13])
        box.length = float(line[10])
        box.width = float(line[9])
        box.height = float(line[8])
        box.heading = float(line[14])
        o.object.box.CopyFrom(box)
        # The score must be within [0.0, 1.0]. It is better to filter out
        # boxes with small scores to speed up metrics computation.
        o.score = float(line[15])
        # For tracking, o.object.id must be set and unique per tracked
        # sequence.
        # Use the correct type.
        o.object.type = __type_list[line[0]]
        objects.objects.append(o)
    kitti_file.close()
    return objects
def _create_pd_file_example():
    """Creates a prediction objects file."""
    objects = metrics_pb2.Objects()

    o = metrics_pb2.Object()
    # The following 3 fields are used to identify the frame the prediction is
    # made for. Set them to exactly the values provided in the raw data;
    # otherwise the prediction may be considered invalid.
    o.context_name = ('context_name for the prediction. See Frame::context::name '
                      'in dataset.proto.')
    # The timestamp for the prediction.
    invalid_ts = -1
    o.frame_timestamp_micros = invalid_ts
    # Only needed for 2D detection or tracking tasks.
    # Set it to the camera name the prediction is for.
    o.camera_name = dataset_pb2.CameraName.FRONT

    # Populate the box and its score.
    box = label_pb2.Label.Box()
    box.center_x = 0
    box.center_y = 0
    box.center_z = 0
    box.length = 0
    box.width = 0
    box.height = 0
    box.heading = 0
    o.object.box.CopyFrom(box)
    # Must be within [0.0, 1.0]; filter out boxes with small scores to speed
    # up metrics computation.
    o.score = 0.5
    # For tracking, this must be set and must be unique for each tracked
    # sequence.
    o.object.id = 'unique object tracking ID'
    # Set the correct type.
    o.object.type = label_pb2.Label.TYPE_PEDESTRIAN

    objects.objects.append(o)
    # A reasonable detector should limit the number of boxes per frame
    # (around 400); too many boxes slow down metrics computation.

    # Write the objects to a file.
    with open('/tmp/your_preds.bin', 'wb') as f:
        f.write(objects.SerializeToString())
def results2proto(self, results, outfile_prefix):
    if isinstance(results[0], list):
        dict_results = self._det2dicts(results)
    detections = metrics_pb2.Objects()
    for detection in dict_results:
        obj = metrics_pb2.Object()
        lab = label_pb2.Label()
        lab.box.center_x = detection['center_x']
        lab.box.center_y = detection['center_y']
        lab.box.length = detection['length']
        lab.box.width = detection['width']
        lab.type = detection['type']
        obj.object.MergeFrom(lab)
        if detection['score']:
            obj.score = detection['score']
        obj.context_name = detection["context_name"]
        obj.frame_timestamp_micros = detection["timestamp_micros"]
        obj.camera_name = detection["camera_name"]
        detections.objects.append(obj)
    with open(outfile_prefix + ".bin", 'wb') as f:
        f.write(detections.SerializeToString())
def _create_bbox_prediction(det, class_id, frame_name, marco_ts):
    o = metrics_pb2.Object()
    o.context_name = frame_name
    o.frame_timestamp_micros = marco_ts

    # det[0:8] holds the four corner (x, y) pairs, det[8] the heading,
    # det[9] the bottom z, det[10] the log-height, and det[11] the score.
    box = label_pb2.Label.Box()
    box.center_x = np.mean(det[[0, 2, 4, 6]])
    box.center_y = np.mean(det[[1, 3, 5, 7]])
    z0 = det[9]
    height = np.exp(det[10])
    box.center_z = z0 + height / 2
    box.width = np.sqrt((det[2] - det[4])**2 + (det[3] - det[5])**2)
    box.length = np.sqrt((det[2] - det[0])**2 + (det[3] - det[1])**2)
    box.height = height
    box.heading = det[8]
    o.object.box.CopyFrom(box)
    if len(det) == 12:
        o.score = det[11]
    o.object.id = ''
    o.object.type = class_id
    return o
def get_objects(tf_root):
    objects = metrics_pb2.Objects()
    frame = open_dataset.Frame()
    files = glob.glob(os.path.join(tf_root, "*.tfrecord"))
    for file_name in tqdm(files):
        dataset = tf.data.TFRecordDataset(file_name, compression_type='')
        for idx, data in enumerate(dataset):
            frame.ParseFromString(bytearray(data.numpy()))
            context_name = frame.context.name
            timestamp_micros = frame.timestamp_micros
            for camera_image, camera_label in zip_longest(frame.images,
                                                          frame.camera_labels):
                camera_name = open_dataset.CameraName.Name.Name(camera_image.name)
                if camera_label is not None:
                    for label in camera_label.labels:
                        o = metrics_pb2.Object()
                        o.context_name = context_name
                        o.frame_timestamp_micros = timestamp_micros
                        o.camera_name = getattr(dataset_pb2.CameraName, camera_name)
                        box = label_pb2.Label.Box()
                        box.center_x = label.box.center_x
                        box.center_y = label.box.center_y
                        box.length = label.box.length
                        box.width = label.box.width
                        o.object.box.CopyFrom(box)
                        # The score must be within [0.0, 1.0]; ground-truth
                        # boxes get full confidence.
                        o.score = 1.0
                        # For tracking, this must be set and unique per
                        # tracked sequence.
                        o.object.id = ''
                        # Use the correct type.
                        o.object.type = label.type
                        objects.objects.append(o)
    return objects
def create_prediction_obj(frame, pred_box, pred_score, pred_label, view,
                          orig_size):
    o = metrics_pb2.Object()
    o.context_name = frame.context.name
    o.frame_timestamp_micros = frame.timestamp_micros
    o.camera_name = view
    # pred_box is (x_min, y_min, x_max, y_max) normalized to [0, 1];
    # rescale to the original image size (orig_size is (height, width)).
    box = label_pb2.Label.Box()
    box.center_x = (pred_box[0] + pred_box[2]) / 2 * orig_size[1]
    box.center_y = (pred_box[1] + pred_box[3]) / 2 * orig_size[0]
    box.length = (pred_box[2] - pred_box[0]) * orig_size[1]
    box.width = (pred_box[3] - pred_box[1]) * orig_size[0]
    o.object.box.CopyFrom(box)
    # The score must be within [0.0, 1.0]. It is better to filter out boxes
    # with small scores to speed up metrics computation.
    o.score = pred_score
    # Use the correct type (map class 3 back to cyclist, type 4).
    o.object.type = pred_label + 1 if pred_label == 3 else pred_label
    return o
def create_pd_object(detection, context_name, frame_timestamp_micros,
                     camera_name):
    """Creates a prediction Object proto."""
    o = metrics_pb2.Object()
    # The following 3 fields are used to uniquely identify the frame the
    # prediction is made for. Make sure you set them to values exactly the
    # same as what we provided in the raw data. Otherwise your prediction is
    # considered as a false positive.
    o.context_name = context_name
    # The frame timestamp for the prediction. See Frame::timestamp_micros in
    # dataset.proto.
    o.frame_timestamp_micros = frame_timestamp_micros
    # This is only needed for 2D detection or tracking tasks.
    # Set it to the camera name the prediction is for.
    o.camera_name = dataset_pb2.CameraName.Name.Value(camera_name)

    bbox, score, label = (detection['bbox'], detection['score'],
                          detection['category_id'])
    # Populate box and score; bbox is COCO-style [x, y, width, height].
    box = label_pb2.Label.Box()
    box.center_x = bbox[0] + bbox[2] * 0.5
    box.center_y = bbox[1] + bbox[3] * 0.5
    box.length = bbox[2]
    box.width = bbox[3]
    o.object.box.CopyFrom(box)
    # The score must be within [0.0, 1.0]. It is better to filter out boxes
    # with small scores to speed up metrics computation.
    o.score = score
    # For tracking, the id must be set and unique for each tracked sequence.
    if 'object_id' in detection:
        o.object.id = detection['object_id']
    # Use the correct type.
    o.object.type = label
    assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
    return o
def _create_pd(args):
    objects = metrics_pb2.Objects()
    coco = COCO(args.anno_file)
    with open(args.results_file) as data_file:
        data = data_file.read()
    data_content = json.loads(data)

    # Group predictions per image. This assumes image ids are consecutive and
    # the results are sorted by image id.
    results_per_id = []
    current_predictions = []
    imgIds = coco.getImgIds()
    current = imgIds[0]
    added = False
    for pred in data_content:
        if pred['image_id'] != current:
            # Close out the previous image before starting a new group. (The
            # original dropped the prediction that crossed the boundary.)
            results_per_id.append(current_predictions)
            current_predictions = []
            current += 1
            added = True
        bbox = pred['bbox']
        center_x = bbox[0] + (bbox[2] / 2)
        center_y = bbox[1] + (bbox[3] / 2)
        length = bbox[2]
        width = bbox[3]
        sub = [center_x, center_y, length, width, pred['score'],
               pred['category_id']]
        current_predictions.append(sub)
        added = False
    if not added:
        results_per_id.append(current_predictions)

    print(len(results_per_id))
    print(len(imgIds))
    print(imgIds[0])
    assert len(results_per_id) == len(imgIds)

    for idx in range(len(imgIds)):
        if idx % 10000 == 0:
            print(idx)
        imgId = imgIds[idx]
        predictions = results_per_id[imgId - 1]
        o = metrics_pb2.Object()
        imgInfo = coco.loadImgs(imgId)
        o.context_name = imgInfo[0]['context']
        o.frame_timestamp_micros = imgInfo[0]['frame_timestamp_micros']
        o.camera_name = camera_name[imgInfo[0]['camera_name'] - 1]
        for pred in predictions:
            if pred[4] < 0.05:  # Filter out low-score boxes.
                continue
            box = label_pb2.Label.Box()
            box.center_x = pred[0]
            box.center_y = pred[1]
            box.center_z = 0
            box.length = pred[2]
            box.width = pred[3]
            box.height = 0
            o.object.box.CopyFrom(box)
            o.score = pred[4]
            o.object.id = 'id'
            o.object.type = label_name[pred[5] - 1]
            # append() copies the proto, so reusing `o` across predictions is
            # safe here.
            objects.objects.append(o)

    with open('valid.bin', 'wb') as f:
        f.write(objects.SerializeToString())
def generatevalidationsubmission(PATH, outputfilepath, MODEL_DIR):
    now = datetime.datetime.now()
    print("In generatevalidationsubmission, current date and time : ")
    print(now.strftime("%Y-%m-%d %H:%M:%S"))

    tf.enable_eager_execution()
    print(tf.__version__)
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.cuda.get_device_name())

    print("Loading Waymo validation frames...")
    waymovalidationframes = loadWaymoValidationFrames(PATH)
    print("Total validation frames: ", len(waymovalidationframes))

    # Unknown: 0, Vehicles: 1, Pedestrians: 2, Cyclists: 3; Signs removed.
    num_classes = 4
    # Load the model using our helper function.
    print("Loading previous model: " + MODEL_DIR)
    model = load_previous_object_detection_model(num_classes, MODEL_DIR)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Move the model to the right device.
    model.to(device)
    model.eval()

    objects = metrics_pb2.Objects()
    outputallframes = waymovalidationframes  # or waymotestframes
    print("Total frames: ", len(outputallframes))
    for frame_idx in range(len(outputallframes)):
        if frame_idx % 10 == 0:
            print("current frame: ", frame_idx)
        frame = outputallframes[frame_idx]
        # Front-camera image.
        image = tf.image.decode_jpeg(frame.images[0].image).numpy()
        img = Image.fromarray(image)
        boxes, pred_cls, scores = get_prediction(model, img, device,
                                                 score_threshold)
        total_boxes = len(boxes)
        if total_boxes == 0:
            continue
        for i in range(total_boxes):
            label = pred_cls[i]
            bbox = boxes[i]
            score = scores[i]
            o = metrics_pb2.Object()
            o.context_name = frame.context.name
            o.frame_timestamp_micros = int(frame.timestamp_micros)
            o.camera_name = dataset_pb2.CameraName.FRONT
            o.score = score
            # Populate the box from the two-corner representation.
            box = label_pb2.Label.Box()
            box.length = bbox[1][0] - bbox[0][0]
            box.width = bbox[1][1] - bbox[0][1]
            box.center_x = bbox[0][0] + box.length * 0.5
            box.center_y = bbox[0][1] + box.width * 0.5
            o.object.box.CopyFrom(box)
            o.object.detection_difficulty_level = label_pb2.Label.LEVEL_1
            o.object.num_lidar_points_in_box = 100
            o.object.type = INSTANCE_pb2[label]  # INSTANCE_CATEGORY_NAMES.index(label)
            print(f'Object type label: {label}, {INSTANCE_pb2[label]}, '
                  f'{INSTANCE_CATEGORY_NAMES.index(label)}')
            assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
            objects.objects.append(o)

    submission = submission_pb2.Submission()
    submission.task = submission_pb2.Submission.DETECTION_2D
    submission.account_name = '*****@*****.**'
    submission.authors.append('Kaikai Liu')
    submission.affiliation = 'None'
    submission.unique_method_name = 'torchvisionfaster'
    submission.description = 'none'
    submission.method_link = "empty method"
    submission.sensor_type = submission_pb2.Submission.CAMERA_ALL
    submission.number_past_frames_exclude_current = 0
    submission.number_future_frames_exclude_current = 0
    submission.inference_results.CopyFrom(objects)

    with open(outputfilepath, 'wb') as f:
        f.write(submission.SerializeToString())

    now = datetime.datetime.now()
    print("Finished validation, current date and time : ")
    print(now.strftime("%Y-%m-%d %H:%M:%S"))
def create_pd(frame, objmodel, device, score_threshold):
    """Creates prediction objects for one frame."""
    objects = metrics_pb2.Objects()
    # Front-camera image. frame.context contains information shared among all
    # frames in the scene; see dataset.proto for the data format.
    image = tf.image.decode_jpeg(frame.images[0].image).numpy()
    img = Image.fromarray(image)

    # Run the prediction.
    boxes, pred_cls, scores = get_prediction(objmodel, img, device,
                                             score_threshold)
    # Limit the number of boxes per frame to 400.
    boxnum = min(len(boxes), 400)
    for i in range(boxnum):
        # patch is [(x_min, y_min), (x_max, y_max)],
        # e.g. [(827.3006, 617.69965), (917.02795, 656.8029)].
        patch = boxes[i]
        label = pred_cls[i]
        # One Object per prediction; see
        # https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/metrics.proto
        o = metrics_pb2.Object()
        # The following 3 fields are used to uniquely identify the frame the
        # prediction is made for. Set them to exactly the values provided in
        # the raw data; otherwise the prediction is considered a false
        # positive.
        o.context_name = frame.context.name
        # The frame timestamp for the prediction. See Frame::timestamp_micros
        # in dataset.proto.
        o.frame_timestamp_micros = int(frame.timestamp_micros)
        # This is only needed for 2D detection or tracking tasks.
        o.camera_name = dataset_pb2.CameraName.FRONT

        # Populate box and score; see
        # https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/label.proto
        box = label_pb2.Label.Box()
        width = patch[1][0] - patch[0][0]
        height = patch[1][1] - patch[0][1]
        box.center_x = patch[0][0] + width / 2
        box.center_y = patch[0][1] + height / 2
        box.center_z = 0
        box.length = 0
        box.width = width
        box.height = height
        box.heading = 0
        o.object.box.CopyFrom(box)
        # The score must be within [0.0, 1.0]. It is better to filter out
        # boxes with small scores to speed up metrics computation.
        o.score = 0.5
        # For tracking, the id must be set and unique per tracked sequence.
        o.object.id = 'xxx'
        # Use the correct type.
        o.object.type = INSTANCE_pb2[label]
        objects.objects.append(o)
    return objects
def make_allcameraobject_list_from_subdir(np_dir, frame_context_name,
                                          frame_timestamp_micros):
    """Make a list of Object protos for all cameras from one result file.

    Instead of per-camera boxes.npy / classes.npy / scores.npy files, this
    expects a single allcameraresult.npy whose keys are camera names and
    whose values are result dicts.
    """
    allcameraresult = np.load(os.path.join(np_dir, 'allcameraresult.npy'),
                              allow_pickle=True)
    allcameraresult = allcameraresult.item()

    obj_list = []
    for imagename in allcameras:  # Go through all cameras.
        resultdict = allcameraresult[imagename]  # One camera.
        boxes = resultdict['boxes']
        classes = resultdict['classes']
        scores = resultdict['scores']
        # input_fields.txt is empty, so use the camera name directly,
        # e.g. ["FRONT_IMAGE"].
        input_fields = [imagename]

        num_objs = boxes.shape[0]
        assert classes.shape[0] == num_objs
        assert scores.shape[0] == num_objs
        for i in range(num_objs):
            obj = metrics_pb2.Object()
            obj.context_name = frame_context_name
            obj.frame_timestamp_micros = frame_timestamp_micros
            obj.score = scores[i]
            obj.object.type = classes[i]

            # Handle the box creation differently for 3D boxes (where the
            # inner dimension is 7) and 2D boxes (where the inner dimension
            # is 4).
            if boxes.shape[1] == 7:
                obj.object.box.center_x = boxes[i, 0]
                obj.object.box.center_y = boxes[i, 1]
                obj.object.box.center_z = boxes[i, 2]
                obj.object.box.length = boxes[i, 3]
                obj.object.box.width = boxes[i, 4]
                obj.object.box.height = boxes[i, 5]
                obj.object.box.heading = boxes[i, 6]
            elif boxes.shape[1] == 4:
                obj.object.box.center_x = boxes[i, 0]
                obj.object.box.center_y = boxes[i, 1]
                obj.object.box.length = boxes[i, 2]
                obj.object.box.width = boxes[i, 3]
                # For 2D detection objects, the camera name of the object
                # proto comes from the camera whose image was used as input.
                # Thus, the input_fields specified by the user are checked to
                # ensure that only a single input was used and that it was
                # the RGB image from one of the cameras.
                if len(input_fields) != 1:
                    raise ValueError(
                        'Can only use one input when submitting 2D detection '
                        'results; instead was using:\n' + '\n'.join(input_fields))
                input_field = input_fields[0]
                if not input_field.endswith('_IMAGE'):
                    raise ValueError(
                        'For 2D detection results, the input field should be '
                        'one of the camera images, but got ' + input_field)
                # Strip the '_IMAGE' suffix, e.g. 'FRONT_IMAGE' ->
                # dataset_pb2.CameraName.FRONT (1).
                obj.camera_name = dataset_pb2.CameraName.Name.Value(
                    input_field[:-6])

            # Run some checks to avoid adding invalid objects. These are the
            # same checks used in metrics/tools/create_submission.cc.
            if (obj.score < 0.03 or obj.object.box.length < 0.01 or
                    obj.object.box.width < 0.01 or
                    (obj.object.box.HasField('height') and
                     obj.object.box.height < 0.01)):
                print('Skipping invalid object', obj)
                continue
            obj_list.append(obj)
    return obj_list
def make_object_list_from_subdir(np_dir, frame_context_name,
                                 frame_timestamp_micros):
    """Make a list of Object protos from the detection results in a directory.

    In particular, this function assumes that np_dir is a subdirectory like
    one created by the latency evaluator for a particular frame, and thus
    that it contains three npy files:

    * boxes.npy: an N x 7 float array with the x, y, z, length, width,
      height, and heading for all the detections in this frame.
    * classes.npy: an N-dim uint8 array with the type IDs in {0, 1, 2, 3, 4}
      for all the detections in this frame.
    * scores.npy: an N-dim float array with the scores in [0, 1] for all the
      detections in this frame.

    These arrays are converted into a list of N Object protos, one for each
    detection, where all the protos have the frame_context_name and
    frame_timestamp_micros set by the arguments.

    Args:
        np_dir: string directory name containing the npy files.
        frame_context_name: string context_name to set for each Object proto.
        frame_timestamp_micros: int timestamp micros to set for each Object
            proto.

    Returns:
        List of N Object protos, one for each detection present in the npy
        files. They all have the same context name and
        frame_timestamp_micros, while their boxes, scores, and types come
        from the numpy arrays.
    """
    boxes = np.load(os.path.join(np_dir, 'boxes.npy'))
    classes = np.load(os.path.join(np_dir, 'classes.npy'))
    scores = np.load(os.path.join(np_dir, 'scores.npy'))

    # Read the input fields file if it exists.
    input_fields = []
    input_field_path = os.path.join(np_dir, 'input_fields.txt')
    if os.path.isfile(input_field_path):
        with open(input_field_path, 'r') as input_field_file:
            input_fields = input_field_file.readlines()

    num_objs = boxes.shape[0]
    assert classes.shape[0] == num_objs
    assert scores.shape[0] == num_objs
    obj_list = []
    for i in range(num_objs):
        obj = metrics_pb2.Object()
        obj.context_name = frame_context_name
        obj.frame_timestamp_micros = frame_timestamp_micros
        obj.score = scores[i]
        obj.object.type = classes[i]

        # Handle the box creation differently for 3D boxes (where the inner
        # dimension is 7) and 2D boxes (where the inner dimension is 4).
        if boxes.shape[1] == 7:
            obj.object.box.center_x = boxes[i, 0]
            obj.object.box.center_y = boxes[i, 1]
            obj.object.box.center_z = boxes[i, 2]
            obj.object.box.length = boxes[i, 3]
            obj.object.box.width = boxes[i, 4]
            obj.object.box.height = boxes[i, 5]
            obj.object.box.heading = boxes[i, 6]
        elif boxes.shape[1] == 4:
            obj.object.box.center_x = boxes[i, 0]
            obj.object.box.center_y = boxes[i, 1]
            obj.object.box.length = boxes[i, 2]
            obj.object.box.width = boxes[i, 3]
            # For 2D detection objects, the camera name of the object proto
            # comes from the camera whose image was used as input. Thus, the
            # input_fields specified by the user are checked to ensure that
            # they only used a single input and that the input was the RGB
            # image from one of the cameras.
            if len(input_fields) != 1:
                raise ValueError(
                    'Can only use one input when submitting 2D detection '
                    'results; instead was using:\n' + '\n'.join(input_fields))
            input_field = input_fields[0]
            if not input_field.endswith('_IMAGE'):
                raise ValueError(
                    'For 2D detection results, the input field should be '
                    'one of the camera images, but got ' + input_field)
            obj.camera_name = dataset_pb2.CameraName.Name.Value(
                input_field[:-6])

        # Run some checks to avoid adding invalid objects. These are the same
        # checks used in metrics/tools/create_submission.cc.
        if (obj.score < 0.03 or obj.object.box.length < 0.01 or
                obj.object.box.width < 0.01 or
                (obj.object.box.HasField('height') and
                 obj.object.box.height < 0.01)):
            print('Skipping invalid object', obj)
            continue
        obj_list.append(obj)
    return obj_list
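# A usage sketch for make_object_list_from_subdir (the directory layout,
# subdirectory naming scheme, and output path are our assumptions, not from
# the source): iterate over per-frame result subdirectories, collect the
# Object protos, and serialize them into one Objects file.
import os
from waymo_open_dataset.protos import metrics_pb2

objects = metrics_pb2.Objects()
root = 'latency_results'  # assumed: one subdir per frame, named
                          # '<context_name>_<timestamp_micros>'
for subdir in sorted(os.listdir(root)):
    context_name, _, ts = subdir.rpartition('_')
    obj_list = make_object_list_from_subdir(
        os.path.join(root, subdir), context_name, int(ts))
    objects.objects.extend(obj_list)
with open('detections.bin', 'wb') as f:
    f.write(objects.SerializeToString())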