def _preload_tracklets(self, seq_id): if seq_id in self._tracklet_cache: return date = self._get_date(seq_id) fname = Path(date, seq_id, "tracklet_labels.xml") if self.inzip: zname = seq_id[:-len(self.datatype)] + "tracklets" with ZipFile(self.base_path / f"{zname}.zip") as data: tracklets = utils.load_tracklets(data, fname) else: tracklets = utils.load_tracklets(self.base_path, fname) # inverse mapping objs = defaultdict(list) # (frame -> list of objects) for tid, tr in enumerate(tracklets): dim = [tr.l, tr.w, tr.h] tag = ObjectTag(tr.objectType, KittiObjectClass) for pose_idx, pose in enumerate(tr.poses): pos = [pose.tx, pose.ty, pose.tz] pos[2] += dim[2] / 2 ori = Rotation.from_euler("ZYX", (pose.rz, pose.ry, pose.rx)) objs[pose_idx + tr.first_frame].append( ObjectTarget3D(pos, ori, dim, tag, tid=tid)) self._tracklet_cache[seq_id] = { k: Target3DArray(l, frame="velo") for k, l in objs.items() }
def annotation_3dobject(self, idx, raw=False): seq_id, frame_idx = idx fname = "label_lidars/%04d.json" % frame_idx if self._return_file_path: return self.base_path / seq_id / fname if self.inzip: with PatchedZipFile(self.base_path / (seq_id + ".zip"), to_extract=fname) as ar: data = json.loads(ar.read(fname)) else: with (self.base_path / seq_id / fname).open() as fin: data = json.load(fin) labels = list(map(edict, data)) if raw: return labels arr = Target3DArray(frame="vehicle") # or frame=None for label in labels: tid = base64.urlsafe_b64decode(label.id[:12]) tid, = struct.unpack('Q', tid[:8]) target = ObjectTarget3D(label.center, Rotation.from_euler("z", label.heading), label.size, ObjectTag(label.label, WaymoObjectClass), tid=tid) arr.append(target) return arr
def parse_label(label: list, raw_calib: dict): ''' Generate object array from loaded label or result text ''' Tr = raw_calib['Tr_velo_to_cam'].reshape(3, 4) RRect = Rotation.from_matrix(raw_calib['R0_rect'].reshape(3, 3)) HR, HT = Rotation.from_matrix(Tr[:, :3]), Tr[:, 3] objects = Target3DArray() objects.frame = "velo" for item in label: if item[0] == KittiObjectClass.DontCare: continue h, w, l = item[8:11] # height, width, length position = item[11:14] # x, y, z in camera coordinate ry = item[14] # rotation of y axis in camera coordinate position[1] -= h / 2 # parse object position and orientation position = np.dot(position, RRect.inv().as_matrix().T) position = HR.inv().as_matrix().dot(position - HT) orientation = HR.inv() * RRect.inv() * Rotation.from_euler('y', ry) orientation *= Rotation.from_euler( "x", np.pi / 2) # change dimension from l,h,w to l,w,h score = item[15] if len(item) == 16 else None tag = ObjectTag(item[0], KittiObjectClass, scores=score) target = ObjectTarget3D(position, orientation, [l, w, h], tag) objects.append(target) return objects
def annotation_3dobject( self, idx, raw=False, visible_range=80 ): # TODO: it seems that dynamic objects need interpolation ''' :param visible_range: range for visible objects. Objects beyond that distance will be removed when reporting ''' assert not self._return_file_path, "The annotation is not in a single file!" seq_id, frame_idx = idx self._preload_3dobjects(seq_id) objects = [ self._3dobjects_cache[seq_id][i.data] for i in self._3dobjects_mapping[seq_id][frame_idx] ] if raw: return objects self._preload_poses(seq_id) pr, pt = self._poses_r[seq_id][frame_idx], self._poses_t[seq_id][ frame_idx] boxes = Target3DArray(frame="pose") for box in objects: RS, T = box.transform[:3, :3], box.transform[:3, 3] S = np.linalg.norm(RS, axis=0) # scale R = Rotation.from_matrix(RS / S) # rotation R = pr.inv() * R # relative rotation T = pr.inv().as_matrix().dot(T - pt) # relative translation # skip static objects beyond vision if np.linalg.norm(T) > visible_range: continue global_id = box.semanticID * 1000 + box.instanceID tag = ObjectTag(kittiId2label[box.semanticId].name, Kitti360Class) boxes.append(ObjectTarget3D(T, R, S, tag, tid=global_id)) return boxes
def set_up_matcher(self): # src boxes r = Rotation.from_euler("Z", 0) d = [2, 2, 2] dt1 = ObjectTarget3D([0, 0, 0], r, d, ObjectTag(KittiObjectClass.Car, scores=0.8)) dt2 = ObjectTarget3D([1, 1, 0], r, d, ObjectTag(KittiObjectClass.Van, scores=0.7)) dt3 = ObjectTarget3D([-1, -1, 0], r, d, ObjectTag(KittiObjectClass.Car, scores=0.8)) dt_boxes = Target3DArray([dt1, dt2, dt3], frame="test") # dst boxes r = Rotation.from_euler("Z", 0) d = [2, 2, 2] gt1 = ObjectTarget3D([0, 0, 0], r, d, ObjectTag(KittiObjectClass.Van)) gt2 = ObjectTarget3D([-1, 1, 0], r, d, ObjectTag(KittiObjectClass.Car)) gt3 = ObjectTarget3D([1, -1, 0], r, d, ObjectTag(KittiObjectClass.Van)) gt_boxes = Target3DArray([gt1, gt2, gt3], frame="test") self.matcher_case1 = (dt_boxes, gt_boxes)
def test_scenarios(self): eval_classes = [KittiObjectClass.Car, KittiObjectClass.Van] evaluator = TrackingEvaluator(eval_classes, [0.5, 1]) # scenario: X-crossing switch r = Rotation.from_euler("Z", 0) d = [1, 1, 1] t = ObjectTag(KittiObjectClass.Car, scores=0.8) v = [0, 0, 0] tid = 1 traj1 = [ TrackingTarget3D([-2, 2, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([-1, 1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([0, 0, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([1, 1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([2, 2, 0], r, d, v, v, t, tid=tid), ] t = ObjectTag(KittiObjectClass.Car, scores=0.9) tid = 2 traj2 = [ TrackingTarget3D([-2, -2, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([-1, -1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([0, 0, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([1, -1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([2, -2, 0], r, d, v, v, t, tid=tid), ] dt_trajs = [ Target3DArray([t1, t2], frame="test") for t1, t2 in zip(traj1, traj2) ] r = Rotation.from_euler("Z", 0.01) d = [1.1, 1.1, 1.1] t = ObjectTag(KittiObjectClass.Car) tid = 1001 gt1 = [ ObjectTarget3D([-2.1, 2.1, 0], r, d, t, tid=tid), ObjectTarget3D([-1.1, 0.9, 0], r, d, t, tid=tid), ObjectTarget3D([-0.1, 0.1, 0], r, d, t, tid=tid), ObjectTarget3D([0.9, -1.1, 0], r, d, t, tid=tid), ObjectTarget3D([1.9, -1.9, 0], r, d, t, tid=tid), ] tid = 1002 gt2 = [ ObjectTarget3D([-2.1, -2.1, 0], r, d, t, tid=tid), ObjectTarget3D([-1.1, -0.9, 0], r, d, t, tid=tid), ObjectTarget3D([-0.1, 0.1, 0], r, d, t, tid=tid), ObjectTarget3D([0.9, 1.1, 0], r, d, t, tid=tid), ObjectTarget3D([1.9, 1.9, 0], r, d, t, tid=tid), ] gt_trajs = [ Target3DArray([t1, t2], frame="test") for t1, t2 in zip(gt1, gt2) ] for dt_array, gt_array in zip(dt_trajs, gt_trajs): stats = evaluator.calc_stats(gt_array, dt_array) evaluator.add_stats(stats) assert evaluator.tp()[KittiObjectClass.Car] == 10 assert evaluator.fp()[KittiObjectClass.Car] == 0 assert evaluator.fn()[KittiObjectClass.Car] == 0 assert evaluator.fn()[KittiObjectClass.Car] == 0 assert evaluator.id_switches()[KittiObjectClass.Car] == 2 assert evaluator.fragments()[KittiObjectClass.Car] == 2 # scenario: X-crossing with 3 tracklets evaluator.reset() r = Rotation.from_euler("Z", 0) d = [1, 1, 1] t = ObjectTag(KittiObjectClass.Car, scores=0.8) v = [0, 0, 0] tid = 1 traj1 = [ TrackingTarget3D([-2, 2, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([-1, 1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([0, 0, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([1, 1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([2, 2, 0], r, d, v, v, t, tid=tid), ] t = ObjectTag(KittiObjectClass.Car, scores=0.9) tid = 2 traj2 = [ TrackingTarget3D([-2, -2, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([-1, -1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([0, 0, 0], r, d, v, v, t, tid=tid) ] tid = 3 traj3 = [ TrackingTarget3D([1, -1, 0], r, d, v, v, t, tid=tid), TrackingTarget3D([2, -2, 0], r, d, v, v, t, tid=tid) ] dt_trajs = [ Target3DArray([t2, t1], frame="test") for t1, t2 in zip(traj1[:3], traj2) ] dt_trajs += [ Target3DArray([t3, t1], frame="test") for t1, t3 in zip(traj1[3:], traj3) ] for dt_array, gt_array in zip(dt_trajs, gt_trajs): stats = evaluator.calc_stats(gt_array, dt_array) evaluator.add_stats(stats) assert evaluator.tp()[KittiObjectClass.Car] == 10 assert evaluator.fp()[KittiObjectClass.Car] == 0 assert evaluator.fn()[KittiObjectClass.Car] == 0 assert evaluator.fn()[KittiObjectClass.Car] == 0 assert evaluator.id_switches()[KittiObjectClass.Car] == 2 assert evaluator.fragments()[KittiObjectClass.Car] == 1 assert evaluator.tracked_ratio()[KittiObjectClass.Car] == 1. assert evaluator.lost_ratio()[KittiObjectClass.Car] == 0.
def test_calc_stats(self): eval_classes = [KittiObjectClass.Car, KittiObjectClass.Van] evaluator = DetectionEvaluator(eval_classes, [0.1, 0.2]) r = Rotation.from_euler("Z", 0) d = [2, 2, 2] dt1 = ObjectTarget3D([0, 0, 0], r, d, ObjectTag(KittiObjectClass.Car, scores=0.8)) dt2 = ObjectTarget3D([1, 1, 1], r, d, ObjectTag(KittiObjectClass.Van, scores=0.7)) dt3 = ObjectTarget3D([-1, -1, -1], r, d, ObjectTag(KittiObjectClass.Pedestrian, scores=0.8)) dt_boxes = Target3DArray([dt1, dt2, dt3], frame="test") # test match with self result = evaluator.calc_stats(dt_boxes, dt_boxes) for clsobj in eval_classes: clsid = clsobj.value assert result.ngt[clsid] == 1 assert result.ndt[clsid][0] == 1 and result.ndt[clsid][-1] == 0 assert result.tp[clsid][0] == 1 and result.tp[clsid][-1] == 0 assert result.fp[clsid][0] == 0 and result.fp[clsid][-1] == 0 assert result.fn[clsid][0] == 0 and result.fn[clsid][-1] == 1 assert np.isclose(result.acc_iou[clsid][0], 1) and np.isnan( result.acc_iou[clsid][-1]) assert np.isclose(result.acc_angular[clsid][0], 0) and np.isnan( result.acc_angular[clsid][-1]) assert np.isclose(result.acc_dist[clsid][0], 0) and np.isnan( result.acc_dist[clsid][-1]) assert np.isclose(result.acc_box[clsid][0], 0) and np.isnan( result.acc_box[clsid][-1]) assert np.isinf(result.acc_var[clsid][0]) and np.isnan( result.acc_var[clsid][-1]) # test match with other boxes r = Rotation.from_euler("Z", 0.01) d = [2.1, 2.1, 2.1] gt1 = ObjectTarget3D([0, 0, 0], r, d, ObjectTag(KittiObjectClass.Van)) gt2 = ObjectTarget3D([-1, 1, 0], r, d, ObjectTag(KittiObjectClass.Car)) gt3 = ObjectTarget3D([1, -1, 0], r, d, ObjectTag(KittiObjectClass.Pedestrian)) gt_boxes = Target3DArray([gt1, gt2, gt3], frame="test") result = evaluator.calc_stats(gt_boxes, dt_boxes) for clsobj in eval_classes: clsid = clsobj.value assert result.ngt[clsid] == 1 assert result.ndt[clsid][0] == 1 and result.ndt[clsid][-1] == 0 if clsobj == KittiObjectClass.Car: assert result.tp[clsid][0] == 1 and result.tp[clsid][-1] == 0 assert result.fp[clsid][0] == 0 and result.fp[clsid][-1] == 0 assert result.fn[clsid][0] == 0 and result.fn[clsid][-1] == 1 assert result.acc_iou[clsid][0] > 0.1 and np.isnan( result.acc_iou[clsid][-1]) assert result.acc_angular[clsid][0] > 0 and np.isnan( result.acc_angular[clsid][-1]) assert result.acc_dist[clsid][0] > 1 and np.isnan( result.acc_dist[clsid][-1]) assert result.acc_box[clsid][0] > 0 and np.isnan( result.acc_box[clsid][-1]) assert np.isinf(result.acc_var[clsid][0]) and np.isnan( result.acc_var[clsid][-1]) else: assert result.tp[clsid][0] == 0 and result.tp[clsid][-1] == 0 assert result.fp[clsid][0] == 1 and result.fp[clsid][-1] == 0 assert result.fn[clsid][0] == 1 and result.fn[clsid][-1] == 1 assert np.isnan(result.acc_iou[clsid][0]) and np.isnan( result.acc_iou[clsid][-1]) assert np.isnan(result.acc_angular[clsid][0]) and np.isnan( result.acc_angular[clsid][-1]) assert np.isnan(result.acc_dist[clsid][0]) and np.isnan( result.acc_dist[clsid][-1]) assert np.isnan(result.acc_box[clsid][0]) and np.isnan( result.acc_box[clsid][-1]) assert np.isnan(result.acc_var[clsid][0]) and np.isnan( result.acc_var[clsid][-1])
def test_dump_and_load(self): obj_arr = Target3DArray(frame="someframe", timestamp=1.2345) track_arr = Target3DArray(frame="fixed", timestamp=0.1234) # add targets for i in range(10): position = np.array([i] * 3) position_var = np.diag(position) dimension = np.array([i] * 3) dimension_var = np.diag(position) orientation = Rotation.from_euler("Z", i) tid = "test%d" % i tag = ObjectTag(KittiObjectClass.Car, KittiObjectClass, 0.9) obj = ObjectTarget3D(position, orientation, dimension, tag, tid, position_var=position_var, dimension_var=dimension_var) obj_arr.append(obj) velocity = np.random.rand(3) velocity_var = np.random.rand(3, 3) avel = np.random.rand(3) avel_var = np.random.rand(3, 3) history = i * 0.1 track = TrackingTarget3D(position, orientation, dimension, velocity, avel, tag, tid=tid, position_var=position_var, dimension_var=dimension_var, velocity_var=velocity_var, angular_velocity_var=avel_var, history=history) track_arr.append(track) data = msgpack.packb(obj_arr.serialize(), use_single_float=True) obj_arr_copy = Target3DArray.deserialize(msgpack.unpackb(data)) assert len(obj_arr_copy) == len(obj_arr) assert obj_arr_copy.frame == obj_arr.frame assert obj_arr_copy.timestamp == obj_arr.timestamp for i in range(10): assert np.allclose(obj_arr_copy[i].position, obj_arr[i].position) assert np.allclose(obj_arr_copy[i].position_var, obj_arr[i].position_var) assert np.allclose(obj_arr_copy[i].dimension, obj_arr[i].dimension) assert np.allclose(obj_arr_copy[i].dimension_var, obj_arr[i].dimension_var) assert np.allclose(obj_arr_copy[i].orientation.as_quat(), obj_arr[i].orientation.as_quat()) assert obj_arr_copy[i].tid == obj_arr[i].tid assert obj_arr_copy[i].tag.mapping == obj_arr[i].tag.mapping assert obj_arr_copy[i].tag.labels == obj_arr[i].tag.labels assert np.allclose(obj_arr_copy[i].tag.scores, obj_arr[i].tag.scores) data = msgpack.packb(track_arr.serialize(), use_single_float=True) track_arr_copy = Target3DArray.deserialize(msgpack.unpackb(data)) assert len(track_arr_copy) == len(track_arr) assert track_arr_copy.frame == track_arr.frame assert track_arr_copy.timestamp == track_arr.timestamp for i in range(10): assert np.allclose(track_arr_copy[i].position, track_arr[i].position) assert np.allclose(track_arr_copy[i].position_var, track_arr[i].position_var) assert np.allclose(track_arr_copy[i].dimension, track_arr[i].dimension) assert np.allclose(track_arr_copy[i].dimension_var, track_arr[i].dimension_var) assert np.allclose(track_arr_copy[i].orientation.as_quat(), track_arr[i].orientation.as_quat()) assert np.allclose(track_arr_copy[i].velocity, track_arr[i].velocity) assert np.allclose(track_arr_copy[i].velocity_var, track_arr[i].velocity_var) assert np.allclose(track_arr_copy[i].angular_velocity, track_arr[i].angular_velocity) assert np.allclose(track_arr_copy[i].angular_velocity_var, track_arr[i].angular_velocity_var) assert track_arr_copy[i].tid == track_arr[i].tid assert track_arr_copy[i].history == track_arr[i].history assert track_arr_copy[i].tag.mapping == track_arr[i].tag.mapping assert track_arr_copy[i].tag.labels == track_arr[i].tag.labels assert np.allclose(track_arr_copy[i].tag.scores, track_arr[i].tag.scores)
def annotation_3dobject(self, idx, raw=False, convert_tag=True, with_velocity=True): seq_id, frame_idx = idx fname = "annotation/%03d.json" % frame_idx if self._return_file_path: return self.base_path / seq_id / fname if self.inzip: with PatchedZipFile(self.base_path / f"{seq_id}.zip", to_extract=fname) as ar: labels = json.loads(ar.read(fname)) else: with Path(self.base_path, seq_id, fname).open() as fin: labels = json.load(fin) labels = list(map(edict, labels)) if raw: return labels # parse annotations ego_pose = self.pose(idx, bypass=True) ego_r, ego_t = ego_pose.orientation, ego_pose.position ego_t = ego_r.as_matrix().dot( ego_t) # convert to original representation ego_ri = ego_r.inv() ego_rim = ego_ri.as_matrix() outputs = Target3DArray(frame="ego") for label in labels: # convert tags tag = NuscenesObjectClass.parse(label.category) for attr in label.attribute: tag = tag | NuscenesObjectClass.parse(attr) if convert_tag: tag = ObjectTag(tag.to_detection(), NuscenesDetectionClass) else: tag = ObjectTag(tag, NuscenesObjectClass) aux = dict(num_lidar_pts=label['num_lidar_pts'], num_radar_pts=label['num_radar_pts']) # caculate relative pose r = Rotation.from_quat(label.rotation[1:] + [label.rotation[0]]) t = label.translation rel_r = ego_ri * r rel_t = np.dot(ego_rim, t - ego_t) size = [label.size[1], label.size[0], label.size[2]] # wlh -> lwh tid = int(label.instance[:8], 16) # truncate into uint64 # create object if with_velocity: v = np.dot(ego_rim, label.velocity) w = label.angular_velocity target = TrackingTarget3D(rel_t, rel_r, size, v, w, tag, tid=tid, aux=aux) outputs.append(target) else: target = ObjectTarget3D(rel_t, rel_r, size, tag, tid=tid, aux=aux) outputs.append(target) return outputs