def __init__(self, vcd_330_data, vcd_430):
    """Populate *vcd_430* from a parsed VCD 3.3.0 JSON dict.

    :param vcd_330_data: dict loaded from a VCD 3.3.0 file; must contain
        a top-level 'VCD' key.
    :param vcd_430: target VCD 4.3.0 object, filled in place via its
        add_* API.
    :raises Exception: if *vcd_330_data* has no 'VCD' key.
    """
    # Main VCD element
    if 'VCD' not in vcd_330_data:
        raise Exception("This is not a valid VCD 3.3.0 file.")

    # Metadata and other
    # NOTE: 'scdName' field is lost as VCD 4.3.0 does not support SCD
    # NOTE: 'frameInterval' from 'VCD' is not copied, but computed from frames
    # NOTE: 'guid' in 'Object's is ignored in VCD 4.3.0
    # TODO: Apparently 'streamProperties" can exist in VCD3.3.0, although it is not in the schema
    if 'annotator' in vcd_330_data['VCD']:
        vcd_430.add_annotator(vcd_330_data['VCD']['annotator'])
    if 'name' in vcd_330_data['VCD']:
        vcd_430.add_name(vcd_330_data['VCD']['name'])
    if 'metaData' in vcd_330_data['VCD']:
        metadata = vcd_330_data['VCD']['metaData']
        streams = metadata['stream']
        for stream in streams:
            # Map 3.3.0 stream types onto the 4.3.0 vocabulary:
            # 'video' -> 'camera', 'pointcloud'/'lidar' -> 'lidar'
            stream_type = stream['type']
            if stream_type == 'video':
                stream_type = 'camera'
            elif stream_type == 'pointcloud' or stream_type == 'lidar':
                stream_type = 'lidar'
            vcd_430.add_stream(
                stream['name'],
                stream['uri'],
                stream['description'],
                stream_type
            )
    if 'ontologyManager' in vcd_330_data['VCD']:
        ontologyManager = vcd_330_data['VCD']['ontologyManager']
        for ontology in ontologyManager['ontology']:
            vcd_430.add_ontology(ontology)
    if 'frames' in vcd_330_data['VCD']:
        # This is Frame-wise VCD
        for frame in vcd_330_data['VCD']['frames']:
            # A frame has required "frame", and optional "streamSync", "frameProperties", "frame", "timestamp"
            # and then "objects", "actions", etc.
            frame_num = frame['frame']
            if 'timestamp' in frame:
                vcd_430.add_frame_properties(frame_num=frame_num,
                                             timestamp=frame['timestamp'])
            if 'frameProperties' in frame:
                # 'timestamp' entries go through the dedicated timestamp
                # argument; everything else is collected into a plain
                # properties dict and added in one call.
                frame_properties = dict()
                for frameProperty in frame['frameProperties']:
                    val = frameProperty['val']
                    name = frameProperty['name']
                    if frameProperty['name'] == 'timestamp':
                        vcd_430.add_frame_properties(frame_num, timestamp=val)
                    else:
                        frame_properties[name] = val
                if frame_properties:
                    vcd_430.add_frame_properties(frame_num, timestamp=None, properties=frame_properties)
            if 'streamSync' in frame:
                for streamSyncItem in frame['streamSync']:
                    vcd_430.add_stream_properties(
                        streamSyncItem['name'],
                        stream_sync=types.StreamSync(
                            frame_vcd=frame_num,
                            frame_stream=streamSyncItem['frame']
                        )
                    )
            # Now the elements
            self.__copy_elements(vcd_430, frame, frame_num)
    if 'staticAttributes' in vcd_330_data['VCD']:
        # NOTE(review): called without frame_num here, unlike the per-frame
        # call above — assumes __copy_elements defaults frame_num; confirm.
        self.__copy_elements(vcd_430, vcd_330_data['VCD']['staticAttributes'])
def update_vcd(self, annotations, validations, statics=None, metadata=None):
    """Convert annotations into VCD4 format.

    Builds a fresh ``core.VCD`` object with the driver object, streams,
    stream properties, recording context and static driver data, then
    delegates the per-frame annotation/validation vectors to the general
    update routine.

    :param annotations: per-frame annotation vector (one row per mosaic frame).
    :param validations: per-frame validation vector, same length as
        *annotations*.
    :param statics: optional list of static fields (driver/context/annotator);
        positions 0..7 are age, gender, glasses, drive_freq, experience,
        weather, setup, annotatorID.
    :param metadata: optional camera metadata:
        [face_meta, body_meta, hands_meta] (see inline layout comments).
    :return: True on success.
    :raises RuntimeError: if the stream shift values have not been set
        via set_shifts() beforehand.
    """
    # But, if there are already static annotations in vcd, take and keep
    # them for the next vcd
    areStatics = bool(statics)
    isMetadata = bool(metadata)

    if isMetadata:
        # @metadata: [face_meta, body_meta, hands_meta]
        # @face_meta (5): [rgb_video_frames, mat]
        # @body_meta (6): [date_time, rgb_video_frames, mat]
        # @hands_meta (7): [rgb_video_frames, mat]
        self._f_frames = int(metadata[0][0])
        self._f_intrinsics = metadata[0][1]
        self.timeStamp = str(metadata[1][0])
        # Change ":" symbol to ";" for windows correct visualization
        # FIX: str.replace returns a new string; the original discarded the
        # result, so the substitution never took effect.
        self.timeStamp = self.timeStamp.replace(":", ";")
        self._b_frames = int(metadata[1][1])
        self._b_intrinsics = metadata[1][2]
        self._h_frames = int(metadata[2][0])
        self._h_intrinsics = metadata[2][1]

    if areStatics:
        # Driver Data
        age = int(statics[0]["val"])
        gender = statics[1]["val"]
        glasses = bool(statics[2]["val"])
        drive_freq = statics[3]["val"]
        experience = statics[4]["val"]
        # Context Data
        weather = statics[5]["val"]
        setup = statics[6]["val"]
        # Annotator
        annotatorID = str(statics[7]["val"])

    if self._bf_shift is None or self._hb_shift is None or \
            self._hf_shift is None:
        raise RuntimeError(
            "Shift values have not been set. Run set_shifts() function "
            "before")
    body_face_shift = self._bf_shift
    # hands_body_shift = self.__hb_shift
    hands_face_shift = self._hf_shift

    # Get total number of lines which is equivalent to total number of
    # frames of mosaic
    assert (len(annotations) == len(validations))
    total_frames = len(annotations)

    # 1.- Create a VCD instance
    vcd = core.VCD()

    # 2.- Add Object for Subject
    self.uid_driver = vcd.add_object(self.subject, "driver", ont_uid=0,
                                     frame_value=(0, total_frames - 1))

    # 3.- VCD Name
    vcd.add_name(self.group + '_' + self.subject + '_' + self.session +
                 '_' + self.date + '_' + self._annotation_mode)

    # 4.- Annotator
    if areStatics:
        vcd.add_annotator(annotatorID)

    # 5- Ontology
    vcd.add_ontology('http://dmd.vicomtech.org/ontology')

    # 6.- Cameras
    # Build Uri to video files
    if self._setUpManager._external_struct:
        video_root_path = Path() / self.group / self.subject / self.session
        face_uri = video_root_path / (self.group + '_' + self.subject + '_' +
                                      self.session + '_' + self.date +
                                      '_rgb_face.mp4')
        body_uri = video_root_path / (self.group + '_' + self.subject + '_' +
                                      self.session + '_' + self.date +
                                      '_rgb_body.mp4')
        hands_uri = video_root_path / (self.group + '_' + self.subject + '_' +
                                       self.session + '_' + self.date +
                                       '_rgb_hands.mp4')
    else:
        video_root_path = Path() / self.group / self.date / self.subject
        face_uri = video_root_path / (self.subject + '_' + self.session + '_' +
                                      'face' + '_' + self.date + '.mp4')
        body_uri = video_root_path / (self.subject + '_' + self.session + '_' +
                                      'body' + '_' + self.date + '.mp4')
        hands_uri = video_root_path / (self.subject + '_' + self.session +
                                       '_' + 'hands' + '_' + self.date +
                                       '.mp4')
    face_video_descr = 'Frontal face looking camera'
    body_video_descr = 'Side body looking camera'
    hands_video_descr = 'Hands and wheel looking camera'
    vcd.add_stream('face_camera', str(face_uri), face_video_descr,
                   core.StreamType.camera)
    vcd.add_stream('body_camera', str(body_uri), body_video_descr,
                   core.StreamType.camera)
    vcd.add_stream('hands_camera', str(hands_uri), hands_video_descr,
                   core.StreamType.camera)

    # 7.- Stream Properties
    # Real Intrinsics of cameras
    # NOTE(review): 'cam_module' here vs 'camera_module' below looks like an
    # unintended key inconsistency in the produced VCD; left unchanged since
    # downstream consumers may rely on it — confirm and unify separately.
    vcd.add_stream_properties(stream_name='face_camera',
                              properties={
                                  'cam_module': 'Intel RealSense D415',
                                  'total_frames': self._f_frames,
                              },
                              stream_sync=types.StreamSync(frame_shift=0),
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=1280, height_px=720,
                                  camera_matrix_3x4=self._f_intrinsics))
    vcd.add_stream_properties(stream_name='body_camera',
                              properties={
                                  'camera_module': 'Intel RealSense D435',
                                  'total_frames': self._b_frames,
                              },
                              stream_sync=types.StreamSync(
                                  frame_shift=body_face_shift),
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=1280, height_px=720,
                                  camera_matrix_3x4=self._b_intrinsics))
    vcd.add_stream_properties(stream_name='hands_camera',
                              properties={
                                  'camera_module': 'Intel RealSense D415',
                                  'total_frames': self._h_frames,
                              },
                              stream_sync=types.StreamSync(
                                  frame_shift=hands_face_shift),
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=1280, height_px=720,
                                  camera_matrix_3x4=self._h_intrinsics))

    if areStatics or isMetadata:
        # 8.- Add Context of Recording session
        last_frame = total_frames - 1
        ctx_txt = 'recording_context'
        rec_context_uid = vcd.add_context(name='', semantic_type=ctx_txt,
                                          frame_value=(0, last_frame))
        if areStatics:
            vcd.add_context_data(rec_context_uid,
                                 types.text(name='weather', val=weather))
            vcd.add_context_data(rec_context_uid,
                                 types.text(name='setup', val=setup))

            # 9.- Add Driver static properties
            vcd.add_object_data(self.uid_driver,
                                types.num(name='age', val=age))
            vcd.add_object_data(self.uid_driver,
                                types.text(name='gender', val=gender))
            vcd.add_object_data(self.uid_driver,
                                types.boolean(name='glasses', val=glasses))
            vcd.add_object_data(self.uid_driver,
                                types.text(name='experience', val=experience))
            vcd.add_object_data(self.uid_driver,
                                types.text(name='drive_freq', val=drive_freq))
        if isMetadata:
            vcd.add_context_data(rec_context_uid,
                                 types.text(name='recordTime',
                                            val=self.timeStamp))

    # 10.- Save annotation and validation vectors in VCD format
    # Perform general update
    # NOTE(review): 'add_annotationsx' looks like a typo for
    # 'add_annotations'; left as-is in case the variant exists — confirm.
    new_vcd = self.add_annotationsx(vcd, annotations, validations,
                                    self.ont_uid)
    # Update class variable __vcd with newly created object
    self._vcd = new_vcd
    return True
def test_create_streams_simple(self):
    """End-to-end check of stream, coordinate-system, sync, per-frame
    intrinsics/extrinsics and odometry creation, plus save/load round-trip.
    """
    # This example shows how to introduce Stream (Intrinsics, Extrinsics), Sync and Odometry information
    # Fully detailed examples will be introduced for specific datasets such as KITTI tracking and nuScenes
    vcd = core.VCD()

    # FIRST: define all the involved coordinate systems
    vcd.add_coordinate_system("odom", cs_type=types.CoordinateSystemType.scene_cs)
    vcd.add_coordinate_system("vehicle-iso8855", cs_type=types.CoordinateSystemType.local_cs,
                              parent_name="odom",
                              pose_wrt_parent=[1.0, 0.0, 0.0, 0.0,
                                               0.0, 1.0, 0.0, 0.0,
                                               0.0, 0.0, 1.0, 0.0,
                                               0.0, 0.0, 0.0, 1.0])

    # SECOND: Add the streams
    vcd.add_stream(stream_name='Camera1',
                   uri='./somePath/someVideo1.mp4',
                   description='Description 1',
                   stream_type=core.StreamType.camera)
    vcd.add_stream(stream_name='Camera2',
                   uri='./somePath/someVideo2.mp4',
                   description='Description 2',
                   stream_type=core.StreamType.camera)

    # THIRD: Generic stream properties can be added...
    # ... for the Stream
    vcd.add_stream_properties(stream_name="Camera1",
                              properties={"someProperty": "someValue"})
    # ... for the Stream at specific frame number
    vcd.add_stream_properties(stream_name="Camera1",
                              stream_sync=types.StreamSync(frame_vcd=2),
                              properties={"somePropertyForThisFrame": "someValue"})

    # Sensor-domain-specific information such as INTRINSICS, EXTRINSICS and ODOMETRY can be added as well
    # See schema.py for more details on Coordinate Systems
    # Extrinsics are added as coordinate systems
    vcd.add_stream_properties(stream_name="Camera1",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=640,
                                  height_px=480,
                                  camera_matrix_3x4=[1000.0, 0.0, 500.0, 0.0,
                                                     0.0, 1000.0, 500.0, 0.0,
                                                     0.0, 0.0, 1.0, 0.0],
                                  distortion_coeffs_1xN=None))
    vcd.add_coordinate_system("Camera1", cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=[1.0, 0.0, 0.0, 0.0,
                                               0.0, 1.0, 0.0, 0.0,
                                               0.0, 0.0, 1.0, 0.0,
                                               0.0, 0.0, 0.0, 1.0])

    # Sync info can be added as a shift between the master vcd frame count and each of the sensors
    # e.g. Camera2 may have started 3 frames after Camera1, therefore, to label Elements for Camera2, we can use
    # frame_shift=3 for Camera2
    vcd.add_stream_properties(stream_name="Camera2",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=640,
                                  height_px=480,
                                  camera_matrix_3x4=[1000.0, 0.0, 500.0, 0.0,
                                                     0.0, 1000.0, 500.0, 0.0,
                                                     0.0, 0.0, 1.0, 0.0],
                                  distortion_coeffs_1xN=None),
                              stream_sync=types.StreamSync(frame_shift=3))
    vcd.add_coordinate_system("Camera2", cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=[1.0, 0.0, 0.0, 0.0,
                                               0.0, 1.0, 0.0, 0.0,
                                               0.0, 0.0, 1.0, 0.0,
                                               0.0, 0.0, 0.0, 1.0])

    # Let's suppose we want to add a master timestamp coming from a GPS or LIDAR sensor
    # Let's create here some dummy timestamps
    t_start = datetime(year=2020, month=4, day=11, hour=12, minute=0, second=1)
    t_end = datetime(year=2020, month=4, day=11, hour=12, minute=0, second=31)
    t_diff = t_end - t_start
    steps = 10
    t_step = t_diff / steps
    t_data = [t_start + i * t_step for i in range(0, steps)]
    for frame_num, t in enumerate(t_data):
        vcd.add_frame_properties(frame_num=frame_num, timestamp=str(t))

    # Additionally, we may want to introduce timestamping, intrinsics and extrinsics specific for each Sensor
    # and for each frame, for increased detail
    for frame_num, t in enumerate(t_data):
        vcd.add_stream_properties(stream_name="Camera1",
                                  stream_sync=types.StreamSync(
                                      frame_vcd=frame_num,
                                      frame_stream=frame_num + 1,  # Camera1's frames are shifted wrt to master count
                                      timestamp_ISO8601=str(t)),
                                  intrinsics=types.IntrinsicsPinhole(
                                      width_px=640,
                                      height_px=480,
                                      camera_matrix_3x4=[1001.0, 0.0, 500.0, 0.0,
                                                         0.0, 1001.0, 500.0, 0.0,
                                                         0.0, 0.0, 1.0, 0.0],
                                      distortion_coeffs_1xN=None))
        vcd.add_transform(frame_num, transform=types.Transform(
            src_name="vehicle-iso8855",
            dst_name="Camera1",
            transform_src_to_dst_4x4=[1.0, 0.0, 0.0, 0.1,
                                      0.0, 1.0, 0.0, 0.1,
                                      0.0, 0.0, 1.0, 0.0,
                                      0.0, 0.0, 0.0, 1.0]))

    # Odometry information is also included as frame_properties
    # Odometry must be provided as pose_lcs_wrt_wcs (i.e. Local Coordinate System wrt World Coordinate System)
    # in the form of pose 4x4 matrices.
    # As additional properties you can include raw GPS/IMU for instance
    vcd.add_transform(frame_num=6, transform=types.Transform(
        src_name="odom",
        dst_name="vehicle-iso8855",
        transform_src_to_dst_4x4=[1.0, 0.0, 0.0, 20.0,
                                  0.0, 1.0, 0.0, 20.0,
                                  0.0, 0.0, 1.0, 0.0,
                                  0.0, 0.0, 0.0, 1.0],
        raw_gps_data=[49.011212804408, 8.4228850417969, 112.83492279053,
                      0.022447, 1e-05, -1.2219096732051, -3.3256321640686,
                      1.1384311814592, 3.5147680214713, 0.037625160413037,
                      -0.03878884255623, -0.29437452763793, 0.037166856911681,
                      9.9957015129717, -0.30581030960531, -0.19635662515203,
                      9.9942128010936, -0.017332142869546, 0.024792163815438,
                      0.14511808479348, -0.017498934149631, 0.021393359392165,
                      0.14563031426063, 0.49229361157748, 0.068883960397178,
                      4, 10, 4, 4, 0],
        status="interpolated",  # we can add any thing (it is permitted by VCD schema)
    ))

    # Verify streams, coordinate systems, and lookups for missing names.
    self.assertEqual(len(vcd.get_streams()), 2)
    self.assertEqual(vcd.has_stream('Camera1'), True)
    self.assertEqual(
        vcd.get_stream('Camera1')['uri'], './somePath/someVideo1.mp4')
    self.assertEqual(
        vcd.get_stream('Camera1')['description'], 'Description 1')
    self.assertEqual(vcd.get_stream('Camera1')['type'], 'camera')
    self.assertEqual(vcd.get_stream('Non-Valid_Stream'), None)
    self.assertEqual(len(vcd.get_coordinate_systems()), 4)
    self.assertEqual(vcd.has_coordinate_system('vehicle-iso8855'), True)
    self.assertEqual(
        vcd.get_coordinate_system('vehicle-iso8855')['parent'], 'odom')
    self.assertEqual(vcd.get_coordinate_system('Non-existing-Coordinate'),
                     None)

    # Round-trip: save (only once, with pretty/validate flag), reload with
    # schema validation and compare serialized forms.
    if not os.path.isfile(
            './etc/vcd430_test_stream_frame_properties.json'):
        vcd.save('./etc/vcd430_test_stream_frame_properties.json', True)

    vcd_read = core.VCD('./etc/vcd430_test_stream_frame_properties.json',
                        validation=True)
    self.assertEqual(vcd_read.stringify(), vcd.stringify())
def add_annotations(self, vcd: core.VCD, annotations, validations, ontology_uid: int):
    """Add per-frame annotations and their validation status to *vcd*.

    :param vcd: target VCD 4.x object, updated in place.
    :param annotations: matrix (frames x levels) of label indices.
    :param validations: matrix (frames x levels) of validation codes
        (0: unchanged, 1, 2), aligned with *annotations*.
    :param ontology_uid: uid of the ontology the created elements refer to.
    :return: the updated *vcd* object.
    :raises RuntimeError: if an annotation level declares an unknown
        group type.
    """
    # FIX: hoisted out of the label loop — the original re-ran np.array()
    # on every label iteration, which is loop-invariant work.
    annotations = np.array(annotations)
    validations = np.array(validations)

    # Loop over all annotation levels to add the elements present in
    # annotation vector
    for level_code, level_type in zip(self._annotation_levels,
                                      self._annotation_types):
        level_idx = int(level_code[0])
        level_name = level_code[1]
        level_type_idx = int(level_type[0])
        level_type_name = level_type[1]
        assert (level_idx == level_type_idx)
        assert (len(self._level_labels) > 0)
        level_labels = self._level_labels[level_idx]
        for label_idx, label_name in level_labels.items():
            # Do not save NaN and Empty annotations
            if label_idx == 100 or label_idx == 99:
                continue
            # Compute frame number of all occurrences of label_idx
            f_list = np.where(annotations[:, level_idx] == label_idx)[0]
            v_list = validations[f_list, level_idx]
            # From frames with label_idx, select frames with validation 0, 1 and 2
            v_list_0 = f_list[np.where(v_list == 0)]
            v_list_1 = f_list[np.where(v_list == 1)]
            v_list_2 = f_list[np.where(v_list == 2)]
            # If there are not annotated frames, then all validations are 0 (unchanged)
            if len(f_list) == 0:
                v_list_0 = validations[f_list, level_idx]
            # Make intervals of frames
            # (FIX: dropped a dead "f_interv = []" that was immediately
            # overwritten by the assignment below.)
            f_interv = list(self.interval_extract(f_list))
            # Make intervals of validation
            v_0_intervals = list(self.interval_extract(v_list_0))
            v_1_intervals = list(self.interval_extract(v_list_1))
            v_2_intervals = list(self.interval_extract(v_list_2))

            # ## Add the elements
            if level_type_name == 'action':
                # Add an action
                action_type = level_name + '/' + label_name
                if len(f_interv) > 0:
                    el_uid = vcd.add_action("", semantic_type=action_type,
                                            frame_value=f_interv,
                                            ont_uid=ontology_uid)
                    # Add how the annotation was done
                    if len(v_0_intervals) > 0:
                        # Intervals with validation 0
                        validation_data = types.text(name='annotated',
                                                     val=annotate_dict[0])
                        vcd.add_action_data(uid=el_uid,
                                            action_data=validation_data,
                                            frame_value=v_0_intervals)
                    if len(v_1_intervals) > 0:
                        # Intervals with validation 1
                        validation_data = types.text(name='annotated',
                                                     val=annotate_dict[1])
                        vcd.add_action_data(uid=el_uid,
                                            action_data=validation_data,
                                            frame_value=v_1_intervals)
                    if len(v_2_intervals) > 0:
                        # Intervals with validation 2
                        validation_data = types.text(name='annotated',
                                                     val=annotate_dict[2])
                        vcd.add_action_data(uid=el_uid,
                                            action_data=validation_data,
                                            frame_value=v_2_intervals)
            elif level_type_name == 'object':
                # Add an object
                object_type = label_name
                if len(f_interv) > 0:
                    el_uid = vcd.add_object("", semantic_type=object_type,
                                            frame_value=f_interv,
                                            ont_uid=ontology_uid)
                    # Add how the annotation was done
                    # FIX: guard each interval list like the 'action' branch
                    # above does; the original passed possibly-empty
                    # frame_value lists to add_object_data.
                    if len(v_0_intervals) > 0:
                        # Intervals with validation 0
                        validation_data = types.text(name='annotated',
                                                     val=annotate_dict[0])
                        vcd.add_object_data(uid=el_uid,
                                            object_data=validation_data,
                                            frame_value=v_0_intervals)
                    if len(v_1_intervals) > 0:
                        # Intervals with validation 1
                        validation_data = types.text(name='annotated',
                                                     val=annotate_dict[1])
                        vcd.add_object_data(uid=el_uid,
                                            object_data=validation_data,
                                            frame_value=v_1_intervals)
                    if len(v_2_intervals) > 0:
                        # Intervals with validation 2
                        validation_data = types.text(name='annotated',
                                                     val=annotate_dict[2])
                        vcd.add_object_data(uid=el_uid,
                                            object_data=validation_data,
                                            frame_value=v_2_intervals)
            elif level_type_name == 'stream_properties':
                # When a level is defined as stream_properties, the annotations
                # will always be considered as boolean, since TaTo only allows
                # the presence or absence of that property.
                # E.g. occlusion can only be True or False
                if len(f_interv) > 0:
                    stream = label_name
                    # "--" marks "no stream"; hoisted out of the frame loop
                    # (the original re-checked it per frame and continued).
                    if stream != "--":
                        for i, frame_num in enumerate(f_list):
                            property_dict = {
                                level_name: {
                                    'val': True,
                                    'annotated':
                                        annotate_dict[int(v_list[i])]
                                }
                            }
                            vcd.add_stream_properties(
                                stream_name=stream,
                                stream_sync=types.StreamSync(
                                    frame_vcd=int(frame_num)),
                                properties=property_dict)
            else:
                raise RuntimeError('Invalid group type: ' + level_type_name)
    return vcd