def test_action_properties(self):
    """Add frame-specific action/context data and check save/load round-trips byte-identically."""
    # 1.- Create a VCD instance
    vcd = core.VCD()
    # 2.- Create the Object
    uid_action1 = vcd.add_action(name="", semantic_type="#Running", frame_value=(0, 10))
    # Attach several typed action_data entries to specific frames.
    vcd.add_action_data(uid=uid_action1, action_data=types.num(name="confidence", val=0.98), frame_value=0)
    vcd.add_action_data(uid=uid_action1, action_data=types.vec(name="confidence_vec", val=[0.98, 0.97]), frame_value=0)
    vcd.add_action_data(uid=uid_action1, action_data=types.text(name="annotation", val="Manual"), frame_value=0)
    vcd.add_action_data(uid=uid_action1, action_data=types.boolean(name="validated", val=True), frame_value=1)

    # Same can be done with events and event_data, and contexts and context_data
    # And can be done as dynamic or static info
    uid_object1 = vcd.add_object(name="Marcos", semantic_type="#Person")
    vcd.add_object_data(uid=uid_object1, object_data=types.text(name="Position", val="#Researcher"))

    uid_context1 = vcd.add_context(name="", semantic_type="#Sunny")
    vcd.add_context_data(uid=uid_context1, context_data=types.text(name="category", val="#Weather"))
    vcd.add_context_data(uid=uid_context1, context_data=types.text(name="annotation", val="Manual"))

    uid_context2 = vcd.add_context(name="", semantic_type="#Highway", frame_value=(0, 5))
    vcd.add_context_data(uid=uid_context2, context_data=types.num(name="risk", val=0.7), frame_value=4)
    vcd.add_context_data(uid=uid_context2, context_data=types.num(name="weight", val=0.5), frame_value=4)

    # Write the reference file only once; later runs compare against it.
    if not os.path.isfile('./etc/vcd430_test_actions_with_action_data.json'):
        vcd.save('./etc/vcd430_test_actions_with_action_data.json', True)

    # Re-load (with schema validation) and compare serializations.
    vcd_read = core.VCD('./etc/vcd430_test_actions_with_action_data.json', validation=True)
    vcd_read_stringified = vcd_read.stringify()
    vcd_stringified = vcd.stringify()
    # print(vcd_stringified)
    self.assertEqual(vcd_read_stringified, vcd_stringified)
def test_create_search_simple(self):
    """Create a VCD with two objects, query them back, and check saved JSON matches stringify().

    Fix: the reference JSON files were opened with bare open()/close(); a failing
    assertEqual between open and close leaked the file handle. Both reads now use
    context managers so the files are always closed.
    """
    # 1.- Create a VCD instance
    vcd = core.VCD()
    # 2.- Create the Object
    uid_marcos = vcd.add_object(name='marcos', semantic_type="person")
    self.assertEqual(uid_marcos, "0", "Should be 0")
    # 3.- Add some data to the object
    vcd.add_object_data(uid=uid_marcos, object_data=types.bbox(name='head', val=(10, 10, 30, 30)))
    vcd.add_object_data(uid=uid_marcos, object_data=types.bbox(name='body', val=(0, 0, 60, 120)))
    vcd.add_object_data(uid=uid_marcos, object_data=types.vec(name='speed', val=(0.0, 0.2)))
    vcd.add_object_data(uid=uid_marcos, object_data=types.num(name='accel', val=0.1))

    uid_peter = vcd.add_object(name='peter', semantic_type="person")
    vcd.add_object_data(uid=uid_peter, object_data=types.num(name='age', val=38.0))
    vcd.add_object_data(uid=uid_peter, object_data=types.vec(name='eyeL', val=(0, 0, 10, 10)))
    vcd.add_object_data(uid=uid_peter, object_data=types.vec(name='eyeR', val=(0, 0, 10, 10)))

    # 4.- Write into string
    vcd_string_pretty = vcd.stringify()
    vcd_string_nopretty = vcd.stringify(False)

    # 5.- We can ask VCD
    marcos_ref = vcd.get_element(element_type=core.ElementType.object, uid=uid_marcos)
    # print('Found Object: uid = ', uid_marcos, ', name = ', marcosRef['name'])
    self.assertEqual(uid_marcos, "0", "Should be 0")
    self.assertEqual(marcos_ref['name'], 'marcos', "Should be marcos")

    peter_ref = vcd.get_element(element_type=core.ElementType.object, uid=uid_peter)
    # print('Found Object: uid = ', uid_peter, ', name = ', peterRef['name'])
    self.assertEqual(uid_peter, "1", "Should be 1")
    self.assertEqual(peter_ref['name'], 'peter', "Should be peter")

    # print('VCD string no pretty:\n', vcd_string_nopretty)
    # print('VCD string pretty:\n', vcd_string_pretty)

    # Write reference files only on first run; then compare stored vs in-memory.
    if not os.path.isfile('./etc/' + vcd_version_name + '_test_create_search_simple_nopretty.json'):
        vcd.save('./etc/' + vcd_version_name + '_test_create_search_simple_nopretty.json')

    with open('./etc/' + vcd_version_name + '_test_create_search_simple_nopretty.json', "r") as vcd_file_nopretty:
        vcd_string_nopretty_read = vcd_file_nopretty.read()
    self.assertEqual(vcd_string_nopretty_read, vcd_string_nopretty, "VCD no-pretty not equal to read file")

    if not os.path.isfile('./etc/' + vcd_version_name + '_test_create_search_simple_pretty.json'):
        vcd.save('./etc/' + vcd_version_name + '_test_create_search_simple_pretty.json', True)

    with open('./etc/' + vcd_version_name + '_test_create_search_simple_pretty.json', "r") as vcd_file_pretty:
        vcd_string_pretty_read = vcd_file_pretty.read()
    self.assertEqual(vcd_string_pretty, vcd_string_pretty_read, "VCD pretty not equal to read file")
def test_remove_simple(self):
    """Create a VCD with several objects plus data, then remove them and verify counts.

    Fix: local `trafficSign1UID` renamed to `traffic_sign1_uid` for consistency with
    every other uid local in this file (car1_uid, person1_uid, ...), per PEP 8.
    """
    # 1.- Create VCD
    vcd = core.VCD()
    # 2.- Create some objects
    car1_uid = vcd.add_object(name='BMW', semantic_type='#Car')
    car2_uid = vcd.add_object(name='Seat', semantic_type='#Car')
    person1_uid = vcd.add_object(name='John', semantic_type='#Pedestrian')
    traffic_sign1_uid = vcd.add_object(name='', semantic_type='#StopSign')

    # 3.- Add some content
    # Same FrameInterval (0, 5)
    vcd.add_object_data(uid=person1_uid, object_data=types.bbox('face', (0, 0, 100, 100)), frame_value=(0, 5))
    vcd.add_object_data(uid=person1_uid, object_data=types.bbox('mouth', (0, 0, 10, 10)), frame_value=(0, 5))
    vcd.add_object_data(uid=person1_uid, object_data=types.bbox('hand', (0, 0, 30, 30)), frame_value=(0, 5))
    vcd.add_object_data(uid=person1_uid, object_data=types.bbox('eyeL', (0, 0, 10, 10)), frame_value=(0, 5))
    vcd.add_object_data(uid=person1_uid, object_data=types.bbox('eyeR', (0, 0, 10, 10)), frame_value=(0, 5))
    # A different FrameInterval (0, 10)
    vcd.add_object_data(uid=person1_uid, object_data=types.num('age', 35.0), frame_value=(0, 10))
    # Data for the other objects
    vcd.add_object_data(uid=car1_uid, object_data=types.bbox('position', (100, 100, 200, 400)), frame_value=(0, 10))
    vcd.add_object_data(uid=car1_uid, object_data=types.text('color', 'red'), frame_value=(6, 10))
    vcd.add_object_data(uid=car2_uid, object_data=types.bbox('position', (300, 1000, 200, 400)), frame_value=(0, 10))
    vcd.add_object_data(uid=traffic_sign1_uid, object_data=types.boolean('visible', True), frame_value=(0, 4))

    # print("Frame 5, dynamic only message: ", vcd.stringify_frame(5, dynamic_only=True))
    # print("Frame 5, full message: ", vcd.stringify_frame(5, dynamic_only=False))

    if not os.path.isfile('./etc/' + vcd_version_name + '_test_remove_simple.json'):
        vcd.save('./etc/' + vcd_version_name + '_test_remove_simple.json')

    self.assertEqual(vcd.get_num_objects(), 4, "Should be 4")

    # 4.- Delete some content
    vcd.rm_object(uid=car2_uid)
    self.assertEqual(vcd.get_num_objects(), 3, "Should be 3")
    vcd.rm_object_by_type(semantic_type='#StopSign')
    self.assertEqual(vcd.get_num_objects(), 2, "Should be 2")

    # 5.- Remove all content sequentially
    vcd.rm_object(uid=person1_uid)
    self.assertEqual(vcd.get_num_objects(), 1, "Should be 1")
    vcd.rm_object(uid=car1_uid)
    self.assertEqual(vcd.get_num_objects(), 0, "Should be 0")

    # With every object gone, no frame intervals should remain.
    self.assertEqual(vcd.get_frame_intervals().empty(), True)
def __add_attributes(self, src330, object_data430):
    """Copy the VCD 3.3.0 'attributes' entries of src330 into object_data430.

    'bool' and 'text' entries map one-to-one; a 'num' entry becomes a scalar
    types.num when its value list has exactly one element, otherwise a types.vec.
    Other attribute keys are ignored.
    """
    if 'attributes' not in src330:
        return
    for attr_kind, entries in src330['attributes'].items():
        if attr_kind == "bool":
            for entry in entries:
                object_data430.add_attribute(types.boolean(entry['name'], entry['val']))
        elif attr_kind == "num":
            for entry in entries:
                values = entry['val']
                # A single-element list is a scalar num; anything longer is a vec.
                if len(values) == 1:
                    object_data430.add_attribute(types.num(entry['name'], values[0]))
                else:
                    object_data430.add_attribute(types.vec(entry['name'], values))
        elif attr_kind == "text":
            for entry in entries:
                object_data430.add_attribute(types.text(entry['name'], entry['val']))
vcd.add_object_data( objects_uids[instance['token']], types.point3d('translation', tuple(annotation['translation'])), (frame_num, frame_num)) vcd.add_object_data(objects_uids[instance['token']], types.vec('size', tuple(annotation['size'])), (frame_num, frame_num)) vcd.add_object_data( objects_uids[instance['token']], types.vec('rotation', tuple(annotation['rotation'])), (frame_num, frame_num)) vcd.add_object_data( objects_uids[instance['token']], types.num('num_lidar_pts', annotation['num_lidar_pts']), (frame_num, frame_num)) vcd.add_object_data( objects_uids[instance['token']], types.num('num_radar_pts', annotation['num_radar_pts']), (frame_num, frame_num)) vcd.add_object_data(objects_uids[instance['token']], types.text('visibility', visibility['level']), (frame_num, frame_num)) vcd.add_object_data( objects_uids[instance['token']], types.text('sample_annotation_token', annotation['token']), (frame_num, frame_num)) for sensor in current_sample['data']: data = nusc.get('sample_data', current_sample['data'][sensor])
def __copy_elements(self, vcd_430, root, frame_num=None):
    """Copy objects, actions, events, contexts and relations from a VCD 3.3.0
    root dict into the given VCD 4.3.0 instance, optionally tied to frame_num.

    Dispatches each objectDataContainer entry by its key ('num', 'bool', 'text',
    'image', ..., 'mesh') to the corresponding types.* constructor.
    """
    if 'objects' in root:
        for object in root['objects']:
            uid = str(object['uid'])  # Let's convert to string here
            name = object['name']
            ontologyUID = None
            if 'ontologyUID' in object:
                ontologyUID = str(object['ontologyUID'])  # Let's convert to string here
            typeSemantic = object.get('type', '')  # In VCD 4.3.0 type is required, but it VCD 3.3.0 seems to be not

            # Add the object only once; subsequent frames reuse the same uid.
            if not vcd_430.has(core.ElementType.object, uid):
                vcd_430.add_object(name, typeSemantic, frame_num, uid, ontologyUID)

            if 'objectDataContainer' in object:
                objectDataContainer = object['objectDataContainer']
                for key, value in objectDataContainer.items():
                    for object_data in value:
                        inStream = None
                        if 'inStream' in object_data:
                            inStream = object_data['inStream']
                        # NOTE(review): if 'val' is absent, `val` keeps its value from the
                        # previous iteration (or is undefined on the first one) — appears to
                        # rely on every non-mesh entry carrying 'val'; confirm upstream schema.
                        if 'val' in object_data:
                            val = object_data['val']
                        currentObjectData = None

                        # Create main object_data body
                        # NOTE: in the following calls, I am using direct access to dictionary
                        # for required fields, e.g. object_data['name'], etc.
                        # For optional fields, I am using get() function, e.g.
                        # object_data.get('mode') which defaults to None
                        if key == 'num':
                            if len(val) == 1:
                                # Single value, this is a num
                                currentObjectData = types.num(object_data['name'], val[0], inStream)
                            else:
                                # Multiple values, this is a vec
                                currentObjectData = types.vec(object_data['name'], val, inStream)
                        elif key == 'bool':
                            currentObjectData = types.boolean(object_data['name'], val, inStream)
                        elif key == 'text':
                            currentObjectData = types.text(object_data['name'], val, inStream)
                        elif key == 'image':
                            currentObjectData = types.image(
                                object_data['name'], val,
                                object_data['mimeType'], object_data['encoding'],
                                inStream
                            )
                        elif key == 'binary':
                            currentObjectData = types.binary(
                                object_data['name'], val,
                                object_data['dataType'], object_data['encoding'],
                                inStream
                            )
                        elif key == 'vec':
                            currentObjectData = types.vec(object_data['name'], val, inStream)
                        elif key == 'bbox':
                            currentObjectData = types.bbox(object_data['name'], val, inStream)
                        elif key == 'cuboid':
                            currentObjectData = types.cuboid(object_data['name'], val, inStream)
                        elif key == 'mat':
                            currentObjectData = types.mat(
                                object_data['name'], val,
                                object_data['channels'], object_data['width'], object_data['height'],
                                object_data['dataType'],
                                inStream
                            )
                        elif key == 'point2D':
                            currentObjectData = types.point2d(object_data['name'], val, object_data.get('id'), inStream)
                        elif key == 'point3D':
                            currentObjectData = types.point3d(object_data['name'], val, object_data.get('id'), inStream)
                        elif key == "poly2D":
                            mode_int = object_data['mode']
                            currentObjectData = types.poly2d(
                                object_data['name'], val, types.Poly2DType(mode_int), object_data['closed'], inStream
                            )
                        elif key == "poly3D":
                            currentObjectData = types.poly3d(object_data['name'], val, object_data['closed'], inStream)
                        elif key == "mesh":
                            currentObjectData = types.mesh(object_data['name'])
                            if 'point3D' in object_data:
                                for p3d_330 in object_data['point3D']:
                                    # Create a types.point3d object and add it to the mesh
                                    id = p3d_330['id']
                                    name = p3d_330['name']
                                    val = p3d_330['val']
                                    p3d_430 = types.point3d(name, val)
                                    self.__add_attributes(p3d_330, p3d_430)
                                    currentObjectData.add_vertex(p3d_430, id)
                            if 'lineReference' in object_data:
                                for lref_330 in object_data['lineReference']:
                                    # Create a types.line_reference object and add it to the mesh
                                    id = lref_330['id']
                                    name = lref_330['name']
                                    referenceType = lref_330['referenceType']
                                    assert(referenceType == "point3D")
                                    val = lref_330.get('val')  # defaults to None, needed for the constructor
                                    lref_430 = types.lineReference(name, val, types.ObjectDataType.point3d)
                                    self.__add_attributes(lref_330, lref_430)
                                    currentObjectData.add_edge(lref_430, id)
                            if 'areaReference' in object_data:
                                for aref_330 in object_data['areaReference']:
                                    # Create a types.area_reference object and add it to the mesh
                                    id = aref_330['id']
                                    name = aref_330['name']
                                    referenceType = aref_330['referenceType']
                                    assert (referenceType == "point3D" or referenceType == "lineReference")
                                    val = aref_330.get('val')  # defaults to None, needed for the constructor
                                    if referenceType == "point3D":
                                        aref_430 = types.areaReference(name, val, types.ObjectDataType.point3d)
                                    else:
                                        aref_430 = types.areaReference(name, val, types.ObjectDataType.line_reference)
                                    self.__add_attributes(aref_330, aref_430)
                                    currentObjectData.add_area(aref_430, id)

                        # Add any attributes
                        self.__add_attributes(object_data, currentObjectData)

                        # Add the object_data to the object
                        vcd_430.add_object_data(uid, currentObjectData, frame_num)

    if 'actions' in root:
        for action in root['actions']:
            uid = str(action['uid'])
            name = action['name']
            ontologyUID = None
            if 'ontologyUID' in action:
                ontologyUID = str(action['ontologyUID'])
            typeSemantic = action.get('type', '')  # required in VCD 4.0, not in VCD 3.3.0
            vcd_430.add_action(name, typeSemantic, frame_num, uid, ontologyUID)

    if 'events' in root:
        for event in root['events']:
            uid = str(event['uid'])
            name = event['name']
            ontologyUID = None
            if 'ontologyUID' in event:
                ontologyUID = str(event['ontologyUID'])
            typeSemantic = event.get('type', '')
            vcd_430.add_event(name, typeSemantic, frame_num, uid, ontologyUID)

    if 'contexts' in root:
        for context in root['contexts']:
            uid = str(context['uid'])
            name = context['name']
            ontologyUID = None
            if 'ontologyUID' in context:
                ontologyUID = str(context['ontologyUID'])
            typeSemantic = context.get('type', '')
            vcd_430.add_context(name, typeSemantic, frame_num, uid, ontologyUID)

    if 'relations' in root:
        for relation in root['relations']:
            uid = str(relation['uid'])
            name = relation['name']
            ontologyUID = None
            if 'ontologyUID' in relation:
                ontologyUID = str(relation['ontologyUID'])
            predicate = relation.get('predicate', '')
            rdf_objects = relation.get('rdf_objects', None)
            rdf_subjects = relation.get('rdf_subjects', None)

            vcd_430.add_relation(name, predicate, frame_value=frame_num, uid=uid, ont_uid=ontologyUID)
            # NOTE(review): this rebinds the loop variable `relation` to the 4.3.0
            # element — intentional here (the 3.3.0 dict is no longer needed), but
            # fragile if code is later added below.
            relation = vcd_430.get_element(core.ElementType.relation, uid)
            if not 'rdf_objects' in relation or len(relation['rdf_objects']) == 0:  # just add once
                for rdf_object in rdf_objects:
                    element_type = None
                    rdf_object_type_str = rdf_object['type']
                    if rdf_object_type_str == "Object":
                        element_type = core.ElementType.object
                    elif rdf_object_type_str == "Action":
                        element_type = core.ElementType.action
                    elif rdf_object_type_str == "Event":
                        element_type = core.ElementType.event
                    elif rdf_object_type_str == "Context":
                        element_type = core.ElementType.context
                    else:
                        warnings.warn("ERROR: Unrecognized Element type. Must be Object, Action, Event or Context.")
                    vcd_430.add_rdf(uid, core.RDF.object, str(rdf_object['uid']), element_type)
            if not 'rdf_subjects' in relation or len(relation['rdf_subjects']) == 0:  # just add once
                for rdf_subject in rdf_subjects:
                    element_type = None
                    rdf_object_type_str = rdf_subject['type']
                    if rdf_object_type_str == "Object":
                        element_type = core.ElementType.object
                    elif rdf_object_type_str == "Action":
                        element_type = core.ElementType.action
                    elif rdf_object_type_str == "Event":
                        element_type = core.ElementType.event
                    elif rdf_object_type_str == "Context":
                        element_type = core.ElementType.context
                    else:
                        warnings.warn("ERROR: Unrecognized Element type. Must be Object, Action, Event or Context.")
                    vcd_430.add_rdf(uid, core.RDF.subject, str(rdf_subject['uid']), element_type)
def test_create_search_mid(self):
    """Create objects with frame-ranged data, search by type and data name, verify values.

    Fix: the reference JSON file was opened with bare open()/close(); a failing
    assertEqual leaked the handle. The read now uses a context manager.
    """
    # 1.- Create VCD
    vcd = core.VCD()
    # 2.- Create some content
    uid_marcos = vcd.add_object(name='marcos', semantic_type='#Adult')
    uid_peter = vcd.add_object(name='peter', semantic_type='#Adult')
    uid_katixa = vcd.add_object(name='katixa', semantic_type='#Child')

    list_uids = vcd.get_elements_uids(core.ElementType.object)
    self.assertEqual(len(list_uids), 3)
    self.assertEqual(list_uids[0], uid_marcos)
    self.assertEqual(list_uids[1], uid_peter)
    self.assertEqual(list_uids[2], uid_katixa)

    vcd.add_object_data(uid=uid_marcos, object_data=types.num('age', 37.0), frame_value=(0, 10))
    vcd.add_object_data(uid=uid_marcos, object_data=types.num('height', 1.75), frame_value=(0, 10))
    vcd.add_object_data(uid=uid_marcos, object_data=types.vec('marks', (5.0, 5.0, 5.0)), frame_value=(0, 10))
    vcd.add_object_data(uid=uid_peter, object_data=types.num('age', 40.0), frame_value=(0, 11))
    vcd.add_object_data(uid=uid_peter, object_data=types.vec('marks', (10.0, 10.0, 10.0)), frame_value=(0, 11))
    vcd.add_object_data(uid=uid_katixa, object_data=types.num('age', 9), frame_value=(5, 10))
    vcd.add_object_data(uid=uid_katixa, object_data=types.num('age', 9.01), frame_value=11)  # by default union
    vcd.add_object_data(uid=uid_katixa, object_data=types.num('age', 9.02), frame_value=12)  # by default union

    # 3.- Search Objects according to some search criteria
    # 3.1.- According to "Object::type" (also for other Elements such as Action, Event, Context)
    uids_child = vcd.get_elements_of_type(element_type=core.ElementType.object, semantic_type="#Child")
    for uid in uids_child:
        # print("Hi there! I'm ", vcd.getObject(uid)['name'], " and I am a child")
        self.assertEqual(vcd.get_object(uid)['name'], 'katixa', "Should be katixa")

    # 3.2.- According to ObjectData
    uids_age = vcd.get_objects_with_object_data_name(data_name='age')
    for uid in uids_age:
        object_ = vcd.get_object(uid=uid)
        # print("Hi there! I'm ", object['name'], " and I have ObjectData with name age")
        if uid == "0":
            self.assertEqual(object_['name'], 'marcos', "Should be marcos")
        elif uid == "1":
            self.assertEqual(object_['name'], 'peter', "Should be peter")
        elif uid == "2":
            self.assertEqual(object_['name'], 'katixa', "Should be katixa")

        frames_with_age = vcd.get_frames_with_object_data_name(uid=uid, data_name='age')
        for frame_interval in frames_with_age.get():
            for frame_num in range(frame_interval[0], frame_interval[1] + 1):
                my_age = vcd.get_object_data(uid=uid, data_name='age', frame_num=frame_num)
                # print("I am ", myAge['val'], " years old at frame ", frameNum)
                if uid == "0":
                    self.assertEqual(my_age['val'], 37.0, "Should be 37 for marcos")
                elif uid == "1":
                    self.assertEqual(my_age['val'], 40.0, "Should be 40 for peter")
                elif uid == "2" and frame_num < 11:
                    self.assertEqual(my_age['val'], 9, "Should be 9 for katixa while frameNum < 11")
                elif uid == "2":
                    self.assertEqual(
                        my_age['val'], 9 + 0.01 * (frame_num - 10),
                        "Should increase 0.01 per frame for katixa for frameNum >= 11"
                    )

    if not os.path.isfile('./etc/vcd430_test_create_search_mid.json'):
        vcd.save('./etc/vcd430_test_create_search_mid.json')

    with open('./etc/vcd430_test_create_search_mid.json', 'r') as test_create_search_mid_read:
        read_vcd = test_create_search_mid_read.read()
    stringified_vcd = vcd.stringify(False)
    self.assertEqual(read_vcd, stringified_vcd, "Should be equal")
def test_actions(self):
    """Demonstrate three alternative ways to model (intransitive) actions in VCD:
    (a) as object attributes, (b) as Action elements linked via Relations,
    (c) as Action elements whose action_data points at the subject object uid.
    """
    # 1.- Create a VCD instance
    vcd_a = core.VCD()
    vcd_b = core.VCD()
    vcd_c = core.VCD()

    # 2.- Add ontology
    vcd_a.add_ontology(ontology_name="http://vcd.vicomtech.org/ontology/automotive")
    vcd_b.add_ontology(ontology_name="http://vcd.vicomtech.org/ontology/automotive")
    vcd_c.add_ontology(ontology_name="http://vcd.vicomtech.org/ontology/automotive")

    # 3.- Add some objects (the same pair is recreated in each VCD; the uid
    # locals are intentionally reused since only the latest VCD's uids are needed)
    uid_pedestrian1 = vcd_a.add_object(name="", semantic_type="Pedestrian", frame_value=None, ont_uid=0)  # therefore its uri is "http://vcd.vicomtech.org/ontology/automotive/#Pedestrian"
    uid_car1 = vcd_a.add_object(name="", semantic_type="Car", frame_value=None, ont_uid=0)
    uid_pedestrian1 = vcd_b.add_object(name="", semantic_type="Pedestrian", frame_value=None, ont_uid=0)
    uid_car1 = vcd_b.add_object(name="", semantic_type="Car", frame_value=None, ont_uid=0)
    uid_pedestrian1 = vcd_c.add_object(name="", semantic_type="Pedestrian", frame_value=None, ont_uid=0)
    uid_car1 = vcd_c.add_object(name="", semantic_type="Car", frame_value=None, ont_uid=0)

    # 4.- Add (intransitive) Actions

    # Option a) Add (intransitive) Actions as Object attributes
    # Pro: simple, quick code, less bytes in JSON
    # Con: No explicit Relation, lack of extensibility, only valid for simple subject-predicates
    vcd_a.add_object_data(uid=uid_pedestrian1, object_data=types.text(name="action", val="Walking"))
    vcd_a.add_object_data(uid=uid_car1, object_data=types.text(name="action", val="Parked"))

    # Option b) Add (intransitive) Actions as Actions and use Relations to link to Objects
    # Pro: Action as element with entity, can add action_data, link to other Objects or complex Relations
    # Con: long to write, occupy more bytes in JSON, more difficult to parse
    uid_action1 = vcd_b.add_action(name="", semantic_type="Walking", frame_value=None, ont_uid=0)
    uid_rel1 = vcd_b.add_relation(name="", semantic_type="performsAction", ont_uid=0)
    vcd_b.add_rdf(relation_uid=uid_rel1, rdf_type=core.RDF.subject, element_uid=uid_pedestrian1,
                  element_type=core.ElementType.object)
    vcd_b.add_rdf(relation_uid=uid_rel1, rdf_type=core.RDF.object, element_uid=uid_action1,
                  element_type=core.ElementType.action)

    uid_action2 = vcd_b.add_action(name="", semantic_type="Parked", frame_value=None, ont_uid=0)
    uid_rel2 = vcd_b.add_relation(name="", semantic_type="performsAction", ont_uid=0)
    vcd_b.add_rdf(relation_uid=uid_rel2, rdf_type=core.RDF.subject, element_uid=uid_car1,
                  element_type=core.ElementType.object)
    vcd_b.add_rdf(relation_uid=uid_rel2, rdf_type=core.RDF.object, element_uid=uid_action2,
                  element_type=core.ElementType.action)

    # Option c) Add Actions as Actions, and use action_Data to point to subject Object
    # Pro: simple as option a
    # Con: sames as a
    uid_action1 = vcd_c.add_action(name="", semantic_type="Walking", frame_value=None, ont_uid=0)
    uid_action2 = vcd_c.add_action(name="", semantic_type="Parked", frame_value=None, ont_uid=0)
    # NOTE(review): uids are strings in this API (see asserts elsewhere in the file),
    # yet they are stored here via types.num — confirm types.num accepts/coerces them.
    vcd_c.add_action_data(uid=uid_action1, action_data=types.num(name="subject", val=uid_pedestrian1))
    vcd_c.add_action_data(uid=uid_action2, action_data=types.num(name="subject", val=uid_car1))

    # Persist each variant once for manual inspection / regression baselines.
    if not os.path.isfile('./etc/test_actions_a.json'):
        vcd_a.save('./etc/test_actions_a.json')
    if not os.path.isfile('./etc/test_actions_b.json'):
        vcd_b.save('./etc/test_actions_b.json')
    if not os.path.isfile('./etc/test_actions_c.json'):
        vcd_c.save('./etc/test_actions_c.json')
def test_scene_KITTI_Tracking_3(self):
    """Load KITTI tracking sequence 3 and enrich it with scene semantics:
    contexts (city, sunny), lane/road objects, drive-straight and lane-change
    actions, a pass event, and the relations linking them all.
    """
    sequence_number = 3
    vcd_file_name = './etc/' + vcd_version_name + '_kitti_tracking_' + str(
        sequence_number).zfill(4) + ".json"
    vcd = core.VCD(vcd_file_name)
    frame_num_last = vcd.get_frame_intervals().get_outer()['frame_end']

    '''
    "In a city, being sunny, the ego-vehicle drives in the left lane of a single-way two-lanes road, Two other cars drive in the right lane. When the cars pass the ego-vehicle, then the ego-vehicle changes to the right lane, and then the ego-vehicle drives in the right lane."
    '''
    vcd.add_metadata_properties({
        "cnl_text": "In a city, being sunny, the ego-vehicle drives in the left lane of a single-way two-lanes road, Two other cars drive in the right lane. When the cars pass the ego-vehicle, then the ego-vehicle changes to the right lane, and then the ego-vehicle drives in the right lane."
    })

    # Let's add VCD entries following the order
    # Contexts (1-2)
    vcd.add_context(name="City1", semantic_type="City")
    vcd.add_context(name="Sunny1", semantic_type="Sunny")

    # Add non-labeled actors (Ego-vehicle and lanes)
    uid_ego = vcd.get_object_uid_by_name(name="Egocar")
    uid_lane_left = vcd.add_object(name="Lane1", semantic_type="Lane")
    uid_lane_right = vcd.add_object(name="Lane2", semantic_type="Lane")
    uid_road = vcd.add_object(name="Road1", semantic_type="Road")

    vcd.add_element_data(element_type=core.ElementType.object, uid=uid_lane_left,
                         element_data=types.text(name="Position", val="Left"))
    vcd.add_element_data(element_type=core.ElementType.object, uid=uid_lane_right,
                         element_data=types.text(name="Position", val="Right"))
    vcd.add_element_data(element_type=core.ElementType.object, uid=uid_road,
                         element_data=types.text(name="Direction", val="Single-way"))
    vcd.add_element_data(element_type=core.ElementType.object, uid=uid_road,
                         element_data=types.num(name="NumberOfLanes", val=2))

    vcd.add_relation_object_object(name="", semantic_type="isPartOf",
                                   object_uid_1=uid_lane_left, object_uid_2=uid_road)
    vcd.add_relation_object_object(name="", semantic_type="isPartOf",
                                   object_uid_1=uid_lane_right, object_uid_2=uid_road)

    # Actors (uids of the pre-labeled tracked objects in the loaded file)
    uid_car_a = "0"  # (0, 75)
    uid_car_b = "1"  # (22, 143)
    uid_car_other_a = "3"
    uid_car_other_b = "4"
    uid_van = "5"
    uid_car_other_c = "6"
    uid_car_other_d = "7"
    uid_car_other_e = "8"

    # Actions
    # Driving straight before lane change
    uid_action_drive_straight_1 = vcd.add_action(
        name="DriveStraight1",
        semantic_type="DriveStraight",
        frame_value=[
            (0, 31)
        ])  # Approx. at frame 31, the ego vehicle starts changing lane
    vcd.add_relation_object_action(name="", semantic_type="isSubjectOfAction",
                                   object_uid=uid_ego,
                                   action_uid=uid_action_drive_straight_1)
    vcd.add_relation_object_action(name="", semantic_type="isObjectOfAction",
                                   object_uid=uid_lane_left,
                                   action_uid=uid_action_drive_straight_1)

    uid_action_drive_straight_2 = vcd.add_action(
        name="DriveStraight2", semantic_type="DriveStraight",
        frame_value=vcd.get_element_frame_intervals(
            element_type=core.ElementType.object, uid=uid_car_a).get())
    vcd.add_relation_object_action(name="", semantic_type="isSubjectOfAction",
                                   object_uid=uid_car_a,
                                   action_uid=uid_action_drive_straight_2)
    vcd.add_relation_object_action(name="", semantic_type="isObjectOfAction",
                                   object_uid=uid_lane_right,
                                   action_uid=uid_action_drive_straight_2)

    uid_action_drive_straight_3 = vcd.add_action(
        name="DriveStraight3", semantic_type="DriveStraight",
        frame_value=vcd.get_element_frame_intervals(
            element_type=core.ElementType.object, uid=uid_car_b).get())
    vcd.add_relation_object_action(name="", semantic_type="isSubjectOfAction",
                                   object_uid=uid_car_b,
                                   action_uid=uid_action_drive_straight_3)
    vcd.add_relation_object_action(name="", semantic_type="isObjectOfAction",
                                   object_uid=uid_lane_right,
                                   action_uid=uid_action_drive_straight_3)

    # Lane changing (event and action)
    uid_action_lane_change = vcd.add_action(name="LaneChange1", semantic_type="LaneChange",
                                            frame_value=[(33, 75)])
    vcd.add_relation_object_action(name="", semantic_type="isSubjectOfAction",
                                   object_uid=uid_ego,
                                   action_uid=uid_action_lane_change)

    #uid_event_pass = vcd.add_event(name="CarB_passes_EgoCar", semantic_type="Pass", frame_value=32)
    #vcd.add_relation_subject_object(name="", semantic_type="Causes", subject_type=core.ElementType.event, subject_uid=uid_event_pass,
    #                                object_type=core.ElementType.action, object_uid=uid_action_lane_change)
    uid_event_pass = vcd.add_event(name="Pass1", semantic_type="Pass", frame_value=32)
    vcd.add_relation_subject_object(name="", semantic_type="isSubjectOfEvent",
                                    subject_type=core.ElementType.object, subject_uid=uid_car_b,
                                    object_type=core.ElementType.event, object_uid=uid_event_pass)
    vcd.add_relation_subject_object(name="", semantic_type="isObjectOfEvent",
                                    subject_type=core.ElementType.object, subject_uid=uid_ego,
                                    object_type=core.ElementType.event, object_uid=uid_event_pass)
    vcd.add_relation_subject_object(name="", semantic_type="causes",
                                    subject_type=core.ElementType.event, subject_uid=uid_event_pass,
                                    object_type=core.ElementType.action, object_uid=uid_action_lane_change)

    # Driving straight after lane change
    # NOTE(review): this action reuses the name "DriveStraight1" of the first
    # action — possibly intended to be "DriveStraight4"; confirm.
    uid_action_drive_straight_4 = vcd.add_action(
        name="DriveStraight1",
        semantic_type="DriveStraight",
        frame_value=[
            (76, frame_num_last)
        ])  # Approx. at frame 31, the ego vehicle starts changing lane
    vcd.add_relation_object_action(name="", semantic_type="isSubjectOfAction",
                                   object_uid=uid_ego,
                                   action_uid=uid_action_drive_straight_4)
    vcd.add_relation_object_action(name="", semantic_type="isObjectOfAction",
                                   object_uid=uid_lane_right,
                                   action_uid=uid_action_drive_straight_4)

    vcd.add_relation_action_action(
        name="", semantic_type="meets",
        action_uid_1=uid_action_lane_change,
        action_uid_2=uid_action_drive_straight_4,
        frame_value=75)

    # Store
    if not os.path.isfile('./etc/' + vcd_version_name + '_kitti_tracking_0003_actions.json'):
        vcd.save('./etc/' + vcd_version_name + '_kitti_tracking_0003_actions.json', validate=True)
def update_vcd(self, annotations, validations, statics=None, metadata=None):
    """Convert annotations into VCD4 format.

    Builds a fresh VCD instance (subject object, name, annotator, ontology,
    streams and stream properties, recording context, driver statics), then
    delegates the per-frame annotation/validation vectors to add_annotationsx
    and stores the result in self._vcd.

    :param annotations: per-frame annotation vectors (one entry per mosaic frame)
    :param validations: per-frame validation vectors; must match annotations in length
    :param statics: optional list of static entries (driver/context/annotator data)
    :param metadata: optional [face_meta, body_meta, hands_meta] camera metadata
    :return: True on success
    :raises RuntimeError: if the stream shifts have not been set via set_shifts()
    """
    # But, if there are already static annotations in vcd, take and keep
    # them for the next vcd
    areStatics = bool(statics)
    isMetadata = bool(metadata)

    if isMetadata:
        # @metadata: [face_meta, body_meta, hands_meta]
        # @face_meta (5): [rgb_video_frames, mat]
        # @body_meta (6): [date_time, rgb_video_frames, mat]
        # @hands_meta (7): [rgb_video_frames, mat]
        self._f_frames = int(metadata[0][0])
        self._f_intrinsics = metadata[0][1]
        self.timeStamp = str(metadata[1][0])
        # Change ":" symbol to ";" for windows correct visualization
        # BUG FIX: str.replace returns a new string (str is immutable); the
        # original discarded the result, so the substitution never took effect.
        self.timeStamp = self.timeStamp.replace(":", ";")
        self._b_frames = int(metadata[1][1])
        self._b_intrinsics = metadata[1][2]
        self._h_frames = int(metadata[2][0])
        self._h_intrinsics = metadata[2][1]

    if areStatics:
        # Driver Data
        age = int(statics[0]["val"])
        gender = statics[1]["val"]
        glasses = bool(statics[2]["val"])
        drive_freq = statics[3]["val"]
        experience = statics[4]["val"]
        # Context Data
        weather = statics[5]["val"]
        setup = statics[6]["val"]
        # Annotator
        annotatorID = str(statics[7]["val"])

    if self._bf_shift is None or self._hb_shift is None or \
            self._hf_shift is None:
        raise RuntimeError(
            "Shift values have not been set. Run set_shifts() function before")
    body_face_shift = self._bf_shift
    # hands_body_shift = self.__hb_shift
    hands_face_shift = self._hf_shift

    # Get total number of lines which is equivalent to total number of
    # frames of mosaic
    assert (len(annotations) == len(validations))
    total_frames = len(annotations)

    # 1.- Create a VCD instance
    vcd = core.VCD()

    # 2.- Add Object for Subject
    self.uid_driver = vcd.add_object(self.subject, "driver", ont_uid=0,
                                     frame_value=(0, total_frames - 1))

    # 3.- VCD Name
    vcd.add_name(self.group + '_' + self.subject + '_' + self.session + '_' +
                 self.date + '_' + self._annotation_mode)

    # 4.- Annotator
    if areStatics:
        vcd.add_annotator(annotatorID)

    # 5- Ontology
    vcd.add_ontology('http://dmd.vicomtech.org/ontology')

    # 6.- Cameras
    # Build Uri to video files
    if self._setUpManager._external_struct:
        video_root_path = Path() / self.group / self.subject / self.session
        face_uri = video_root_path / (self.group + '_' + self.subject + '_' +
                                      self.session + '_' + self.date +
                                      '_rgb_face.mp4')
        body_uri = video_root_path / (self.group + '_' + self.subject + '_' +
                                      self.session + '_' + self.date +
                                      '_rgb_body.mp4')
        hands_uri = video_root_path / (self.group + '_' + self.subject + '_' +
                                       self.session + '_' + self.date +
                                       '_rgb_hands.mp4')
    else:
        video_root_path = Path() / self.group / self.date / self.subject
        face_uri = video_root_path / (self.subject + '_' + self.session + '_' +
                                      'face' + '_' + self.date + '.mp4')
        body_uri = video_root_path / (self.subject + '_' + self.session + '_' +
                                      'body' + '_' + self.date + '.mp4')
        hands_uri = video_root_path / (self.subject + '_' + self.session + '_' +
                                       'hands' + '_' + self.date + '.mp4')

    face_video_descr = 'Frontal face looking camera'
    body_video_descr = 'Side body looking camera'
    hands_video_descr = 'Hands and wheel looking camera'
    vcd.add_stream('face_camera', str(face_uri), face_video_descr,
                   core.StreamType.camera)
    vcd.add_stream('body_camera', str(body_uri), body_video_descr,
                   core.StreamType.camera)
    vcd.add_stream('hands_camera', str(hands_uri), hands_video_descr,
                   core.StreamType.camera)

    # 7.- Stream Properties
    #     Real Intrinsics of cameras
    vcd.add_stream_properties(stream_name='face_camera',
                              properties={
                                  'cam_module': 'Intel RealSense D415',
                                  'total_frames': self._f_frames,
                              },
                              stream_sync=types.StreamSync(frame_shift=0),
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=1280,
                                  height_px=720,
                                  camera_matrix_3x4=self._f_intrinsics))
    vcd.add_stream_properties(
        stream_name='body_camera',
        properties={
            'camera_module': 'Intel RealSense D435',
            'total_frames': self._b_frames,
        },
        stream_sync=types.StreamSync(frame_shift=body_face_shift),
        intrinsics=types.IntrinsicsPinhole(
            width_px=1280,
            height_px=720,
            camera_matrix_3x4=self._b_intrinsics))
    vcd.add_stream_properties(
        stream_name='hands_camera',
        properties={
            'camera_module': 'Intel RealSense D415',
            'total_frames': self._h_frames,
        },
        stream_sync=types.StreamSync(frame_shift=hands_face_shift),
        intrinsics=types.IntrinsicsPinhole(
            width_px=1280,
            height_px=720,
            camera_matrix_3x4=self._h_intrinsics))

    if areStatics or isMetadata:
        # 8.- Add Context of Recording session
        last_frame = total_frames - 1
        ctx_txt = 'recording_context'
        rec_context_uid = vcd.add_context(name='', semantic_type=ctx_txt,
                                          frame_value=(0, last_frame))

        if areStatics:
            vcd.add_context_data(rec_context_uid,
                                 types.text(name='weather', val=weather))
            vcd.add_context_data(rec_context_uid,
                                 types.text(name='setup', val=setup))

            # 9.- Add Driver static properties
            vcd.add_object_data(self.uid_driver,
                                types.num(name='age', val=age))
            vcd.add_object_data(self.uid_driver,
                                types.text(name='gender', val=gender))
            vcd.add_object_data(self.uid_driver,
                                types.boolean(name='glasses', val=glasses))
            vcd.add_object_data(
                self.uid_driver, types.text(name='experience', val=experience))
            vcd.add_object_data(
                self.uid_driver, types.text(name='drive_freq', val=drive_freq))

        if isMetadata:
            vcd.add_context_data(
                rec_context_uid,
                types.text(name='recordTime', val=self.timeStamp))

    # 10.- Save annotation and validation vectors in VCD format
    # Perform general update
    new_vcd = self.add_annotationsx(vcd, annotations, validations,
                                    self.ont_uid)
    # Update class variable __vcd with newly created object
    self._vcd = new_vcd
    return True
def parse_sequence(self, seq_number: int):
    """Parse one KITTI tracking sequence into a VCD scene.

    Reads the calibration, OXTS (GPS/IMU) and object-label files for the
    given sequence number, registers the sensor streams (lidar, IMU, left
    and right rectified cameras) with their intrinsics/extrinsics, adds the
    per-frame odometry, and converts every 2D box / 3D cuboid label into
    VCD object data.

    :param seq_number: KITTI tracking sequence index (zero-padded to 4
                       digits to build the file names).
    :return: the populated ``core.VCD`` object.
    """
    vcd = core.VCD()

    #########################################
    # OPEN files
    #########################################
    calib_file_name = os.path.join(self.kitti_tracking_calib_path,
                                   str(seq_number).zfill(4) + ".txt")
    oxts_file_name = os.path.join(self.kitti_tracking_oxts_path,
                                  str(seq_number).zfill(4) + ".txt")
    object_file_name = os.path.join(self.kitti_tracking_objects_path,
                                    str(seq_number).zfill(4) + '.txt')
    calib_file = open(calib_file_name, newline='')
    oxts_file = open(oxts_file_name, newline='')
    object_file = open(object_file_name, newline='')
    # KITTI files are space-separated text tables
    calib_reader = csv.reader(calib_file, delimiter=' ')
    oxts_reader = csv.reader(oxts_file, delimiter=' ')
    object_reader = csv.reader(object_file, delimiter=' ')

    #########################################
    # READ calibration matrices
    #########################################
    img_width_px = 1236
    img_height_px = 366  # these are rectified dimensions
    calib_matrices = {}
    for row in calib_reader:
        calib_matrices[row[0]] = [
            float(x) for x in row[1:] if len(x) > 0
        ]  # filter out some spaces at the end of the row
    # P2/P3 are the 3x4 projection matrices of the left/right color cameras
    left_camera_K3x4 = np.reshape(calib_matrices["P2:"], (3, 4))
    right_camera_K3x4 = np.reshape(calib_matrices["P3:"], (3, 4))
    camera_rectification_3x3 = np.reshape(calib_matrices["R_rect"], (3, 3))
    transform_velo_to_camleft_3x4 = np.reshape(
        calib_matrices["Tr_velo_cam"], (3, 4))  # WRT to LEFT CAMERA ONLY

    #########################################
    # LIDAR info
    #########################################
    # http://www.cvlibs.net/datasets/kitti/setup.php
    location_velo_wrt_lcs_3x1 = np.array(
        [[0.76], [0.0], [1.73]])  # according to the documentation
    # Create pose (p=[[R|C],[0001]]) — identity rotation, translated mount point
    pose_velo_wrt_lcs_4x4 = utils.create_pose(
        np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]),
        location_velo_wrt_lcs_3x1)
    transform_lcs_to_velo_4x4 = utils.inv(pose_velo_wrt_lcs_4x4)
    vcd.add_stream(stream_name="VELO_TOP",
                   uri="",
                   description="Velodyne roof",
                   stream_type=core.StreamType.lidar)
    vcd.add_stream_properties(
        stream_name="VELO_TOP",
        extrinsics=types.Extrinsics(
            pose_scs_wrt_lcs_4x4=list(pose_velo_wrt_lcs_4x4.flatten())))

    #########################################
    # GPS/IMU info
    #########################################
    # Let's build also the pose of the imu
    location_imu_wrt_lcs_4x4 = np.array([[-0.05], [0.32], [0.93]
                                         ])  # according to documentation
    pose_imu_wrt_lcs_4x4 = utils.create_pose(
        np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]),
        location_imu_wrt_lcs_4x4)
    vcd.add_stream(stream_name="IMU",
                   uri="",
                   description="GPS/IMU",
                   stream_type=core.StreamType.other)
    vcd.add_stream_properties(
        stream_name="IMU",
        extrinsics=types.Extrinsics(
            pose_scs_wrt_lcs_4x4=list(pose_imu_wrt_lcs_4x4.flatten())))

    #########################################
    # CAMERAS
    #########################################
    # From KITTI readme.txt:
    # To project a point from Velodyne coordinates into the left color image,
    # you can use this formula: x = P2 * R0_rect * Tr_velo_to_cam * y
    # For the right color image: x = P3 * R0_rect * Tr_velo_to_cam * y
    # Note: All matrices are stored row-major, i.e., the first values correspond
    # to the first row. R0_rect contains a 3x3 matrix which you need to extend to
    # a 4x4 matrix by adding a 1 as the bottom-right element and 0's elsewhere.
    # Tr_xxx is a 3x4 matrix (R|t), which you need to extend to a 4x4 matrix
    # in the same way!
    # Virtually, cam_left and cam_right are defined as the same coordinate systems, so their scs are the same
    # But their projection matrices (3x4) include a right-most non-zero column which shifts 3d points when projected
    # into the images, that is why projecting from velodyne to left and right use the same "extrinsics", and just differ
    # in the usage of the "intrinsic" matrices P2 and P3
    # P2 and P3 might be decomposed so P2 = K2*T2 and P3=K3*T3, so T2 and T3 could host extrinsic information
    # while K2 and K3 could host the intrinsic information. This way, the pose of cam_left would be T2*R_rect*Tr_velo
    # However, such decomposition seems to be non-trivial.
    # x = P2 * R0_rect * Tr_velo_to_cam * y
    # x = P3 * R0_rect * Tr_velo_to_cam * y
    camera_rectification_4x4 = np.vstack((np.hstack(
        (camera_rectification_3x3, [[0], [0], [0]])), [0, 0, 0, 1]))
    transform_velo_to_camleft_4x4 = np.vstack(
        (transform_velo_to_camleft_3x4, [0, 0, 0, 1]))
    transform_velo_to_camleft_4x4 = np.dot(
        camera_rectification_4x4, transform_velo_to_camleft_4x4
    )  # such that X_cam = transform_velo_to_cam_4x4 * X_velo

    # The pose of cameras can't be read from documentation, as these are virtual cameras created via a rectification
    # process, therefore, we need to build them using the velo_to_cam calibration
    # Pose_camLeft_wrt_ccs = RT_camLeft_to_ccs
    transform_lcs_to_camleft_4x4 = np.dot(transform_velo_to_camleft_4x4,
                                          transform_lcs_to_velo_4x4)
    pose_camleft_wrt_lcs_4x4 = utils.inv(transform_lcs_to_camleft_4x4)
    pose_camright_wrt_lcs_4x4 = pose_camleft_wrt_lcs_4x4

    # Create cams and fill scene
    vcd.add_stream(stream_name="CAM_LEFT",
                   uri="",
                   description="Virtual Left color camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(
        stream_name="CAM_LEFT",
        intrinsics=types.IntrinsicsPinhole(width_px=img_width_px,
                                           height_px=img_height_px,
                                           camera_matrix_3x4=list(
                                               left_camera_K3x4.flatten()),
                                           distortion_coeffs_1xN=None),
        extrinsics=types.Extrinsics(
            pose_scs_wrt_lcs_4x4=list(pose_camleft_wrt_lcs_4x4.flatten())))
    vcd.add_stream(stream_name="CAM_RIGHT",
                   uri="",
                   description="Virtual Right color camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(
        stream_name="CAM_RIGHT",
        intrinsics=types.IntrinsicsPinhole(
            width_px=img_width_px,
            height_px=img_height_px,
            camera_matrix_3x4=list(right_camera_K3x4.flatten()),
            distortion_coeffs_1xN=None),
        extrinsics=types.Extrinsics(pose_scs_wrt_lcs_4x4=list(
            pose_camright_wrt_lcs_4x4.flatten())))

    #########################################
    # ODOMETRY
    #########################################
    oxts = []
    for row in oxts_reader:
        # drop the trailing empty field produced by the trailing space
        row = row[0:len(row) - 1]
        floats = [float(i) for i in row]
        oxts.append(floats)
        '''lat_deg = row[0]  # deg
        lon_deg = row[1]
        alt_deg = row[2]
        roll_rad = row[3]  # 0 = level, positive = left side up (-pi..pi)
        pitch_rad = row[4]  # 0 = level, positive = front down (-pi/2..pi/2)
        yaw_rad = row[5]  # 0 = east, positive = counter clockwise (-pi..pi)
        vn = row[6]  # velocity towards north(m / s)
        ve = row[7]  # velocity towards east(m / s)
        vf = row[8]  # forward velocity, i.e.parallel to earth - surface(m / s)
        vl = row[9]  # leftward velocity, i.e.parallel to earth - surface(m / s)
        vu = row[10]  # upward velocity, i.e.perpendicular to earth - surface(m / s)
        ax = row[11]  # acceleration in x, i.e. in direction of vehicle front(m / s ^ 2)
        ay = row[12]  # acceleration in y, i.e. in direction of vehicle left(m / s ^ 2)
        az = row[13]  # acceleration in z, i.e. in direction of vehicle top(m / s ^ 2)
        af = row[14]  # forward acceleration(m / s ^ 2)
        al = row[15]  # leftward acceleration(m / s ^ 2)
        au = row[16]  # upward acceleration(m / s ^ 2)
        wx = row[17]  # angular rate around x(rad / s)
        wy = row[18]  # angular rate around y(rad / s)
        wz = row[19]  # angular rate around z(rad / s)
        wf = row[20]  # angular rate around forward axis(rad / s)
        wl = row[21]  # angular rate around leftward axis(rad / s)
        wu = row[22]  # angular rate around upward axis(rad / s)
        posacc = row[23]  # velocity accuracy(north / east in m)
        velacc = row[24]  # velocity accuracy(north / east in m / s)
        navstat = row[25]  # navigation status
        numsats = row[26]  # number of satellites tracked by primary GPS receiver
        posmode = row[27]  # position mode of primary GPS receiver
        velmode = row[28]  # velocity mode of primary GPS receiver
        orimode = row[29]  # orientation mode of primary GPS receiver
        '''

    # Convert odometry (GPS) to poses
    odometry_4x4xN = utils.convert_oxts_to_pose(oxts)
    # An odometry entry is a 4x4 pose matrix of the lcs wrt wcs
    # poses_4x4xN_lcs_wrt_wcs = odometry_4x4xN
    frames_1xN = np.arange(0, odometry_4x4xN.shape[2], 1).reshape(
        (1, odometry_4x4xN.shape[2]))
    r, c = frames_1xN.shape
    for i in range(0, c):
        vcd.add_odometry(
            int(frames_1xN[0, i]),
            types.Odometry(
                pose_lcs_wrt_wcs_4x4=list(odometry_4x4xN[:, :, i].flatten())))

    #########################################
    # LABELS
    #########################################
    for row in object_reader:
        frameNum = int(row[0])
        trackID = int(row[1]) + 1  # VCD can't handle negative ids
        semantic_class = row[2]
        truncated = utils.float_2dec(float(row[3]))
        occluded = int(row[4])
        alpha = utils.float_2dec(float(row[5]))
        # KITTI 2D box is (left, top, right, bottom); VCD bbox wants width/height
        left = utils.float_2dec(float(row[6]))
        top = utils.float_2dec(float(row[7]))
        width = utils.float_2dec(float(row[8]) - left)
        height = utils.float_2dec(float(row[9]) - top)
        bounding_box = types.bbox(name="",
                                  val=(left, top, width, height),
                                  stream='CAM_LEFT')
        dimHeight = utils.float_2dec(float(row[10]))
        dimWidth = utils.float_2dec(float(row[11]))
        dimLength = utils.float_2dec(float(row[12]))
        locX = utils.float_2dec(float(row[13]))
        locY = utils.float_2dec(float(row[14]))
        locZ = utils.float_2dec(float(row[15]))
        rotY = utils.float_2dec(float(row[16]))

        # Note KITTI uses (h, w, l, x, y, z, ry) for cuboids, in camera coordinates (X-to-right, Y-to-bottom, Z-to-front)
        # while in VCD (x,y,z, rx, ry, rz, sx, sy, sz) is defined as a dextrogire system
        # To express the cuboid in LCS (Local-Coordinate-System), we can add the pose of the camera
        # Cameras are 1.65 m height wrt ground
        # Cameras are 1.03 meters wrt to rear axle
        cam_wrt_rear_axle_z = 1.03
        cam_height = 1.65
        cuboid = types.cuboid(
            name="",
            val=(utils.float_2dec(locZ + cam_wrt_rear_axle_z),
                 utils.float_2dec(-locX),
                 utils.float_2dec(-locY + cam_height), 0, 0,
                 utils.float_2dec(rotY), utils.float_2dec(dimWidth),
                 utils.float_2dec(dimLength), utils.float_2dec(dimHeight)))
        # Note that if no "stream" parameter is given to this cuboid, LCS is assumed

        if not vcd.has(core.ElementType.object, trackID):
            vcd.add_object(name="", semantic_type=semantic_class, uid=trackID)
        vcd.add_object_data(trackID, bounding_box, frameNum)
        # DontCare regions have no meaningful 3D box, so the cuboid is skipped
        if semantic_class != "DontCare":
            vcd.add_object_data(trackID, cuboid, frameNum)
        vcd.add_object_data(trackID, types.num(name="truncated",
                                               val=truncated), frameNum)
        vcd.add_object_data(trackID, types.num(name="occluded",
                                               val=occluded), frameNum)
        vcd.add_object_data(trackID, types.num(name="alpha", val=alpha),
                            frameNum)

    # Return
    return vcd
def parse_sequence_direct(self, seq_number: int):
    """Parse one KITTI tracking sequence into a VCD scene using coordinate systems.

    This is a variant approach for creating a VCD 4.3.0 file reading the KITTI
    calibration files, trying to avoid additional computation at this level, and
    exploiting the ability of VCD 4.3.0 to express arbitrary transforms across
    coordinate systems ("odom" -> "vehicle-iso8855" -> sensors).

    Fixes vs. previous revision:
      * uids passed to ``add_object_data`` / ``get_element_data_count_per_type``
        are now consistently ``str(trackID)`` (objects are registered with
        string uids, so the raw int mismatched).
      * the three input files are closed when parsing is done.

    :param seq_number: KITTI tracking sequence index (zero-padded to 4 digits).
    :return: the populated ``core.VCD`` object.
    """
    vcd = core.VCD()

    #########################################
    # OPEN files
    #########################################
    calib_file_name = os.path.join(self.kitti_tracking_calib_path,
                                   str(seq_number).zfill(4) + ".txt")
    oxts_file_name = os.path.join(self.kitti_tracking_oxts_path,
                                  str(seq_number).zfill(4) + ".txt")
    object_file_name = os.path.join(self.kitti_tracking_objects_path,
                                    str(seq_number).zfill(4) + '.txt')
    calib_file = open(calib_file_name, newline='')
    oxts_file = open(oxts_file_name, newline='')
    object_file = open(object_file_name, newline='')
    calib_reader = csv.reader(calib_file, delimiter=' ')
    oxts_reader = csv.reader(oxts_file, delimiter=' ')
    object_reader = csv.reader(object_file, delimiter=' ')

    #########################################
    # CREATE base coordinate system
    #########################################
    # The main coordinate system for the scene "odom" represents a static cs
    # (which coincides with first local cs).
    vcd.add_coordinate_system("odom",
                              cs_type=types.CoordinateSystemType.scene_cs)

    #########################################
    # CREATE vehicle coordinate system
    #########################################
    # Local coordinate system, moving with the vehicle.
    # Following iso8855 (x-front, y-left, z-up)
    vcd.add_coordinate_system("vehicle-iso8855",
                              cs_type=types.CoordinateSystemType.local_cs,
                              parent_name="odom")

    # Add transforms for each time instant
    odometry_4x4xN = self.read_odometry_from_oxts(oxts_reader)
    # An odometry entry is a 4x4 pose matrix of the lcs wrt wcs
    # (ergo a transform lcs_to_wcs)
    frames_1xN = np.arange(0, odometry_4x4xN.shape[2], 1).reshape(
        (1, odometry_4x4xN.shape[2]))
    _, num_frames = frames_1xN.shape
    for i in range(0, num_frames):
        vcd.add_transform(int(frames_1xN[0, i]),
                          transform=types.Transform(
                              src_name="vehicle-iso8855",
                              dst_name="odom",
                              transform_src_to_dst_4x4=list(
                                  odometry_4x4xN[:, :, i].flatten())))

    #########################################
    # CREATE SENSORS coordinate system: LASER
    #########################################
    # http://www.cvlibs.net/datasets/kitti/setup.php
    location_velo_wrt_vehicle_3x1 = np.array(
        [[0.76], [0.0], [1.73]])  # according to the documentation
    pose_velo_wrt_vehicle_4x4 = utils.create_pose(
        utils.identity(3), location_velo_wrt_vehicle_3x1)
    vcd.add_stream(stream_name="VELO_TOP",
                   uri="",
                   description="Velodyne roof",
                   stream_type=core.StreamType.lidar)
    vcd.add_coordinate_system("VELO_TOP",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(
                                  pose_velo_wrt_vehicle_4x4.flatten()))

    #########################################
    # CREATE SENSORS coordinate system: GPS/IMU
    #########################################
    # Let's build also the pose of the imu
    location_imu_wrt_vehicle_3x1 = np.array(
        [[-0.05], [0.32], [0.93]])  # according to documentation
    pose_imu_wrt_vehicle_4x4 = utils.create_pose(
        utils.identity(3), location_imu_wrt_vehicle_3x1)
    vcd.add_stream(stream_name="IMU",
                   uri="",
                   description="GPS/IMU",
                   stream_type=core.StreamType.other)
    vcd.add_coordinate_system("IMU",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="vehicle-iso8855",
                              pose_wrt_parent=list(
                                  pose_imu_wrt_vehicle_4x4.flatten()))

    #########################################
    # CREATE SENSORS coordinate system: CAM
    #########################################
    img_width_px = 1242
    img_height_px = 375  # these are rectified dimensions
    calib_matrices = {}
    for row in calib_reader:
        calib_matrices[row[0]] = [
            float(x) for x in row[1:] if len(x) > 0
        ]  # filter out some spaces at the end of the row
    # From KITTI readme.txt:
    # To project a point from Velodyne coordinates into the left color image,
    # you can use this formula: x = P2 * R0_rect * Tr_velo_to_cam * y
    # For the right color image: x = P3 * R0_rect * Tr_velo_to_cam * y
    left_camera_K3x4 = np.reshape(calib_matrices["P2:"], (3, 4))
    right_camera_K3x4 = np.reshape(calib_matrices["P3:"], (3, 4))
    camera_rectification_3x3 = np.reshape(calib_matrices["R_rect"], (3, 3))
    transform_velo_to_camleft_3x4 = np.reshape(
        calib_matrices["Tr_velo_cam"], (3, 4))  # WRT to LEFT CAMERA ONLY
    camera_rectification_4x4 = np.vstack((np.hstack(
        (camera_rectification_3x3, [[0], [0], [0]])), [0, 0, 0, 1]))
    transform_velo_to_camleft_4x4 = np.vstack(
        (transform_velo_to_camleft_3x4, [0, 0, 0, 1]))
    transform_velo_to_camleft_4x4 = np.dot(
        camera_rectification_4x4, transform_velo_to_camleft_4x4
    )  # such that X_cam = transform_velo_to_cam_4x4 * X_velo
    pose_camleft_wrt_velo_4x4 = utils.inv(transform_velo_to_camleft_4x4)

    vcd.add_stream(stream_name="CAM_LEFT",
                   uri="",
                   description="Virtual Left color camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_LEFT",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  camera_matrix_3x4=list(
                                      left_camera_K3x4.flatten()),
                                  distortion_coeffs_1xN=None))
    vcd.add_coordinate_system("CAM_LEFT",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="VELO_TOP",
                              pose_wrt_parent=list(
                                  pose_camleft_wrt_velo_4x4.flatten()))

    # Virtually, cam_left and cam_right are defined as the same coordinate
    # systems, so their scs are the same. But their projection matrices (3x4)
    # include a right-most non-zero column which shifts 3d points when
    # projected into the images; that is why projecting from velodyne to left
    # and right uses the same "extrinsics" and just differs in the usage of
    # the "intrinsic" matrices P2 and P3.
    # P2 and P3 might be decomposed so P2 = K2*T2 and P3=K3*T3, so T2 and T3
    # could host extrinsic information while K2 and K3 could host the
    # intrinsic information. This way, the pose of cam_left would be
    # T2*R_rect*Tr_velo. However, such decomposition seems to be non-trivial.
    # x = P2 * R0_rect * Tr_velo_to_cam * y
    # x = P3 * R0_rect * Tr_velo_to_cam * y
    vcd.add_stream(stream_name="CAM_RIGHT",
                   uri="",
                   description="Virtual Right color camera",
                   stream_type=core.StreamType.camera)
    vcd.add_stream_properties(stream_name="CAM_RIGHT",
                              intrinsics=types.IntrinsicsPinhole(
                                  width_px=img_width_px,
                                  height_px=img_height_px,
                                  camera_matrix_3x4=list(
                                      right_camera_K3x4.flatten()),
                                  distortion_coeffs_1xN=None))
    # NOTE: intentionally the same pose as CAM_LEFT (see comment above)
    vcd.add_coordinate_system("CAM_RIGHT",
                              cs_type=types.CoordinateSystemType.sensor_cs,
                              parent_name="VELO_TOP",
                              pose_wrt_parent=list(
                                  pose_camleft_wrt_velo_4x4.flatten()))

    #########################################
    # LABELS
    #########################################
    for row in object_reader:
        frameNum = int(row[0])
        trackID = int(row[1])  # -1 rows are DontCare regions
        semantic_class = row[2]
        truncated = utils.float_2dec(float(row[3]))
        occluded = int(row[4])
        alpha = utils.float_2dec(float(
            row[5]))  # this is the observation angle (see cs_overview.pdf)
        # KITTI 2D box is (left, top, right, bottom); convert to width/height
        left = utils.float_2dec(float(row[6]))
        top = utils.float_2dec(float(row[7]))
        width = utils.float_2dec(float(row[8]) - left)
        height = utils.float_2dec(float(row[9]) - top)

        if trackID == -1:
            # This is DontCare, there are multiple boxes per frame; give each
            # a unique name so they don't overwrite one another
            count = vcd.get_element_data_count_per_type(
                core.ElementType.object, str(trackID),
                types.ObjectDataType.bbox, frameNum)
            name_box = "box2D" + str(count)
        else:
            name_box = "box2D"
        bounding_box = types.bbox(name=name_box,
                                  val=(left + width / 2, top + height / 2,
                                       width, height),
                                  coordinate_system='CAM_LEFT')

        # see cs_overview.pdf
        dimH = utils.float_2dec(float(row[10]))
        dimW = utils.float_2dec(float(row[11]))
        dimL = utils.float_2dec(float(row[12]))
        locX = utils.float_2dec(float(row[13]))
        locY = utils.float_2dec(float(row[14]))
        locZ = utils.float_2dec(float(row[15]))
        rotY = utils.float_2dec(float(row[16]))

        # Note KITTI uses (h, w, l, x, y, z, ry) for cuboids, in camera
        # coordinates (X-to-right, Y-to-bottom, Z-to-front), while in VCD
        # (x, y, z, rx, ry, rz, sx, sy, sz) is defined as a dextrogire
        # system, centroid-based.
        # NOTE: changing locY by locY - dimH/2 as VCD uses centroid and KITTI
        # uses bottom face.
        # NOTE: All in Camera coordinate system.
        # NOTE: x = length, y = height, z = width because of convention in
        # readme.txt:
        #   The reference point for the 3D bounding box for each object is
        #   centered on the bottom face of the box. The corners are computed
        #   with respect to the reference point in the object cs:
        #   x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2]^T
        #   y_corners = [0, 0, 0, 0, -h, -h, -h, -h ]^T
        #   z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2 ]^T
        #   with l=length, h=height, and w=width.
        cuboid = types.cuboid(name="box3D",
                              val=(utils.float_2dec(locX),
                                   utils.float_2dec(locY - dimH / 2),
                                   utils.float_2dec(locZ), 0,
                                   utils.float_2dec(rotY), 0,
                                   utils.float_2dec(dimL),
                                   utils.float_2dec(dimH),
                                   utils.float_2dec(dimW)),
                              coordinate_system="CAM_LEFT")

        if not vcd.has(core.ElementType.object, str(trackID)):
            # First time this track is seen: register the object
            if trackID >= 0:
                vcd.add_object(name=semantic_class + str(trackID),
                               semantic_type=semantic_class,
                               uid=str(trackID),
                               frame_value=frameNum)
            else:
                # so this is for DontCare object
                vcd.add_object(name=semantic_class,
                               semantic_type=semantic_class,
                               uid=str(trackID),
                               frame_value=frameNum)
        # uids are strings throughout (objects were registered with str uids)
        vcd.add_object_data(str(trackID), bounding_box, frameNum)
        vcd.add_object_data(str(trackID), cuboid, frameNum)
        vcd.add_object_data(str(trackID),
                            types.num(name="truncated", val=truncated),
                            frameNum)
        vcd.add_object_data(str(trackID),
                            types.num(name="occluded", val=occluded),
                            frameNum)
        vcd.add_object_data(str(trackID), types.num(name="alpha", val=alpha),
                            frameNum)

    #########################################
    # Ego-vehicle
    #########################################
    vcd.add_object(name="Egocar", semantic_type="Egocar", uid=str(-2))
    cuboid_ego = types.cuboid(name="box3D",
                              val=(1.35, 0.0, 0.736, 0.0, 0.0, 0.0, 4.765,
                                   1.82, 1.47),
                              coordinate_system="vehicle-iso8855")
    vcd.add_object_data(str(-2), cuboid_ego)

    # All three inputs are fully consumed at this point; release the handles
    calib_file.close()
    oxts_file.close()
    object_file.close()

    # Return
    return vcd