class FrameCloudManager:
    def __init__(self, args):
        self.args = args
        self.params = args['params']
        self.reader_left = VideoReader(args['video'])
        self.reader_right = VideoReader(args['opposite_video'])
        self.ldr_map = loadLDRCamMap(args['map'])
        self.queue = Queue.Queue()
        self.finished = False

    def loadNext(self):
        while self.finished == False:
            # skip ahead five frames, keeping only the last pair
            for t in range(5):
                (success, imgL) = self.reader_left.getNextFrame()
                (success, imgR) = self.reader_right.getNextFrame()
            if success == False:
                self.finished = True
                return
            #(disp, Q, R1, R2) = doStereo(imgL, imgR, self.params)
            #cv2.imshow('disp', disp)
            #print Q
            #stereo_points = get3dPoints(disp, Q)
            #stereo_points = stereo_points[disp > 5, :]
            (R1, R2, P1, P2, Q, size1, size2,
             map1x, map1y, map2x, map2y) = computeStereoRectify(self.params)
            stereo_points = np.load(sys.argv[3] + '/3d_' +
                                    str(self.reader_left.framenum) + '.npz')['data']
            print stereo_points
            stereo_points = stereo_points.transpose()
            stereo_points = np.dot(R1.transpose(), stereo_points)
            print np.amax(stereo_points, axis=1)
            print np.amin(stereo_points, axis=1)
            stereo_points = np.vstack((stereo_points,
                                       np.ones((1, stereo_points.shape[1]))))
            print stereo_points.shape
            stereo_points = np.dot(np.linalg.inv(self.params['cam'][0]['E']),
                                   stereo_points)
            stereo_wrt_lidar = np.dot(R_to_c_from_l(self.params['cam'][0]).transpose(),
                                      stereo_points[0:3, :])
            stereo_wrt_lidar = stereo_wrt_lidar.transpose()
            stereo_wrt_lidar = stereo_wrt_lidar[:, 0:3] - \
                self.params['cam'][0]['displacement_from_l_to_c_in_lidar_frame']
            #img = cv2.resize(img, (640, 480))
            imgL = cv2.pyrDown(imgL)
            #cv2.imshow('disparity', cv2.pyrDown(disp)/64.0)
            framenum = self.reader_left.framenum
            if framenum >= len(self.ldr_map):
                self.finished = True
                return
            ldr_file = self.ldr_map[framenum]
            lidar_pts = loadLDR(ldr_file)
            self.queue.put({'img': imgL,
                            'lidar_pts': lidar_pts,
                            'stereo_pts': stereo_wrt_lidar})
def video_processing(videos_dir):
    depth, height, width = 16, 120, 160
    color = True
    factor = 4
    vreader = VideoReader(depth, height, width, color, factor)
    X_train, Y_train, X_test, Y_test = vreader.loaddata(videos_dir)
    X_train = X_train.reshape(
        (X_train.shape[0], depth, height, width, 3)).astype('uint8')
    X_test = X_test.reshape(
        (X_test.shape[0], depth, height, width, 3)).astype('uint8')
    print('X_train shape:{}\nY_train shape:{}'.format(X_train.shape, Y_train.shape))
    print('X_test shape:{}\nY_test shape:{}'.format(X_test.shape, Y_test.shape))
    return X_train, Y_train.astype('uint32'), X_test, Y_test.astype('uint32')
class FrameCloudManager:
    def __init__(self, video_file, map_file):
        self.reader = VideoReader(video_file)
        self.ldr_map = loadLDRCamMap(map_file)
        self.queue = Queue.Queue()
        self.finished = False

    def loadNext(self):
        while self.finished == False:
            # skip ahead five frames, keeping only the last one
            for t in range(5):
                (success, img) = self.reader.getNextFrame()
            if success == False:
                self.finished = True
                return
            #img = cv2.resize(img, (640, 480))
            img = cv2.pyrDown(img)
            framenum = self.reader.framenum
            if framenum >= len(self.ldr_map):
                self.finished = True
                return
            ldr_file = self.ldr_map[framenum]
            lidar_pts = loadLDR(ldr_file)
            self.queue.put({'img': img, 'lidar_pts': lidar_pts})
            # throttle the producer so the queue never grows past 5 items
            while self.queue.qsize() > 5:
                time.sleep(0.1)
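# A minimal sketch (not part of the original code) of how this producer might be
# driven: loadNext() runs on a background thread while the main thread drains the
# queue. The file paths and the consumer loop below are assumptions.
import threading

manager = FrameCloudManager('split_0_video.avi', 'lidar_cam_map.txt')  # assumed paths
worker = threading.Thread(target=manager.loadNext)
worker.start()

while not (manager.finished and manager.queue.empty()):
    item = manager.queue.get()  # blocks until a frame/point-cloud pair is ready
    img, lidar_pts = item['img'], item['lidar_pts']
    # ... project lidar_pts onto img or hand the pair to a visualizer here ...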
def ReadVideos(self):
    '''
    Reads the frames of each video, renames them and saves them to the given folder.
    Inputs: none
    Hidden inputs:
        self.opts: 'video_image_dir', 'video_vr_opts', 'video_group_num', 'video_wait'
    Outputs: none
    Hidden outputs:
        self.video_list, self.video_num
    Helper functions called:
        self.GetFiles
        self.SaveVideoImages
        VideoReader
    '''
    # Collect every file in the folder
    file_list = self.GetFiles()
    print(f'INFO:Find {len(file_list)} Files with ', end='')
    # Keep only files with a video extension
    self.video_list = FilesFilter(file_list, _VIDTYPE_LIST)
    self.video_num = len(self.video_list)
    print(f'{len(self.video_list)} Videos')
    # Create the directory for the extracted source images next to the source
    # files; the directory and the images inside it are kept.
    video_image_dir = self.opts['video_image_dir']
    if not os.path.exists(video_image_dir):
        os.makedirs(video_image_dir)
    # Read and save every video
    for video_name in self.video_list:
        vr_opts = copy.deepcopy(self.opts['video_vr_opts'])
        vr = VideoReader(video_name, opts=vr_opts)
        if self.opts['video_group_num']:
            vr.AutoSet(self.opts['video_group_num'])
        vr.PrintVideoInfo()
        image_groups_arrays = vr.GetAllGroups()
        self.SaveVideoImages(image_groups_arrays, video_name)
    # Optionally pause so the user can check that the output looks as expected
    if self.opts['video_wait']:
        print('\nINFO:Video read complete ',
              f', Please check the pictures in folder {video_image_dir} ',
              ',input Y or y to continue')
        while True:
            cin = input()
            if cin in ('Y', 'y'):
                break
            else:
                print('input Y or y to continue')
def processFile(self, file_path):
    file_path = file_path.replace('\\', '/')
    videoReader = VideoReader(file_path)
    movementTracker = MovementTracker()
    # extract the file name (without extension) from an .avi path
    captured = re.search(r'(?<=/)([A-Za-z0-9\-_]+)(?=\.avi)', file_path, re.IGNORECASE)
    if captured is None:
        return
    file_name = captured.group(0)
    while True:
        if self.cancel == True:
            break
        frame = videoReader.nextFrame()
        if frame is None:
            np.savetxt(self.target + "/" + file_name + ".csv",
                       movementTracker.getEvents(), delimiter=",", fmt='%.2f')
            break
        foreground_mask, movement = self.videoAnalyzer.detectMovement(frame)
        movementTracker.update(movement, videoReader.currentPositionInSeconds())
    videoReader.close()
def displayVideo(self, file_path):
    self.toggleVideoControls(True)
    videoReader = VideoReader(file_path)
    self.totalTimeLabel.setText(
        time.strftime('%H:%M:%S', time.gmtime(videoReader.lengthInSeconds())))
    self.timeSlider.setTickInterval(1)
    self.timeSlider.setRange(0, videoReader.duration)
    self.videoThread.changeOriginalPixmap.connect(
        lambda p: self.original.setPixmap(p))
    self.videoThread.changeProcessedPixmap.connect(
        lambda p: self.processed.setPixmap(p))
    self.videoThread.changecurrentTime.connect(
        lambda t: self.__updateSlider(t))
    self.videoThread.videoReader = videoReader
    self.videoThread.videoAnalyzer = self.videoAnalyzer
    self.videoThread.videoAnalyzer.updateParameters()
    if self.videoThread.isRunning() == False:
        self.videoThread.start()
    self.playbackStarted.emit()
def video_processing(videos_dir):
    depth, height, width = 16, 120, 160
    color = True
    factor = 4
    vreader = VideoReader(depth, height, width, color, factor)
    vreader.read_ucf_labels('../PA_HMDB51/privacy_split/classInd.txt')
    vreader.read_train_test_split('../PA_HMDB51/privacy_split/trainlist01.txt',
                                  '../PA_HMDB51/privacy_split/testlist01.txt')
    X_train, Y_train, X_test, Y_test = vreader.loaddata(videos_dir)
    X_train = X_train.reshape((X_train.shape[0], depth, height, width, 3)).astype('uint8')
    X_test = X_test.reshape((X_test.shape[0], depth, height, width, 3)).astype('uint8')
    print('X_train shape:{}\nY_train shape:{}'.format(X_train.shape, Y_train.shape))
    print('X_test shape:{}\nY_test shape:{}'.format(X_test.shape, Y_test.shape))
    return X_train, Y_train.astype('uint32'), X_test, Y_test.astype('uint32')
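# A minimal usage sketch (not from the original code). The clip directory name and
# the float normalisation step are assumptions; the function itself only returns
# uint8 clips and uint32 labels.
X_train, Y_train, X_test, Y_test = video_processing('../PA_HMDB51/videos')  # assumed path

# Convert to float in [0, 1] only right before feeding a model; the uint8 dtype
# above keeps the loaded clips compact in memory.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
print('train clips:', X_train.shape[0], 'test clips:', X_test.shape[0])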
class InteractionMaker:
    EMOTION_PROB_THRESH = 0

    def __init__(self):
        self.detection_reader = DetectionReader('detections.json')
        self.project_file_name = '/home/algernon/andro2'
        self.video_file_name = ''
        self.db_name = ''
        self.data_base = None
        self.video_maker = None
        self.db_user_name = 'root'
        self.db_user_pass = '******'
        self.db_host = 'localhost'
        self.commands = []
        self.output_video_file_name = 'output.mkv'
        self.video_reader = None
        self.video_writer = None
        self.emotion_detection_reader = DetectionReader('emotion_results/er.json')
        self.emotion_recognizer = EmotionRecognizer(self.EMOTION_PROB_THRESH)
        self.captioner = Captioner(
            '/home/algernon/a-PyTorch-Tutorial-to-Image-Captioning/weights/BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar',
            '/home/algernon/a-PyTorch-Tutorial-to-Image-Captioning/weights/WORDMAP_coco_5_cap_per_img_5_min_word_freq.json')
        self.segmentator = None
        self.clothes_detector = ClothesDetector("yolo/df2cfg/yolov3-df2.cfg",
                                                "yolo/weights/yolov3-df2_15000.weights",
                                                "yolo/df2cfg/df2.names")
        self.face_recognizer = FaceRecognizer()
        self.open_project()
        self.recognizer = Recognizer(
            '/home/algernon/PycharmProjects/AIVlog/mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py',
            '/home/algernon/PycharmProjects/AIVlog/mmdetection/work_dirs/faster_rcnn_r50_fpn_1x_voc0712/epoch_10.pth')

    def open_project(self):
        with open(self.project_file_name, 'r') as project_file:
            self.video_file_name = project_file.readline().strip()
            self.db_name = project_file.readline().strip()
        self.data_base = DB(self.db_host, self.db_user_name, self.db_user_pass, self.db_name)
        self.video_reader = VideoReader(self.video_file_name)
        self.video_writer = cv2.VideoWriter(self.output_video_file_name,
                                            cv2.VideoWriter_fourcc(*"XVID"),
                                            self.video_reader.fps,
                                            (self.video_reader.width, self.video_reader.height))
        self.segmentator = SceneSegmentator(self.video_reader.fps * 5)
        self.load_commands_from_db()

    def load_commands_from_db(self):
        # upload commands
        cursor = self.data_base.exec_query("SELECT * FROM Command")
        while cursor.rownumber < cursor.rowcount:
            command_response = cursor.fetchone()
            query = "SELECT name FROM Labels WHERE label_id=%s"
            attached_character_class = \
                self.data_base.exec_template_query(query, [command_response['attached_character_class']]).fetchone()['name']
            relation_class = ''
            if command_response['relation_class'] is not None:
                relation_class = \
                    self.data_base.exec_template_query(query, [command_response['relation_class']]).fetchone()['name']
            media_response = self.data_base.exec_query(
                f"SELECT * FROM Media WHERE media_id={command_response['media_id']}").fetchone()
            media = Media(media_response['file_name'], media_response['type'], media_response['duration'])
            trigger_cmd_name = ''
            trigger_cmd_id = command_response['trigger_event_id']
            if trigger_cmd_id is not None:
                trigger_cmd_name = \
                    self.data_base.exec_query(f"SELECT name FROM Command WHERE command_id={trigger_cmd_id}").fetchone()['name']
            delay = command_response['delay']
            emotion = ''
            emotion_id = command_response['expected_emotion_id']
            if emotion_id is not None:
                emotion = \
                    self.data_base.exec_query(f"SELECT name FROM Emotion WHERE emotion_id={emotion_id}").fetchone()['name']
            command = Command(command_response['name'], command_response['centered'],
                              command_response['trigger_event_id'], attached_character_class,
                              relation_class, CommandType(command_response['command_type_id']),
                              trigger_cmd_name, media, command_response['duration'], delay, emotion)
            self.commands.append(command)

    def process_commands(self):
        for _ in trange(self.video_reader.frame_count):
            frame = self.video_reader.get_next_frame()
            cur_frame_num = self.video_reader.cur_frame_num
            #emotion_detections = self.detect_emotions_on_frame(frame)
            emotion_detections = []
            #self.segmentator.push_frame(frame)
            clothes_detections = self.clothes_detector.detect_clothes(frame)
            self.draw_clothes(frame, clothes_detections)
            emotions_per_frame = []
            for emotion_pos, emotion in emotion_detections:
                emotions_per_frame.append((emotion_pos, emotion))
                self.draw_emotion_box(frame, emotion_pos, emotion)
            #_, object_detections_per_frame = self.recognizer.inference(frame)
            object_detections_per_frame = []
            draw_det_boxes(frame, object_detections_per_frame)
            labels_per_frame = [detection[0] for detection in object_detections_per_frame]
            states_needed_to_be_checked_on_event = [Command.State.WAITING,
                                                    Command.State.EXECUTING,
                                                    Command.State.AFTER_DELAYING]
            commands_needed_to_be_checked_on_event = [cmd for cmd in self.commands
                                                      if cmd.cur_state in states_needed_to_be_checked_on_event]
            for command in commands_needed_to_be_checked_on_event:
                self.update_commands(command, object_detections_per_frame,
                                     emotions_per_frame, labels_per_frame)
            executing_commands = [cmd for cmd in self.commands if cmd.cur_state == cmd.State.EXECUTING]
            for active_cmd in executing_commands:
                active_cmd.exec(frame)
            delaying_commands = [cmd for cmd in self.commands if cmd.cur_state == cmd.State.DELAYING]
            for delaying_command in delaying_commands:
                if delaying_command.wait_out_delay():
                    delaying_command.set_as_after_delay()
            #self.show_caption(frame)
            cv2.imshow('frame', frame)
            self.video_writer.write(frame)
            cv2.waitKey(1)

    def show_caption(self, frame):
        most_clear_img = self.segmentator.get_most_clear_frame()
        caption = self.captioner.caption_img(most_clear_img)
        cv2.putText(frame, caption, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, Color.GOLD, 2)

    def draw_clothes(self, frame, clothes_detections):
        # clothes_detections: [[label: str, ((x1, y1), (x2, y2)), prob], ..]
        font = cv2.FONT_HERSHEY_SIMPLEX
        color = Color.BLACK
        for label, ((x1, y1), (x2, y2)), prob in clothes_detections:
            text = f'{label} ({prob}%)'
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 3)
            cv2.rectangle(frame, (x1 - 2, y1 - 25), (int(x1 + 8.5 * len(text)), y1), color, -1)
            cv2.putText(frame, text, (x1, y1 - 5), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)

    def detect_emotions_on_frame(self, frame):
        # returns a list of items of the following format:
        # ((lt_point: tuple, rb_point: tuple), (emotion: str, prob: int))
        detected_faces = self.face_recognizer.recognize_faces_on_image(frame)
        emotions = []
        for face_pos in detected_faces:
            (l, t), (r, b) = face_pos
            face_img = frame[t:b, l:r]
            emotion = self.emotion_recognizer.recognize_emotion_by_face(face_img)
            if emotion:
                emotions.append((face_pos, emotion))
        return emotions

    def draw_emotion_box(self, frame, emotion_pos, emotion: list):
        cv2.rectangle(frame, *emotion_pos, Color.GOLD, 2)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(frame, f'{emotion[0]} - {emotion[1]}',
                    (emotion_pos[0][0], emotion_pos[0][1] - 5), font, 1, Color.YELLOW, 3)

    def update_commands(self, command, detections_per_frame, emotions_per_frame, labels_per_frame):
        if command.command_type == CommandType.OBJECTS_TRIGGER:
            self.check_object_on_the_screen_event(command, detections_per_frame, labels_per_frame)
        elif command.command_type == CommandType.REACTION_CHAIN_TRIGGER:
            self.check_reactions_chain_event(command, detections_per_frame, labels_per_frame)
        elif command.command_type == CommandType.EMOTION_TRIGGER:
            self.check_emotion_event(command, detections_per_frame, emotions_per_frame, labels_per_frame)

    def check_emotion_event(self, command: Command, objects_detections, emotion_detections, labels_per_frame):
        # emotion_detections format - [((start_point, end_point), (emotion, prob)), ...]
        # check whether the main object is present
        if command.attached_character_class in labels_per_frame:
            # check whether the expected emotion is present
            expected_emotions = [emotion for emotion in emotion_detections
                                 if emotion[1][0] == command.emotion]
            # check whether an emotion box is inside the main object
            main_object_box = self.get_coords(command, objects_detections, labels_per_frame)
            main_object_box = (main_object_box[:2]), (main_object_box[2:])
            emotion = [emotion for emotion in expected_emotions
                       if self.is_rect_inside_rect((emotion[0][0], emotion[0][1]), main_object_box)]
            assert len(emotion) <= 1
            if emotion:
                print(emotion)
                coords = *emotion[0][0][0], *emotion[0][0][1]
                self.update_state(True, command, objects_detections, labels_per_frame, coords=coords)

    def is_rect_inside_rect(self, in_rect: tuple, out_rect: tuple):
        lt_in_box_point_inside_out_box = all(
            [out_rect[0][i] <= in_rect[0][i] <= out_rect[1][i] for i in range(2)])
        rb_in_box_point_inside_out_box = all(
            [out_rect[0][i] <= in_rect[1][i] <= out_rect[1][i] for i in range(2)])
        return lt_in_box_point_inside_out_box and rb_in_box_point_inside_out_box

    def check_reactions_chain_event(self, command: Command, detections_per_frame, labels_per_frame):
        # the main object is present
        if command.attached_character_class in labels_per_frame:
            # check whether the triggering command is active
            active_command_names = [command.name for command in self.commands
                                    if command.cur_state == command.State.EXECUTING]
            event_happened = command.trigger_cmd_name in active_command_names
            self.update_state(event_happened, command, detections_per_frame, labels_per_frame)

    def check_object_on_the_screen_event(self, command: Command, detections_per_frame, labels_per_frame):
        desired_classes = {command.attached_character_class, command.relation_class}
        # the desired labels were found
        event_happened = desired_classes.issubset(labels_per_frame)
        self.update_state(event_happened, command, detections_per_frame, labels_per_frame)

    def update_state(self, event_happened, command, detections_per_frame, labels_per_frame, coords=None):
        if event_happened:
            if command.cur_state == command.State.WAITING:
                command.set_as_delaying(self.video_reader.one_frame_duration)
                return
            coords = self.get_coords(command, detections_per_frame, labels_per_frame) if not coords else coords
            if command.cur_state == command.State.EXECUTING:
                command.overlay.set_coords(coords)
            # TODO: extract this part from update_commands later
            if command.cur_state == command.State.AFTER_DELAYING:
                if command.media.type == MediaType.VIDEO:
                    command.overlay = self.generate_video_overlay(command, coords)
                elif command.media.type == MediaType.IMAGE:
                    command.overlay = self.generate_image_overlay_object(command, coords)
                elif command.media.type == MediaType.TEXT:
                    command.overlay = self.generate_text_overlay_object(command, coords)
                command.set_as_executing()
        elif command.cur_state == command.cur_state.AFTER_DELAYING:
            command.set_as_waiting()

    @staticmethod
    def get_coords(command: Command, detections_per_frame, labels_per_frame):
        main_box = detections_per_frame[labels_per_frame.index(command.attached_character_class)][1]
        coords = main_box
        if command.centered:
            secondary_box = detections_per_frame[labels_per_frame.index(command.relation_class)][1]
            main_box_center = [(main_box[i + 2] + main_box[i]) // 2 for i in range(2)]
            secondary_box_center = [(secondary_box[i + 2] + secondary_box[i]) // 2 for i in range(2)]
            boxes_center = [(main_box_center[i] + secondary_box_center[i]) // 2 for i in range(2)]
            coords = boxes_center
        return coords

    def generate_video_overlay(self, command: Command, coords: tuple):
        video_cap = cv2.VideoCapture(command.media.file_name)
        duration = command.media.duration if command.duration == 0 else command.duration
        return VideoOverlay(video_cap, duration, coords, self.video_reader.one_frame_duration)

    def generate_image_overlay_object(self, command: Command, coords: tuple):
        image = cv2.imread(command.media.file_name)
        return ImageOverlay(image, command.duration, coords, self.video_reader.one_frame_duration)

    def generate_text_overlay_object(self, command: Command, coords: tuple):
        texts = self.read_text_from_file(command.media.file_name)
        ellipse, text_rect = generate_thought_balloon_by_text(texts)
        return TextOverlay((ellipse, text_rect), command.duration, coords, self.video_reader.one_frame_duration)

    def read_text_from_file(self, txt_file):
        with open(txt_file) as txt:
            texts = txt.readlines()
        return texts

    def close(self):
        if self.video_reader:
            self.video_reader.close()
        if self.video_writer:
            self.video_writer.release()
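# A minimal sketch of a script entry point (not part of the original file); the
# try/finally wrapper and the window cleanup are assumptions. InteractionMaker
# reads its project file and database settings inside __init__.
if __name__ == '__main__':
    maker = InteractionMaker()
    try:
        maker.process_commands()
    finally:
        maker.close()
        cv2.destroyAllWindows()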
class ImageClicker(object):
    def __init__(self):
        args = parse_args(sys.argv[1], sys.argv[2])
        planar = glob.glob(sys.argv[1] + '/*_planar.npz')[0]
        npz = np.load(planar)
        self.planes = npz['planes']
        (self.imu_transforms_mk1,
         self.gps_data_mk1,
         self.gps_times_mk1) = get_transforms(args, 'mark1')
        (self.imu_transforms_mk2,
         self.gps_data_mk2,
         self.gps_times_mk2) = get_transforms(args, 'mark2')
        self.back_projector = BackProjector(args)
        self.vr = VideoReader(args['video'])
        self.t = 1
        cv2.namedWindow('image', cv2.WINDOW_AUTOSIZE)
        cv2.setMouseCallback('image', self.mouseHandler)
        self.lanes = None
        self.markings = None

    def mouseHandler(self, event, x, y, flags, param):
        if event == cv2.EVENT_LBUTTONUP:
            if self.markings is None:
                self.markings = np.array([x, y])[np.newaxis]
            else:
                self.markings = np.vstack([self.markings, [x, y]])
            pix = np.array([x, y, 1])
            mk1_t = mk2_to_mk1(self.t, self.gps_times_mk1, self.gps_times_mk2)
            xform = self.imu_transforms_mk1[mk1_t]
            v = self.back_projector.calculateBackProjection(xform, pix)
            l0 = self.back_projector.calculateBackProjection(xform)
            l = l0 - v
            # pick the plane whose anchor point is closest to the intersection
            best_model = (np.inf, 0, 0)
            for i, plane in enumerate(self.planes):
                pt = self.back_projector.calculateIntersection(l0, l, plane)
                d = np.linalg.norm(pt - plane[3:])
                if d < best_model[0]:
                    best_model = (d, i, pt)
            print best_model[-1]
            if self.lanes is None:
                self.lanes = best_model[-1]
            else:
                self.lanes = np.vstack((self.lanes, best_model[-1]))

    def exportData(self, file_name):
        lanes = {}
        lanes['lane0'] = self.lanes
        print 'saved:'
        print self.lanes
        np.savez(file_name, **lanes)

    def nextFrame(self):
        while True:
            while self.vr.framenum < self.t:
                success, I = self.vr.getNextFrame()
            if self.markings is not None:
                x = self.markings[:, 0]
                y = self.markings[:, 1]
                for i in xrange(4):
                    I[y + i, x, :] = np.array([255, 255, 0])
                    I[y - i, x, :] = np.array([255, 255, 0])
                    I[y, x + i, :] = np.array([255, 255, 0])
                    I[y, x - i, :] = np.array([255, 255, 0])
            cv2.imshow('image', I)
            key = cv2.waitKey(1) & 0xFF
            if key != 255:
                print key
            if key == 27:    # esc
                return
            if key == 115:   # s
                self.exportData(sys.argv[1] + '/multilane_points_image.npz')
            elif key == 32:  # space
                self.t += 20
                self.markings = None
import sys

from GPSReader import GPSReader
from VideoReader import VideoReader
from GPSTransforms import IMUTransforms
from LidarTransforms import LDRLoader, utc_from_gps_log_all
from ArgParser import parse_args
from pipeline_config import LDR_DIR
from pipeline_config import EXPORT_START, EXPORT_NUM, EXPORT_STEP

'''
Create new lidar files synced to video
'''

if __name__ == '__main__':
    args = parse_args(sys.argv[1], sys.argv[2])
    cam_num = int(sys.argv[2][-5])
    video_file = args['video']
    video_reader = VideoReader(video_file)
    params = args['params']
    cam = params['cam'][cam_num - 1]
    gps_reader_mark2 = GPSReader(args['gps_mark2'])
    gps_data_mark2 = gps_reader_mark2.getNumericData()
    lidar_loader = LDRLoader(args['frames'])
    imu_transforms_mark2 = IMUTransforms(gps_data_mark2)
    gps_times_mark2 = utc_from_gps_log_all(gps_data_mark2)
    frame_ldr_map = ''

    # TODO Check this
    if EXPORT_START != 0:
        video_reader.setFrame(EXPORT_START)
class SalienceRecognizer:
    EMOTION_PROB_THRESH = 0
    DELAY_TO_DETECT_IN_SECS = 5
    FONT = cv2.FONT_HERSHEY_SIMPLEX

    def __init__(self):
        self.output_video_file_name = '/home/algernon/samba/video_queue/SalienceRecognizer/videos/processed/output2.mkv'
        self.emotion_recognizer = EmotionRecognizer(self.EMOTION_PROB_THRESH)
        self.segmentator = None
        # self.clothes_detector = ClothesDetector("yolo/df2cfg/yolov3-df2.cfg", "yolo/weights/yolov3-df2_15000.weights", "yolo/df2cfg/df2.names")
        self.video_file_name = '/home/algernon/Videos/source_videos/interview_anna.webm'
        self.captioner = Captioner()
        self.video_reader = VideoReader(self.video_file_name)
        self.joke_picker = JokePicker('joke_picker/shortjokes.csv', 'joke_picker/joke_picker.fse')
        self.video_writer = cv2.VideoWriter(
            self.output_video_file_name, cv2.VideoWriter_fourcc(*"XVID"),
            self.video_reader.fps, (self.video_reader.width, self.video_reader.height))
        self.face_recognizer = FaceRecognizer()
        self.segmentator = SceneSegmentator(self.video_reader.fps * self.DELAY_TO_DETECT_IN_SECS)
        self.object_detection_reader = DetectionReader('detections.json')
        self.object_detector = ObjectDetector(
            './configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml',
            'https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl'
        )
        self.age_gender_predictor = AgeGenderPredictor()
        self.beauty_estimator = BeautyEstimator('/home/algernon/DNNS/isBeauty/weights/epoch_50.pkl')
        self.main_person_definer = MainPersonDefiner()
        self.context_generator = ContextGenerator(self.object_detector.classes)
        self.speech_recognizer = SpeechRecognizer(self.video_file_name)

    def launch(self):
        for _ in trange(self.video_reader.frame_count):
            frame = self.video_reader.get_next_frame()
            frame_for_detection = self.get_clearest_frame(frame)
            use_prev_detects = frame_for_detection is None
            speech = self.speech_recognizer.rewind_audio(self.video_reader.get_cur_timestamp())
            if not use_prev_detects:
                # faces
                detected_faces = self.face_recognizer.recognize_faces_on_image(
                    frame_for_detection, get_prev=use_prev_detects)
                # main person
                main_person_index = self.main_person_definer.get_main_person_by_face_size(
                    detected_faces, get_prev=use_prev_detects)
                # emotions
                emotion_detections = self.emotion_recognizer.detect_emotions_on_frame(
                    frame_for_detection, detected_faces, get_prev=use_prev_detects)
                emotion = [emotion_detections[main_person_index]] if main_person_index is not None else []
                # age / gender
                age_gender_predictions = self.age_gender_predictor.detect_age_dender_by_faces(
                    frame_for_detection, detected_faces, get_prev=use_prev_detects)
                age_gender_prediction = [age_gender_predictions[main_person_index]] if main_person_index is not None else []
                # beauty
                beauty_scores = self.beauty_estimator.estimate_beauty_by_face(
                    frame_for_detection, detected_faces, get_prev=use_prev_detects)
                beauty_score = [beauty_scores[main_person_index]] if main_person_index is not None else []
                # clothes
                # clothes_detections = self.clothes_detector.detect_clothes(frame)
                # clothes_detections = []
                # self.draw_clothes(frame, clothes_detections)
                # caption
                # caption = self.captioner.caption_img(frame_for_detection, get_prev=use_prev_detects)
                # objects
                object_detections = self.object_detector.forward(
                    frame_for_detection, get_prev=use_prev_detects)
                # object_detections = self.object_detection_reader.get_detections_per_specified_frame(cur_frame_num)
                # context = self.context_generator.generate_context(object_detections, caption, emotion,
                #                                                   age_gender_prediction, beauty_score,
                #                                                   get_prev=use_prev_detects)
                # cv2.putText(frame, 'Context:', (0, 360), cv2.FONT_HERSHEY_SIMPLEX, 0.8, Color.GOLD, 2)
                # cv2.putText(frame, context, (0, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.8, Color.GOLD, 2)
                # jokes = self.joke_picker.pick_jokes_by_context(context, get_prev=use_prev_detects)
                # self.apply_jokes_on_frame(jokes, frame)
            self.apply_emotions_on_frame(frame, emotion_detections)
            self.apply_beauty_scores_on_frame(frame, detected_faces, beauty_scores)
            self.apply_age_gender_on_frame(frame, detected_faces, age_gender_predictions)
            # self.apply_caption_on_frame(frame, caption)
            frame = self.object_detector.draw_boxes(frame, object_detections)
            cv2.imshow('frame', frame)
            self.video_writer.write(frame)
            cv2.waitKey(1)

    def apply_age_gender_on_frame(self, frame, faces, age_gender_predictions):
        for i, face in enumerate(faces):
            tl_x, tl_y = face[0]  # top-left corner of the face box
            cv2.putText(frame, f'{age_gender_predictions[i]}', (tl_x - 5, tl_y + 60),
                        self.FONT, 1, Color.BLACK, 2)

    def apply_beauty_scores_on_frame(self, frame, faces, beauty_scores):
        for i, face in enumerate(faces):
            tl_x, tl_y = face[0]  # top-left corner of the face box
            cv2.putText(frame,
                        f'{ContextGenerator.beauty_score2desc(beauty_scores[i])} ({beauty_scores[i]})',
                        (tl_x - 5, tl_y + 30), self.FONT, 1, Color.BLACK, 2)

    def apply_jokes_on_frame(self, jokes, frame):
        height = frame.shape[0]
        joke_height = 40
        joke_y = height - joke_height * len(jokes)
        for joke in jokes:
            cv2.putText(frame, joke, (0, joke_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, Color.GOLD, 2)
            joke_y += joke_height

    def apply_emotions_on_frame(self, frame, emotion_detections):
        for emotion_pos, emotion in emotion_detections:
            self.draw_emotion_box(frame, emotion_pos, emotion)

    def get_clearest_frame(self, frame):
        self.segmentator.push_frame(frame)
        most_clear_img = self.segmentator.get_most_clear_frame()
        return most_clear_img

    def apply_caption_on_frame(self, frame, caption):
        cv2.putText(frame, caption, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, Color.BLUE, 2)

    def draw_clothes(self, frame, clothes_detections):
        # clothes_detections: [[label: str, ((x1, y1), (x2, y2)), prob], ..]
        color = Color.BLACK
        for label, ((x1, y1), (x2, y2)), prob in clothes_detections:
            text = f'{label} ({prob}%)'
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 3)
            cv2.rectangle(frame, (x1 - 2, y1 - 25), (int(x1 + 8.5 * len(text)), y1), color, -1)
            cv2.putText(frame, text, (x1, y1 - 5), self.FONT, 0.5, Color.WHITE, 1, cv2.LINE_AA)

    def draw_emotion_box(self, frame, emotion_pos, emotion: list):
        cv2.rectangle(frame, *emotion_pos, Color.GOLD, 2)
        cv2.putText(frame, f'{emotion[0]} - {emotion[1]}',
                    (emotion_pos[0][0], emotion_pos[0][1] - 5), self.FONT, 1, Color.BLACK, 3)

    def is_rect_inside_rect(self, in_rect: tuple, out_rect: tuple):
        lt_in_box_point_inside_out_box = all([
            out_rect[0][i] <= in_rect[0][i] <= out_rect[1][i] for i in range(2)
        ])
        rb_in_box_point_inside_out_box = all([
            out_rect[0][i] <= in_rect[1][i] <= out_rect[1][i] for i in range(2)
        ])
        return lt_in_box_point_inside_out_box and rb_in_box_point_inside_out_box

    # def generate_video_overlay(self, command: Command, coords: tuple):
    #     video_cap = cv2.VideoCapture(command.media.file_name)
    #     duration = command.media.duration if command.duration == 0 else command.duration
    #     return VideoOverlay(video_cap, duration, coords, self.video_reader.one_frame_duration)
    #
    # def generate_image_overlay_object(self, command: Command, coords: tuple):
    #     image = cv2.imread(command.media.file_name)
    #     return ImageOverlay(image, command.duration, coords, self.video_reader.one_frame_duration)
    #
    # def generate_text_overlay_object(self, command: Command, coords: tuple):
    #     texts = self.read_text_from_file(command.media.file_name)
    #     ellipse, text_rect = generate_thought_balloon_by_text(texts)
    #     return TextOverlay((ellipse, text_rect), command.duration, coords, self.video_reader.one_frame_duration)

    # def read_text_from_file(self, txt_file):
    #     with open(txt_file) as txt:
    #         texts = txt.readlines()
    #     return texts

    def close(self):
        if self.video_reader:
            self.video_reader.close()
        if self.video_writer:
            self.video_writer.release()
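# A small standalone sanity check (not from the original code) of the corrected
# rectangle-containment helper: a box counts as "inside" only if both its top-left
# and bottom-right corners fall within the outer box. The sample coordinates and
# the __new__ trick to skip the heavy __init__ are assumptions for illustration.
outer = ((10, 10), (100, 100))
inner = ((20, 20), (50, 50))      # fully contained
crossing = ((20, 20), (150, 50))  # bottom-right corner sticks out

checker = SalienceRecognizer.__new__(SalienceRecognizer)  # bypass __init__ (no video needed)
assert checker.is_rect_inside_rect(inner, outer)
assert not checker.is_rect_inside_rect(crossing, outer)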
class Controller(Saveable):
    """This class connects all the in- and output classes together and provides
    a clean interface for the connection to the gui.
    This means that a lot of the methods are like proxies, eg. `isClockRunning()`
    uses the instance of the Clock class to run the corresponding method in Clock.
    For information about those methods, please see the documentation of the class
    they are defined in.
    The images for the cursors which indicate the state of each eye are retrieved
    from the config file.
    """

    def __init__(self, video_str, current_frame):
        self.video_reader = None
        self.eye_movement = None
        self.clock = None
        self.cursors = []
        self.category_container = None
        self.categorise_frames = False
        self.video_image = None
        self.show_eyes = [False, False, False]  # [left_eye, right_eye, mean_eye]
        self.categorising_eye_is_left = None  # True -> left, False -> right, None -> mean
        self.video_str = video_str
        self.current_frame = current_frame
        """contains the current image of the overlayed video.
        shared memory, so no latent ipc is needed"""
        self.config = Config()
        self.cursors = {None: None,
                        'fixated_left': self.config.get('cursors', 'fixated_left'),
                        'saccade_left': self.config.get('cursors', 'saccade_left'),
                        'blink_left': self.config.get('cursors', 'blink_left'),
                        'fixated_right': self.config.get('cursors', 'fixated_right'),
                        'saccade_right': self.config.get('cursors', 'saccade_right'),
                        'blink_right': self.config.get('cursors', 'blink_right'),
                        'fixated_mean': self.config.get('cursors', 'fixated_mean'),
                        'saccade_mean': self.config.get('cursors', 'saccade_mean'),
                        'blink_mean': self.config.get('cursors', 'blink_mean'),
                        }

    # ----- CLOCK STUFF ----
    def _clock_tick(self, frame):
        self.produceCurrentImage()

    # ------ STATUS STUFF ---
    def isClockRunning(self):
        return self.clock.running

    def categorisationEye(self):
        if self.categorising_eye_is_left == True:
            return 'left'
        elif self.categorising_eye_is_left == False:
            return 'right'
        else:
            return 'mean'

    def categorisationObjects(self):
        if self.categorise_frames == False:
            return 'fixations'
        else:
            return 'frames'

    def leftEyeStatus(self, show):
        self.show_eyes[0] = bool(show)
        # reproduce the current image to show or exclude this eye
        self.produceCurrentImage()

    def rightEyeStatus(self, show):
        self.show_eyes[1] = bool(show)
        # reproduce the current image to show or exclude this eye
        self.produceCurrentImage()

    def meanEyeStatus(self, show):
        self.show_eyes[2] = bool(show)
        # reproduce the current image to show or exclude this eye
        self.produceCurrentImage()

    def getEyeStatus(self):
        return self.show_eyes

    def ready(self):
        """You should always make sure this class is ready before using its
        functions. If `ready()` returns true, this means all the needed data is
        available."""
        return bool(self.video_reader) and \
            bool(self.eye_movement) and \
            bool(self.clock) and \
            bool(self.cursors) and \
            bool(self.category_container)

    def getMaxFramesOfEyeMovement(self):
        return self.eye_movement.maxFrames()

    def plausibleCheck(self):
        percent = float(self.getMaxFramesOfEyeMovement()) / float(self.getVideoFrameCount())
        if abs(percent - 1) * 100 > self.config.get('general', 'plausible_edf_video_frame_diff_percent'):
            return False
        else:
            return True

    # ----------- LOAD/SAVE/NEW PROJECT ----
    def new_project(self, video_file, eye_movement_file, trialid_target,
                    categorise_frames=False, categorising_eye_is_left=None):
        """Creates a new project.
        Categorisation of frames or fixations is indicated by the
        'categorise_frames' flag.
        """
        self.categorise_frames = categorise_frames
        self.categorising_eye_is_left = categorising_eye_is_left
        self.video_reader = VideoReader(video_file)
        self.eye_movement = EyeMovement(eye_movement_file, trialid_target)
        self.clock = Clock(self.video_reader.frame_count, self.video_reader.fps)
        self.clock.register(self._clock_tick)
        if self.categorise_frames:
            objects = {}
            for frame in xrange(int(self.video_reader.frame_count)):
                objects[(frame, frame)] = str(frame)
        else:
            objects = self.eye_movement.fixations(self.categorising_eye_is_left)
        self.category_container = CategoryContainer(objects)
        if categorising_eye_is_left == True:
            self.show_eyes = [True, False, False]
        elif categorising_eye_is_left == False:
            self.show_eyes = [False, True, False]
        else:
            self.show_eyes = [False, False, True]
        # seek to zero so we'll have a picture after loading the video file
        self.produceCurrentImage()

    def save_project(self, saved_filepath):
        sc = SaveController()
        sc.addSaveable('eye_movement', self.eye_movement)
        sc.addSaveable('category_container', self.category_container)
        sc.addSaveable('video_reader', self.video_reader)
        sc.addSaveable('clock', self.clock)
        sc.addSaveable('controller', self)
        sc.saveToFile(saved_filepath)

    def load_project(self, saved_filepath, overwrite_video_filepath=None):
        sc = SaveController()
        sc.loadFromFile(saved_filepath)
        controller_state = sc.getSavedState('controller')
        self.show_eyes = controller_state['show_eyes']
        self.categorise_frames = controller_state['categorise_frames']
        self.eye_movement = EyeMovement(saved_state=sc.getSavedState('eye_movement'))
        if overwrite_video_filepath is None:
            self.video_reader = VideoReader(saved_state=sc.getSavedState('video_reader'))
        else:
            self.video_reader = VideoReader(overwrite_video_filepath)
        self.clock = Clock(saved_state=sc.getSavedState('clock'))
        self.clock.register(self._clock_tick)
        self.category_container = CategoryContainer(saved_state=sc.getSavedState('category_container'))
        self.produceCurrentImage()

    def getState(self):
        """Returns the state of the selected eye(s) and whether frames or
        fixations are being categorised."""
        return {'show_eyes': self.show_eyes, 'categorise_frames': self.categorise_frames}

    # ----------- CATEGORISATION STUFF ----
    def categorise(self, shortcut):
        try:
            return self.category_container.categorise(self.clock.frame, shortcut)
        except CategoryContainerError:
            raise
        return False

    def deleteCategorisation(self, frame):
        self.category_container.deleteCategorisation(frame)

    def getCategorisations(self):
        return self.category_container.dictOfCategorisations()

    def getCategorisationsOrder(self):
        return self.category_container.start_end_frames

    def exportCategorisations(self, filepath):
        self.category_container.export(filepath)

    def getCategories(self):
        return self.category_container.categories

    def editCategory(self, old_shortcut, new_shortcut, category_name):
        self.category_container.editCategory(old_shortcut, new_shortcut, category_name)

    def getCategoryContainer(self):
        return self.category_container

    def importCategories(self, filepath):
        self.category_container.importCategories(filepath)

    def getCategoryOfFrame(self, frame):
        return self.category_container.getCategoryOfFrame(frame)

    # ----------- IMAGE PROCESSING ----
    def overlayedFrame(self, frame, left, right, mean):
        """This method produces the overlay of eyemovement data on the current frame.
        It uses the video_reader to grab the frame and then uses `_addCursorToImage()`
        to draw the overlay."""
        # retrieve original image from video file
        image = self.video_reader.frame(frame)
        # add cursors as needed
        if left and not self.eye_movement.statusLeftEyeAt(frame) is None:
            self._addCursorToImage(image,
                                   self.cursors[self.eye_movement.statusLeftEyeAt(frame) + '_left'],
                                   self.eye_movement.leftLookAt(frame))
        if right and not self.eye_movement.statusRightEyeAt(frame) is None:
            self._addCursorToImage(image,
                                   self.cursors[self.eye_movement.statusRightEyeAt(frame) + '_right'],
                                   self.eye_movement.rightLookAt(frame))
        if mean and not self.eye_movement.meanStatusAt(frame) is None:
            self._addCursorToImage(image,
                                   self.cursors[self.eye_movement.meanStatusAt(frame) + '_mean'],
                                   self.eye_movement.meanLookAt(frame))
        return image

    def _addCursorToImage(self, image, cursor, position):
        """This helper method draws the overlay of the cursor image onto the video
        image, by using functions of opencv. In order for the overlay to be drawn
        correctly, it has to be put in a mask (cursorMask)."""
        # in case we don't have information about the position or cursor, end now
        if position is None or cursor is None:
            return
        cursor_left_upper_corner = (int(position[0] - cursor.width / 2),
                                    int(position[1] - cursor.height / 2))
        cursor_right_lower_corner = (cursor_left_upper_corner[0] + cursor.width,
                                     cursor_left_upper_corner[1] + cursor.height)
        cursorROI = [0, 0, cursor.width, cursor.height]
        imageROI = [cursor_left_upper_corner[0], cursor_left_upper_corner[1],
                    cursor.width, cursor.height]
        if cursor_right_lower_corner[0] <= 0 or cursor_right_lower_corner[1] < 0 or \
           cursor_left_upper_corner[0] > image.width or cursor_left_upper_corner[1] > image.height:
            #print "cursor is out of image"
            # cursor out of image
            return
        if cursor_left_upper_corner[0] < 0:
            #print "left upper edge of cursor is left of image border"
            cursorROI[0] = - cursor_left_upper_corner[0]
            cursorROI[2] -= cursorROI[0]
            imageROI[0] = 0
        if cursor_left_upper_corner[1] < 0:
            #print "left upper edge of cursor is above image border"
            cursorROI[1] = - cursor_left_upper_corner[1]
            cursorROI[3] -= cursorROI[1]
            imageROI[1] = 0
        if cursor_right_lower_corner[0] > image.width:
            #print "right lower edge of cursor is right of image"
            cursorROI[2] = cursor.width - (cursor_right_lower_corner[0] - image.width)
            if cursorROI[2] == 0:
                return  # width of cursor would be zero
        if cursor_right_lower_corner[1] > image.height:
            #print "right lower edge of cursor is below image"
            cursorROI[3] = cursor.height - (cursor_right_lower_corner[1] - image.height)
            if cursorROI[3] == 0:
                return  # height of cursor would be zero
        imageROI[2] = cursorROI[2]
        imageROI[3] = cursorROI[3]
        cv.SetImageROI(cursor, tuple(cursorROI))
        cursorMask = cv.CreateImage((cursorROI[2], cursorROI[3]), cv.IPL_DEPTH_8U, 1)
        for row in xrange(cursorROI[3]):
            for col in xrange(cursorROI[2]):
                if cursor[row, col] != (0, 0, 0):
                    cursorMask[row, col] = 1
                else:
                    cursorMask[row, col] = 0
        cv.SetImageROI(image, tuple(imageROI))
        cv.SubS(image, cv.Scalar(101, 101, 101), image, cursorMask)
        cv.Add(image, cursor, image)
        cv.ResetImageROI(image)

    def produceCurrentImage(self):
        """This method populates the video widget of the gui with the current video
        image. For more information, please look at the documentation of the Clock
        class."""
        frame = self.clock.frame
        fr = self.overlayedFrame(frame, self.show_eyes[0], self.show_eyes[1], self.show_eyes[2])
        return_frame = cv.CreateImage((self.video_reader.width, self.video_reader.height),
                                      cv.IPL_DEPTH_8U, 3)
        cv.Copy(fr, return_frame)
        cv.CvtColor(return_frame, return_frame, cv.CV_BGR2RGB)
        self.video_image = return_frame
        self.current_frame.value = frame
        self.video_str.value = return_frame.tostring()

    def exportVideo(self, output_file):
        """Exports the overlayed video to a new video file by using the VideoWriter."""
        frame_size = (self.getVideoWidth(), self.getVideoHeight())
        vidfps = self.video_reader.fps
        codec = self.config.get('general', 'video_export_codec')
        video_writer = VideoWriter(output_file, frame_size, vidfps, codec)
        # you have to be sure _clock_tick is called before this function,
        # otherwise the video is offset by one frame
        self.pause()
        self.seek(0)
        for frame in xrange(self.video_reader.frame_count):
            self.seek(frame)
            video_writer.addFrame(self.video_image)
        video_writer.releaseWriter()

    # ----------- PLAYBACK CONTROL ----
    def play(self):
        if not self.clock.running:
            self.clock.run()

    def pause(self):
        if self.clock.running:
            self.clock.stop()

    def seek(self, frame):
        try:
            self.clock.seek(frame)
        except ClockError:
            # seeked to a frame out of video
            pass

    def nextFrame(self):
        """Jump one frame ahead."""
        self.seek(self.clock.frame + 1)

    def prevFrame(self):
        """Jump back one frame."""
        self.seek(self.clock.frame - 1)

    def jumpToNextUncategorisedObject(self):
        """Jump to the next frame or fixation (depending on what the user entered
        in the project wizard) that is not categorised yet."""
        frame = self.category_container.nextNotCategorisedIndex(self.clock.frame)
        self.seek(frame)

    def nextFixation(self):
        '''Jump to the next fixation.'''
        frame = self.eye_movement.nextFixationFrame(self.clock.frame, self.categorising_eye_is_left)
        self.seek(frame)

    def prevFixation(self):
        '''Jump to the previous fixation.'''
        frame = self.eye_movement.prevFixationFrame(self.clock.frame, self.categorising_eye_is_left)
        self.seek(frame)

    def slowerPlayback(self):
        self.clock.setMultiplicator(self.clock.multiplicator * 0.9)

    def fasterPlayback(self):
        self.clock.setMultiplicator(self.clock.multiplicator * 1.1)

    def setPlaybackSpeed(self, speed):
        self.clock.setMultiplicator(speed)

    # --------------- VIDEO INFORMATION -----
    def getVideoStrLength(self):
        return len(self.video_reader.frame(0).tostring())

    def getVideoHeight(self):
        return self.video_reader.height

    def getVideoWidth(self):
        return self.video_reader.width

    def getVideoFrameCount(self):
        return self.video_reader.frame_count

    def getVideoFrameRate(self):
        return self.video_reader.fps
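# A hedged sketch (not from the original project) of how a caller might drive this
# Controller. The constructor expects shared-memory slots for the rendered frame
# string and the current frame number; the buffer size, file names and TRIALID
# target below are assumptions based only on the signatures above.
from multiprocessing import Array, Value

video_str = Array('c', 640 * 480 * 3)  # assumed frame buffer size
current_frame = Value('i', 0)

controller = Controller(video_str, current_frame)
controller.new_project('session.avi', 'session_eye_movement.asc', 'TRIALID',
                       categorise_frames=False, categorising_eye_is_left=True)
if controller.ready():
    controller.play()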
def main():
    filename = '../UntrackedFiles/stereoClip11_Kyle.mov'
    vr = VideoReader(filename)
    # find court corners:
    cf = CourtFinder()
    numFrames = int(vr.getNumFrames())
    vr.setNextFrame(int(numFrames / 2))
    ret, frame = vr.readFrame()
    cf.FindCourtCorners(frame, 0)
    # reset frame index to beginning
    vr.setNextFrame(0)
    bf = BallFinder()
    # second video reader for faster processing
    vr2 = VideoReader(filename)
    numFrameForward = 5
    vr2.setNextFrame(numFrameForward)

    # # calculate average frame for background
    # numFrameForAve = 10
    # ret, sumFrame = vr.readFrame()
    # sumFrame = sumFrame.astype(np.int16)
    # for i in range(1, numFrameForAve):
    #     ret, frame = vr.readFrame()
    #     sumFrame += frame.astype(np.int16)
    # avgFrame = sumFrame / numFrameForAve
    # avgFrame = avgFrame.astype(np.uint8)
    #
    # # reset video reader index to beginning again
    # vr.setNextFrame(0)

    done = False
    while not done:
        ret, frame = vr.readFrame()
        ret2, frame2 = vr2.readFrame()
        if not ret or not ret2:
            done = True
        else:
            frameDiff1 = bf.hsvDiff(frame, frame2)
            frameDiff2 = bf.rgbDiff(frame, frame2)
            frameCornerMask = bf.GetCornernessMask(frame, frame2)
            mask = bf.aveMask(frameDiff1, frameDiff2, frameCornerMask)
            cv2.imshow('frame', cv2.resize(frameDiff2, (960, 540)))
            cv2.waitKey(1)
            # cv2.imwrite('../UntrackedFiles/frame_diff.jpg', frame_diff)

    # # quit sequence:
    # print "press q enter to quit "
    # done = False
    # while not done:
    #     c = sys.stdin.read(1)
    #     if c == 'q':
    #         done = True
    vr.close()
class Blockworld:
    def __init__(self):
        if len(sys.argv) <= 2 or '--help' in sys.argv:
            print """Usage:
            {name} folder/ video.avi
            """.format(name=sys.argv[0])
            sys.exit(-1)

        args = parse_args(sys.argv[1], sys.argv[2])
        self.args = args
        bg_file = glob.glob(args['fullname'] + '*bg.npz')[0]
        print sys.argv

        self.small_step = 5
        self.large_step = 10
        self.startup_complete = False

        ##### Grab all the transforms ######
        self.absolute = False
        (self.imu_transforms_mk1,
         self.gps_data_mk1,
         self.gps_times_mk1) = get_transforms(args, 'mark1', self.absolute)
        (self.imu_transforms_mk2,
         self.gps_data_mk2,
         self.gps_times_mk2) = get_transforms(args, 'mark2', self.absolute)
        self.mk2_t = 0
        self.t = self.mk2_to_mk1()
        self.cur_imu_transform = self.imu_transforms_mk1[self.t, :, :]
        self.imu_kdtree = cKDTree(self.imu_transforms_mk1[:, :3, 3])

        self.params = args['params']
        self.lidar_params = self.params['lidar']
        self.T_from_i_to_l = np.linalg.inv(self.lidar_params['T_from_l_to_i'])
        cam_num = args['cam_num']
        self.cam_params = self.params['cam'][cam_num]

        # Load the MobilEye file
        self.mbly_loader = MblyLoader(args)
        self.mbly_rot = [0.0, -0.005, -0.006]
        self.mbly_T = [5.4, 0.0, -1.9]

        # Is the flyover running
        self.running = False
        # Has the user changed the time
        self.manual_change = 0

        ###### Set up the renderers ######
        self.cloud_ren = vtk.vtkRenderer()
        self.cloud_ren.SetViewport(0, 0, 1.0, 1.0)
        self.cloud_ren.SetBackground(0, 0, 0)

        self.img_ren = vtk.vtkRenderer()
        # self.img_ren.SetViewport(0.7, 0.0, 1.0, 0.37)
        self.img_ren.SetViewport(0.6, 0.6, 1.0, 1.0)
        self.img_ren.SetInteractive(False)
        self.img_ren.SetBackground(0.1, 0.1, 0.1)

        self.win = vtk.vtkRenderWindow()
        self.win.StereoCapableWindowOff()
        self.win.AddRenderer(self.cloud_ren)
        self.win.AddRenderer(self.img_ren)
        self.win.SetSize(800, 400)

        self.iren = vtk.vtkRenderWindowInteractor()
        self.iren.SetRenderWindow(self.win)

        ###### Cloud Actors ######
        print 'Adding raw points'
        raw_npz = np.load(bg_file)
        pts = raw_npz['data']
        raw_cloud = VtkPointCloud(pts[:, :3], np.ones(pts[:, :3].shape) * 255)
        raw_actor = raw_cloud.get_vtk_color_cloud()
        self.raw_lidar = VTKCloudTree(raw_cloud, raw_actor, build_tree=False)
        self.raw_lidar_2d = DataTree(self.raw_lidar.pts[:, :-1], build_tree=False)
        self.raw_lidar.actor.GetProperty().SetPointSize(2)
        self.raw_lidar.actor.GetProperty().SetOpacity(0.1)
        self.raw_lidar.actor.SetPickable(0)
        self.cloud_ren.AddActor(self.raw_lidar.actor)

        print 'Adding car'
        self.car = load_ply('../mapping/viz/gtr.ply')
        self.car.SetPickable(0)
        self.car.GetProperty().LightingOff()
        self.cloud_ren.AddActor(self.car)

        self.mbly_vtk_boxes = []
        # Car: 0, Truck: 1, Bike: 2, Other: 3-7
        red = np.array((1, 0, 0))
        green = np.array((0, 1, 0))
        blue = np.array((0, 0, 1))
        white = red + green + blue
        self.mbly_obj_colors = [red, green, blue, white]

        self.mbly_vtk_lanes = []
        # Dashed: 0, Solid: 1, undecided: 2, Edge: 3, Double: 4, Botts_Dots: 5
        self.mbly_lane_color = [green, green, red, blue, white, green]
        self.mbly_lane_size = [2, 3, 1, 1, 2, 1]
        self.mbly_lane_subsamp = [20, 1, 1, 1, 1, 40]

        # Use our custom mouse interactor
        self.interactor = LaneInteractorStyle(self.iren, self.cloud_ren, self)
        self.iren.SetInteractorStyle(self.interactor)

        ###### 2D Projection Actors ######
        self.video_reader = VideoReader(args['video'])
        self.img_actor = None
        self.I = None

        ###### Add Callbacks ######
        print 'Rendering'
        self.iren.Initialize()

        # Set up time
        self.iren.AddObserver('TimerEvent', self.update)
        self.timer = self.iren.CreateRepeatingTimer(10)
        self.iren.Start()

    def mk2_to_mk1(self, mk2_idx=-1):
        if mk2_idx == -1:
            mk2_idx = self.mk2_t
        return mk2_to_mk1(mk2_idx, self.gps_times_mk1, self.gps_times_mk2)

    def getCameraPosition(self, t, focus=100):
        offset = np.array([-75.0, 0, 25.0]) / 4
        # Rotate the camera so it is behind the car
        position = np.dot(self.imu_transforms_mk1[t, 0:3, 0:3], offset)
        position += self.imu_transforms_mk1[t, 0:3, 3] + position
        # Focus a number of frames in front of the car
        focal_point = self.imu_transforms_mk1[t + focus, 0:3, 3]
        return position, focal_point

    def finished(self, focus=100):
        return self.mk2_t + 2 * focus > self.video_reader.total_frame_count

    def update(self, iren, event):
        # Get the cameras
        cloud_cam = self.cloud_ren.GetActiveCamera()
        img_cam = self.img_ren.GetActiveCamera()

        # Initialization
        if not self.startup_complete:
            cloud_cam.SetViewUp(0, 0, 1)
            self.mk2_t = 1
            self.t = self.mk2_to_mk1()
            self.startup_complete = True
            self.manual_change = -1  # Force an update for the camera

        # Update the time (arrow keys also update time)
        if self.running:
            self.mk2_t += self.large_step
        if self.finished():
            self.mk2_t -= self.large_step
            if self.running == True:
                self.interactor.KeyHandler(key='space')

        # Get the correct gps time (mk2 is camera time)
        self.t = self.mk2_to_mk1()
        self.cur_imu_transform = self.imu_transforms_mk1[self.t, :, :]
        # Get the correct frame to show
        (success, self.I) = self.video_reader.getFrame(self.mk2_t)
        # Update the gps time
        self.cur_gps_time = self.gps_times_mk2[self.mk2_t]
        # Make sure the calibration has been updated
        self.mbly_R = euler_matrix(*self.mbly_rot)[:3, :3]

        if self.running or self.manual_change:
            # Set camera position to in front of the car
            position, focal_point = self.getCameraPosition(self.t)
            cloud_cam.SetPosition(position)
            cloud_cam.SetFocalPoint(focal_point)
            # Update the car position
            transform = vtk_transform_from_np(self.cur_imu_transform)
            transform.RotateZ(90)
            transform.Translate(-2, -3, -2)
            self.car.SetUserTransform(transform)
            # If the user caused a manual change, reset it
            self.manual_change = 0

        # Copy the image so we can project points onto it
        I = self.I.copy()

        # Add the lanes to the cloud
        mbly_lanes = self.mbly_loader.loadLane(self.cur_gps_time)
        lanes_wrt_mbly = self.mblyLaneAsNp(mbly_lanes)
        self.addLaneToCloud(lanes_wrt_mbly)
        # Add the lanes to the image copy
        I = self.addLaneToImg(I, lanes_wrt_mbly)

        # Add the objects (cars) to the cloud
        mbly_objs = self.mbly_loader.loadObj(self.cur_gps_time)
        objs_wrt_mbly = self.mblyObjAsNp(mbly_objs)
        self.addObjToCloud(objs_wrt_mbly)
        # Add the objects to the image copy
        I = self.addObjToImg(I, objs_wrt_mbly)

        vtkimg = VtkImage(I)
        self.img_ren.RemoveActor(self.img_actor)
        self.img_actor = vtkimg.get_vtk_image()
        self.img_ren.AddActor(self.img_actor)

        # We need to draw the image before we run ResetCamera or else
        # the image is too small
        self.img_ren.ResetCamera()
        img_cam.SetClippingRange(100, 100000)  # These units are pixels
        img_cam.Dolly(2.00)

        self.iren.GetRenderWindow().Render()

    def xformMblyToGlobal(self, pts_wrt_mbly):
        # TODO: Need to transform from imu to gps frame of reference
        T_l_to_i = self.params['lidar']['T_from_l_to_i']
        T_i_to_world = self.imu_transforms_mk1[self.t, 0:3, 0:4]
        pts = pts_wrt_mbly
        # Put points in the lidar frame of reference
        pts_wrt_lidar = T_from_mbly_to_lidar(pts_wrt_mbly, self.mbly_T,
                                             self.mbly_R[:3, :3])
        # Make points homogeneous
        hom_pts = np.hstack((pts_wrt_lidar[:, :3], np.ones((pts.shape[0], 1))))
        # Put in imu frame of reference
        pts_wrt_imu = np.dot(T_l_to_i, hom_pts.T).T
        # Put in global frame of reference
        pts_wrt_world = np.dot(T_i_to_world, pts_wrt_imu.T).T
        # Add metadata back to output
        pts_wrt_world = np.hstack((pts_wrt_world, pts[:, 3:]))
        return pts_wrt_world

    def mblyObjAsNp(self, mbly_objs):
        """Turns a mobileye object pb message into a numpy array with format:
        [x, y, .7, length, width, type]
        """
        pts_wrt_mbly = []
        for obj in mbly_objs:
            pt_wrt_mbly = [obj.pos_x, obj.pos_y, .7, obj.length,
                           obj.width, obj.obj_type]
            pts_wrt_mbly.append(pt_wrt_mbly)
        return np.array(pts_wrt_mbly)

    def mblyLaneAsNp(self, mbly_lane):
        """Turns a mobileye lane into a numpy array with format:
        [C0, C1, C2, C3, lane_id, lane_type, view_range]
        Y = C3*X^3 + C2*X^2 + C1*X + C0.
        X is longitudinal distance from camera (positive right!)
        Y is lateral distance from camera
        lane_id is between -2 and 2, with -2 being the farthest left, and 2 being
        the farthest right lane. There is no 0 id.
        """
        lanes_wrt_mbly = []
        for l in mbly_lane:
            lane_wrt_mbly = [l.C0, l.C1, l.C2, l.C3, l.lane_id, l.lane_type,
                             l.view_range]
            lanes_wrt_mbly.append(lane_wrt_mbly)
        return np.array(lanes_wrt_mbly)

    def addObjToCloud(self, objs_wrt_mbly):
        """Add the mobileye returns to the 3d scene"""
        mbly_vtk_boxes = []
        car_rot = self.imu_transforms_mk1[self.t, :, :3]
        objs_wrt_world = self.xformMblyToGlobal(objs_wrt_mbly)
        # Draw each box
        car_rot = euler_from_matrix(car_rot)[2] * 180 / math.pi
        for o in objs_wrt_world:
            # Create the vtk object
            box = VtkBoundingBox(o)
            actor = box.get_vtk_box(car_rot)
            color = self.mbly_obj_colors[int(o[5])]
            actor.GetProperty().SetColor(*color)
            mbly_vtk_boxes.append(box)
        # Remove old boxes
        for vtk_box in self.mbly_vtk_boxes:
            self.cloud_ren.RemoveActor(vtk_box.actor)
        # Update to new actors
        self.mbly_vtk_boxes = mbly_vtk_boxes
        # Draw new boxes
        for vtk_box in self.mbly_vtk_boxes:
            self.cloud_ren.AddActor(vtk_box.actor)

    def getLanePointsFromModel(self, lane_wrt_mbly):
        view_range = lane_wrt_mbly[6]
        if view_range == 0:
            return None
        num_pts = view_range * 3
        X = np.linspace(0, view_range, num=num_pts)
        # from model: Y = C3*X^3 + C2*X^2 + C1*X + C0.
        X = np.vstack((np.ones(X.shape), X, np.power(X, 2), np.power(X, 3)))
        # Mbly uses Y-right as positive, we use Y-left as positive
        Y = -1 * np.dot(lane_wrt_mbly[:4], X)
        lane_pts_wrt_mbly = np.vstack((X[1, :], Y, np.zeros((1, num_pts)))).T
        return lane_pts_wrt_mbly

    def addLaneToCloud(self, lane_wrt_mbly):
        for lane in self.mbly_vtk_lanes:
            self.cloud_ren.RemoveActor(lane.actor)
        self.mbly_vtk_lanes = []
        for i in xrange(lane_wrt_mbly.shape[0]):
            type = int(lane_wrt_mbly[i, 5])
            color = self.mbly_lane_color[type] * 255
            size = self.mbly_lane_size[type]
            subsamp = self.mbly_lane_subsamp[type]
            lane_pts_wrt_mbly = self.getLanePointsFromModel(lane_wrt_mbly[i, :])
            if lane_pts_wrt_mbly is None:
                continue
            pts_wrt_global = self.xformMblyToGlobal(lane_pts_wrt_mbly)
            pts_wrt_global = pts_wrt_global[::subsamp]
            num_pts = pts_wrt_global.shape[0]
            vtk_lane = VtkPointCloud(pts_wrt_global, np.tile(color, (num_pts, 1)))
            actor = vtk_lane.get_vtk_color_cloud()
            actor.GetProperty().SetPointSize(size)
            self.mbly_vtk_lanes.append(vtk_lane)
            self.cloud_ren.AddActor(actor)

    def addObjToImg(self, I, objs_wrt_mbly):
        """Takes an image and the mbly objects. Converts the objects into corners
        of a bounding box and draws them on the image
        """
        if objs_wrt_mbly.shape[0] == 0:
            return None
        pix = []
        width = objs_wrt_mbly[:, 4]
        # Assuming the points in objs_wrt_mbly are the center of the object, draw
        # a box .5 m below, .5 m above, -.5*width left, .5*width right.
Keep the # same z position for z in [-.5, .5]: for y in [-.5, .5]: offset = np.zeros((width.shape[0], 3)) offset[:, 1] = width*y offset[:, 2] = z pt = objs_wrt_mbly[:, :3] + offset proj_pt = projectPoints(pt, self.args, self.mbly_T, self.mbly_R) pix.append(proj_pt[:, 3:]) pix = np.array(pix, dtype=np.int32) pix = np.swapaxes(pix, 0, 1) # Draw a line between projected points for i, corner in enumerate(pix): # Get the color of the box and convert RGB -> BGR color = self.mbly_obj_colors[int(objs_wrt_mbly[i, 5])][::-1] * 255 corner = tuple(map(tuple, corner)) cv2.rectangle(I, corner[0], corner[3], color, 2) return I def addLaneToImg(self, I, lanes_wrt_mbly): """Takes an image and the 3d lane points. Projects these points onto the image """ if lanes_wrt_mbly.shape[0] == 0: return None pix = [] for i in xrange(len(self.mbly_vtk_lanes)): type = int(lanes_wrt_mbly[i, 5]) color = self.mbly_lane_color[type] * 255 size = self.mbly_lane_size[type] subsamp = self.mbly_lane_subsamp[type] pts = self.getLanePointsFromModel(lanes_wrt_mbly[i])[::subsamp] if pts is None: continue proj_pts = projectPoints(pts, self.args, self.mbly_T, self.mbly_R) proj_pts = proj_pts[:, 3:].astype(np.int32, copy = False) img_mask = (proj_pts[:, 0] < 1280) & (proj_pts[:, 0] >= 0) &\ (proj_pts[:, 1] < 800) & (proj_pts[:, 1] >= 0) proj_pts = proj_pts[img_mask] for pt_i in xrange(proj_pts.shape[0]): # cv2 only takes tuples at points pt = tuple(proj_pts[pt_i, :]) # Make sure to convert to bgr cv2.circle(I, pt, size, color[::-1], thickness=-size) return I
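# The mblyLaneAsNp docstring above defines the Mobileye lane model
# Y = C3*X^3 + C2*X^2 + C1*X + C0 with Y positive to the right; the sketch below
# evaluates that model and flips the sign, mirroring what getLanePointsFromModel
# does. The coefficients and view range are made-up sample values, not data from
# the original.
import numpy as np

C0, C1, C2, C3 = 1.8, 0.01, 1e-4, 1e-6  # sample lane coefficients (illustrative)
view_range = 40.0                        # metres over which the model is valid

X = np.linspace(0, view_range, num=int(view_range * 3))
# Stack powers of X (lowest order first) so the polynomial is one dot product
powers = np.vstack((np.ones_like(X), X, X ** 2, X ** 3))
# Mobileye reports Y positive to the right; negate to get Y positive to the left
Y = -np.dot(np.array([C0, C1, C2, C3]), powers)
lane_pts_wrt_mbly = np.vstack((X, Y, np.zeros_like(X))).T  # (N, 3) lane points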
# usage:
# python test_video_reader.py <path to video splits>
import sys
from VideoReader import VideoReader

if __name__ == '__main__':
    reader = VideoReader(sys.argv[1])
    reader.playVideo()
cluster_labels1 = h5f['cluster_labels'][...] cluster_centers1 = h5f['cluster_centers'][...] h5f.close() h5f = h5py.File(args.bounds2, 'r') cluster_labels2 = h5f['cluster_labels'][...] cluster_centers2 = h5f['cluster_centers'][...] h5f.close() # Set up video and shared parameters cam_num = args.cam video_file = args.video # PARAM FIXME params = LoadParameters('q50_4_3_14_params') cam = params['cam'][cam_num-1] video_reader = VideoReader(video_file) T_from_i_to_l = np.linalg.inv(params['lidar']['T_from_l_to_i']) # BGR red = [0, 0, 255] blue = [255, 0, 0] green = [0, 255, 0] # Set up video writer if not args.debug: fps = 30 # PARAM fourcc = cv2.cv.CV_FOURCC(*'MJPG') video_writer = cv2.VideoWriter() video_writer.open(args.outvideo, fourcc, fps, (1280, 960)) # PARAM
def main(): # Pretty videos: fourcc = cv2.VideoWriter_fourcc(*'mp4v') kyleOutputVideo = cv2.VideoWriter('../UntrackedFiles/BallOutputKyle.mp4', fourcc, 60.0, (1920, 1080)) meganOutputVideo = cv2.VideoWriter('../UntrackedFiles/BallOutputMegan.mp4', fourcc, 60.0, (1920, 1080)) meganFilename = '../UntrackedFiles/stereoClip5_Megan.mov' kyleFilename = '../UntrackedFiles/stereoClip5_Kyle.mov' vrMegan1 = VideoReader(meganFilename) vrMegan2 = VideoReader(meganFilename) vrKyle1 = VideoReader(kyleFilename) vrKyle2 = VideoReader(kyleFilename) numFrameForward = 5 vrMegan2.setNextFrame(numFrameForward) vrKyle2.setNextFrame(numFrameForward) # find court corners: cfKyle = CourtFinder() cfMegan = CourtFinder() numFrames = int(vrMegan1.getNumFrames()) vrMegan1.setNextFrame(int(numFrames / 2)) ret, frame = vrMegan1.readFrame() cfMegan.FindCourtCorners(frame, 0) vrKyle1.setNextFrame(int(numFrames / 2)) ret, frame = vrKyle1.readFrame() cfKyle.FindCourtCorners(frame, 0) if (not cfMegan.found_corners) or not (cfKyle.found_corners): print "Couldn't find the court. Exiting." return # reset frame index to beginning vrMegan1.setNextFrame(0) vrKyle1.setNextFrame(0) frameNum = 1 meganCam = Camera("megan", cfMegan.corners_sort) kyleCam = Camera("kyle", cfKyle.corners_sort) # make a ball finder bf = BallFinder() kf = KalmanFilter(vrMegan1.framerate) csvData = [] while (True): ret1, kyleFrame1 = vrKyle1.readFrame() ret2, kyleFrame2, = vrKyle2.readFrame() ret3, meganFrame1 = vrMegan1.readFrame() ret4, meganFrame2, = vrMegan2.readFrame() if not (ret1) or not (ret2) or not (ret3) or not (ret4): print 'Ending after', frameNum - 1, 'frames.' break kyleMask, kyleRays = getBallCandidateRays(bf, kyleCam, kyleFrame1, kyleFrame2) meganMask, meganRays = getBallCandidateRays(bf, meganCam, meganFrame1, meganFrame2) # check all candidate rays for candidate balls minDist = 1000000 # TODO set inf ballPt = [] # all ball points and distances threshDist = 0.2 # rays must be within .1 m of each other for intersect ballCandidates = [] candidateCertainty = [] for kyleRay in kyleRays: for meganRay in meganRays: pt, dist, _D, _E = IntersectRays(kyleRay, meganRay) if dist < threshDist and pt[1] < 3.5: # don't include candidates clearly not valid intersect points # also don't include candidates that are clearly too high to be the ball courtBuffer = 2 if pt[0] < Camera.HALF_COURT_X + courtBuffer and pt[ 0] > -Camera.HALF_COURT_X - courtBuffer: if pt[2] < Camera.HALF_COURT_Z + 0.6: # and pt[2] > -Camera.HALF_COURT_Z - 0: ballCandidates.append(pt) candidateCertainty.append(dist) if dist < minDist: minDist = dist ballPt = pt kf.processMeas(ballCandidates, candidateCertainty) # ========== CSV and VIDEO output =========== csvTuple = list() csvTuple.append(frameNum) if np.linalg.norm(kf.sigma_k, 'fro') < 100: # valid result # Format the tuple for successful reading csvTuple.append(1) posVel = np.reshape(kf.mu_k, (1, 6))[0] posVel = np.round(posVel, 3) for val in posVel: csvTuple.append(val) for val in kyleCam.ConvertWorldToImagePosition(posVel[0:3]): csvTuple.append(val) for val in meganCam.ConvertWorldToImagePosition(posVel[0:3]): csvTuple.append(val) # Videos kyleOutFrame = kyleFrame1.copy() bf.ballPixelLoc = kyleCam.ConvertWorldToImagePosition(posVel[0:3]) kyleOutFrame = cfKyle.drawCornersOnFrame(kyleOutFrame) kyleOutFrame = bf.drawBallOnFrame(kyleOutFrame) kyleOutFrame |= kyleMask kyleOutputVideo.write(kyleOutFrame) meganOutFrame = meganFrame1.copy() meganOutFrame |= meganMask bf.ballPixelLoc = meganCam.ConvertWorldToImagePosition(posVel[0:3]) 
meganOutFrame = cfMegan.drawCornersOnFrame(meganOutFrame) meganOutFrame = bf.drawBallOnFrame(meganOutFrame) meganOutputVideo.write(meganOutFrame) else: # Format the tuple for unsuccessful reading csvTuple.append(0) csvData.append(list(csvTuple)) print csvTuple frameNum += 1 # <END WHILE LOOP> with open('../UntrackedFiles/ballEst.csv', 'wb') as csvfile: filewriter = csv.writer(csvfile, delimiter=',') filewriter.writerows(csvData) vrKyle1.close() vrKyle2.close() vrMegan1.close() vrMegan2.close() kyleOutputVideo.release() meganOutputVideo.release()
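# IntersectRays above appears to return the 3D point and the closest-approach
# distance between a ray from each camera. A minimal sketch of the standard
# closest-point-between-two-rays computation is shown below, assuming each ray
# is an (origin, direction) pair of 3-vectors; this is a generic formulation,
# not necessarily the project's own implementation.
import numpy as np

def closest_point_between_rays(ray1, ray2):
    o1, d1 = np.asarray(ray1[0], dtype=float), np.asarray(ray1[1], dtype=float)
    o2, d2 = np.asarray(ray2[0], dtype=float), np.asarray(ray2[1], dtype=float)
    d1 = d1 / np.linalg.norm(d1)
    d2 = d2 / np.linalg.norm(d2)
    w0 = o1 - o2
    a, b, c = np.dot(d1, d1), np.dot(d1, d2), np.dot(d2, d2)
    d, e = np.dot(d1, w0), np.dot(d2, w0)
    denom = a * c - b * b
    if denom < 1e-9:  # nearly parallel rays
        s, t = 0.0, e / c
    else:
        s = (b * e - c * d) / denom
        t = (a * e - b * d) / denom
    p1 = o1 + s * d1  # closest point on ray 1
    p2 = o2 + t * d2  # closest point on ray 2
    midpoint = 0.5 * (p1 + p2)
    dist = np.linalg.norm(p1 - p2)
    return midpoint, dist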
def __init__(self, video_file, map_file):
    self.reader = VideoReader(video_file)
    self.ldr_map = loadLDRCamMap(map_file)
    self.queue = Queue.Queue()
    self.finished = False
def main(): # generate poster/report photos? posterPhotos = True # get file names videoFile1 = sys.argv[1] videoFile2 = sys.argv[2] dataFile = sys.argv[3] # read in video files vr1 = VideoReader(videoFile1) vr2 = VideoReader(videoFile2) # read in tennis court image courtTopView = cv.imread('../SharedData/courtTop.png') courtTopView = cv.cvtColor(courtTopView, cv.COLOR_BGR2RGB) courtHeight, courtWidth, _ = courtTopView.shape netSideView = cv.imread('../SharedData/netSide.png') netSideView = cv.cvtColor(netSideView, cv.COLOR_BGR2RGB) netHeight, netWidth, _ = netSideView.shape # read in results data with open(dataFile, 'rb') as csvfile: dataReader = csv.reader(csvfile, delimiter=',') numFramesData = sum(1 for _ in dataReader) csvfile.close() ballFound = np.zeros((numFramesData, 1)) positionData = np.zeros((numFramesData, 3)) velocityData = np.zeros((numFramesData, 3)) pixelsKyle = np.zeros((numFramesData, 2)) pixelsMegan = np.zeros((numFramesData, 2)) with open(dataFile, 'rb') as csvfile: dataReader = csv.reader(csvfile, delimiter=',') for row in dataReader: frameNumber = int(row[0]) - 1 found = int(row[1]) if found: xPos = float(row[2]) yPos = float(row[3]) zPos = float(row[4]) xVel = float(row[5]) yVel = float(row[6]) zVel = float(row[7]) xPixelKyle = int(row[8]) yPixelKyle = int(row[9]) xPixelMegan = int(row[10]) yPixelMegan = int(row[11]) ballFound[frameNumber] = found positionData[frameNumber, :] = [xPos, yPos, zPos] velocityData[frameNumber, :] = [xVel, yVel, zVel] pixelsKyle[frameNumber, :] = [xPixelKyle, yPixelKyle] pixelsMegan[frameNumber, :] = [xPixelMegan, yPixelMegan] csvfile.close() # parameters for results file msToMPH = 2.23694 xPosMin = -7 xPosMax = 7 yPosMin = 0 yPosMax = 3 zPosMin = -21 zPosMax = 21 # parameters for court graphic xCourtMin = -8.25 xCourtMax = 8.25 xCourtWidth = abs(xCourtMin) + abs(xCourtMax) yCourtMin = -14.75 yCourtMax = 14.75 yCourtLength = abs(yCourtMin) + abs(yCourtMax) # get first frame currentFrame = 0 vr1.setNextFrame(currentFrame) ret1, frame1, = vr1.readFrame() frame1 = cv.cvtColor(frame1, cv.COLOR_BGR2RGB) frame1Height = vr1.height frame1Width = vr1.width numFrames1 = int(vr1.numFrames) print "Frames in video 1: " + str(numFrames1) vr2.setNextFrame(currentFrame) ret2, frame2, = vr2.readFrame() frame2 = cv.cvtColor(frame2, cv.COLOR_BGR2RGB) frame2Height = vr2.height frame2Width = vr2.width numFrames2 = int(vr2.numFrames) print "Frames in video 2: " + str(numFrames2) print "Frames in data: " + str(numFramesData) frameOffset = int(numFrames1 - numFramesData - 5) numFrames = int(min(min(numFrames1, numFrames2), numFramesData)) # compute velocity in MPH if ballFound[currentFrame]: [xv, yv, zv] = velocityData[currentFrame, :] velMPH = np.sqrt((xv * msToMPH)**2 + (yv * msToMPH)**2 + (zv * msToMPH)**2) print 'Velocity: ' + str(velMPH) + ' mph' # corners of court corners1 = [(1161, 431), (1758, 479), (1368, 978), (76, 716)] corners2 = [(122, 456), (752, 446), (1754, 817), (319, 1030)] # create figure and show first frame fig = plt.figure(figsize=(12, 8)) plt.ion() ax1 = fig.add_subplot(2, 2, 1) ax1.xaxis.set_visible(False) ax1.yaxis.set_visible(False) ax1.set_title("iPhone Camera #1") im1 = ax1.imshow(frame1) for x, y in corners1: ax1.scatter(x, y, s=16, c='red', marker='o') ax2 = fig.add_subplot(2, 2, 2) ax2.xaxis.set_visible(False) ax2.yaxis.set_visible(False) ax2.set_title("iPhone Camera #2") im2 = ax2.imshow(frame2) for x, y in corners2: ax2.scatter(x, y, s=16, c='red', marker='o') # top view of court ax3 = fig.add_subplot(2, 2, 3) 
ax3.xaxis.set_visible(False) ax3.yaxis.set_visible(False) im3 = ax3.imshow(courtTopView) # 3D scatterplot ax4 = fig.add_subplot(2, 2, 4, projection='3d') ax4.set_xlim([zPosMin, zPosMax]) ax4.set_xticks(np.arange(-20, 21, 10)) ax4.set_xlabel("Court Length (m)") ax4.set_ylim([xPosMin, xPosMax]) ax4.set_yticks(np.arange(-6, 7, 3)) ax4.set_ylabel("Court Width (m)") ax4.set_zlim([yPosMin, yPosMax]) ax4.set_zticks(np.arange(0, 4, 1)) ax4.set_zlabel("Ball Height (m)") ax4.set_aspect('equal') fig.show() # output image files imFilename = '../UntrackedFiles/imageOutput/image' + str( currentFrame) + '.png' fig.set_size_inches(12, 8) plt.savefig(imFilename, dpi=200) # vidFrame = cv.imread(imFilename) # vidFrameHeight, vidFrameWidth, _ = vidFrame.shape # outFilename = '../UntrackedFiles/testVideo.mp4' # fourcc = cv.VideoWriter_fourcc(*'mp4v'); # outputVideo = cv.VideoWriter(outFilename,fourcc, 60, (vidFrameWidth,vidFrameHeight)); # outputVideo.write(vidFrame) # for poster/report if posterPhotos: # Megan's camera fig1 = plt.figure(figsize=(12, 8)) fig1ax1 = fig1.add_subplot(1, 1, 1) fig1ax1.xaxis.set_visible(False) fig1ax1.yaxis.set_visible(False) fig1ax1.set_title("iPhone Camera #1") fim1 = fig1ax1.imshow(frame1) for x, y in corners1: fig1ax1.scatter(x, y, s=16, c='red', marker='o') fig1.show() # Kyle's Camera fig2 = plt.figure(figsize=(12, 8)) fig2ax1 = fig2.add_subplot(1, 1, 1) fig2ax1.xaxis.set_visible(False) fig2ax1.yaxis.set_visible(False) fig2ax1.set_title("iPhone Camera #2") fim2 = fig2ax1.imshow(frame2) for x, y in corners2: fig2ax1.scatter(x, y, s=16, c='red', marker='o') fig2.show() # court graphic fig3 = plt.figure(figsize=(6, 8)) fig3ax1 = fig3.add_subplot(1, 1, 1) fig3ax1.xaxis.set_visible(False) fig3ax1.yaxis.set_visible(False) fig3ax1.imshow(courtTopView) fig3.show() # 3D scatterplot fig4 = plt.figure(figsize=(12, 8)) fig4ax1 = fig4.add_subplot(1, 1, 1, projection='3d') fig4ax1.set_xlim([zPosMin, zPosMax]) fig4ax1.set_xticks(np.arange(-20, 21, 10)) fig4ax1.set_xlabel("Court Length (m)") fig4ax1.set_ylim([xPosMin, xPosMax]) fig4ax1.set_yticks(np.arange(-6, 7, 3)) fig4ax1.set_ylabel("Court Width (m)") fig4ax1.set_zlim([yPosMin, yPosMax]) fig4ax1.set_zticks(np.arange(0, 4, 1)) fig4ax1.set_zlabel("Ball Height (m)") fig4ax1.set_aspect('equal') fig4.show() # update plots in real-time for f in range(1, numFrames): # get next frame currentFrame = f vr1.setNextFrame(currentFrame) ret1, frame1, = vr1.readFrame() frame1 = cv.cvtColor(frame1, cv.COLOR_BGR2RGB) vr2.setNextFrame(currentFrame) ret2, frame2, = vr2.readFrame() frame2 = cv.cvtColor(frame2, cv.COLOR_BGR2RGB) # compute velocity in MPH if ballFound[f]: [xv, yv, zv] = velocityData[f, :] velMPH = np.sqrt((xv * msToMPH)**2 + (yv * msToMPH)**2 + (zv * msToMPH)**2) speedText = 'Speed: ' + str(int(round(velMPH))) + ' mph' ax3.set_title(speedText) # Megan's camera im1.set_data(frame1) if posterPhotos: fim1.set_data(frame1) if ballFound[f]: [x1, y1] = pixelsMegan[f, :] ax1.scatter(x1, y1, s=1, c='pink', marker='x') if posterPhotos: fig1ax1.scatter(x1, y1, s=1, c='pink', marker='x') # Kyle's camera im2.set_data(frame2) if posterPhotos: fim2.set_data(frame2) if ballFound[f]: [x2, y2] = pixelsKyle[f, :] ax2.scatter(x2, y2, s=1, c='pink', marker='x') if posterPhotos: fig2ax1.scatter(x2, y2, s=1, c='pink', marker='x') # court graphic if ballFound[f]: [x, y, z] = positionData[f, :] if x > xCourtMin and x < xCourtMax and z > yCourtMin and z < yCourtMax: xc = int( round(((x - xCourtMin) / (2 * xCourtMax)) * (courtWidth - 6))) yc = int( round(((z - 
yCourtMin) / (2 * yCourtMax)) * (courtHeight - 6))) ax3.scatter(xc, yc, s=2, c='pink', marker='o') if posterPhotos: fig3ax1.scatter(xc, yc, s=3, c='pink', marker='o') # 3D scatterplot if ballFound[f]: [x, y, z] = positionData[f, :] # change colors based on speed if velMPH >= 30: ax4.scatter(z, x, y, s=2, c='pink', marker='o') elif velMPH < 30 and velMPH >= 25: ax4.scatter(z, x, y, s=2, c='red', marker='o') elif velMPH < 25 and velMPH >= 20: ax4.scatter(z, x, y, s=2, c='orange', marker='o') elif velMPH < 20 and velMPH >= 15: ax4.scatter(z, x, y, s=2, c='yellow', marker='o') elif velMPH < 15 and velMPH >= 10: ax4.scatter(z, x, y, s=2, c='green', marker='o') elif velMPH < 10 and velMPH >= 5: ax4.scatter(z, x, y, s=2, c='blue', marker='o') else: ax4.scatter(z, x, y, s=2, c='purple', marker='o') if posterPhotos: if velMPH >= 30: fig4ax1.scatter(z, x, y, s=2, c='pink', marker='o') elif velMPH < 30 and velMPH >= 25: fig4ax1.scatter(z, x, y, s=2, c='red', marker='o') elif velMPH < 25 and velMPH >= 20: fig4ax1.scatter(z, x, y, s=2, c='orange', marker='o') elif velMPH < 20 and velMPH >= 15: fig4ax1.scatter(z, x, y, s=2, c='yellow', marker='o') elif velMPH < 15 and velMPH >= 10: fig4ax1.scatter(z, x, y, s=2, c='green', marker='o') elif velMPH < 10 and velMPH >= 5: fig4ax1.scatter(z, x, y, s=2, c='blue', marker='o') else: fig4ax1.scatter(z, x, y, s=2, c='purple', marker='o') # graphics for poster/report if posterPhotos and f == numFrames - 1: # Megan's camera fig1.show() imFilename = '../UntrackedFiles/imageOutput/plot1.png' fig1.savefig(imFilename, dpi=200) # Kyle's camera fig2.show() imFilename = '../UntrackedFiles/imageOutput/plot2.png' fig2.savefig(imFilename, dpi=200) # court graphic fig3.show() imFilename = '../UntrackedFiles/imageOutput/plot3.png' fig3.savefig(imFilename, dpi=200) # 3D scatterplot fig4.show() imFilename = '../UntrackedFiles/imageOutput/plot4.png' fig4.savefig(imFilename, dpi=200) # update plots fig.show() imFilename = '../UntrackedFiles/imageOutput/image' + str(f) + '.png' fig.set_size_inches(12, 8) fig.savefig(imFilename, dpi=200) #vidFrame = cv.imread(imFilename) #outputVideo.write(vidFrame) plt.pause(0.00001) # close video readers vr1.close() vr2.close()
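# The chain of velMPH comparisons above maps a speed band to a marker color.
# The same mapping can be written as a small lookup; the thresholds and colors
# are copied from the original, and the helper name speed_to_color is
# illustrative only.
SPEED_COLOR_BANDS = [
    (30, 'pink'),
    (25, 'red'),
    (20, 'orange'),
    (15, 'yellow'),
    (10, 'green'),
    (5, 'blue'),
]

def speed_to_color(vel_mph):
    for threshold, color in SPEED_COLOR_BANDS:
        if vel_mph >= threshold:
            return color
    return 'purple'  # anything below 5 mph

# usage: ax4.scatter(z, x, y, s=2, c=speed_to_color(velMPH), marker='o')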
# usage:
# python test_video_reader.py <path to video splits> <output filename>
import sys
from VideoReader import VideoReader
import cv2, cv
import time

if __name__ == '__main__':
    reader = VideoReader(sys.argv[1])
    writer = cv2.VideoWriter(sys.argv[2], cv.CV_FOURCC('F', 'M', 'P', '4'),
                             50.0, (1280, 960))
    frames = 0
    lastTime = time.time()
    while True:
        (success, img) = reader.getNextFrame()
        if success == False:
            break
        writer.write(img)
        frames += 1
        currentTime = time.time()
        if (currentTime - lastTime) > 1:
            print 'Encoding at', frames, 'Hz.'
            lastTime = currentTime
            frames = 0
#cv2.waitKey(0) #print edges.shape #import matplotlib.pyplot as plt #plt.imshow(edges[0]) #plt.show() #return edges[0] return edges if __name__ == '__main__': args = parse_args(sys.argv[1], sys.argv[2]) cam_num = int(sys.argv[2][-5]) video_file = args['video'] params = args['params'] cam = params['cam'][cam_num - 1] video_reader = VideoReader(video_file) ldr_map = loadLDRCamMap(args['map']) #(tx,ty,tz) = (-0.50000000000000004, -0.2875, 0.34) C_current = np.array([tx, ty, tz, rx, ry, rz]) BATCH_SIZE = 1 from multiprocessing import Pool pool = Pool(4) while True: batch_data = [] while len(batch_data) < BATCH_SIZE: (success, I, raw_pts) = getNextData(video_reader, ldr_map) if not success: break
for video in sorted( glob.glob(remote_run + '/' + run + '60*.zip')): print '\tReading', video driverseat_run = driverseat_folder + run + '/' # If it is a zipped file, remove the .zip extension vid_num = video.replace('zip', '')[-4:-1] # First create an uncompressed mpg from the images. Give # this a file extension mp so we know the process is # not finished mp_vid_name = driverseat_run + 'cam_' + vid_num + \ '_uncompressed.mpg' # The finished file should have the extension mpg mpg_vid_name = mp_vid_name.replace('_uncompressed', '') if not os.path.isfile(mp_vid_name) and \ not os.path.isfile(mpg_vid_name): with VideoReader(video) as reader: print '\tWriting ' + mp_vid_name writer = None while True: (succ, I) = reader.getNextFrame() if not succ: break if reader.framenum % sub_samp == 0: if I == None: # This is a corrupted jpeg prev_fnum = reader.framenum # Read the next image in the file (succ, I) = reader.getNextFrame() if not succ: break
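# The comments above describe writing to a temporary '_uncompressed' name and
# only treating a video as finished once it carries the plain .mpg name. A
# hedged sketch of that rename-on-completion step, assuming os.rename and the
# mp_vid_name / mpg_vid_name naming used above (the helper finalize_video is
# illustrative, not part of the original code):
import os

def finalize_video(mp_vid_name):
    """Rename the temporary '_uncompressed' file to its final name once encoding is done."""
    mpg_vid_name = mp_vid_name.replace('_uncompressed', '')
    if os.path.isfile(mp_vid_name):
        os.rename(mp_vid_name, mpg_vid_name)
    return mpg_vid_name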
from GPSTransforms import IMUTransforms from LidarTransforms import LDRLoader, utc_from_gps_log_all from ArgParser import parse_args from pipeline_config import LDR_DIR from pipeline_config import EXPORT_START, EXPORT_NUM, EXPORT_STEP ''' Create new lidar files synced to video ''' if __name__ == '__main__': args = parse_args(sys.argv[1], sys.argv[2]) cam_num = int(sys.argv[2][-5]) video_file = args['video'] video_reader = VideoReader(video_file) params = args['params'] cam = params['cam'][cam_num-1] gps_reader_mark2 = GPSReader(args['gps_mark2']) gps_data_mark2 = gps_reader_mark2.getNumericData() lidar_loader = LDRLoader(args['frames']) imu_transforms_mark2 = IMUTransforms(gps_data_mark2) gps_times_mark2 = utc_from_gps_log_all(gps_data_mark2) frame_ldr_map = '' # TODO Check this if EXPORT_START != 0: video_reader.setFrame(EXPORT_START)