def main():
    player = AudioPlayer()
    player.play()
    tetris_environment = TetrisEnvironment()
    timer = 0
    reward = 0
    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
        while not tetris_environment.gameover:
            # Drain one queued keypress per tick and apply it to the board
            if input_queue.qsize() > 0:
                key = input_queue.get()
                if key == 'q':
                    print('Exit the GAME')
                    break
                elif key == 'a' or key == '\x1b[D' or key == keyboard.Key.left:
                    reward = tetris_environment.move_left()
                elif key == 'd' or key == '\x1b[C' or key == keyboard.Key.right:
                    reward = tetris_environment.move_right()
                elif key == 's' or key == '\x1b[B' or key == keyboard.Key.down:
                    reward = tetris_environment.drop()
                elif key == 'w' or key == '\x1b[A' or key == '.' or key == keyboard.Key.up:
                    reward = tetris_environment.rotate_right()
                elif key == ',':
                    reward = tetris_environment.rotate_left()
                draw_board(tetris_environment)
                print('Reward: {}'.format(reward))
            # Once per second of game time, let gravity advance the piece
            if timer % FPS == 0:
                reward = tetris_environment.wait()
                draw_board(tetris_environment)
                print('Reward: {}'.format(reward))
            timer += 1
            time.sleep(1 / FPS)
    player.stop()
    print('GAME OVER')
    print('YOUR SCORE: {0}'.format(tetris_environment.score))
    print('')
    highscore = Highscore()
    highscore.write(tetris_environment.score)
    print(highscore)
    exit(0)
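# --- Hedged sketch, not from the original source: main() above reads keys from
# input_queue and hands on_press/on_release to pynput's keyboard.Listener, but
# neither is defined in this excerpt. A minimal implementation consistent with
# that usage might look like the following; the module-level queue and the
# exact forwarding behaviour are assumptions.
import queue

input_queue = queue.Queue()

def on_press(key):
    # pynput delivers a KeyCode (printable keys, which have .char) or a Key
    # (special keys); forward whichever value the elif chain in main() matches.
    try:
        input_queue.put(key.char)   # printable characters, e.g. 'a', 'd'
    except AttributeError:
        input_queue.put(key)        # special keys, e.g. keyboard.Key.left

def on_release(key):
    # Nothing to do on release; the listener just requires a callback.
    pass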
def main(): """audio_server.py: main function""" player = AudioPlayer(constants.CHUNK_SIZE, constants.WIDTH, constants.RATE, constants.CHANNELS) socket_client = socket.socket() socket_client.bind(("", constants.PORT)) socket_client.listen(1) client, address = socket_client.accept() print("Audio Connection from:", str(address)) while True: data = client.recv(8192) if not data: break player.audio_data = data player.play() client.close() player.close()
class NightstandApp(App):
    def __init__(self):
        super(NightstandApp, self).__init__()
        with open('configuration.yaml', 'r') as config:
            data = config.read()
        # safe_load avoids constructing arbitrary Python objects from the config
        self.configuration = yaml.safe_load(data)
        self.player = AudioPlayer()
        self.current_uid = None
        self.reader = RfidReader()

    def _keyboard_closed(self):
        self._keyboard.unbind(on_key_down=self._on_keyboard_down)
        self._keyboard = None

    def _on_keyboard_down(self, keyboard, keycode, text, modifiers):
        # Debug shortcuts: simulate adding/removing a figurine from the keyboard
        if keycode[1] == 'a':
            self.message_received('1234', 'figurine_added')
        elif keycode[1] == 'r':
            self.message_received('1234', 'figurine_removed')
        return True

    def build(self):
        self.main = Main()
        self.main.manager.state = 'main'
        self.main.manager.transition = NoTransition()
        self.main.ids.volume_slider.bind(value=self.on_volume_slider_change)
        self.main.ids.audio_list.data_directory = self.configuration[
            'data_directory']
        self.main.ids.audio_list.audio_directory = os.path.join(
            self.configuration['data_directory'], 'audio')
        self.main.ids.audio_list.show_all()
        start_new_thread(self.update_seek_slider, ())
        self.initialize_debug_keyboard()
        return self.main

    def initialize_debug_keyboard(self):
        # The hardware RFID reader only exists on the Linux target; elsewhere
        # the debug keyboard stands in for it
        if not sys.platform.startswith('linux'):
            self._keyboard = Window.request_keyboard(self._keyboard_closed, self)
            self._keyboard.bind(on_key_down=self._on_keyboard_down)

    def on_volume_slider_change(self, instance, value):
        self.player.set_volume(value)

    def seek_to_user(self):
        new_position = self.main.ids.seek_slider.value
        self.player.seek(new_position)

    def show_playing_screen(self, restart_playback=True):
        self.root.manager.current = 'playing'
        self.root.manager.state = 'playing'
        if restart_playback:
            self.player.play(self.figurine.get_audio_path())

    def show_create_figurine_screen(self):
        self.root.manager.current = 'create_figurine'

    def show_main_screen(self):
        self.root.manager.current = 'main'
        self.root.manager.state = 'main'
        self.current_uid = None

    def update_seek_slider(self):
        while True:
            (position, length) = self.player.seek_information()
            self.main.ids.seek_slider.range = (0, length)
            self.main.ids.seek_slider.value = position
            self.main.ids.seek_time.text = str(self.player.remaining())
            if self.player.is_playing():
                self.main.ids.play_pause_button.text = 'Pause'
                self.main.ids.playing_label.text = 'Playing..'
                self.main.ids.audio_name.text = os.path.basename(
                    self.figurine.get_audio_path())
                self.main.ids.seek_slider.opacity = 100
                self.main.ids.seek_time.opacity = 100
            else:
                if position >= 0.99 * length:
                    self.main.ids.play_pause_button.text = 'Replay'
                    self.main.ids.playing_label.text = ''
                    self.main.ids.seek_slider.opacity = 0
                    self.main.ids.seek_time.opacity = 0
                else:
                    self.main.ids.play_pause_button.text = 'Play'
                    self.main.ids.playing_label.text = ''
                    self.main.ids.seek_slider.opacity = 100
                    self.main.ids.seek_time.opacity = 100
            time.sleep(1)

    def toggle_pause(self):
        if self.player is not None:
            if self.player.is_playing():
                self.player.pause()
            else:
                (position, length) = self.player.seek_information()
                if position >= 0.99 * length:
                    self.player.replay()
                else:
                    self.player.resume()

    def save_figurine(self):
        data = self.main.ids.audio_list.selection
        selected_audio_path = os.path.join(data['directory'], data['name'])
        self.figurine = Figurine(self.current_uid or self.requested_uid,
                                 self.configuration['data_directory'])
        self.figurine.save(selected_audio_path)
        self.show_playing_screen()
        self.initialize_debug_keyboard()

    def delete_figurine(self):
        if self.player is not None and self.player.is_playing():
            self.player.pause()
        self.figurine.delete()
        self.show_main_screen()

    def message_received(self, uid, action):
        print(uid + ' / ' + action)
        if action == 'figurine_added':
            if self.current_uid == uid:
                self.show_playing_screen(False)
                self.player.resume()
            else:
                self.figurine = Figurine(uid,
                                         self.configuration['data_directory'])
                if self.figurine.exists():
                    self.show_playing_screen()
                else:
                    self.show_create_figurine_screen()
                    self.requested_uid = uid
                    # We must not set current_uid here, as otherwise the cancel
                    # button on the 'Add figurine' screen does not work
                    return
            self.current_uid = uid
        elif action == 'figurine_removed':
            self.player.pause()
            self.show_main_screen()

    def check_rfid_reader(self, delta_time):
        self.reader.read_rfid(self.message_received)

    def shutdown(self):
        os.system('sudo poweroff')
        App.get_running_app().stop()
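# --- Hedged sketch, not from the original source: check_rfid_reader(self,
# delta_time) has the signature of a Kivy Clock callback, so on the Linux
# target it is presumably polled on an interval. Wiring it up might look like
# this; the 0.5-second period and the helper name are placeholders.
from kivy.clock import Clock

def start_rfid_polling(app, period=0.5):
    # Clock.schedule_interval passes the elapsed delta_time to the callback,
    # matching check_rfid_reader's signature
    Clock.schedule_interval(app.check_rfid_reader, period)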
eyesController.holdEyes()
print("Initial facial states set")

# Analyze file
result = audioPlayer.analyze(wavFile)
mouthController.setChunkAnalysisData(result)
print("Wav file analysis complete")

# Start the controller threads
mouthController.startControlThread()
print("Mouth controller started")
eyesController.startControlThread()
print("Eye controller started")

# Play the audio
audioPlayer.play(wavFile)

# Terminate everything and wait for completion
mouthController.terminateControlThread()
eyesController.terminateControlThread()
mouthController.join()
eyesController.join()

# Set final states
mouthController.closeMouth()
eyesController.openEyes()
sleep(0.5)
mouthController.holdMouth()
eyesController.holdEyes()
print("Done")
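# --- Hedged sketch, an assumption about code outside this excerpt: the script
# above drives the controllers through startControlThread /
# terminateControlThread / join, which suggests each controller wraps a worker
# thread. A minimal skeleton consistent with that interface; the class name
# and internals are guesses.
import threading

class FaceControllerBase:
    def __init__(self):
        self._thread = None
        self._running = False

    def startControlThread(self):
        # Spawn the control loop in a background thread
        self._running = True
        self._thread = threading.Thread(target=self._control_loop)
        self._thread.start()

    def terminateControlThread(self):
        # Ask the control loop to stop; join() below waits for it to finish
        self._running = False

    def join(self):
        if self._thread is not None:
            self._thread.join()

    def _control_loop(self):
        while self._running:
            pass  # drive the servos according to the analysis data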
def process_single_video(video_dir, output_dir, face_detection_model,
                         face_validation_model, speaker_validation,
                         output_video_dir=None):
    # Export the video's audio track to the temp directory, resampled to
    # 16 kHz mono
    audio_tmp = os.path.join(config.temp_dir, 'audio.wav')
    command = (
        "ffmpeg -y -i %s -async 1 -ac 1 -vn -acodec pcm_s16le -ar 16000 %s > %s 2>&1"
        % (video_dir, audio_tmp, os.path.join(config.log_dir, "ffmpeg.log")))
    output = subprocess.call(command, shell=True, stdout=None)
    sample_rate, audio = wavfile.read(audio_tmp)
    # print(audio.shape)

    # Open the label output file
    predict_results = open(output_dir, "w")
    # predict_results = open(os.path.join(os.getcwd(), 'result', POI, POI + '-' + str(config.video_num) + '.txt'), "w")

    # Initialize temporary variables
    tracker_list = []
    candidates = []
    series_id = 0

    # Check the video frame rate and frame count
    cap = cv2.VideoCapture(video_dir)
    video_fps = cap.get(cv2.CAP_PROP_FPS)
    video_total_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    if config.enable_syncnet:
        assert video_fps == 25
    print("Video FPS:", video_fps)

    # Optionally write an output video; this requires the extra
    # output_video_dir argument
    if config.write_video:
        videoWriter = cv2.VideoWriter(
            os.path.join(output_video_dir, 'song.avi'),
            cv2.VideoWriter_fourcc(*'XVID'), video_fps, (1280, 720))

    # If the video is wider than 1280, scale frames down to 1280x720
    if cap.get(cv2.CAP_PROP_FRAME_WIDTH) > 1280:
        need_to_resize = True
    else:
        need_to_resize = False

    # Skipping ahead n frames is for debugging only; real runs must not skip
    # start_frame = 0
    # cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    # shot_count = start_frame - 1
    shot_count = 0

    print("\033[94mstart process\033[0m")
    print("total frames: %s.." % (video_total_frame))

    # Pick confidence thresholds according to the video type
    video_type = video_dir.split("/")[-2]
    if config.enable_multi_conf:
        if video_type == "interview" or video_type == "speech":
            config.starting_confidence = config.easy_starting_confidence
            config.patient_confidence = config.easy_patient_confidence
        elif "entertain" in video_type or "tv" in video_type:
            config.starting_confidence = config.hard_starting_confidence
            config.patient_confidence = config.hard_patient_confidence
        else:
            config.starting_confidence = config.normal_starting_confidence
            config.patient_confidence = config.normal_patient_confidence
        # print("\033[94mthreshold: %s & %s\033[0m" % (str(config.starting_confidence), str(config.patient_confidence)))

    start_time = time.time()
    while True:
        # Read a frame, resizing if necessary
        if need_to_resize:
            success, raw_image = cap.read()
            if not success:
                break
            raw_image = cv2.resize(raw_image, (1280, 720))
        else:
            success, raw_image = cap.read()
            if not success:
                break
        image = raw_image.copy()
        bboxes, landmarks = face_detection_model.update(raw_image)

        # Update the existing trackers
        new_tracker_list = []
        for tracker in tracker_list:
            tracked, bbox = tracker.update(raw_image, shot_count)
            # If the target is lost, start the SyncNet process
            if tracked is False:
                if config.debug:
                    print("tracking failed")
                if config.enable_syncnet:
                    if config.debug:
                        print(16000 * tracker.start_shot // video_fps,
                              16000 * (tracker.end_shot) // video_fps)
                    # Assuming the default 25 fps, slice out the matching audio
                    # and check that its length is consistent
                    part_audio = audio[int(16000 * tracker.start_shot // video_fps):
                                       int(16000 * (tracker.end_shot) // video_fps)]
                    if len(part_audio) != len(
                            tracker.sync_seq) * 16000 // video_fps:
                        print("fatal: video and audio do not match")
                        print("startshot", tracker.start_shot)
                        print("endshot", tracker.end_shot)
                        print(tracker.series_name)
                        print(len(tracker.sync_seq))
                        print(len(part_audio))
                        # exit(-1)
                    if config.debug:
                        wavfile.write('temp/segment.wav', 16000, part_audio)
                        player = AudioPlayer('temp/segment.wav')
                    # Run SyncNet on both the original track and a silent
                    # track; wherever the silent track's confidence exceeds the
                    # threshold, zero out the corresponding confidence computed
                    # from the original track
                    offset, confidence, dists_npy = speaker_validation.evaluate(
                        video_fps, tracker.sync_seq, part_audio)
                    silent_audio = np.zeros(part_audio.shape, dtype=audio.dtype)
                    __, conf_silent, __ = speaker_validation.evaluate(
                        video_fps, tracker.sync_seq, silent_audio)
                    # print(conf_silent)
                    confidence[conf_silent > config.conf_silent_threshold] = 0
                    # confidence = conf_silent

                    # In debug mode, replay the segment with extra information
                    if config.debug:
                        print("Sequence length:", len(tracker.sync_seq))
                        debug_cap = cv2.VideoCapture(video_dir)
                        debug_cap.set(cv2.CAP_PROP_POS_FRAMES, tracker.start_shot)
                        player.play()
                        for i in range(len(tracker.sync_seq)):
                            if i < 6:
                                if need_to_resize:
                                    __, img = debug_cap.read()
                                    img = cv2.resize(img, (1280, 720))
                                else:
                                    __, img = debug_cap.read()
                                cv2.imshow('Speaking', img)
                                cv2.waitKey(40)
                            else:
                                if need_to_resize:
                                    __, img = debug_cap.read()
                                    img = cv2.resize(img, (1280, 720))
                                else:
                                    __, img = debug_cap.read()
                                box = tracker.bbox_seq[i]
                                lip_box = tracker.lip_box_seq[i]
                                try:
                                    confidence_caption = 'Conf: %.3f' % (
                                        confidence[i - 6])
                                    clr = int(
                                        max(min(confidence[i - 6] * 30, 255), 0))
                                    cv2.rectangle(img, (box[0], box[1]),
                                                  (box[2], box[3]),
                                                  (0, clr, 255 - clr), 2,
                                                  cv2.LINE_AA)
                                    cv2.rectangle(img, (lip_box[2], lip_box[0]),
                                                  (lip_box[3], lip_box[1]),
                                                  (255, 0, 0), 2, cv2.LINE_AA)
                                except Exception:
                                    confidence_caption = 'Conf: exceeded'
                                    cv2.rectangle(img, (box[0], box[1]),
                                                  (box[2], box[3]),
                                                  (0, 0, 255), 2, cv2.LINE_AA)
                                cv2.putText(img, confidence_caption,
                                            (int(box[0]), int(box[1]) + 20),
                                            cv2.FONT_HERSHEY_PLAIN, 1,
                                            (0, 0, 0), 2)
                                cv2.putText(img, confidence_caption,
                                            (int(box[0]), int(box[1]) + 20),
                                            cv2.FONT_HERSHEY_PLAIN, 1,
                                            (255, 255, 255), 1)
                                cv2.imshow('Speaking', img)
                                cv2.waitKey(40)
                        cv2.waitKey(0)
                    prelabels = speaker_validation.verification(
                        confidence, tracker.start_shot, predict_results)
                    candidates = candidates + prelabels
            else:
                new_tracker_list.append(tracker)
        tracker_list = new_tracker_list

        # For each face detected
        for boundary, landmark in zip(bboxes, landmarks):
            boundary = boundary.astype(int)  # np.int is deprecated
            center = [
                int((boundary[1] + boundary[3]) / 2),
                int((boundary[0] + boundary[2]) / 2)
            ]
            validation = face_validation_model.confirm_validity(
                raw_image, boundary=boundary, landmark=landmark)
            if validation:
                caption = "Yes"
                # Check whether this face is already covered by a tracker
                tracking = isTracking((center[1], center[0]), tracker_list)
                lip_center = np.mean(landmark[3:], axis=0)
                # new target
                if not tracking:
                    series_id += 1
                    new_tracker = CV_Tracker(raw_image, boundary, series_id,
                                             lip_center, shot_count)
                    tracker_list.append(new_tracker)
                else:
                    # Find the tracker that picked up this face
                    for tracker in tracker_list:
                        if tracker.valid is True:
                            continue
                        if tracker.is_valid(center):
                            # build lip picture sequence
                            tracker.update_lip_seq(raw_image, boundary,
                                                   lip_center)
            else:
                caption = "No"

            # In showimg mode, draw detection, validation and tracking info
            if config.showimg:
                cv2.rectangle(image, (boundary[0], boundary[1]),
                              (boundary[2], boundary[3]), (0, 255, 0), 2,
                              cv2.LINE_AA)
                index_color = 0
                for point in landmark:
                    pos = (point[0], point[1])
                    cv2.circle(image, pos, 1,
                               (255, 255, 255 / 68 * index_color), -1)
                    index_color = index_color + 1
                # lip center
                lip_center = np.mean(landmark[3:], axis=0)
                cv2.circle(image, (lip_center[0], lip_center[1]), 1,
                           (0, 0, 0), -1)
                for tracker in tracker_list:
                    if tracker.tracked is True:
                        bbox = tracker.bbox
                        cv2.rectangle(
                            image, (int(bbox[0]), int(bbox[1])),
                            (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])),
                            (255, 0, 0), 2, cv2.LINE_AA)
                        cv2.putText(image, str(tracker.series_name),
                                    (int(bbox[0]), int(bbox[1]) - 10),
                                    cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
                        cv2.putText(image, str(tracker.series_name),
                                    (int(bbox[0]), int(bbox[1]) - 10),
                                    cv2.FONT_HERSHEY_PLAIN, 1,
                                    (255, 255, 255), 1)
                    else:
                        print("Warning: an invalid tracker was not removed")
                cv2.putText(image, str(caption),
                            (boundary[0], boundary[1] - 10),
                            cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
                cv2.putText(image, str(caption),
                            (boundary[0], boundary[1] - 10),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

        # For trackers whose region no longer contains a face, decide whether
        # the tracker should be closed
        new_tracker_list = []
        for tracker in tracker_list:
            if tracker.valid is False:
                tracker.drop_count += 1
                tracker.update_lip_seq(raw_image, None, None)
            if tracker.drop():
                tracker.set_end_shot(shot_count)
                if config.debug:
                    print("tracker missed the target")
                # Before closing the tracker, process the frame sequence it
                # has buffered
                if config.enable_syncnet:
                    part_audio = audio[
                        int(16000 // video_fps * tracker.start_shot):
                        int(16000 // video_fps *
                            (tracker.end_shot - config.patience + 1))]
                    if len(part_audio) != len(
                            tracker.sync_seq[:-config.patience]
                    ) * 16000 // video_fps:
                        print("fatal: video and audio do not match")
                        print("startshot", tracker.start_shot)
                        print("endshot", tracker.end_shot)
                        print(len(tracker.sync_seq))
                        print(len(part_audio))
                        # exit(-2)
                    if config.debug:
                        wavfile.write('temp/segment.wav', 16000, part_audio)
                        player = AudioPlayer('temp/segment.wav')
                    offset, confidence, dists_npy = speaker_validation.evaluate(
                        video_fps, tracker.sync_seq[:-config.patience],
                        part_audio)
                    silent_audio = np.zeros(part_audio.shape, dtype=audio.dtype)
                    __, conf_silent, __ = speaker_validation.evaluate(
                        video_fps, tracker.sync_seq[:-config.patience],
                        silent_audio)
                    # print(conf_silent)
                    confidence[conf_silent > config.conf_silent_threshold] = 0
                    # confidence = conf_silent
                    if config.debug:
                        print("Sequence length:",
                              len(tracker.sync_seq[:-config.patience]))
                        debug_cap = cv2.VideoCapture(video_dir)
                        debug_cap.set(cv2.CAP_PROP_POS_FRAMES, tracker.start_shot)
                        player.play()
                        for i in range(
                                len(tracker.sync_seq) - config.patience):
                            if i < 6:
                                if need_to_resize:
                                    __, img = debug_cap.read()
                                    img = cv2.resize(img, (1280, 720))
                                else:
                                    __, img = debug_cap.read()
                                cv2.imshow('Speaking', img)
                                cv2.waitKey(40)
                            else:
                                if need_to_resize:
                                    __, img = debug_cap.read()
                                    img = cv2.resize(img, (1280, 720))
                                else:
                                    __, img = debug_cap.read()
                                box = tracker.bbox_seq[i]
                                lip_box = tracker.lip_box_seq[i]
                                try:
                                    confidence_caption = 'Conf: %.3f' % (
                                        confidence[i - 6])
                                    clr = int(
                                        max(min(confidence[i - 6] * 30, 255), 0))
                                    cv2.rectangle(img, (box[0], box[1]),
                                                  (box[2], box[3]),
                                                  (0, clr, 255 - clr), 2,
                                                  cv2.LINE_AA)
                                    cv2.rectangle(img, (lip_box[2], lip_box[0]),
                                                  (lip_box[3], lip_box[1]),
                                                  (255, 0, 0), 2, cv2.LINE_AA)
                                except Exception:
                                    confidence_caption = 'Conf: exceeded'
                                    cv2.rectangle(img, (box[0], box[1]),
                                                  (box[2], box[3]),
                                                  (0, 0, 255), 2, cv2.LINE_AA)
                                cv2.putText(img, confidence_caption,
                                            (int(box[0]), int(box[1]) + 20),
                                            cv2.FONT_HERSHEY_PLAIN, 1,
                                            (0, 0, 0), 2)
                                cv2.putText(img, confidence_caption,
                                            (int(box[0]), int(box[1]) + 20),
                                            cv2.FONT_HERSHEY_PLAIN, 1,
                                            (255, 255, 255), 1)
                                cv2.imshow('Speaking', img)
                                cv2.waitKey(40)
                        cv2.waitKey(0)
                    prelabels = speaker_validation.verification(
                        confidence, tracker.start_shot, predict_results)
                    candidates = candidates + prelabels
            else:
                new_tracker_list.append(tracker)
        tracker_list = new_tracker_list

        # Progress display
        if shot_count % 1000 == 0 and shot_count != 0:
            print('Shot {:d}, FPS {:.2f} '.format(
                shot_count, 1000 / (time.time() - start_time)),
                  end='\n')
            start_time = time.time()
        if config.showimg:
            cv2.imshow('Video', image)
        if config.write_video:
            videoWriter.write(image)
        shot_count += 1
        # Allow breaking out of the loop with ESC
        if cv2.waitKey(10) == 27:
            break

    predict_results.close()
    if config.enable_dataclean:
        dataclean(output_dir, video_total_frame)
    # evaluate
    if config.enable_evaluation:
        index = video_dir.rfind('.')
        FPR, Recall = evaluate_result(video_dir[:index] + ".csv", output_dir,
                                      video_total_frame)
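# --- Hedged sketch, not from the original source: isTracking() is called in
# process_single_video() but not defined in this excerpt. Based on its usage
# it presumably reports whether a face center already falls inside an active
# tracker's bounding box; the (x, y) point convention and the (x, y, w, h)
# bbox layout are inferred from the drawing code above, not confirmed.
def isTracking(point, tracker_list):
    x, y = point
    for tracker in tracker_list:
        # tracker.bbox is drawn above as (x, y, w, h)
        bx, by, bw, bh = tracker.bbox[:4]
        if bx <= x <= bx + bw and by <= y <= by + bh:
            return True
    return False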
from audio_player import AudioPlayer
from audio_recorder import AudioRecorder
import time
import os

if __name__ == "__main__":
    # Usage example for pyaudio
    while True:
        # play the audio
        a = AudioPlayer("sample_1202.wav")
        print("Start Playing: {}".format(time.time()))
        os.system("ping -c 1 1.1.1.1")  # punch play start
        a.play(start=0, end=11.5)
        print("End Playing: {}".format(time.time()))
        os.system("ping -c 1 8.8.4.4")  # punch play end
        a.close()
        # Start recording
        b = AudioRecorder()
        print("Waiting")
        print()
        time.sleep(180)