def transcribe_proc():
    """Run speech to text on the live audio stream in an endless loop.

    Every pass feeds a fresh ``audio_stream()`` to the transcriber, falls
    back to an empty transcription when nothing comes back, and prints the
    result as JSON with an added ``"timeout"`` field.

    NOTE(review): ``transcriber``, ``audio_stream``, ``sample_rate``,
    ``sample_width``, ``channels`` and ``is_timeout`` are not defined here;
    they are presumably supplied by an enclosing scope -- confirm.
    """
    while True:
        # One transcription per pass over the audio stream
        stream_result = transcriber.transcribe_stream(
            audio_stream(), sample_rate, sample_width, channels
        )
        _LOGGER.debug("Transcription result: %s", stream_result)

        # A falsy result is reported as an empty transcription
        if not stream_result:
            stream_result = Transcription.empty()

        payload = dataclasses.asdict(stream_result)
        payload["timeout"] = is_timeout
        print_json(payload)
def transcribe(args: argparse.Namespace):
    """Do speech to text on one or more WAV files.

    When ``args.wav_file`` is non-empty, each listed file is read and
    transcribed in turn. Otherwise a single WAV is read from stdin and
    streamed to the transcriber in chunks of ``args.frames_in_chunk``
    frames. One JSON result is printed per input; a falsy transcription is
    replaced by ``Transcription.empty()`` in both modes.

    Args:
        args: Parsed command-line arguments. Reads ``model_type``,
            ``model_dir``, ``graph_dir``, ``wav_file`` and
            ``frames_in_chunk``. ``model_dir`` and ``graph_dir`` are
            rewritten in place as ``Path`` objects; ``graph_dir`` defaults
            to ``model_dir / "graph"`` when not given.
    """
    # Load transcriber
    args.model_dir = Path(args.model_dir)
    if args.graph_dir:
        args.graph_dir = Path(args.graph_dir)
    else:
        args.graph_dir = args.model_dir / "graph"

    transcriber = KaldiCommandLineTranscriber(
        args.model_type, args.model_dir, args.graph_dir
    )

    # Do transcription
    try:
        if args.wav_file:
            # Transcribe WAV files
            for wav_path in args.wav_file:
                _LOGGER.debug("Processing %s", wav_path)

                # Context manager so the handle is closed promptly instead
                # of waiting for garbage collection.
                with open(wav_path, "rb") as wav_handle:
                    wav_bytes = wav_handle.read()

                result = transcriber.transcribe_wav(wav_bytes)
                if not result:
                    result = Transcription.empty()

                print_json(result)
        else:
            # Read WAV data from stdin
            if os.isatty(sys.stdin.fileno()):
                print("Reading WAV data from stdin...", file=sys.stderr)

            # Stream in chunks
            with wave.open(sys.stdin.buffer, "rb") as wav_file:

                def audio_stream(wav_file, frames_in_chunk):
                    """Yield the WAV's frames in chunks of frames_in_chunk."""
                    num_frames = wav_file.getnframes()
                    try:
                        while num_frames > frames_in_chunk:
                            yield wav_file.readframes(frames_in_chunk)
                            num_frames -= frames_in_chunk

                        if num_frames > 0:
                            # Last chunk
                            yield wav_file.readframes(num_frames)
                    except KeyboardInterrupt:
                        # Ctrl+C simply ends the stream early
                        pass

                result = transcriber.transcribe_stream(
                    audio_stream(wav_file, args.frames_in_chunk),
                    wav_file.getframerate(),
                    wav_file.getsampwidth(),
                    wav_file.getnchannels(),
                )

                # Same fallback as the WAV-file branch. The previous
                # `assert result` vanished under `python -O` and crashed
                # otherwise when nothing was recognised.
                if not result:
                    result = Transcription.empty()

                print_json(result)
    except KeyboardInterrupt:
        pass
    finally:
        # Always shut the transcriber down, even on error or Ctrl+C
        transcriber.stop()
async def transcribe_wav(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Speech to text from WAV file(s).

    Three input modes, chosen from ``args``:

    * ``args.wav_file`` and/or ``args.stdin_files``: transcribe each WAV
      path (paths from stdin are read one per line). Each result carries a
      ``"wav_name"`` key: the bare file name, or the path relative to
      ``args.relative_directory`` when that is set.
    * ``args.input_size``: stdin carries repeated records, each a line
      holding a byte count followed by that many bytes of WAV data.
      Reading stops at an empty size line, a size of zero or less, or EOF.
    * otherwise: all of stdin is treated as one WAV.

    Every result is printed as JSON; a falsy transcription is replaced by
    ``Transcription.empty()``. The transcriber is always stopped on exit.

    Args:
        args: Parsed command-line arguments. Reads ``open``, ``debug``,
            ``relative_directory``, ``wav_file``, ``stdin_files`` and
            ``input_size``.
        core: Profile core; provides ``check_trained``,
            ``get_transcriber`` and ``maybe_convert_wav``.

    Raises:
        AssertionError: If ``core.check_trained()`` is falsy.
    """
    from rhasspyasr import Transcription

    # Make sure profile has been trained
    assert core.check_trained(), "Not trained"

    # Get speech to text transcriber for profile
    transcriber = core.get_transcriber(open_transcription=args.open, debug=args.debug)

    # Directory to report WAV file names relative to
    relative_dir = (
        None if args.relative_directory is None else Path(args.relative_directory)
    )

    try:
        if args.wav_file or args.stdin_files:
            # Read WAV file paths.
            # `or []` keeps itertools.chain working when only stdin_files
            # is set and wav_file is None.
            wav_files = args.wav_file or []
            if args.stdin_files:
                _LOGGER.debug("Reading file paths from stdin")
                wav_files = itertools.chain(wav_files, sys.stdin)

            for wav_path_str in wav_files:
                wav_path_str = wav_path_str.strip()
                if not wav_path_str:
                    # Blank line (e.g. trailing newline on stdin); an empty
                    # string would otherwise resolve to the current directory.
                    continue

                # Load and convert
                wav_path = Path(wav_path_str)
                _LOGGER.debug("Transcribing %s", wav_path)
                wav_data = await core.maybe_convert_wav(wav_path.read_bytes())

                # Transcribe
                transcription = (
                    transcriber.transcribe_wav(wav_data) or Transcription.empty()
                )
                result = dataclasses.asdict(transcription)

                if relative_dir is None:
                    # Add name of WAV file to result
                    result["wav_name"] = wav_path.name
                else:
                    # Make relative to some directory
                    result["wav_name"] = str(
                        wav_path.absolute().relative_to(relative_dir.absolute())
                    )

                print_json(result)
        else:
            # Read WAV data from stdin
            _LOGGER.debug("Reading WAV data from stdin")

            if args.input_size:
                # Number of bytes is on separate line
                line = sys.stdin.buffer.readline().strip()
                if not line:
                    return

                num_bytes = int(line)
                while num_bytes > 0:
                    # Read in WAV. Follow-up reads are appended (the old
                    # code replaced the buffer, dropping earlier bytes) and
                    # an empty read means EOF, which previously looped
                    # forever.
                    wav_data = sys.stdin.buffer.read(num_bytes)
                    while len(wav_data) < num_bytes:
                        chunk = sys.stdin.buffer.read(num_bytes - len(wav_data))
                        if not chunk:
                            break
                        wav_data += chunk

                    if len(wav_data) < num_bytes:
                        _LOGGER.warning(
                            "Expected %s byte(s) of WAV data, got %s before end of input",
                            num_bytes,
                            len(wav_data),
                        )
                        break

                    # Transcribe
                    wav_data = await core.maybe_convert_wav(wav_data)
                    transcription = (
                        transcriber.transcribe_wav(wav_data) or Transcription.empty()
                    )
                    result = dataclasses.asdict(transcription)

                    print_json(result)

                    # Next WAV
                    line = sys.stdin.buffer.readline().strip()
                    if not line:
                        break

                    num_bytes = int(line)
            else:
                # Load and convert entire input
                wav_data = await core.maybe_convert_wav(sys.stdin.buffer.read())

                # Transcribe
                transcription = (
                    transcriber.transcribe_wav(wav_data) or Transcription.empty()
                )
                result = dataclasses.asdict(transcription)

                print_json(result)
    finally:
        transcriber.stop()