def client_reset(self):
    """Create a fresh JACK client and reset the per-client playback state.

    Registers the ``audio_out`` output port, derives ``self.sample_multiplier``
    from the JACK sample rate and ``self.orig_sample_rate``, creates a mono
    ``sinc_best`` resampler and installs the process/xrun callbacks.

    Raises:
        ValueError: if ``self.allow_fractional_resample`` is false and the
            JACK sample rate is not an integer multiple of
            ``self.orig_sample_rate``.
    """
    # Jack setup
    self.client = jack.Client('JackAudioSink')
    self.client.blocksize = self.block_size
    self.tmp_buf = np.zeros(self.block_size)
    self.tmp_buf_pos = 0
    self.is_active = False

    # Debug counters
    self.start_time = 0
    self.sample_count = 0
    self.xrun_count = 0

    self.port = self.client.outports.register('audio_out')

    # Make sure the sample rate works and set the multiplier
    if self.allow_fractional_resample:
        self.sample_multiplier = float(
            self.client.samplerate / self.orig_sample_rate)
    else:
        if self.client.samplerate % self.orig_sample_rate != 0:
            raise ValueError(
                f"OS sample rate {self.client.samplerate}"
                " must be evenly divisible by given sample rate "
                f"{self.orig_sample_rate}")
        self.sample_multiplier = int(
            self.client.samplerate / self.orig_sample_rate)
    self.resampler = samplerate.Resampler('sinc_best', channels=1)

    # Callback setup
    self.client.set_process_callback(self.process)
    self.client.set_xrun_callback(self.xrun)
def resample(data, source_to_target_ratio, ZSCORE,
             resample_method='sinc_best', N_channels_max=128):
    """Resample every column of ``data`` and optionally z-score the result.

    The columns of ``data`` (indexed as ``data[:, channel]``) are converted in
    groups of at most ``N_channels_max`` channels, each group with its own
    one-shot ``samplerate.Resampler``, and the groups are joined again along
    axis 1.

    Args:
        data: 2-D array; columns are treated as channels.
        source_to_target_ratio: source rate divided by target rate; its
            inverse is handed to the resampler as the conversion ratio.
        ZSCORE: when truthy, the joined result is passed through ``zscore``.
        resample_method: converter type forwarded to ``samplerate.Resampler``.
        N_channels_max: channels per group, capped at 128.

    Returns:
        The resampled (and optionally z-scored) array, or ``None`` when
        ``data`` has no columns and ``ZSCORE`` is falsy.
    """
    # TODO (from the original author): if downsampling by an integer, just
    # anti-alias and subsample?

    # 128 is the max for the underlying library
    group_width = min(N_channels_max, 128)
    total_channels = data.shape[1]
    conversion_ratio = 1 / source_to_target_ratio

    pieces = []
    for first in np.arange(0, total_channels, group_width):
        stop = min(first + group_width, total_channels)
        converter = samplerate.Resampler(resample_method,
                                         channels=stop - first)
        pieces.append(
            converter.process(data[:, first:stop], conversion_ratio,
                              end_of_input=True))

    if not pieces:
        data_mat = None
    elif len(pieces) == 1:
        data_mat = pieces[0]
    else:
        data_mat = np.concatenate(pieces, axis=1)

    if ZSCORE:
        data_mat = zscore(data_mat)
    return data_mat
def fn(config, inq, outq):
    """Worker loop: resample blocks from ``inq`` and push int16 data to ``outq``.

    Runs forever. Each item taken from ``inq`` is passed through a mono
    ``sinc_best`` resampler, converted to little-endian 16-bit integers and
    put on ``outq``.

    Args:
        config: not used by this function.
        inq: queue-like object providing ``get()``.
        outq: queue-like object providing ``put()``.
    """
    import samplerate

    resampler = samplerate.Resampler('sinc_best', channels=1)
    while True:
        chunk = inq.get()
        # NOTE(review): `i` is not defined in this function; presumably a
        # module-level conversion ratio -- confirm against the rest of the file.
        resampled = resampler.process(chunk, i)
        # Band-limited interpolation can overshoot the input peaks; clip so
        # the int16 cast saturates instead of wrapping around.
        pcm = numpy.clip(resampled, -32768, 32767).astype("<h")
        outq.put(pcm)
def process_samples(self):
    """Endlessly resample blocks from ``self.sample_pipe_out`` and output them.

    Each received block is transposed, resampled with a stereo
    ``sinc_fastest`` converter at ``self.sample_rate / self.client.samplerate``,
    scaled by ``self.gain`` and handed to ``self.output_data``.
    """
    converter = samplerate.Resampler('sinc_fastest', channels=2)

    # One block of silence is pushed through the converter before the loop;
    # its output is discarded (presumably a warm-up -- TODO confirm).
    converter.process(np.zeros((2048, 2)),
                      self.sample_rate / self.client.samplerate)

    while True:
        frames = self.sample_pipe_out.recv().T
        ratio = self.sample_rate / self.client.samplerate
        self.output_data(converter.process(frames, ratio) * self.gain)
def audio_runner(self):
    """Thread for getting data from the microphone.

    Searches all host APIs for an input device with more than one input
    channel whose name contains ``self.device_name_filter`` (the last match
    wins), opens a stereo 16-bit stream at the lowest rate reported by
    ``self.test_sample_rates`` and, until ``self.stop_process.value`` becomes
    truthy, reads 256-frame blocks, resamples them and sends them through
    ``self.frame_pipe_in``.

    The stream and the PyAudio instance are released on every exit path,
    including the "No device" error and exceptions raised while recording.

    Raises:
        AssertionError: if no device matches the filter (after printing the
            list of devices).
    """
    p = pyaudio.PyAudio()
    try:
        # Find matching audio device
        self.api_id = None
        self.device_id = None
        num_apis = p.get_host_api_count()
        for j in range(num_apis):
            numdevices = p.get_host_api_info_by_index(j).get('deviceCount')
            for i in range(numdevices):
                dev_info = p.get_device_info_by_host_api_device_index(j, i)
                if dev_info.get('maxInputChannels') > 1:
                    device_name = dev_info.get('name')
                    if self.device_name_filter in device_name:
                        self.api_id = j
                        self.device_id = i
                        print("Found device with id " + str(self.api_id) +
                              "." + str(self.device_id) + ": " + device_name)

        if self.device_id is None:
            print("No devices found that match filter. Device list:")
            for j in range(num_apis):
                numdevices = p.get_host_api_info_by_index(j).get('deviceCount')
                for i in range(numdevices):
                    dev_info = p.get_device_info_by_host_api_device_index(j, i)
                    device_name = dev_info.get('name')
                    supported_rates = self.test_sample_rates(p, i)
                    print("*", device_name, "channels:",
                          dev_info.get('maxInputChannels'), "rates:",
                          supported_rates)
            raise AssertionError("No device.")

        # NOTE(review): self.device_id is the index *within* its host API,
        # but it is used below as a global device index -- confirm this is
        # intended on systems with more than one host API.

        # Find best samplerate
        self.sample_rate = min(self.test_sample_rates(p, self.device_id))
        # NOTE(review): this integer is passed to the resampler as its
        # conversion ratio; if the goal is 16 kHz output the ratio would be
        # 16000 / self.sample_rate -- confirm against the consumer.
        sample_multiplier = int(self.sample_rate / 16000)
        resampler = samplerate.Resampler('sinc_fastest', channels=2)

        # Open stream
        stream = p.open(
            format=pyaudio.paInt16,
            channels=2,
            rate=self.sample_rate,
            input=True,
            frames_per_buffer=256,
            input_device_index=self.device_id,
        )
        try:
            # Record
            while not self.stop_process.value:
                data = stream.read(256, exception_on_overflow=False)
                samples = (np.frombuffer(data, dtype='int16')
                           .astype('float')
                           .reshape(256, 2))
                samples_res = resampler.process(samples, sample_multiplier)
                self.frame_pipe_in.send(samples_res)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
def __init__(self, samplerate, channels,
             master_settings: AudioOutputSettings):
    """Set up the output stream, signals, settings and resampler.

    Args:
        samplerate: sample rate passed to the ``sd.OutputStream``.
        channels: channel count used for both the stream and the resampler.
        master_settings: settings object stored as ``self.master_settings``.
    """
    stream_options = {
        'samplerate': samplerate,
        'channels': channels,
        'blocksize': 0,
        'dtype': 'float32',
    }
    self.stream = sd.OutputStream(**stream_options)
    self.signals = AudioOutputSignals()
    self.settings = AudioOutputSettings()
    self.master_settings = master_settings
    self.resampler = sr.Resampler(channels=channels)
def recognize_wav(model_name, wav_filename):
    """Generate the recognition requests for one WAV file.

    Yields, in order: a configuration request selecting ``model_name`` (with
    credentials taken from the module-level ``args``), one sound request per
    block read from ``wav_filename`` and a final finish request. After each
    sound request the generator sleeps 0.2 s.

    The target rate is 8000 Hz when the model name contains ``phone_call`` or
    ``ivr`` (case-insensitive) and 16000 Hz otherwise; blocks are resampled
    only when the file rate differs from it.

    Args:
        model_name: id of the recognition model.
        wav_filename: path of the WAV file to send.
    """
    # Configure recognition for the model model_name
    yield AsrService_pb2.RecognizeRequest(
        config=AsrService_pb2.RecognitionConfig(
            auth=AsrService_pb2.Auth(client_id=args.client_id,
                                     domain_id=args.domain_id,
                                     api_key=args.api_key),
            model=base_pb2.Model(id=model_name)))

    lowered_name = model_name.lower()
    model_samplerate = (8000 if 'phone_call' in lowered_name
                        or 'ivr' in lowered_name else 16000)
    int16_limits = np.iinfo(np.int16)

    # Read the wav file in blocks of 1/5 second (matches the 0.2 s sleep below)
    with sf.SoundFile(wav_filename, mode='r') as wav:
        wav_samplerate = wav.samplerate
        samples_to_read = int(wav_samplerate / 5)

        # Resampling settings
        # Converter type:
        # sinc_best = 0, sinc_medium = 1, sinc_fastest = 2,
        # zero_order_hold = 3, linear = 4
        # Description: http://www.mega-nerd.com/libsamplerate/api_misc.html
        # NOTE(review): the resampler is created with the default channel
        # count, so this presumably expects mono files -- confirm.
        resampler = sr.Resampler(converter_type="sinc_best")
        resampling_ratio = float(model_samplerate) / wav_samplerate
        print("resampling_ratio: {}\n".format(resampling_ratio))

        while wav.tell() < wav.frames:
            wave_data = wav.read(samples_to_read, dtype="int16")
            sound_for_recognition = wave_data

            # Resample only when the model and file rates differ
            if resampling_ratio != 1.0:
                resampled_data = resampler.process(
                    wave_data, resampling_ratio,
                    end_of_input=(wav.tell() >= wav.frames))
                # Sinc interpolation can overshoot the input peaks; clip so
                # the int16 cast saturates instead of wrapping around.
                sound_for_recognition = np.clip(
                    resampled_data, int16_limits.min,
                    int16_limits.max).astype(np.int16)

            # Send the sound for recognition
            yield AsrService_pb2.RecognizeRequest(sound=AsrService_pb2.Sound(
                samples=sound_for_recognition.tobytes()))
            time.sleep(0.2)

    # Finish recognition
    yield AsrService_pb2.RecognizeRequest(finish=base_pb2.Finish())
def audio_player(speed_factor=1.0):
    """Yield an ``AudioOut`` while a matching output stream is open.

    A stereo ``linear`` resampler and ``speed_factor`` are handed to
    ``AudioOut``; its ``output_rate``, ``buffer_size`` and ``stream_callback``
    configure a low-latency int16 ``sounddevice.OutputStream`` that stays open
    for as long as the generator is suspended at the ``yield``.

    NOTE(review): presumably wrapped by a context-manager decorator outside
    this block -- confirm.
    """
    # Perform late imports: these can fail if a linux machine doesn't have
    # portaudio or libsamplerate installed.
    import samplerate
    import sounddevice

    audio_out = AudioOut(samplerate.Resampler("linear", channels=2),
                         speed_factor)
    output_stream = sounddevice.OutputStream(
        samplerate=audio_out.output_rate,
        dtype="int16",
        channels=2,
        latency="low",
        blocksize=audio_out.buffer_size,
        callback=audio_out.stream_callback,
    )
    with output_stream:
        yield audio_out
def _make_resampler(self, actual, preferred):
    """Build an async resampling coroutine function and report its output rate.

    The returned coroutine function takes ``(input_queue, output_queue)`` and
    copies byte blocks from one to the other until it receives an empty
    block, which it forwards as the end marker.

    Args:
        actual: sample rate of the incoming data.
        preferred: sample rate the data should be converted to.

    Returns:
        ``(resample, rate)``. When ``numpy`` or ``samplerate`` cannot be
        imported, ``resample`` passes data through unchanged and ``rate`` is
        ``actual``; otherwise data is converted with ratio
        ``preferred / actual`` and ``rate`` is ``preferred``.
    """
    try:
        import numpy
        import samplerate
    except ImportError:
        self._logger.warning(
            "samplerate not installed; expect glitches during playback")

        async def resample(input_queue, output_queue):
            while True:
                data = await input_queue.get()
                await output_queue.put(data)
                if not data:
                    break

        return resample, actual

    resampler = samplerate.Resampler()

    def resample_worker(input_data, end):
        # Unsigned 16-bit little-endian samples -> floats in [-1, 1).
        input_array = numpy.frombuffer(input_data, dtype="<u2")
        input_array = (input_array.astype(numpy.float32) - 32768) / 32768
        output_array = resampler.process(input_array,
                                         ratio=preferred / actual,
                                         end_of_input=end)
        # The converter can overshoot the input peaks; clip so the uint16
        # cast saturates instead of wrapping around.
        output_array = numpy.clip(output_array * 32768 + 32768, 0, 65535)
        return output_array.astype(numpy.uint16).tobytes()

    async def resample(input_queue, output_queue):
        while True:
            input_data = await input_queue.get()
            # The conversion runs in the default executor so it does not
            # block the event loop; an empty block marks the end of input.
            output_data = await asyncio.get_running_loop().run_in_executor(
                None, resample_worker, input_data, not input_data)
            if output_data:
                await output_queue.put(output_data)
            if not input_data:
                await output_queue.put(b"")
                break

    return resample, preferred
def recognize_from_micro(model_name, block_size_ms):
    """Generate recognition requests from live microphone audio.

    Yields a configuration request for ``model_name`` (credentials come from
    the module-level ``args``), then one sound request per captured block
    until the ``q`` key is pressed, then a finish request. Every block sent
    is also written to ``recorded.wav``.

    The target rate is 8000 Hz when the model name contains ``phone_call`` or
    ``ivr`` (case-insensitive) and 16000 Hz otherwise.

    Args:
        model_name: id of the recognition model.
        block_size_ms: length of one captured block in milliseconds.
    """
    # Configure recognition for the model model_name
    yield AsrService_pb2.RecognizeRequest(
        config=AsrService_pb2.RecognitionConfig(
            auth=AsrService_pb2.Auth(client_id=args.client_id,
                                     domain_id=args.domain_id,
                                     api_key=args.api_key),
            model=base_pb2.Model(id=model_name)))

    lowered_name = model_name.lower()
    model_samplerate = (8000 if 'phone_call' in lowered_name
                        or 'ivr' in lowered_name else 16000)

    # Get the sample rate of the default microphone
    microphone_samplerate = 0
    with create_default_input_stream(
            user_callback=test_callback) as input_device:
        microphone_samplerate = input_device.samplerate

    # Queue of captured sound blocks
    queue = Queue()

    # Callback handling each block of sound delivered by the microphone
    def capture_sound_callback(indata, frames, time, status):
        if status:
            print(status, flush=True)
        queue.put(indata.copy())

    # Number of samples to take from the microphone per block
    bsize_micro = int(microphone_samplerate * block_size_ms / 1000.0)

    # Resampling setup
    screen.addstr(
        "microphone samplerate: {} \n".format(microphone_samplerate))
    screen.refresh()
    resampling_ratio = float(model_samplerate) / microphone_samplerate
    resampler = sr.Resampler(converter_type="sinc_best")
    int16_limits = np.iinfo(np.int16)

    screen.addstr("Press Q for quit\n\n")
    screen.refresh()

    # Also write to a file so the captured sound can be checked
    with sf.SoundFile("recorded.wav", mode="w",
                      samplerate=int(model_samplerate), channels=1,
                      subtype="PCM_16") as file:
        with create_default_input_stream(
                user_blocksize=bsize_micro,
                user_callback=capture_sound_callback):
            while True:
                raw_sound_from_microphone = queue.get()

                # Keep only the first channel: the resampler and the output
                # file are both mono. (Dropping just the last column would
                # leave several channels when the device has more than two.)
                _, cols = raw_sound_from_microphone.shape
                raw_sound = (raw_sound_from_microphone[:, :1]
                             if cols > 1 else raw_sound_from_microphone)

                # Resample to the model rate; clip so that converter
                # overshoot saturates instead of wrapping in the int16 cast.
                resampled = resampler.process(raw_sound, resampling_ratio)
                sound_for_recognition = np.clip(
                    resampled, int16_limits.min,
                    int16_limits.max).astype(np.int16)

                # Send the sound for recognition
                yield AsrService_pb2.RecognizeRequest(
                    sound=AsrService_pb2.Sound(
                        samples=sound_for_recognition.tobytes()))
                file.write(sound_for_recognition)

                # Stop recording when the key is pressed
                if keyboard.is_pressed("q"):
                    break

    # Finish recognition
    yield AsrService_pb2.RecognizeRequest(finish=base_pb2.Finish())
def main():
    """Rearrange chunks of a palette sound file so it resembles a target file.

    Steps: parse the command line, read both files, resample the palette to
    the target's sample rate if they differ, cut both into chunks of
    ``--chunk-length`` milliseconds, stretch the palette to the target's
    chunk count, zero-pad short chunks, then repeatedly swap two random
    result chunks whenever that lowers the summed mean squared error against
    the target. The loop ends after ``--max-swap-fails`` consecutive
    non-improving attempts and the result is written to ``out``.
    """
    parser = argparse.ArgumentParser(
        'Rearrange a sound file to match another')
    parser.add_argument('target', type=str, help='the sound file to recreate')
    parser.add_argument('palette', type=str,
                        help='the sound file to recreate it from')
    parser.add_argument('out', type=str, help='the sound file to output')
    parser.add_argument(
        '--chunk-length', type=int,
        help='the length of each chunk the sound files are divided in',
        default=5)
    parser.add_argument('--seed', type=int,
                        help='the seed of the random number generator')
    parser.add_argument(
        '--max-swap-fails', type=int, default=250,
        help='the maximum number of failed attempts to improve the quality')
    args = parser.parse_args(sys.argv[1:])

    random.seed(args.seed)

    target_data, target_samplerate = soundfile.read(
        args.target, always_2d=True, dtype='float32')
    palette_data, palette_samplerate = soundfile.read(
        args.palette, always_2d=True, dtype='float32')

    if palette_samplerate != target_samplerate:
        print('Resampling palette...')
        progress_bar = progressbar.ProgressBarThread()
        progress_bar.start()
        resampler = libsamplerate.Resampler(
            libsamplerate.converters.ConverterType.sinc_best, channels=2)
        # The whole file is converted in one call, so mark it as the end of
        # input; otherwise the converter holds back the tail of the signal.
        palette_data = resampler.process(
            palette_data, target_samplerate / palette_samplerate,
            end_of_input=True)
        progress_bar.stop()
    sample_rate = target_samplerate

    print('Chopping up sound files...')
    progress_bar = progressbar.ProgressBarThread()
    progress_bar.start()
    # --chunk-length is in milliseconds.
    samples_per_chunk = sample_rate * args.chunk_length // 1000
    target_chunks = divide_sound_file(target_data, samples_per_chunk)
    palette_chunks = divide_sound_file(palette_data, samples_per_chunk)
    progress_bar.stop()

    print('Moulding palette...')
    progress_bar = progressbar.ProgressBarThread()
    progress_bar.start()
    result_chunks = stretch_palette(palette_chunks, len(target_chunks))
    assert len(result_chunks) == len(target_chunks)
    progress_bar.stop()

    print('Normalizing chunks...')
    num_chunks = len(target_chunks)
    progress_bar = progressbar.ProgressBarThread(2 * num_chunks)
    # Started like every other progress bar in this function.
    progress_bar.start()
    # Zero-pad short (trailing) chunks so every chunk has the same shape.
    for i, chunk in enumerate(target_chunks):
        length, num_channels = chunk.shape
        if length < samples_per_chunk:
            target_chunks[i] = numpy.append(
                chunk, [[0, 0]] * (samples_per_chunk - length), axis=0)
        assert target_chunks[i].shape == (samples_per_chunk, 2)
        progress_bar.progress(i)
    for i, chunk in enumerate(result_chunks):
        length, num_channels = chunk.shape
        if length < samples_per_chunk:
            result_chunks[i] = numpy.append(
                chunk, [[0, 0]] * (samples_per_chunk - length), axis=0)
        assert result_chunks[i].shape == (samples_per_chunk, 2)
        # Second half of the 2 * num_chunks total.
        progress_bar.progress(num_chunks + i)
    progress_bar.stop()

    print('Maximizing sound quality...')
    progress_bar = progressbar.ProgressBarThread(args.max_swap_fails)
    progress_bar.start()
    num_failed_swaps = 0
    num_failed_swaps_hist = [num_failed_swaps]
    total_swaps = 0
    total_iters = 0
    # A swap needs two distinct chunks; with fewer there is nothing to do
    # (and random.sample would raise).
    while num_chunks >= 2 and num_failed_swaps < args.max_swap_fails:
        i, k = random.sample(range(0, num_chunks), k=2)
        # Current error of the pair versus the error after swapping them.
        cdiff_i = ((result_chunks[i] - target_chunks[i])**2).mean()
        cdiff_k = ((result_chunks[k] - target_chunks[k])**2).mean()
        cdiff = cdiff_i + cdiff_k
        ndiff_i = ((result_chunks[k] - target_chunks[i])**2).mean()
        ndiff_k = ((result_chunks[i] - target_chunks[k])**2).mean()
        ndiff = ndiff_i + ndiff_k
        if ndiff < cdiff:
            num_failed_swaps_hist = num_failed_swaps_hist[
                -args.max_swap_fails:]
            num_failed_swaps_hist.append(num_failed_swaps)
            num_failed_swaps = 0
            total_swaps += 1
            result_chunks[i], result_chunks[k] = (result_chunks[k],
                                                  result_chunks[i])
        else:
            num_failed_swaps += 1
        progress_bar.progress(
            numpy.average(num_failed_swaps_hist),
            's: {}, t: {}'.format(total_swaps, total_iters))
        total_iters += 1
    progress_bar.stop()

    soundfile.write(args.out, numpy.concatenate(result_chunks), sample_rate)
def __init__(self, input_device):
    """Select the input device and derive the stream parameters.

    The device named ``input_device`` is looked up among the devices of the
    default host API; if it is not found the default input device is used.
    The capture rate is ``SAMPLERATE`` when the device reports support for
    it, otherwise the device's default rate, and ``self.resampler_ratio`` is
    the factor from capture rate to ``SAMPLERATE``.

    Args:
        input_device: name of the preferred input device.
    """
    logger.debug('MicrophoneStream INIT')
    pa = pyaudio.PyAudio()
    self._audio_interface = pa
    self._inputDeviceName = input_device
    self._inputDeviceIndex = None
    self._num_channels = 1
    self._format = pyaudio.paInt16
    self._outputSampleRate = SAMPLERATE
    self.meterQueue = None
    self.meter_peak_np = np.empty(0, dtype=np.int16)
    self.meter_time = float(0)
    self.recordingFilename = None

    host_api = pa.get_default_host_api_info()
    default_input = pa.get_default_input_device_info()
    host_api_index = host_api.get('index')

    # Look for the requested device by name within the default host API.
    for position in range(host_api.get('deviceCount')):
        candidate = pa.get_device_info_by_host_api_device_index(
            host_api_index, position)
        if candidate.get('name') == self._inputDeviceName:
            self._inputDeviceIndex = candidate.get('index')
            break

    # Fall back to the default input device when the name was not found.
    if self._inputDeviceIndex is None:
        self._inputDeviceName = default_input.get('name')
        self._inputDeviceIndex = default_input.get('index')

    device_info = pa.get_device_info_by_index(self._inputDeviceIndex)
    self._rate = int(device_info.get('defaultSampleRate'))

    # Prefer capturing directly at the output rate; a ValueError from the
    # support check means "not supported" and the default rate is kept.
    try:
        supported = pa.is_format_supported(
            self._outputSampleRate,
            input_device=self._inputDeviceIndex,
            input_channels=self._num_channels,
            input_format=self._format)
        if supported:
            self._rate = self._outputSampleRate
    except ValueError:
        pass

    self.resampler = sr.Resampler()
    self.resampler_ratio = self._outputSampleRate / self._rate

    self._chunk_size = int(self._rate / 10)
    self._wavfile = None

    # Create a thread-safe buffer of audio data
    self._streamBuff = queue.Queue()
    self._recordingBuff = queue.Queue()
    self.closed = True

    # 2 bytes in 16 bit samples
    self._bytes_per_sample = 2 * self._num_channels
    self._bytes_per_second = self._rate * self._bytes_per_sample
    self._bytes_per_chunk = self._chunk_size * self._bytes_per_sample
    self._chunks_per_second = (
        self._bytes_per_second // self._bytes_per_chunk)
import noisereduce as nr import numpy as np import samplerate as sr from pyaudio import PyAudio, Stream, paInt16 from contextlib import asynccontextmanager, contextmanager, AsyncExitStack from typing import AsyncGenerator, Generator from threading import Thread import engines.blockout server = 'localhost:2700' loop = asyncio.get_event_loop() listening = True recording = True denoiser = engines.blockout.RNNoise() resampler = sr.Resampler() def listen1(): global recording while True: try: user_input = input("Type something to begin...\n") if (user_input == 'start'): recording = True start() if (user_input == 'stop'): print('stopping') recording = False
def test_match(data, converter_type, ratio=2.0):
    """The simple API and a one-shot ``Resampler`` must give matching output."""
    num_channels, input_data = data

    expected = samplerate.resample(input_data, ratio, converter_type)

    converter = samplerate.Resampler(converter_type, channels=num_channels)
    actual = converter.process(input_data, ratio, end_of_input=True)

    assert np.allclose(expected, actual)
def test_process(data, converter_type, ratio=2.0):
    """``Resampler.process`` must run without raising on the fixture data."""
    num_channels, input_data = data
    converter = samplerate.Resampler(converter_type, num_channels)
    converter.process(input_data, ratio)
def decode_chunked_partial_endpointing_mic(
        asr, feat_info, decodable_opts, paudio, input_microphone_id,
        channels=1, samp_freq=16000, record_samplerate=16000,
        chunk_size=1024, wait_for_start_command=False,
        record_message_history=False, compute_confidences=True,
        asr_client=None, speaker_str="Speaker",
        resample_algorithm="sinc_best", save_debug_wav=False,
        use_threads=False, minimum_num_frames_decoded_per_speaker=5,
        mic_vol_cutoff=0.5, use_local_mic=True,
        decode_control_channel='asr_control',
        audio_data_channel='asr_audio'):
    """Decode audio chunk by chunk until a shutdown command is received.

    Audio comes either from a local microphone stream opened through
    ``paudio`` or from the redis channel ``audio_data_channel``. Control
    commands (``start``, ``stop``, ``shutdown``, ``status``, ``reset_timer``)
    are read from the redis channel ``decode_control_channel``. Each chunk is
    resampled when ``record_samplerate != samp_freq``, reduced to the loudest
    channel when ``channels > 1`` and handed to ``advance_mic_decoding``,
    either inline or through a single-worker thread pool (``use_threads``).
    Utterances are finalized on endpoints, on speaker changes and when
    decoding is stopped.

    Args:
        asr, feat_info, decodable_opts: decoder objects passed on to the
            pipeline helpers.
        paudio: object whose ``open`` creates the microphone stream.
        input_microphone_id: input device index; also part of the result key.
        channels: number of interleaved channels in the incoming audio.
        samp_freq: sample rate handed to the decoder.
        record_samplerate: sample rate of the incoming audio.
        chunk_size: frames read from the microphone per iteration.
        wait_for_start_command: when true, decoding starts only after a
            ``start`` command.
        record_message_history: write ``asr_client.message_trace`` to
            ``message_history_replay.py`` after the loop.
        compute_confidences: not used inside this function.
        asr_client: client used for status and result messages.
            NOTE(review): defaults to None but is called unconditionally
            before and inside the loop -- confirm callers always pass one.
        speaker_str: speaker label; ``#c#`` is replaced by the channel index.
        resample_algorithm: converter type for ``samplerate.Resampler``.
        save_debug_wav: write ``debug.wav`` / ``debugraw.wav`` after the loop.
        use_threads: run decoding steps in a background thread.
        minimum_num_frames_decoded_per_speaker: frames that must be decoded
            before a speaker change is accepted.
        mic_vol_cutoff: channel volume norms below this are treated as 0.
        use_local_mic: read from the local microphone instead of redis.
        decode_control_channel: redis channel carrying control commands.
        audio_data_channel: redis channel carrying audio data.
    """
    # Subscribe to command and control redis channel
    p = red.pubsub()
    p.subscribe(decode_control_channel)

    if not use_local_mic:
        pa = red.pubsub()
        pa.subscribe(audio_data_channel)

    # Figure out if we need to resample
    need_resample = False
    if record_samplerate != samp_freq:
        print(
            "Activating resampler since record and decode samplerate are different:",
            record_samplerate, "->", samp_freq)
        resampler = samplerate.Resampler(resample_algorithm,
                                         channels=channels)
        need_resample = True
        ratio = samp_freq / record_samplerate
        print("Resample ratio:", ratio)
    int16_limits = np.iinfo(np.int16)

    # Initialize Python/Kaldi bridge
    print("Constructing decoding pipeline")
    adaptation_state = OnlineIvectorExtractorAdaptationState.from_info(
        feat_info.ivector_extractor_info)
    key = 'mic' + str(input_microphone_id)
    feat_pipeline, sil_weighting = initNnetFeatPipeline(
        adaptation_state, asr, decodable_opts, feat_info)
    print("Done")

    speaker = speaker_str.replace("#c#", "0")
    last_chunk = False
    utt, part = 1, 1
    prev_num_frames_decoded, offset_complete = 0, 0
    chunks_decoded = 0
    num_chunks = 0
    blocks = []
    rawblocks = []

    if use_local_mic:
        # Open microphone channel
        print("Open microphone stream with id" + str(input_microphone_id) +
              "...")
        stream = paudio.open(format=pyaudio.paInt16,
                             channels=channels,
                             rate=record_samplerate,
                             input=True,
                             frames_per_buffer=chunk_size,
                             input_device_index=input_microphone_id)
        print("Done!")

    do_decode = not wait_for_start_command
    need_finalize = False
    # Must exist before the first finalize: a "stop" command can request
    # finalization before any endpoint or speaker change has set it.
    resend_previous_waveform = False
    block, previous_block = None, None
    decode_future = None

    # Send event (with redis) to the front that ASR session is ready
    asr_client.asr_ready(speaker=speaker)

    # Initialize a ThreadPoolExecutor.
    # Note that we initialize the thread executor independently of whether we
    # actually use it later (the -t option). At the end of this loop we have
    # two code paths, one that uses a computation future (with -t) and one
    # without it.
    with ThreadPoolExecutor(max_workers=1) as executor:
        while not last_chunk:
            # Check if there is a message from the redis server first
            # (non-blocking!); if there is no new message msg is simply None.
            msg = p.get_message()

            # We check if there are externally sent control commands
            if msg is not None:
                print('msg:', msg)
                if msg['data'] == b"start":
                    print('Start command received!')
                    do_decode = True
                    asr_client.sendstatus(isDecoding=do_decode)
                elif msg['data'] == b"stop":
                    print('Stop command received!')
                    if do_decode and prev_num_frames_decoded > 0:
                        need_finalize = True
                    do_decode = False
                    asr_client.sendstatus(isDecoding=do_decode)
                elif msg['data'] == b"shutdown":
                    print('Shutdown command received!')
                    last_chunk = True
                elif msg['data'] == b"status":
                    print('Status command received!')
                    asr_client.sendstatus(isDecoding=do_decode)
                elif msg['data'] == b"reset_timer":
                    print('Reset time command received!')
                    asr_client.resetTimer()

            if use_local_mic:
                # We always consume from the microphone stream, even if we do
                # not decode
                block_raw = stream.read(chunk_size,
                                        exception_on_overflow=False)
                npblock = np.frombuffer(block_raw, dtype=np.int16)
            else:
                block_audio_redis_msg = next(pa.listen())
                if block_audio_redis_msg['type'] == "subscribe" \
                        and block_audio_redis_msg["data"] == 1:
                    print('audio msg:', block_audio_redis_msg)
                    print("Successfully connected to redis audio stream!")
                    continue
                else:
                    npblock = np.frombuffer(block_audio_redis_msg['data'],
                                            dtype=np.int16)

            # Resample the block if necessary, e.g. 48kHz -> 16kHz
            if need_resample:
                resample_input = np.array(npblock, copy=True)
                if channels > 1:
                    # The resampler was created for `channels` channels and
                    # expects (frames, channels), not the interleaved 1-D
                    # buffer.
                    resample_input = resample_input.reshape(-1, channels)
                block = resampler.process(resample_input, ratio)
                # Clip so converter overshoot saturates instead of wrapping
                # around in the int16 cast.
                block = np.clip(block, int16_limits.min,
                                int16_limits.max).astype(np.int16)
            else:
                block = npblock

            # Only save the wav if the save_debug flag is enabled
            # (TODO: investigate: does not seem to work with multiple channels)
            if save_debug_wav:
                blocks.append(block)
                rawblocks.append(npblock)

            # Block on the result of the decode if one is pending
            if use_threads and do_decode and block is not None \
                    and decode_future is not None:
                # This call blocks until the result is ready
                (need_endpoint_finalize, prev_num_frames_decoded, part,
                 utt) = decode_future.result()

                # Check if we need to finalize, disallow endpoint without a
                # single decoded frame
                if need_endpoint_finalize and prev_num_frames_decoded > 0:
                    need_finalize = True
                    resend_previous_waveform = True
                    print("prev_num_frames_decoded:",
                          prev_num_frames_decoded)

                if need_endpoint_finalize and prev_num_frames_decoded == 0:
                    print(
                        "WARN need_endpoint_finalize and prev_num_frames_decoded == 0"
                    )

            # Finalize the decoding here, if endpointing signalized that we
            # should start a new utterance. We might also need to finalize if
            # we switch from do_decode=True to do_decode=False (user
            # starts/stops decoding from frontend).
            if need_finalize and block is not None \
                    and prev_num_frames_decoded > 0:
                print("prev_num_frames_decoded:", prev_num_frames_decoded)
                out, confd = finalize_decode(asr, asr_client, key, part,
                                             speaker, utt)
                feat_pipeline, sil_weighting = reinitialize_asr(
                    adaptation_state, asr, feat_info, feat_pipeline)
                utt += 1
                part = 1

                if resend_previous_waveform and previous_block is not None:
                    # We always resend the last block for the new utterance
                    # (we only know that the endpoint is inside of a chunk,
                    # but not where exactly)
                    feat_pipeline.accept_waveform(samp_freq,
                                                  Vector(previous_block))
                    resend_previous_waveform = False

                need_finalize = False
                prev_num_frames_decoded = 0

            # If we operate on multichannel data, select the channel here
            # that has the highest volume (with some added heuristic: only
            # change the speaker if the previous speaker was active for
            # minimum_num_frames_decoded_per_speaker many frames)
            if channels > 1:
                block = np.reshape(block, (-1, channels))

                # Select loudest channel
                volume_norms = []
                for i in range(channels):
                    # We have a simplified concept of loudness: it is simply
                    # the L2 norm of the chunk interpreted as a vector (sqrt
                    # of the sum of squares). This has nothing to do with the
                    # physical loudness.
                    volume_norms.append(
                        np.linalg.norm(block[:, i] / 65536.0) * 10.0)

                volume_norms = [
                    0.0 if elem < mic_vol_cutoff else elem
                    for elem in volume_norms
                ]

                volume_norm = max(volume_norms)
                max_channel = volume_norms.index(volume_norm)
                block = block[:, max_channel]

                new_speaker = speaker_str.replace("#c#", str(max_channel))

                if sum(volume_norms) > 1e-10 and new_speaker != speaker \
                        and prev_num_frames_decoded >= minimum_num_frames_decoded_per_speaker:
                    print(
                        "Speaker change! Number of frames decoded for previous speaker:",
                        str(prev_num_frames_decoded))
                    speaker = new_speaker
                    need_finalize = True
                    resend_previous_waveform = True
            else:
                volume_norm = np.linalg.norm(block / 65536.0) * 10.0

            num_chunks += 1

            # Send status beacon periodically (to frontend, so it knows we
            # are alive)
            if num_chunks % 50 == 0:
                asr_client.sendstatus(isDecoding=do_decode)

            if do_decode:
                # In unthreaded mode the computation blocks this loop
                if not use_threads:
                    (need_endpoint_finalize, prev_num_frames_decoded, part,
                     utt) = advance_mic_decoding(
                         adaptation_state, asr, asr_client, block,
                         chunks_decoded, feat_info, feat_pipeline, key,
                         last_chunk, part, prev_num_frames_decoded,
                         samp_freq, sil_weighting, speaker, utt)

                    # Check if we need to finalize, disallow endpoint without
                    # a single decoded frame
                    if need_endpoint_finalize and prev_num_frames_decoded > 0:
                        need_finalize = True
                        resend_previous_waveform = True
                        print("prev_num_frames_decoded:",
                              prev_num_frames_decoded)
                else:
                    # In threaded mode, we submit a non-blocking computation
                    # request to the thread executor
                    decode_future = executor.submit(
                        advance_mic_decoding, adaptation_state, asr,
                        asr_client, block, chunks_decoded, feat_info,
                        feat_pipeline, key, last_chunk, part,
                        prev_num_frames_decoded, samp_freq, sil_weighting,
                        speaker, utt)
            else:
                time.sleep(0.001)

            previous_block = block

    # Record message history as an integrated Python file that can be used
    # as a standalone replay
    if record_message_history:
        with open('message_history_replay.py', 'w') as message_history_out:
            message_history_out.write(asr_client.message_trace)
    else:
        print(
            "Not writing record message history since --record_message_history is not set."
        )

    # Write debug wav as output file (will only be executed after shutdown)
    if save_debug_wav:
        print("Saving debug output...")
        wavefile.write("debug.wav", samp_freq,
                       np.concatenate(blocks, axis=None))
        wavefile.write("debugraw.wav", record_samplerate,
                       np.concatenate(rawblocks, axis=None))
    else:
        print(
            "Not writing debug wav output since --save_debug_wav is not set.")

    # Now shutting down the pipeline: compute MBR for the final utterance and
    # complete it.
    print("Shutdown: finalizing ASR output...")
    asr.finalize_decoding()
    out = asr.get_output()
    mbr = MinimumBayesRisk(out["lattice"])
    confd = mbr.get_one_best_confidences()
    print(out)
    if asr_client is not None:
        asr_client.completeUtterance(utterance=out["text"],
                                     key=key + "-utt%d-part%d" % (utt, part),
                                     confidences=confd,
                                     speaker=speaker)
        asr_client.sendstatus(isDecoding=False, shutdown=True)
    print("Done, will exit now.")
# Recording settings.
dev_index = 2  # device index found by p.get_device_info_by_index(ii)
wav_output = 'test1.wav'  # name of .wav file
converter = 'sinc_best'  # or 'sinc_fastest', ...

audio = pyaudio.PyAudio()  # create pyaudio instantiation

# Create the pyaudio input stream.
stream = audio.open(
    format=sample_format,
    channels=channels,
    rate=samp_rate,
    frames_per_buffer=chunk,
    input_device_index=dev_index,
    input=True,
)
print("recording")

# Resampler and ratio from the recording rate to the target rate.
resampler = sr.Resampler(converter, channels)
ratio = target_rate / samp_rate
raw_frames = []
frames = []

# Read the stream chunk by chunk and collect the raw audio in `frames`.
for i in range(int(samp_rate / chunk * record_secs)):
    raw_data = stream.read(chunk, exception_on_overflow=False)
    frames.append(raw_data)

print("finished recording")

# Stop the stream, close it, and terminate the pyaudio instantiation.
stream.stop_stream()
stream.close()
audio.terminate()