def synthesize(self, user_id, text, log="true"):
    """Synthesize *text* and stream the resulting audio frames out.

    Announces the utterance with ``tts_start``/``utterance_start``
    commands, synthesizes the text segment by segment, sends the audio
    frames to the audio output, and closes with
    ``tts_end``/``utterance_end`` commands.

    :param user_id: id of the user the utterance belongs to
    :param text: text to synthesize; the markers "_silence_" and
        "silence()" produce an empty waveform
    :param log: "true"/"false" flag passed through to the audio output
    """
    if text == "_silence_" or text == "silence()":
        # just let the TTS generate an empty wav
        # FIX: the original used ``text == ""`` -- a no-op comparison --
        # so the silence marker was synthesized literally; assign instead.
        text = ""

    wav = []

    timestamp = datetime.now().strftime('%Y-%m-%d--%H-%M-%S.%f')
    fname = 'tts-{stamp}.wav'.format(stamp=timestamp)

    self.commands.send(
        Command('tts_start(user_id="%s",text="%s",fname="%s")'
                % (user_id, text, fname),
                'TTS', 'HUB'))
    self.audio_out.send(
        Command('utterance_start(user_id="%s",text="%s",fname="%s",log="%s")'
                % (user_id, text, fname, log),
                'TTS', 'AudioOut'))

    segments = self.parse_into_segments(text)
    for i, segment_text in enumerate(segments):
        segment_wav = self.tts.synthesize(segment_text)
        segment_wav = self.remove_start_and_final_silence(segment_wav)
        if i < len(segments) - 1:
            # add silence only for non-final segments
            segment_wav += self.gen_silence()
        wav.append(segment_wav)

        # 2 bytes per sample -- presumably 16-bit audio; TODO confirm
        segment_wav = various.split_to_bins(
            segment_wav, 2 * self.cfg['Audio']['samples_per_frame'])
        for frame in segment_wav:
            self.audio_out.send(Frame(frame))

    self.commands.send(
        Command('tts_end(user_id="%s",text="%s",fname="%s")'
                % (user_id, text, fname),
                'TTS', 'HUB'))
    self.audio_out.send(
        Command('utterance_end(user_id="%s",text="%s",fname="%s",log="%s")'
                % (user_id, text, fname, log),
                'TTS', 'AudioOut'))
def send_wav(self, filename, stream=None):
    """Feed a wav file into the dialogue system as if it had been
    spoken into the microphone.

    :param filename: path of the wav file to send
    :param stream: optional audio stream the frames are also written to
    """
    samples_per_frame = self.cfg['Audio']['samples_per_frame']

    # load the audio and chop it into fixed-size frames
    frames = various.split_to_bins(
        load_wav(self.cfg, filename), 2 * samples_per_frame)

    # hand the frames over one by one
    for frame_data in frames:
        if stream is not None:
            stream.write(frame_data)
        self.audio_record.send(Frame(frame_data))

    # follow up with silent frames so that the VAD recognizes
    # the end of the recording
    silent_frame = Frame(b"\x00\x00" * samples_per_frame)
    for _ in range(10):
        self.audio_record.send(silent_frame)
def play(cfg, wav):
    """Play the given 16-bit mono audio data on the default output device.

    :param cfg: configuration; ``cfg['Audio']['sample_rate']`` is used
    :param wav: raw PCM sample data (bytes/str)
    """
    # open the audio device
    p = pyaudio.PyAudio()
    chunk = 160

    # open stream
    # FIX: the original called ``p.get_format_from_width(pyaudio.paInt32)``,
    # passing a *format constant* where a sample width in bytes is
    # expected; it only worked because ``paInt32`` happens to equal 2,
    # which maps to the 16-bit format.  Request paInt16 explicitly.
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=cfg['Audio']['sample_rate'],
                    output=True,
                    frames_per_buffer=chunk)
    try:
        for w in various.split_to_bins(wav, chunk):
            stream.write(w)
    finally:
        # always release the audio device, even if playback fails
        stream.stop_stream()
        stream.close()
        p.terminate()
def synthesize(self, user_id, text, log="true"):
    """Synthesize *text* and stream the resulting audio frames out.

    Announces the utterance with ``tts_start``/``utterance_start``
    commands, synthesizes the text segment by segment, sends the audio
    frames to the audio output, and closes with
    ``tts_end``/``utterance_end`` commands.

    :param user_id: id of the user the utterance belongs to
    :param text: text to synthesize; the markers "_silence_" and
        "silence()" produce an empty waveform
    :param log: "true"/"false" flag passed through to the audio output
    """
    if text == "_silence_" or text == "silence()":
        # just let the TTS generate an empty wav
        # FIX: the original used ``text == ""`` -- a no-op comparison --
        # so the silence marker was synthesized literally; assign instead.
        text = ""

    wav = []

    timestamp = datetime.now().strftime('%Y-%m-%d--%H-%M-%S.%f')
    fname = 'tts-{stamp}.wav'.format(stamp=timestamp)

    self.commands.send(
        Command(
            'tts_start(user_id="%s",text="%s",fname="%s")'
            % (user_id, text, fname), 'TTS', 'HUB'))
    self.audio_out.send(
        Command(
            'utterance_start(user_id="%s",text="%s",fname="%s",log="%s")'
            % (user_id, text, fname, log), 'TTS', 'AudioOut'))

    segments = self.parse_into_segments(text)
    for i, segment_text in enumerate(segments):
        segment_wav = self.tts.synthesize(segment_text)
        segment_wav = self.remove_start_and_final_silence(segment_wav)
        if i < len(segments) - 1:
            # add silence only for non-final segments
            segment_wav += self.gen_silence()
        wav.append(segment_wav)

        # 2 bytes per sample -- presumably 16-bit audio; TODO confirm
        segment_wav = various.split_to_bins(
            segment_wav, 2 * self.cfg['Audio']['samples_per_frame'])
        for frame in segment_wav:
            self.audio_out.send(Frame(frame))

    self.commands.send(
        Command(
            'tts_end(user_id="%s",text="%s",fname="%s")'
            % (user_id, text, fname), 'TTS', 'HUB'))
    self.audio_out.send(
        Command(
            'utterance_end(user_id="%s",text="%s",fname="%s",log="%s")'
            % (user_id, text, fname, log), 'TTS', 'AudioOut'))
# NOTE(review): this chunk starts mid-script -- ``parser`` and the imports
# are created before the visible range, and the while-loop body appears to
# continue past it.
parser.add_argument('-c', "--configs", nargs='+',
                    help='additional configuration files')
args = parser.parse_args()

# merge the configuration files given on the command line
cfg = Config.load_configs(args.configs)

session_logger = cfg['Logging']['session_logger']
system_logger = cfg['Logging']['system_logger']

#########################################################################
#########################################################################

system_logger.info("Test of the AudioIO component\n" + "=" * 120)

wav = audio.load_wav(cfg, './resources/test16k-mono.wav')
# split audio into frames (2 bytes per sample -- presumably 16-bit audio)
wav = various.split_to_bins(wav, 2 * cfg['Audio']['samples_per_frame'])
# remove the last frame

# parent/child pipe ends for talking to the AudioIO subprocess
aio_commands, aio_child_commands = multiprocessing.Pipe()       # used to send aio_commands
audio_record, child_audio_record = multiprocessing.Pipe()       # I read from this connection recorded audio
audio_play, child_audio_play = multiprocessing.Pipe()           # I write in audio to be played

close_event = multiprocessing.Event()

aio = AudioIO(cfg, aio_child_commands, child_audio_record, child_audio_play,
              close_event)
aio.start()

count = 0
max_count = 2500
while count < max_count:
    time.sleep(cfg['Hub']['main_loop_sleep_time'])
    # NOTE(review): the remainder of the loop body is outside this view.
        # NOTE(review): this chunk starts inside a configuration dict
        # literal whose opening (and the 'Audio'/VAD sections above) is
        # outside the visible range.
        'power_decision_non_speech_threshold': 0.2,
    },
    'Hub': {
        'main_loop_sleep_time': 0.005,
    },
    'Logging': {
        'output_dir': './tmp'
    }
}

# Python 2 print statements
print "Test of the AudioIO and VAD components:"
print "=" * 120

wav = audio.load_wav(cfg, './resources/test16k-mono.wav')
# split audio into frames (2 bytes per sample -- presumably 16-bit audio)
wav = various.split_to_bins(wav, 2 * cfg['Audio']['samples_per_frame'])
# remove the last frame

# parent/child pipe ends for the AudioIO and VAD subprocesses
aio_commands, aio_child_commands = multiprocessing.Pipe()       # used to send commands to AudioIO
audio_record, child_audio_record = multiprocessing.Pipe()       # I read from this connection recorded audio
audio_play, child_audio_play = multiprocessing.Pipe()           # I write in audio to be played
vad_commands, vad_child_commands = multiprocessing.Pipe()       # used to send commands to VAD
vad_audio_out, vad_child_audio_out = multiprocessing.Pipe()     # used to read output audio from VAD

close_event = multiprocessing.Event()

aio = AudioIO(cfg, aio_child_commands, child_audio_record, child_audio_play,
              close_event)
# note: VAD is handed the *parent* end of the recording pipe, so it
# consumes what AudioIO records
vad = VAD(cfg, vad_child_commands, audio_record, vad_child_audio_out,
          close_event)

command_connections = [aio_commands, vad_commands]
def scores_equal_size_bins(wp_2_match):
    """Calibrate ASR confidence scores using equal-size bins.

    ``wp_2_match`` is presumably a sorted list of (score, match) pairs
    where ``match`` indicates whether the hypothesis was correct -- TODO
    confirm against the caller.  The scores are split into ~100 equally
    sized bins, a sigmoid (``sig1``) is fitted to the per-bin success
    rates, a diagnostic plot is saved to
    'kaldi_calibration_scores_equal_size_bins.pdf', and a calibration
    table of (min_score, max_score, calibrated_value) entries is built,
    benchmarked, and printed for pasting into the config.

    NOTE(review): Python 2 code (print statements, ``reversed(zip(...))``,
    integer division); ``min``/``max`` shadow the builtins throughout.
    """
    max_n = 100
    print "Split into equal size bins"
    # integer division under Python 2: ~max_n bins of equal size
    wp_2_match_binned = split_to_bins(wp_2_match, len(wp_2_match) / max_n)
    # wp_2_match_binned[0][0][0] = 0.0

    # merge the same bins
    # NOTE(review): only bins whose first/last scores equal those of the
    # *last* bin are merged into their predecessor.
    wp_2_match_binned_new = []
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]
        if wp_2_match_binned[-1][0][0] == min and wp_2_match_binned[-1][-1][
                0] == max:
            wp_2_match_binned_new[-1].extend(b)
        else:
            wp_2_match_binned_new.append(b)
    wp_2_match_binned = wp_2_match_binned_new

    # collect per-bin x position (bin index) and success rate
    x = []
    s = []
    i = -1
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]
        match = [wpm[1] for wpm in b]
        succ = sum(match) / len(match)
        # print "{min:.6e} -- {max:.6e} | {size} / {succ:.3f}".format(min=min, max=max, size=len(b), succ=succ)
        i += 1
        x.append(float(i))
        s.append(succ)

    # fit a sigmoid; the last two bins get smaller sigma (higher weight)
    # so the fit tracks the high-score end of the curve
    xdata = [f for f in x]
    ydata = [f for f in s]
    sigma = [1.0 for f in x]
    sigma[-2] = 0.99
    sigma[-1] = 0.1
    popt, pcov = curve_fit(sig1, xdata, ydata, sigma=sigma,
                           p0=[0.0, 1.0, 0.0, 0.0])
    print popt
    fitx = np.linspace(0, len(x), 50)
    fity = sig1(fitx, *popt)
    for xx, ss, f in zip(x, s, sig1(x, *popt)):
        print xx, ss, f

    # plot raw success rates (bar) and the fitted sigmoid (line)
    f = P.figure()
    p = f.add_subplot(2, 1, 1)
    p.bar(x, s)
    # p = f.add_subplot(2,1,2)
    p.plot(fitx, fity)
    p.grid(True)
    P.savefig('kaldi_calibration_scores_equal_size_bins.pdf')

    print "Calibration table"
    # walk the bins from highest scores down; emit a new table entry each
    # time the fitted value drops by more than 0.02
    cal_list = []
    last_f = 2.0
    last_min = 2.0
    for b, f in reversed(zip(wp_2_match_binned, sig1(x, *popt))):
        min = b[0][0]
        max = b[-1][0]
        if last_f - f > 0.02:
            cal_list.append((min, last_min, f))
            print min, f
            last_f = f
            last_min = min
    else:
        # NOTE(review): reconstructed as a for-else (runs once, closing
        # the table with a range starting at 0.0) -- the collapsed source
        # is ambiguous between for-else and if-else; confirm upstream.
        print 0.0, f
        cal_list.append((0.0, last_min, f))

    def find_approx(x):
        # map a raw score to (table_index, calibrated_value)
        for i, (min, max, f) in enumerate(cal_list):
            if min <= x < max:
                return i, f
        print "ASR calibration warning: cannot map score."
        # NOTE(review): on a miss this returns the bare score, not an
        # (index, value) pair, so the tuple-unpacking callers below would
        # raise TypeError -- presumably scores always fall in the table.
        return x

    # time the lookups and count how often each table entry is hit
    count = defaultdict(int)
    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
        count[i] += 1
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match),
                                              time=e - s)

    # reorder the table so the most frequently hit entries come first
    pri_cal_list = []
    for i, x in enumerate(cal_list):
        pri_cal_list.append((count[i], x))
    pri_cal_list.sort()
    pri_cal_list.reverse()
    cal_list = [x[1] for x in pri_cal_list]

    # time the lookups again with the reordered table
    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match),
                                              time=e - s)

    print "=" * 120
    print "The calibration table: insert it in the config"
    print "-" * 120
    print repr(cal_list)
def scores_equal_size_bins(wp_2_match):
    """Calibrate ASR confidence scores using equal-size bins.

    ``wp_2_match`` is presumably a sorted list of (score, match) pairs
    where ``match`` indicates whether the hypothesis was correct -- TODO
    confirm against the caller.  The scores are split into ~100 equally
    sized bins, a sigmoid (``sig1``) is fitted to the per-bin success
    rates, a diagnostic plot is saved to
    'kaldi_calibration_scores_equal_size_bins.pdf', and a calibration
    table of (min_score, max_score, calibrated_value) entries is built,
    benchmarked, and printed for pasting into the config.

    NOTE(review): Python 2 code (print statements, ``reversed(zip(...))``,
    integer division); ``min``/``max`` shadow the builtins throughout.
    """
    max_n = 100
    print "Split into equal size bins"
    # integer division under Python 2: ~max_n bins of equal size
    wp_2_match_binned = split_to_bins(wp_2_match, len(wp_2_match)/max_n)
    # wp_2_match_binned[0][0][0] = 0.0

    # merge the same bins
    # NOTE(review): only bins whose first/last scores equal those of the
    # *last* bin are merged into their predecessor.
    wp_2_match_binned_new = []
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]
        if wp_2_match_binned[-1][0][0] == min and wp_2_match_binned[-1][-1][0] == max:
            wp_2_match_binned_new[-1].extend(b)
        else:
            wp_2_match_binned_new.append(b)
    wp_2_match_binned = wp_2_match_binned_new

    # collect per-bin x position (bin index) and success rate
    x = []
    s = []
    i = -1
    for b in wp_2_match_binned:
        min = b[0][0]
        max = b[-1][0]
        match = [wpm[1] for wpm in b]
        succ = sum(match) / len(match)
        # print "{min:.6e} -- {max:.6e} | {size} / {succ:.3f}".format(min=min, max=max, size=len(b), succ=succ)
        i += 1
        x.append(float(i))
        s.append(succ)

    # fit a sigmoid; the last two bins get smaller sigma (higher weight)
    # so the fit tracks the high-score end of the curve
    xdata = [f for f in x]
    ydata = [f for f in s]
    sigma = [1.0 for f in x]
    sigma[-2] = 0.99
    sigma[-1] = 0.1
    popt, pcov = curve_fit(sig1, xdata, ydata, sigma = sigma, p0 = [0.0, 1.0, 0.0, 0.0] )
    print popt
    fitx = np.linspace(0, len(x), 50)
    fity = sig1(fitx, *popt)
    for xx, ss, f in zip(x, s, sig1(x, *popt)):
        print xx, ss, f

    # plot raw success rates (bar) and the fitted sigmoid (line)
    f = P.figure()
    p = f.add_subplot(2,1,1)
    p.bar(x, s)
    # p = f.add_subplot(2,1,2)
    p.plot(fitx,fity)
    p.grid(True)
    P.savefig('kaldi_calibration_scores_equal_size_bins.pdf')

    print "Calibration table"
    # walk the bins from highest scores down; emit a new table entry each
    # time the fitted value drops by more than 0.02
    cal_list = []
    last_f = 2.0
    last_min = 2.0
    for b, f in reversed(zip(wp_2_match_binned, sig1(x, *popt))):
        min = b[0][0]
        max = b[-1][0]
        if last_f - f > 0.02:
            cal_list.append((min, last_min, f))
            print min, f
            last_f = f
            last_min = min
    else:
        # NOTE(review): reconstructed as a for-else (runs once, closing
        # the table with a range starting at 0.0) -- the collapsed source
        # is ambiguous between for-else and if-else; confirm upstream.
        print 0.0, f
        cal_list.append((0.0, last_min, f))

    def find_approx(x):
        # map a raw score to (table_index, calibrated_value)
        for i, (min, max, f) in enumerate(cal_list):
            if min <= x < max:
                return i, f
        print "ASR calibration warning: cannot map score."
        # NOTE(review): on a miss this returns the bare score, not an
        # (index, value) pair, so the tuple-unpacking callers below would
        # raise TypeError -- presumably scores always fall in the table.
        return x

    # time the lookups and count how often each table entry is hit
    count = defaultdict(int)
    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
        count[i] += 1
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match), time = e - s)

    # reorder the table so the most frequently hit entries come first
    pri_cal_list = []
    for i, x in enumerate(cal_list):
        pri_cal_list.append((count[i], x))
    pri_cal_list.sort()
    pri_cal_list.reverse()
    cal_list = [ x[1] for x in pri_cal_list]

    # time the lookups again with the reordered table
    s = time.time()
    for wpm in wp_2_match:
        i, f = find_approx(wpm[0])
    e = time.time()
    print "size {size} elapsed {time}".format(size=len(wp_2_match), time = e - s)

    print "="*120
    print "The calibration table: insert it in the config"
    print "-"*120
    print repr(cal_list)