def test_lm(self):
    ps = Pocketsphinx(
        dic='deps/pocketsphinx/test/data/defective.dic',
        mmap=False
    )

    # Decoding with 'defective' dictionary
    ps.decode()
    self.assertEqual(ps.hypothesis(), '')

    # Switch to 'turtle' language model
    turtle_lm = 'deps/pocketsphinx/test/data/turtle.lm.bin'
    lm = NGramModel(ps.get_config(), ps.get_logmath(), turtle_lm)
    ps.set_lm('turtle', lm)
    ps.set_search('turtle')

    # Decoding with 'turtle' language model
    ps.decode()
    self.assertEqual(ps.hypothesis(), '')

    # The word 'meters' isn't in the loaded dictionary.
    # Let's add it manually
    ps.add_word('foobie', 'F UW B IY', False)
    ps.add_word('meters', 'M IY T ER Z', True)

    # Decoding with 'turtle' language model
    ps.decode()
    self.assertEqual(ps.hypothesis(), 'foobie meters meters')
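As an aside, lookup_word (exercised in the decoder tests below) can confirm the effect of add_word. A minimal sketch, assuming, as the test above does, that 'meters' is absent from defective.dic:

# Sketch: confirm a pronunciation added with add_word via lookup_word.
ps = Pocketsphinx(dic='deps/pocketsphinx/test/data/defective.dic', mmap=False)
assert ps.lookup_word('meters') is None           # not in the defective dictionary
ps.add_word('meters', 'M IY T ER Z', True)        # True: update the active search
assert ps.lookup_word('meters') == 'M IY T ER Z'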
import os
from os.path import expanduser
from pocketsphinx import Pocketsphinx, get_model_path, get_data_path

def transform_audio_to_text(filename):
    user = expanduser("~")
    path = user + "/DTAI_Internship/src/speech_recognizer_node/data/"
    lm_file = path + "generated_language_model.lm"
    dict_file = path + "generated_dictionary.dic"
    # hmm_file was an unused alternative pointing at the packaged model:
    # hmm_file = user + "/.local/lib/python2.7/site-packages/pocketsphinx/model/en-us"
    model_path = get_model_path()
    data_path = get_data_path()

    config = {
        'hmm': os.path.join(model_path, 'en-us'),
        # lm_file and dict_file are already absolute paths, so they are
        # passed directly rather than joined to model_path
        'lm': lm_file,
        'dict': dict_file
    }

    ps = Pocketsphinx(**config)
    ps.decode(audio_file=os.path.join(data_path, filename),
              buffer_size=2048,
              no_search=False,
              full_utt=False)

    text = ps.hypothesis()
    print(text)
    return text
class TestRawDecoder(TestCase):
    def __init__(self, *args, **kwargs):
        self.ps = Pocketsphinx()
        self.ps.decode()
        super(TestRawDecoder, self).__init__(*args, **kwargs)

    def test_raw_decoder_lookup_word(self):
        self.assertEqual(self.ps.lookup_word('hello'), 'HH AH L OW')
        self.assertEqual(self.ps.lookup_word('abcdf'), None)

    def test_raw_decoder_hypothesis(self):
        self.assertEqual(self.ps.hypothesis(), 'go forward ten meters')
        self.assertEqual(self.ps.score(), -7066)
        self.assertEqual(self.ps.confidence(), 0.04042641466841839)

    def test_raw_decoder_segments(self):
        self.assertEqual(self.ps.segments(), [
            '<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>'
        ])

    def test_raw_decoder_best_hypothesis(self):
        self.assertEqual(self.ps.best(), [
            ('go forward ten meters', -28034),
            ('go for word ten meters', -28570),
            ('go forward and majors', -28670),
            ('go forward and meters', -28681),
            ('go forward and readers', -28685),
            ('go forward ten readers', -28688),
            ('go forward ten leaders', -28695),
            ('go forward can meters', -28695),
            ('go forward and leaders', -28706),
            ('go for work ten meters', -28722)
        ])
def pocket():
    ps = Pocketsphinx()
    language_directory = os.path.dirname(os.path.realpath(__file__))
    print(language_directory)
    acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
    language_model_file = os.path.join(language_directory, "language-model.lm.bin")
    phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")

    config = Decoder.default_config()
    config.set_string("-hmm", acoustic_parameters_directory)  # path of the hidden Markov model (HMM) parameter files
    config.set_string("-lm", language_model_file)
    config.set_string("-dict", phoneme_dictionary_file)
    decoder = Decoder(config)

    # 'r' is assumed to be a speech_recognition.Recognizer() and 's_dir' a
    # sample directory, both defined elsewhere in the original script
    wav_path = os.path.join(
        s_dir,
        "a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav")
    with sr.AudioFile(wav_path) as source:
        audio_data = r.record(source)

    decoder.start_utt()
    # process_raw expects raw PCM bytes, not the AudioData wrapper
    decoder.process_raw(audio_data.get_raw_data(), False, True)
    decoder.end_utt()
    print(decoder.hyp())

    ps.decode(
        audio_file=wav_path,
        buffer_size=2048,
        no_search=False,
        full_utt=False)
    print(ps.hypothesis())

#pocket()
def __init__(self, mode):
    # state
    self.micbuf = np.zeros((0, 4), 'uint16')
    self.outbuf = None
    self.buffer_stuff = 0
    self.mode = mode
    self.playchan = 0
    self.playsamp = 0

    # check mode
    if not (mode == "echo" or mode == "record" or mode == "record4"):
        error("argument not recognised")

    # robot name
    topic_base_name = "/" + os.getenv("MIRO_ROBOT_NAME")

    # publish
    topic = topic_base_name + "/control/stream"
    print("publish", topic)
    self.pub_stream = rospy.Publisher(topic, Int16MultiArray, queue_size=0)

    # subscribe
    topic = topic_base_name + "/sensors/stream"
    print("subscribe", topic)
    self.sub_stream = rospy.Subscriber(topic, UInt16MultiArray, self.callback_stream, queue_size=1, tcp_nodelay=True)

    # subscribe
    topic = topic_base_name + "/sensors/mics"
    print("subscribe", topic)
    self.sub_mics = rospy.Subscriber(topic, Int16MultiArray, self.callback_mics, queue_size=5, tcp_nodelay=True)

    # report
    print("recording from 4 microphones for", RECORD_TIME, "seconds...")

    ####### Speech Recognition using Pocket-Sphinx #########
    model_path = get_model_path()
    data_path = get_data_path()

    config = {
        'hmm': os.path.join(model_path, 'en-us'),        # hidden Markov model: trained acoustic scoring model
        'lm': os.path.join(model_path, 'en-us.lm.bin'),  # language model
        'dict': os.path.join(model_path, 'cmudict-en-us.dict')  # pronunciation dictionary
    }

    ps = Pocketsphinx(**config)
    ps.decode(
        audio_file="/tmp/input.wav",  # temp input.wav file
        buffer_size=2048,
        no_search=False,
        full_utt=False)

    print("Recognized: ")
    print(ps.hypothesis())  # output
    print("END")
def test_jsgf(self):
    ps = Pocketsphinx(
        lm='deps/pocketsphinx/test/data/turtle.lm.bin',
        dic='deps/pocketsphinx/test/data/turtle.dic'
    )

    # Decoding with 'turtle' language model
    ps.decode()
    self.assertEqual(ps.hypothesis(), 'go forward ten meters')

    # Switch to JSGF grammar
    jsgf = Jsgf('deps/pocketsphinx/test/data/goforward.gram')
    rule = jsgf.get_rule('goforward.move2')
    fsg = jsgf.build_fsg(rule, ps.get_logmath(), 7.5)
    ps.set_fsg('goforward', fsg)
    ps.set_search('goforward')

    # Decoding with 'goforward' grammar
    ps.decode()
    self.assertEqual(ps.hypothesis(), 'go forward ten meters')
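For reference, the kind of JSGF grammar loaded above might look like the sketch below. This is an illustrative reconstruction, not the actual contents of deps/pocketsphinx/test/data/goforward.gram; only the grammar and rule names are taken from the test.

#JSGF V1.0;
grammar goforward;
public <move2> = go forward <digits> meters;
<digits> = one | two | three | four | five | six | seven | eight | nine | ten;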
def test_cep_decoder_hypothesis(self):
    ps = Pocketsphinx()
    with open('deps/pocketsphinx/test/data/goforward.mfc', 'rb') as f:
        with ps.start_utterance():
            f.read(4)
            buf = f.read(13780)
            ps.process_cep(buf, False, True)
    self.assertEqual(ps.hypothesis(), 'go forward ten meters')
    self.assertEqual(ps.score(), -7095)
    self.assertEqual(ps.probability(), -32715)
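For comparison, a raw-audio counterpart of the cepstral test above might look like the following sketch. It assumes deps/pocketsphinx/test/data/goforward.raw holds the same utterance as 16 kHz 16-bit mono PCM; the test name is hypothetical and the expected hypothesis mirrors the decoder tests elsewhere in this collection.

def test_raw_decoder_from_stream(self):
    ps = Pocketsphinx()
    with open('deps/pocketsphinx/test/data/goforward.raw', 'rb') as f:
        with ps.start_utterance():
            # full_utt=True: hand the decoder the whole utterance at once
            ps.process_raw(f.read(), False, True)
    self.assertEqual(ps.hypothesis(), 'go forward ten meters')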
class TestPhoneme(TestCase):
    def __init__(self, *args, **kwargs):
        self.ps = Pocketsphinx(
            lm=False,
            dic=False,
            allphone='deps/pocketsphinx/model/en-us/en-us-phone.lm.bin',
            lw=2.0,
            pip=0.3,
            beam=1e-200,
            pbeam=1e-20,
            mmap=False
        )
        self.ps.decode()
        super(TestPhoneme, self).__init__(*args, **kwargs)

    def test_phoneme_hypothesis(self):
        self.assertEqual(
            self.ps.hypothesis(),
            'SIL G OW F AO R W ER D T AE N M IY IH ZH ER Z S V SIL'
        )

    def test_phoneme_best_phonemes(self):
        self.assertEqual(self.ps.segments(), [
            'SIL', 'G', 'OW', 'F', 'AO', 'R', 'W', 'ER', 'D',
            'T', 'AE', 'N', 'M', 'IY', 'IH', 'ZH', 'ER', 'Z',
            'S', 'V', 'SIL'
        ])
class TestPhoneme(TestCase):
    def __init__(self, *args, **kwargs):
        self.ps = Pocketsphinx(
            lm=False,
            dic=False,
            allphone='deps/pocketsphinx/model/en-us/en-us-phone.lm.bin',
            lw=2.0,
            pip=0.3,
            beam=1e-200,
            pbeam=1e-20,
            mmap=False
        )
        self.ps.decode()
        super(TestPhoneme, self).__init__(*args, **kwargs)

    def test_phoneme_hypothesis(self):
        self.assertEqual(
            self.ps.hypothesis(),
            'SIL G OW F AO R D T AE N NG IY ZH ER S SIL'
        )

    def test_phoneme_best_phonemes(self):
        self.assertEqual(self.ps.segments(), [
            'SIL', 'G', 'OW', 'F', 'AO', 'R', 'D', 'T', 'AE',
            'N', 'NG', 'IY', 'ZH', 'ER', 'S', 'SIL'
        ])
    full_utt=False
)

#print(ps.segments())  # => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
#print('Detailed segments:', *ps.segments(detailed=True), sep='\n')
# => [
#     word, prob, start_frame, end_frame
#     ('<s>', 0, 0, 24)
#     ('<sil>', -3778, 25, 45)
#     ('go', -27, 46, 63)
#     ('forward', -38, 64, 116)
#     ('ten', -14105, 117, 152)
#     ('meters', -2152, 153, 211)
#     ('</s>', 0, 212, 260)
# ]

print("hypothesis:\n" + ps.hypothesis())  # => go forward ten meters
print("probability of correct:\n" + str(ps.probability()))  # => -32079
print("score:\n" + str(ps.score()))  # => -7066
print("confidence:\n" + str(ps.confidence()))  # => 0.04042641466841839

#print(*ps.best(count=10), sep='\n')
# => [
#     ('go forward ten meters', -28034)
#     ('go for word ten meters', -28570)
#     ('go forward and majors', -28670)
#     ('go forward and meters', -28681)
#     ('go forward and readers', -28685)
#     ('go forward ten readers', -28688)
#     ('go forward ten leaders', -28695)
#     ('go forward can meters', -28695)
#     ('go forward and leaders', -28706)
#     ('go for work ten meters', -28722)
class HotwordRecognizer:
    """Hotword (wake word) recognizer, a thin wrapper around |pocketsphinx|.
    The default hotwords are `'阿Q'` and `'R-cute'`.

    To define custom hotwords, see https://blog.51cto.com/feature09/2300352

    .. |pocketsphinx| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python' target='blank'>pocketsphinx</a>

    .. |config| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python#default-config' target='blank'>pocketsphinx Default config</a>

    :param hotword: hotword or list of hotwords, defaults to `['阿Q', 'R-cute']`
    :type hotword: str / list, optional
    :param hmm: see |config|
    :type hmm: str, optional
    :param lm: see |config|
    :type lm: str, optional
    :param dic: see |config|
    :type dic: str, optional
    """
    def __init__(self, **kwargs):
        # signal.signal(signal.SIGINT, self.stop)
        self._no_search = False
        self._full_utt = False
        hotword = kwargs.pop('hotword', ['阿Q', 'R-cute'])
        self._hotwords = hotword if isinstance(hotword, list) else [hotword]
        model_path = get_model_path()
        opt = {
            'verbose': False,
            'hmm': os.path.join(model_path, 'en-us'),
            'lm': util.resource('sphinx/rcute.lm'),
            'dic': util.resource('sphinx/rcute.dic'),
        }
        opt.update(kwargs)
        self._rec = Pocketsphinx(**opt)

    def recognize(self, stream, timeout=None):
        """Start recognizing.

        :param stream: audio source
        :param timeout: timeout, i.e. the maximum recognition time in seconds;
            defaults to `None`, meaning no timeout: the call returns only once a hotword is recognized
        :type timeout: float, optional
        :return: the hotword corresponding to the recognized hotword model, or `None` if the timeout expires first
        :rtype: str
        """
        self._cancel = False
        if timeout:
            count = 0.0
        in_speech = False
        with self._rec.start_utterance():
            while True:
                data = stream.raw_read()
                self._rec.process_raw(data, self._no_search, self._full_utt)
                if in_speech != self._rec.get_in_speech():
                    in_speech = not in_speech
                    if not in_speech and self._rec.hyp():
                        with self._rec.end_utterance():
                            hyp = self._rec.hypothesis()
                            if hyp in self._hotwords:
                                return hyp
                if self._cancel:
                    raise RuntimeError('Hotword detection cancelled by another thread')
                elif timeout:
                    # the original read source.frame_duration, but the
                    # parameter is named stream
                    count += stream.frame_duration  # len(data) / 32000
                    if count > timeout:
                        return

    def cancel(self):
        """Stop recognizing."""
        self._cancel = True
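A hypothetical usage sketch; 'mic_stream' stands in for any object exposing the raw_read() and frame_duration interface that recognize() consumes above:

# Hypothetical usage; mic_stream must provide raw_read() -> PCM bytes
# and a frame_duration attribute (seconds per chunk).
recognizer = HotwordRecognizer(hotword='R-cute')
hotword = recognizer.recognize(mic_stream, timeout=10.0)
if hotword:
    print('wake word detected:', hotword)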
)

#print(ps.segments())

# Save the detailed segments of the words, which contain
# word, probability, start_time and end_time
#print('Detailed segments:', *ps.segments(detailed=True), sep='\n')
# with open('output_segments_obama_farewell_speech.txt', 'a') as f:
#     print(*ps.segments(detailed=True), sep='\n', file=f)
with open(filename_output_segments, 'a') as f:
    print(*ps.segments(detailed=True), sep='\n', file=f)

# Convert from audio to text and save
text = ps.hypothesis()
file1 = open(filename_sphinx, "w")  # write mode
file1.write(text)
file1.close()

# Load into a dataframe.
# For the file saved above, manually remove '(', ')' and quotes,
# then save as the modified file.
#df = pd.read_csv('output_segments_donaldTrump_modified.txt', sep=",", header=None)
df = pd.read_csv(filename_output_segments_mod, sep=",", header=None)
df.columns = ["word", "prob", "startTime", "endTime"]
df.head()

# Calculate the time taken for each word
df['time_taken'] = df['endTime'] - df['startTime']
df.head(20)
class SpeechToText:
    '''
    Speech recognition with PocketSphinx.
    1. mode - takes one of two values: from_file or from_microphone
        1.1. from_file - speech recognition from a .wav file (sampling rate >= 16 kHz, 16 bit, mono)
        1.2. from_microphone - speech recognition from the microphone
    2. name_dataset - name of the dataset the language model was built on: plays_ru, subtitles_ru or conversations_ru
    '''
    def __init__(self, mode='from_microphone', name_dataset='plays_ru'):
        self.current_dirname = os.path.dirname(os.path.realpath(__file__))
        self.work_mode = mode
        model_path = get_model_path()

        if name_dataset not in ('plays_ru', 'subtitles_ru', 'conversations_ru'):
            print('\n[E] Invalid name_dataset value. Possible options: plays_ru, subtitles_ru or conversations_ru\n')
            return

        if self.work_mode == 'from_file':
            config = {
                'hmm': os.path.join(model_path, 'zero_ru.cd_cont_4000'),
                'lm': os.path.join(model_path, 'ru_bot_' + name_dataset + '.lm'),
                'dict': os.path.join(model_path, 'ru_bot_' + name_dataset + '.dic')
            }
            self.speech_from_file = Pocketsphinx(**config)
        elif self.work_mode == 'from_microphone':
            self.speech_from_microphone = LiveSpeech(
                verbose=False,
                sampling_rate=16000,
                buffer_size=2048,
                no_search=False,
                full_utt=False,
                hmm=os.path.join(model_path, 'zero_ru.cd_cont_4000'),
                lm=os.path.join(model_path, 'ru_bot_' + name_dataset + '.lm'),
                dic=os.path.join(model_path, 'ru_bot_' + name_dataset + '.dic'))
        else:
            print('[E] Unsupported work mode, check the value of the mode argument.')

    # Add noise filters, e.g. with sox

    def get(self, f_name_audio=None):
        '''
        Speech recognition with PocketSphinx. The mode is set when the class
        object is created (from a file or from the microphone).
        1. f_name_audio - name of a .wav or .opus file with speech (for recognition from a file; sampling rate >= 16 kHz, 16 bit, mono)
        2. returns a string with the recognized speech
        '''
        if self.work_mode == 'from_file':
            if f_name_audio is None:
                print('[E] In from_file mode a .wav or .opus file name must be given.')
                return
            filename_audio_raw = f_name_audio[:f_name_audio.find('.')] + '.raw'
            filename_audio_wav = f_name_audio[:f_name_audio.find('.')] + '.wav'
            audio_format = f_name_audio[f_name_audio.find('.') + 1:]

            # Convert the .opus file to .wav
            if audio_format == 'opus':
                command_line = "yes | ffmpeg -i '" + f_name_audio + "' '" + filename_audio_wav + "'"
                proc = subprocess.Popen(command_line, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = proc.communicate()
                if err.decode().find(f_name_audio + ':') != -1:
                    return 'error'

            # Convert the .wav file to .raw
            audio_file = AudioSegment.from_wav(self.current_dirname + '/' + filename_audio_wav)
            audio_file = audio_file.set_frame_rate(16000)
            audio_file.export(self.current_dirname + '/' + filename_audio_raw, format='raw')

            # Create the decoder and recognize
            self.speech_from_file.decode(
                audio_file=self.current_dirname + '/' + filename_audio_raw,
                buffer_size=2048,
                no_search=False,
                full_utt=False)
            return self.speech_from_file.hypothesis()
        elif self.work_mode == 'from_microphone':
            for phrase in self.speech_from_microphone:
                return str(phrase)
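A hypothetical usage sketch of both modes; the file name is illustrative and assumes the zero_ru models are installed in the pocketsphinx model path as the class expects:

# File mode: 'voice_message.opus' is an illustrative file name.
stt = SpeechToText(mode='from_file', name_dataset='plays_ru')
print(stt.get('voice_message.opus'))

# Microphone mode: blocks until the first phrase is heard.
stt_live = SpeechToText(mode='from_microphone')
print(stt_live.get())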
# sample_format, channels, fs, chunk and ps are assumed to be configured
# earlier in the original script
p = pyaudio.PyAudio()  # Create an interface to PortAudio
# See PyAudio Documentation
stream = p.open(format=sample_format,
                channels=channels,
                rate=fs,
                frames_per_buffer=chunk,
                input=True)

print("Running as a daemon")
print("Recording")  # this is the only one that needs PyAudio it seems

frames = []  # buffer of raw chunks fed to the decoder

while True:
    ps.start_utt()
    # When there is silence, assume they stopped speaking
    while stream.get_read_available() > 0:
        data = stream.read(chunk)
        ps.process_raw(data, False, False)
        frames.append(data)
    ps.end_utt()

    # Not part of the inner while loop.
    # This prevents it from printing silence.
    if ps.hypothesis() != '':
        print(ps.hypothesis())
    frames = []

# Unreachable while the loop above runs forever; kept for cleanup on exit
stream.stop_stream()
stream.close()
p.terminate()
    full_utt=False)

print(ps.segments())  # => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
print('Detailed segments:', *ps.segments(detailed=True), sep='\n')
# => [
#     word, prob, start_frame, end_frame
#     ('<s>', 0, 0, 24)
#     ('<sil>', -3778, 25, 45)
#     ('go', -27, 46, 63)
#     ('forward', -38, 64, 116)
#     ('ten', -14105, 117, 152)
#     ('meters', -2152, 153, 211)
#     ('</s>', 0, 212, 260)
# ]

print(ps.hypothesis())   # => go forward ten meters
print(ps.probability())  # => -32079
print(ps.score())        # => -7066
print(ps.confidence())   # => 0.04042641466841839

print(*ps.best(count=10), sep='\n')
# => [
#     ('go forward ten meters', -28034)
#     ('go for word ten meters', -28570)
#     ('go forward and majors', -28670)
#     ('go forward and meters', -28681)
#     ('go forward and readers', -28685)
#     ('go forward ten readers', -28688)
#     ('go forward ten leaders', -28695)
#     ('go forward can meters', -28695)
#     ('go forward and leaders', -28706)
#     ('go for work ten meters', -28722)
def __init__(self):
    # state
    self.micbuf = np.zeros((0, 4), 'uint16')
    self.spkrbuf = None
    self.buffer_stuff = 0

    # robot name
    topic_base = "/" + os.getenv("MIRO_ROBOT_NAME") + "/"

    # publish
    topic = topic_base + "control/stream"
    print("publish", topic)
    self.pub_stream = rospy.Publisher(topic, Int16MultiArray, queue_size=0)

    # subscribe
    topic = topic_base + "sensors/stream"
    print("subscribe", topic)
    self.sub_stream = rospy.Subscriber(topic, UInt16MultiArray, self.callback_stream)

    # subscribe
    topic = topic_base + "sensors/mics"
    print("subscribe", topic)
    self.sub_mics = rospy.Subscriber(topic, Int16MultiArray, self.callback_mics)

    # report
    print("recording on 4 microphone channels...")

    ####### Speech Recognition using Pocket-Sphinx #########

    # obtain audio from the microphone; the original called
    # sr.callback_mics(), which does not exist in speech_recognition --
    # sr.Microphone() is the likely intent
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say Hello")
        audio = r.listen(source)

    # write audio as a wav file
    with open("./tmp/input.wav", "wb") as f:
        f.write(audio.get_wav_data())

    model_path = get_model_path()
    data_path = get_data_path()

    config = {
        'hmm': os.path.join(model_path, 'en-us'),        # hidden Markov model: trained acoustic scoring model
        'lm': os.path.join(model_path, 'en-us.lm.bin'),  # language model
        'dict': os.path.join(model_path, 'cmudict-en-us.dict')  # pronunciation dictionary
    }

    ps = Pocketsphinx(**config)
    ps.decode(
        # decode the file from where it was written above; the original
        # joined it to data_path, which does not match the write path
        audio_file="./tmp/input.wav",
        buffer_size=2048,
        no_search=False,
        full_utt=False
    )
    print(ps.hypothesis())  # output
# Code retested by KhalsaLabs
# You can use your own audio file in code
# Raw or wav files would work perfectly
# For mp3 files, you need to modify the code (add a codec)

from __future__ import print_function
import os
from pocketsphinx import Pocketsphinx, get_model_path, get_data_path

model_path = get_model_path()
data_path = get_data_path()

config = {
    'hmm': os.path.join(model_path, 'en-us'),
    'lm': os.path.join(model_path, 'en-us.lm.bin'),
    'dict': os.path.join(model_path, 'cmudict-en-us.dict')
}

ps = Pocketsphinx(**config)
ps.decode(
    audio_file=os.path.join(data_path, 'test1.wav'),  # add your audio file here
    buffer_size=2048,
    no_search=False,
    full_utt=False)

print(ps.hypothesis())
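For continuous microphone input, the same package provides LiveSpeech (used by the SpeechToText class above); a minimal sketch relying on the package's default en-us model:

from pocketsphinx import LiveSpeech

# Iterating over a LiveSpeech instance yields one recognized phrase at a time.
for phrase in LiveSpeech():
    print(phrase)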
def loop(self):
    # loop
    while not rospy.core.is_shutdown():

        # if recording finished
        if not self.outbuf is None:

            # write output file
            print("writing output file")
            outfilename = '/tmp/input.wav'
            file = wave.open(outfilename, 'wb')
            # sampwidth=2 matches the 16-bit ('<h') samples packed below
            # (the original declared 4, which mislabels the data)
            file.setparams((1, 2, 20000, 0, 'NONE', 'not compressed'))

            print("Starting Reshape")
            x = np.reshape(self.outbuf[:, [0, 0]], (-1))

            print("writing frames")
            print(len(x))
            values = []
            for s in x:
                packed_value = struct.pack('<h', s)
                values.append(packed_value)
                #file.writeframes(struct.pack('<h', s))

            value_str = b''.join(values)
            file.writeframes(value_str)

            # close file
            print("Closing file")
            file.close()

            model_path = get_model_path()
            data_path = get_data_path()

            config = {
                'hmm': os.path.join(model_path, 'en-us'),        # hidden Markov model: trained acoustic scoring model
                'lm': os.path.join(model_path, 'en-us.lm.bin'),  # language model
                'dict': os.path.join(model_path, 'cmudict-en-us.dict')  # pronunciation dictionary
                #'samprate': 16000
            }

            #cmd = "ffmpeg -y -i /tmp/output.wav -ar 8000 -af asetrate=16000*" + pitch + ",aresample=16000,atempo=" + tempo + " -ac 1 /tmp/outputConv.wav"
            #cmd = "ffmpeg -y -i /tmp/input.wav -f s32le -acodec pcm_s32le -ar 16000 -ac 1 /tmp/inputConv.wav"
            #cmd = "sox /tmp/input.wav -r 16000 inputConv.wav"
            #cmd = "ffmpeg -i /tmp/input.wav -ar 16000 /tmp/inputConv.wav"
            print("Converting via FFMPEG")
            cmd = "ffmpeg -y -i /tmp/input.wav -f s16le -acodec pcm_s16le -ar 16000 -af 'aresample=20000' -ac 1 /tmp/inputConv.wav -loglevel quiet"
            os.system(cmd)

            print("Decoding Via Pocketsphinx")
            ps = Pocketsphinx(**config)
            ps.decode(
                audio_file="/tmp/inputConv.wav",  # temp converted input file
                buffer_size=8192,
                no_search=False,
                full_utt=False)

            print("Recognized: ")
            print(ps.hypothesis())  # output

            # Speech analysis (what to start?)
            if ps.hypothesis() == "hello":
                mml.say("Hello there human")  # Change this to whatever
            elif ps.hypothesis().find("how are you") >= 0:
                mml.say("I'm always good")

            print("END")

            # state
            self.micbuf = np.zeros((0, 4), 'uint16')
            self.outbuf = None
            self.buffer_stuff = 0
            self.playchan = 0
            self.playsamp = 0

        time.sleep(0.02)