def recognize(wav_file):
    """Run speech recognition on a given file."""
    #BASE_PATH = os.path.dirname(os.path.realpath(__file__))
    #HMDIR = os.path.join(BASE_PATH, "hmm")
    #LMDIR = os.path.join(BASE_PATH, "lm/en-us.lm.bin")
    #DICTD = os.path.join(BASE_PATH, "dict/en_in.dic")
    model_path = get_model_path()
    data_path = get_data_path()

    config = DefaultConfig()
    config.set_string('-hmm', "hmm/")
    config.set_string('-lm', "lm/en-us.lm.bin")
    config.set_string('-dict', "dict/en_in.dic")

    speech_rec = Decoder(config)
    print("Decoder Initialized")

    # decode_raw() expects a plain binary file object, not a wave.Wave_read,
    # so open the WAV file directly instead of going through wave.open().
    with open(wav_file, 'rb') as audio_file:
        print("AudioFile Loaded")
        speech_rec.decode_raw(audio_file)
    print("Audio file decoded")

    result = speech_rec.get_hyp()
    print("Result Ready\n")
    return result
def record(listen_time):
    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()

    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
    print THRESHOLD

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* recording"
    frames = []
    detected = False

    for i in range(0, RATE / CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score < THRESHOLD:
            continue
        else:
            detected = True

    if not detected:
        print "nothing detected"
        return ""

    print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"

    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')
    speechRec = Decoder(config)

    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)

    #result = speechRec.get_hyp()
    return speechRec.hyp().hypstr
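getScore() and fetchThreshold() are helpers that the recording snippets call but never define in this section. As an assumption (not the original code), getScore() could simply be the RMS energy of one PCM chunk computed with the standard-library audioop module:

import audioop

def getScore(chunk, sample_width=2):
    # Root-mean-square amplitude of one chunk of 16-bit PCM audio;
    # louder input yields a higher score to compare against THRESHOLD.
    return audioop.rms(chunk, sample_width)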
def passiverecord(THRESHOLD=None):
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    LISTEN_TIME = 2
    WAVE_OUTPUT_FILENAME = "passive.wav"

    p = pyaudio.PyAudio()

    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
    print THRESHOLD

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    # print "* recording"
    frames = []
    lastN = [THRESHOLD * 1.2 for i in range(30)]

    for i in range(0, RATE / CHUNK * LISTEN_TIME):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        lastN.pop(0)
        lastN.append(score)
        average = sum(lastN) / float(len(lastN))
        # print average, THRESHOLD * 0.8
        if average < THRESHOLD * 0.8:
            break

    # print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

    sysdir = os.getcwd()
    wavfile = sysdir + "/passive.wav"

    # decoded = decodepassive()
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def record(listen_time):
    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()

    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
    print THRESHOLD

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* recording"
    frames = []
    detected = False

    for i in range(0, RATE / CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score < THRESHOLD:
            continue
        else:
            detected = True

    if not detected:
        print "nothing detected"
        return ""

    print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"

    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def best_sphinx_speech_result(pyaudio, wav_name, profile):
    global have_sphinx_dictionary
    if not have_sphinx_dictionary:
        if "words" not in profile:
            raise KeyError("Pass the possible words in the profile")
        compile("sentences.txt", "dictionary.dic", "language_model.lm", profile["words"])
        have_sphinx_dictionary = True

    speechRec = Decoder(
        hmm="/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k",
        lm="language_model.lm",
        dict="dictionary.dic"
    )
    with open(wav_name, 'rb') as wav_file:
        speechRec.decode_raw(wav_file)
    results = speechRec.get_hyp()
    return results[0]
class CMUSphinxRecognizer(BaseRecognizer):

    def __init__(self):
        config = Decoder.default_config()
        config.set_string('-hmm', SPHINX_HMM)
        config.set_string('-lm', SPHINX_LM)
        config.set_string('-dict', SPHINX_DICT)
        self.decoder = Decoder(config)

    def recognize(self, raw_audio):
        file_path = self.__save_file(raw_audio)
        # decode_raw() reads raw bytes, so the temporary file must be opened in binary mode.
        with open(file_path, 'rb') as wav_fp:
            self.decoder.decode_raw(wav_fp)
        hypothesis = self.decoder.hyp()
        return hypothesis.hypstr, hypothesis.best_score, hypothesis.prob

    @staticmethod
    def __save_file(data):
        tmp_fp = NamedTemporaryFile(delete=False)
        tmp_fp.write(data)
        tmp_fp.close()
        return tmp_fp.name
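A minimal usage sketch for the class above, assuming SPHINX_HMM, SPHINX_LM and SPHINX_DICT point at valid model files and that utterance.wav is a hypothetical 16 kHz mono recording:

recognizer = CMUSphinxRecognizer()
with open('utterance.wav', 'rb') as f:
    raw_audio = f.read()
text, score, prob = recognizer.recognize(raw_audio)
print(text, score, prob)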
'''
Created on Dec 29, 2013

@author: Mindaugas Greibus
'''
import sys, os
from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with a certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

decoder.decode_raw(
    open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb'))

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
def record(THRESHOLD=None):
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    LISTEN_TIME = 4
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()

    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
    print THRESHOLD

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* recording"
    frames = []
    lastN = [THRESHOLD * 1.2 for i in range(30)]

    for i in range(0, RATE / CHUNK * LISTEN_TIME):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        lastN.pop(0)
        lastN.append(score)
        average = sum(lastN) / float(len(lastN))
        # print average, THRESHOLD * 0.8
        if average < THRESHOLD * 0.8:
            break

    print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"

    # decoded = decodepassive()
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def decodepassive():
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(passivewav, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
# (fragment: tail of the recording helper that writes the captured frames to a WAV file)
p.terminate()

# write data to WAVE file
data = ''.join(all)
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(data)
wf.close()


if __name__ == "__main__":
    hmdir = "/usr/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"
    lmdir = "/usr/share/pocketsphinx/model/lm/en_US/hub4.5000.DMP"
    dictd = "/usr/share/pocketsphinx/model/lm/en_US/cmu07a.dic"

    record()

    wavfile = "/home/shridhar/pocketsphinxtest/livewav.wav"
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()

    print "Recognised text from the converted video file"
    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
    print result[0]
    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
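Several of the recording snippets above rely on module-level constants (FORMAT, CHANNELS, RATE, CHUNK) that are never defined in this section. Typical values for 16 kHz mono capture with PyAudio, offered only as an assumption, would be:

import pyaudio

FORMAT = pyaudio.paInt16   # 16-bit signed samples
CHANNELS = 1               # mono
RATE = 16000               # PocketSphinx acoustic models usually expect 16 kHz audio
CHUNK = 1024               # frames read from the stream per call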
from os import environ, path

from pocketsphinx import Decoder
from sphinxbase import *

print('a')

model_dir = '/home/wmlab/CMU_try/zh_broadcastnews_ptm256_8000/'
# lm_dir = '/home/wmlab/CMU_try/CMUsphinx-Demo-master/0506.lm'
# dict_dir = '/home/wmlab/CMU_try/CMUsphinx-Demo-master/0506.dic'
lm_dir = '/home/wmlab/CMU_try/try_python/usable_version/0506.lm'
dict_dir = '/home/wmlab/CMU_try/try_python/usable_version/0506.dic'
wav_file = '/home/wmlab/CMU_try/testing_audio/testing003.wav'

speech_rec = Decoder(hmm=model_dir, lm=lm_dir, dict=dict_dir)

# Skip the 44-byte RIFF/WAV header so only raw PCM samples are decoded.
with open(wav_file, 'rb') as wavFile:
    wavFile.seek(44)
    speech_rec.decode_raw(wavFile)

result = speech_rec.get_hyp()
print(result[0])
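The seek(44) above assumes a fixed-size RIFF/WAV header before the raw PCM data. A more robust sketch (not from the original, and using the newer pocketsphinx binding's start_utt/process_raw/end_utt calls) lets the standard wave module parse the header instead of assuming it is 44 bytes:

import wave

def decode_wav(decoder, wav_path):
    # Let the wave module parse the header and return only the PCM frames.
    with wave.open(wav_path, 'rb') as w:
        frames = w.readframes(w.getnframes())
    decoder.start_utt()
    decoder.process_raw(frames, False, True)  # no_search=False, full_utt=True
    decoder.end_utt()
    return decoder.hyp().hypstr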