def recognize(wav_file):
    """Run speech recognition on a given WAV file.

    A decoder is configured from the relative model locations ``hmm/``,
    ``lm/en-us.lm.bin`` and ``dict/en_in.dic``; the audio is opened with
    ``wave.open``, decoded, and the decoder's hypothesis is returned.
    Progress messages are printed after each stage.

    Args:
        wav_file: Passed straight to ``wave.open(wav_file, 'rb')``.

    Returns:
        The value of ``Decoder.get_hyp()`` after decoding.
    """
    import os  # local import so the block does not rely on a file-level one

    config = DefaultConfig()
    config.set_string('-hmm', "hmm/")
    # Join path components instead of embedding backslashes: the old
    # literals only named the right files on Windows and contained the
    # invalid escape sequence "\e".
    config.set_string('-lm', os.path.join('lm', 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join('dict', 'en_in.dic'))

    speech_rec = Decoder(config)
    print("Decoder Initialized")

    # The context manager closes the wave reader even if decoding raises.
    with wave.open(wav_file, 'rb') as audio:
        print("AudioFile Loaded")
        # NOTE(review): the wave reader object itself is handed to
        # decode_raw, exactly as before -- confirm the decoder accepts it.
        speech_rec.decode_raw(audio)
        print("Audio file decoded")
        result = speech_rec.get_hyp()

    print("Result Ready\n")
    return result
def init():
    """Create the shared decoder, PyAudio handle and speech recognizer.

    The pocketsphinx configuration takes its log file and keyphrase list
    from ``settings`` and its acoustic model and dictionary from the
    directory returned by ``get_model_path()``.  The resulting objects are
    stored in the module globals ``decoder``, ``p`` and ``r``.
    """
    global decoder, p, r

    config = DefaultConfig()
    model_dir = get_model_path()
    string_options = (
        ('-logfn', settings.POCKETSPHINX_LOG),
        ('-hmm', os.path.join(model_dir, 'en-us')),
        ('-dict', os.path.join(model_dir, 'cmudict-en-us.dict')),
        ('-kws', settings.KEYPHRASES),
    )
    for option, value in string_options:
        config.set_string(option, value)

    # Decoder for streaming data, audio I/O handle, then the recognizer.
    decoder = Decoder(config)
    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
def init():
    """Set up the module-level decoder, PyAudio handle and recognizer.

    The acoustic model, language model and dictionary locations all come
    from ``settings``.  The log-file and keyphrase options are left unset.
    Results are stored in the globals ``decoder``, ``p`` and ``r``.
    """
    global decoder, p, r

    config = DefaultConfig()
    acoustic_model = settings.POCKET_HMM_ACOUSTIC_MODEL
    config.set_string('-hmm', acoustic_model)
    language_model = settings.POCKET_LANGUAGE_MODEL
    config.set_string('-lm', language_model)
    dictionary = settings.POCKET_DICTIONARY
    config.set_string('-dict', dictionary)

    # Decoder for streaming data and the audio handle.
    decoder = Decoder(config)
    p = pyaudio.PyAudio()

    # Speech recognition recogniser.
    r = speech_recognition.Recognizer()
def load_decoder(myid, model_config, out):
    """Build a pocketsphinx decoder from the first section of ``model_config``.

    Args:
        myid: Identifier embedded in the log file name.
        model_config: Config object offering ``sections()`` and
            ``config[section][key]`` access; its first section supplies
            the ``hmm``, ``dict`` and ``lm`` paths.
        out: Prefix of the log file name; the log is ``{out}_{myid}.log``.

    Returns:
        A ``Decoder`` configured with those paths and that log file.

    Raises:
        SystemExit: With code -2, -4 or -5 when the ``hmm``, ``lm`` or
            ``dict`` path, respectively, does not exist.  An error line is
            printed first.
    """
    model_name = model_config.sections()[0]
    section = model_config[model_name]
    hmm_path = section['hmm']
    dict_path = section['dict']  # renamed: the old local shadowed builtin dict
    lm_path = section['lm']
    logfn = '{}_{}.log'.format(out, myid)

    # Validate every model path before creating any pocketsphinx object.
    # The check order and exit codes match the previous behaviour.
    required_paths = ((hmm_path, -2), (lm_path, -4), (dict_path, -5))
    for model_file, exit_code in required_paths:
        if not os.path.exists(model_file):
            print('ERROR: {} does not exist'.format(model_file))
            sys.exit(exit_code)

    pocketsphinx_config = DefaultConfig()
    pocketsphinx_config.set_string('-hmm', hmm_path)
    pocketsphinx_config.set_string('-lm', lm_path)
    pocketsphinx_config.set_string('-dict', dict_path)
    pocketsphinx_config.set_string('-logfn', logfn)

    return Decoder(pocketsphinx_config)
def test_config_set_string(self):
    """A string option written with set_string reads back unchanged."""
    option, value = '-rawlogdir', '~/pocketsphinx'
    cfg = DefaultConfig()
    cfg.set_string(option, value)
    self.assertEqual(cfg.get_string(option), value)
def test_config_get_string(self):
    """'-rawlogdir' reads back as None on a fresh default config."""
    cfg = DefaultConfig()
    raw_log_dir = cfg.get_string('-rawlogdir')
    self.assertEqual(raw_log_dir, None)
def retrieve_scores(word):
    """Decode ``<word>.wav`` against ``<word>-align.jsgf`` and return scores.

    The audio is streamed to the decoder in 1024-byte chunks.  Each time
    the decoder reports that speech has ended, the utterance is closed
    and, when a hypothesis exists, its best score and segments are printed
    and ``retrieve_segments(decoder)`` replaces the current scores.  A new
    utterance is then started.

    Args:
        word: Base name used for the audio file, the grammar file and the
            ``forcing.<word>`` grammar rule.

    Returns:
        The result of the last ``retrieve_segments`` call, or ``[]`` when
        no utterance produced a hypothesis.
    """
    filename = word + '.wav'
    grammarname = word + '-align.jsgf'
    model_path = get_model_path()

    # Initialize the config values
    config = DefaultConfig()
    config.set_boolean('-verbose', False)
    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    config.set_boolean('-lm', False)
    config.set_string('-dict', 'phonemes.dict.txt')
    config.set_boolean('-backtrace', True)
    config.set_boolean('-bestpath', False)
    config.set_boolean('-fsgusefiller', False)
    decoder = Decoder(config)

    # Set the search to JSGF Grammar
    jsgf = Jsgf(grammarname)
    # NOTE(review): the rule object is never used; the lookup is kept in
    # case it serves as a validity check on the grammar -- confirm.
    jsgf.get_rule('forcing.' + word)
    decoder.set_jsgf_file('grammar', grammarname)
    decoder.set_search('grammar')

    utt_started = False
    scores = []
    # The context manager closes the audio file even if decoding raises;
    # the handle used to be leaked.
    with open(filename, 'rb') as stream:
        decoder.start_utt()
        # read() returns b'' at end of file, which stops the iteration.
        for buf in iter(lambda: stream.read(1024), b''):
            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if in_speech and not utt_started:
                utt_started = True
            if not in_speech and utt_started:
                # Speech just ended: close this utterance and report it.
                decoder.end_utt()
                hyp = decoder.hyp()
                if hyp is not None:
                    print('hyp: %s' % (hyp.best_score))
                    print_segments(decoder)
                    scores = retrieve_segments(decoder)
                decoder.start_utt()
                utt_started = False
        decoder.end_utt()

    print('scores:', scores)
    return scores
def test_config_get_boolean(self):
    """'-backtrace' is False on a fresh default config."""
    cfg = DefaultConfig()
    backtrace = cfg.get_boolean('-backtrace')
    self.assertEqual(backtrace, False)
def test_config_get_int(self):
    """'-nfft' is 512 on a fresh default config."""
    cfg = DefaultConfig()
    nfft = cfg.get_int('-nfft')
    self.assertEqual(nfft, 512)
def test_config_get_float(self):
    """'-samprate' is 16000.0 on a fresh default config."""
    cfg = DefaultConfig()
    sample_rate = cfg.get_float('-samprate')
    self.assertEqual(sample_rate, 16000.0)
import os
from pocketsphinx import DefaultConfig, Decoder, get_model_path, get_data_path

# Script: decode one audio file with the default en-us models and print
# the words of the best hypothesis.
model_path = get_model_path()
data_path = 'C:/project/accent/accent-poc/src/Audio/'

# Create a decoder with a certain model
config = DefaultConfig()
config.set_string('-hmm', os.path.join(model_path, 'en-us'))
config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
decoder = Decoder(config)

# Decode streaming data
buf = bytearray(1024)
with open(os.path.join(data_path, 'speaker2.wav'), 'rb') as f:
    decoder.start_utt()
    while True:
        nread = f.readinto(buf)
        if not nread:
            break
        # Pass only the bytes actually read: the buffer is reused, so a
        # short final read would otherwise replay stale bytes from the
        # previous chunk.
        decoder.process_raw(buf[:nread], False, False)
    decoder.end_utt()

print('Best hypothesis segments:', [seg.word for seg in decoder.seg()])
def get_config(self):
    """Return the pocketsphinx configuration used by this object.

    The acoustic model (``en-us-adapt``) and language model
    (``en-us.lm.bin``) are taken from ``Audio_Tuner.tuned_path``.  The
    dictionary and keyword list come from ``self.dict_path`` and
    ``self.keywords_path``.  Decoder logging is discarded and verbose
    output is turned off.

    Returns:
        The populated ``DefaultConfig`` instance.
    """
    config = DefaultConfig()
    tuned_dir = Audio_Tuner.tuned_path

    config.set_string('-hmm', os.path.join(tuned_dir, 'en-us-adapt'))
    config.set_string('-lm', os.path.join(tuned_dir, 'en-us.lm.bin'))
    # For a custom dictionary, copy only the words you want into another
    # file and point dict_path at it.
    config.set_string('-dict', self.dict_path)
    config.set_string('-kws', self.keywords_path)
    # os.devnull is '/dev/null' on POSIX and the matching null device on
    # other platforms, where the hard-coded path did not exist.
    config.set_string("-logfn", os.devnull)
    config.set_boolean("-verbose", False)
    return config
vals = 2.0 / N * np.abs(yf[0:N // 2]) # FFT is symmetrical, so we take just the first half # FFT is also complex, to we take just the real part (abs) return xf, vals FilePath = 'C:/project/accent/accent-poc/src/Audio/' model_path = get_model_path() data_path = get_data_path() dirs = [f for f in os.listdir(FilePath)] recordings = [] for direct in dirs: if direct.endswith('.wav'): recordings.append(direct) # Create a decoder with a hmm model config = DefaultConfig() config.set_string('-hmm', os.path.join(model_path, 'en-us')) config.set_string('-allphone', os.path.join(model_path, 'en-us-phone.lm.bin')) config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin')) config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict')) config.set_float('-lw', 2.0) config.set_float('-beam', 1e-10) config.set_float('-pbeam', 1e-10) decoder = Decoder(config) # Decode streaming data buf = bytearray(1024) with open(path.join(FilePath, 'amol.wav'), 'rb') as f: decoder.start_utt() while f.readinto(buf): decoder.process_raw(buf, False, False)
def test_config_set_boolean(self):
    """A boolean option written with set_boolean reads back as True."""
    option = '-backtrace'
    cfg = DefaultConfig()
    cfg.set_boolean(option, True)
    self.assertEqual(cfg.get_boolean(option), True)
def test_config_set_float(self):
    """A float option written with set_float reads back unchanged."""
    option, value = '-samprate', 8000.0
    cfg = DefaultConfig()
    cfg.set_float(option, value)
    self.assertEqual(cfg.get_float(option), value)
def test_config_set_int(self):
    """An integer option written with set_int reads back unchanged."""
    option, value = '-nfft', 256
    cfg = DefaultConfig()
    cfg.set_int(option, value)
    self.assertEqual(cfg.get_int(option), value)
# of the phrase.
# Higher values indicate that pocketsphinx is sure about the word.
# Adjust to your needs.
HOTWORD_THRESHOLD = -5000

# Pocketsphinx files: default model path, replace if you use a custom
# model in a custom location.
MODELDIR = get_model_path()

# WORDS
HOTWORD = "Anastasia"
SPEAK_READY = "{}. At your service.".format(HOTWORD)
SPEAK_SUCCESS = "Okay."
SPEAK_FAILURE = "Sorry?"
SPEAK_UNDERSTOOD = "Understood."

# Decoder setup
config = DefaultConfig()
for _option, _model_file in (
    ('-hmm', 'en-us'),                # you can use a custom model
    ('-lm', 'en-us.lm.bin'),
    ('-dict', 'cmudict-en-us.dict'),  # you can use a custom dictionary
):
    config.set_string(_option, path.join(MODELDIR, _model_file))
del _option, _model_file  # keep the module namespace as it was
# config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)

# TTS settings and setup
engine = TtsEngine('en-US')
engine.pitch = 80   # 50 - 200
engine.rate = 120   # speed of speech, 20 - 500
# engine.volume = xx  # 0 - 500