Example #1
def pocket():

	ps = Pocketsphinx()


	language_directory = os.path.dirname(os.path.realpath(__file__))
	
	print(language_directory)

	acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
	language_model_file = os.path.join(language_directory, "language-model.lm.bin")
	phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")
    
	config = Decoder.default_config()
	config.set_string("-hmm", acoustic_parameters_directory)  # set the path of the hidden Markov model (HMM) parameter files
	config.set_string("-lm", language_model_file)
	config.set_string("-dict", phoneme_dictionary_file)

	decoder = Decoder(config)

	with sr.AudioFile(s_dir + "/a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav") as source:
		audio_data = r.record(source)
	decoder.start_utt()
	decoder.process_raw(audio_data.get_raw_data(), False, True)  # process_raw expects raw PCM bytes, not the AudioData wrapper
	decoder.end_utt()

	print(decoder.hyp())

	ps.decode(
	    audio_file=os.path.join(s_dir, 'a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav'),
	    buffer_size=2048,
	    no_search=False,
	    full_utt=False)
	print(ps.hypothesis())  # prints the best hypothesis string for the decoded file
#pocket()
Example #2
	def __init__(self):
		JarvisIOHandler.__init__(self)
		hmm = '/usr/local/share/pocketsphinx/model/en-us/en-us'
		dic ='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'
		lm ='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin'

		config = Decoder.default_config()
		config.set_string('-hmm',hmm)
		config.set_string('-lm',lm)
		config.set_string('-dict',dic)
		config.set_string('-logfn','/dev/null')

		self.decoder = Decoder(config)

		self.microphone = pyaudio.PyAudio()

		pyvona_config = open('configs/pyvona.txt')
		pvcfg = pyvona_config.readlines()
		pyvona_config.close()
		self.voice = pyvona.create_voice(pvcfg[0].strip(),pvcfg[1].strip())
		self.voice.region = 'us-west'
		self.voice.voice_name='Brian'
		self.voice.sentence_break = 200

		googleSTT_config = open('configs/GoogleSTT.txt')
		self.key = googleSTT_config.readlines()[0].strip()
		googleSTT_config.close()
		self.recognizer = sr.Recognizer()
		with sr.Microphone() as source:
			self.recognizer.adjust_for_ambient_noise(source)
Example #3
def get_text_from_audio(audio_input_name: str,
                        working_directory: str = WORKING_DIRECTORY):
    """ Gets text from audio file (using pocketsphinx-python library)

    Args:

    Return:
        list: text from audio file

    """

    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(SPEECH_MODEL_PATH, 'en-us'))
    config.set_string('-lm', os.path.join(SPEECH_MODEL_PATH, 'en-us.lm.bin'))
    config.set_string('-dict',
                      os.path.join(SPEECH_MODEL_PATH, 'cmudict-en-us.dict'))
    decoder = Decoder(config)

    # Decode streaming data.
    decoder.start_utt()
    with open(os.path.join(working_directory, audio_input_name),
              'rb') as stream:
        while True:
            buf = stream.read(1024)
            if buf:
                decoder.process_raw(buf, False, False)
            else:
                break

    decoder.end_utt()
    text_from_audio = [seg.word for seg in decoder.seg()]

    return text_from_audio if text_from_audio else 'Audio file doesn\'t contain words'
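
A minimal call sketch for the function above; it assumes the module-level SPEECH_MODEL_PATH and WORKING_DIRECTORY constants (not shown in the snippet) point at the CMU en-us model and at a folder holding a 16 kHz mono recording, and the file name is only a placeholder:

words = get_text_from_audio('utterance.wav')
print(' '.join(words) if isinstance(words, list) else words)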
Example #4
File: local.py | Project: yannickulrich/IRIS
    def __init__(self, profile, hmm=None, dict=None, lm=None,
                 kws_threshold=None, keyphrase=None):
        self.profile = profile
        if keyphrase:
            if not dict:
                dict = fullpath('config/keyphrase.dic')
            if not lm:
                lm = fullpath('config/keyphrase.lm')
        else:
            if not dict:
                dict = fullpath('config/corpus.dic')
            if not lm:
                lm = fullpath('config/corpus.lm')

        if not hmm:
            hmm = 'share/pocketsphinx/model/en-us/en-us'

        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(SPHINX_ROOT, hmm))
        config.set_string('-dict', dict)
        config.set_string('-lm', lm)
        config.set_string('-logfn', fullpath('config/sphinx.log'))

        if keyphrase:
            config.set_string('-keyphrase', keyphrase)
        if kws_threshold:
            config.set_float('-kws_threshold', kws_threshold)

        self.decoder = Decoder(config)

        self.transcribe = self.transcribe_darwin
        self.hyp = None
Example #5
    def __init__(self,
                 key_phrase,
                 dict_file,
                 hmm_folder,
                 threshold=1e-90,
                 chunk_size=-1):
        from pocketsphinx import Decoder
        config = Decoder.default_config()
        config.set_string('-hmm', hmm_folder)
        config.set_string('-dict', dict_file)
        config.set_string('-keyphrase', key_phrase)
        config.set_float('-kws_threshold', float(threshold))
        config.set_float('-samprate', 16000)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.key_phrase = key_phrase
        self.buffer = b'\0' * pr.sample_depth * pr.buffer_samples
        self.pr = pr
        self.read_size = -1 if chunk_size == -1 else pr.sample_depth * chunk_size

        try:
            self.decoder = Decoder(config)
        except RuntimeError:
            options = dict(key_phrase=key_phrase,
                           dict_file=dict_file,
                           hmm_folder=hmm_folder,
                           threshold=threshold)
            raise RuntimeError('Invalid Pocketsphinx options: ' + str(options))
Example #6
def recog_wav(MODELDIR, wavfile):

    #print(MODELDIR)

    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
    config.set_string('-lm', os.path.join(MODELDIR, 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join(MODELDIR, 'cmudict-en-us.dict'))

    # Decode streaming data.
    decoder = Decoder(config)
    start = time.time()
    decoder.start_utt()
    wav_stream = open(wavfile, "rb")
    while True:
        buffer = wav_stream.read(1024)
        if buffer:
            decoder.process_raw(buffer, False, False)
        else:
            break
    decoder.end_utt()
    duration = time.time() - start
    print("Duration: " + str(duration))  #Benchmarking
    for seg in decoder.seg():
        print(seg.word)
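
A hedged call sketch for recog_wav; get_model_path comes from the pocketsphinx package as in the other examples, and the WAV filename is only a placeholder for a 16 kHz mono recording:

from pocketsphinx import get_model_path

recog_wav(get_model_path(), 'utterance-16k.wav')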
Example #7
    def onStart(self):
        super().onStart()

        if not self.checkLanguage():
            self.downloadLanguage()

        try:
            pocketSphinxPath = self.getPocketSphinxPath()
        except:
            raise

        self._config = Decoder.default_config()
        self._config.set_string(
            '-hmm',
            f'{pocketSphinxPath}/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}'
        )
        self._config.set_string(
            '-lm',
            f'{pocketSphinxPath}/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}.lm.bin'
        )
        self._config.set_string(
            '-dict',
            f'{pocketSphinxPath}/model/cmudict-{self.LanguageManager.activeLanguageAndCountryCode.lower()}.dict'
        )
        self._decoder = Decoder(self._config)
Example #8
def record(listen_time):

    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    if THRESHOLD == None:
        THRESHOLD = fetchThreshold()
        print THRESHOLD

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* recording"
    frames = []
    detected = False
    for i in range(0, RATE / CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score < THRESHOLD:
            continue
        else:
            detected = True
    if not detected:
        print "nothing detected"
        return("")

    print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')

    speechRec = Decoder(config)

    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
        #result = speechRec.get_hyp()

    return(speechRec.hyp().hypstr)
Example #9
def create_decoder():
    model_path = get_model_path()
    config = Decoder.default_config()
    config.set_string("-hmm", os.path.join(model_path, "en-us"))
    config.set_string("-lm", os.path.join(model_path, "en-us.lm.bin"))
    config.set_string("-dict", os.path.join(model_path, "cmudict-en-us.dict"))
    config.set_string("-logfn", os.devnull)
    return Decoder(config)
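
A short usage sketch for the factory above, following the streaming-decode pattern used elsewhere in these examples; test.wav is a placeholder for a 16 kHz mono file:

decoder = create_decoder()
decoder.start_utt()
with open('test.wav', 'rb') as stream:
    while True:
        buf = stream.read(1024)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
decoder.end_utt()
print(decoder.hyp().hypstr if decoder.hyp() is not None else '(no hypothesis)')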
Example #10
 def createConfig(self,pGramma):
     print ("[createConfig]+++")
     config = Decoder.default_config()
     config.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/liepa.cd_semi_200/'))
     config.set_string('-fsg', os.path.join("../resource/", pGramma+'.fsg'))
     #config.set_string('-jsgf', os.path.join("../resource/", pGramma+'.gram'))
     config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
     print ("[createConfig]---")
     return config
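
In the project this method comes from, the returned config is presumably handed straight to a Decoder. A hedged sketch, where the instance name, grammar name, and WAV path are placeholders rather than anything shown above:

service = SpeechService()  # hypothetical instance of the class that defines createConfig
decoder = Decoder(service.createConfig('robot_commands'))
with open('command-16k.wav', 'rb') as wav_file:
    decoder.decode_raw(wav_file)
print(decoder.hyp().hypstr if decoder.hyp() is not None else '(no hypothesis)')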
Example #11
def listen(MODE):
    CORPUS = 6278
    model_path = get_model_path()
    home_path = "/home/the0s/Desktop/HCR_Python"
    print(model_path)
    print(home_path)
    DATADIR = "/usr/local/lib/python2.7/dist-packages/pocketsphinx/data"

    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_path, 'hub4wsj_sc_8k'))
    config.set_string('-lm', os.path.join(home_path, str(CORPUS) + '.lm.bin'))
    config.set_string('-dict', os.path.join(home_path, str(CORPUS) + '.dic'))
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()

    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=16000,
                    input=True,
                    frames_per_buffer=1024)
    stream.start_stream()
    in_speech_bf = False
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        if buf:
            decoder.process_raw(buf, False, False)
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    decoder.end_utt()
                    if decoder.hyp() is not None:
                        buf = [s for s in decoder.hyp().hypstr.split()]
                        print(buf)
                        if len(buf) > 0:
                            if MODE == 0:  #DrinkRequest
                                for item in buf:
                                    if checkRequest(item) != "NONE":
                                        output = checkRequest(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output
                            if MODE == 1:  #DrinkConfirm
                                for item in buf:
                                    if checkConfirm(item) != "NONE":
                                        output = checkConfirm(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output

                    decoder.start_utt()
        else:
            break
    decoder.end_utt()
Example #12
    def __init__(self):
        
        MODELDIR = get_model_path()
        CURR_DIR = os.path.dirname(os.path.realpath(__file__))
        KEYPHRASE_THRESH_DIR = CURR_DIR + '/keyphrases.thresh'
        
        # Create a decoder with certain model
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
        config.set_string('-dict', \
                          os.path.join(MODELDIR, 'cmudict-en-us.dict'))
        config.set_string('-kws', KEYPHRASE_THRESH_DIR)
        #config.set_string('-logfn', '/dev/null')
        decoder = Decoder(config)
        
        
        p = pyaudio.PyAudio()
        host_info = p.get_host_api_info_by_index(0)
        device_index = 3
        for i in range(host_info.get('deviceCount')):
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            #print('\n\n\n\n'+str(i)+device_info.get('name') + " : " + str(device_info.get('maxInputChannels')))
            if 'USB' in device_info.get('name'):
                device_index = i
                break

        '''
        fire /1e18/
        '''
            
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input=True,
            frames_per_buffer=1024,
            input_device_index=device_index)
        
        stream.start_stream()
        in_speech_bf = True
        
        decoder.start_utt()
        print("Starting to listen")
        
        while True:
            buf = stream.read(1024, exception_on_overflow = False)
            decoder.process_raw(buf, False, False)
            if decoder.hyp() != None:
                print("\nDetected: " + decoder.hyp().hypstr + "\n")
                decoder.end_utt()
                #print "Detected Move Forward, restarting search"
                decoder.start_utt()
        print("Am not listening any more")
        stream.stop_stream()
        stream.close()
        p.terminate()
Example #13
 def create_config(self, dict_name):
     config = Decoder.default_config()
     config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
     config.set_string('-dict', dict_name)
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', self.threshold)
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn', '/dev/null')
     return config
Example #14
 def create_config(self, dict_name):
     config = Decoder.default_config()
     config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
     config.set_string('-dict', dict_name)
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', float(self.threshold))
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn', '/dev/null')
     return config
Example #15
 def create_config(self, dict_name):
     config = Decoder.default_config()
     config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
     config.set_string('-dict', dict_name)
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', float(self.threshold))
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn', '/dev/null')
     return config
Example #16
 def create_config(self, dict_name):
     config = Decoder.default_config()
     config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
     config.set_string('-dict', dict_name)
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', float(self.threshold))
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn',
                       '/home/sg/mycroft-core/scripts/logs/pocket.log')
     return config
Example #17
	def onStart(self):
		super().onStart()

		if not self.checkLanguage():
			self.downloadLanguage()

		self._config = Decoder.default_config()
		self._config.set_string('-hmm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}')
		self._config.set_string('-lm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}.lm.bin')
		self._config.set_string('-dict', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/cmudict-{self.LanguageManager.activeLanguageAndCountryCode.lower()}.dict')
		self._decoder = Decoder(self._config)
Example #18
 def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
     super().__init__(key_phrase, config, lang)
     # Hotword module imports
     from pocketsphinx import Decoder
     # Hotword module params
     self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
     self.num_phonemes = len(self.phonemes.split())
     self.threshold = self.config.get("threshold", 1e-90)
     self.sample_rate = self.listener_config.get("sample_rate", 1600)
     dict_name = self.create_dict(self.key_phrase, self.phonemes)
     config = self.create_config(dict_name, Decoder.default_config())
     self.decoder = Decoder(config)
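
The mycroft-style hotword examples here call self.create_dict(key_phrase, phonemes), which none of these snippets define. A plausible sketch under stated assumptions (that ' . ' in the phoneme string separates per-word phoneme groups and that a temporary CMU-format .dict file is acceptable); this is not the actual Mycroft implementation:

import os
import tempfile

def create_dict(key_phrase, phonemes):
    # one CMU-dict line per word: "<word> <phoneme sequence>"
    words = key_phrase.split()
    phoneme_groups = phonemes.split(' . ')
    fd, path = tempfile.mkstemp(suffix='.dict')
    with os.fdopen(fd, 'w') as f:
        for word, phones in zip(words, phoneme_groups):
            f.write('{} {}\n'.format(word, phones))
    return path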
Example #19
 def createConfig(self,pGramma):
     '''
     Create configuration with acoustic model path, grammar and dictionary
     '''
     print ("[createConfig]+++")
     config = Decoder.default_config()
     config.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
     config.set_string('-fsg', os.path.join("../resource/", pGramma+'.fsg'))
     #config.set_string('-jsgf', os.path.join("../resource/", pGramma+'.gram'))
     config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
     print ("[createConfig]---")
     return config
Example #20
 def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
     super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
     # Hotword module imports
     from pocketsphinx import Decoder
     # Hotword module params
     self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
     self.num_phonemes = len(self.phonemes.split())
     self.threshold = self.config.get("threshold", 1e-90)
     self.sample_rate = self.listener_config.get("sample_rate", 1600)
     dict_name = self.create_dict(self.key_phrase, self.phonemes)
     config = self.create_config(dict_name, Decoder.default_config())
     self.decoder = Decoder(config)
Example #21
 def createConfig(self, pGramma):
     '''
     Create configuration with acoustic model path, grammar and dictionary
     '''
     print("[createConfig]+++")
     config = Decoder.default_config()
     config.set_string('-hmm',
                       os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
     config.set_string('-fsg', os.path.join("../resource/",
                                            pGramma + '.fsg'))
     #config.set_string('-jsgf', os.path.join("../resource/", pGramma+'.gram'))
     config.set_string('-dict', os.path.join("../resource/",
                                             'service.dict'))
     print("[createConfig]---")
     return config
Example #22
 def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
     super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
     # Hotword module imports
     from pocketsphinx import Decoder
     # Hotword module config
     module = self.config.get("module")
     if module != "pocketsphinx":
         LOG.warning(
             str(module) + " module does not match with "
             "Hotword class pocketsphinx")
     # Hotword module params
     self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
     self.num_phonemes = len(self.phonemes.split())
     self.threshold = self.config.get("threshold", 1e-90)
     self.sample_rate = self.listener_config.get("sample_rate", 1600)
     dict_name = self.create_dict(self.key_phrase, self.phonemes)
     config = self.create_config(dict_name, Decoder.default_config())
     self.decoder = Decoder(config)
Example #23
def get_decoder_config():
    """
    Get a populated configuration object for the pocketsphinx Decoder.
    """
    model_dir = get_model_path()

    config = Decoder.default_config()
    config.set_string("-dict", os.path.join(model_dir, "cmudict-en-us.dict"))
    config.set_string("-fdict", os.path.join(model_dir, "en-us/noisedict"))
    config.set_string("-featparams", os.path.join(model_dir, "en-us/feat.params"))
    config.set_string("-hmm", os.path.join(model_dir, "en-us"))
    config.set_string("-lm", os.path.join(model_dir, "en-us.lm.bin"))
    config.set_string("-mdef", os.path.join(model_dir, "en-us/mdef"))
    config.set_string("-mean", os.path.join(model_dir, "en-us/means"))
    config.set_string("-sendump", os.path.join(model_dir, "en-us/sendump"))
    config.set_string("-tmat", os.path.join(model_dir, "en-us/transition_matrices"))
    config.set_string("-var", os.path.join(model_dir, "en-us/variances"))

    return config
Example #24
    def __init__(self, rt, on_activation: Callable):
        super().__init__(rt, on_activation)
        lang = rt.config['lang']
        self.hmm_folder = join(rt.paths.user_config, 'models', lang)
        self.rate, self.width = self.rec_config['sample_rate'], self.rec_config['sample_width']
        self.padding = b'\0' * int(self.rate * self.width * self.SILENCE_SEC)
        self.buffer = b''

        download_extract_tar(self.url.format(lang=lang), self.hmm_folder)

        config = Decoder.default_config()
        config.set_string('-hmm', self.hmm_folder)
        config.set_string('-dict', self._create_dict(self.wake_word, self.config['phonemes']))
        config.set_string('-keyphrase', self.wake_word)
        config.set_float('-kws_threshold', float(self.config['threshold']))
        config.set_float('-samprate', self.rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.ps = Decoder(config)
Example #25
    def run(self):
        conf = Decoder.default_config()
        conf.set_string('-hmm', self.config.hmmPS)
        conf.set_string('-lm', self.config.lmPS)
        conf.set_string('-dict', self.config.dictPS)
        if os.path.isfile(self.config.mllrPS):
            conf.set_string('-mllr', self.config.mllrPS)
        decoder = Decoder(conf)

        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=1024)
        stream.start_stream()
        self.samplewith = p.get_sample_size(pyaudio.paInt16)

        in_speech_bf = True
        decoder.start_utt('')
        while not self._terminate:
            buf = stream.read(1024)
            if buf:
                if self.save:
                    self.liSave.append(buf)
                    self.numSave += 1
                    if self.numSave > self.maxSave:  # guard against leaving the microphone recording indefinitely
                        self.activeSave(self.fichWAV)
                decoder.process_raw(buf, False, False)
                if decoder.get_in_speech() != in_speech_bf:
                    in_speech_bf = decoder.get_in_speech()
                    if not in_speech_bf:
                        decoder.end_utt()
                        try:
                            if decoder.hyp().hypstr != '':
                                self.decode(decoder.hyp().hypstr)
                        except AttributeError:
                            pass
                        decoder.start_utt('')
            else:
                break
        decoder.end_utt()
Example #26
File: Voice.py | Project: JERUKA9/lucaschess
    def run( self ):
        conf = Decoder.default_config()
        conf.set_string('-hmm', self.config.hmmPS)
        conf.set_string('-lm', self.config.lmPS)
        conf.set_string('-dict', self.config.dictPS)
        if os.path.isfile(self.config.mllrPS):
            conf.set_string('-mllr', self.config.mllrPS)
        decoder = Decoder(conf)

        p = pyaudio.PyAudio()
        stream = p.open( format=pyaudio.paInt16,
                         channels=1,
                         rate=16000,
                         input=True,
                         frames_per_buffer=1024 )
        stream.start_stream()
        self.samplewith = p.get_sample_size(pyaudio.paInt16)

        in_speech_bf = True
        decoder.start_utt('')
        while not self._terminate:
            buf = stream.read(1024)
            if buf:
                if self.save:
                    self.liSave.append(buf)
                    self.numSave += 1
                    if self.numSave > self.maxSave: # guard against leaving the microphone recording indefinitely
                        self.activeSave(self.fichWAV)
                decoder.process_raw(buf, False, False)
                if decoder.get_in_speech() != in_speech_bf:
                    in_speech_bf = decoder.get_in_speech()
                    if not in_speech_bf:
                        decoder.end_utt()
                        try:
                            if decoder.hyp().hypstr != '':
                                self.decode(decoder.hyp().hypstr)
                        except AttributeError:
                            pass
                        decoder.start_utt('')
            else:
                break
        decoder.end_utt()
Example #27
    def __init__(self, file_name='aux.wav', raspi=False, local=True):

        ## load environment

        self.FILE_NAME = file_name
        self.audio = pyaudio.PyAudio()
        self.raspi = raspi

        self.local = local

        self.config = Decoder.default_config()
        self.config.set_string('-hmm',
                               os.path.join(self.MODELDIR, 'acoustic-model'))
        self.config.set_string(
            '-dict',
            os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
        self.config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(self.config)
        self.r = sr.Recognizer()
        print("adjunting...")
        with sr.Microphone() as source:
            self.r.adjust_for_ambient_noise(source)

        # tts
        if self.local:
            self.tts = pyttsx3.init()
            self.tts.setProperty('rate', self.RATE)
            self.tts.setProperty('volume', self.VOLUME)
            self.tts.setProperty('voice', 'spanish-latin-am')
        else:
            # Instantiates a client
            self.tts_client = texttospeech.TextToSpeechClient()
            # Build the voice request, select the language code ("en-US") and the ssml
            # voice gender ("neutral")
            self.tts_voice = texttospeech.types.VoiceSelectionParams(
                language_code='es-ES',
                ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

            # Select the type of audio file you want returned
            self.tts_audio_config = texttospeech.types.AudioConfig(
                audio_encoding=texttospeech.enums.AudioEncoding.MP3)
Example #28
    def begin_passive_listening(self):
        """Uses PocketSphinx to listen for the wakeword and call the active
           listening function
        """
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-dict',
                          os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        config.set_string('-keyphrase',
                          self.config.get("general", "wake_word"))
        config.set_string('-logfn', 'nul')
        config.set_float('-kws_threshold', 1e-10)

        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=1024)
        stream.start_stream()

        decoder = Decoder(config)
        decoder.start_utt()

        while True:
            buf = stream.read(1024)
            decoder.process_raw(buf, False, False)
            if decoder.hyp() is not None:
                logging.debug("Wake word recognized")
                speech_input = self.active_listen()
                if (speech_input != -1 and speech_input != -2
                        and speech_input != -3):
                    for name, command in self.commands.items():
                        if speech_input in name:
                            command()
                elif speech_input == -1:
                    self.speak("Sorry, I didn't catch that.")
                decoder.end_utt()
                decoder.start_utt()
                logging.debug("Listening for wakeword again")
Example #29
    def __init__(self, config=Decoder.default_config()):
        assert isinstance(config, Config)

        search_args_set = search_arguments_set(config)

        if len(search_args_set) == 0:
            # Use the language model by default if nothing else is set
            set_lm_path(config)
        elif len(search_args_set) > 1:
            raise ConfigError(
                "more than one search argument was set in the Config "
                "object")

        # Set the required config paths if they aren't already set
        if not (config.get_string("-hmm") and config.get_string("-dict")):
            set_hmm_and_dict_paths(config)

        self._speech_start_callback = None
        self._hypothesis_callback = None
        self._utterance_state = self._UTT_ENDED

        super(PocketSphinx, self).__init__(config)
Example #30
    def __init__(self, file_name='aux.wav', raspi=False):
        self.FILE_NAME = file_name
        self.audio = pyaudio.PyAudio()
        self.raspi = raspi

        self.config = Decoder.default_config()
        self.config.set_string('-hmm',
                               os.path.join(self.MODELDIR, 'acoustic-model'))
        self.config.set_string(
            '-dict',
            os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
        self.config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(self.config)
        self.r = sr.Recognizer()
        print("adjunting...")
        with sr.Microphone() as source:
            self.r.adjust_for_ambient_noise(source)

        # tts
        self.tts = pyttsx3.init()
        self.tts.setProperty('rate', self.RATE)
        self.tts.setProperty('volume', self.VOLUME)
        self.tts.setProperty('voice', 'spanish-latin-am')
Example #31
File: kws.py | Project: lpdink/Jamming
    def __init__(self, in_fs, out_fs, mute_period_length, kws_frame_length):
        threading.Thread.__init__(self)
        # Initialize configuration
        self.daemon = True
        self.exit_flag = False
        self.in_fs = in_fs
        self.out_fs = out_fs
        self.mute_period_frames_count = int(in_fs * mute_period_length)
        self.kws_frames_count = int(in_fs * kws_frame_length)
        model_path = get_model_path()
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(model_path, 'en-us'))  # acoustic model path
        # config.set_string('-lm',"./tests/7567.lm")
        config.set_string('-dict',
                          os.path.join(model_path,
                                       'cmudict-en-us.dict'))  # dictionary path
        config.set_string('-keyphrase', 'alexa')
        config.set_float('-kws_threshold', 1e-20)
        config.set_string('-logfn', './logs/tmp')  # send INFO logging somewhere else
        self.decoder = Decoder(config)
        self.decoder.start_utt()

        self.start()
Example #32
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import sys, os

from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

decoder.decode_raw(
    open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb'))

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
Example #33
 def __init__(self):
     config = Decoder.default_config()
     config.set_string('-hmm', SPHINX_HMM)
     config.set_string('-lm', SPHINX_LM)
     config.set_string('-dict', SPHINX_DICT)
     self.decoder = Decoder(config)
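
A hedged companion method such a wrapper might expose (the method name is an assumption; the decode_raw/hyp usage mirrors Example #32):

 def transcribe_file(self, wav_path):
     with open(wav_path, 'rb') as wav_file:
         self.decoder.decode_raw(wav_file)
     hyp = self.decoder.hyp()
     return hyp.hypstr if hyp is not None else ''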
Example #34
def record(listen_time):

    THRESHOLD=None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    if THRESHOLD == None:
        THRESHOLD = fetchThreshold()
        print THRESHOLD

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* recording"
    frames = []
    detected = False
    for i in range(0, RATE / CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score < THRESHOLD:
            continue
        else:
            detected = True
    if not detected:
        print "nothing detected"
        return("")

    print "* done recording"
    #stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir+"/livewav.wav"
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')

    speechRec = Decoder(config)


    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
        #result = speechRec.get_hyp()


    return(speechRec.hyp().hypstr)
Example #35
def recognition_worker(audio_file,
                       queue, event, max_no_speech=120, debug=False,
                       hmm='/usr/local/share/pocketsphinx/model/en-us/en-us',
                       lm='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin',
                       cmudict='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'):
    '''
    Read audio from `audio_file` and feed it to pocketsphinx.
    Put recognized text in `queue`. Shut down if `event` is set.
    If no speech is detected for `max_no_speech` seconds, set
    `event` and quit.
    '''
    from pocketsphinx import Decoder
    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', cmudict)
    if not debug:
        config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)
    in_speech_bf = True
    no_speech_timer = None
    now_in_speech = False
    decoder.start_utt()
    try:
        with open(audio_file, 'rb') as f:
            f.read(40) # read RIFF header
            # TODO: Probably should sanity check the audio format...
            while not event.is_set():
                buf = f.read(1024)
                if buf:
                    decoder.process_raw(buf, False, False)
                    now_in_speech = decoder.get_in_speech()
                    if debug and now_in_speech:
                        print('Found speech', file=sys.stderr)
                    if now_in_speech != in_speech_bf:
                        in_speech_bf = now_in_speech
                        if not in_speech_bf:
                            if debug:
                                print('Processing speech', file=sys.stderr)
                            # No speech, but there was speech before, so, process.
                            decoder.end_utt()
                            try:
                                speech = decoder.hyp().hypstr
                                if speech != '':
                                    if debug:
                                        print('Speech: ' + speech, file=sys.stderr)
                                    queue.put_nowait(speech)
                            except AttributeError:
                                pass
                            decoder.start_utt()
                        else:
                            # Got some speech, reset timer.
                            no_speech_timer = None
                else:
                    if debug:
                        print('No audio', file=sys.stderr)
                    # Wait a bit...
                    event.wait(0.1)
                if not now_in_speech:
                    if no_speech_timer is None:
                        no_speech_timer = datetime.datetime.now()
                    elif (datetime.datetime.now() - no_speech_timer).total_seconds() > max_no_speech:
                        if debug:
                            print('No speech, timing out', file=sys.stderr)
                        event.set()
    except KeyboardInterrupt:
        pass
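
One possible way to drive the worker above from a parent process; the multiprocessing wiring and the audio path are assumptions, not part of the original snippet:

import multiprocessing as mp
import queue as queue_module

q = mp.Queue()
stop = mp.Event()
worker = mp.Process(target=recognition_worker, args=('speech.wav', q, stop))
worker.start()
try:
    while not stop.is_set():
        try:
            print('heard:', q.get(timeout=1))  # recognized utterances arrive here
        except queue_module.Empty:
            pass
finally:
    stop.set()
    worker.join()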
Example #36
    def __init__(self, **kwargs):
        signal.signal(signal.SIGINT, self.stop)

        model_path = get_model_path()

        kwargs = {
            x: os.path.expandvars(kwargs[x])
            if type(kwargs[x]) is str else kwargs[x]
            for x in kwargs
        }

        nodename = kwargs.pop('nodename')
        grammar_file = kwargs.pop('grammar_file', None)
        grammar_rule = kwargs.pop('grammar_rule', None)
        grammar_name = kwargs.pop('grammar_name', None)

        kwargs.pop('esiaf_input_topic')

        if kwargs.get('dic') is not None and kwargs.get('dict') is None:
            kwargs['dict'] = kwargs.pop('dic')

        if kwargs.get('hmm') is None:
            kwargs['hmm'] = os.path.join(model_path, 'en-us')

        if kwargs.get('lm') is None:
            kwargs['lm'] = os.path.join(model_path, 'en-us.lm.bin')

        if kwargs.get('dict') is None and kwargs.get('dic') is None:
            kwargs['dict'] = os.path.join(model_path, 'cmudict-en-us.dict')

        if kwargs.pop('verbose', False) is False:
            if sys.platform.startswith('win'):
                kwargs['logfn'] = 'nul'
            else:
                kwargs['logfn'] = '/dev/null'

        config = Decoder.default_config()

        print(kwargs)

        for key, value in kwargs.items():
            if isinstance(value, bool):
                config.set_boolean('-{}'.format(key), value)
            elif isinstance(value, int):
                config.set_int('-{}'.format(key), value)
            elif isinstance(value, float):
                config.set_float('-{}'.format(key), value)
            elif isinstance(value, str):
                config.set_string('-{}'.format(key), value)

        self.decoder = Decoder(config)

        if grammar_file and grammar_rule and grammar_name:
            jsgf = Jsgf(grammar_file)
            rule = jsgf.get_rule(grammar_name + '.' + grammar_rule)
            fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
            self.decoder.set_fsg(grammar_name, fsg)
            self.decoder.set_search(grammar_name)

        self.start = None
        self.finish = None

        self.speech_publisher = rospy.Publisher(nodename + '/' + 'SpeechRec',
                                                SpeechInfo,
                                                queue_size=10)