Ejemplo n.º 1
0
def recognize(wav_file):
    """
    Run speech recognition on a given file.

    A decoder is configured with the relative model locations ``hmm/``,
    ``lm/en-us.lm.bin`` and ``dict/en_in.dic`` (resolved against the current
    working directory), ``wav_file`` is opened with ``wave.open`` and the
    opened reader is handed to ``Decoder.decode_raw``.

    :param wav_file: anything ``wave.open`` accepts for reading.
    :return: the value of ``Decoder.get_hyp()`` after decoding.
    """
    import os  # local import: only needed to build the model paths below

    config = DefaultConfig()
    config.set_string('-hmm', "hmm/")
    # os.path.join replaces the 'lm\en-us.lm.bin' style literals: those relied
    # on the invalid escape sequence '\e' and only formed a usable path on
    # Windows. The joined value is unchanged on Windows and valid elsewhere.
    config.set_string('-lm', os.path.join('lm', 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join('dict', 'en_in.dic'))

    speech_rec = Decoder(config)
    print("Decoder Initialized")
    # The context manager closes the WAV reader even when decoding raises.
    with wave.open(wav_file, 'rb') as audio:
        print("AudioFile Loaded")
        speech_rec.decode_raw(audio)
        print("Audio file decoded")
    result = speech_rec.get_hyp()
    print("Result Ready\n")
    return result
Ejemplo n.º 2
0
def init():
    """Create the keyphrase decoder and audio helpers as module globals.

    Binds three module-level names: ``decoder`` (a ``Decoder`` built from the
    options below), ``p`` (a ``pyaudio.PyAudio`` instance) and ``r`` (a
    ``speech_recognition.Recognizer``).
    """
    global decoder, p, r

    cfg = DefaultConfig()
    cfg.set_string('-logfn', settings.POCKETSPHINX_LOG)

    # Acoustic model and dictionary live under the directory reported by
    # get_model_path().
    acoustic_model = os.path.join(get_model_path(), 'en-us')
    cfg.set_string('-hmm', acoustic_model)
    pronunciation_dict = os.path.join(get_model_path(), 'cmudict-en-us.dict')
    cfg.set_string('-dict', pronunciation_dict)

    # Keyphrases come from the project settings.
    cfg.set_string('-kws', settings.KEYPHRASES)

    # Decoder for streaming data, plus the audio and recognizer objects.
    decoder = Decoder(cfg)
    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
Ejemplo n.º 3
0
def init():
	"""Create the language-model decoder and audio helpers as module globals.

	Binds three module-level names: ``decoder`` (a ``Decoder`` configured from
	the ``settings`` paths below), ``p`` (a ``pyaudio.PyAudio`` instance) and
	``r`` (a ``speech_recognition.Recognizer``).
	"""
	global decoder, p, r

	cfg = DefaultConfig()
	# Option name -> value taken from the project settings, applied in order.
	model_options = (
		('-hmm', settings.POCKET_HMM_ACOUSTIC_MODEL),
		('-lm', settings.POCKET_LANGUAGE_MODEL),
		('-dict', settings.POCKET_DICTIONARY),
	)
	for option, value in model_options:
		cfg.set_string(option, value)

	# Decoder for streaming data
	decoder = Decoder(cfg)
	p = pyaudio.PyAudio()

	# Speech recognition recogniser
	r = speech_recognition.Recognizer()
Ejemplo n.º 4
0
def load_decoder(myid, model_config, out):
    """Create a pocketsphinx decoder from the first section of ``model_config``.

    The first section returned by ``model_config.sections()`` must provide the
    keys ``hmm``, ``dict`` and ``lm``. Each path is checked with
    ``os.path.exists`` before any pocketsphinx object is created; the first
    missing one is reported on stdout and the process exits.

    :param myid: identifier used in the log file name.
    :param model_config: config object exposing ``sections()`` and
        ``model_config[section][key]`` lookups.
    :param out: prefix of the log file name (``'<out>_<myid>.log'``).
    :return: a ``Decoder`` built from the configured paths.
    :raises SystemExit: with code -2 (``hmm`` missing), -4 (``lm`` missing) or
        -5 (``dict`` missing).
    """
    model_name = model_config.sections()[0]
    model = model_config[model_name]
    hmm = model['hmm']
    dict_path = model['dict']  # not named ``dict``: that shadows the builtin
    lm = model['lm']
    logfn = '{}_{}.log'.format(out, myid)

    # Validate up front, in the original order and with the original codes,
    # so nothing is allocated when a model file is missing.
    for required_path, exit_code in ((hmm, -2), (lm, -4), (dict_path, -5)):
        if not os.path.exists(required_path):
            print('ERROR: {} does not exist'.format(required_path))
            sys.exit(exit_code)

    pocketsphinx_config = DefaultConfig()
    pocketsphinx_config.set_string('-hmm', hmm)
    pocketsphinx_config.set_string('-lm', lm)
    pocketsphinx_config.set_string('-dict', dict_path)
    pocketsphinx_config.set_string('-logfn', logfn)
    return Decoder(pocketsphinx_config)
Ejemplo n.º 5
0
 def test_config_set_string(self):
     # A string written to '-rawlogdir' must be read back unchanged.
     option, expected = '-rawlogdir', '~/pocketsphinx'
     cfg = DefaultConfig()
     cfg.set_string(option, expected)
     self.assertEqual(cfg.get_string(option), expected)
Ejemplo n.º 6
0
 def test_config_get_string(self):
     # '-rawlogdir' is expected to be None on an untouched config.
     fresh_config = DefaultConfig()
     raw_log_dir = fresh_config.get_string('-rawlogdir')
     self.assertEqual(raw_log_dir, None)
Ejemplo n.º 7
0
def retrieve_scores(word):
    """Decode ``<word>.wav`` against ``<word>-align.jsgf`` and return scores.

    The audio file is fed to the decoder in 1024-byte chunks. Each time the
    decoder reports a transition from speech to non-speech the utterance is
    ended, its hypothesis score and segments are printed, ``scores`` is
    replaced by ``retrieve_segments(decoder)``, and a new utterance is started.

    :param word: base name used for both the WAV file and the JSGF grammar,
        and for the grammar rule ``'forcing.<word>'``.
    :return: the result of the last ``retrieve_segments(decoder)`` call, or an
        empty list when no utterance boundary was detected.
    """
    filename = word + '.wav'
    grammarname = word + '-align.jsgf'
    model_path = get_model_path()

    # Initialize the config values
    config = DefaultConfig()
    config.set_boolean('-verbose', False)
    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    # NOTE(review): '-lm' is set through set_boolean(False) here, presumably
    # to disable the language model in favour of the grammar — confirm.
    config.set_boolean('-lm', False)
    config.set_string('-dict', 'phonemes.dict.txt')
    config.set_boolean('-backtrace', True)
    config.set_boolean('-bestpath', False)
    config.set_boolean('-fsgusefiller', False)

    decoder = Decoder(config)

    # Set the search to JSGF Grammar.
    # The looked-up rule itself is not used; the calls are kept so a problem
    # loading the grammar or rule surfaces at the same point as before.
    Jsgf(grammarname).get_rule('forcing.' + word)
    decoder.set_jsgf_file('grammar', grammarname)
    decoder.set_search('grammar')

    utt_started = False
    scores = []
    # The context manager closes the audio file even if decoding raises;
    # previously the handle was never closed.
    with open(filename, 'rb') as stream:
        decoder.start_utt()
        # iter() with a sentinel stops at the first empty read (end of file).
        for buf in iter(lambda: stream.read(1024), b''):
            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if in_speech and not utt_started:
                utt_started = True
            if not in_speech and utt_started:
                # Speech just ended: close this utterance and collect results.
                decoder.end_utt()
                hyp = decoder.hyp()
                if hyp is not None:
                    print('hyp: %s' % (hyp.best_score))
                print_segments(decoder)
                scores = retrieve_segments(decoder)
                decoder.start_utt()
                utt_started = False
        decoder.end_utt()

    print('scores:', scores)

    return scores
Ejemplo n.º 8
0
 def test_config_get_boolean(self):
     # '-backtrace' is expected to be False on an untouched config.
     fresh_config = DefaultConfig()
     backtrace = fresh_config.get_boolean('-backtrace')
     self.assertEqual(backtrace, False)
Ejemplo n.º 9
0
 def test_config_get_string(self):
     # An untouched config is expected to report None for '-rawlogdir'.
     cfg = DefaultConfig()
     observed = cfg.get_string('-rawlogdir')
     self.assertEqual(observed, None)
Ejemplo n.º 10
0
 def test_config_get_int(self):
     # '-nfft' is expected to be 512 on an untouched config.
     fresh_config = DefaultConfig()
     nfft = fresh_config.get_int('-nfft')
     self.assertEqual(nfft, 512)
Ejemplo n.º 11
0
 def test_config_get_float(self):
     # '-samprate' is expected to be 16000.0 on an untouched config.
     fresh_config = DefaultConfig()
     sample_rate = fresh_config.get_float('-samprate')
     self.assertEqual(sample_rate, 16000.0)
Ejemplo n.º 12
0
import os
from pocketsphinx import DefaultConfig, Decoder, get_model_path, get_data_path
model_path = get_model_path()
data_path = 'C:/project/accent/accent-poc/src/Audio/'

# Create a decoder with a certain model
config = DefaultConfig()
config.set_string('-hmm', os.path.join(model_path, 'en-us'))
config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
decoder = Decoder(config)

# Decode streaming data.
# Each chunk is read fresh instead of refilling a fixed 1024-byte buffer with
# readinto(): the final read is usually shorter than the buffer, and passing
# the whole buffer fed leftover bytes from the previous chunk to the decoder.
with open(os.path.join(data_path, 'speaker2.wav'), 'rb') as f:
    decoder.start_utt()
    while True:
        buf = f.read(1024)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
    decoder.end_utt()
print('Best hypothesis segments:', [seg.word for seg in decoder.seg()])
Ejemplo n.º 13
0
    def get_config(self):
        """Build the decoder configuration for the tuned model.

        The acoustic model (``en-us-adapt``) and language model
        (``en-us.lm.bin``) are taken from ``Audio_Tuner.tuned_path``; the
        dictionary and keyword list come from ``self.dict_path`` and
        ``self.keywords_path``. Logging goes to ``/dev/null`` and verbose
        output is switched off.

        :return: the populated ``DefaultConfig`` instance.
        """
        cfg = DefaultConfig()

        # Model files located under the tuned-model directory.
        tuned_dir = Audio_Tuner.tuned_path
        acoustic_model = os.path.join(tuned_dir, 'en-us-adapt')
        language_model = os.path.join(tuned_dir, 'en-us.lm.bin')
        cfg.set_string('-hmm', acoustic_model)
        cfg.set_string('-lm', language_model)

        # Custom dictionary: per the original note, it is made by copying
        # only the wanted words over to another file.
        cfg.set_string('-dict', self.dict_path)
        cfg.set_string('-kws', self.keywords_path)

        # Silence the decoder's own logging.
        cfg.set_string("-logfn", '/dev/null')
        cfg.set_boolean("-verbose", False)

        return cfg
Ejemplo n.º 14
0
    vals = 2.0 / N * np.abs(yf[0:N // 2])  # FFT is symmetrical, so we take just the first half
    # FFT is also complex, to we take just the real part (abs)
    return xf, vals


FilePath = 'C:/project/accent/accent-poc/src/Audio/'
model_path = get_model_path()
data_path = get_data_path()
dirs = os.listdir(FilePath)
# Keep only the WAV recordings found in FilePath.
recordings = [name for name in dirs if name.endswith('.wav')]

# Create a decoder with a hmm model
config = DefaultConfig()
config.set_string('-hmm', os.path.join(model_path, 'en-us'))
config.set_string('-allphone', os.path.join(model_path, 'en-us-phone.lm.bin'))
config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
config.set_float('-lw', 2.0)
config.set_float('-beam', 1e-10)
config.set_float('-pbeam', 1e-10)
decoder = Decoder(config)

# Decode streaming data.
# Each chunk is read fresh instead of refilling a fixed 1024-byte buffer with
# readinto(): the final read is usually shorter than the buffer, and passing
# the whole buffer fed leftover bytes from the previous chunk to the decoder.
# os.path.join is used here as in the rest of this block.
# NOTE(review): no decoder.end_utt() is visible in this excerpt — confirm it
# follows.
with open(os.path.join(FilePath, 'amol.wav'), 'rb') as f:
    decoder.start_utt()
    while True:
        buf = f.read(1024)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
Ejemplo n.º 15
0
 def test_config_get_boolean(self):
     # An untouched config is expected to report False for '-backtrace'.
     cfg = DefaultConfig()
     observed = cfg.get_boolean('-backtrace')
     self.assertEqual(observed, False)
Ejemplo n.º 16
0
 def test_config_set_boolean(self):
     # Switching '-backtrace' on must be visible through get_boolean.
     option = '-backtrace'
     cfg = DefaultConfig()
     cfg.set_boolean(option, True)
     self.assertEqual(cfg.get_boolean(option), True)
Ejemplo n.º 17
0
 def test_config_get_float(self):
     # An untouched config is expected to report 16000.0 for '-samprate'.
     cfg = DefaultConfig()
     observed = cfg.get_float('-samprate')
     self.assertEqual(observed, 16000.0)
Ejemplo n.º 18
0
 def test_config_set_float(self):
     # A float written to '-samprate' must be read back unchanged.
     option, expected = '-samprate', 8000.0
     cfg = DefaultConfig()
     cfg.set_float(option, expected)
     self.assertEqual(cfg.get_float(option), expected)
Ejemplo n.º 19
0
 def test_config_set_float(self):
     # Overriding '-samprate' with 8000.0 must be visible through get_float.
     new_rate = 8000.0
     updated_config = DefaultConfig()
     updated_config.set_float('-samprate', new_rate)
     stored_rate = updated_config.get_float('-samprate')
     self.assertEqual(stored_rate, new_rate)
Ejemplo n.º 20
0
 def test_config_set_int(self):
     # An integer written to '-nfft' must be read back unchanged.
     option, expected = '-nfft', 256
     cfg = DefaultConfig()
     cfg.set_int(option, expected)
     self.assertEqual(cfg.get_int(option), expected)
Ejemplo n.º 21
0
 def test_config_get_int(self):
     # An untouched config is expected to report 512 for '-nfft'.
     cfg = DefaultConfig()
     observed = cfg.get_int('-nfft')
     self.assertEqual(observed, 512)
Ejemplo n.º 22
0
 def test_config_set_string(self):
     # Overriding '-rawlogdir' must be visible through get_string.
     new_dir = '~/pocketsphinx'
     updated_config = DefaultConfig()
     updated_config.set_string('-rawlogdir', new_dir)
     stored_dir = updated_config.get_string('-rawlogdir')
     self.assertEqual(stored_dir, new_dir)
Ejemplo n.º 23
0
 def test_config_set_int(self):
     # Overriding '-nfft' with 256 must be visible through get_int.
     new_nfft = 256
     updated_config = DefaultConfig()
     updated_config.set_int('-nfft', new_nfft)
     stored_nfft = updated_config.get_int('-nfft')
     self.assertEqual(stored_nfft, new_nfft)
Ejemplo n.º 24
0
 def test_config_set_boolean(self):
     # Overriding '-backtrace' with True must be visible through get_boolean.
     updated_config = DefaultConfig()
     updated_config.set_boolean('-backtrace', True)
     stored_flag = updated_config.get_boolean('-backtrace')
     self.assertEqual(stored_flag, True)
Ejemplo n.º 25
0
# of the phrase.
HOTWORD_THRESHOLD = -5000  # Higher values indicate that pocketsphinx
# is more sure about the word. Adjust to your needs.
# Pocketsphinx files
MODELDIR = get_model_path()  # Default model path; replace it if
# you use a custom model in a custom location.

# WORDS: the hotword and the spoken phrases built around it.
HOTWORD = "Anastasia"
SPEAK_UNDERSTOOD = "Understood."
SPEAK_FAILURE = "Sorry?"
SPEAK_SUCCESS = "Okay."
SPEAK_READY = HOTWORD + ". At your service."

# Decoder setup: acoustic model, language model and dictionary are all
# resolved relative to MODELDIR.
config = DefaultConfig()
config.set_string('-hmm', path.join(MODELDIR, 'en-us'))
# A custom language model can be used here instead.
config.set_string('-lm', path.join(MODELDIR, 'en-us.lm.bin'))
# A custom dictionary can be used here instead.
config.set_string('-dict', path.join(MODELDIR, 'cmudict-en-us.dict'))
# Disabled option: send the decoder log to /dev/null.
# config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)

# TTS settings and setup
engine = TtsEngine('en-US')
engine.pitch = 80  # 50 - 200
engine.rate = 120  # speed of speech, 20 - 500
# engine.volume = xx # 0 - 500