Exemple #1
0
def recognize(wav_file):
    """
    Run speech recognition on a given file.

    A pocketsphinx ``Decoder`` is configured with the acoustic model,
    language model and dictionary found under the relative ``hmm/``, ``lm/``
    and ``dict/`` directories (resolved against the current working
    directory).  ``wav_file`` is opened with ``wave.open`` and the resulting
    reader is handed to ``Decoder.decode_raw``.

    Args:
        wav_file: Anything ``wave.open`` accepts in ``'rb'`` mode.

    Returns:
        The value returned by ``Decoder.get_hyp()`` after decoding.
    """
    config = DefaultConfig()
    config.set_string('-hmm', "hmm/")
    # Forward slashes keep these paths portable and avoid the invalid
    # "\e" escape sequences the backslash spellings contained.
    config.set_string('-lm', 'lm/en-us.lm.bin')
    config.set_string('-dict', 'dict/en_in.dic')

    speech_rec = Decoder(config)
    print("Decoder Initialized")
    audio = wave.open(wav_file, 'rb')
    try:
        print("AudioFile Loaded")
        # NOTE(review): decode_raw receives the wave reader object here, as
        # in the original; confirm the installed binding accepts it rather
        # than a plain binary file handle.
        speech_rec.decode_raw(audio)
        print("Audio file decoded")
        result = speech_rec.get_hyp()
    finally:
        # Release the underlying file even when decoding fails.
        audio.close()
    print("Result Ready\n")
    return result
Exemple #2
0
def record(listen_time):
    """Record from the microphone for ``listen_time`` seconds and decode it.

    Audio is read in ``CHUNK``-frame buffers at ``RATE`` Hz.  Each buffer is
    scored with ``getScore``; if no buffer reaches the threshold returned by
    ``fetchThreshold()`` nothing is written or decoded.  Otherwise the
    capture is saved as ``livewav.wav`` in the current working directory and
    passed to a pocketsphinx ``Decoder`` configured from the module-level
    ``hmdir``, ``lmdir`` and ``dictd`` paths.

    Args:
        listen_time: Recording length in seconds.

    Returns:
        The recognised text, or ``""`` when no buffer reached the threshold
        or the decoder produced no hypothesis.
    """
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    try:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)

        stream = p.open(format=FORMAT,
                        channels=1,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            frames = []
            detected = False
            for _ in range(0, RATE // CHUNK * listen_time):
                data = stream.read(CHUNK)
                frames.append(data)
                if getScore(data) >= THRESHOLD:
                    detected = True
        finally:
            # Always release the input stream, including on the
            # "nothing detected" path and on read errors.
            stream.close()
    finally:
        p.terminate()

    if not detected:
        print("nothing detected")
        return ""

    print("* done recording")

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.path.join(os.getcwd(), WAVE_OUTPUT_FILENAME)

    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')

    speechRec = Decoder(config)

    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)

    hypothesis = speechRec.hyp()
    if hypothesis is None:
        # No hypothesis available: mirror the "nothing detected" result
        # instead of failing on attribute access.
        return ""
    return hypothesis.hypstr
Exemple #3
0
def passiverecord(THRESHOLD=None):
    """Record up to two seconds of audio, stop early on quiet input, decode it.

    Buffers of ``CHUNK`` frames are read at ``RATE`` Hz and scored with
    ``getScore``.  A rolling window of the last 30 scores is averaged after
    every buffer; recording stops as soon as that average drops below 80% of
    the threshold.  The capture is written to ``passive.wav`` in the current
    working directory and decoded with a pocketsphinx ``Decoder`` built from
    the module-level ``hmdir``, ``lmdir`` and ``dictd``.

    Args:
        THRESHOLD: Score threshold.  When ``None`` it is taken from
            ``fetchThreshold()`` and printed.

    Returns:
        The first element of ``Decoder.get_hyp()`` for the recording.
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    LISTEN_TIME = 2
    WAVE_OUTPUT_FILENAME = "passive.wav"

    p = pyaudio.PyAudio()
    try:
        if THRESHOLD is None:
            THRESHOLD = fetchThreshold()
            print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)

        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            frames = []
            # Window starts filled with 1.2x the threshold, so the average
            # only falls under the 0.8x cut-off once real scores pull it down.
            lastN = [THRESHOLD * 1.2 for _ in range(30)]
            for _ in range(0, RATE // CHUNK * LISTEN_TIME):
                data = stream.read(CHUNK)
                frames.append(data)
                lastN.pop(0)
                lastN.append(getScore(data))
                average = sum(lastN) / float(len(lastN))
                if average < THRESHOLD * 0.8:
                    break
        finally:
            # Release the input stream even if reading fails.
            stream.close()
    finally:
        p.terminate()

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.path.join(os.getcwd(), WAVE_OUTPUT_FILENAME)

    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()

    return result[0]
Exemple #4
0
def record(listen_time):
    """Record from the microphone for ``listen_time`` seconds and decode it.

    Audio is read in ``CHUNK``-frame buffers at ``RATE`` Hz and each buffer
    is scored with ``getScore``.  If no buffer reaches the threshold from
    ``fetchThreshold()`` the function returns without writing or decoding.
    Otherwise the capture is saved as ``livewav.wav`` in the current working
    directory and decoded with a pocketsphinx ``Decoder`` built from the
    module-level ``hmdir``, ``lmdir`` and ``dictd``.

    Args:
        listen_time: Recording length in seconds.

    Returns:
        ``""`` when no buffer reached the threshold, otherwise the first
        element of ``Decoder.get_hyp()``.
    """
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    try:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)

        stream = p.open(format=FORMAT,
                        channels=1,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            frames = []
            detected = False
            for _ in range(0, RATE // CHUNK * listen_time):
                data = stream.read(CHUNK)
                frames.append(data)
                if getScore(data) >= THRESHOLD:
                    detected = True
        finally:
            # Always release the input stream, including on the
            # "nothing detected" path and on read errors.
            stream.close()
    finally:
        p.terminate()

    if not detected:
        print("nothing detected")
        return ""

    print("* done recording")

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.path.join(os.getcwd(), WAVE_OUTPUT_FILENAME)

    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()

    return result[0]
def best_sphinx_speech_result(pyaudio, wav_name, profile):
  """Decode ``wav_name`` with pocketsphinx and return the best hypothesis.

  On the first call (while the module-level ``have_sphinx_dictionary`` flag
  is false) ``compile`` is invoked with ``profile["words"]`` to produce
  ``dictionary.dic`` and ``language_model.lm``; the flag is then set so later
  calls skip that step.

  Args:
    pyaudio: Not used by this function; kept for interface compatibility.
    wav_name: Path of the audio file to decode.
    profile: Mapping that must contain ``"words"`` when the dictionary has
      not been built yet.

  Returns:
    The first element of ``Decoder.get_hyp()``.

  Raises:
    KeyError: If the dictionary still has to be built and ``profile`` has no
      ``"words"`` entry.
  """
  # The global declaration has to precede every use of the name in this scope.
  global have_sphinx_dictionary

  if not have_sphinx_dictionary:
    if "words" not in profile:
      raise KeyError("Pass the possible words in in profile")
    # NOTE(review): `compile` is presumably a project helper shadowing the
    # builtin -- confirm against the file's imports.
    compile("sentences.txt", "dictionary.dic", "language_model.lm", profile["words"])
    have_sphinx_dictionary = True

  speechRec = Decoder(
    hmm  = "/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k",
    lm   = "language_model.lm",
    dict = "dictionary.dic"
  )

  # Context manager closes the audio file once decoding is finished.
  with open(wav_name, 'rb') as wav_file:
    speechRec.decode_raw(wav_file)
  results = speechRec.get_hyp()
  return results[0]
class CMUSphinxRecognizer(BaseRecognizer):
    """Recognizer backed by a pocketsphinx ``Decoder``.

    The decoder is configured once at construction time from the
    ``SPHINX_HMM``, ``SPHINX_LM`` and ``SPHINX_DICT`` settings.
    """

    def __init__(self):
        config = Decoder.default_config()
        config.set_string('-hmm', SPHINX_HMM)
        config.set_string('-lm', SPHINX_LM)
        config.set_string('-dict', SPHINX_DICT)
        self.decoder = Decoder(config)

    def recognize(self, raw_audio):
        """Decode ``raw_audio`` and return the decoder's hypothesis.

        The audio bytes are spooled to a temporary file because
        ``decode_raw`` is given a file handle; the file is removed again
        once decoding has finished.

        Args:
            raw_audio: Audio data written verbatim to the temporary file.

        Returns:
            A ``(hypstr, best_score, prob)`` tuple taken from the hypothesis.
        """
        import os

        file_path = self.__save_file(raw_audio)
        try:
            # Audio is binary data, so the file must not be opened in text mode.
            with open(file_path, 'rb') as wav_fp:
                self.decoder.decode_raw(wav_fp)
            hypothesis = self.decoder.hyp()
        finally:
            # The temp file is created with delete=False; clean it up here.
            os.remove(file_path)
        return hypothesis.hypstr, hypothesis.best_score, hypothesis.prob

    @staticmethod
    def __save_file(data):
        """Write ``data`` to a new temporary file and return its path."""
        tmp_fp = NamedTemporaryFile(delete=False)
        try:
            tmp_fp.write(data)
        finally:
            tmp_fp.close()
        return tmp_fp.name
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import sys, os

from pocketsphinx import Decoder

# Root directory holding the acoustic model, grammar and dictionary.
MODELDIR = "../models"

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

# Decode the test recording; the context manager closes the file afterwards.
with open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'),
          'rb') as audio_fh:
    decoder.decode_raw(audio_fh)

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import sys, os



from pocketsphinx import Decoder

# Root directory holding the acoustic model, grammar and dictionary.
MODELDIR = "../models"

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

# Decode the test recording; the context manager closes the file afterwards.
audio_path = os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav')
with open(audio_path, 'rb') as audio_fh:
    decoder.decode_raw(audio_fh)

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])


Exemple #9
0
def record(THRESHOLD=None):
    """Record up to four seconds of audio, stop early on quiet input, decode it.

    Buffers of ``CHUNK`` frames are read at ``RATE`` Hz and scored with
    ``getScore``.  A rolling window of the last 30 scores is averaged after
    every buffer; recording stops as soon as that average drops below 80% of
    the threshold.  The capture is written to ``livewav.wav`` in the current
    working directory and decoded with a pocketsphinx ``Decoder`` built from
    the module-level ``hmdir``, ``lmdir`` and ``dictd``.

    Args:
        THRESHOLD: Score threshold.  When ``None`` it is taken from
            ``fetchThreshold()`` and printed.

    Returns:
        The first element of ``Decoder.get_hyp()`` for the recording.
    """
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    LISTEN_TIME = 4
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    try:
        if THRESHOLD is None:
            THRESHOLD = fetchThreshold()
            print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)

        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            frames = []
            # Window starts filled with 1.2x the threshold, so the average
            # only falls under the 0.8x cut-off once real scores pull it down.
            lastN = [THRESHOLD * 1.2 for _ in range(30)]
            for _ in range(0, RATE // CHUNK * LISTEN_TIME):
                data = stream.read(CHUNK)
                frames.append(data)
                lastN.pop(0)
                lastN.append(getScore(data))
                average = sum(lastN) / float(len(lastN))
                if average < THRESHOLD * 0.8:
                    break
            print("* done recording")
        finally:
            # Release the input stream even if reading fails.
            stream.close()
    finally:
        p.terminate()

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.path.join(os.getcwd(), WAVE_OUTPUT_FILENAME)

    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()

    return result[0]
Exemple #10
0
def decodepassive():
    """Decode the file named by the module-level ``passivewav`` path.

    A pocketsphinx ``Decoder`` is built from the module-level ``hmdir``,
    ``lmdir`` and ``dictd`` and fed the opened file.

    Returns:
        The first element of ``Decoder.get_hyp()``.
    """
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    # The handle needs its own name: binding it to `passivewav` would make
    # that name local and the open() call would raise UnboundLocalError.
    with open(passivewav, 'rb') as wav_fh:
        speechRec.decode_raw(wav_fh)
    result = speechRec.get_hyp()
    return result[0]
    p.terminate()

    # write data to WAVE file
    data = ''.join(all)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()


if __name__ == "__main__":
    # Model locations; defined at module level so record() can read them.
    hmdir = "/usr/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"
    lmdir = "/usr/share/pocketsphinx/model/lm/en_US/hub4.5000.DMP"
    dictd = "/usr/share/pocketsphinx/model/lm/en_US/cmu07a.dic"
    record()
    wavfile = "/home/shridhar/pocketsphinxtest/livewav.wav"

    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    # Context manager closes the recording once it has been decoded.
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()

    print("Recognised text from the converted video file")

    print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")

    print(result[0])
    print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
Exemple #12
0
from os import environ, path
from pocketsphinx import Decoder
from sphinxbase import *

print('a')

# Acoustic model, language model, dictionary and the recording to decode.
model_dir = '/home/wmlab/CMU_try/zh_broadcastnews_ptm256_8000/'
# lm_dir = '/home/wmlab/CMU_try/CMUsphinx-Demo-master/0506.lm'
# dict_dir = '/home/wmlab/CMU_try/CMUsphinx-Demo-master/0506.dic'
lm_dir = '/home/wmlab/CMU_try/try_python/usable_version/0506.lm'
dict_dir = '/home/wmlab/CMU_try/try_python/usable_version/0506.dic'
wav_file = '/home/wmlab/CMU_try/testing_audio/testing003.wav'

speech_rec = Decoder(hmm=model_dir, lm=lm_dir, dict=dict_dir)
# Context manager closes the recording once it has been decoded.
with open(wav_file, 'rb') as wavFile:
    # Skip the first 44 bytes before handing the data to decode_raw
    # (presumably the WAV header -- confirm for this recording).
    wavFile.seek(44)
    speech_rec.decode_raw(wavFile)
result = speech_rec.get_hyp()
print(result[0])
Exemple #13
0
def decodepassive():
    """Decode the file named by the module-level ``passivewav`` path.

    A pocketsphinx ``Decoder`` is built from the module-level ``hmdir``,
    ``lmdir`` and ``dictd`` and fed the opened file.

    Returns:
        The first element of ``Decoder.get_hyp()``.
    """
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    # Use a separate name for the handle: rebinding `passivewav` inside the
    # function makes it a local and open() fails with UnboundLocalError.
    with open(passivewav, 'rb') as audio_fh:
        speechRec.decode_raw(audio_fh)
    result = speechRec.get_hyp()
    return result[0]