Example 1
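Decodes a raw audio file with the pocketsphinx en-us acoustic model, language model, and dictionary, feeding the stream to the decoder in 1024-byte chunks and returning the recognized words.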
def get_text_from_audio(audio_input_name: str,
                        working_directory: str = WORKING_DIRECTORY):
    """ Gets text from audio file (using pocketsphinx-python library)

    Args:

    Return:
        list: text from audio file

    """

    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(SPEECH_MODEL_PATH, 'en-us'))
    config.set_string('-lm', os.path.join(SPEECH_MODEL_PATH, 'en-us.lm.bin'))
    config.set_string('-dict',
                      os.path.join(SPEECH_MODEL_PATH, 'cmudict-en-us.dict'))
    decoder = Decoder(config)

    # Decode streaming data.
    decoder.start_utt()
    with open(os.path.join(working_directory, audio_input_name),
              'rb') as stream:
        while True:
            buf = stream.read(1024)
            if buf:
                decoder.process_raw(buf, False, False)
            else:
                break

    decoder.end_utt()
    text_from_audio = [seg.word for seg in decoder.seg()]

    return text_from_audio if text_from_audio else 'Audio file doesn\'t contain words'
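A minimal usage sketch (not part of the original example). It assumes SPEECH_MODEL_PATH and WORKING_DIRECTORY are module-level constants defined before the function above (they appear in its signature) and that the audio is raw 16 kHz, 16-bit mono PCM; the file name below is a placeholder.

from pocketsphinx import get_model_path

SPEECH_MODEL_PATH = get_model_path()   # folder with en-us, en-us.lm.bin, cmudict-en-us.dict
WORKING_DIRECTORY = '.'                # folder holding the audio file

words = get_text_from_audio('speech.raw')
print(words)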
Example 2
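The same streaming pattern wrapped in a function that takes the model directory and a file path, printing the decoding time and the recognized words.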
def recog_wav(MODELDIR, wavfile):
    """Decode an audio file with pocketsphinx and print the recognized words."""

    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
    config.set_string('-lm', os.path.join(MODELDIR, 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join(MODELDIR, 'cmudict-en-us.dict'))

    # Decode streaming data.
    decoder = Decoder(config)
    start = time.time()
    decoder.start_utt()
    with open(wavfile, "rb") as wav_stream:
        while True:
            buffer = wav_stream.read(1024)
            if buffer:
                decoder.process_raw(buffer, False, False)
            else:
                break
    decoder.end_utt()
    duration = time.time() - start
    print("Duration: " + str(duration))  #Benchmarking
    for seg in decoder.seg():
        print(seg.word)
Example 3
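Streams a file through a decoder built from a pre-existing configuration and returns the recognized segments.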
def get_phonemes(file):
    # Decode streaming data. Note: `config` is assumed to be defined at module
    # level (e.g. built with Decoder.default_config() as in the examples above).
    decoder = Decoder(config)
    decoder.start_utt()
    with open(file, 'rb') as stream:
        while True:
            buf = stream.read(1024)
            if buf:
                decoder.process_raw(buf, False, False)
            else:
                break
    decoder.end_utt()

    return [seg.word for seg in decoder.seg()]
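Example 4
A complete script that decodes a Lithuanian test recording with a JSGF grammar (robotas.gram) and a custom dictionary, then prints the best hypothesis and its word segments.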
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import os

from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

with open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb') as wav_file:
    decoder.decode_raw(wav_file)

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
Example 6
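Builds the decoder from get_model_path() with a custom dictionary (aviation.dict) and prints the recognized words, skipping silence segments.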
import os
import sys
from pocketsphinx import DefaultConfig, Decoder, get_model_path, get_data_path

model_path = get_model_path()
data_path = get_data_path()

# Create a decoder with a certain model
config = DefaultConfig()
config.set_string('-hmm', os.path.join(model_path, 'en-us'))
config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
config.set_string('-dict', 'aviation.dict')
# set log level
#config.set_string("-logfn", "null")
decoder = Decoder(config)

# Decode streaming data
buf = bytearray(1024)

with open('subject.wav', 'rb') as f:  # should be raw 16-bit mono PCM at the model's sample rate
    decoder.start_utt()
    while True:
        n = f.readinto(buf)
        if not n:
            break
        # Pass only the bytes actually read (the last chunk may be short).
        decoder.process_raw(buf[:n], False, False)
    decoder.end_utt()
print('Best hypothesis segments:')

for seg in decoder.seg():
    if not seg.word == '<sil>':
        sys.stdout.write(seg.word)
        sys.stdout.write(' ')
Example 7
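A background thread that runs pocketsphinx in keyword-spotting mode (keyphrase 'alexa'), reading audio from an input pool and forwarding detected chunks, resampled and padded, to a keyword pool.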
class KeywordSpotting(threading.Thread):
    def __init__(self, in_fs, out_fs, mute_period_length, kws_frame_length):
        threading.Thread.__init__(self)
        # Initialize the configuration
        self.daemon = True
        self.exit_flag = False
        self.in_fs = in_fs
        self.out_fs = out_fs
        self.mute_period_frames_count = int(in_fs * mute_period_length)
        self.kws_frames_count = int(in_fs * kws_frame_length)
        model_path = get_model_path()
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(model_path, 'en-us'))  # acoustic model path
        # config.set_string('-lm',"./tests/7567.lm")
        config.set_string('-dict',
                          os.path.join(model_path,
                                       'cmudict-en-us.dict'))  # dictionary path
        config.set_string('-keyphrase', 'alexa')
        config.set_float('-kws_threshold', 1e-20)
        config.set_string('-logfn', './logs/tmp')  # redirect INFO log output elsewhere
        self.decoder = Decoder(config)
        self.decoder.start_utt()

        self.start()

    def run(self):
        while not self.exit_flag:
            # 1. Read a fixed number of frames from the input pool. This call
            # may block until enough data is available in the pool.
            processed_input_frames = global_var.processed_input_pool.get(
                self.kws_frames_count)

            # 2. If keyword spotting detects the keyphrase in this chunk,
            # resample it, pad it, and store it in the keyword pool.
            if self._kws(processed_input_frames):
                global_var.keyword_pool.put(
                    self._padding(
                        Resampler.resampling(processed_input_frames,
                                             self.in_fs, self.out_fs), 0,
                        self.mute_period_frames_count))

    def stop(self):
        self.exit_flag = True
        self.join()

    def _kws(self, frames):
        buf = frames.tobytes()
        if buf:
            self.decoder.process_raw(buf, False, False)
            if self.decoder.hyp() is not None:
                print([(seg.word, seg.prob, seg.start_frame, seg.end_frame)
                       for seg in self.decoder.seg()])
                print("Detected keyphrase, restarting search")
                self.decoder.end_utt()
                self.decoder.start_utt()
                return True
        return False

    def _padding(self, frames, padding_value, padding_num):
        res = np.pad(frames, (0, padding_num),
                     'constant',
                     constant_values=(padding_value, padding_value))
        return res
Example 8
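Configures the decoder for phoneme recognition with -allphone and relaxed beams, then prints each phoneme with its start and end times.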
# Note: this snippet starts mid-script. It assumes `config` (a Decoder default
# config), `model_path` (e.g. from get_model_path()), `FilePath`, and `fps`
# (the decoder frame rate used to convert frames to seconds) are defined above
# the excerpt.
config.set_string('-allphone', os.path.join(model_path, 'en-us-phone.lm.bin'))
config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
config.set_float('-lw', 2.0)
config.set_float('-beam', 1e-10)
config.set_float('-pbeam', 1e-10)
decoder = Decoder(config)

# Decode streaming data
buf = bytearray(1024)
with open(path.join(FilePath, 'amol.wav'), 'rb') as f:
    decoder.start_utt()
    while f.readinto(buf):
        decoder.process_raw(buf, False, False)
    decoder.end_utt()
    print('Phonemes: ', [seg.word for seg in decoder.seg()])
    print('-' * 28)
    print('| %5s |  %3s  |   %4s   |' % ('start', 'end', 'word'))
    print('-' * 28)
    for s in decoder.seg():
        print('| %4ss | %4ss | %8s |' % (s.start_frame / fps, s.end_frame / fps, s.word))
    print('-' * 28)
#hypothesis = decoder.hyp()
#print(hypothesis)


# plot the graph
'''fig = plt.figure(figsize=(12, 6))
plt.subplots_adjust(hspace=0.5)
for index, filename in enumerate(recordings, start=1):