Beispiel #1
0
if __name__ == "__main__":
    # Demo: stream 'eleven.wav' (located next to this script) through the
    # decoder in 8000-frame chunks, printing the running hypothesis after
    # each chunk, then finalize decoding and fetch the lattice.
    decoder = Decoder(MODEL_PATH)

    file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav')

    n_decoded = 0
    # Start from an empty hypothesis so the final print below cannot hit an
    # unbound name when the file is empty or nothing was ever decoded.
    # NOTE(review): assumes word_ids_to_str_hyp accepts an empty sequence.
    word_ids = []

    data = wave.open(file_name)
    try:
        while True:
            frames = data.readframes(8000)
            if not frames:
                # The reader returned no more audio: end of file.
                break

            decoder.accept_audio(frames)
            n_decoded += decoder.decode(8000)

            # Only query the decoder once it has reported some progress.
            if n_decoded > 0:
                prob, word_ids = decoder.get_best_path()
                print('Hypothesis: "%s" (speaker finished speaking: %s)' % (
                    word_ids_to_str_hyp(decoder, word_ids),
                    decoder.endpoint_detected(),
                ))
    finally:
        # Release the file handle even if decoding raises.
        data.close()

    decoder.input_finished()
    # Prints the most recent hypothesis obtained inside the loop; the best
    # path is not re-queried after input_finished().
    print('Final hypothesis: "%s"' % word_ids_to_str_hyp(decoder, word_ids))

    decoder.finalize_decoding()

    p, lat = decoder.get_lattice()
Beispiel #2
0
if __name__ == "__main__":
    # Demo: stream 'eleven.wav' (located next to this script) through the
    # decoder in 8000-frame chunks, print the running hypothesis after each
    # chunk, then finalize decoding and walk the resulting lattice.
    decoder = Decoder(MODEL_PATH)

    file_name = os.path.join(os.path.dirname(__file__), 'eleven.wav')

    # NOTE(review): the reader opened here is never closed in the visible code.
    data = wave.open(file_name)

    # Running total of the values returned by decoder.decode()
    # (presumably the number of frames decoded so far -- confirm against the
    # Decoder API).
    n_decoded = 0
    while True:
        frames = data.readframes(8000)
        # Stop once the reader returns no more audio data.
        if len(frames) == 0:
            break

        decoder.accept_audio(frames)
        n_decoded += decoder.decode(8000)

        # The best path is only queried once decode() has reported progress.
        if n_decoded > 0:
            prob, word_ids = decoder.get_best_path()
            # ivec = decoder.get_ivector()
            print('Hypothesis: "%s" (speaker finished speaking: %s)' % (word_ids_to_str_hyp(decoder, word_ids), decoder.endpoint_detected(), ))

    decoder.input_finished()
    # NOTE(review): word_ids is whatever the last loop iteration produced; it
    # is not re-queried after input_finished(), and it is unbound (NameError)
    # if n_decoded never became positive, e.g. for an empty file.
    print('Final hypothesis: "%s"' % word_ids_to_str_hyp(decoder, word_ids))

    decoder.finalize_decoding()

    p, lat = decoder.get_lattice()

    print ('Resulting lattice:')
    # NOTE(review): the body of this loop is missing from the visible excerpt.
    for state in lat.states:
Beispiel #3
0
import struct
import subprocess
from os.path import basename

# Command-line arguments, in order: directory containing the input .wav
# files, scratch directory for the converted audio, resource directory that
# holds the ASR model, and the directory receiving one transcript per input.
inputdir = sys.argv[1]
scratchdir = sys.argv[2]
resourcedir = sys.argv[3]
outdir = sys.argv[4]

# Create the working directories without going through a shell. The output
# directory is created as well so that writing transcripts cannot fail just
# because it does not exist yet.
for directory in (scratchdir, outdir):
    if not os.path.isdir(directory):
        os.makedirs(directory)

for inputfile in glob.glob(os.path.join(inputdir, '*.wav')):
    # Load speech recognition model from the "asr_model_dir_nnet3" directory.
    # A fresh decoder is built for every file, as in the original script.
    decoder = Decoder(os.path.join(resourcedir, 'asr_model_dir_nnet3'))
    file_id = basename(inputfile)

    # Convert the input to 16 kHz, 16-bit, mono signed-integer PCM with sox.
    # Passing an argument list (no shell) keeps file names containing spaces
    # or shell metacharacters intact; os.path.join guarantees the result
    # lands inside scratchdir even when the argument has no trailing slash.
    converted = os.path.join(scratchdir, file_id + '.wav')
    subprocess.check_call([
        'sox', inputfile,
        '-e', 'signed-integer', '-r', '16000', '-b', '16', '-c', '1',
        converted,
    ])

    # Load audio frames from the converted wav file.
    data = wave.open(converted)
    try:
        n_frames = data.getnframes()
        frames = data.readframes(n_frames)
    finally:
        data.close()

    # Feed the audio data to the decoder.
    decoder.accept_audio(frames)
    decoder.decode(n_frames)
    decoder.input_finished()

    # Get the best hypothesis and write it to <outdir>/<file_id>.txt.
    prob, word_ids = decoder.get_best_path()
    output_string = " ".join(map(decoder.get_word, word_ids))
    # Write the transcript directly instead of via `echo ... >`, so the
    # recognised text is never interpreted by a shell.
    with open(os.path.join(outdir, file_id + '.txt'), 'w') as transcript:
        transcript.write(output_string + '\n')