import os import subprocess import sys import tempfile from contextlib import contextmanager from util.paths import get_binary BINARY_PATH = os.path.join("ffmpeg", "ffmpeg.exe" if os.name == "nt" else "ffmpeg") FFMPEG = get_binary(BINARY_PATH) def resample(infile, outfile): if not os.path.isfile(infile): raise IOError("Not a file: %s" % infile) ''' Use FFMPEG to convert a media file to a wav file sampled at 8K ''' return subprocess.call([ FFMPEG, '-loglevel', 'panic', '-y', '-i', infile, '-ac', '1', '-ar', '8000', '-acodec', 'pcm_s16le', outfile ], cwd=os.path.dirname(sys.executable)) @contextmanager def resampled(infile): with tempfile.NamedTemporaryFile(suffix='.wav') as fp: if resample(infile, fp.name) != 0:
'''Glue code for communicating with standard_kaldi C++ process'''
import json
import logging
import os
import subprocess
import tempfile
import wave

from util.paths import get_binary
from gentle.rpc import RPCProtocol
from gentle.resources import Resources

EXECUTABLE_PATH = get_binary("ext/standard_kaldi")


class Kaldi(object):
    '''Kaldi spawns a standard_kaldi subprocess and provides a
    Python wrapper for communicating with it.'''

    def __init__(self, nnet_dir, hclg_path, proto_langdir):
        '''Launch the standard_kaldi executable.

        `nnet_dir`, `hclg_path` and `proto_langdir` are passed, in that
        order, as command-line arguments to the executable.
        `proto_langdir` is also stored on the instance.

        The subprocess is started with pipes on stdin and stdout; its
        stderr is discarded.
        '''
        self.proto_langdir = proto_langdir

        cmd = [EXECUTABLE_PATH, nnet_dir, hclg_path, proto_langdir]

        # The child process receives its own copy of the null-device
        # handle when it is spawned, so the parent's copy can be closed
        # as soon as Popen returns instead of leaking one descriptor per
        # Kaldi instance.
        with open(os.devnull, 'w') as devnull:
            self._subprocess = subprocess.Popen(cmd,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE,
                                                stderr=devnull)

        self._transitions = None
        self._words = None
        self._stopped = False
import os
import subprocess
import tempfile
import wave
from shutil import copyfile
from contextlib import contextmanager

from util.paths import get_binary

FFMPEG = get_binary("ffmpeg")


def _matches_target_format(path):
    '''Return True if `path` is already a mono, 16-bit, 8 kHz PCM wav.'''
    try:
        wave_file = wave.open(path, 'rb')
    except (wave.Error, EOFError):
        # Not something the wave module can read: another container,
        # a compressed wav, or an empty/truncated file.
        return False

    try:
        # These mirror the ffmpeg arguments used in resample():
        # '-ac 1', '-acodec pcm_s16le' (2 bytes per sample), '-ar 8000'.
        return (wave_file.getnchannels() == 1
                and wave_file.getsampwidth() == 2
                and wave_file.getframerate() == 8000)
    finally:
        # Always release the handle; it was previously left open.
        wave_file.close()


def resample(infile, outfile):
    '''Use FFMPEG to convert a media file to a wav file sampled at 8K

    If `infile` is already a wav file in the target format it is copied
    to `outfile` unchanged and FFMPEG is not invoked.

    Returns 0 when the file was copied, otherwise the exit status of the
    FFMPEG process.

    Raises IOError if `infile` is not an existing regular file.
    '''
    if not os.path.isfile(infile):
        raise IOError("Not a file: %s" % infile)

    # Check if the file already meets our specifications
    if _matches_target_format(infile):
        copyfile(infile, outfile)
        return 0

    return subprocess.call([
        FFMPEG,
        '-loglevel', 'panic',
        '-y',
        '-i', infile,
        '-ac', '1',
        '-ar', '8000',
        '-acodec', 'pcm_s16le',
        outfile
    ])
import subprocess
from util.paths import get_binary
import os
from resources import Config

EXECUTABLE_PATH = get_binary("ext/k3")


class Kaldi:
    '''Wrapper around a subprocess running the executable at
    EXECUTABLE_PATH, driven through its stdin/stdout pipes.'''

    def __init__(self, nnet_dir=None, hclg_path=None, proto_langdir=None):
        '''Start the subprocess.

        When `nnet_dir` is given, `nnet_dir`, `hclg_path` and the
        'silencephones' entry of the Config are appended to the command
        line. `proto_langdir` is accepted but not used here.
        '''
        cmd = [EXECUTABLE_PATH]
        self.config = Config()

        if nnet_dir is not None:
            cmd.append(nnet_dir)
            cmd.append(hclg_path)
            # NOTE(review): assumed to belong inside this branch, since
            # it is the third positional argument after the two paths.
            # Confirm against the executable's argument handling.
            cmd.append(self.config['silencephones'])

        # The child process receives its own copy of the null-device
        # handle when it is spawned, so the parent's copy can be closed
        # as soon as Popen returns instead of leaking one descriptor per
        # Kaldi instance.
        with open(os.devnull, 'w') as devnull:
            self._p = subprocess.Popen(cmd,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=devnull)
        self.finished = False

    def _cmd(self, c):
        '''Write `c` followed by a newline to the subprocess's stdin
        and flush the pipe.'''
        self._p.stdin.write("%s\n" % (c))
        self._p.stdin.flush()
import subprocess from util.paths import get_binary import os BINARY_PATH = "ext\\gentleK3.exe" if os.name == "nt" else "ext/gentleK3" EXECUTABLE_PATH = get_binary(BINARY_PATH) class Kaldi: def __init__(self, nnet_dir=None, hclg_path=None, proto_langdir=None): devnull = open(os.devnull, 'wb') cmd = [EXECUTABLE_PATH] if nnet_dir is not None: cmd.append(nnet_dir) cmd.append(hclg_path) self._p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=devnull) self.finished = False def _cmd(self, c): self._p.stdin.write("%s\n" % (c)) self._p.stdin.flush() def push_chunk(self, buf): # Wait until we're ready self._cmd("push-chunk") cnt = len(buf)/2
import logging import math import os import shutil import subprocess import sys import tempfile from util.paths import get_binary from metasentence import MetaSentence from resources import Resources MKGRAPH_PATH = get_binary("ext/mkgraph") def make_bigram_lm_fst(word_sequences, **kwargs): ''' Use the given token sequence to make a bigram language model in OpenFST plain text format. When the "conservative" flag is set, an [oov] is interleaved between successive words. When the "disfluency" flag is set, a small set of disfluencies is interleaved between successive words `Word sequence` is a list of lists, each valid as a start ''' if len(word_sequences) == 0 or type(word_sequences[0]) != list: word_sequences = [word_sequences]
import logging import math import os import shutil import subprocess import sys import tempfile from util.paths import get_binary from metasentence import MetaSentence from resources import Resources BINARY_PATH = "ext\\gentleM3.exe" if os.name == "nt" else "ext/gentleM3" MKGRAPH_PATH = get_binary(BINARY_PATH) # [oov] no longer in words.txt OOV_TERM = '<unk>' def make_bigram_lm_fst(word_sequences, **kwargs): ''' Use the given token sequence to make a bigram language model in OpenFST plain text format. When the "conservative" flag is set, an [oov] is interleaved between successive words. When the "disfluency" flag is set, a small set of disfluencies is interleaved between successive words `Word sequence` is a list of lists, each valid as a start
import logging import math import os import shutil import subprocess import sys import tempfile from util.paths import get_binary from metasentence import MetaSentence from resources import Resources MKGRAPH_PATH = get_binary("ext/m3") # [oov] no longer in words.txt OOV_TERM = '<unk>' def make_bigram_lm_fst(word_sequences, **kwargs): ''' Use the given token sequence to make a bigram language model in OpenFST plain text format. When the "conservative" flag is set, an [oov] is interleaved between successive words. When the "disfluency" flag is set, a small set of disfluencies is interleaved between successive words `Word sequence` is a list of lists, each valid as a start '''