Code Example #1
    def __init__(self):
        self.proto_langdir = get_resource('PROTO_LANGDIR')
        self.nnet_gpu_path = get_resource('data/nnet_a_gpu_online')
        self.full_hclg_path = get_resource('data/graph/HCLG.fst')
        with open(os.path.join(self.proto_langdir,
                               "graphdir/words.txt")) as fh:
            self.vocab = metasentence.load_vocabulary(fh)
Code Example #2
    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads

        proto_langdir = get_resource('PROTO_LANGDIR')
        vocab_path = os.path.join(proto_langdir, "graphdir/words.txt")
        with open(vocab_path) as f:
            self.vocab = metasentence.load_vocabulary(f)

        # load kaldi instances for full transcription
        gen_hclg_filename = get_resource('data/graph/HCLG.fst')
        
        if os.path.exists(gen_hclg_filename) and self.ntranscriptionthreads > 0:
            nnet_gpu_path = get_resource('data/nnet_a_gpu_online')
            
            kaldi_queue = Queue()
            for i in range(self.ntranscriptionthreads):
                kaldi_queue.put(standard_kaldi.Kaldi(
                    nnet_gpu_path,
                    gen_hclg_filename,
                    proto_langdir)
                )
            self.full_transcriber = MultiThreadedTranscriber(kaldi_queue, nthreads=self.ntranscriptionthreads)

        self._status_dicts = {}
Code Example #3
from nose.tools import assert_equals

from gentle.metasentence import load_vocabulary


def test_load_vocabulary():
	tests = [
		[['<eps> 0'], set(['<eps>'])],
		[['<eps> 0', ''], set(['<eps>'])],
		[['a 66', 'zulu 124944'], set(['a', 'zulu'])],
	]
	for test in tests:
		input, want = test
		got = load_vocabulary(input)
		assert_equals(got, want)
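
The test above pins down the contract for load_vocabulary: each non-empty line of words.txt is a "word id" symbol-table pair, only the word is kept, and blank lines are ignored. A minimal implementation consistent with that contract (a sketch, not necessarily gentle's exact code):

def load_vocabulary(words_file):
    # words_file: an iterable of lines from a Kaldi/OpenFST symbol table,
    # one "<word> <id>" pair per line; blank lines are skipped.
    return set(line.split(' ')[0] for line in words_file if line.strip())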
Code Example #4
    def __init__(self):
        self.proto_langdir = get_resource('PROTO_LANGDIR')
        self.nnet_gpu_path = get_resource('data/nnet_a_gpu_online')
        self.full_hclg_path = get_resource('data/graph/HCLG.fst')

        def require_dir(path):
            if not os.path.isdir(path):
                raise RuntimeError(
                    "No resource directory %s.  Check %s environment variable?"
                    % (path, ENV_VAR))

        require_dir(self.proto_langdir)
        require_dir(self.nnet_gpu_path)

        with open(os.path.join(self.proto_langdir, "graphdir/words.txt")) as fh:
            self.vocab = metasentence.load_vocabulary(fh)
Code Example #5
File: resources.py Project: alongreyber/speakup
    def __init__(self):
        self.proto_langdir = get_resource('exp')
        self.nnet_gpu_path = get_resource('exp/tdnn_7b_chain_online/')
        self.full_hclg_path = get_resource(
            'exp/tdnn_7b_chain_online/graph_pp/HCLG.fst')

        def require_dir(path):
            if not os.path.isdir(path):
                raise RuntimeError(
                    "No resource directory %s.  Check %s environment variable?"
                    % (path, ENV_VAR))

        require_dir(self.proto_langdir)
        require_dir(self.nnet_gpu_path)

        with open(os.path.join(self.proto_langdir, "langdir",
                               "words.txt")) as fh:
            self.vocab = metasentence.load_vocabulary(fh)
Code Example #6
File: resources.py Project: yifan/gentle
    def __init__(self, modelDir):
        self.proto_langdir = get_resource(modelDir)
        self.nnet_gpu_path = get_resource(os.path.join(modelDir, 'online'))
        self.full_hclg_path = get_resource(
            os.path.join(self.nnet_gpu_path, 'graph', 'HCLG.fst'))

        self.config = Config()
        confPath = os.path.join(self.proto_langdir, 'config.yaml')
        if os.path.exists(confPath):
            self.config.load(confPath)

        def require_dir(path):
            if not os.path.isdir(path):
                raise RuntimeError(
                    "No resource directory %s.  Check %s environment variable?"
                    % (path, ENV_VAR))

        require_dir(self.proto_langdir)
        require_dir(self.nnet_gpu_path)

        with open(os.path.join(self.proto_langdir, "langdir",
                               "words.txt")) as fh:
            self.vocab = metasentence.load_vocabulary(fh)
Code Example #7
File: multipass.py Project: EnGassa/gentle
import logging
from multiprocessing.pool import ThreadPool as Pool
import os
import wave

from gentle import standard_kaldi
from gentle import metasentence
from gentle import language_model
from gentle.paths import get_resource
from gentle import diff_align

# XXX: refactor out somewhere
proto_langdir = get_resource('PROTO_LANGDIR')
vocab_path = os.path.join(proto_langdir, "graphdir/words.txt")
with open(vocab_path) as f:
    vocab = metasentence.load_vocabulary(f)

def prepare_multipass(alignment):
    to_realign = []
    last_aligned_word = None
    cur_unaligned_words = []

    for wd_idx,wd in enumerate(alignment):
        if wd['case'] == 'not-found-in-audio':
            cur_unaligned_words.append(wd)
        elif wd['case'] == 'success':
            if len(cur_unaligned_words) > 0:
                to_realign.append({
                    "start": last_aligned_word,
                    "end": wd,
                    "words": cur_unaligned_words})
                cur_unaligned_words = []

            last_aligned_word = wd

    # Flush any unaligned words that trail the final successful word.
    if len(cur_unaligned_words) > 0:
        to_realign.append({
            "start": last_aligned_word,
            "end": None,
            "words": cur_unaligned_words})

    return to_realign
Code Example #8
File: generateLM.py Project: shreya2111/gentle-labs
    # # path to original/exhaustive lexicon is sys.argv[3], e.g. originalLex/lexicon.txt
    # lexicon = sys.argv[3]
    # sys.argv[3]: path to a pristine copy of kaldi_root (pre-compiled)
    kaldi_path = sys.argv[3]

    # Creating lexicon, phones, L.fst etc: inputs for generating HCLG.fst
    lx.generateLexicon(text, proto_dir)

    # call prepare_language.sh c functions here
    lang.create_fst(kaldi_path, proto_dir)

    # Generating HCLG.fst (using Gentle here)
    with open(text) as f:
        txt_in = f.read()

    with open(proto_dir + "/tdnn_7b_chain_online/graph_pp/words.txt") as f:
        vocab_in = ms.load_vocabulary(f)

    print("My Vocab", vocab_in)

    source_words_list = txt_in.split(" ")[1:]

    # We must supply a version of `words_in` that only has words within our vocabulary (ie. proto_langdir/words.txt)
    new_wdlist = []
    for wd in source_words_list:
        if wd not in vocab_in:
            new_wdlist.append(lm.OOV_TERM)
        else:
            new_wdlist.append(wd)

    print("Supplying these words", new_wdlist)
Code Example #9
File: serve.py Project: afcarl/earmark
import os
import shutil
import tempfile
import time
import zipfile

from gentle.paths import get_resource
from gentle.standard_kaldi import Kaldi
import gentle.metasentence as metasentence
import gentle.language_model as language_model

from autobahn.twisted.websocket import WebSocketServerFactory

# kaldi quirk...
proto_langdir = get_resource('PROTO_LANGDIR')
vocab_path = os.path.join(proto_langdir, "graphdir/words.txt")
with open(vocab_path) as f:
    vocab = metasentence.load_vocabulary(f)


class AudioConferenceFactory(WebSocketServerFactory):
    def __init__(self, resources, dbdir="db", db=None):
        WebSocketServerFactory.__init__(self, None)
        self.clients = {}  # peerstr -> client

        self.resources = resources

        self.db = db
        self.gen_hclg_filename = db.gen_hclg_filename if db else None

        self.rerunning = False

        self.dbdir = dbdir
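
Across all of these examples the pattern is the same: resolve gentle's resource directory, open the Kaldi symbol table words.txt inside it, and pass the open file handle to metasentence.load_vocabulary. A minimal standalone sketch of that pattern (the graphdir/words.txt layout follows examples #1, #2, #7, and #9):

import os

from gentle import metasentence
from gentle.paths import get_resource

proto_langdir = get_resource('PROTO_LANGDIR')
vocab_path = os.path.join(proto_langdir, "graphdir", "words.txt")

with open(vocab_path) as fh:
    vocab = metasentence.load_vocabulary(fh)  # a set of in-vocabulary words

# Membership tests are the typical downstream use (cf. the OOV filtering
# in example #8).
print('the' in vocab)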