Exemple #1
0
    def __init__(self, resources, nthreads=2):
        """Prepare a multi-threaded transcriber if the prerequisites are met.

        ``self.available`` is left False, and ``self.mtt`` is never assigned,
        when ``nthreads`` is not positive or when the file at
        ``resources.full_hclg_path`` does not exist.
        """
        self.available = False
        # Short-circuit keeps the original check order: thread count first,
        # then the filesystem lookup.
        if nthreads <= 0 or not os.path.exists(resources.full_hclg_path):
            return

        decoder_queue = kaldi_queue.build(resources, nthreads=nthreads)
        self.mtt = MultiThreadedTranscriber(decoder_queue, nthreads=nthreads)
        self.available = True
Exemple #2
0
    def __init__(self, resources, nthreads=2):
        """Build the transcriber backend when it can be used.

        The instance is flagged unavailable (``self.available`` is False and
        ``self.mtt`` is not set) if ``nthreads`` is zero or negative, or if
        ``resources.full_hclg_path`` is missing on disk.
        """
        self.available = False
        # Thread count is tested before touching the filesystem, as before.
        if nthreads <= 0 or not os.path.exists(resources.full_hclg_path):
            return

        worker_queue = kaldi_queue.build(resources, nthreads=nthreads)
        self.mtt = MultiThreadedTranscriber(worker_queue, nthreads=nthreads)
        self.available = True
Exemple #3
0
 def __init__(self, resources, transcript, nthreads=4, **kwargs):
     """Set up a transcriber driven by a language model built from *transcript*.

     Stores the constructor arguments, wraps the transcript in a
     ``MetaSentence`` using ``resources.vocab``, generates a bigram language
     model from its Kaldi sequence (extra ``kwargs`` are forwarded to the
     generator), and builds the queue and multi-threaded transcriber on top
     of the generated HCLG path.
     """
     self.resources = resources
     self.transcript = transcript
     self.nthreads = nthreads
     self.kwargs = kwargs

     self.ms = metasentence.MetaSentence(transcript, resources.vocab)
     hclg_path = language_model.make_bigram_language_model(
         self.ms.get_kaldi_sequence(), resources.proto_langdir, **kwargs)

     self.queue = kaldi_queue.build(
         resources, hclg_path=hclg_path, nthreads=nthreads)
     self.mtt = MultiThreadedTranscriber(self.queue, nthreads=nthreads)
Exemple #4
0
 def __init__(self, resources, transcript, nthreads=4, **kwargs):
     """Create a transcriber whose language model is derived from *transcript*.

     The arguments are kept on the instance. A ``MetaSentence`` is built
     from the transcript and ``resources.vocab``; its Kaldi sequence feeds
     ``make_bigram_language_model`` (which also receives ``kwargs``), and the
     resulting HCLG path is used to build the queue behind the
     multi-threaded transcriber.
     """
     self.resources = resources
     self.transcript = transcript
     self.nthreads = nthreads
     self.kwargs = kwargs

     self.ms = metasentence.MetaSentence(transcript, resources.vocab)
     generated_hclg = language_model.make_bigram_language_model(
         self.ms.get_kaldi_sequence(), resources.proto_langdir, **kwargs)

     self.queue = kaldi_queue.build(
         resources, hclg_path=generated_hclg, nthreads=nthreads)
     self.mtt = MultiThreadedTranscriber(self.queue, nthreads=nthreads)
Exemple #5
0
    def test_transcriber(self):
        # Transcribe an excerpt of the bundled sample recording and check the
        # first recognized word.
        from gentle import resampled, kaldi_queue, standard_kaldi, Resources
        from gentle.transcriber import MultiThreadedTranscriber

        transcriber = MultiThreadedTranscriber(
            kaldi_queue.build(Resources(), 1))

        # NOTE(review): 10.5 and 2.5 are presumably the excerpt's offset and
        # duration -- confirm against gentle.resampled.
        with resampled('examples/data/lucier.mp3', 10.5, 2.5) as wav_path:
            words, _duration = transcriber.transcribe(wav_path)

        first = words[0]
        self.assertEqual(first.word, "different")
Exemple #6
0
    def test_transcriber(self):
        # Transcribe an excerpt of self.audio and check the first recognized
        # word.
        import subprocess
        from gentle import resampled, kaldi_queue, standard_kaldi, Resources
        from gentle.transcriber import MultiThreadedTranscriber

        # Point standard_kaldi's STDERR setting at subprocess.STDOUT before
        # any queue is built.
        standard_kaldi.STDERR = subprocess.STDOUT

        transcriber = MultiThreadedTranscriber(
            kaldi_queue.build(Resources(), 1))

        # NOTE(review): 10.5 and 2.5 are presumably the excerpt's offset and
        # duration -- confirm against gentle.resampled.
        with resampled(self.audio, 10.5, 2.5) as wav_path:
            words, _duration = transcriber.transcribe(wav_path)

        first = words[0]
        self.assertEqual(first.word, "different")
Exemple #7
0
    def test_transcriber(self):
        # End-to-end check: the first word transcribed from an excerpt of
        # self.audio must be "different".
        import subprocess
        from gentle import resampled, kaldi_queue, standard_kaldi, Resources
        from gentle.transcriber import MultiThreadedTranscriber

        # Set standard_kaldi's STDERR to subprocess.STDOUT ahead of building
        # the queue.
        standard_kaldi.STDERR = subprocess.STDOUT

        decode_queue = kaldi_queue.build(Resources(), 1)
        transcriber = MultiThreadedTranscriber(decode_queue)

        # NOTE(review): 10.5 and 2.5 are presumably the excerpt's offset and
        # duration -- confirm against gentle.resampled.
        with resampled(self.audio, 10.5, 2.5) as clip_path:
            words, _duration = transcriber.transcribe(clip_path)

        leading_word = words[0]
        self.assertEqual(leading_word.word, "different")
Exemple #8
0
        words = [
            words[i] for i in range(len(words) - 1)
            if not words[i].corresponds(words[i + 1])
        ]

        return words, duration


if __name__ == '__main__':
    # Command-line entry point: full transcription of one audio file.
    #   sys.argv[1] -- path of the input audio file
    #   sys.argv[2] -- path the JSON transcription is written to
    import json
    import sys

    import logging
    logging.getLogger().setLevel('INFO')

    import gentle
    from gentle import standard_kaldi
    from gentle import kaldi_queue

    resources = gentle.Resources()

    # NOTE(review): the positional 3 is presumably the worker/thread count --
    # confirm against kaldi_queue.build.
    k_queue = kaldi_queue.build(resources, 3)
    trans = MultiThreadedTranscriber(k_queue)

    with gentle.resampled(sys.argv[1]) as filename:
        words, duration = trans.transcribe(filename)

    # Serialize before opening the output so a serialization failure does not
    # leave a truncated file behind, and use a context manager so the handle
    # is flushed and closed deterministically.
    output_json = transcription.Transcription(words=words).to_json()
    with open(sys.argv[2], 'w') as out_file:
        out_file.write(output_json)
Exemple #9
0
        # word in the audio.
        words.sort(key=lambda word: word.start)
        words.append(transcription.Word(word="__dummy__"))
        words = [words[i] for i in range(len(words)-1) if not words[i].corresponds(words[i+1])]

        return words, duration


if __name__ == '__main__':
    # Command-line entry point: full transcription of one audio file.
    #   sys.argv[1] -- path of the input audio file
    #   sys.argv[2] -- path the JSON transcription is written to
    import json
    import sys

    import logging
    logging.getLogger().setLevel('INFO')

    import gentle
    from gentle import standard_kaldi
    from gentle import kaldi_queue

    resources = gentle.Resources()

    # NOTE(review): the positional 3 is presumably the worker/thread count --
    # confirm against kaldi_queue.build.
    k_queue = kaldi_queue.build(resources, 3)
    trans = MultiThreadedTranscriber(k_queue)

    with gentle.resampled(sys.argv[1]) as filename:
        words, duration = trans.transcribe(filename)

    # Serialize first so a failure in to_json() cannot leave an empty output
    # file, then write through a context manager so the handle is always
    # closed (the original left the file object unclosed).
    output_json = transcription.Transcription(words=words).to_json()
    with open(sys.argv[2], 'w') as out_file:
        out_file.write(output_json)