コード例 #1
0
ファイル: full_transcriber.py プロジェクト: bit/gentle
class FullTranscriber():
    """Transcribe audio without a reference transcript.

    The instance is only usable when ``available`` is True, i.e. when a
    positive thread count was requested and the decoding graph at
    ``resources.full_hclg_path`` exists on disk.
    """

    def __init__(self, resources, nthreads=2):
        """Build the Kaldi queue and transcriber if the prerequisites are met.

        Args:
            resources: object exposing ``full_hclg_path``; it is also handed
                to ``kaldi_queue.build``.
            nthreads: number of worker threads; a value <= 0 leaves the
                transcriber unavailable.
        """
        self.available = False
        # Always define the attribute so an unavailable instance can be
        # detected explicitly instead of failing with AttributeError later.
        self.mtt = None
        if nthreads <= 0:
            return
        if not os.path.exists(resources.full_hclg_path):
            return

        queue = kaldi_queue.build(resources, nthreads=nthreads)
        self.mtt = MultiThreadedTranscriber(queue, nthreads=nthreads)
        self.available = True

    def transcribe(self, wavfile, progress_cb=None, logging=None):
        """Transcribe ``wavfile`` and return the result as a Transcription.

        Args:
            wavfile: audio file forwarded to the underlying transcriber.
            progress_cb: optional callback forwarded to the transcriber.
            logging: accepted but not used by this method.

        Raises:
            RuntimeError: if the transcriber was never initialised (check
                ``available`` before calling).
        """
        if self.mtt is None:
            raise RuntimeError(
                "FullTranscriber is unavailable: nthreads must be positive "
                "and resources.full_hclg_path must exist")
        # The transcriber also reports a duration, which is not needed here.
        words, _duration = self.mtt.transcribe(wavfile, progress_cb=progress_cb)
        return self.make_transcription_alignment(words)

    @staticmethod
    def make_transcription_alignment(trans):
        """Wrap raw transcribed words in the `diff_align` output format.

        Every word is marked as successfully aligned to itself. Character
        offsets index into a synthetic transcript made of each word followed
        by a single space (so the transcript ends with a trailing space when
        ``trans`` is non-empty).
        """
        words = []
        pieces = []
        offset = 0  # always equals the length of the transcript built so far
        for t_wd in trans:
            word = transcription.Word(
                case=transcription.Word.SUCCESS,
                startOffset=offset,
                endOffset=offset + len(t_wd.word),
                word=t_wd.word,
                alignedWord=t_wd.word,
                phones=t_wd.phones,
                start=t_wd.start,
                end=t_wd.end)
            words.append(word)

            piece = word.word + " "
            pieces.append(piece)
            offset += len(piece)

        # Join once at the end rather than growing a string in the loop.
        return Transcription(words=words, transcript="".join(pieces))
コード例 #2
0
ファイル: full_transcriber.py プロジェクト: wan/gentle
class FullTranscriber():
    """Transcribe audio without a reference transcript.

    ``available`` reports whether construction succeeded: it stays False when
    a non-positive thread count is given or when the file at
    ``resources.full_hclg_path`` does not exist.
    """

    def __init__(self, resources, nthreads=2):
        """Set up the Kaldi queue and transcriber when prerequisites hold.

        Args:
            resources: object exposing ``full_hclg_path``; also passed to
                ``kaldi_queue.build``.
            nthreads: number of worker threads; values <= 0 disable the
                transcriber.
        """
        self.available = False
        # Define the attribute up front so the unavailable state is explicit
        # rather than surfacing later as an AttributeError.
        self.mtt = None
        if nthreads <= 0:
            return
        if not os.path.exists(resources.full_hclg_path):
            return

        queue = kaldi_queue.build(resources, nthreads=nthreads)
        self.mtt = MultiThreadedTranscriber(queue, nthreads=nthreads)
        self.available = True

    def transcribe(self, wavfile, progress_cb=None, logging=None):
        """Transcribe ``wavfile`` and return the result as a Transcription.

        Args:
            wavfile: audio file forwarded to the underlying transcriber.
            progress_cb: optional callback forwarded to the transcriber.
            logging: accepted but not used by this method.

        Raises:
            RuntimeError: if the transcriber was never initialised (check
                ``available`` before calling).
        """
        if self.mtt is None:
            raise RuntimeError(
                "FullTranscriber is unavailable: nthreads must be positive "
                "and resources.full_hclg_path must exist")
        words = self.mtt.transcribe(wavfile, progress_cb=progress_cb)
        return self.make_transcription_alignment(words)

    @staticmethod
    def make_transcription_alignment(trans):
        """Wrap raw transcribed words in the `diff_align` output format.

        Each word is reported with case "success" and aligned to itself.
        Character offsets index into a synthetic transcript in which every
        word is followed by one space (including the last one).
        """
        words = []
        pieces = []
        offset = 0  # always equals the length of the transcript built so far
        for t_wd in trans:
            word = transcription.Word(case="success",
                                      startOffset=offset,
                                      endOffset=offset + len(t_wd.word),
                                      word=t_wd.word,
                                      alignedWord=t_wd.word,
                                      phones=t_wd.phones,
                                      start=t_wd.start,
                                      end=t_wd.end)
            words.append(word)

            piece = word.word + " "
            pieces.append(piece)
            offset += len(piece)

        # Join once at the end rather than growing a string in the loop.
        return Transcription(words=words, transcript="".join(pieces))
コード例 #3
0
ファイル: forced_aligner.py プロジェクト: yifan/gentle
class ForcedAligner():
    """Align a known transcript against an audio file.

    Construction builds a bigram language model from the transcript and a
    queue of Kaldi instances that decode with it.
    """

    def __init__(self,
                 resources,
                 transcript,
                 nthreads=4,
                 context_width=3,
                 **kwargs):
        """Prepare the transcript-specific decoding graph and transcriber.

        Args:
            resources: provides ``vocab`` and ``proto_langdir``; also passed
                to ``kaldi_queue.build`` and later to ``multipass.realign``.
            transcript: the text to align.
            nthreads: number of worker threads / Kaldi instances.
            context_width: forwarded to the language-model builder and to the
                second alignment pass.
            **kwargs: extra options forwarded to the language-model builder
                and to ``diff_align.align``.
        """
        self.resources = resources
        self.transcript = transcript
        self.nthreads = nthreads
        self.context_width = context_width
        self.kwargs = kwargs

        self.ms = metasentence.MetaSentence(transcript, resources.vocab)
        kaldi_sequence = self.ms.get_kaldi_sequence()
        hclg_path = language_model.make_bigram_language_model(
            kaldi_sequence,
            resources.proto_langdir,
            context_width=context_width,
            **kwargs)

        self.queue = kaldi_queue.build(
            resources, hclg_path=hclg_path, nthreads=nthreads)
        self.mtt = MultiThreadedTranscriber(self.queue, nthreads=nthreads)

    def transcribe(self, wavfile, progress_cb=None, logging=None):
        """Transcribe ``wavfile`` and align the result to the transcript.

        Args:
            wavfile: audio file to process.
            progress_cb: optional callable; receives ``{'status': 'ALIGNING'}``
                before the second pass and is forwarded to the transcriber
                and to ``multipass.realign``.
            logging: optional object with an ``info`` method used to report
                how many words remain unaligned after each pass.

        Returns:
            A Transcription holding the aligned words and the original
            transcript.
        """

        def unaligned_count(candidates):
            # Number of words the aligner could not locate in the audio.
            return sum(1 for wd in candidates if wd.not_found_in_audio())

        words, duration = self.mtt.transcribe(wavfile, progress_cb=progress_cb)

        # Take every Kaldi instance off the queue and stop it explicitly
        # (the original author was unsure whether GC would do this).
        for _ in range(self.nthreads):
            self.queue.get().stop()

        # First pass: diff the recognised words against the transcript.
        words = diff_align.align(words, self.ms, **self.kwargs)

        if logging is not None:
            logging.info(
                "%d unaligned words (of %d)" %
                (unaligned_count(words), len(words)))

        if progress_cb is not None:
            progress_cb({'status': 'ALIGNING'})

        # Second pass over the words that are still unaligned.
        words = multipass.realign(
            wavfile,
            words,
            self.ms,
            context_width=self.context_width,
            resources=self.resources,
            nthreads=self.nthreads,
            progress_cb=progress_cb)

        if logging is not None:
            logging.info(
                "after 2nd pass: %d unaligned words (of %d)" %
                (unaligned_count(words), len(words)))

        optimizer = AdjacencyOptimizer(words, duration)
        words = optimizer.optimize()

        return Transcription(words=words, transcript=self.transcript)
コード例 #4
0
    def test_transcriber(self):
        """Transcribe a short clip of the example recording and check word one."""
        from gentle import resampled, kaldi_queue, standard_kaldi, Resources
        from gentle.transcriber import MultiThreadedTranscriber

        # A single Kaldi instance is enough for this short clip.
        single_queue = kaldi_queue.build(Resources(), 1)
        transcriber = MultiThreadedTranscriber(single_queue)

        # NOTE(review): 10.5 and 2.5 are presumably offset and duration in
        # seconds -- confirm against `resampled`.
        with resampled('examples/data/lucier.mp3', 10.5, 2.5) as clip_path:
            words, duration = transcriber.transcribe(clip_path)

        first_word = words[0].word
        self.assertEqual(first_word, "different")
コード例 #5
0
ファイル: transcriber.py プロジェクト: bit/gentle
    def test_transcriber(self):
        """Transcribe a short clip of ``self.audio`` and check the first word.

        Note that the ``standard_kaldi.STDERR`` override set here is not
        restored by this test.
        """
        import subprocess
        from gentle import resampled, kaldi_queue, standard_kaldi, Resources
        from gentle.transcriber import MultiThreadedTranscriber

        # Must be set before the Kaldi queue is built below.
        standard_kaldi.STDERR = subprocess.STDOUT

        # A single Kaldi instance is enough for this short clip.
        single_queue = kaldi_queue.build(Resources(), 1)
        transcriber = MultiThreadedTranscriber(single_queue)

        # NOTE(review): 10.5 and 2.5 are presumably offset and duration in
        # seconds -- confirm against `resampled`.
        with resampled(self.audio, 10.5, 2.5) as clip_path:
            words, duration = transcriber.transcribe(clip_path)

        first_word = words[0].word
        self.assertEqual(first_word, "different")
コード例 #6
0
    def test_transcriber(self):
        """Check that transcribing a clip of ``self.audio`` starts with "different".

        The ``standard_kaldi.STDERR`` assignment below stays in effect after
        the test finishes; nothing here resets it.
        """
        import subprocess
        from gentle import resampled, kaldi_queue, standard_kaldi, Resources
        from gentle.transcriber import MultiThreadedTranscriber

        # Applied before any Kaldi queue is created.
        standard_kaldi.STDERR = subprocess.STDOUT

        gentle_resources = Resources()
        transcriber = MultiThreadedTranscriber(
            kaldi_queue.build(gentle_resources, 1))

        # NOTE(review): the numeric arguments look like (offset, duration)
        # in seconds -- verify against `resampled`.
        with resampled(self.audio, 10.5, 2.5) as clip_path:
            words, duration = transcriber.transcribe(clip_path)

        self.assertEqual(words[0].word, "different")
コード例 #7
0
ファイル: forced_aligner.py プロジェクト: bit/gentle
class ForcedAligner():
    """Align a known transcript against an audio file.

    Construction builds a bigram language model from the transcript and a
    queue of Kaldi instances that decode with it.
    """

    def __init__(self, resources, transcript, nthreads=4, **kwargs):
        """Prepare the transcript-specific decoding graph and transcriber.

        Args:
            resources: provides ``vocab`` and ``proto_langdir``; also passed
                to ``kaldi_queue.build`` and later to ``multipass.realign``.
            transcript: the text to align.
            nthreads: number of worker threads / Kaldi instances.
            **kwargs: extra options forwarded to the language-model builder
                and to ``diff_align.align``.
        """
        self.resources = resources
        self.transcript = transcript
        self.nthreads = nthreads
        self.kwargs = kwargs

        self.ms = metasentence.MetaSentence(transcript, resources.vocab)
        kaldi_sequence = self.ms.get_kaldi_sequence()
        hclg_path = language_model.make_bigram_language_model(
            kaldi_sequence, resources.proto_langdir, **kwargs)

        self.queue = kaldi_queue.build(
            resources, hclg_path=hclg_path, nthreads=nthreads)
        self.mtt = MultiThreadedTranscriber(self.queue, nthreads=nthreads)

    def transcribe(self, wavfile, progress_cb=None, logging=None):
        """Transcribe ``wavfile`` and align the result to the transcript.

        Args:
            wavfile: audio file to process.
            progress_cb: optional callable; receives ``{'status': 'ALIGNING'}``
                before the second pass and is forwarded to the transcriber
                and to ``multipass.realign``.
            logging: optional object with an ``info`` method used to report
                how many words remain unaligned after each pass.

        Returns:
            A Transcription holding the aligned words and the original
            transcript.
        """

        def unaligned_count(candidates):
            # Number of words the aligner could not locate in the audio.
            return sum(1 for wd in candidates if wd.not_found_in_audio())

        words, duration = self.mtt.transcribe(wavfile, progress_cb=progress_cb)

        # Take every Kaldi instance off the queue and stop it explicitly
        # (the original author was unsure whether GC would do this).
        for _ in range(self.nthreads):
            self.queue.get().stop()

        # First pass: diff the recognised words against the transcript.
        words = diff_align.align(words, self.ms, **self.kwargs)

        if logging is not None:
            logging.info("%d unaligned words (of %d)" % (unaligned_count(words), len(words)))

        if progress_cb is not None:
            progress_cb({'status': 'ALIGNING'})

        # Second pass over the words that are still unaligned.
        words = multipass.realign(
            wavfile,
            words,
            self.ms,
            resources=self.resources,
            nthreads=self.nthreads,
            progress_cb=progress_cb)

        if logging is not None:
            logging.info("after 2nd pass: %d unaligned words (of %d)" % (unaligned_count(words), len(words)))

        optimizer = AdjacencyOptimizer(words, duration)
        words = optimizer.optimize()

        return Transcription(words=words, transcript=self.transcript)