Example #1
0
    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        """Store the configuration, load the vocabulary and build decoders.

        data_dir, nthreads and ntranscriptionthreads are stored on the
        instance unchanged.  ntranscriptionthreads also decides how many
        Kaldi instances are created for full transcription.
        """
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads

        # The vocabulary is read from words.txt under the prototype
        # language directory.
        words_path = os.path.join(get_resource('PROTO_LANGDIR'),
                                  "graphdir/words.txt")
        with open(words_path) as words_file:
            self.vocab = metasentence.load_vocabulary(words_file)

        # Kaldi instances for full transcription are only created when
        # the HCLG graph file exists and at least one transcription
        # thread was requested.
        hclg_path = get_resource('data/graph/HCLG.fst')
        if os.path.exists(hclg_path) and self.ntranscriptionthreads > 0:
            langdir = get_resource('PROTO_LANGDIR')
            nnet_dir = get_resource('data/nnet_a_gpu_online')

            # One Kaldi instance per transcription thread.
            decoders = Queue()
            for _ in range(self.ntranscriptionthreads):
                decoder = standard_kaldi.Kaldi(nnet_dir, hclg_path, langdir)
                decoders.put(decoder)

            self.full_transcriber = MultiThreadedTranscriber(
                decoders, nthreads=self.ntranscriptionthreads)
        # NOTE: self.full_transcriber is not set at all when the branch
        # above is skipped.

        self._status_dicts = {}
Example #2
0
def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0, data_dir=get_datadir('webdata')):
    """Build the web resource tree, listen on (interface, port) and run.

    data_dir and its 'zip' subdirectory are created when missing.
    data_dir is served as the root resource, with the index page, the
    status page, the preloader image, the transcriptions controller and
    the zipper attached as children.  The call returns only when
    default_reactor.run() returns.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    zip_dir = os.path.join(data_dir, 'zip')
    for needed_dir in (data_dir, zip_dir):
        if not os.path.exists(needed_dir):
            os.makedirs(needed_dir)

    root = File(data_dir)

    # (child name, bundled resource) pairs served as static files.
    static_children = (
        ('', 'www/index.html'),
        ('status.html', 'www/status.html'),
        ('preloader.gif', 'www/preloader.gif'),
    )
    for child_name, resource_name in static_children:
        root.putChild(child_name, File(get_resource(resource_name)))

    # The same Transcriber is shared by the controller and the zipper.
    transcriber = Transcriber(data_dir)
    root.putChild('transcriptions', TranscriptionsController(transcriber))
    root.putChild('zip', TranscriptionZipper(zip_dir, transcriber))

    site = Site(root)
    logging.info("about to listen")
    default_reactor.listenTCP(port, site, interface=interface)
    logging.info("listening")

    default_reactor.run(installSignalHandlers=installSignalHandlers)
Example #3
0
def serve(port=8765,
          interface='0.0.0.0',
          installSignalHandlers=0,
          data_dir=get_datadir('webdata')):
    """Assemble the site rooted at data_dir and run the reactor.

    Missing directories (data_dir and data_dir/zip) are created first.
    Static pages, the transcriptions controller and the zipper are
    attached to the root resource, the site is bound to
    (interface, port), and default_reactor.run() is entered.
    """

    def ensure_dir(path):
        # Create the directory unless it already exists; hand the path back.
        if not os.path.exists(path):
            os.makedirs(path)
        return path

    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    ensure_dir(data_dir)
    zip_dir = ensure_dir(os.path.join(data_dir, 'zip'))

    root = File(data_dir)
    for segment, www_file in [('', 'www/index.html'),
                              ('status.html', 'www/status.html'),
                              ('preloader.gif', 'www/preloader.gif')]:
        root.putChild(segment, File(get_resource(www_file)))

    transcriber = Transcriber(data_dir)
    controller = TranscriptionsController(transcriber)
    root.putChild('transcriptions', controller)

    zipper = TranscriptionZipper(zip_dir, transcriber)
    root.putChild('zip', zipper)

    site = Site(root)
    logging.info("about to listen")
    default_reactor.listenTCP(port, site, interface=interface)
    logging.info("listening")

    default_reactor.run(installSignalHandlers=installSignalHandlers)
Example #4
0
 def get_kaldi(self):
     """Return a newly constructed Kaldi instance for self.gen_hclg_filename.

     A fresh instance is built on every call.  In theory these
     instances could be preserved through a session instead.
     """
     nnet_dir = get_resource('data/nnet_a_gpu_online')
     hclg_path = self.gen_hclg_filename
     langdir = get_resource('PROTO_LANGDIR')
     return standard_kaldi.Kaldi(nnet_dir, hclg_path, langdir)
Example #5
0
    def realign(chunk):
        """Re-decode the audio spanned by one chunk of unaligned words.

        chunk is a dict with three keys: "start" and "end" are the words
        bounding the chunk (a falsy "start" means the chunk begins at
        time 0; an "end" of None means it runs to the end of the audio),
        and "words" is the list of words to realign.

        Chunks shorter than 0.01s or longer than 60s are logged and
        skipped.  Otherwise the new word alignment is appended to
        realignments and, if progress_cb is set, the completed fraction
        is reported through it.
        """
        start_t = (chunk["start"] or {"end": 0})["end"]
        end_t = chunk["end"]

        # Open the audio once and always close it again, including on
        # the early return below; the handle used to be opened twice and
        # never released.
        wav_obj = wave.open(wavfile, 'r')
        try:
            framerate = wav_obj.getframerate()
            if end_t is None:
                end_t = wav_obj.getnframes() / float(framerate)
            else:
                end_t = end_t["start"]

            duration = end_t - start_t
            if duration < 0.01 or duration > 60:
                logging.debug("cannot realign %d words with duration %f",
                              len(chunk['words']), duration)
                return

            wav_obj.setpos(int(start_t * framerate))
            buf = wav_obj.readframes(int(duration * framerate))
        finally:
            wav_obj.close()

        # Create a language model
        offset_offset = chunk['words'][0]['startOffset']
        chunk_len = chunk['words'][-1]['endOffset'] - offset_offset
        chunk_transcript = ms.raw_sentence[offset_offset:offset_offset +
                                           chunk_len].encode("utf-8")
        chunk_ms = metasentence.MetaSentence(chunk_transcript, vocab)
        chunk_ks = chunk_ms.get_kaldi_sequence()

        chunk_gen_hclg_filename = language_model.make_bigram_language_model(
            chunk_ks, proto_langdir)
        k = standard_kaldi.Kaldi(get_resource('data/nnet_a_gpu_online'),
                                 chunk_gen_hclg_filename, proto_langdir)
        # Stop the Kaldi instance even when decoding raises.
        try:
            k.push_chunk(buf)
            ret = k.get_final()
        finally:
            k.stop()

        word_alignment = diff_align.align(ret, chunk_ms)

        # Adjust startOffset, endOffset, and timing to match originals
        for wd in word_alignment:
            if wd.get("end"):
                # Apply timing offset
                wd['start'] += start_t
                wd['end'] += start_t

            if wd.get("endOffset"):
                wd['startOffset'] += offset_offset
                wd['endOffset'] += offset_offset

        # "chunk" should be replaced by "words"
        realignments.append({"chunk": chunk, "words": word_alignment})

        if progress_cb is not None:
            progress_cb(
                {"percent": len(realignments) / float(len(to_realign))})
Example #6
0
    def realign(chunk):
        """Run a second decoding pass over one chunk of unaligned words.

        chunk carries "start" and "end" (the words on either side of the
        chunk; a falsy "start" means time 0, an "end" of None means the
        end of the audio) and "words" (the words to realign).

        A chunk whose duration is under 0.01s or over 60s is logged and
        skipped.  Otherwise the resulting alignment is appended to
        realignments, and progress_cb (when not None) receives the
        fraction of chunks completed so far.
        """
        start_t = (chunk["start"] or {"end": 0})["end"]
        end_t = chunk["end"]

        # A single wave handle serves both the duration lookup and the
        # frame read, and is closed on every path (it used to be opened
        # twice and left open).
        wav_obj = wave.open(wavfile, 'r')
        try:
            framerate = wav_obj.getframerate()
            if end_t is None:
                end_t = wav_obj.getnframes() / float(framerate)
            else:
                end_t = end_t["start"]

            duration = end_t - start_t
            if duration < 0.01 or duration > 60:
                logging.debug("cannot realign %d words with duration %f",
                              len(chunk['words']), duration)
                return

            wav_obj.setpos(int(start_t * framerate))
            buf = wav_obj.readframes(int(duration * framerate))
        finally:
            wav_obj.close()

        # Create a language model
        offset_offset = chunk['words'][0]['startOffset']
        chunk_len = chunk['words'][-1]['endOffset'] - offset_offset
        chunk_transcript = ms.raw_sentence[offset_offset:offset_offset+chunk_len].encode("utf-8")
        chunk_ms = metasentence.MetaSentence(chunk_transcript, vocab)
        chunk_ks = chunk_ms.get_kaldi_sequence()

        chunk_gen_hclg_filename = language_model.make_bigram_language_model(chunk_ks, proto_langdir)
        k = standard_kaldi.Kaldi(
            get_resource('data/nnet_a_gpu_online'),
            chunk_gen_hclg_filename,
            proto_langdir)
        # Make sure the Kaldi instance is stopped even if decoding fails.
        try:
            k.push_chunk(buf)
            ret = k.get_final()
        finally:
            k.stop()

        word_alignment = diff_align.align(ret, chunk_ms)

        # Adjust startOffset, endOffset, and timing to match originals
        for wd in word_alignment:
            if wd.get("end"):
                # Apply timing offset
                wd['start'] += start_t
                wd['end'] += start_t

            if wd.get("endOffset"):
                wd['startOffset'] += offset_offset
                wd['endOffset'] += offset_offset

        # "chunk" should be replaced by "words"
        realignments.append({"chunk": chunk, "words": word_alignment})

        if progress_cb is not None:
            progress_cb({"percent": len(realignments) / float(len(to_realign))})
Example #7
0
 def re_run(self, utt):
     """Decode an utterance's stored audio again and publish the change.

     Returns immediately when utt has no 'wavpath'.  Otherwise
     utt['command_words'] and utt['command'] are overwritten with the
     new result and self.db.onchange is scheduled through
     reactor.callFromThread.
     """
     if 'wavpath' not in utt:
         return

     decoder = Kaldi(
         get_resource('data/nnet_a_gpu_online'),
         self.gen_hclg_filename,
         get_resource('PROTO_LANGDIR'))

     wav_path = os.path.join(self.resources['attach'].attachdir,
                             utt['wavpath'])
     samples = numm3.sound2np(wav_path, nchannels=1, R=8000)

     decoder.push_chunk(samples.tostring())
     words = decoder.get_final()
     decoder.stop()

     # The per-word 'phones' entry is removed before the words are stored.
     for word in words:
         del word['phones']
     utt['command_words'] = words
     utt['command'] = ' '.join(word['word'] for word in words)

     change = {"type": "change", "id": utt["_id"], "doc": utt}
     reactor.callFromThread(self.db.onchange, None, change)
Example #8
0
    def re_run(self, utt):
        """Re-decode the audio attached to utt and announce the update.

        Nothing happens when utt lacks a 'wavpath' key.  Otherwise the
        decoded words replace utt['command_words'], their space-joined
        text replaces utt['command'], and a "change" notification for
        utt is handed to self.db.onchange via reactor.callFromThread.
        """
        if 'wavpath' not in utt:
            return

        nnet_dir = get_resource('data/nnet_a_gpu_online')
        langdir = get_resource('PROTO_LANGDIR')
        recognizer = Kaldi(nnet_dir, self.gen_hclg_filename, langdir)

        attach_dir = self.resources['attach'].attachdir
        sound = numm3.sound2np(os.path.join(attach_dir, utt['wavpath']),
                               nchannels=1,
                               R=8000)
        recognizer.push_chunk(sound.tostring())
        decoded = recognizer.get_final()
        recognizer.stop()

        # Drop the 'phones' entry from every decoded word.
        for entry in decoded:
            del entry['phones']

        utt['command_words'] = decoded
        utt['command'] = ' '.join(entry['word'] for entry in decoded)

        notification = {
            "type": "change",
            "id": utt["_id"],
            "doc": utt
        }
        reactor.callFromThread(self.db.onchange, None, notification)
Example #9
0
    def render_POST(self, req):
        """Handle an upload: start a transcription job and answer the client.

        Reads 'transcript' (optional, defaults to '') and 'audio' from
        the request arguments, creates the job's output directory, and
        runs self.transcriber.transcribe in the reactor's thread pool.
        The presence of 'disfluency' / 'conservative' arguments switches
        the corresponding keyword options on.

        Unless the request carries async=false, the client is redirected
        to /transcriptions/<uid> right away.  With async=false the
        response is held open (NOT_DONE_YET) and the result is written
        as JSON once transcription completes.
        """
        uid = self.transcriber.next_id()

        tran = req.args.get('transcript', [''])[0]
        audio = req.args['audio'][0]

        kwargs = {'disfluency': 'disfluency' in req.args,
                  'conservative': 'conservative' in req.args,
                  'disfluencies': set(['uh', 'um'])}

        # "async" is a reserved word from Python 3.7 on, so the local
        # flag uses a different name; the request argument is unchanged.
        run_async = not ('async' in req.args
                         and req.args['async'][0] == 'false')

        # We need to make the transcription directory here, so that
        # when we redirect the user we are sure that there's a place
        # for them to go.
        outdir = os.path.join(self.transcriber.data_dir, 'transcriptions', uid)
        os.makedirs(outdir)

        # Copy over the HTML
        shutil.copy(get_resource('www/view_alignment.html'), os.path.join(outdir, 'index.html'))

        result_promise = threads.deferToThreadPool(
            reactor, reactor.getThreadPool(),
            self.transcriber.transcribe,
            uid, tran, audio, run_async, **kwargs)

        if not run_async:
            def write_result(result):
                '''Write JSON to client on completion'''
                req.setHeader("Content-Type", "application/json")
                req.write(json.dumps(result, indent=2))
                req.finish()
            result_promise.addCallback(write_result)
            # NOTE(review): failures are swallowed here and the request
            # is not finished on that path -- confirm this is intended.
            result_promise.addErrback(lambda _: None) # ignore errors

            # Cancel the job if the client goes away before completion.
            req.notifyFinish().addErrback(lambda _: result_promise.cancel())

            return NOT_DONE_YET

        req.setResponseCode(FOUND)
        req.setHeader(b"Location", "/transcriptions/%s" % (uid))
        return ''
Example #10
0
    def render_POST(self, req):
        """Handle an upload: start a transcription job and answer the client.

        Reads 'transcript' and 'audio' from the request arguments (both
        required), creates the job's output directory, and runs
        self.transcriber.transcribe in the reactor's thread pool.

        Unless the request carries async=false, the client is redirected
        to /transcriptions/<uid> right away.  With async=false the
        response is held open (NOT_DONE_YET) and the result is written
        as JSON once transcription completes.
        """
        uid = self.transcriber.next_id()

        tran = req.args['transcript'][0]
        audio = req.args['audio'][0]

        # "async" is a reserved word from Python 3.7 on, so the local
        # flag uses a different name; the request argument is unchanged.
        run_async = not ('async' in req.args
                         and req.args['async'][0] == 'false')

        # We need to make the transcription directory here, so that
        # when we redirect the user we are sure that there's a place
        # for them to go.
        outdir = os.path.join(self.transcriber.data_dir, 'transcriptions', uid)
        os.makedirs(outdir)

        # Copy over the HTML
        shutil.copy(get_resource('www/view_alignment.html'),
                    os.path.join(outdir, 'index.html'))

        result_promise = threads.deferToThreadPool(
            self.reactor, self.reactor.getThreadPool(),
            self.transcriber.transcribe, uid, tran, audio, run_async)

        if not run_async:

            def write_result(result):
                '''Write JSON to client on completion'''
                # Use setHeader, like the redirect below, rather than
                # assigning into req.headers.
                req.setHeader("Content-Type", "application/json")
                req.write(json.dumps(result, indent=2))
                req.finish()

            result_promise.addCallback(write_result)
            # NOTE(review): failures are swallowed here and the request
            # is not finished on that path -- confirm this is intended.
            result_promise.addErrback(lambda _: None)  # ignore errors

            # Cancel the job if the client goes away before completion.
            req.notifyFinish().addErrback(lambda _: result_promise.cancel())

            return NOT_DONE_YET

        req.setResponseCode(FOUND)
        req.setHeader(b"Location", "/transcriptions/%s" % (uid))
        return ''
Example #11
0
import multiprocessing
from multiprocessing.pool import ThreadPool as Pool
import numpy as np
import os
import shutil
import tempfile
import time
import zipfile

from gentle.paths import get_resource
from gentle.standard_kaldi import Kaldi
import gentle.metasentence as metasentence
import gentle.language_model as language_model

# kaldi quirk...
# Module-level setup, executed at import time: the vocabulary is loaded
# from graphdir/words.txt inside the PROTO_LANGDIR resource directory and
# kept in the module-level name `vocab`.
proto_langdir = get_resource('PROTO_LANGDIR')
vocab_path = os.path.join(proto_langdir, "graphdir/words.txt")
with open(vocab_path) as f:
    vocab = metasentence.load_vocabulary(f)


class AudioConferenceFactory(WebSocketServerFactory):
    """WebSocket server factory that also holds shared resources and a db."""

    def __init__(self, resources, dbdir="db", db=None):
        """Initialise the base factory and remember resources and db.

        gen_hclg_filename is taken from db when db is truthy and is None
        otherwise.  dbdir is accepted but not read here.
        """
        WebSocketServerFactory.__init__(self, None)

        # peerstr -> client
        self.clients = {}

        self.resources = resources
        self.db = db

        if db:
            self.gen_hclg_filename = db.gen_hclg_filename
        else:
            self.gen_hclg_filename = None
Example #12
0
    def transcribe(self, uid, transcript, audio):
        """Store an upload, convert it to WAV, transcribe it and save results.

        uid names the output directory under <data_dir>/transcriptions,
        transcript is the text written to transcript.txt and passed to
        the transcriber, and audio is the raw uploaded media data.

        Progress is persisted to align.json / align.csv with a 'status'
        of STARTED, ENCODING, TRANSCRIBING, OK or ERROR.  Returns the
        last item yielded by lm_transcribe_progress, or None when
        encoding fails or nothing was yielded.
        """
        output = {
            'status': 'STARTED',
            'transcript': transcript,
        }

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)
        upload_path = os.path.join(outdir, 'upload')
        index_path = os.path.join(outdir, 'index.html')
        template_path = get_resource('www/view_alignment.html')

        def save():
            with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
                json.dump(output, jsfile, indent=2)
            with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
                csvfile.write(to_csv(output))

        os.makedirs(outdir)

        # Copy over the HTML
        shutil.copy(template_path, index_path)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)
        # The upload is media data, so it is written in binary mode;
        # text mode can alter the bytes on some platforms.
        with open(upload_path, 'wb') as upload_file:
            upload_file.write(audio)

        output['status'] = 'ENCODING'
        with open(os.path.join(outdir, 'align.json'), 'w') as alignfile:
            json.dump(output, alignfile, indent=2)

        wavfile = os.path.join(outdir, 'a.wav')
        if to_wav(upload_path, wavfile) != 0:
            output['status'] = 'ERROR'
            output['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            save()
            return

        output['status'] = 'TRANSCRIBING'
        save()

        # Run transcription
        progress = lm_transcribe_progress(
            wavfile,
            transcript,
            # XXX: should be configurable
            get_resource('PROTO_LANGDIR'),
            get_resource('data/nnet_a_gpu_online'))
        result = None
        for result in progress:
            output['words'] = result['words']
            output['transcript'] = result['transcript']
            save()

        # ...and remove the original upload
        os.unlink(upload_path)

        output['status'] = 'OK'
        save()

        # Inline the alignment into the index.html file.  Both files are
        # opened with `with` so the handles are closed promptly.
        # NOTE(review): the JSON is substituted into the page as-is;
        # presumably the placeholder sits inside a <script> element, in
        # which case a transcript containing "</script>" would break the
        # page -- confirm and escape if so.
        with open(template_path) as template:
            htmltxt = template.read()
        htmltxt = htmltxt.replace("var INLINE_JSON;",
                                  "var INLINE_JSON=%s;" % (json.dumps(output)))
        with open(index_path, 'w') as index_file:
            index_file.write(htmltxt)

        logging.info('done with transcription.')

        return result
Example #13
0
    def transcribe(self, uid, transcript, audio, async):
        status = self.get_status(uid)

        status['status'] = 'STARTED'
        output = {
            'transcript': transcript
        }

        def save():
            with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
                json.dump(output, jsfile, indent=2)
            with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
                csvfile.write(to_csv(output))

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)                

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'w') as wavfile:
            wavfile.write(audio)

        status['status'] = 'ENCODING'
        # with open(os.path.join(outdir, 'align.json'), 'w') as alignfile:
            # json.dump(output, alignfile, indent=2)

        wavfile = os.path.join(outdir, 'a.wav')
        if to_wav(os.path.join(outdir, 'upload'), wavfile) != 0:
            status['status'] = 'ERROR'
            status['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        # Find the duration

        #XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        status['duration'] = wav_obj.getnframes() / float(wav_obj.getframerate())

        status['status'] = 'TRANSCRIBING'

        # Run transcription
        progress = align_progress(
            wavfile,
            transcript,
            # XXX: should be configurable
            get_resource('PROTO_LANGDIR'),
            get_resource('data/nnet_a_gpu_online'),
            want_progress=True)
        result = None
        for result in progress:
            if result.get("preview") is not None:
                status["message"] = result["preview"]
                status["t"] = result["t"]
            else:
                output['words'] = result['words']
                output['transcript'] = result['transcript']
            #save()

        # ...and remove the original upload
        os.unlink(os.path.join(outdir, 'upload'))

        save()

        # Inline the alignment into the index.html file.
        htmltxt = open(get_resource('www/view_alignment.html')).read()
        htmltxt = htmltxt.replace("var INLINE_JSON;", "var INLINE_JSON=%s;" % (json.dumps(output)));
        open(os.path.join(outdir, 'index.html'), 'w').write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return result
Example #14
0
    def transcribe(self, uid, transcript, audio, async):

        proto_langdir = get_resource('PROTO_LANGDIR')
        
        status = self.get_status(uid)

        status['status'] = 'STARTED'
        output = {
            'transcript': transcript
        }

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)                

        tran_path = os.path.join(outdir, 'transcript.txt')
        with codecs.open(tran_path, 'w', 'utf-8') as tranfile:
            tranfile.write(transcript)
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'w') as wavfile:
            wavfile.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        if to_wav(os.path.join(outdir, 'upload'), wavfile) != 0:
            status['status'] = 'ERROR'
            status['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        #XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        status['duration'] = wav_obj.getnframes() / float(wav_obj.getframerate())
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            for k,v in p.items():
                status[k] = v

        if len(transcript.strip()) > 0:
            ms = metasentence.MetaSentence(transcript, self.vocab)
            ks = ms.get_kaldi_sequence()
            gen_hclg_filename = language_model.make_bigram_language_model(ks, proto_langdir)

            kaldi_queue = Queue()
            for i in range(self.nthreads):
                kaldi_queue.put(standard_kaldi.Kaldi(
                    get_resource('data/nnet_a_gpu_online'),
                    gen_hclg_filename,
                    proto_langdir)
                )

            mtt = MultiThreadedTranscriber(kaldi_queue, nthreads=self.nthreads)
        elif hasattr(self, 'full_transcriber'):
            mtt = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status['error']  = 'No transcript provided and no language model for full transcription'
            return

        words = mtt.transcribe(wavfile, progress_cb=on_progress)

        output = {}
        if len(transcript.strip()) > 0:
            # Clear queue (would this be gc'ed?)
            for i in range(self.nthreads):
                k = kaldi_queue.get()
                k.stop()

            # Align words
            output['words'] = diff_align.align(words, ms)
            output['transcript'] = transcript

            # Perform a second-pass with unaligned words
            logging.info("%d unaligned words (of %d)" % (len([X for X in output['words'] if X.get("case") == "not-found-in-audio"]), len(output['words'])))

            status['status'] = 'ALIGNING'

            output['words'] = multipass.realign(wavfile, output['words'], ms, nthreads=self.nthreads, progress_cb=on_progress)

            logging.info("after 2nd pass: %d unaligned words (of %d)" % (len([X for X in output['words'] if X.get("case") == "not-found-in-audio"]), len(output['words'])))
            
        else:
            # Match format
            output = make_transcription_alignment({"words": words})

        # ...remove the original upload
        os.unlink(os.path.join(outdir, 'upload'))

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
            json.dump(output, jsfile, indent=2)
        with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
            csvfile.write(to_csv(output))

        # Inline the alignment into the index.html file.
        htmltxt = open(get_resource('www/view_alignment.html')).read()
        htmltxt = htmltxt.replace("var INLINE_JSON;", "var INLINE_JSON=%s;" % (json.dumps(output)));
        open(os.path.join(outdir, 'index.html'), 'w').write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output
Example #15
0
    def transcribe(self, uid, transcript, audio):
        """Store an upload, convert it to WAV, transcribe it and save results.

        uid names the output directory created under
        <data_dir>/transcriptions, transcript is the text written to
        transcript.txt and handed to the transcriber, and audio is the
        raw uploaded media data.

        The output dict (with a 'status' of STARTED, ENCODING,
        TRANSCRIBING, OK or ERROR) is persisted to align.json and
        align.csv as work progresses.  Returns the last item yielded by
        lm_transcribe_progress, or None when encoding fails or nothing
        was yielded.
        """
        output = {
            'status': 'STARTED',
            'transcript': transcript,
        }

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)
        upload_path = os.path.join(outdir, 'upload')
        index_path = os.path.join(outdir, 'index.html')
        template_path = get_resource('www/view_alignment.html')

        def save():
            with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
                json.dump(output, jsfile, indent=2)
            with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
                csvfile.write(to_csv(output))

        os.makedirs(outdir)

        # Copy over the HTML
        shutil.copy(template_path, index_path)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)
        # Binary mode: the upload is media data, and text mode can alter
        # the bytes on some platforms.
        with open(upload_path, 'wb') as upload_file:
            upload_file.write(audio)

        output['status'] = 'ENCODING'
        with open(os.path.join(outdir, 'align.json'), 'w') as alignfile:
            json.dump(output, alignfile, indent=2)

        wavfile = os.path.join(outdir, 'a.wav')
        if to_wav(upload_path, wavfile) != 0:
            output['status'] = 'ERROR'
            output['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            save()
            return

        output['status'] = 'TRANSCRIBING'
        save()

        # Run transcription
        progress = lm_transcribe_progress(
            wavfile,
            transcript,
            # XXX: should be configurable
            get_resource('PROTO_LANGDIR'),
            get_resource('data/nnet_a_gpu_online'))
        result = None
        for result in progress:
            output['words'] = result['words']
            output['transcript'] = result['transcript']
            save()

        # ...and remove the original upload
        os.unlink(upload_path)

        output['status'] = 'OK'
        save()

        # Inline the alignment into the index.html file.  Both files are
        # opened with `with` so the handles are closed promptly.
        # NOTE(review): the JSON is substituted into the page as-is;
        # presumably the placeholder sits inside a <script> element, in
        # which case a transcript containing "</script>" would break the
        # page -- confirm and escape if so.
        with open(template_path) as template:
            htmltxt = template.read()
        htmltxt = htmltxt.replace("var INLINE_JSON;", "var INLINE_JSON=%s;" % (json.dumps(output)))
        with open(index_path, 'w') as index_file:
            index_file.write(htmltxt)

        logging.info('done with transcription.')

        return result
Example #16
0
import logging
from multiprocessing.pool import ThreadPool as Pool
import os
import wave

from gentle import standard_kaldi
from gentle import metasentence
from gentle import language_model
from gentle.paths import get_resource
from gentle import diff_align

# XXX: refactor out somewhere
# Module-level setup, executed at import time: the vocabulary is loaded
# from graphdir/words.txt inside the PROTO_LANGDIR resource directory and
# kept in the module-level name `vocab`.
proto_langdir = get_resource('PROTO_LANGDIR')
vocab_path = os.path.join(proto_langdir, "graphdir/words.txt")
with open(vocab_path) as f:
    vocab = metasentence.load_vocabulary(f)

def prepare_multipass(alignment):
    """Collect runs of unaligned words from alignment into chunks.

    alignment is iterated as a sequence of word dicts, each with a
    'case' key.  Words whose case is 'not-found-in-audio' are
    accumulated; when a 'success' word follows a non-empty run, a chunk
    dict with "start", "end" and "words" keys is appended to to_realign.
    """
    to_realign = []
    last_aligned_word = None
    cur_unaligned_words = []

    for wd_idx,wd in enumerate(alignment):
        if wd['case'] == 'not-found-in-audio':
            # Still inside a run of unaligned words.
            cur_unaligned_words.append(wd)
        elif wd['case'] == 'success':
            if len(cur_unaligned_words) > 0:
                # The pending run is bounded by the previous aligned
                # word (None if there was none) and this one.
                to_realign.append({
                    "start": last_aligned_word,
                    "end": wd,
                    "words": cur_unaligned_words})
Example #17
0
    def transcribe(self, uid, transcript, audio, async):
        status = self.get_status(uid)

        status['status'] = 'STARTED'
        output = {'transcript': transcript}

        def save():
            with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
                json.dump(output, jsfile, indent=2)
            with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
                csvfile.write(to_csv(output))

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'w') as wavfile:
            wavfile.write(audio)

        status['status'] = 'ENCODING'
        # with open(os.path.join(outdir, 'align.json'), 'w') as alignfile:
        # json.dump(output, alignfile, indent=2)

        wavfile = os.path.join(outdir, 'a.wav')
        if to_wav(os.path.join(outdir, 'upload'), wavfile) != 0:
            status['status'] = 'ERROR'
            status[
                'error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        # Find the duration

        #XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        status['duration'] = wav_obj.getnframes() / float(
            wav_obj.getframerate())

        status['status'] = 'TRANSCRIBING'

        # Run transcription
        progress = align_progress(
            wavfile,
            transcript,
            # XXX: should be configurable
            get_resource('PROTO_LANGDIR'),
            get_resource('data/nnet_a_gpu_online'),
            want_progress=True)
        result = None
        for result in progress:
            if result.get("error") is not None:
                status["status"] = "ERROR"
                status["error"] = result["error"]

                # Save the status so that errors are recovered on restart of the server
                # XXX: This won't work, because the endpoint will override this file
                # XXX(2): duplicated code.
                with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                    json.dump(status, jsfile, indent=2)
                return

            elif result.get("preview") is not None:
                status["message"] = result["preview"]
                status["t"] = result["t"]
            else:
                output['words'] = result['words']
                output['transcript'] = result['transcript']
            #save()

        # ...and remove the original upload
        os.unlink(os.path.join(outdir, 'upload'))

        save()

        # Inline the alignment into the index.html file.
        htmltxt = open(get_resource('www/view_alignment.html')).read()
        htmltxt = htmltxt.replace("var INLINE_JSON;",
                                  "var INLINE_JSON=%s;" % (json.dumps(output)))
        open(os.path.join(outdir, 'index.html'), 'w').write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return result