def __init__(self):
    """Resolve the model resource paths and load the vocabulary.

    The three paths are looked up through ``get_resource``; the vocabulary
    is parsed from ``graphdir/words.txt`` beneath the prototype language
    directory and stored on ``self.vocab``.
    """
    langdir = get_resource('PROTO_LANGDIR')
    self.proto_langdir = langdir
    self.nnet_gpu_path = get_resource('data/nnet_a_gpu_online')
    self.full_hclg_path = get_resource('data/graph/HCLG.fst')

    words_path = os.path.join(langdir, "graphdir/words.txt")
    with open(words_path) as words_file:
        self.vocab = metasentence.load_vocabulary(words_file)
def __init__(self):
    """Resolve the model resource paths, validate them, load the vocabulary.

    Raises:
        RuntimeError: if the prototype language directory or the nnet
            directory is not an existing directory.
    """
    self.proto_langdir = get_resource('PROTO_LANGDIR')
    self.nnet_gpu_path = get_resource('data/nnet_a_gpu_online')
    self.full_hclg_path = get_resource('data/graph/HCLG.fst')

    # Fail early, in this order, on the first directory that is missing.
    for required in (self.proto_langdir, self.nnet_gpu_path):
        if os.path.isdir(required):
            continue
        raise RuntimeError(
            "No resource directory %s. Check %s environment variable?" %
            (required, ENV_VAR))

    words_path = os.path.join(self.proto_langdir, "graphdir/words.txt")
    with open(words_path) as words_file:
        self.vocab = metasentence.load_vocabulary(words_file)
def __init__(self):
    """Resolve the ``exp`` model paths, validate them, load the vocabulary.

    Raises:
        RuntimeError: if the ``exp`` directory or the online nnet
            directory is not an existing directory.
    """
    self.proto_langdir = get_resource('exp')
    self.nnet_gpu_path = get_resource('exp/tdnn_7b_chain_online/')
    self.full_hclg_path = get_resource(
        'exp/tdnn_7b_chain_online/graph_pp/HCLG.fst')

    def _ensure_directory(candidate):
        # Guard clause: nothing to do when the directory is present.
        if os.path.isdir(candidate):
            return
        message = "No resource directory %s. Check %s environment variable?" % (
            candidate, ENV_VAR)
        raise RuntimeError(message)

    _ensure_directory(self.proto_langdir)
    _ensure_directory(self.nnet_gpu_path)

    vocab_path = os.path.join(self.proto_langdir, "langdir", "words.txt")
    with open(vocab_path) as vocab_file:
        self.vocab = metasentence.load_vocabulary(vocab_file)
def render_POST(self, req):
    """Accept an upload, create its output directory and start the job.

    Form fields read from ``req.args``:
      * ``audio`` (required): the uploaded media; first value is used.
      * ``transcript`` (optional): text to align; defaults to ``''``.
      * ``disfluency`` / ``conservative``: flags, true when the field is
        present at all.
      * ``async``: the job is treated as asynchronous unless the first
        value is exactly ``'false'``.

    Returns:
        ``NOT_DONE_YET`` for a synchronous request (the JSON result is
        written when the job's deferred fires), otherwise ``''`` after
        setting the ``FOUND`` response code and a ``Location`` header that
        points at ``/transcriptions/<uid>``.
    """
    uid = self.transcriber.next_id()

    tran = req.args.get('transcript', [''])[0]
    audio = req.args['audio'][0]

    kwargs = {
        'disfluency': 'disfluency' in req.args,
        'conservative': 'conservative' in req.args,
        'disfluencies': set(['uh', 'um']),
    }

    # ``async`` is a reserved keyword from Python 3.7 on, so the flag is
    # held under another local name. It is handed to transcribe()
    # positionally, so the callee's parameter name does not matter here.
    run_async = not ('async' in req.args and req.args['async'][0] == 'false')

    # We need to make the transcription directory here, so that
    # when we redirect the user we are sure that there's a place
    # for them to go.
    outdir = os.path.join(self.transcriber.data_dir, 'transcriptions', uid)
    os.makedirs(outdir)

    # Copy over the HTML
    shutil.copy(get_resource('www/view_alignment.html'),
                os.path.join(outdir, 'index.html'))

    result_promise = threads.deferToThreadPool(
        reactor, reactor.getThreadPool(),
        self.transcriber.transcribe,
        uid, tran, audio, run_async, **kwargs)

    if not run_async:
        def write_result(result):
            '''Write JSON to client on completion'''
            req.setHeader("Content-Type", "application/json")
            req.write(result.to_json(indent=2))
            req.finish()

        result_promise.addCallback(write_result)
        result_promise.addErrback(lambda _: None)  # ignore errors

        # If the client goes away first, cancel the pending job.
        req.notifyFinish().addErrback(lambda _: result_promise.cancel())

        return NOT_DONE_YET

    req.setResponseCode(FOUND)
    req.setHeader(b"Location", "/transcriptions/%s" % (uid))
    return ''
def __init__(self, modelDir):
    """Resolve the paths for one model directory and load its vocabulary.

    Args:
        modelDir: resource-relative directory of the model; its ``online``
            subdirectory holds the nnet files and decoding graph.

    An optional ``config.yaml`` inside the model directory is loaded into
    ``self.config`` when the file exists.

    Raises:
        RuntimeError: if the model directory or its ``online``
            subdirectory is not an existing directory.
    """
    self.proto_langdir = get_resource(modelDir)
    self.nnet_gpu_path = get_resource(os.path.join(modelDir, 'online'))
    hclg_relpath = os.path.join(self.nnet_gpu_path, 'graph', 'HCLG.fst')
    self.full_hclg_path = get_resource(hclg_relpath)

    self.config = Config()
    config_file = os.path.join(self.proto_langdir, 'config.yaml')
    if os.path.exists(config_file):
        self.config.load(config_file)

    # Fail early, in this order, on the first directory that is missing.
    for required in (self.proto_langdir, self.nnet_gpu_path):
        if os.path.isdir(required):
            continue
        raise RuntimeError(
            "No resource directory %s. Check %s environment variable?" %
            (required, ENV_VAR))

    words_path = os.path.join(self.proto_langdir, "langdir", "words.txt")
    with open(words_path) as words_file:
        self.vocab = metasentence.load_vocabulary(words_file)
def serve(port=8765,
          interface='0.0.0.0',
          installSignalHandlers=0,
          nthreads=4,
          ntranscriptionthreads=2,
          data_dir=get_datadir('webdata')):
    """Build the web resource tree and run the reactor until it stops.

    Args:
        port: TCP port to listen on.
        interface: address to bind to.
        installSignalHandlers: forwarded to ``reactor.run``.
        nthreads: forwarded to ``Transcriber`` as ``nthreads``.
        ntranscriptionthreads: forwarded to ``Transcriber`` as
            ``ntranscriptionthreads``.
        data_dir: directory served as the site root; it and its ``zip``
            subdirectory are created when they do not exist.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    zip_dir = os.path.join(data_dir, 'zip')
    for directory in (data_dir, zip_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    root = File(data_dir)

    # (child name, bundled resource) pairs served straight from the package.
    static_children = (
        ('', 'www/index.html'),
        ('status.html', 'www/status.html'),
        ('preloader.gif', 'www/preloader.gif'),
    )
    for child_name, resource in static_children:
        root.putChild(child_name, File(get_resource(resource)))

    transcriber = Transcriber(data_dir,
                              nthreads=nthreads,
                              ntranscriptionthreads=ntranscriptionthreads)
    root.putChild('transcriptions', TranscriptionsController(transcriber))
    root.putChild('zip', TranscriptionZipper(zip_dir, transcriber))

    site = Site(root)
    logging.info("about to listen")
    reactor.listenTCP(port, site, interface=interface)
    logging.info("listening")
    reactor.run(installSignalHandlers=installSignalHandlers)
class Transcriber():
    """Run alignment/transcription jobs and keep a status dict per job uid.

    Each job's files live in ``<data_dir>/transcriptions/<uid>``.
    """

    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        """Store the settings and build the shared gentle resources.

        Args:
            data_dir: root directory under which job directories live.
            nthreads: passed to ``gentle.ForcedAligner`` for each job.
            ntranscriptionthreads: passed to ``gentle.FullTranscriber``.
        """
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads
        self.resources = gentle.Resources()

        self.full_transcriber = gentle.FullTranscriber(
            self.resources, nthreads=ntranscriptionthreads)
        # uid -> mutable status dict, created on first get_status() call.
        self._status_dicts = {}

    def get_status(self, uid):
        """Return the status dict for ``uid``, creating an empty one if new."""
        return self._status_dicts.setdefault(uid, {})

    def out_dir(self, uid):
        """Return the output directory path for job ``uid``."""
        return os.path.join(self.data_dir, 'transcriptions', uid)

    # TODO(maxhawkins): refactor so this is returned by transcribe()
    def next_id(self):
        """Return a fresh 8-character hex id with no existing job directory."""
        uid = None
        # Check the directory jobs are actually stored in (out_dir); looking
        # directly under data_dir would never detect a collision.
        while uid is None or os.path.exists(self.out_dir(uid)):
            # ``.hex`` is available on both Python 2 and 3 (get_hex() is not).
            uid = uuid.uuid4().hex[:8]
        return uid

    def transcribe(self, uid, transcript, audio, async_mode=True, **kwargs):
        """Align or transcribe ``audio`` and write the results for ``uid``.

        The job directory must already exist. Progress and errors are
        reported through the dict returned by ``get_status(uid)``.

        Args:
            uid: job id; selects the output directory and status dict.
            transcript: text to align. When it is blank, the full
                transcriber is used if it reports itself available.
            audio: raw contents of the uploaded media file.
            async_mode: accepted for interface compatibility and not used
                here. Formerly named ``async``, which is a reserved keyword
                from Python 3.7 on; the old keyword spelling is still
                accepted.
            **kwargs: forwarded to ``gentle.ForcedAligner``.

        Returns:
            The object returned by the aligner's ``transcribe`` on success,
            or ``None`` when the status was set to ``'ERROR'``.
        """
        # Swallow the legacy ``async=...`` keyword so it is not forwarded to
        # the aligner.
        kwargs.pop('async', None)

        status = self.get_status(uid)
        status['status'] = 'STARTED'

        outdir = self.out_dir(uid)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)

        # The upload is binary media: write it untranslated.
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'wb') as upload_file:
            upload_file.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        if gentle.resample(audio_path, wavfile) != 0:
            status['status'] = 'ERROR'
            status['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        # XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        try:
            status['duration'] = wav_obj.getnframes() / float(
                wav_obj.getframerate())
        finally:
            # Release the file handle even if the header cannot be read.
            wav_obj.close()
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            # Mirror every progress field into the shared status dict.
            for k, v in p.items():
                status[k] = v

        if transcript.strip():
            trans = gentle.ForcedAligner(self.resources, transcript,
                                         nthreads=self.nthreads, **kwargs)
        elif self.full_transcriber.available:
            trans = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status['error'] = 'No transcript provided and no language model for full transcription'
            return

        output = trans.transcribe(wavfile, progress_cb=on_progress,
                                  logging=logging)

        # ...remove the original upload
        os.unlink(audio_path)

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
            jsfile.write(output.to_json(indent=2))
        with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
            csvfile.write(output.to_csv())

        # Inline the alignment into the index.html file.
        with open(get_resource('www/view_alignment.html')) as template:
            htmltxt = template.read()
        htmltxt = htmltxt.replace("var INLINE_JSON;",
                                  "var INLINE_JSON=%s;" % (output.to_json()))
        with open(os.path.join(outdir, 'index.html'), 'w') as index_file:
            index_file.write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output