def _read_s3_object(bucket_var, key_var):
    """Return the raw bytes of the S3 object named by two environment variables.

    Args:
        bucket_var: Name of the environment variable holding the bucket name.
        key_var: Name of the environment variable holding the object key.
    """
    s3_object = boto3.resource('s3').Object(
        os.environ.get(bucket_var), os.environ.get(key_var))
    return s3_object.get()['Body'].read()


def align(nthreads=4, ntranscriptionthreads=2,
          data_dir=get_datadir('webdata')):
    """Run one alignment job whose inputs and output are set by environment variables.

    Inputs read from the environment:
        INPUT_MEDIA_URL: used as the audio value when no media bucket is set.
        INPUT_MEDIA_S3_BUCKET / INPUT_MEDIA_S3_KEY: audio bytes read from S3;
            takes precedence over INPUT_MEDIA_URL.
        INPUT_TRANSCRIPT_URL: transcript fetched over HTTP and decoded as UTF-8.
        INPUT_TRANSCRIPT_S3_BUCKET / INPUT_TRANSCRIPT_S3_KEY: transcript read
            from S3 and decoded as UTF-8 (only when no transcript URL is set).
        OUTPUT_S3_BUCKET / OUTPUT_S3_KEY: where the JSON result is uploaded;
            the key defaults to '<uid>/align.json'. Without a bucket the JSON
            is written to the log instead.

    Args:
        nthreads: Forwarded to ``Transcriber``.
        ntranscriptionthreads: Forwarded to ``Transcriber``.
        data_dir: Working directory; created if missing.
    """
    logging.info("ALIGN")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_dir, exist_ok=True)

    transcriber = Transcriber(data_dir, nthreads=nthreads,
                              ntranscriptionthreads=ntranscriptionthreads)
    uid = transcriber.next_id()

    # NOTE(review): the URL is forwarded as a plain string and never
    # downloaded, unlike the transcript URL below — confirm that
    # Transcriber.transcribe accepts this.
    audio = os.environ.get('INPUT_MEDIA_URL', '')
    if 'INPUT_MEDIA_S3_BUCKET' in os.environ:
        audio = _read_s3_object('INPUT_MEDIA_S3_BUCKET', 'INPUT_MEDIA_S3_KEY')

    transcript = ''
    if 'INPUT_TRANSCRIPT_URL' in os.environ:
        transcript_url = os.environ.get('INPUT_TRANSCRIPT_URL')
        with urllib.request.urlopen(transcript_url) as response:
            transcript = response.read().decode('utf-8')
    elif 'INPUT_TRANSCRIPT_S3_BUCKET' in os.environ:
        transcript = _read_s3_object(
            'INPUT_TRANSCRIPT_S3_BUCKET',
            'INPUT_TRANSCRIPT_S3_KEY').decode('utf-8')

    # This entry point always runs synchronously with both options enabled.
    async_mode = False
    kwargs = {
        'disfluency': True,
        'conservative': True,
        'disfluencies': {'uh', 'um'},
    }

    # Deliberately not exist_ok: the per-job directory is expected to be new.
    outdir = os.path.join(transcriber.data_dir, 'transcriptions', uid)
    os.makedirs(outdir)

    output = transcriber.transcribe(uid, transcript, audio, async_mode,
                                    **kwargs)

    if 'OUTPUT_S3_BUCKET' in os.environ:
        output_key = os.environ.get('OUTPUT_S3_KEY',
                                    '{}/align.json'.format(uid))
        boto3.resource('s3').Object(
            os.environ.get('OUTPUT_S3_BUCKET'), output_key).put(
                Body=output.to_json().encode('UTF-8'))
    else:
        logging.info(output.to_json())
def serve(args):
    """Start the transcription web service and run the reactor.

    Reads ``port``, ``host``, ``nthreads``, ``ntranscriptionthreads`` and
    ``webhook`` from ``args``. Files are served from the 'webdata' data
    directory, with the transcriptions controller mounted at
    ``transcriptions``.
    """
    logging.info('SERVE %d, %s', args.port, args.host)

    # Make sure the directory we serve from exists.
    data_dir = get_datadir('webdata')
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    transcriber = Transcriber(
        nthreads=args.nthreads,
        ntranscriptionthreads=args.ntranscriptionthreads,
    )
    controller = TranscriptionsController(
        data_dir,
        transcriber,
        webhook_url=args.webhook,
    )

    # Static root with the controller attached as a child resource.
    root = File(data_dir)
    root.putChild(b'transcriptions', controller)
    site = Site(root)

    logging.info('about to listen')
    reactor.listenTCP(args.port, site, interface=args.host)
    logging.info('listening')

    reactor.run(installSignalHandlers=1)
def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0,
          nthreads=4, ntranscriptionthreads=2,
          data_dir=get_datadir('webdata')):
    """Serve the web UI, transcription endpoint and zip endpoint over TCP.

    Args:
        port: TCP port to listen on.
        interface: Address to bind to.
        installSignalHandlers: Passed straight through to ``reactor.run``.
        nthreads: Forwarded to ``Transcriber``.
        ntranscriptionthreads: Forwarded to ``Transcriber``.
        data_dir: Root directory served as static files; a ``zip``
            subdirectory is created inside it.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    # Create the data directory first, then its 'zip' subdirectory.
    zip_dir = os.path.join(data_dir, 'zip')
    for directory in (data_dir, zip_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    root = File(data_dir)

    # Bundled static pages, registered in a fixed order.
    static_children = (
        (b'', 'www/index.html'),
        (b'status.html', 'www/status.html'),
        (b'preloader.gif', 'www/preloader.gif'),
    )
    for child_name, resource_path in static_children:
        root.putChild(child_name, File(get_resource(resource_path)))

    transcriber = Transcriber(
        data_dir,
        nthreads=nthreads,
        ntranscriptionthreads=ntranscriptionthreads,
    )
    root.putChild(b'transcriptions', TranscriptionsController(transcriber))
    root.putChild(b'zip', TranscriptionZipper(zip_dir, transcriber))

    site = Site(root)
    logging.info("about to listen")
    reactor.listenTCP(port, site, interface=interface)
    logging.info("listening")
    reactor.run(installSignalHandlers=installSignalHandlers)
def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0,
          nthreads=4, ntranscriptionthreads=2,
          data_dir=get_datadir('webdata'), modelDir='exp'):
    """Serve the web UI, transcription endpoint and zip endpoint over TCP.

    Args:
        port: TCP port to listen on.
        interface: Address to bind to.
        installSignalHandlers: Passed straight through to ``reactor.run``.
        nthreads: Forwarded to ``Transcriber``.
        ntranscriptionthreads: Forwarded to ``Transcriber``.
        data_dir: Root directory served as static files; a ``zip``
            subdirectory is created inside it.
        modelDir: Model directory handed to ``gentle.Resources`` and
            ``Transcriber``.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    zip_dir = os.path.join(data_dir, 'zip')
    if not os.path.exists(zip_dir):
        os.makedirs(zip_dir)

    # Child names are bytes literals: Twisted expects bytes path segments on
    # Python 3, and on Python 2 b'...' is the same type as the previous str
    # literals, so this stays backward-compatible.
    f = File(data_dir)
    f.putChild(b'', File(get_resource('www/index.html')))
    f.putChild(b'status.html', File(get_resource('www/status.html')))
    f.putChild(b'preloader.gif', File(get_resource('www/preloader.gif')))

    # The result is not used below; the call is kept because constructing it
    # may validate modelDir — TODO confirm and drop if it has no side effects.
    resources = gentle.Resources(modelDir)

    trans = Transcriber(data_dir, nthreads=nthreads,
                        ntranscriptionthreads=ntranscriptionthreads,
                        modelDir=modelDir)

    config = trans.config
    logging.info("CONFIG: samplerate %d, silencephones %s, context-width %s",
                 config['samplerate'], config['silencephones'],
                 config['context-width'])

    trans_ctrl = TranscriptionsController(trans)
    f.putChild(b'transcriptions', trans_ctrl)

    trans_zippr = TranscriptionZipper(zip_dir, trans)
    f.putChild(b'zip', trans_zippr)

    s = Site(f)
    logging.info("about to listen")
    reactor.listenTCP(port, s, interface=interface)
    logging.info("listening")
    reactor.run(installSignalHandlers=installSignalHandlers)