Exemple #1
0
def align(nthreads=4,
          ntranscriptionthreads=2,
          data_dir=get_datadir('webdata')):
    """Run a single alignment job configured through environment variables.

    Environment variables read:
        INPUT_MEDIA_URL: used verbatim as the audio argument when no S3
            media bucket is configured (defaults to ``''``).
        INPUT_MEDIA_S3_BUCKET, INPUT_MEDIA_S3_KEY: when the bucket is set,
            the object's bytes are downloaded and used as the audio instead.
        INPUT_TRANSCRIPT_URL: when set, the transcript is downloaded from
            this URL and decoded as UTF-8.
        INPUT_TRANSCRIPT_S3_BUCKET, INPUT_TRANSCRIPT_S3_KEY: consulted only
            when no transcript URL is set; the object is decoded as UTF-8.
        OUTPUT_S3_BUCKET, OUTPUT_S3_KEY: when the bucket is set, the JSON
            result is uploaded there (the key defaults to
            ``<uid>/align.json``); otherwise the JSON is written to the log.

    Args:
        nthreads: forwarded to ``Transcriber``.
        ntranscriptionthreads: forwarded to ``Transcriber``.
        data_dir: working directory handed to ``Transcriber``; created if it
            does not exist yet.
    """
    logging.info("ALIGN")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_dir, exist_ok=True)

    transcriber = Transcriber(data_dir,
                              nthreads=nthreads,
                              ntranscriptionthreads=ntranscriptionthreads)

    uid = transcriber.next_id()

    env = os.environ

    # An S3 media object, when configured, takes precedence over the URL.
    audio = env.get('INPUT_MEDIA_URL', '')
    if 'INPUT_MEDIA_S3_BUCKET' in env:
        audio = boto3.resource('s3').Object(
            env.get('INPUT_MEDIA_S3_BUCKET'),
            env.get('INPUT_MEDIA_S3_KEY')).get()['Body'].read()

    # The transcript URL takes precedence over the S3 transcript object.
    transcript = ''
    if 'INPUT_TRANSCRIPT_URL' in env:
        with urllib.request.urlopen(env.get('INPUT_TRANSCRIPT_URL')) as t:
            transcript = t.read().decode('utf-8')
    elif 'INPUT_TRANSCRIPT_S3_BUCKET' in env:
        transcript = boto3.resource('s3').Object(
            env.get('INPUT_TRANSCRIPT_S3_BUCKET'),
            env.get('INPUT_TRANSCRIPT_S3_KEY')).get()['Body'].read(
            ).decode('utf-8')

    # Batch mode: the options are fixed instead of coming from a request.
    async_mode = False
    kwargs = {
        'disfluency': True,
        'conservative': True,
        'disfluencies': {'uh', 'um'},
    }

    # Kept strict (no exist_ok): an already existing directory for this uid
    # still raises, exactly as before.
    outdir = os.path.join(transcriber.data_dir, 'transcriptions', uid)
    os.makedirs(outdir)

    output = transcriber.transcribe(uid, transcript, audio, async_mode,
                                    **kwargs)

    if 'OUTPUT_S3_BUCKET' in env:
        # str.encode() already returns bytes; no extra bytes() copy needed.
        boto3.resource('s3').Object(
            env.get('OUTPUT_S3_BUCKET'),
            env.get('OUTPUT_S3_KEY', '{}/align.json'.format(uid))).put(
                Body=output.to_json().encode('UTF-8'))
    else:
        logging.info(output.to_json())
Exemple #2
0
def serve(args):
    """Start the web server exposing the transcription controller.

    Blocks inside ``reactor.run`` until the reactor is stopped.

    Args:
        args: object providing the attributes ``port``, ``host``,
            ``nthreads``, ``ntranscriptionthreads`` and ``webhook``
            (presumably a parsed command-line namespace; confirm against
            the caller).
    """
    logging.info('SERVE %d, %s', args.port, args.host)

    data_dir = get_datadir('webdata')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_dir, exist_ok=True)

    transcriber = Transcriber(nthreads=args.nthreads,
                              ntranscriptionthreads=args.ntranscriptionthreads)
    controller = TranscriptionsController(data_dir, transcriber,
                                          webhook_url=args.webhook)

    # Root resource is built from data_dir; the controller is mounted
    # under /transcriptions.
    root = File(data_dir)
    root.putChild(b'transcriptions', controller)
    site = Site(root)

    logging.info('about to listen')
    reactor.listenTCP(args.port, site, interface=args.host)
    logging.info('listening')

    reactor.run(installSignalHandlers=1)
Exemple #3
0
def serve(port=8765,
          interface='0.0.0.0',
          installSignalHandlers=0,
          nthreads=4,
          ntranscriptionthreads=2,
          data_dir=get_datadir('webdata')):
    """Start the web server with the UI pages, transcription and zip endpoints.

    Blocks inside ``reactor.run`` until the reactor is stopped.

    Args:
        port: TCP port to listen on.
        interface: address to bind the listening socket to.
        installSignalHandlers: passed through to ``reactor.run``.
        nthreads: forwarded to ``Transcriber``.
        ntranscriptionthreads: forwarded to ``Transcriber``.
        data_dir: directory used to build the root resource and handed to
            ``Transcriber``; created if missing, along with its ``zip``
            subdirectory.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    # Creating zip_dir recursively also creates data_dir; exist_ok avoids
    # the check-then-create race of separate exists() tests.
    os.makedirs(data_dir, exist_ok=True)
    zip_dir = os.path.join(data_dir, 'zip')
    os.makedirs(zip_dir, exist_ok=True)

    root = File(data_dir)

    # Bundled front-end assets.
    root.putChild(b'', File(get_resource('www/index.html')))
    root.putChild(b'status.html', File(get_resource('www/status.html')))
    root.putChild(b'preloader.gif', File(get_resource('www/preloader.gif')))

    transcriber = Transcriber(data_dir,
                              nthreads=nthreads,
                              ntranscriptionthreads=ntranscriptionthreads)
    root.putChild(b'transcriptions', TranscriptionsController(transcriber))
    root.putChild(b'zip', TranscriptionZipper(zip_dir, transcriber))

    site = Site(root)
    logging.info("about to listen")
    reactor.listenTCP(port, site, interface=interface)
    logging.info("listening")

    reactor.run(installSignalHandlers=installSignalHandlers)
Exemple #4
0
def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0,
          nthreads=4, ntranscriptionthreads=2,
          data_dir=get_datadir('webdata')):
    """Serve the web UI, the transcription controller and the zip endpoint.

    Blocks inside ``reactor.run`` until the reactor is stopped.

    Args:
        port: TCP port to listen on.
        interface: address to bind the listening socket to.
        installSignalHandlers: passed through to ``reactor.run``.
        nthreads: forwarded to ``Transcriber``.
        ntranscriptionthreads: forwarded to ``Transcriber``.
        data_dir: directory used to build the root resource and handed to
            ``Transcriber``; created if missing, along with its ``zip``
            subdirectory.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    # exist_ok replaces the race-prone "exists() then makedirs()" pairs.
    os.makedirs(data_dir, exist_ok=True)
    zip_dir = os.path.join(data_dir, 'zip')
    os.makedirs(zip_dir, exist_ok=True)

    f = File(data_dir)

    # Bundled front-end assets.
    f.putChild(b'', File(get_resource('www/index.html')))
    f.putChild(b'status.html', File(get_resource('www/status.html')))
    f.putChild(b'preloader.gif', File(get_resource('www/preloader.gif')))

    trans = Transcriber(data_dir, nthreads=nthreads,
                        ntranscriptionthreads=ntranscriptionthreads)
    trans_ctrl = TranscriptionsController(trans)
    f.putChild(b'transcriptions', trans_ctrl)

    trans_zippr = TranscriptionZipper(zip_dir, trans)
    f.putChild(b'zip', trans_zippr)

    s = Site(f)
    logging.info("about to listen")
    reactor.listenTCP(port, s, interface=interface)
    logging.info("listening")

    reactor.run(installSignalHandlers=installSignalHandlers)
Exemple #5
0
def serve(port=8765, interface='0.0.0.0', installSignalHandlers=0,
          nthreads=4, ntranscriptionthreads=2,
          data_dir=get_datadir('webdata'), modelDir='exp'):
    """Serve the web UI, the transcription controller and the zip endpoint.

    Blocks inside ``reactor.run`` until the reactor is stopped.

    Args:
        port: TCP port to listen on.
        interface: address to bind the listening socket to.
        installSignalHandlers: passed through to ``reactor.run``.
        nthreads: forwarded to ``Transcriber``.
        ntranscriptionthreads: forwarded to ``Transcriber``.
        data_dir: directory used to build the root resource and handed to
            ``Transcriber``; created if missing, along with its ``zip``
            subdirectory.
        modelDir: model directory passed to ``gentle.Resources`` and
            ``Transcriber``.
    """
    logging.info("SERVE %d, %s, %d", port, interface, installSignalHandlers)

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    zip_dir = os.path.join(data_dir, 'zip')
    if not os.path.exists(zip_dir):
        os.makedirs(zip_dir)

    f = File(data_dir)

    # Child path segments are bytes literals: Twisted dispatches on bytes
    # under Python 3, and on Python 2 b'...' is the same type as '...'.
    f.putChild(b'', File(get_resource('www/index.html')))
    f.putChild(b'status.html', File(get_resource('www/status.html')))
    f.putChild(b'preloader.gif', File(get_resource('www/preloader.gif')))

    # The result is not used here; the call is kept in case construction has
    # side effects (presumably it validates modelDir - TODO confirm).
    gentle.Resources(modelDir)
    trans = Transcriber(data_dir, nthreads=nthreads,
                        ntranscriptionthreads=ntranscriptionthreads,
                        modelDir=modelDir)
    config = trans.config
    logging.info("CONFIG: samplerate %d, silencephones %s, context-width %s",
                 config['samplerate'], config['silencephones'],
                 config['context-width'])
    trans_ctrl = TranscriptionsController(trans)
    f.putChild(b'transcriptions', trans_ctrl)

    trans_zippr = TranscriptionZipper(zip_dir, trans)
    f.putChild(b'zip', trans_zippr)

    s = Site(f)
    logging.info("about to listen")
    reactor.listenTCP(port, s, interface=interface)
    logging.info("listening")

    reactor.run(installSignalHandlers=installSignalHandlers)