Esempio n. 1
0
def transcribe(item):
    """Transcribe ``item`` with koemei and save the resulting transcript.

    The item's ``audio_url`` is used when it is truthy, otherwise its
    ``video_url``. The media is uploaded, a transcription is requested, and
    the status route is polled every five minutes until transcript data is
    returned. While waiting, the current task publishes a ``PROGRESS`` state
    carrying the reported progress.

    The returned data is parsed with ``reader.read`` and stored through
    ``save_transcription`` together with the raw data serialized by
    ``ET.tostring``.
    """
    source_url = item.audio_url or item.video_url

    upload_id = koemei.upload_direct(source_url)
    status_route = koemei.request_transcription(upload_id)

    poll_interval = 5 * 60  # seconds to wait between two status checks
    result = None
    while result is None:
        state, fraction, result = koemei.transcription_status(status_route)
        logger.info("{}: {}%".format(state, fraction * 100))
        if result is None:
            # Not finished yet: publish the progress and wait before retrying.
            current_task.update_state(
                state='PROGRESS',
                meta=dict(progress=fraction, eta=None,
                          time=None, duration=None))
            time.sleep(poll_interval)

    parsed = reader.read(result)
    # Keep the untouched engine output next to the parsed transcript.
    raw_xml = {
        'content_type': 'text/xml',
        'file_name': 'koemei.transcript.xml',
        'body': ET.tostring(result),
    }
    save_transcription(
        item,
        clips=parsed['clips'],
        speakers=parsed['speakers'],
        engine=current_task.name,
        raw_files=[raw_xml],
        logger=logger,
    )
Esempio n. 2
0
def transcribe(item):
    """Run a koemei transcription for ``item`` and persist the outcome.

    Steps, in order:

    1. Pick the media URL: ``item.audio_url`` if truthy, else
       ``item.video_url``.
    2. Upload it to koemei and request a transcription.
    3. Poll the transcription status; as long as no data is returned,
       report a ``PROGRESS`` state on the current task and sleep for five
       minutes before asking again.
    4. Parse the returned data with ``reader.read`` and hand the clips,
       speakers and the raw serialized data to ``save_transcription``.
    """
    media_url = item.audio_url
    if not media_url:
        media_url = item.video_url

    route = koemei.request_transcription(koemei.upload_direct(media_url))

    while True:
        status, progress, data = koemei.transcription_status(route)
        percent = progress * 100
        logger.info("{}: {}%".format(status, percent))

        if data is None:
            # Still processing: expose the progress, then wait five minutes.
            waiting_meta = {
                'progress': progress,
                'eta': None,
                'time': None,
                'duration': None,
            }
            current_task.update_state(state='PROGRESS', meta=waiting_meta)
            time.sleep(5 * 60)
            continue

        break

    transcript = reader.read(data)
    xml_attachment = dict(
        content_type='text/xml',
        file_name='koemei.transcript.xml',
        body=ET.tostring(data),
    )
    save_transcription(
        item,
        clips=transcript['clips'],
        speakers=transcript['speakers'],
        engine=current_task.name,
        raw_files=[xml_attachment],
        logger=logger,
    )
Esempio n. 3
0
def transcribe(item):
    """Transcribe ``item`` with sphinx, reporting progress clip by clip.

    The item's ``audio_url`` is used when it is truthy, otherwise its
    ``video_url``. The media is transcoded via ``sphinx.transcode`` and the
    resulting wav file is fed to ``sphinx.transcribe_wavfile``. For every
    clip produced, the current task publishes a ``PROGRESS`` state holding
    the clip's out time, the item duration, the fraction completed and an
    estimated remaining time. All clips are finally stored through
    ``save_transcription``.

    The estimate extrapolates the average rate since the start of the
    transcription. It is ``None`` (as in the initial state) whenever it
    cannot be computed, i.e. when no progress has been made yet or the item
    duration is missing or zero; previously those cases raised an error
    from the division.
    """
    url = item.audio_url if item.audio_url else item.video_url

    # Only the wav file is used below; the first returned value is ignored.
    _infile, wavfile = sphinx.transcode(url,
                                        current_task=current_task,
                                        log=logger.info)

    current_task.update_state(
        state='PROGRESS',
        meta={'time': 0, 'duration': item.duration, 'progress': 0, 'eta': None}
        )

    clips = []
    started_at = time.time()

    for clip in sphinx.transcribe_wavfile(wavfile, log=logger.info):
        clips.append(clip)

        duration = item.duration
        # Without a usable duration the fraction done is unknown; report 0.
        progress = clip.outtime / duration if duration else 0
        elapsed = time.time() - started_at

        # Remaining time = remaining fraction * (seconds per unit of progress).
        # Guard against progress == 0 (e.g. a clip ending at time 0), which
        # would otherwise divide by zero.
        if progress > 0:
            eta = (1 - progress) * (elapsed / progress)
            eta_text = format_duration(int(eta))
        else:
            eta = None
            eta_text = 'unknown'

        logger.info(u"{:.0f}s {:.0f}% eta:{} '{}'".format(
                clip.outtime, progress * 100,
                eta_text,
                clip.caption_text))

        current_task.update_state(
            state='PROGRESS',
            meta={'time': clip.outtime, 'duration': item.duration,
                  'progress': progress, 'eta': eta}
            )

    # NOTE: no raw engine output is passed to save_transcription here.
    # Disabled sketch of what a raw_files argument would look like:
    # raw_files=[dict(content_type='text/plain',
    #                 file_name='sphinx.output.text',
    #                 body='')]

    save_transcription(item, clips=clips, engine=current_task.name,
                       logger=logger)