def transcribe(item):
    """Transcribe ``item`` through koemei and store the result.

    The audio URL is used when it is set, otherwise the video URL. The URL
    is handed to ``koemei.upload_direct``, a transcription is requested for
    the returned id, and ``koemei.transcription_status`` is polled every
    five minutes until it returns data. The data is parsed with
    ``reader.read`` and saved, together with its XML serialisation, via
    ``save_transcription``.

    While waiting, the current task's state is set to ``PROGRESS`` with the
    latest progress value; eta, time and duration are reported as ``None``.
    """
    if item.audio_url:
        media_url = item.audio_url
    else:
        media_url = item.video_url

    upload_id = koemei.upload_direct(media_url)
    status_route = koemei.request_transcription(upload_id)

    poll_interval = 5 * 60  # seconds between two status checks

    while True:
        state, fraction, xml_data = koemei.transcription_status(status_route)
        logger.info("{}: {}%".format(state, fraction * 100))

        if xml_data is None:
            # Nothing to read yet: publish the progress and wait.
            current_task.update_state(
                state='PROGRESS',
                meta={
                    'progress': fraction,
                    'eta': None,
                    'time': None,
                    'duration': None,
                },
            )
            time.sleep(poll_interval)
        else:
            break

    parsed = reader.read(xml_data)

    # Keep the raw response next to the parsed transcript.
    raw_entry = {
        'content_type': 'text/xml',
        'file_name': 'koemei.transcript.xml',
        'body': ET.tostring(xml_data),
    }

    save_transcription(
        item,
        clips=parsed['clips'],
        speakers=parsed['speakers'],
        engine=current_task.name,
        raw_files=[raw_entry],
        logger=logger,
    )
def transcribe(item):
    """Run a koemei transcription for ``item`` and save what comes back.

    Steps, in order:

    1. Pick ``item.audio_url``, falling back to ``item.video_url`` when the
       audio URL is falsy, and pass it to ``koemei.upload_direct``.
    2. Request a transcription and poll ``koemei.transcription_status``,
       sleeping five minutes between polls, until it yields data.
    3. Parse the data with ``reader.read`` and hand the clips, speakers and
       the serialised XML to ``save_transcription``.

    Each unsuccessful poll updates the current task to the ``PROGRESS``
    state with the reported progress (eta, time and duration are ``None``).
    """
    source_url = item.audio_url or item.video_url
    route = koemei.request_transcription(koemei.upload_direct(source_url))

    result = None
    while result is None:
        status, progress, result = koemei.transcription_status(route)
        logger.info("{}: {}%".format(status, progress * 100))
        if result is None:
            # Still pending: report progress, then wait before polling again.
            pending_meta = dict(progress=progress, eta=None, time=None, duration=None)
            current_task.update_state(state='PROGRESS', meta=pending_meta)
            time.sleep(5 * 60)

    transcript = reader.read(result)
    xml_body = ET.tostring(result)
    raw_files = [
        dict(
            content_type='text/xml',
            file_name='koemei.transcript.xml',
            body=xml_body,
        ),
    ]

    save_transcription(
        item,
        clips=transcript['clips'],
        speakers=transcript['speakers'],
        engine=current_task.name,
        raw_files=raw_files,
        logger=logger,
    )
def transcribe(item):
    """Transcribe ``item`` with sphinx and save the resulting clips.

    The audio URL is used when it is set, otherwise the video URL. The media
    is transcoded with ``sphinx.transcode`` and the resulting wav file is fed
    to ``sphinx.transcribe_wavfile``. Every clip it yields is collected,
    logged, and reported through ``current_task.update_state`` in the
    ``PROGRESS`` state. When the iteration ends, the clips are passed to
    ``save_transcription``.

    Progress is ``clip.outtime / item.duration``. The ETA extrapolates the
    time elapsed since transcription started over the remaining fraction.
    It is reported as ``None`` (and logged as ``?``) while no estimate is
    possible, i.e. when progress is still zero or ``item.duration`` is
    falsy, instead of raising ``ZeroDivisionError`` and aborting the task.
    """
    url = item.audio_url if item.audio_url else item.video_url
    # Only the wav file is consumed below; the first returned value is unused.
    _infile, wavfile = sphinx.transcode(
        url, current_task=current_task, log=logger.info)

    current_task.update_state(
        state='PROGRESS',
        meta={'time': 0, 'duration': item.duration, 'progress': 0, 'eta': None},
    )

    clips = []
    started = time.time()
    for clip in sphinx.transcribe_wavfile(wavfile, log=logger.info):
        clips.append(clip)

        # A zero/missing duration must not abort the transcription itself;
        # progress is then simply reported as 0.
        progress = clip.outtime / item.duration if item.duration else 0
        elapsed = time.time() - started

        if progress > 0:
            # Time per unit of progress so far, scaled to what is left.
            eta = (1 - progress) * (elapsed / progress)
            eta_text = format_duration(int(eta))
        else:
            # No progress yet (e.g. a clip ending at 0s): no estimate.
            eta = None
            eta_text = '?'

        logger.info(u"{:.0f}s {:.0f}% eta:{} '{}'".format(
            clip.outtime, progress * 100, eta_text, clip.caption_text))

        current_task.update_state(
            state='PROGRESS',
            meta={
                'time': clip.outtime,
                'duration': item.duration,
                'progress': progress,
                'eta': eta,
            },
        )

    # No raw engine output is attached for sphinx; only the clips are saved.
    save_transcription(item, clips=clips, engine=current_task.name, logger=logger)