def transcribe(self, output_dir, **kwargs):
    """Resample the audio found in *output_dir* and force-align it to its transcript.

    The audio, resampled-audio and transcript file names come from the
    module-level ``AUDIO_FILENAME``, ``RESAMPLE_FILENAME`` and
    ``TEXT_FILENAME`` constants, all resolved relative to *output_dir*.

    Args:
        output_dir: Directory holding the input files; the resampled audio
            is written there as well.
        **kwargs: Forwarded unchanged to ``gentle.ForcedAligner``.

    Returns:
        ``-1`` when ``gentle.resample`` reports a non-zero result, otherwise
        whatever the aligner's ``transcribe`` call returns.
    """
    source_audio = os.path.join(output_dir, AUDIO_FILENAME)
    resampled_audio = os.path.join(output_dir, RESAMPLE_FILENAME)
    transcript_path = os.path.join(output_dir, TEXT_FILENAME)

    logging.info('Resampling audio file %s', source_audio)
    resample_result = gentle.resample(source_audio, resampled_audio)
    if resample_result != 0:
        logging.info('Failed to resample %s', source_audio)
        return -1

    def on_progress(update):
        # Log every key/value pair of a progress update on its own line.
        for name, value in update.items():
            logging.info('Transcribing %s, %s, %s', resampled_audio, name, value)

    logging.info('Starting to transcribe %s', output_dir)
    with open(transcript_path, 'r', encoding='utf-8') as handle:
        transcript_text = handle.read()

    aligner = gentle.ForcedAligner(
        self.resources, transcript_text, nthreads=self.nthreads, **kwargs)
    result = aligner.transcribe(
        resampled_audio, progress_cb=on_progress, logging=logging)

    logging.info('Finished transcribing %s', output_dir)
    return result
class Transcriber():
    """Run gentle alignment/transcription jobs and track their status.

    Each job is identified by a ``uid``. Its files live in
    ``<data_dir>/transcriptions/<uid>`` and its progress is kept in an
    in-memory status dict (see ``get_status``).
    """

    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        """Create the shared gentle resources and the full transcriber.

        Args:
            data_dir: Root directory under which job directories are stored.
            nthreads: Thread count passed to each ``gentle.ForcedAligner``.
            ntranscriptionthreads: Thread count passed to
                ``gentle.FullTranscriber``.
        """
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads
        self.resources = gentle.Resources()
        self.full_transcriber = gentle.FullTranscriber(
            self.resources, nthreads=ntranscriptionthreads)
        # uid -> mutable status dict, created lazily by get_status().
        self._status_dicts = {}

    def get_status(self, uid):
        """Return the status dict for *uid*, creating an empty one if needed."""
        return self._status_dicts.setdefault(uid, {})

    def out_dir(self, uid):
        """Return the directory that holds the files of job *uid*."""
        return os.path.join(self.data_dir, 'transcriptions', uid)

    # TODO(maxhawkins): refactor so this is returned by transcribe()
    def next_id(self):
        """Return an 8-character hex id whose job directory does not exist yet."""
        uid = None
        # Check the real job directory (out_dir), not data_dir/<uid>, so an
        # existing job is actually detected. `.hex` works on Python 2 and 3,
        # whereas `get_hex()` only exists on Python 2.
        while uid is None or os.path.exists(self.out_dir(uid)):
            uid = uuid.uuid4().hex[:8]
        return uid

    def transcribe(self, uid, transcript, audio, async_mode, **kwargs):
        """Encode the uploaded media, align/transcribe it and save the results.

        Progress and errors are reported through the status dict of *uid*.
        The job directory is expected to exist already; it is not created
        here.

        Args:
            uid: Job identifier.
            transcript: Transcript text. If it is blank, the full
                transcriber is used (when available) instead of forced
                alignment.
            audio: Raw content of the uploaded media file.
            async_mode: Not used by this method. It was previously named
                ``async``, which is a reserved word from Python 3.7 on.
            **kwargs: Forwarded to ``gentle.ForcedAligner``.

        Returns:
            The transcription result object, or ``None`` if encoding failed
            or no transcriber could be selected.
        """
        status = self.get_status(uid)
        status['status'] = 'STARTED'

        outdir = self.out_dir(uid)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)

        # The upload is arbitrary media, so it must be written in binary mode.
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'wb') as uploadfile:
            uploadfile.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        if gentle.resample(audio_path, wavfile) != 0:
            status['status'] = 'ERROR'
            status['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        # XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        try:
            status['duration'] = wav_obj.getnframes() / float(wav_obj.getframerate())
        finally:
            # Always release the file handle held by the wave reader.
            wav_obj.close()
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            # Mirror every progress field into the job's status dict.
            for k, v in p.items():
                status[k] = v

        if len(transcript.strip()) > 0:
            trans = gentle.ForcedAligner(self.resources, transcript,
                                         nthreads=self.nthreads, **kwargs)
        elif self.full_transcriber.available:
            trans = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status['error'] = 'No transcript provided and no language model for full transcription'
            return

        output = trans.transcribe(wavfile, progress_cb=on_progress, logging=logging)

        # ...remove the original upload
        os.unlink(audio_path)

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
            jsfile.write(output.to_json(indent=2))
        with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
            csvfile.write(output.to_csv())

        # Inline the alignment into the index.html file.
        # NOTE(review): the JSON is pasted into the page unescaped; confirm
        # whether transcript text can contain "</script>".
        with open(get_resource('www/view_alignment.html')) as template:
            htmltxt = template.read()
        htmltxt = htmltxt.replace("var INLINE_JSON;",
                                  "var INLINE_JSON=%s;" % (output.to_json()))
        with open(os.path.join(outdir, 'index.html'), 'w') as htmlfile:
            htmlfile.write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output
def transcribe(self, uid, transcript, audio, async_mode, **kwargs):
    """Encode the job's media, align/transcribe it and save the results.

    Progress and errors are reported through the status dict returned by
    ``self.get_status(uid)``. Files are written to
    ``<data_dir>/transcriptions/<uid>``, which is expected to exist already;
    it is not created here.

    Args:
        uid: Job identifier.
        transcript: Transcript text. If it is blank, the full transcriber is
            used (when available) instead of forced alignment.
        audio: Either a ``str`` that is handed directly to
            ``gentle.resample`` (the error message calls it a media URL), or
            the raw content of an uploaded media file, which is first written
            to an ``upload`` file in the job directory.
        async_mode: Not used by this method.
        **kwargs: Forwarded to ``gentle.ForcedAligner``.

    Returns:
        The transcription result object, or ``None`` if encoding failed or no
        transcriber could be selected.
    """
    status = self.get_status(uid)
    status['status'] = 'STARTED'

    outdir = os.path.join(self.data_dir, 'transcriptions', uid)
    audio_path = os.path.join(outdir, 'upload')
    is_upload = not isinstance(audio, str)

    def fail_encoding(message):
        # Record an encoding failure in the status dict and on disk.
        status['status'] = 'ERROR'
        status['error'] = message
        # Save the status so that errors are recovered on restart of the server
        # XXX: This won't work, because the endpoint will override this file
        with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
            json.dump(status, jsfile, indent=2)

    tran_path = os.path.join(outdir, 'transcript.txt')
    with open(tran_path, 'w') as tranfile:
        tranfile.write(transcript)

    if is_upload:
        with open(audio_path, 'wb') as uploadfile:
            uploadfile.write(audio)

    status['status'] = 'ENCODING'

    wavfile = os.path.join(outdir, 'a.wav')
    if is_upload:
        if gentle.resample(audio_path, wavfile) != 0:
            fail_encoding("Encoding failed. Make sure that you've uploaded a valid media file.")
            return
    elif gentle.resample(audio, wavfile) != 0:
        fail_encoding("Encoding failed. Make sure that you've referenced a valid media URL.")
        return

    # XXX: Maybe we should pass this wave object instead of the
    # file path to align_progress
    # NOTE(review): the duration is only recorded for uploads, not for
    # string sources; confirm whether that is intentional.
    if is_upload:
        wav_obj = wave.open(wavfile, 'rb')
        try:
            status['duration'] = wav_obj.getnframes() / \
                float(wav_obj.getframerate())
        finally:
            # Always release the file handle held by the wave reader.
            wav_obj.close()
    status['status'] = 'TRANSCRIBING'

    def on_progress(p):
        # Mirror every progress field into the job's status dict.
        logging.debug('transcription progress: %s', p)
        for k, v in p.items():
            status[k] = v

    if len(transcript.strip()) > 0:
        trans = gentle.ForcedAligner(self.resources, transcript,
                                     nthreads=self.nthreads, **kwargs)
    elif self.full_transcriber.available:
        trans = self.full_transcriber
    else:
        status['status'] = 'ERROR'
        status['error'] = 'No transcript provided and no language model for full transcription'
        return

    output = trans.transcribe(wavfile, progress_cb=on_progress, logging=logging)

    # ...remove the original upload
    if is_upload:
        os.unlink(audio_path)

    # Save
    with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
        jsfile.write(output.to_json(indent=2))
    with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
        csvfile.write(output.to_csv())

    # Inline the alignment into the index.html file.
    with open(get_resource('www/view_alignment.html')) as template:
        htmltxt = template.read()
    htmltxt = htmltxt.replace("var INLINE_JSON;",
                              "var INLINE_JSON=%s;" % (output.to_json()))
    with open(os.path.join(outdir, 'index.html'), 'w') as htmlfile:
        htmlfile.write(htmltxt)

    status['status'] = 'OK'

    logging.info('done with transcription.')

    return output
class Transcriber():
    """Run gentle alignment/transcription jobs and track their status.

    Each job is identified by a ``uid``. Its files live in
    ``<data_dir>/transcriptions/<uid>`` and its progress is kept in an
    in-memory status dict (see ``get_status``). In addition to the alignment
    files, this variant writes a per-sentence timing file, ``time.csv``.
    """

    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        """Create the shared gentle resources and the full transcriber.

        Args:
            data_dir: Root directory under which job directories are stored.
            nthreads: Thread count passed to each ``gentle.ForcedAligner``.
            ntranscriptionthreads: Thread count passed to
                ``gentle.FullTranscriber``.
        """
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads
        self.resources = gentle.Resources()
        self.full_transcriber = gentle.FullTranscriber(
            self.resources, nthreads=ntranscriptionthreads)
        # uid -> mutable status dict, created lazily by get_status().
        self._status_dicts = {}

    def get_status(self, uid):
        """Return the status dict for *uid*, creating an empty one if needed."""
        return self._status_dicts.setdefault(uid, {})

    def out_dir(self, uid):
        """Return the directory that holds the files of job *uid*."""
        return os.path.join(self.data_dir, 'transcriptions', uid)

    # TODO(maxhawkins): refactor so this is returned by transcribe()
    def next_id(self):
        """Return an 8-character hex id whose job directory does not exist yet."""
        uid = None
        # Check the real job directory (out_dir), not data_dir/<uid>, so an
        # existing job is actually detected. `.hex` works on Python 2 and 3,
        # whereas `get_hex()` only exists on Python 2.
        while uid is None or os.path.exists(self.out_dir(uid)):
            uid = uuid.uuid4().hex[:8]
        return uid

    def _write_sentence_times(self, trans, output, outdir):
        """Best-effort: write per-sentence start/end times to ``time.csv``.

        Any failure is logged and swallowed so that it cannot abort the job.
        The sentence data comes from ``trans.ms``.
        NOTE(review): presumably only some transcriber types expose ``ms``,
        and ``get_sentences_index()`` presumably returns the text offsets at
        which sentences end -- confirm both against the gentle fork in use.
        """
        try:
            sens_end_index = trans.ms.get_sentences_index()
            # json.loads() no longer accepts `encoding` (removed in Python
            # 3.9) and `strict=True` is already the default.
            words = json.loads(output.to_json())['words']

            time_sentences_index = []
            ss_dot = 0       # index of the sentence currently being filled
            s_pos = None     # None until the first aligned word is seen
            time_pos = 0     # index of the word supplying the start time
            for i, w in enumerate(words):
                # Only words whose case is "success" are considered.
                if w["case"] != "success":
                    continue
                end_v = w['endOffset']
                start_v = w['startOffset']
                if s_pos is None:
                    s_pos = start_v
                    time_pos = i
                if end_v >= sens_end_index[ss_dot]:
                    ss_dot += 1
                    time_sentences_index.append(
                        (words[time_pos]["start"], words[i]["end"]))
                    # NOTE(review): the next sentence starts at the word
                    # that closed this one; confirm this is intended.
                    time_pos = i
                    s_pos = end_v
            if len(sens_end_index) != len(time_sentences_index):
                # Close the trailing sentence with the last word of the text.
                # NOTE(review): raises (and skips the file) if that word has
                # no "end" value; confirm whether that can happen.
                time_sentences_index.append(
                    (words[time_pos]["start"], words[-1]["end"]))

            sens_str = trans.ms.get_sentences_string()
            pieces = []
            for i, t in enumerate(time_sentences_index):
                pieces.append("{{time}}" + str(round(float(t[0]), 2)) + "/" +
                              str(round(float(t[1]), 2)) + "{{end}}\n")
                pieces.append("{{raw}}" + sens_str[i] + "{{end}}\n")
            with open(os.path.join(outdir, 'time.csv'), 'w') as timefile:
                timefile.write("".join(pieces))
        except Exception:
            # Deliberately broad: the timing file is optional.
            logging.exception('Could not write sentence timings for %s', outdir)

    def transcribe(self, uid, transcript, audio, async_mode, **kwargs):
        """Encode the uploaded media, align/transcribe it and save the results.

        Progress and errors are reported through the status dict of *uid*.
        The job directory is expected to exist already; it is not created
        here.

        Args:
            uid: Job identifier.
            transcript: Transcript text. If it is blank, the full
                transcriber is used (when available) instead of forced
                alignment.
            audio: Raw content of the uploaded media file.
            async_mode: Not used by this method. It was previously named
                ``async``, which is a reserved word from Python 3.7 on.
            **kwargs: Forwarded to ``gentle.ForcedAligner``.

        Returns:
            The transcription result object, or ``None`` if encoding failed
            or no transcriber could be selected.
        """
        status = self.get_status(uid)
        status['status'] = 'STARTED'

        outdir = self.out_dir(uid)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)

        # The upload is arbitrary media, so it must be written in binary mode.
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'wb') as uploadfile:
            uploadfile.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        if gentle.resample(audio_path, wavfile) != 0:
            status['status'] = 'ERROR'
            status['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        # XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        try:
            status['duration'] = wav_obj.getnframes() / float(
                wav_obj.getframerate())
        finally:
            # Always release the file handle held by the wave reader.
            wav_obj.close()
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            # Mirror every progress field into the job's status dict.
            for k, v in p.items():
                status[k] = v

        if len(transcript.strip()) > 0:
            trans = gentle.ForcedAligner(self.resources, transcript,
                                         nthreads=self.nthreads, **kwargs)
        elif self.full_transcriber.available:
            trans = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status['error'] = 'No transcript provided and no language model for full transcription'
            return

        output = trans.transcribe(wavfile, progress_cb=on_progress, logging=logging)

        # ...remove the original upload
        os.unlink(audio_path)

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
            jsfile.write(output.to_json(indent=2))
        with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
            csvfile.write(output.to_csv())

        # Add the per-sentence timing file (optional, never fatal).
        self._write_sentence_times(trans, output, outdir)

        # Inline the alignment into the index.html file.
        with open(get_resource('www/view_alignment.html')) as template:
            htmltxt = template.read()
        htmltxt = htmltxt.replace("var INLINE_JSON;",
                                  "var INLINE_JSON=%s;" % (output.to_json()))
        with open(os.path.join(outdir, 'index.html'), 'w') as htmlfile:
            htmlfile.write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output