Esempio n. 1
0
    def transcribe(self, output_dir, **kwargs):
        orig_audio = os.path.join(output_dir, AUDIO_FILENAME)
        resample_audio = os.path.join(output_dir, RESAMPLE_FILENAME)
        transcript = os.path.join(output_dir, TEXT_FILENAME)

        logging.info('Resampling audio file %s', orig_audio)
        if gentle.resample(orig_audio, resample_audio) != 0:
            logging.info('Failed to resample %s', orig_audio)
            return -1

        def on_progress(p):
            for k,v in p.items():
                logging.info('Transcribing %s, %s, %s', resample_audio, k, v)

        logging.info('Starting to transcribe %s', output_dir)
        with open(transcript, 'r', encoding='utf-8') as file:
            text = file.read()
            transcriber = gentle.ForcedAligner(self.resources, text, nthreads=self.nthreads, **kwargs)
            output = transcriber.transcribe(resample_audio, progress_cb=on_progress, logging=logging)
        logging.info('Finished transcribing %s', output_dir)
        return output
Esempio n. 2
0
class Transcriber():
    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads
        self.resources = gentle.Resources()

        self.full_transcriber = gentle.FullTranscriber(self.resources, nthreads=ntranscriptionthreads)
        self._status_dicts = {}

    def get_status(self, uid):
        return self._status_dicts.setdefault(uid, {})

    def out_dir(self, uid):
        return os.path.join(self.data_dir, 'transcriptions', uid)

    # TODO(maxhawkins): refactor so this is returned by transcribe()
    def next_id(self):
        uid = None
        while uid is None or os.path.exists(os.path.join(self.data_dir, uid)):
            uid = uuid.uuid4().get_hex()[:8]
        return uid

    def transcribe(self, uid, transcript, audio, async, **kwargs):

        status = self.get_status(uid)

        status['status'] = 'STARTED'
        output = {
            'transcript': transcript
        }

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'w') as wavfile:
            wavfile.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        if gentle.resample(os.path.join(outdir, 'upload'), wavfile) != 0:
            status['status'] = 'ERROR'
            status['error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        #XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        status['duration'] = wav_obj.getnframes() / float(wav_obj.getframerate())
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            for k,v in p.items():
                status[k] = v

        if len(transcript.strip()) > 0:
            trans = gentle.ForcedAligner(self.resources, transcript, nthreads=self.nthreads, **kwargs)
        elif self.full_transcriber.available:
            trans = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status['error']  = 'No transcript provided and no language model for full transcription'
            return

        output = trans.transcribe(wavfile, progress_cb=on_progress, logging=logging)

        # ...remove the original upload
        os.unlink(os.path.join(outdir, 'upload'))

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
            jsfile.write(output.to_json(indent=2))
        with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
            csvfile.write(output.to_csv())

        # Inline the alignment into the index.html file.
        htmltxt = open(get_resource('www/view_alignment.html')).read()
        htmltxt = htmltxt.replace("var INLINE_JSON;", "var INLINE_JSON=%s;" % (output.to_json()));
        open(os.path.join(outdir, 'index.html'), 'w').write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output
Esempio n. 3
0
    def transcribe(self, uid, transcript, audio, async_mode, **kwargs):

        status = self.get_status(uid)

        status['status'] = 'STARTED'
        output = {'transcript': transcript}

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)
        if not isinstance(audio, str):
            audio_path = os.path.join(outdir, 'upload')
            with open(audio_path, 'wb') as wavfile:
                wavfile.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        # if ((not isinstance(audio, str)) and gentle.resample(os.path.join(outdir, 'upload'), wavfile) != 0) or gentle.resample(audio, wavfile) != 0:
        if (not isinstance(audio, str)) and gentle.resample(
                os.path.join(outdir, 'upload'), wavfile) != 0:
            status['status'] = 'ERROR'
            status[
                'error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        if isinstance(audio, str) and gentle.resample(audio, wavfile) != 0:
            status['status'] = 'ERROR'
            status[
                'error'] = "Encoding failed. Make sure that you've referenced a valid media URL."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        # XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        if not isinstance(audio, str):
            wav_obj = wave.open(wavfile, 'rb')
            status['duration'] = wav_obj.getnframes() / \
                float(wav_obj.getframerate())
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            print(p)
            for k, v in p.items():
                status[k] = v

        if len(transcript.strip()) > 0:
            trans = gentle.ForcedAligner(self.resources,
                                         transcript,
                                         nthreads=self.nthreads,
                                         **kwargs)
        elif self.full_transcriber.available:
            trans = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status[
                'error'] = 'No transcript provided and no language model for full transcription'
            return

        output = trans.transcribe(wavfile,
                                  progress_cb=on_progress,
                                  logging=logging)

        # ...remove the original upload
        if not isinstance(audio, str):
            os.unlink(os.path.join(outdir, 'upload'))

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
            jsfile.write(output.to_json(indent=2))
        with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
            csvfile.write(output.to_csv())

        # Inline the alignment into the index.html file.
        htmltxt = open(get_resource('www/view_alignment.html')).read()
        htmltxt = htmltxt.replace("var INLINE_JSON;",
                                  "var INLINE_JSON=%s;" % (output.to_json()))
        open(os.path.join(outdir, 'index.html'), 'w').write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output
Esempio n. 4
0
class Transcriber():
    def __init__(self, data_dir, nthreads=4, ntranscriptionthreads=2):
        self.data_dir = data_dir
        self.nthreads = nthreads
        self.ntranscriptionthreads = ntranscriptionthreads
        self.resources = gentle.Resources()

        self.full_transcriber = gentle.FullTranscriber(
            self.resources, nthreads=ntranscriptionthreads)
        self._status_dicts = {}

    def get_status(self, uid):
        return self._status_dicts.setdefault(uid, {})

    def out_dir(self, uid):
        return os.path.join(self.data_dir, 'transcriptions', uid)

    # TODO(maxhawkins): refactor so this is returned by transcribe()
    def next_id(self):
        uid = None
        while uid is None or os.path.exists(os.path.join(self.data_dir, uid)):
            uid = uuid.uuid4().get_hex()[:8]
        return uid

    def transcribe(self, uid, transcript, audio, async, **kwargs):

        status = self.get_status(uid)

        status['status'] = 'STARTED'
        output = {'transcript': transcript}

        outdir = os.path.join(self.data_dir, 'transcriptions', uid)

        tran_path = os.path.join(outdir, 'transcript.txt')
        with open(tran_path, 'w') as tranfile:
            tranfile.write(transcript)
        audio_path = os.path.join(outdir, 'upload')
        with open(audio_path, 'w') as wavfile:
            wavfile.write(audio)

        status['status'] = 'ENCODING'

        wavfile = os.path.join(outdir, 'a.wav')
        if gentle.resample(os.path.join(outdir, 'upload'), wavfile) != 0:
            status['status'] = 'ERROR'
            status[
                'error'] = "Encoding failed. Make sure that you've uploaded a valid media file."
            # Save the status so that errors are recovered on restart of the server
            # XXX: This won't work, because the endpoint will override this file
            with open(os.path.join(outdir, 'status.json'), 'w') as jsfile:
                json.dump(status, jsfile, indent=2)
            return

        #XXX: Maybe we should pass this wave object instead of the
        # file path to align_progress
        wav_obj = wave.open(wavfile, 'r')
        status['duration'] = wav_obj.getnframes() / float(
            wav_obj.getframerate())
        status['status'] = 'TRANSCRIBING'

        def on_progress(p):
            for k, v in p.items():
                status[k] = v

        if len(transcript.strip()) > 0:
            trans = gentle.ForcedAligner(self.resources,
                                         transcript,
                                         nthreads=self.nthreads,
                                         **kwargs)
        elif self.full_transcriber.available:
            trans = self.full_transcriber
        else:
            status['status'] = 'ERROR'
            status[
                'error'] = 'No transcript provided and no language model for full transcription'
            return

        output = trans.transcribe(wavfile,
                                  progress_cb=on_progress,
                                  logging=logging)

        # ...remove the original upload
        os.unlink(os.path.join(outdir, 'upload'))

        # Save
        with open(os.path.join(outdir, 'align.json'), 'w') as jsfile:
            jsfile.write(output.to_json(indent=2))
        with open(os.path.join(outdir, 'align.csv'), 'w') as csvfile:
            csvfile.write(output.to_csv())
        # add file datas
        sens_end_index = trans.ms.get_sentences_index()
        res = output.to_json()
        res = json.loads(res, encoding='utf-8', strict=True)
        time_sentences_index = []
        ss_dot = 0
        s_pos = None
        time_pos = 0
        try:
            for i, w in enumerate(res['words']):
                if w["case"] != "success":
                    continue
                end_v = w['endOffset']
                start_v = w['startOffset']
                if s_pos is None:
                    s_pos = start_v
                    time_pos = i

                if end_v >= sens_end_index[ss_dot]:
                    ss_dot += 1
                    time_sentences_index.append(
                        (res['words'][time_pos]["start"],
                         res['words'][i]["end"]))
                    time_pos = i
                    s_pos = end_v
            if len(sens_end_index) != len(time_sentences_index):
                time_sentences_index.append(
                    (res['words'][time_pos]["start"], res['words'][-1]["end"]))

            #print sens_end_index, len(sens_end_index)
            #print time_sentences_index, len(time_sentences_index)
            sens_str = trans.ms.get_sentences_string()
            save_ss = ""
            for i, t in enumerate(time_sentences_index):
                #print "{{time}}%s/%s{{end}}" % (str(round(float(t[0]), 2)), str(round(float(t[1]), 2)))
                #print "{{raw}}%s{{end}}" % (str(sens_str[i]))
                save_ss += "{{time}}" + str(round(float(t[0]), 2)) + "/" + str(
                    round(float(t[1]), 2)) + "{{end}}\n"
                save_ss += "{{raw}}" + sens_str[i] + "{{end}}\n"
            with open(os.path.join(outdir, 'time.csv'), 'w') as timefile:
                timefile.write(save_ss)
        except Exception as e:
            print traceback.format_exc()

        # Inline the alignment into the index.html file.
        htmltxt = open(get_resource('www/view_alignment.html')).read()
        htmltxt = htmltxt.replace("var INLINE_JSON;",
                                  "var INLINE_JSON=%s;" % (output.to_json()))
        open(os.path.join(outdir, 'index.html'), 'w').write(htmltxt)

        status['status'] = 'OK'

        logging.info('done with transcription.')

        return output