def transcribe_file(speech_file):
    """Recognise a local audio file and return the first transcript found.

    The file is read fully into memory and sent in a single synchronous
    ``recognize`` request, described as 48 kHz, two-channel LINEAR16
    US-English audio with each channel recognised separately.

    Args:
        speech_file: Path of the audio file to read.

    Returns:
        The transcript of the top alternative of the *first* result in the
        response, or ``None`` when the response contains no results. Later
        results are not included in the return value.
    """
    from google.cloud import speech
    import io

    client = speech.SpeechClient()

    with io.open(speech_file, "rb") as fh:
        raw_bytes = fh.read()

    request_audio = speech.RecognitionAudio(content=raw_bytes)
    request_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=48000,
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True,
        language_code="en-US",
    )

    response = client.recognize(config=request_config, audio=request_audio)

    # Only the leading result is consumed; its first alternative is the most
    # likely one for that portion of the audio.
    first_result = next(iter(response.results), None)
    if first_result is None:
        return None
    return "{}".format(first_result.alternatives[0].transcript)
def transcribe_file(speech_file):
    """Transcribe the given audio file and return the collected transcripts.

    The file is read fully into memory and sent in a single synchronous
    ``recognize`` request as two-channel LINEAR16 US-English audio, with
    automatic punctuation and per-word time offsets enabled. The response is
    handed to ``print_sentences``.

    Args:
        speech_file: Path of the audio file to read.

    Returns:
        tuple: ``(FULL_TRANSCRIPT, TRANSCRIPT, TIMES)`` taken from the
        module-level names of the same spelling, after the last element of
        ``TIMES`` has been removed.

    Raises:
        IndexError: If ``TIMES`` is empty once ``print_sentences`` returns.
    """
    # FULL_TRANSCRIPT, TRANSCRIPT and TIMES are module-level names. They are
    # only read or mutated in place here (never rebound), so no ``global``
    # declaration is needed.
    from google.cloud import speech
    import io

    client = speech.SpeechClient()

    with io.open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="en-US",
        audio_channel_count=2,
        enable_automatic_punctuation=True,
        enable_word_time_offsets=True,
    )

    response = client.recognize(config=config, audio=audio)

    # NOTE(review): presumably print_sentences fills the module-level
    # transcript/time containers from the response -- confirm against its
    # definition.
    print_sentences(response)

    # Drop the final entry of TIMES.
    # NOTE(review): looks like a trailing boundary that has no matching
    # sentence -- confirm.
    del TIMES[-1]
    return FULL_TRANSCRIPT, TRANSCRIPT, TIMES
Esempio n. 3
0
    def google_STT(self, audio):
        """Transcribe a local audio file with Google Cloud Speech-to-Text.

        The client is built from a service-account key at a fixed path. The
        file is read fully into memory and recognised synchronously as
        US-English audio with automatic punctuation; the encoding is left
        unspecified and the sample rate is declared as 22050 Hz.

        Args:
            audio: Path of the audio file to read.

        Returns:
            str: The top-alternative transcript of every result, concatenated
            in response order with no separator. Empty when the response has
            no results.
        """
        # NOTE(review): the credential path is hard-coded; consider making it
        # configurable.
        client = speech_v1.SpeechClient.from_service_account_json(
            '/data/second-conquest-293723-05738e995f8f.json')

        # Load the audio into memory. A separate name is used for the request
        # payload so the ``audio`` path argument is not overwritten.
        with io.open(audio, "rb") as audio_file:
            recognition_audio = speech_v1.RecognitionAudio(
                content=audio_file.read())

        config = speech_v1.RecognitionConfig(
            encoding=speech_v1.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
            sample_rate_hertz=22050,
            language_code="en-US",
            enable_automatic_punctuation=True,
        )

        # Detect speech in the audio file.
        response = client.recognize(
            request={"config": config, "audio": recognition_audio})

        # The first alternative of each result is the most likely one.
        return ''.join(
            result.alternatives[0].transcript for result in response.results)
Esempio n. 4
0
def parse_data(filename_weba, filename_wav):
    """Convert an audio file with ffmpeg and wrap the result for recognition.

    ffmpeg is invoked with an argument list rather than a shell command
    string, so paths containing spaces or shell metacharacters are passed
    through verbatim and are never interpreted by a shell.

    Args:
        filename_weba: Path of the input file handed to ``ffmpeg -i``.
        filename_wav: Path of the output file ffmpeg should write
            (overwritten if present because of ``-y``).

    Returns:
        A ``speech_v1.RecognitionAudio`` holding the bytes of
        ``filename_wav`` when that file exists after the conversion attempt,
        otherwise ``None``.
    """
    import subprocess

    try:
        # The exit status is deliberately not checked: success is judged
        # solely by whether the output file exists afterwards.
        subprocess.run(
            ["ffmpeg", "-y", "-i", filename_weba, filename_wav],
            check=False,
        )
    except OSError:
        # ffmpeg is missing or cannot be executed. Keep the best-effort
        # behaviour and fall through to the existence check below.
        pass

    if not os.path.exists(filename_wav):
        return None

    with io.open(filename_wav, "rb") as f:
        return speech_v1.RecognitionAudio(content=f.read())
Esempio n. 5
0
def speech_to_text(local_speech_file):
    """Recognise a local audio file and pass the response to print_sentences.

    The file is read fully into memory and sent in one synchronous
    ``recognize`` request with language ``zh-TW``, a declared sample rate of
    16000 Hz and an unspecified encoding.

    Args:
        local_speech_file: Path of the audio file to read.

    Returns:
        None. The recognition response is forwarded to ``print_sentences``.
    """
    client = speech.SpeechClient()

    with io.open(local_speech_file, "rb") as fh:
        raw_audio = fh.read()
    request_audio = speech.RecognitionAudio(content=raw_audio)

    # The configuration is supplied as a single mapping argument.
    settings = {
        "encoding": speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        "sample_rate_hertz": 16000,
        "language_code": "zh-TW",
    }
    request_config = speech.RecognitionConfig(settings)

    print_sentences(
        client.recognize(config=request_config, audio=request_audio))
Esempio n. 6
0
def STT(audio_path, save_path=None):
    """Transcribe an audio file and save the transcript to a text file.

    The audio is recognised synchronously as US-English with automatic
    punctuation (encoding unspecified, sample rate declared as 22050 Hz).
    The transcript is printed and written to
    ``<audio name><GetCurrentDatetime()>.txt``, where the audio name is the
    base name of ``audio_path`` with every ``.mp3`` occurrence removed.

    Args:
        audio_path: Path of the audio file to read.
        save_path: Directory to write the transcript into; created if
            missing. When ``None`` the file is written relative to the
            current working directory.

    Returns:
        None. Results are reported via ``print`` and the written file.
    """
    # NOTE(review): the credential path is hard-coded; consider making it
    # configurable.
    client = speech_v1.SpeechClient.from_service_account_json(
        '/data/second-conquest-293723-05738e995f8f.json')

    # Load the audio into memory.
    with io.open(audio_path, "rb") as audio_file:
        audio = speech_v1.RecognitionAudio(content=audio_file.read())

    config = speech_v1.RecognitionConfig(
        encoding=speech_v1.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        sample_rate_hertz=22050,
        language_code="en-US",
        enable_automatic_punctuation=True,
    )

    # Timing covers recognition and saving, not the file read above.
    start = time.time()
    response = client.recognize(request={"config": config, "audio": audio})

    # The first alternative of each result is the most likely one.
    text = ''.join(
        result.alternatives[0].transcript for result in response.results)
    print(text)

    audio_name = os.path.basename(audio_path).replace('.mp3', '')
    save_file_name = audio_name + GetCurrentDatetime() + '.txt'

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)
        # Build the destination path instead of changing the working
        # directory, which would silently break relative paths used later in
        # the process (including the next audio_path).
        save_file_name = os.path.join(save_path, save_file_name)

    with open(save_file_name, 'w') as f:
        f.write(text)

    print('Inferred Audio File Name: ', audio_path)
    print('Transcribed Script File Saved: ', save_file_name)
    print('Processing Time: ', time.time() - start)
Esempio n. 7
0
def _hms(total_seconds):
    """Split a whole number of seconds into an (hours, minutes, seconds) tuple."""
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return hours, minutes, seconds


def convert(file, folder, pack):
    """Transcribe every file in a directory and write the result as an SRT file.

    Files are processed in sorted name order. Each one is assigned a fixed
    15-second subtitle window according to its position, regardless of its
    real duration. Any exception raised while transcribing a file is printed
    and the caption for that file becomes "No Audio".

    Side effects: sets ``GOOGLE_APPLICATION_CREDENTIALS`` to ``api-key.json``
    in the process environment and prints progress messages.

    Args:
        file: Directory containing the audio files to transcribe.
        folder: Directory in which the ``.srt`` file is written.
        pack: Base name (without extension) of the ``.srt`` file.

    Returns:
        None. The subtitles are written to ``<folder>/<pack>.srt``.
    """
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "api-key.json"

    source_dir = str(file)
    all_text = []
    for entry in sorted(os.listdir(source_dir + '/')):
        name = source_dir + '/' + entry
        print("Transcribing File- " + str(name))
        with open(name, "rb") as audio_file:
            content = audio_file.read()
        try:
            config = speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                language_code="en-US")
            audio = speech.RecognitionAudio(content=content)
            text = speech_to_text(config, audio)
        except Exception as e:
            # Best effort: report the failure and keep the subtitle slot.
            print(e)
            text = "No Audio"
        all_text.append(text)

    window = 15  # seconds allotted to each caption
    transcript = ""
    for number, caption in enumerate(all_text, start=1):
        begins = (number - 1) * window
        transcript += "{}\n{:0>2d}:{:0>2d}:{:0>2d},000 --> {:0>2d}:{:0>2d}:{:0>2d},000\n {}\n\n".format(
            number, *_hms(begins), *_hms(begins + window), caption)
        # Progress output shows everything accumulated so far.
        print("Transcript completed- " + str(transcript))

    transcript_file = str(folder) + "/" + str(pack) + ".srt"
    with open(transcript_file, "w") as f:
        f.write(transcript)
Esempio n. 8
0
def text_from_audio(wav_fname):
    """Read an audio file and return whatever ``speech2text`` produces for it.

    Args:
        wav_fname: Path of the audio file to read.

    Returns:
        The value returned by ``speech2text`` for a ``RecognitionAudio``
        built from the file's bytes.
    """
    with io.open(wav_fname, "rb") as wav:
        raw_bytes = wav.read()
    return speech2text(speech.RecognitionAudio(content=raw_bytes))
    def google_STT(self, path):
        """Transcribe an audio file and split the words into timed sentences.

        The file is recognised synchronously as US-English with automatic
        punctuation and per-word time offsets (encoding unspecified, sample
        rate declared as 22050 Hz). Two channels are declared for ``.wav``
        paths and one otherwise.

        Words from the top alternative of every result are accumulated into
        a sentence. A sentence is closed when a word contains ``.`` or ``?``
        anywhere in it, so abbreviations or decimals also close a sentence.
        Words after the last such marker are not returned.

        Args:
            path: Path of the audio file to read.

        Returns:
            tuple: ``(text, sent_start_time)`` where ``text`` is the list of
            sentences (each word is prefixed by a space) and
            ``sent_start_time`` is ``[0]`` followed by the end time, in
            seconds, of each sentence's closing word. It therefore has one
            more entry than ``text``.
        """
        # NOTE(review): the credential path is hard-coded; consider making it
        # configurable.
        client = speech_v1.SpeechClient.from_service_account_json(
            '/data/second-conquest-293723-05738e995f8f.json')

        # Load the audio into memory.
        with io.open(path, "rb") as audio_file:
            audio = speech_v1.RecognitionAudio(content=audio_file.read())

        config = speech_v1.RecognitionConfig(
            encoding=speech_v1.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
            sample_rate_hertz=22050,
            language_code="en-US",
            audio_channel_count=2 if path.endswith('.wav') else 1,
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True,
        )

        # Detect speech in the audio file.
        response = client.recognize(request={"config": config, "audio": audio})

        sent = ''
        text = []
        # Each sentence starts where the previous one ended; the first
        # starts at 0.
        sent_start_time = [0]

        for result in response.results:
            # The first alternative is the most likely one.
            alternative = result.alternatives[0]

            for word_info in alternative.words:
                word = word_info.word
                sent = sent + ' ' + word

                if '.' in word or '?' in word:
                    end_seconds = word_info.end_time.total_seconds()
                    sent_start_time.append(end_seconds)
                    text.append(sent)
                    print(sent)
                    sent = ''
                    print(
                        f"Word: {word}, end_time: {end_seconds}")

            print(sent_start_time)
            print(text)

        return text, sent_start_time