Ejemplo n.º 1
0
def speech_to_text_in_a_min(
        doc_title='範例1_一分鐘內雲端運算',
        title_pattern='nlpno',
        wd='/home/slave1/git/Speech2Text_workshop/record',
        json_os='/home/slave1/git/Speech2Text_workshop/speech2text-3de4444fd46a.json',
        sample_rate_hertz=48000):
    '''
    * json_os:憑證檔的路徑
    * title_pattern:錄音檔的名稱模式
    * sample_rate_hertz:錄音的取樣頻率
    * doc_title:docx文件名稱
    * wd:工作目錄
    
    '''

    # 計時
    start_time = time.time()
    # 從python client端對雲端speech2text服務進行驗證
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = json_os
    client = speech.SpeechClient()

    file_list = os.listdir(wd)

    # 選出title_pattern的錄音檔
    select_wav = []
    for i in file_list:
        if title_pattern in i:
            select_wav.append(wd + '\\' + i)

    aa = pd.DataFrame()

    for music in select_wav:

        # 將 audio錄音檔 讀入進來
        with io.open(music, 'rb') as audio_file:
            content = audio_file.read()

        # 將錄音檔轉換成google 看得懂的格式
        audio = types.RecognitionAudio(content=content)

        # 設定格式錄音檔
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=sample_rate_hertz,
            language_code='cmn-Hant-TW',
            enable_word_time_offsets=True)

        # 機器學習文字辨識(speech2text)
        print('')
        response = client.recognize(config, audio)

        transcript_list = []
        transcript_confidence = []
        timerecored = []
        # Each result is for a consecutive portion of the audio. Iterate through
        # them to get the transcripts for the entire audio file.
        for result in response.results:
            alternative = result.alternatives[0]
            # The first alternative is the most likely one for this portion.
            transcript_list.append(alternative.transcript)
            transcript_confidence.append(alternative.confidence)
            print('Transcript: {}'.format(alternative.transcript))
            print('Confidence: {}'.format(alternative.confidence))

            # begining and end time of a sentence
            sentence_start_time = alternative.words[0].start_time
            sentence_end_time = alternative.words[len(alternative.words) -
                                                  1].end_time

            # make time
            sentence_start_time = round(sentence_start_time.seconds +
                                        sentence_start_time.nanos * 1e-9)
            sentence_end_time = round(sentence_end_time.seconds +
                                      sentence_end_time.nanos * 1e-9)

            # make min
            sentence_start_time = str(
                datetime.timedelta(seconds=sentence_start_time))
            sentence_end_time = str(
                datetime.timedelta(seconds=sentence_end_time))
            timerecored.append([sentence_start_time, sentence_end_time])

        # pandas 建立信心程度資料表
        # make df
        transcript_df = pd.DataFrame(transcript_list, columns=['文章段句'])
        confidence_df = pd.DataFrame(transcript_confidence,
                                     columns=['機器認字信心水準'])
        confidence_df['機器認字信心水準'] = round(confidence_df['機器認字信心水準'], 2)
        time_df = pd.DataFrame(timerecored, columns=['start', 'end'])
        correctness_summary_df = pd.concat(
            [transcript_df, confidence_df, time_df], axis=1)
        correctness_summary_df = correctness_summary_df.sort_values(
            ['機器認字信心水準'])
        correctness_summary_df['改善順序'] = range(1,
                                               len(correctness_summary_df) + 1)

        timer_translist = []
        for hah, timer in zip(transcript_list, timerecored):
            timer_translist.append(hah + '  ' + '【' + ' to '.join(timer) + '】')

        aa = pd.concat([aa, correctness_summary_df])

    # 製作文字雲
    from speech2text import make_worldcould_report, text_freq
    cut_text = make_worldcould_report(data=aa,
                                      pd_text_col='文章段句',
                                      mask_pic=False,
                                      filename='wordcloud',
                                      pic_name='test.png')
    words_counts = text_freq(cut_text)

    # 計算重要程度
    max = words_counts['counts'].describe()['max']
    mean = words_counts['counts'].describe()['mean']

    # 僅取出max與mean的字詞
    words_counts = words_counts[(words_counts['counts'] <= max)
                                & (words_counts['counts'] >= mean)]

    df_count_all = pd.DataFrame()
    for index, i in words_counts.iterrows():

        df_count = correctness_summary_df[
            correctness_summary_df['文章段句'].str.contains(i['word'])]

        if not df_count.empty:
            df_count['重要性'] = i['counts']
            df_count_all = pd.concat([df_count_all, df_count])

    # group by
    correctness_summary_df = df_count_all.groupby(
        ['文章段句', '機器認字信心水準', 'start', 'end', '改善順序'],
        as_index=False)['重要性'].mean().round(2)

    # save to docx
    document = Document()
    document.add_heading(doc_title, 0)
    document.add_paragraph(
        '機器認字信心水準' + str(round(correctness_summary_df['機器認字信心水準'].mean(), 2)) +
        '\n\n' + '\n\n'.join(timer_translist))
    document.add_picture('wordcloud.png', width=Cm(15), height=Cm(13))
    document.save(doc_title + '_文章逐字稿.docx')
    print('Done')
    print('請看os檔案中有沒有兩個檔案,一格個是完整的docx檔案,一個是csv檔案')
    print("--- %s seconds ---" % (round(time.time() - start_time, 2)))
    return correctness_summary_df.to_excel(doc_title + '_文章認字信心矩陣.xlsx')
Ejemplo n.º 2
0
def speech_to_text(
        gcs_uri='gs://speechfashion/Acc.wav',
        doc_title='範例2_一分鐘以上雲端運算',
        timeout=None,
        path='',
        json_os='/home/slave1/git/Speech2Text/damnhow-db8d83229dd4.json',
        sample_rate_hertz=96000):
    '''
    1.產出文章認字信心矩陣csv,提供修改者文句之修正順序
    2.產出docx文本,並提供文章機器認字信心水準,供修改者修改
    '''

    # 計時
    start_time = time.time()
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = json_os

    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code='cmn-Hant-TW',
        enable_word_time_offsets=True)

    #    config=types.StreamingRecognitionConfig(config=config)
    #    stream = [audio]
    #    requests = (types.StreamingRecognizeRequest(audio_content=chunk)
    #                for chunk in stream)
    #    responses = client.streaming_recognize(config, requests)

    operation = client.long_running_recognize(config, audio)

    print('機器學習文字辨識中...')
    response = operation.result(timeout=timeout)  #

    aa = pd.DataFrame()
    transcript_list = []
    transcript_confidence = []
    timerecored = []
    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.

    try:

        for result in response.results:
            alternative = result.alternatives[0]
            # The first alternative is the most likely one for this portion.
            transcript_list.append(alternative.transcript)
            transcript_confidence.append(alternative.confidence)
            print('Transcript: {}'.format(alternative.transcript))
            print('Confidence: {}'.format(alternative.confidence))

            # begining and end time of a sentence
            sentence_start_time = alternative.words[0].start_time
            sentence_end_time = alternative.words[len(alternative.words) -
                                                  1].end_time

            # make time
            sentence_start_time = round(sentence_start_time.seconds +
                                        sentence_start_time.nanos * 1e-9)
            sentence_end_time = round(sentence_end_time.seconds +
                                      sentence_end_time.nanos * 1e-9)

            # make min
            sentence_start_time = str(
                datetime.timedelta(seconds=sentence_start_time))
            sentence_end_time = str(
                datetime.timedelta(seconds=sentence_end_time))
            timerecored.append([sentence_start_time, sentence_end_time])

            # pandas 建立信心程度資料表
            # make df
            transcript_df = pd.DataFrame(transcript_list, columns=['文章段句'])
            confidence_df = pd.DataFrame(transcript_confidence,
                                         columns=['機器認字信心水準'])
            confidence_df['機器認字信心水準'] = round(confidence_df['機器認字信心水準'], 2)
            time_df = pd.DataFrame(timerecored, columns=['start', 'end'])
            correctness_summary_df = pd.concat(
                [transcript_df, confidence_df, time_df], axis=1)
            correctness_summary_df = correctness_summary_df.sort_values(
                ['機器認字信心水準'])
            correctness_summary_df['改善順序'] = range(
                1,
                len(correctness_summary_df) + 1)

            timer_translist = []
            for hah, timer in zip(transcript_list, timerecored):
                timer_translist.append(hah + '  ' + '【' + ' to '.join(timer) +
                                       '】')

            aa = pd.concat([aa, correctness_summary_df])

        # 製作文字雲
        from speech2text import make_worldcould_report, text_freq
        cut_text = make_worldcould_report(data=aa,
                                          pd_text_col='文章段句',
                                          mask_pic=False,
                                          filename='wordcloud',
                                          pic_name='test.png')
        words_counts = text_freq(cut_text)

        # 計算重要程度
        max = words_counts['counts'].describe()['max']
        mean = words_counts['counts'].describe()['mean']

        # 僅取出max與mean的字詞
        words_counts = words_counts[(words_counts['counts'] <= max)
                                    & (words_counts['counts'] >= mean)]

        df_count_all = pd.DataFrame()
        for index, i in words_counts.iterrows():

            df_count = correctness_summary_df[
                correctness_summary_df['文章段句'].str.contains(i['word'])]

            if not df_count.empty:
                df_count['重要性'] = i['counts']
                df_count_all = pd.concat([df_count_all, df_count])

        # group by
        correctness_summary_df = df_count_all.groupby(
            ['文章段句', '機器認字信心水準', 'start', 'end', '改善順序'],
            as_index=False)['重要性'].mean().round(2)

        # save to docx
        document = Document()
        document.add_heading(doc_title, 0)
        document.add_paragraph(
            '機器認字信心水準' +
            str(round(correctness_summary_df['機器認字信心水準'].mean(), 2)) + '\n\n' +
            '\n\n'.join(timer_translist))
        document.add_picture('wordcloud.png', width=Cm(15), height=Cm(13))
        document.save(path + doc_title + '_文章逐字稿.docx')
        print('Done')
        print('請看os檔案中有沒有兩個檔案,一格個是完整的docx檔案,一個是csv檔案')
        print("--- %s seconds ---" % (round(time.time() - start_time, 2)))
        return correctness_summary_df.to_excel(path + doc_title +
                                               '_文章認字信心矩陣.xlsx')

    except:
        print('錄音檔無法辨識,請檢查該錄音檔是否存在或該錄音檔音質問題')


# 請執行到這邊停住
Ejemplo n.º 3
0
    time_df = pd.DataFrame(timerecored, columns=['start', 'end'])
    correctness_summary_df = pd.concat([transcript_df, confidence_df, time_df],
                                       axis=1)
    correctness_summary_df = correctness_summary_df.sort_values(['機器認字信心水準'])
    correctness_summary_df['改善順序'] = range(1, len(correctness_summary_df) + 1)

    timer_translist = []
    for hah, timer in zip(transcript_list, timerecored):
        timer_translist.append(hah + '  ' + '【' + ' to '.join(timer) + '】')
    aa = pd.concat([aa, correctness_summary_df])

# 製作文字雲
from speech2text import make_worldcould_report, text_freq
cut_text = make_worldcould_report(data=aa,
                                  pd_text_col='文章段句',
                                  mask_pic=False,
                                  filename='wordcloud',
                                  pic_name='test.png')
words_counts = text_freq(cut_text)

# 計算重要程度
max = words_counts['counts'].describe()['max']
mean = words_counts['counts'].describe()['mean']

# 僅取出max與mean的字詞
words_counts = words_counts[(words_counts['counts'] <= max)
                            & (words_counts['counts'] >= mean)]

df_count_all = pd.DataFrame()
for index, i in words_counts.iterrows():