Beispiel #1
0
def SpeakLongText(long_text, max_text_length=GOOGLE_MAX_TEXT_LENGTH):
    """Render long_text to speech and return it as one temporary mp3 file.

    The text is cut into pieces by SplitTextToShortTexts (max_text_length is
    forwarded to it), every piece is handed to SpeakShortText in a worker
    process, and the results are appended, in input order, after a
    500 ms / 300 Hz sine lead-in.

    Returns the open tempfile.NamedTemporaryFile object (suffix ".mp3",
    created with delete=False), so the caller is responsible for removing
    the file when done with it.
    """
    pieces = SplitTextToShortTexts(long_text, max_text_length)

    # Scratch directory handed to SpeakShortText for every piece
    with tempfile.TemporaryDirectory() as temp_dir:

        event_loop = asyncio.get_event_loop()
        request_slots = asyncio.Semaphore(MAX_CONCURRENT_GOOGLE_API_REQUESTS)

        # NOTE (kept from the original author): Google's text to speech
        #       library opens a TCP connection per request and does not close
        #       it, even after the Client is de-referenced. Each connection
        #       holds a file descriptor, so a large book can hit the file
        #       descriptor limit and crash. Running every TTS call in its own
        #       process guarantees the descriptors are released once the pool
        #       is torn down.

        # The pool is created by hand so that its cleanup can be forced
        with concurrent.futures.ProcessPoolExecutor(
                max_workers=MAX_CONCURRENT_GOOGLE_API_REQUESTS) as pool:

            async def speak_piece(piece):
                # Wait for a free slot, then run the TTS call in the pool
                async with request_slots:
                    return await event_loop.run_in_executor(
                        pool, SpeakShortText, piece, temp_dir)

            async def speak_all_pieces():
                # gather() returns the results in the order of `pieces`
                pending = [speak_piece(piece) for piece in pieces]
                return await asyncio.gather(*pending)

            piece_mp3s = event_loop.run_until_complete(speak_all_pieces())

            # Explicit shutdown, in addition to the one the with-block does
            pool.shutdown(wait=True)

        # Stitch everything together, starting with the sine lead-in
        combined = Sine(300).to_audio_segment(duration=500)
        for piece_mp3 in piece_mp3s:
            combined = combined.append(AudioSegment.from_mp3(piece_mp3))

        # The result must outlive temp_dir, hence a separate non-deleting file
        result_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
        combined.export(result_file.name, format="mp3")

        return result_file
Beispiel #2
0
def text_to_audio(text,
                  file_name,
                  export_file_format,  # e.g. "ogg"
                  codec=None,  # e.g. "opus"
                  frequency=700,
                  wpm=10,
                  cross_fade=2):
    """Turn text into an audio file written to file_name.

    The text is converted to intervals by sentence_to_intervals; each interval
    is rendered by interval_to_wave_data_segment using `frequency` and the
    unit length derived from `wpm`, and the pieces are joined with a
    cross-fade of `cross_fade` (pydub measures it in milliseconds).

    The result is exported in `export_file_format` with the optional `codec`.
    Nothing is returned.
    """
    unit_seconds = wpm_to_unit_length_seconds(wpm)
    intervals = sentence_to_intervals(text)

    # A 0 Hz lead-in exactly as long as the cross-fade, so the first real
    # interval has something to fade against.
    audio = Sine(0).to_audio_segment(cross_fade)

    for interval in intervals:
        piece = interval_to_wave_data_segment(interval, frequency,
                                              unit_seconds)
        audio = audio.append(piece, crossfade=cross_fade)

    audio.export(file_name, format=export_file_format, codec=codec)
# Chroma column names, in the order the note files are written.
_NOTE_NAMES = ('C', 'Csh', 'D', 'Dsh', 'E', 'F',
               'Fsh', 'G', 'Gsh', 'A', 'Ash', 'B')

# Sine frequencies (Hz) for each entry of _NOTE_NAMES, keyed by emo_choice.
_NOTE_FREQUENCIES_HZ = {
    # octave starting at C2
    'low': (65.41, 69.30, 73.42, 77.78, 82.41, 87.31,
            92.50, 98.00, 103.83, 110.00, 116.54, 123.47),
    # octave starting at C3
    'high': (130.81, 138.59, 146.83, 155.56, 164.81, 174.61,
             185.00, 196.00, 207.65, 220.00, 233.08, 246.94),
}

# Fixed amount subtracted from every tone's gain.
_VOLUME_REDUCTION = 50


def _export_note_segments(chroma, samples_ms, frequencies, out_dir):
    """Write one wav per (chroma row, note) into out_dir.

    Files are named "<row number, zero padded to 2>_<note>.wav", with rows
    numbered from 1. Each tone is a sine of the note's frequency lasting
    samples_ms, with its gain changed by -(chroma value) - _VOLUME_REDUCTION.
    """
    # The base tones only depend on frequency and duration, so build them
    # once instead of once per row.
    base_tones = [
        (note, Sine(frequency).to_audio_segment(duration=samples_ms))
        for note, frequency in zip(_NOTE_NAMES, frequencies)
    ]

    for row_number, (_, chroma_row) in enumerate(chroma.iterrows(), start=1):
        row_tag = str(row_number).zfill(2)
        for note, tone in base_tones:
            gain = np.negative(np.mean(chroma_row[note])) - _VOLUME_REDUCTION
            # pydub: `segment + number` returns a copy with that gain applied
            adjusted = tone + gain
            adjusted.export(
                os.path.join(out_dir, row_tag + '_' + note + '.wav'),
                format="wav")


def generate_notes(current_label):
    """Generate sine-note wav segments spanning each audio file's duration.

    For every row of the module-level labels_df, the audio file named in its
    '1_filename.csv' column is loaded from data_dir to obtain its duration.
    Then, for every file in 'mean_<current_label>' whose name ends in 'csv',
    the duration is divided evenly over that file's rows and one wav per row
    and note is written to
    'segments_of_notes_<current_label>/<csv name minus last 4 chars>/'.

    The octave used is selected by the module-level emo_choice ('low' or
    'high').

    NOTE(review): the output paths do not contain the audio file name, so the
    files written for one labels_df row are overwritten by the next row.
    Confirm this is intended.

    Raises:
        ValueError: if emo_choice is neither 'low' nor 'high'.
    """
    if emo_choice not in _NOTE_FREQUENCIES_HZ:
        # Previously this surfaced as an UnboundLocalError deep in the loop.
        raise ValueError(
            "emo_choice must be 'low' or 'high', got %r" % (emo_choice,))
    frequencies = _NOTE_FREQUENCIES_HZ[emo_choice]

    mean_dir = 'mean_' + current_label
    notes_root = 'segments_of_notes_' + current_label

    for _, label_row in labels_df.iterrows():
        audio = label_row['1_filename.csv']
        mean_files = [f for f in os.listdir(mean_dir) if f.endswith('csv')]
        if not mean_files:
            # Nothing to render for this row, so do not touch the audio file.
            continue

        # The duration depends only on the audio file: load it once per row
        # rather than once per csv.
        y, sr = librosa.load(data_dir + '/' + audio)
        audio_duration = librosa.get_duration(y=y, sr=sr)

        for filename in mean_files:
            chroma = pd.read_csv(os.path.join(mean_dir, filename))

            chroma_points = len(chroma)
            if chroma_points == 0:
                # No rows means no notes, and avoids dividing by zero below.
                print('no chroma rows, skipped : ' + filename)
                continue

            # Each chroma row gets an equal share of the audio duration (ms)
            samples_ms = audio_duration / chroma_points * 1000

            out_dir = os.path.join(notes_root, filename[:-4])
            os.makedirs(out_dir, exist_ok=True)

            _export_note_segments(chroma, samples_ms, frequencies, out_dir)
            print('segmented audio generated for : ' + filename)