def synthesize(self, phonemes_list: List[str]) -> bytes:
    """Convert a list of phoneme names to rendered WAV audio bytes.

    For each phoneme name, builds a Phoneme whose duration is sampled
    from the per-phoneme duration statistics. Vowels additionally get
    generated pitch modulations; consonants get none.

    :param phonemes_list: phoneme names (only their names, no timing).
    :return: the rendered audio as WAV bytes.
    """
    pho_list = PhonemeList([])
    for pho in phonemes_list:
        # NOTE(review): both the vowel and consonant paths sample from
        # consonants_stats — the vowel path looks like a copy-paste slip;
        # confirm whether a vowels_stats table exists and should be used.
        duration = int(torch.normal(*self.stats.consonants_stats[pho]["duration"])[0])
        # Only vowels carry pitch modulation curves.
        if pho in self.voice.phonems.VOWELS:
            pitch_mods = self.gen_vowel_pitches(pho)
        else:  # it's a consonant
            pitch_mods = []
        pho_list.append(Phoneme(name=pho, duration=duration, pitch_mods=pitch_mods))
    return self.voice.to_audio(pho_list)
def process(self, phonems: PhonemeList):
    """Rebuild the phoneme list with nasal/consonant embellishments.

    - after a nasal vowel, append an "N" phoneme;
    - after a consonant (except "w") not followed by a vowel, append a
      schwa-like "2" phoneme;
    - rename "o" phonemes to "O";
    - everything else passes through unchanged.
    """
    ng_phonem = Phoneme("N", 100)
    euh_phonem = Phoneme("2", 79)
    # End-silence padding, just to be safe: guarantees the i+1 lookahead
    # below never runs past the end for a trailing consonant.
    phonems.append(Phoneme("_", 10))
    output = PhonemeList([])
    for idx, current in enumerate(phonems):
        name = current.name
        if name in FrenchPhonemes.NASAL_WOVELS:
            output.append(current)
            output.append(ng_phonem)
        elif (name in FrenchPhonemes.CONSONANTS - {"w"}
              and phonems[idx + 1].name not in FrenchPhonemes.VOWELS):
            output.append(current)
            output.append(euh_phonem)
        else:
            if name == "o":
                current.name = "O"
            output.append(current)
    return output
def process(self, phonems: PhonemeList):
    """Turn every "R" phoneme into a "w" effect.

    Each "R" is renamed to "w"; then, with probability 1/2, either two
    short "w" glides are prepended before it, or its duration is
    stretched to 206ms.

    Fixes over the previous version:
    - no longer inserts into the list while iterating it (which only
      worked by accident of index shifting);
    - inserts two *distinct* Phoneme objects instead of the same shared
      instance (mutating one glide no longer mutates the other);
    - builds and returns a fresh PhonemeList instead of mutating the
      input in place, consistent with the other processors in this file.

    :param phonems: the phoneme sequence to transform.
    :return: a new PhonemeList with the effect applied.
    """
    reconstructed = PhonemeList([])
    for phoneme in phonems:
        if phoneme.name == "R":
            phoneme.name = "w"
            if random.randint(0, 1) == 0:
                # Prepend two short glides before the renamed phoneme.
                reconstructed += [Phoneme("w", 103, []), Phoneme("w", 103, [])]
            else:
                phoneme.duration = 206
        reconstructed.append(phoneme)
    return reconstructed
def process(self, phonems: PhonemeList):
    """Randomly stutter the phoneme sequence.

    With probability 1/3 each:
    - a consonant+vowel pair is repeated once (C V C V);
    - a lone vowel is repeated around a short silence (V _ V).
    Everything else passes through unchanged. Note that a duplicated
    pair's vowel is still processed on its own turn at the next
    iteration, as in the original implementation.

    Fix: guard the `i + 1` lookahead — the previous version raised
    IndexError whenever the sequence ended on a consonant.

    :param phonems: the phoneme sequence to transform.
    :return: a new PhonemeList with the stutter effect applied.
    """
    silence = Phoneme("_", 61)
    reconstructed = PhonemeList([])
    for i, phoneme in enumerate(phonems):
        if (phoneme.name in FrenchPhonemes.CONSONANTS
                and i + 1 < len(phonems)
                and phonems[i + 1].name in FrenchPhonemes.VOWELS
                and random.randint(1, 3) == 1):
            reconstructed += [phoneme, phonems[i + 1]] * 2
        elif phoneme.name in FrenchPhonemes.VOWELS and random.randint(1, 3) == 1:
            reconstructed += [phoneme, silence, phoneme]
        else:
            reconstructed.append(phoneme)
    return reconstructed
# --- Render a sung chord progression over a looped beat track ---
# NOTE(review): `rate` and `beats_track` must be defined earlier in the
# file (outside this chunk) — presumably loaded from BEATS_TRACK_FILE.

# Duration (seconds) of one beat, from the track length and its beat count.
beat_duration = len(beats_track) / (rate * BEATS_TRACK_BEAT_COUNT)
# Tile the beat track so it covers every beat of every measure of every
# progression repetition.
beats_track_looped = np.tile(
    beats_track,
    LOOPS_COUNT * BEATS_PER_MEASURE * len(CHORDS_PROGRESSION) // BEATS_TRACK_BEAT_COUNT)
logging.info("Beat time : %dms" % (beat_duration * 1000))
# assumes 4 beats per measure — TODO confirm against BEATS_PER_MEASURE
logging.info("Measure time : %dms" % (beat_duration * 1000 * 4))

# One frequency per beat of the chord progression, repeated per loop.
prog_freqs = get_trinote_progression_freqs(CHORDS_PROGRESSION)
logging.info("First freq progression: \n %s" % (str(prog_freqs)))
track_freqs = prog_freqs * LOOPS_COUNT

# One "a" phoneme per beat, held at a constant pitch for its whole
# duration (pitch points at 0% and 100% of the phoneme).
progression_phonems = PhonemeList([])
for freq in track_freqs:
    progression_phonems.append(
        Phoneme("a", int(beat_duration * 1000), [(0, freq), (100, freq)]))

logging.info("Rendering audio")
voice = Voice(lang="fr", voice_id=2)
wav = voice.to_audio(progression_phonems)

# Optionally mix the sung progression with the looped beat track.
if BEATS_TRACK_FILE is not None:
    rate, wave_array = to_f32_16k(wav)  # NOTE: rebinds `rate`
    mixed_tracks = mix_tracks(beats_track_looped * 0.6, wave_array * 0.3,
                              align="left")
    wav = to_wav_bytes(mixed_tracks, 16000)

# Play the final result.
player = AudioPlayer()
player.set_file(BytesIO(wav))
player.play()
player.close()