コード例 #1
0
ファイル: tts.py プロジェクト: Kami/raspberry-pi-ham-radio
    def text_to_speech(self,
                       text: str,
                       slow: bool = False,
                       use_cache: bool = True) -> str:
        """
        Synthesize ``text`` to a WAV file with eSpeak NG and return the
        path to the generated (or previously cached) file.

        :param text: Text to synthesize.
        :param slow: When True use a slow speaking rate (80 wpm vs 150).
        :param use_cache: When True reuse an existing valid cached file.
        :return: Path of the WAV file on disk.
        """
        # TODO: Allow various settings to be changed via config option
        # Imported lazily so the module loads even when espeakng is not
        # installed and TTS is never used.
        from espeakng import ESpeakNG

        file_path = self._get_cache_file_path(text=text, use_cache=use_cache)

        if self._is_valid_cached_file(file_path=file_path,
                                      use_cache=use_cache):
            LOG.debug("Using existing cached file: %s" % (file_path))
            return file_path

        LOG.trace('Performing TTS on text "%s" and saving result to %s' %
                  (text, file_path))

        esng = ESpeakNG()
        esng.voice = "en-us"
        # The original assigned pitch twice in a row; the duplicate was removed.
        esng.pitch = 32

        if slow:
            esng.speed = 80
        else:
            esng.speed = 150

        wave_data = esng.synth_wav(text)

        with open(file_path, "wb") as fp:
            fp.write(wave_data)

        return file_path
コード例 #2
0
    def text_to_speech(self, text, voice='en-us', bit_rate=8000):
        """
        Synthesize ``text`` with eSpeak NG into the temporary input file,
        then convert it to the requested bit rate via ``self._convert``.

        :param text: Text to synthesize.
        :param voice: eSpeak NG voice identifier (default ``en-us``).
        :param bit_rate: Target bit rate handed to ``self._convert``.
        """
        speak = ESpeakNG(volume=200)
        speak.voice = voice

        wav_data = speak.synth_wav(text)
        self._tmp_in_file.seek(0)
        self._tmp_in_file.write(wav_data)
        # Bug fix: drop any stale tail left over from a previous, longer
        # synthesis — seek(0)+write alone does not shrink the file.
        # (Assumes _tmp_in_file is a seekable read/write temp file — TODO confirm.)
        self._tmp_in_file.truncate()

        self._convert(bit_rate)
コード例 #3
0
    def test_synth_wav_xsampa(self):
        """X-SAMPA input yields a mono 22050 Hz WAV of reasonable length."""
        engine = ESpeakNG(voice='english-us')
        engine.pitch = 32
        engine.speed = 150

        wav_bytes = engine.synth_wav("h@l'oU", fmt='xs')
        result = wave.open(BytesIO(wav_bytes))

        self.assertEqual(result.getnchannels(), 1)
        self.assertEqual(result.getframerate(), 22050)
        self.assertGreater(result.getnframes(), 20000)
コード例 #4
0
    def test_synth_wav(self):
        """Plain-text input yields a mono 22050 Hz WAV of reasonable length."""
        engine = ESpeakNG(voice='english-us')
        engine.pitch = 32
        engine.speed = 150

        wav_bytes = engine.synth_wav('Hello World!')
        result = wave.open(BytesIO(wav_bytes))

        self.assertEqual(result.getnchannels(), 1)
        self.assertEqual(result.getframerate(), 22050)
        self.assertGreater(result.getnframes(), 24000)
コード例 #5
0
class MService():
    """Generate a "song" by interleaving eSpeak-synthesized lyric words
    (padded with silence) with slices of a backing track.

    Audio handling relies on pydub ``AudioSegment``; word synthesis results
    are memoized with a bounded LRU cache.
    """

    # Directory holding the bundled default backing track.
    default_path = "distopianM"
    __hard_code_path = "sounds_db"
    __prefix = "-transform.wav"
    __suffix = "vocal-"
    __distopian_music = "distopianM"

    # Loaded once at class-definition time so every instance shares it.
    try:
        __distopian_segment = AudioSegment.from_file(default_path + "/distopian.mp3")
    except Exception as exc:
        # Chain the cause instead of swallowing it (the original used a bare
        # ``except:`` which hid the real load error).
        raise FileNotFoundError("Default music was not found!") from exc

    def __init__(self, *args, **kwargs):
        self.__espeak = ESpeakNG()
        self.__espeak.speed = 150

    def __repr__(self):
        return "Mservice"

    def generate_song(self, lyrics, base_music=default_path, export_path="/",
                      name_file="out", export_format="mp3"):
        """Build the final track and export it.

        :param lyrics: Whitespace-separated words to synthesize (lowercased).
        :param base_music: Path of the backing track; the class default uses
            the bundled segment instead of re-reading the file.
        :param export_path: Full output file name passed to ``export``.
        :param name_file: Unused; kept for backward compatibility.
        :param export_format: Format string handed to pydub ``export``.
        :raises FileNotFoundError: When ``base_music`` does not exist.
        """
        if not os.path.exists(base_music):
            raise FileNotFoundError("The base music was not found")

        if base_music == self.default_path:
            # First four minutes (240 000 ms) of the bundled track.
            base_music_audio = self.__distopian_segment[0:240000]
        else:
            base_music_audio = AudioSegment.from_file(base_music)

        sounds_lyrics = self.convert_lyrics_to_voice(lyrics.lower())

        segments_music = self.split_segment(base_music_audio, len(sounds_lyrics))
        segments_lyrics = self.inject_silence_to_segments(sounds_lyrics, 1500)

        concatenated_segments = self.concatenate_segments(segments_lyrics,
                                                          segments_music)

        # NOTE(review): historical behaviour treats ``export_path`` as the
        # complete output file name, so ``name_file`` is never used.
        concatenated_segments.export(f"{export_path}", format=export_format)

    def concatenate_segments(self, segment_voice, segment_music):
        """Interleave voice and music slices (voice first), then append the
        remainder of whichever list is longer."""
        result = AudioSegment.empty()

        if len(segment_voice) >= len(segment_music):
            low, high, high_list = (len(segment_music), len(segment_voice),
                                    segment_voice)
        else:
            low, high, high_list = (len(segment_voice), len(segment_music),
                                    segment_music)

        for i in range(low):
            result = result + segment_voice[i] + segment_music[i]
        for i in range(low, high):
            result = result + high_list[i]

        return result

    def inject_silence_to_segments(self, segments, duration_silence_ms=1500):
        """Return a new list with silence padding both sides of each segment."""
        silence = AudioSegment.silent(duration=duration_silence_ms)
        return [silence + segment + silence for segment in segments]

    @cached(cache=LRUCache(maxsize=150))
    def __get_text_as_raw_voice(self, text):
        """Synthesize ``text`` with eSpeak and return it as an AudioSegment.

        NOTE(review): caching an instance method keys on ``self`` and keeps
        the instance alive for the cache's lifetime; acceptable here since the
        service is effectively a singleton, but worth revisiting.
        """
        wav_generated = self.__espeak.synth_wav(text)
        return self.segment_from_raw_data(BytesIO(wav_generated))

    def convert_lyrics_to_voice(self, lyrics):
        """Synthesize every space-separated word of ``lyrics`` to audio."""
        return [self.__get_text_as_raw_voice(word)
                for word in lyrics.split(" ")]

    def segment_from_raw_data(self, raw_data):
        """Decode a file-like object into an AudioSegment and close it."""
        raw_segment = AudioSegment.from_file(raw_data)
        raw_data.close()
        return raw_segment

    def split_segment(self, segment, number_of_segments):
        """Split ``segment`` into ``number_of_segments`` roughly equal parts;
        the final part absorbs any division remainder."""
        total_ms = int(segment.duration_seconds * 1000)
        if total_ms == 0:
            # Degenerate empty track: nothing to split.
            return [segment]

        # Cannot make more slices than there are milliseconds.
        number_of_segments = min(number_of_segments, total_ms)
        step = total_ms // number_of_segments

        boundaries = list(range(step, total_ms + 1, step))
        # Bug fix: the original unconditionally appended ``total_ms``, which
        # duplicated the last boundary when the duration divides evenly and
        # produced an empty trailing partition.
        if boundaries[-1] != total_ms:
            boundaries.append(total_ms)

        partitions = []
        start = 0
        for end in boundaries:
            partitions.append(segment[start:end])
            start = end
        return partitions
コード例 #6
0
ファイル: tts.py プロジェクト: madkote/rnd_speech_recognition
class TTS(object):
    """Facade over several TTS engines (MaryTTS, eSpeak NG, PicoTTS) with an
    optional remote HTTP mode selected by ``host_tts``."""

    def __init__(
            self,
            host_tts='local',
            port_tts=8300,
            locale='en_US',
            engine='mary',
            voice='cmu-rms-hsmm',
            pitch=50,  # 0-99
            speed=175):  # approx. words per minute
        """Store the configuration; when ``host_tts`` is ``'local'``,
        instantiate the local player and engine backends."""
        # Connection settings.
        self._host_tts = host_tts
        self._port_tts = port_tts
        # Synthesis settings.
        self._engine = engine
        self._locale = locale
        self._voice = voice
        self._speed = speed
        self._pitch = pitch

        if host_tts == 'local':
            self.player = PulsePlayer('Local TTS Client')
            self.espeak = ESpeakNG()
            self.marytts = MaryTTS()
            self.picotts = PicoTTS()

    # Plain pass-through accessors for the engine configuration.  No
    # validation or side effects: each setter simply stores the value and
    # each getter returns it.  (A future revision could expose these as
    # public attributes, but the property form is kept for API stability.)
    @property
    def locale(self):
        return self._locale

    @locale.setter
    def locale(self, v):
        self._locale = v

    @property
    def engine(self):
        return self._engine

    @engine.setter
    def engine(self, v):
        self._engine = v

    @property
    def voice(self):
        return self._voice

    @voice.setter
    def voice(self, v):
        self._voice = v

    @property
    def pitch(self):
        return self._pitch

    @pitch.setter
    def pitch(self, v):
        self._pitch = v

    @property
    def speed(self):
        return self._speed

    @speed.setter
    def speed(self, v):
        self._speed = v

    def synthesize(self, txt, mode='txt'):
        """Synthesize ``txt`` and return raw WAV bytes (or None on failure).

        :param txt: Text (``mode='txt'``) or IPA transcription
            (``mode='ipa'``) to synthesize.
        :param mode: ``'txt'`` or ``'ipa'``; support depends on the engine.
        :return: WAV data as bytes, or None when the remote server fails or
            the engine produced nothing.
        :raises Exception: On an unknown engine, or an unsupported mode for
            the mary/pico engines.
        """
        if self._host_tts == 'local':

            wav = None

            if self.engine == 'mary':

                self.marytts.voice = self._voice
                self.marytts.locale = self._locale

                if mode == 'txt':
                    wav = self.marytts.synth_wav(txt)
                elif mode == 'ipa':
                    xs = ipa2mary('ipa', txt)
                    wav = self.marytts.synth_wav(xs, fmt='xs')
                else:
                    raise Exception("unknown mary mode '%s'" % mode)

            elif self.engine == 'espeak':

                if mode == 'txt':

                    self.espeak.voice = self._voice
                    self.espeak.speed = self._speed
                    self.espeak.pitch = self._pitch
                    wav = self.espeak.synth_wav(txt)

                elif mode == 'ipa':
                    xs = ipa2xsampa('ipa', txt)
                    logging.debug('synthesize: %s %s -> %s' %
                                  (txt, mode, repr(xs)))
                    wav = self.espeak.synth_wav(xs, fmt='xs')
                # NOTE(review): an unknown mode falls through with wav=None
                # here (logged as NO WAV below) rather than raising, unlike
                # the mary/pico branches — preserved as-is.

            elif self.engine == 'pico':

                if mode == 'txt':

                    self.picotts.voice = self._voice
                    wav = self.picotts.synth_wav(txt)

                else:
                    # Bug fix: this branch belongs to the pico engine but the
                    # original message said "unknown espeak mode".
                    raise Exception("unknown pico mode '%s'" % mode)
            else:

                raise Exception("unknown engine '%s'" % self.engine)

        else:

            # Remote mode: delegate synthesis to the TTS HTTP server.
            # NOTE(review): ``urllib.urlencode`` is Python-2 API (Python 3
            # moved it to urllib.parse.urlencode); this module appears to
            # target Python 2 — confirm before porting.
            args = {
                'l': self._locale,
                'v': self._voice,
                'e': self._engine,
                'm': mode,
                't': txt.encode('utf8')
            }
            url = 'http://%s:%s/tts/synth?%s' % (
                self._host_tts, self._port_tts, urllib.urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            wav = response.content

        if wav:
            logging.debug('synthesize: %s %s -> WAV' % (txt, mode))
        else:
            logging.error('synthesize: %s %s -> NO WAV' % (txt, mode))

        return wav

    # Play WAV bytes locally via the pulse player, or POST them to the remote
    # TTS server's /tts/play endpoint.
    # NOTE(review): ``async`` became a reserved keyword in Python 3.7, so
    # this signature is a SyntaxError on modern Python — this module is
    # Python-2-only as written; rename the parameter when porting.
    def play_wav(self, wav, async=False):

        if self._host_tts == 'local':

            if wav:
                self.player.play(wav, async)
            else:
                # No data to play is a caller error, not a silent no-op.
                raise Exception('no wav given')

        else:

            url = 'http://%s:%s/tts/play' % (self._host_tts, self._port_tts)

            if async:
                url += '?async=t'

            # NOTE(review): the response is not checked here — presumably
            # fire-and-forget; confirm whether errors should be surfaced.
            response = requests.post(url, data=wav)