Example #1
import io
import wave

import pyaudio
from picotts import PicoTTS


def speak_for_me(msg):
    # Synthesize the message to WAV bytes and open them as an in-memory wave stream.
    picotts = PicoTTS()
    wavs = picotts.synth_wav(msg)
    wav = wave.open(io.BytesIO(wavs))

    # Play the audio on the default output device.
    p = pyaudio.PyAudio()
    stream = p.open(format=p.get_format_from_width(wav.getsampwidth()),
                    channels=wav.getnchannels(),
                    rate=wav.getframerate(),
                    output=True)

    chunk = 1024
    data = wav.readframes(chunk)

    while data:
        stream.write(data)
        data = wav.readframes(chunk)

    stream.stop_stream()
    stream.close()
    p.terminate()
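A minimal call, assuming the SVOX Pico voice data and a working audio output device are available:

speak_for_me('Hello world!')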
Example #2
import librosa
import soundfile as sf
from picotts import PicoTTS


# Method taken from its host class; librosa and soundfile handle the resampling.
def create_voice_file(self, voice_text, voice_type, file_path):
    picotts = PicoTTS()
    picotts.voice = voice_type
    wavs = picotts.synth_wav(voice_text)
    # The context manager closes the file; an explicit close() is redundant.
    with open(file_path, mode="wb") as f:
        f.write(wavs)
    # Resample the synthesized audio to 8 kHz and overwrite the file.
    y, s = librosa.load(file_path, sr=8000)
    sf.write(file_path, y, s)
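A hedged usage sketch (the enclosing class and the instance name `helper` are hypothetical):

helper.create_voice_file('Hello world!', 'en-US', '/tmp/hello.wav')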
Example #3
    def test_synth_wav_de(self):

        picotts = PicoTTS(voice='de-DE')
        wavs = picotts.synth_wav('Hallo Welt!')
        wav = wave.open(io.BytesIO(wavs))

        # SVOX Pico produces mono 16-bit WAV data at 16 kHz.
        self.assertEqual(wav.getnchannels(), 1)
        self.assertEqual(wav.getframerate(), 16000)
        self.assertGreater(wav.getnframes(), 20000)
Example #4
    def __init__(self,
                 library_path,
                 model_path,
                 keyword_paths,
                 sensitivities,
                 input_device_index=None,
                 output_path=None):
        """
        Constructor.

        :param library_path: Absolute path to Porcupine's dynamic library.
        :param model_file_path: Absolute path to the model parameter file.
        :param keyword_file_paths: List of absolute paths to keyword files.
        :param sensitivities: Sensitivity parameter for each wake word. For more information refer to
        'include/pv_porcupine.h'. It uses the
        same sensitivity value for all keywords.
        :param input_device_index: Optional argument. If provided, audio is recorded from this input device. Otherwise,
        the default audio input device is used.
        :param output_path: If provided recorded audio will be stored in this location at the end of the run.
        """

        super(BertaDeepSpeech, self).__init__()

        self._library_path = library_path
        self._model_path = model_path
        self._keyword_paths = keyword_paths
        self._sensitivities = sensitivities
        self._input_device_index = input_device_index
        self.db_model = None
        self.pt = PicoTTS()
        self.pa = pyaudio.PyAudio()

        self._output_path = output_path
        if self._output_path is not None:
            self._recorded_frames = []

        # Load the DeepSpeech model
        print('Initializing model...')
        dirname = os.path.dirname(os.path.abspath(__file__))
        model_name = glob.glob(os.path.join(dirname, 'libs/*.tflite'))[0]
        logging.info("Model: %s", model_name)
        self.model = deepspeech.Model(model_name)
        try:
            scorer_name = glob.glob(os.path.join(dirname, '*.scorer'))[0]
            logging.info("Language model: %s", scorer_name)
            self.model.enableExternalScorer(scorer_name)
        except IndexError:
            # The external scorer is optional; continue without one.
            pass
Example #5
    def speak(self, text):
        if os.name == 'nt':
            self.voice_output_engine.say(text)
            self.voice_output_engine.runAndWait()
        else:
            if self.engine == 'espeak':
                espeak_command = [self.espeak_exec_path, '-v' + self.lang, '-s' + str(self.speed), '-a' + str(self.amplitude),
                                  '-p' + str(self.pitch), '-w' + self.path, text]
                # generate the file with eSpeak
                subprocess.call(espeak_command, stderr=sys.stderr)
                f = wave.open(self.path, "rb")
            elif self.engine == 'picotts':
                self.voice_output_engine = PicoTTS()
                self.voice_output_engine.voice = self.lang
                synth = self.voice_output_engine.synth_wav(text)
                f = wave.open(io.BytesIO(synth))
            else:
                # avoid passing an unbound file handle to play()
                raise ValueError("unsupported engine '%s'" % self.engine)
            self.play(f)
Example #6
    def __init__(self, args):
        self.lang = args.get('lang', 'fr-FR')
        self.path = args.get('path', r"/tmp/output.wav")
        self.pitch = args.get('pitch', 50)
        self.amplitude = args.get('amplitude', 90)
        self.speed = args.get('speed', 100)
        self.espeak_exec_path = args.get('espeak_exec_path', r"/usr/bin/espeak")
        self.engine = args.get('engine', 'espeak')

        if os.name == 'nt':
            self.voice_output_engine = pyttsx3.init()
            if self.lang == 'fr-FR' or self.lang == 'fr_FR':
                voice = self.voice_output_engine.getProperty('voices')[0]  # the french voice
                self.voice_output_engine.setProperty('voice', voice.id)
        else:
            if self.engine == 'picotts':
                self.voice_output_engine = PicoTTS()
                self.voice_output_engine.voice = self.lang
Example #7
def setup_TTS():

    # TTS objects
    picotts = PicoTTS()
    p = pyaudio.PyAudio()

    outport = mido.open_output()

    # Prefer a Keystation controller if one is connected, else the first input.
    input1 = mido.get_input_names()[0]
    for inp in mido.get_input_names():
        if 'Keystation' in inp:
            input1 = inp

    return picotts, p, input1, outport
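A quick usage sketch, assuming a MIDI backend and at least one MIDI input are present:

picotts, p, input1, outport = setup_TTS()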
Example #8
    def __init__(
            self,
            host_tts='local',
            port_tts=8300,
            locale='en_US',
            engine='mary',
            voice='cmu-rms-hsmm',
            pitch=50,  # 0-99
            speed=175):  # approx. words per minute

        self._host_tts = host_tts
        self._port_tts = port_tts
        self._locale = locale
        self._engine = engine
        self._voice = voice
        self._pitch = pitch
        self._speed = speed

        if host_tts == 'local':
            self.player = PulsePlayer('Local TTS Client')
            self.espeak = ESpeakNG()
            self.marytts = MaryTTS()
            self.picotts = PicoTTS()
Example #9
class TTS(object):
    def __init__(
            self,
            host_tts='local',
            port_tts=8300,
            locale=DEFAULT_MARY_LOCALE,
            engine='mary',
            voice=DEFAULT_MARY_VOICE,
            pitch=50,  # 0-99
            speed=175):  # approx. words per minute

        self._host_tts = host_tts
        self._port_tts = port_tts
        self._locale = locale
        self._engine = engine
        self._voice = voice
        self._pitch = pitch
        self._speed = speed

        if host_tts == 'local':
            self.marytts = MaryTTS()
            # lazy-loading to reduce package dependencies
            self.picotts = None

    @property
    def locale(self):
        return self._locale

    @locale.setter
    def locale(self, v):
        self._locale = v

    @property
    def engine(self):
        return self._engine

    @engine.setter
    def engine(self, v):
        self._engine = v

    @property
    def voice(self):
        return self._voice

    @voice.setter
    def voice(self, v):
        self._voice = v

    @property
    def pitch(self):
        return self._pitch

    @pitch.setter
    def pitch(self, v):
        self._pitch = v

    @property
    def speed(self):
        return self._speed

    @speed.setter
    def speed(self, v):
        self._speed = v

    def synthesize(self, txt, mode='txt'):

        if self._host_tts == 'local':

            # import pdb; pdb.set_trace()

            wav = None

            if self.engine == 'mary':

                self.marytts.voice = self._voice
                self.marytts.locale = self._locale

                if mode == 'txt':
                    wav = self.marytts.synth_wav(txt)
                elif mode == 'ipa':
                    xs = ipa2mary('ipa', txt)
                    wav = self.marytts.synth_wav(xs, fmt='xs')
                elif mode == 'mary':
                    wav = self.marytts.synth_wav(txt, fmt='xs')
                else:
                    raise Exception("unknown mary mode '%s'" % mode)

            elif self.engine == 'pico':

                if mode == 'txt':

                    if not self.picotts:
                        from picotts import PicoTTS
                        self.picotts = PicoTTS()

                    self.picotts.voice = self._voice
                    wav = self.picotts.synth_wav(txt)
                    # logging.debug ('synthesize: %s %s -> %s' % (txt, mode, repr(wav)))

                else:
                    raise Exception("unknown pico mode '%s'" % mode)
            else:

                raise Exception("unknown engine '%s'" % self.engine)

        else:

            args = {
                'l': self._locale,
                'v': self._voice,
                'e': self._engine,
                'm': mode,
                't': txt.encode('utf8')
            }
            url = 'http://%s:%s/tts/synth?%s' % (
                self._host_tts, self._port_tts, urllib.parse.urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            wav = response.content

        if wav:
            logging.debug('synthesize: %s %s -> WAV' % (txt, mode))
        else:
            logging.error('synthesize: %s %s -> NO WAV' % (txt, mode))

        return wav

    def play_wav(self, wav, async_play=False):

        if self._host_tts == 'local':

            if wav:
                with io.BytesIO(wav) as tmp_buffer:
                    wave_read = sa.wave.open(tmp_buffer, 'rb')
                    wave_obj = sa.WaveObject.from_wave_read(wave_read)
                    play_obj = wave_obj.play()
                    if not async_play:
                        play_obj.wait_done()
            else:
                raise Exception('no wav data given')

        else:

            url = 'http://%s:%s/tts/play' % (self._host_tts, self._port_tts)

            if async_play:
                url += '?async=t'

            response = requests.post(url, data=wav)

    def say(self, utterance, async_play=False):

        wav = self.synthesize(utterance)
        self.play_wav(wav, async_play=async_play)

    def say_phn(self, phn, phn_format='mary', async_play=False):

        wav = self.synthesize(phn, mode=phn_format)
        self.play_wav(wav, async_play=async_play)

    def gen_phn(self, word, model='dicts/de_g2p_model-6', phn_format='mary'):

        if self._host_tts == 'local':

            if self.engine == 'mary':

                self.marytts.voice = self._voice
                self.marytts.locale = self._locale

                mp = self.marytts.g2p(word)
                if phn_format == 'mary':
                    return mp
                elif phn_format == 'ipa':
                    return mary2ipa(word, mp)
                else:
                    raise Exception("Format not supported: '%s'" % phn_format)

            elif self.engine == 'sequitur':

                return sequiturclient.sequitur_gen_ipa(model, word)

            else:
                raise Exception("unknown engine '%s'" % self.engine)

        else:
            args = {
                'l': self._locale,
                'v': self._voice,
                'e': self._engine,
                't': word.encode('utf8')
            }
            url = 'http://%s:%s/tts/g2p?%s' % (self._host_tts, self._port_tts,
                                               urllib.parse.urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            return response.json()['ipa']
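A minimal usage sketch for this client, assuming the supporting modules (MaryTTS, the player and phonetics helpers) resolve; the 'pico' engine path only needs py-picotts installed:

tts = TTS(host_tts='local', engine='pico', voice='en-US')
tts.say('Hello world!')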
Example #10
    def synthesize(self, txt, mode='txt'):

        if self._host_tts == 'local':

            # import pdb; pdb.set_trace()

            wav = None

            if self.engine == 'mary':

                self.marytts.voice = self._voice
                self.marytts.locale = self._locale

                if mode == 'txt':
                    wav = self.marytts.synth_wav(txt)
                elif mode == 'ipa':
                    xs = ipa2mary('ipa', txt)
                    wav = self.marytts.synth_wav(xs, fmt='xs')
                elif mode == 'mary':
                    wav = self.marytts.synth_wav(txt, fmt='xs')
                else:
                    raise Exception("unknown mary mode '%s'" % mode)

            elif self.engine == 'pico':

                if mode == 'txt':

                    if not self.picotts:
                        from picotts import PicoTTS
                        self.picotts = PicoTTS()

                    self.picotts.voice = self._voice
                    wav = self.picotts.synth_wav(txt)
                    # logging.debug ('synthesize: %s %s -> %s' % (txt, mode, repr(wav)))

                else:
                    raise Exception("unknown pico mode '%s'" % mode)
            else:

                raise Exception("unknown engine '%s'" % self.engine)

        else:

            args = {
                'l': self._locale,
                'v': self._voice,
                'e': self._engine,
                'm': mode,
                't': txt.encode('utf8')
            }
            url = 'http://%s:%s/tts/synth?%s' % (
                self._host_tts, self._port_tts, urllib.parse.urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            wav = response.content

        if wav:
            logging.debug('synthesize: %s %s -> WAV' % (txt, mode))
        else:
            logging.error('synthesize: %s %s -> NO WAV' % (txt, mode))

        return wav
Example #11
class TTS(object):
    def __init__(
            self,
            host_tts='local',
            port_tts=8300,
            locale='en_US',
            engine='mary',
            voice='cmu-rms-hsmm',
            pitch=50,  # 0-99
            speed=175):  # approx. words per minute

        self._host_tts = host_tts
        self._port_tts = port_tts
        self._locale = locale
        self._engine = engine
        self._voice = voice
        self._pitch = pitch
        self._speed = speed

        if host_tts == 'local':
            self.player = PulsePlayer('Local TTS Client')
            self.espeak = ESpeakNG()
            self.marytts = MaryTTS()
            self.picotts = PicoTTS()

    @property
    def locale(self):
        return self._locale

    @locale.setter
    def locale(self, v):
        self._locale = v

    @property
    def engine(self):
        return self._engine

    @engine.setter
    def engine(self, v):
        self._engine = v

    @property
    def voice(self):
        return self._voice

    @voice.setter
    def voice(self, v):
        self._voice = v

    @property
    def pitch(self):
        return self._pitch

    @pitch.setter
    def pitch(self, v):
        self._pitch = v

    @property
    def speed(self):
        return self._speed

    @speed.setter
    def speed(self, v):
        self._speed = v

    def synthesize(self, txt, mode='txt'):

        if self._host_tts == 'local':

            # import pdb; pdb.set_trace()

            wav = None

            if self.engine == 'mary':

                self.marytts.voice = self._voice
                self.marytts.locale = self._locale

                if mode == 'txt':
                    wav = self.marytts.synth_wav(txt)
                elif mode == 'ipa':
                    xs = ipa2mary('ipa', txt)
                    wav = self.marytts.synth_wav(xs, fmt='xs')
                else:
                    raise Exception("unknown mary mode '%s'" % mode)

            elif self.engine == 'espeak':

                if mode == 'txt':

                    self.espeak.voice = self._voice
                    self.espeak.speed = self._speed
                    self.espeak.pitch = self._pitch
                    wav = self.espeak.synth_wav(txt)
                    # logging.debug ('synthesize: %s %s -> %s' % (txt, mode, repr(wav)))

                elif mode == 'ipa':
                    xs = ipa2xsampa('ipa', txt)
                    logging.debug('synthesize: %s %s -> %s' %
                                  (txt, mode, repr(xs)))
                    wav = self.espeak.synth_wav(xs, fmt='xs')

            elif self.engine == 'pico':

                if mode == 'txt':

                    self.picotts.voice = self._voice
                    wav = self.picotts.synth_wav(txt)
                    # logging.debug ('synthesize: %s %s -> %s' % (txt, mode, repr(wav)))

                else:
                    raise Exception("unknown espeak mode '%s'" % mode)
            else:

                raise Exception("unknown engine '%s'" % self.engine)

        else:

            args = {
                'l': self._locale,
                'v': self._voice,
                'e': self._engine,
                'm': mode,
                't': txt.encode('utf8')
            }
            url = 'http://%s:%s/tts/synth?%s' % (
                self._host_tts, self._port_tts, urllib.parse.urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            wav = response.content

        if wav:
            logging.debug('synthesize: %s %s -> WAV' % (txt, mode))
        else:
            logging.error('synthesize: %s %s -> NO WAV' % (txt, mode))

        return wav

    def play_wav(self, wav, async_play=False):
        # 'async' is a reserved word in Python 3.7+, so the flag is named async_play.

        if self._host_tts == 'local':

            if wav:
                self.player.play(wav, async_play)
            else:
                raise Exception('no wav given')

        else:

            url = 'http://%s:%s/tts/play' % (self._host_tts, self._port_tts)

            if async_play:
                url += '?async=t'

            response = requests.post(url, data=wav)
Example #12
import io
import os
import subprocess
import sys
import wave

import pyttsx3
from picotts import PicoTTS


class VoiceOutput(object):

    def __init__(self, args):
        self.lang = args.get('lang', 'fr-FR')
        self.path = args.get('path', r"/tmp/output.wav")
        self.pitch = args.get('pitch', 50)
        self.amplitude = args.get('amplitude', 90)
        self.speed = args.get('speed', 100)
        self.espeak_exec_path = args.get('espeak_exec_path', r"/usr/bin/espeak")
        self.engine = args.get('engine', 'espeak')

        if os.name == 'nt':
            self.voice_output_engine = pyttsx3.init()
            if self.lang == 'fr-FR' or self.lang == 'fr_FR':
                voice = self.voice_output_engine.getProperty('voices')[0]  # the french voice
                self.voice_output_engine.setProperty('voice', voice.id)
        else:
            if self.engine == 'picotts':
                self.voice_output_engine = PicoTTS()
                self.voice_output_engine.voice = self.lang

    def speak(self, text):
        if os.name == 'nt':
            self.voice_output_engine.say(text)
            self.voice_output_engine.runAndWait()
        else:
            if self.engine == 'espeak':
                espeak_command = [self.espeak_exec_path, '-v' + self.lang, '-s' + str(self.speed), '-a' + str(self.amplitude),
                                 '-p' + str(self.pitch), '-w' + self.path, text]
                # generate the file with eSpeak
                subprocess.call(espeak_command, stderr=sys.stderr)
                f = wave.open(self.path, "rb")
            elif self.engine == 'picotts':
                self.voice_output_engine = PicoTTS()
                self.voice_output_engine.voice = self.lang
                synth = self.voice_output_engine.synth_wav(text)
                f = wave.open(io.BytesIO(synth))
            else:
                # avoid passing an unbound file handle to play()
                raise ValueError("unsupported engine '%s'" % self.engine)
            self.play(f)

    def play(self, f):
        import pyaudio
        # instantiate PyAudio
        p = pyaudio.PyAudio()
        # open stream
        stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                        channels=f.getnchannels(),
                        rate=f.getframerate(),
                        output=True)

        # define stream chunk
        chunk = 1024
        # read data
        data = f.readframes(chunk)

        # play stream
        while data:
            stream.write(data)
            data = f.readframes(chunk)

        # stop stream
        stream.stop_stream()
        stream.close()

        # close PyAudio
        p.terminate()
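A minimal usage sketch on a non-Windows host, assuming py-picotts and PyAudio are installed:

voice = VoiceOutput({'engine': 'picotts', 'lang': 'en-US'})
voice.speak('Hello world!')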
Example #13
class BertaDeepSpeech(Thread):
    """
    Class for wake word detection (aka Porcupine) library. It creates an input audio stream from a microphone,
    monitors it, and upon detecting the specified wake word(s) prints the detection time and index of wake word on
    console. 
    """
    def __init__(self,
                 library_path,
                 model_path,
                 keyword_paths,
                 sensitivities,
                 input_device_index=None,
                 output_path=None):
        """
        Constructor.

        :param library_path: Absolute path to Porcupine's dynamic library.
        :param model_file_path: Absolute path to the model parameter file.
        :param keyword_file_paths: List of absolute paths to keyword files.
        :param sensitivities: Sensitivity parameter for each wake word. For more information refer to
        'include/pv_porcupine.h'. It uses the
        same sensitivity value for all keywords.
        :param input_device_index: Optional argument. If provided, audio is recorded from this input device. Otherwise,
        the default audio input device is used.
        :param output_path: If provided recorded audio will be stored in this location at the end of the run.
        """

        super(BertaDeepSpeech, self).__init__()

        self._library_path = library_path
        self._model_path = model_path
        self._keyword_paths = keyword_paths
        self._sensitivities = sensitivities
        self._input_device_index = input_device_index
        self.db_model = None
        self.pt = PicoTTS()
        self.pa = pyaudio.PyAudio()

        self._output_path = output_path
        if self._output_path is not None:
            self._recorded_frames = []

        # Load the DeepSpeech model
        print('Initializing model...')
        dirname = os.path.dirname(os.path.abspath(__file__))
        model_name = glob.glob(os.path.join(dirname, 'libs/*.tflite'))[0]
        logging.info("Model: %s", model_name)
        self.model = deepspeech.Model(model_name)
        try:
            scorer_name = glob.glob(os.path.join(dirname, '*.scorer'))[0]
            logging.info("Language model: %s", scorer_name)
            self.model.enableExternalScorer(scorer_name)
        except IndexError:
            # The external scorer is optional; continue without one.
            pass

    def set_model(self, db_model):
        self.db_model = db_model

    def transcribe(self):
        # Start audio with VAD
        vad_audio = VADAudio(aggressiveness=1,
                             device=None,
                             input_rate=16000,
                             file=None)
        print("Listening (ctrl-C to exit)...")
        frames = vad_audio.vad_collector()

        # Stream from microphone to DeepSpeech using VAD
        #spinner = Halo(spinner='line')
        stream_context = self.model.createStream()
        #wav_data = bytearray()
        listening = False
        for frame in frames:
            if frame is not None:
                if not listening:
                    pixels.listen()
                    listening = True
        #        if spinner: spinner.start()
                logging.debug("streaming frame")
                stream_context.feedAudioContent(np.frombuffer(frame, np.int16))
                #if ARGS.savewav: wav_data.extend(frame)
            else:
                if listening:
                    listening = False
                    pixels.think()
            # if spinner: spinner.stop()
                logging.debug("end utterence")
                #if ARGS.savewav:
                #    vad_audio.write_wav(os.path.join(ARGS.savewav, datetime.now().strftime("savewav_%Y-%m-%d_%H-%M-%S_%f.wav")), wav_data)
                #    wav_data = bytearray()
                text = stream_context.finishStream()
                print("Recognized: %s" % text)

                log = (text, self.analyze(text))
                return (1, log)

                # unreachable: the return above exits after the first utterance
                if 'stop recording' in text:
                    vad_audio.destroy()
                    #break
                    return 1
                stream_context = self.model.createStream()

    def find_action(self, phrase):
        words = phrase.lower().split()
        default = [
            x for x in ActionProvider.plugins if 'default' in x.categories
        ][0]
        for word in words:
            action = [
                x for x in ActionProvider.plugins if word in x.categories
            ]
            if action:
                return action[0]()
        return default()

    def analyze(self, phrase):
        """Method that analyzes the phrase given, speaks and returns the answer"""
        # find the correct action to take
        action = self.find_action(phrase)
        # perform the action and get the answer
        answer = action.perform()
        # speak the answer
        self.speek(answer)
        # return answer for saving into database
        return answer

    def test_phrase(self, phrase):
        """Method used in the web application to test APIs manually"""
        # find the correct action to take
        action = self.find_action(phrase)
        # perform the action and get the answer
        answer = action.perform()
        # return answer for saving into database
        return answer

    def speek(self, answer):
        """Method that generates the audio data and plays it on the microphone"""
        self.pa = pyaudio.PyAudio()
        # 1kb of data at a time
        chunk = 1024
        # create the picotts wav
        wavs = self.pt.synth_wav(str(answer))
        # open wav for processing
        wav = wave.open(BytesIO(wavs))
        # create audio stream for output
        stream = self.pa.open(format=self.pa.get_format_from_width(
            wav.getsampwidth()),
                              channels=wav.getnchannels(),
                              rate=wav.getframerate(),
                              output=True)
        data = wav.readframes(chunk)
        pixels.speak()
        print("speaking here")

        while data:
            #print(data)
            stream.write(data)
            data = wav.readframes(chunk)
        print("done speaking")
        pixels.off()
        stream.stop_stream()
        stream.close()
        self.pa.terminate()

    def run(self):
        """
         Creates an input audio stream, initializes wake word detection (Porcupine) object, and monitors the audio
         stream for occurrences of the wake word(s). It prints the time of detection for each occurrence and index of
         wake word.
         """

        num_keywords = len(self._keyword_paths)

        keywords = list()
        for x in self._keyword_paths:
            keywords.append(
                os.path.basename(x).replace('.ppn',
                                            '').replace('_compressed',
                                                        '').split('_')[0])

        print('listening for:')
        for keyword, sensitivity in zip(keywords, self._sensitivities):
            print('- %s (sensitivity: %f)' % (keyword, sensitivity))

        porcupine = None
        pa = None
        audio_stream = None
        try:
            porcupine = Porcupine(library_path=self._library_path,
                                  model_path=self._model_path,
                                  keyword_paths=self._keyword_paths,
                                  sensitivities=self._sensitivities)

            pa = pyaudio.PyAudio()
            audio_stream = pa.open(rate=porcupine.sample_rate,
                                   channels=1,
                                   format=pyaudio.paInt16,
                                   input=True,
                                   frames_per_buffer=porcupine.frame_length,
                                   input_device_index=self._input_device_index)

            while True:
                pcm = audio_stream.read(porcupine.frame_length)

                pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)

                if self._output_path is not None:
                    self._recorded_frames.append(pcm)

                result = porcupine.process(pcm)

                if result >= 0:
                    print('[%s] Detected %s' %
                          (str(datetime.now()), keywords[result]))
                    pixels.wakeup()
                    audio_stream.close()
                    ds_result = self.transcribe()
                    #if self.transcribe():
                    if ds_result[0]:
                        audio_stream = pa.open(
                            rate=porcupine.sample_rate,
                            channels=1,
                            format=pyaudio.paInt16,
                            input=True,
                            frames_per_buffer=porcupine.frame_length,
                            input_device_index=self._input_device_index)
                    return ds_result[1]

        except KeyboardInterrupt:
            print('stopping ...')
            raise KeyboardInterrupt
        finally:
            if porcupine is not None:
                porcupine.delete()

            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            if self._output_path is not None and len(
                    self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames,
                                                axis=0).astype(np.int16)
                soundfile.write(self._output_path,
                                recorded_audio,
                                samplerate=porcupine.sample_rate,
                                subtype='PCM_16')

            pixels.off()

    _AUDIO_DEVICE_INFO_KEYS = [
        'index', 'name', 'defaultSampleRate', 'maxInputChannels'
    ]

    @classmethod
    def show_audio_devices_info(cls):
        """ Provides information regarding different audio devices available. """

        pa = pyaudio.PyAudio()

        for i in range(pa.get_device_count()):
            info = pa.get_device_info_by_index(i)
            print(', '.join("'%s': '%s'" % (k, str(info[k]))
                            for k in cls._AUDIO_DEVICE_INFO_KEYS))

        pa.terminate()
Example #14
    def test_voices(self):
        picotts = PicoTTS()

        voices = picotts.voices
        self.assertGreater(len(voices), 5)
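The voices property lists the installed SVOX Pico voices; a one-liner to inspect them, assuming py-picotts is installed:

from picotts import PicoTTS
print(PicoTTS().voices)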
Example #15
import picamera
import pyaudio
import time
import wave
import io
from picotts import PicoTTS
from google.cloud import vision
from google.cloud.vision import types
from PIL import Image, ImageDraw
from firebase import firebase
from socketIO_client import SocketIO, LoggingNamespace

firebase = firebase.FirebaseApplication('https://metronome-nyc.firebaseio.com',
                                        None)
camera = picamera.PiCamera()
picotts = PicoTTS()

SERVER = 'api.memeboard.net'
PORT = 80
TRAIN_ID = 10011
CAR_ID = 0

STATION_LIST = [0, 1, 2, 3]

station_index = 1


def playSound():
    wavs = picotts.synth_wav('Stand clear of the closing doors please.')
    wav = wave.open(io.BytesIO(wavs))
Example #16
'''
engine.say("the temperature is -21 celsius.")
engine.runAndWait()
'''
'''
import pyttsx3
engine = pyttsx3.init()

voices = engine.getProperty('voices')
for voice in voices:
    print("Voice:")
    print(" - ID: %s" % voice.id)
    print(" - Name: %s" % voice.name)
    print(" - Languages: %s" % voice.languages)
    print(" - Gender: %s" % voice.gender)
    print(" - Age: %s" % voice.age)
'''
import io
import wave
from picotts import PicoTTS

picotts = PicoTTS()
wavs = picotts.synth_wav("Hello World!")
wav = wave.open(io.BytesIO(wavs))
print(wav.getnchannels(), wav.getframerate(), wav.getnframes())

# Equivalent CLI: pico2wave -l en-US -w file.wav "This is a test of pico"
Example #17
import io
import wave
from picotts import PicoTTS

def synth_wav(sentence):
    # synth_wav() returns WAV data as bytes, not a filename.
    picotts = PicoTTS(voice='fr-FR')
    wav_data = picotts.synth_wav(sentence)
    # Sanity-check that the bytes parse as a WAV stream.
    wave.open(io.BytesIO(wav_data), 'rb')
    return wav_data
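A quick check of the returned data, assuming the fr-FR Pico voice is installed:

data = synth_wav('Bonjour tout le monde !')
print(len(data))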