def _synthesize_single_helper(self, text, voice_code, output_file_path=None):
    """
    Helper function to synthesize a single text fragment
    via a Python call to Speect.

    The caller can choose whether the output file should be written
    to disk or not: the audio is saved (with ``save_riff``) only if
    ``output_file_path`` is not ``None``.

    :param text: the text to be synthesized
    :param voice_code: the voice code (ignored in this example)
    :param output_file_path: the path of the output audio file,
                             or ``None`` to skip writing to disk
    :rtype: tuple (duration, sample_rate, encoding, data)
    """
    #
    # NOTE in this example, we assume that the Speect voice data files
    #      are located in the same directory of this .py source file
    #      and that the voice JSON file is called "voice.json"
    #
    # NOTE the voice_code value is ignored in this example,
    #      but in general one might select a voice file to load,
    #      depending on voice_code
    #
    voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
    voice = speect.SVoice(voice_json_path)
    utt = voice.synth(text)
    audio = utt.features["audio"]
    if output_file_path is not None:
        audio.save_riff(gf.safe_str(output_file_path))

    # get length and data using speect Python API
    waveform = audio.get_audio_waveform()
    audio_sample_rate = int(waveform["samplerate"])
    # force true division, so that the duration is not truncated to
    # whole seconds when "/" is integer division (Python 2 default)
    audio_length = TimeValue(float(audio.num_samples()) / audio_sample_rate)
    audio_format = "pcm16"
    # interpret the raw buffer as 16-bit integers and scale by 32768;
    # frombuffer replaces the deprecated binary mode of numpy.fromstring,
    # and astype() makes a writable float64 copy of the read-only view
    audio_samples = numpy.frombuffer(
        waveform["samples"],
        dtype=numpy.int16
    ).astype("float64") / 32768

    # return data
    return (audio_length, audio_sample_rate, audio_format, audio_samples)
def test_safe_str(self):
    """
    Check ``gf.safe_str`` on ``None`` and on a few Unicode strings.
    """
    # None is expected to yield None
    self.assertIsNone(gf.safe_str(None))

    # (input, expected output) pairs
    cases = (
        (u"", ""),
        (u"foo", "foo"),
        (u"foà", "foà"),
    )
    for source, expected in cases:
        self.assertEqual(gf.safe_str(source), expected)
def _synthesize_single_helper(self, text, voice_code, output_file_path=None):
    """
    Helper function to synthesize a single text fragment
    via a Python call to Speect.

    The caller can choose whether the output file should be written
    to disk or not: the audio is saved (with ``save_riff``) only if
    ``output_file_path`` is not ``None``.

    :param text: the text to be synthesized
    :param voice_code: the voice code (ignored in this example)
    :param output_file_path: the path of the output audio file,
                             or ``None`` to skip writing to disk
    :rtype: tuple (duration, sample_rate, encoding, data)
    """
    #
    # NOTE in this example, we assume that the Speect voice data files
    #      are located in the same directory of this .py source file
    #      and that the voice JSON file is called "voice.json"
    #
    # NOTE the voice_code value is ignored in this example,
    #      but in general one might select a voice file to load,
    #      depending on voice_code
    #
    voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
    voice = speect.SVoice(voice_json_path)
    utt = voice.synth(text)
    audio = utt.features["audio"]
    if output_file_path is not None:
        audio.save_riff(gf.safe_str(output_file_path))

    # get length and data using speect Python API
    waveform = audio.get_audio_waveform()
    audio_sample_rate = int(waveform["samplerate"])
    # force true division, so that the duration is not truncated to
    # whole seconds when "/" is integer division (Python 2 default)
    audio_length = TimeValue(float(audio.num_samples()) / audio_sample_rate)
    audio_format = "pcm16"
    # interpret the raw buffer as 16-bit integers and scale by 32768;
    # frombuffer replaces the deprecated binary mode of numpy.fromstring,
    # and astype() makes a writable float64 copy of the read-only view
    audio_samples = numpy.frombuffer(
        waveform["samples"],
        dtype=numpy.int16
    ).astype("float64") / 32768

    # return data
    return (audio_length, audio_sample_rate, audio_format, audio_samples)
def __str__(self):
    """
    Return the result of ``__unicode__()`` passed through ``gf.safe_str``.
    """
    unicode_repr = self.__unicode__()
    return gf.safe_str(unicode_repr)
def _synthesize_single_python_helper(
    self,
    text,
    voice_code,
    output_file_path=None,
    return_audio_data=True
):
    """
    Helper function to synthesize a single text fragment
    via a Python call to Speect.

    If ``output_file_path`` is ``None``,
    the audio data will not persist to file at the end of the method.

    If ``return_audio_data`` is ``False``, the audio data is not read
    back and ``(True, None)`` is returned.

    If ``text`` is empty, nothing is synthesized and a zero duration
    is returned, with ``None`` for sample rate, encoding, and data.

    :param text: the text to be synthesized
    :param voice_code: the voice code (ignored in this example)
    :param output_file_path: the path of the output audio file,
                             or ``None`` to skip writing to disk
    :param bool return_audio_data: if ``True``, read and return
                                   the synthesized audio data
    :rtype: tuple (result, (duration, sample_rate, encoding, data))
            or tuple (result, None)
    """
    # return zero if text is the empty string
    if len(text) == 0:
        #
        # NOTE values of sample_rate, encoding, data
        #      do not matter if the duration is 0.000,
        #      so set them to None instead of the more precise:
        #      return (True, (TimeValue("0.000"), 16000, "pcm_s16le", numpy.array([])))
        #
        self.log(u"len(text) is zero: returning 0.000")
        return (True, (TimeValue("0.000"), None, None, None))

    #
    # NOTE in this example, we assume that the Speect voice data files
    #      are located in the same directory of this .py source file
    #      and that the voice JSON file is called "voice.json"
    #
    # NOTE the voice_code value is ignored in this example,
    #      since we have only one TTS voice,
    #      but in general one might select a voice file to load,
    #      depending on voice_code;
    #      in fact, we could have created the ``voice`` object
    #      only once, in the constructor, instead of creating it
    #      each time this function is invoked,
    #      achieving slightly faster synthesis
    #
    voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
    voice = speect.SVoice(voice_json_path)
    utt = voice.synth(text)
    audio = utt.features["audio"]
    if output_file_path is None:
        self.log(u"output_file_path is None => not saving to file")
    else:
        self.log(u"output_file_path is not None => saving to file...")
        # NOTE apparently, save_riff needs the path to be a byte string
        audio.save_riff(gf.safe_str(output_file_path))
        self.log(u"output_file_path is not None => saving to file... done")

    # return immediately if returning audio data is not needed
    if not return_audio_data:
        # this branch runs when the flag is False: say so in the log
        self.log(u"return_audio_data is False => return immediately")
        return (True, None)

    # get length and data using speect Python API
    self.log(u"return_audio_data is True => read and return audio data")
    waveform = audio.get_audio_waveform()
    audio_sample_rate = int(waveform["samplerate"])
    # force true division, so that the duration is not truncated to
    # whole seconds when "/" is integer division (Python 2 default)
    audio_length = TimeValue(float(audio.num_samples()) / audio_sample_rate)
    audio_format = "pcm16"
    # interpret the raw buffer as 16-bit integers and scale by 32768;
    # frombuffer replaces the deprecated binary mode of numpy.fromstring,
    # and astype() makes a writable float64 copy of the read-only view
    audio_samples = numpy.frombuffer(
        waveform["samples"],
        dtype=numpy.int16
    ).astype("float64") / 32768

    return (True, (
        audio_length,
        audio_sample_rate,
        audio_format,
        audio_samples
    ))