def do_it(voicefile, text):
    """Synthesize ``text`` and print the HTS label of every segment.

    The voice is loaded from ``voicefile``, ``text`` is synthesized with it,
    and the ``hts_label`` feature of each item of the resulting utterance's
    "Segment" relation is printed, one label per line.

    :param voicefile: path of the voice definition, passed to ``speect.SVoice``
    :param text: the text to synthesize
    """
    voice = speect.SVoice(voicefile)
    utt = voice.synth(text)
    for item in utt.relation_get("Segment"):
        # print(x) with exactly one argument produces the same output under
        # the Python 2 print statement and the Python 3 print function,
        # whereas the bare ``print x`` form is a syntax error on Python 3.
        print(item["hts_label"])
def _synthesize_single_helper(self, text, voice_code, output_file_path=None):
    """
    Synthesize a single text fragment via a Python call to Speect.

    The caller can choose whether the synthesized audio should also be
    written to disk, by passing a path in ``output_file_path``.

    :param text: the text to synthesize
    :param voice_code: the voice code (ignored here, see the NOTE below)
    :param output_file_path: if not ``None``, the audio is also saved
                             to this path with ``save_riff``
    :rtype: tuple (duration, sample_rate, encoding, data)
    """
    #
    # NOTE in this example, we assume that the Speect voice data files
    #      are located in the same directory of this .py source file
    #      and that the voice JSON file is called "voice.json"
    #
    # NOTE the voice_code value is ignored in this example,
    #      but in general one might select a voice file to load,
    #      depending on voice_code
    #
    voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
    voice = speect.SVoice(voice_json_path)
    utt = voice.synth(text)
    audio = utt.features["audio"]
    if output_file_path is not None:
        audio.save_riff(gf.safe_str(output_file_path))

    # get length and data using speect Python API
    waveform = audio.get_audio_waveform()
    audio_sample_rate = int(waveform["samplerate"])
    # float() forces true division: if num_samples() returns an int,
    # Python 2 (without ``from __future__ import division``) would floor
    # the quotient and truncate the duration to whole seconds
    audio_length = TimeValue(float(audio.num_samples()) / audio_sample_rate)
    audio_format = "pcm16"
    # numpy.fromstring is deprecated for binary input; frombuffer is its
    # documented replacement. The read-only view it returns is copied by
    # astype(), and dividing by 32768 scales int16 samples into [-1.0, 1.0)
    audio_samples = numpy.frombuffer(
        waveform["samples"], dtype=numpy.int16
    ).astype("float64") / 32768

    # return data
    return (audio_length, audio_sample_rate, audio_format, audio_samples)
def _synthesize_single_python_helper(
    self,
    text,
    voice_code,
    output_file_path=None,
    return_audio_data=True
):
    """
    Synthesize a single text fragment via a Python call to Speect.

    If ``output_file_path`` is ``None``,
    the audio data will not persist to file at the end of the method.

    If ``return_audio_data`` is ``False``, the audio data is not read back
    and ``(True, None)`` is returned.

    :param text: the text to synthesize
    :param voice_code: the voice code (ignored here, see the NOTE below)
    :param output_file_path: if not ``None``, the audio is also saved
                             to this path with ``save_riff``
    :param return_audio_data: if ``True``, read and return the audio data
    :rtype: tuple (result, (duration, sample_rate, encoding, data))
    """
    # return zero if text is the empty string
    if len(text) == 0:
        #
        # NOTE values of sample_rate, encoding, data
        #      do not matter if the duration is 0.000,
        #      so set them to None instead of the more precise:
        #      return (True, (TimeValue("0.000"), 16000, "pcm_s16le", numpy.array([])))
        #
        self.log(u"len(text) is zero: returning 0.000")
        return (True, (TimeValue("0.000"), None, None, None))

    #
    # NOTE in this example, we assume that the Speect voice data files
    #      are located in the same directory of this .py source file
    #      and that the voice JSON file is called "voice.json"
    #
    # NOTE the voice_code value is ignored in this example,
    #      since we have only one TTS voice,
    #      but in general one might select a voice file to load,
    #      depending on voice_code;
    #      in fact, we could have created the ``voice`` object
    #      only once, in the constructor, instead of creating it
    #      each time this function is invoked,
    #      achieving slightly faster synthesis
    #
    voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
    voice = speect.SVoice(voice_json_path)
    utt = voice.synth(text)
    audio = utt.features["audio"]
    if output_file_path is None:
        self.log(u"output_file_path is None => not saving to file")
    else:
        self.log(u"output_file_path is not None => saving to file...")
        # NOTE apparently, save_riff needs the path to be a byte string
        audio.save_riff(gf.safe_str(output_file_path))
        self.log(u"output_file_path is not None => saving to file... done")

    # return immediately if returning audio data is not needed
    if not return_audio_data:
        # this branch runs when the flag is False; the message used to
        # claim the opposite, which made the log misleading
        self.log(u"return_audio_data is False => return immediately")
        return (True, None)

    # get length and data using speect Python API
    self.log(u"return_audio_data is True => read and return audio data")
    waveform = audio.get_audio_waveform()
    audio_sample_rate = int(waveform["samplerate"])
    # float() forces true division: if num_samples() returns an int,
    # Python 2 (without ``from __future__ import division``) would floor
    # the quotient and truncate the duration to whole seconds
    audio_length = TimeValue(float(audio.num_samples()) / audio_sample_rate)
    audio_format = "pcm16"
    # numpy.fromstring is deprecated for binary input; frombuffer is its
    # documented replacement. The read-only view it returns is copied by
    # astype(), and dividing by 32768 scales int16 samples into [-1.0, 1.0)
    audio_samples = numpy.frombuffer(
        waveform["samples"], dtype=numpy.int16
    ).astype("float64") / 32768

    return (True, (
        audio_length,
        audio_sample_rate,
        audio_format,
        audio_samples
    ))
def load_voice(voice_file="voice.hts.json"):
    """Load a Speect voice and replace parts of its front end.

    After loading the voice definition, this function:

    * sets the voice's "g2p" data to a Festival-style rewrite-rule G2P built
      from the rules and sets of the ``ibibio_g2p`` module;
    * sets the voice's "syllabification" data to a rewrite-rule
      syllabifier built from the rules and sets of ``ibibio_syll``;
    * replaces the "Tokenize", "Normalize", "Phrasify", "LexLookup" and
      "Pauses" utterance processors with callbacks to the Python
      processors found in ``speect.modules``.

    :param voice_file: path of the voice definition file to load; defaults
                       to ``"voice.hts.json"``, the path that was previously
                       hard-coded, so existing calls behave as before
    :returns: the configured ``speect.SVoice`` object
    """
    import speect
    import speect.uttproc_cb  # utterance callback interface

    # load voice json file
    voice = speect.SVoice(voice_file)  # load voice definition

    # load and add g2p
    import speect.modules.g2p_rewrites_festival as g2p_rewrites
    import ibibio_g2p
    ig2p = g2p_rewrites.G2P_Rewrites_Festival(ibibio_g2p.rules, ibibio_g2p.sets)

    # set g2p in voice
    voice.data_set("g2p", ig2p)

    # load and add syllabification
    import speect.modules.syllab_rewrites as syllab_rewrites
    import ibibio_syll
    isyll = syllab_rewrites.Syllab_Rewrites(ibibio_syll.rules, ibibio_syll.sets)

    # set syllabification in voice
    voice.data_set("syllabification", isyll)

    #
    # Create utterance processors
    #

    # create tokenizer
    import speect.modules.tokenize_processor as tokenize_processor
    tok_utt_proc = speect.SUttProcessor.callback(
        tokenize_processor.utt_processor)

    # create normalizer
    import speect.modules.normalize_processor as normalize_processor
    norm_utt_proc = speect.SUttProcessor.callback(
        normalize_processor.utt_processor)

    # create phrasing processor
    import speect.modules.phrasing_processor as phrasing_processor
    phrasing_utt_proc = speect.SUttProcessor.callback(
        phrasing_processor.utt_processor)

    # create lexical lookup processor
    import speect.modules.lexical_processor as lexical_processor
    lexlookup_utt_proc = speect.SUttProcessor.callback(
        lexical_processor.utt_processor)

    # create pause insertion processor
    import speect.modules.pause_processor as pause_processor
    pause_utt_proc = speect.SUttProcessor.callback(
        pause_processor.utt_processor)

    #
    # Replace voice definition file processors with above
    #
    voice.uttProcessor_set("Tokenize", tok_utt_proc)
    voice.uttProcessor_set("Normalize", norm_utt_proc)
    voice.uttProcessor_set("Phrasify", phrasing_utt_proc)
    voice.uttProcessor_set("LexLookup", lexlookup_utt_proc)
    voice.uttProcessor_set("Pauses", pause_utt_proc)

    return voice
import speect
import speect.uttproc_cb


def do_it(utt):
    """Utterance-processor callback: add a two-item "Token" relation.

    A new relation named "Token" is created on ``utt`` and two items are
    appended to it, whose "name" features are "first" and "second".
    """
    tokens = utt.relation_new("Token")
    for token_name in ("first", "second"):
        token = tokens.append()
        token["name"] = token_name


# wrap the Python function so that it can be installed in a voice
utt_processor = speect.SUttProcessor.callback(do_it)

# load the voice and swap its "Tokenize" processor for the callback above
v = speect.SVoice("/home/aby/Development/testing_voices/eng-ZA/voice.txt")
v.uttProcessor_set("Tokenize", utt_processor)

# NOTE(review): the second argument presumably names the utterance type
# to run -- confirm against the Speect Python API
utt = v.synth("hello world", "text-to-segments")
print(utt)
def loadvoice(self, voicename, voicefilename):
    """Load a voice from file and register it under ``voicename``.

    A ``speect.SVoice`` is created from ``voicefilename`` and stored in
    ``self.voices`` with ``voicename`` as key; an info message is logged
    before loading and another one once the voice has been created.

    :param voicename: key under which the voice is stored in ``self.voices``
    :param voicefilename: path of the voice file, passed to ``speect.SVoice``
    """
    log.info("Loading voice from file '%s'" % voicefilename)
    loaded_voice = speect.SVoice(voicefilename)
    log.info("Voice '%s' loaded." % voicename)
    self.voices[voicename] = loaded_voice