def synth_text_voice_exclude(voicefile, licensefile, text_dict, outdir, opts):
    """Synthesise each spurt to a wav file while excluding its own spurt id.

    The keys of ``text_dict`` are visited in sorted order.  For every key the
    entry's ``'spurt'`` text is printed, ``sptid_exclude`` on the channel's
    ``vtbctrl`` object is set to that key, the channel output is directed to
    ``<outdir>/<key>.wav`` and the spurt is spoken.  The XML of the last
    spurt is then stored under the entry's ``'xml'`` key.

    Args:
        voicefile: Path of the voice file handed to the engine.
        licensefile: Path of the license file handed to the engine.
        text_dict: Mapping of spurt id to a dict with a ``'spurt'`` item;
            each entry gains an ``'xml'`` item (mutated in place).  Keys are
            concatenated with ``".wav"``, so they must be strings.
        outdir: Directory that receives the wav files.
        opts: Object exposing ``non_contig``, ``boost_weights`` and
            ``hts_full`` attributes.

    Exits the process with status 1 when the voice cannot be loaded.
    """
    engine = cerevoice_eng.CPRCEN_engine_new()

    # The voice is always loaded with the CPRC_VOICE_LOAD mode here.
    voice_loaded = cerevoice_eng.CPRCEN_engine_load_voice(
        engine, licensefile, "", voicefile, cerevoice_eng.CPRC_VOICE_LOAD)
    if not voice_loaded:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    channel = cerevoice_eng.CPRCEN_engine_open_default_channel(engine)

    # The control object is taken from the channel's spurt structure before
    # anything has been spoken on the channel.
    initial_spurt = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt_struct(
        engine, channel)
    vtbctrl = cerevoice.CPRC_ltcmgr_vtbctrl(initial_spurt)

    if opts.non_contig:
        vtbctrl.non_contig = 1

    if opts.boost_weights:
        voice = cerevoice_eng.CPRCEN_channel_get_cerevoice(engine, channel)
        # _weights maps a weight name to a (weight, scaling) pair.
        for weightname, (weight, scaling) in _weights.items():
            cerevoice.CPRC_cfmgr_set_weights(
                voice, weightname, len(weightname), weight, scaling)

    if opts.hts_full:
        cerevoice_eng.CPRCEN_channel_synth_type_hts(engine, channel)

    # Generate spurtxml for every spurt.
    for spurt_id in sorted(text_dict):
        entry = text_dict[spurt_id]
        spurt_txt = entry['spurt']
        # Single-argument print() prints the same under Python 2's statement.
        print(spurt_txt)

        vtbctrl.sptid_exclude = spurt_id
        wav_path = os.path.join(outdir, spurt_id + ".wav")
        cerevoice_eng.CPRCEN_engine_channel_to_file(
            engine, channel, wav_path, cerevoice_eng.CPRCEN_RIFF)
        cerevoice_eng.CPRCEN_engine_channel_speak_spurt(
            engine, channel, spurt_txt, len(spurt_txt))

        if opts.hts_full:
            # Switch the freshly spoken spurt to full HTS mode, run the
            # feature extraction and dump the resulting HTS buffer.
            spoken = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt_struct(
                engine, channel)
            cerevoice.CPRC_spurt_set_hts(spoken, cerevoice.CPRC_HTS_MODE_FULL)
            cerevoice.CPRC_featmgr_fx(spoken)
            htsfull = cerevoice.CPRC_buf_get(cerevoice.CPRC_spurt_hts(spoken))
            print(htsfull)

        # The raw XML is stored as returned; no id replacement is applied.
        entry['xml'] = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt(
            engine, channel)

    cerevoice_eng.CPRCEN_engine_delete(engine)
def synth(input_str, filename="output.wav", outdir=None, textout=False,
          ondisk=False):
    """Synthesise ``input_str`` through an engine callback.

    The voice and license files are looked up in the ``voice`` directory next
    to this module.  An ``EngineUserData`` carrying the output paths is
    wrapped in a ``CereVoiceEngineCallback`` and registered on the default
    channel before the text is spoken.

    Args:
        input_str: Text (or markup) passed to the engine's speak call.
        filename: Name of the wav output inside ``outdir``.
        outdir: Output directory; the current working directory when falsy.
        textout: When truthy, a ``.json`` path derived from the wav path
            (extension replaced) is handed to ``EngineUserData``; otherwise
            the falsy value is passed through unchanged.
        ondisk: When truthy, load the voice with ``CPRC_VOICE_LOAD_EMB``
            (audio and indexes on disk) instead of ``CPRC_VOICE_LOAD``
            (all data to RAM).

    Exits the process with status 1 when the voice cannot be loaded.  A
    failure to register the callback is reported on stderr and nothing is
    spoken.
    """
    if not outdir:
        outdir = os.getcwd()

    voice_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "voice")
    licensefile = os.path.join(voice_dir, "heather.lic")
    voicefile = os.path.join(voice_dir, "cerevoice_heather_4.0.0_48k.voice")

    # Create an engine
    engine = cerevoice_eng.CPRCEN_engine_new()

    # Set the loading mode - all data to RAM or with audio and indexes on disk
    if ondisk:
        loadmode = cerevoice_eng.CPRC_VOICE_LOAD_EMB
    else:
        loadmode = cerevoice_eng.CPRC_VOICE_LOAD

    # Load the voice
    ret = cerevoice_eng.CPRCEN_engine_load_voice(
        engine, licensefile, "", voicefile, loadmode)
    if not ret:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    wavout = os.path.join(outdir, filename)
    if textout:
        # splitext instead of slicing off four characters, so file names
        # without a ".wav"-length extension still map to a sensible path.
        textout = os.path.splitext(wavout)[0] + ".json"

    # Synthesise through the callback registered on the default channel.
    channel = cerevoice_eng.CPRCEN_engine_open_default_channel(engine)
    userdata = EngineUserData(wavout, engine, channel, textout)
    callback = CereVoiceEngineCallback(userdata)
    res = cerevoice_eng.engine_set_callback(engine, channel, callback)
    if res:
        cerevoice_eng.CPRCEN_engine_channel_speak(
            engine, channel, input_str, len(input_str), 1)
    else:
        sys.stderr.write(
            "ERROR: could not set callback, synthesis data cannot be "
            "processed\n")

    # Clean up
    cerevoice_eng.CPRCEN_engine_delete(engine)
def __init__(self):
    """Set up the engine, voice, channel, audio player and ROS publishers.

    The voice and license paths are read from the ROS parameters
    ``/speech_synthesis/voicePath`` and ``/speech_synthesis/licensePath``.
    This object itself is registered as the engine callback for the default
    channel.

    Exits the process with status 1 when the voice cannot be loaded.  A
    failure to register the callback is reported on stderr.
    """
    self.engine = cerevoice_eng.CPRCEN_engine_new()

    voice_path = rospy.get_param('/speech_synthesis/voicePath')
    license_path = rospy.get_param('/speech_synthesis/licensePath')

    # Load the voice in CPRC_VOICE_LOAD_EMB mode (audio and indexes kept on
    # disk rather than all data in RAM).
    ret = cerevoice_eng.CPRCEN_engine_load_voice(
        self.engine, license_path, "", voice_path,
        cerevoice_eng.CPRC_VOICE_LOAD_EMB)
    if not ret:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    # Report some information about the first loaded voice (index 0).
    info = cerevoice_eng.CPRCEN_engine_get_voice_info(
        self.engine, 0, "VOICE_NAME")
    sample_rate = cerevoice_eng.CPRCEN_engine_get_voice_info(
        self.engine, 0, "SAMPLE_RATE")
    sys.stderr.write("INFO: voice name is '%s', sample rate '%s'\n" %
                     (info, sample_rate))

    self.channel = cerevoice_eng.CPRCEN_engine_open_default_channel(
        self.engine)
    # The audio player is created with the channel's sample rate.
    freq = int(
        cerevoice_eng.CPRCEN_channel_get_voice_info(
            self.engine, self.channel, "SAMPLE_RATE"))
    self.wavout = False
    self.player = cerevoice_aud.CPRC_sc_player_new(freq)

    # User-configurable parameter, could be as simple as a file
    # name for the output, or a richer data structure.
    self.userdata = EngineUserData(self.wavout, self.engine, self.channel,
                                   self.player)

    res = cerevoice_eng.engine_set_callback(self.engine, self.channel, self)
    if not res:
        # Previously the result was ignored, hiding a failed registration.
        sys.stderr.write(
            "ERROR: could not set callback, synthesis data cannot be "
            "processed\n")

    self.pub = rospy.Publisher('/roboy/cognition/speech/synthesis',
                               SpeechSynthesis,
                               queue_size=10)
    self.t_pub = rospy.Publisher(
        '/roboy/cognition/speech/synthesis/duration', Int32, queue_size=10)
def synth_text_voice(voicefile, licensefile, text_dict):
    """Speak every spurt in ``text_dict`` and record the resulting spurt XML.

    For each key, the entry's ``'spurt'`` text is spoken on the default
    channel; the XML of the last spurt is passed through ``replace_id``
    together with the key and stored under the entry's ``'xml'`` key.

    Args:
        voicefile: Path of the voice file handed to the engine.
        licensefile: Path of the license file handed to the engine.
        text_dict: Mapping of spurt id to a dict with a ``'spurt'`` item;
            each entry gains an ``'xml'`` item (mutated in place).

    Exits the process with status 1 when the voice cannot be loaded.
    """
    engine = cerevoice_eng.CPRCEN_engine_new()

    # The voice is always loaded with the CPRC_VOICE_LOAD mode here.
    load_ok = cerevoice_eng.CPRCEN_engine_load_voice(
        engine, licensefile, "", voicefile, cerevoice_eng.CPRC_VOICE_LOAD)
    if not load_ok:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    channel = cerevoice_eng.CPRCEN_engine_open_default_channel(engine)

    # Generate spurtxml for every spurt.
    for spurt_id in text_dict:
        entry = text_dict[spurt_id]
        text = entry['spurt']
        cerevoice_eng.CPRCEN_engine_channel_speak(
            engine, channel, text, len(text), 1)
        spurt_xml = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt(
            engine, channel)
        entry['xml'] = replace_id(spurt_xml, spurt_id)

    cerevoice_eng.CPRCEN_engine_delete(engine)
def main():
    """Command-line entry point: synthesise each input file to a wav file.

    Options are read from ``sys.argv``: ``-L`` license file, ``-V`` voice
    file, ``-o`` output directory (defaults to the current working
    directory) and ``-d`` to load the voice keeping audio and index data on
    disk.  Every positional argument is an input file; its contents are
    synthesised to ``<outdir>/<input basename without extension>.wav``.

    Invalid or missing arguments are reported through ``parser.error``
    (usage message and exit status 2).  The process exits with status 1 when
    the voice cannot be loaded.
    """
    from optparse import OptionParser

    # Default input/output directory
    cwd = os.getcwd()

    # Setup option parsing
    usage = "usage: %prog [options] -L licensefile -V voicefile infile1 [infile2...]\nSynthesise an xml or text file to a wave file and transcription."
    parser = OptionParser(usage=usage)
    parser.add_option("-L", "--licensefile", dest="licensefile",
                      help="CereProc license file")
    parser.add_option("-V", "--voicefile", dest="voicefile",
                      help="Voice file")
    parser.add_option("-o", "--outdir", dest="outdir", default=cwd,
                      help="Output directory, defaults to '%s'" % cwd)
    parser.add_option("-d", "--ondisk", dest="ondisk", action="store_true",
                      default=False,
                      help="Load keeping audio and index data on disk")
    opts, args = parser.parse_args()

    # Check correct info supplied
    if not args:
        parser.error("at least one input file must be supplied")
    if not opts.voicefile:
        parser.error("a voice file must be supplied")
    if not opts.licensefile:
        # Without this guard a missing -L reached os.access(None, ...).
        parser.error("a license file must be supplied")
    if not os.access(opts.voicefile, os.R_OK):
        parser.error("can't access voice file '%s'" % opts.voicefile)
    if not os.access(opts.licensefile, os.R_OK):
        parser.error("can't access license file '%s'" % opts.licensefile)
    if opts.outdir and not os.access(opts.outdir, os.W_OK):
        parser.error("can't write to output directory '%s'" % opts.outdir)

    # Create an engine
    engine = cerevoice_eng.CPRCEN_engine_new()

    # Set the loading mode - all data to RAM or with audio and indexes on disk
    loadmode = cerevoice_eng.CPRC_VOICE_LOAD
    if opts.ondisk:
        loadmode = cerevoice_eng.CPRC_VOICE_LOAD_EMB

    # Load the voice
    ret = cerevoice_eng.CPRCEN_engine_load_voice(
        engine, opts.licensefile, "", opts.voicefile, loadmode)
    if not ret:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    try:
        # Get some information about the first loaded voice (index 0)
        name = cerevoice_eng.CPRCEN_engine_get_voice_info(
            engine, 0, "VOICE_NAME")
        srate = cerevoice_eng.CPRCEN_engine_get_voice_info(
            engine, 0, "SAMPLE_RATE")
        sys.stderr.write("INFO: voice name is '%s', sample rate '%s'\n" %
                         (name, srate))

        # Process the input files
        for path in args:
            with open(path) as infile:
                indata = infile.read()
            # Synthesise to a file named after the input, in the output dir
            stem = os.path.basename(os.path.splitext(path)[0])
            wavout = os.path.join(opts.outdir, stem) + ".wav"
            cerevoice_eng.CPRCEN_engine_speak_to_file(engine, indata, wavout)
            sys.stderr.write("INFO: wrote wav file '%s'\n" % wavout)
    finally:
        # Clean up, even when an input file cannot be read
        cerevoice_eng.CPRCEN_engine_delete(engine)