Exemple #1
0
def synth_text_voice_exclude(voicefile, licensefile, text_dict, outdir, opts):
    """Synthesise each spurt in ``text_dict`` to a wav file and record its XML.

    The ids (keys of ``text_dict``) are processed in sorted order.  For each
    id the channel output is directed to ``<outdir>/<id>.wav``, the entry's
    ``'spurt'`` text is spoken with ``vtbctrl.sptid_exclude`` set to that id,
    and the spurt XML reported by the engine afterwards is stored in the
    entry under ``'xml'``.

    Args:
        voicefile: Path of the voice file handed to the engine.
        licensefile: Path of the license file handed to the engine.
        text_dict: Mapping of spurt id to a dict with a ``'spurt'`` string;
            each inner dict is updated in place with an ``'xml'`` entry.
        outdir: Directory the wav files are written to.
        opts: Options object; the ``non_contig``, ``boost_weights`` and
            ``hts_full`` attributes are read.

    Exits the process with status 1 if the voice cannot be loaded.
    """
    # Create an engine
    engine = cerevoice_eng.CPRCEN_engine_new()

    # Load the voice using the CPRC_VOICE_LOAD mode
    ret = cerevoice_eng.CPRCEN_engine_load_voice(
        engine, licensefile, "", voicefile, cerevoice_eng.CPRC_VOICE_LOAD)
    if not ret:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    # From here on the engine is released even if synthesis raises.
    try:
        # Open channel
        channel = cerevoice_eng.CPRCEN_engine_open_default_channel(engine)

        # The control structure is obtained from the channel's spurt struct
        # before anything is spoken; the per-id setting in the loop below is
        # written to this same object.
        spurt = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt_struct(
            engine, channel)
        vtbctrl = cerevoice.CPRC_ltcmgr_vtbctrl(spurt)
        if opts.non_contig:
            vtbctrl.non_contig = 1
        if opts.boost_weights:
            voice = cerevoice_eng.CPRCEN_channel_get_cerevoice(engine, channel)
            # _weights maps a weight name to a (weight, scaling) pair.
            for weightname, (weight, scaling) in _weights.items():
                cerevoice.CPRC_cfmgr_set_weights(
                    voice, weightname, len(weightname), weight, scaling)
        if opts.hts_full:
            cerevoice_eng.CPRCEN_channel_synth_type_hts(engine, channel)

        # Generate spurtxml for every spurt
        for spurt_id in sorted(text_dict):
            spurt_txt = text_dict[spurt_id]['spurt']
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(spurt_txt)
            # NOTE(review): presumably excludes the voice's own recording of
            # this spurt from selection -- confirm against the CereVoice docs.
            vtbctrl.sptid_exclude = spurt_id
            cerevoice_eng.CPRCEN_engine_channel_to_file(
                engine, channel, os.path.join(outdir, spurt_id + ".wav"),
                cerevoice_eng.CPRCEN_RIFF)
            cerevoice_eng.CPRCEN_engine_channel_speak_spurt(
                engine, channel, spurt_txt, len(spurt_txt))
            if opts.hts_full:
                # Switch the freshly spoken spurt to full HTS mode, rerun the
                # feature extraction and dump the resulting buffer.
                spurt = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt_struct(
                    engine, channel)
                cerevoice.CPRC_spurt_set_hts(spurt,
                                             cerevoice.CPRC_HTS_MODE_FULL)
                cerevoice.CPRC_featmgr_fx(spurt)
                htsfull = cerevoice.CPRC_buf_get(
                    cerevoice.CPRC_spurt_hts(spurt))
                print(htsfull)
            xml = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt(engine,
                                                                  channel)
            text_dict[spurt_id]['xml'] = xml
    finally:
        cerevoice_eng.CPRCEN_engine_delete(engine)
def synth(input_str, filename="output.wav", outdir=None, textout=False,
          ondisk=False):
    """Synthesise ``input_str`` with the bundled voice via the engine callback.

    The voice and license files are looked up in the ``voice`` directory next
    to this module.  Audio handling is delegated to a
    ``CereVoiceEngineCallback`` built around an ``EngineUserData`` that
    receives the wav path, the engine, the channel and the text-output path.

    Args:
        input_str: Text passed to the engine's speak call.
        filename: Name of the wav file, appended to ``outdir``.
        outdir: Output directory; the current working directory when falsy.
        textout: When truthy, a ``.json`` path derived from the wav path
            (extension replaced) is passed on to ``EngineUserData``;
            otherwise the falsy value is passed through unchanged.
        ondisk: When true, load the voice with ``CPRC_VOICE_LOAD_EMB``
            (audio and indexes on disk) instead of loading all data to RAM.

    Exits the process with status 1 if the voice cannot be loaded.
    """
    if not outdir:
        outdir = os.getcwd()

    here = os.path.dirname(os.path.abspath(__file__))
    licensefile = here + "/voice/heather.lic"
    voicefile = here + "/voice/cerevoice_heather_4.0.0_48k.voice"

    # Create an engine
    engine = cerevoice_eng.CPRCEN_engine_new()

    # Set the loading mode - all data to RAM or with audio and indexes on disk
    if ondisk:
        loadmode = cerevoice_eng.CPRC_VOICE_LOAD_EMB
    else:
        loadmode = cerevoice_eng.CPRC_VOICE_LOAD

    # Load the voice
    ret = cerevoice_eng.CPRCEN_engine_load_voice(engine, licensefile, "",
                                                 voicefile, loadmode)
    if not ret:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    wavout = os.path.join(outdir) + "/{name}".format(name=filename)
    if textout:
        # Replace whatever extension the wav path has (or none at all)
        # rather than blindly dropping the last four characters.
        textout = os.path.splitext(wavout)[0] + ".json"

    # From here on the engine is released even if synthesis raises.
    try:
        channel = cerevoice_eng.CPRCEN_engine_open_default_channel(engine)
        userdata = EngineUserData(wavout, engine, channel, textout)
        cc = CereVoiceEngineCallback(userdata)
        res = cerevoice_eng.engine_set_callback(engine, channel, cc)
        if res:
            cerevoice_eng.CPRCEN_engine_channel_speak(
                engine, channel, input_str, len(input_str), 1)
        else:
            sys.stderr.write(
                "ERROR: could not set callback, synthesis data cannot be "
                "processed\n")
    finally:
        # Clean up
        cerevoice_eng.CPRCEN_engine_delete(engine)
    def __init__(self):
        """Create the engine, load the configured voice and set up publishers.

        The voice and license paths are read from the ROS parameters
        ``/speech_synthesis/voicePath`` and ``/speech_synthesis/licensePath``.
        After loading, a default channel is opened, an audio player is created
        at the channel's sample rate, this object is registered as the
        channel's callback, and two ROS publishers are created.

        Exits the process with status 1 if the voice cannot be loaded.
        """
        self.engine = cerevoice_eng.CPRCEN_engine_new()

        voicePath = rospy.get_param('/speech_synthesis/voicePath')
        licensePath = rospy.get_param('/speech_synthesis/licensePath')

        # Loading mode: CPRC_VOICE_LOAD_EMB is the mode actually used for
        # this voice (the alternative, CPRC_VOICE_LOAD, was assigned in the
        # original code but never passed to the engine).
        loadmode = cerevoice_eng.CPRC_VOICE_LOAD_EMB

        # Load the voice
        ret = cerevoice_eng.CPRCEN_engine_load_voice(
            self.engine, licensePath, "", voicePath, loadmode)

        if not ret:
            sys.stderr.write(
                "ERROR: could not load the voice, check license integrity\n")
            sys.exit(1)

        info = cerevoice_eng.CPRCEN_engine_get_voice_info(
            self.engine, 0, "VOICE_NAME")
        sample_rate = cerevoice_eng.CPRCEN_engine_get_voice_info(
            self.engine, 0, "SAMPLE_RATE")
        sys.stderr.write("INFO: voice name is '%s', sample rate '%s'\n" %
                         (info, sample_rate))

        self.channel = cerevoice_eng.CPRCEN_engine_open_default_channel(
            self.engine)
        freq = int(
            cerevoice_eng.CPRCEN_channel_get_voice_info(
                self.engine, self.channel, "SAMPLE_RATE"))

        # No wav output file is configured; audio goes through the player.
        self.wavout = False
        self.player = cerevoice_aud.CPRC_sc_player_new(freq)

        # User-configurable parameter, could be as simple as a file
        # name for the output, or a richer data structure.
        self.userdata = EngineUserData(self.wavout, self.engine, self.channel,
                                       self.player)

        # Register this object as the channel callback and report a failure
        # instead of silently discarding the result.
        res = cerevoice_eng.engine_set_callback(self.engine, self.channel,
                                                self)
        if not res:
            sys.stderr.write(
                "ERROR: could not set callback, synthesis data cannot be "
                "processed\n")

        self.pub = rospy.Publisher('/roboy/cognition/speech/synthesis',
                                   SpeechSynthesis,
                                   queue_size=10)
        self.t_pub = rospy.Publisher(
            '/roboy/cognition/speech/synthesis/duration', Int32, queue_size=10)
Exemple #4
0
def synth_text_voice(voicefile, licensefile, text_dict):
    """Speak every spurt in ``text_dict`` and store the resulting spurt XML.

    For each id in ``text_dict`` the entry's ``'spurt'`` text is spoken on a
    default channel; the XML of the last spurt, passed through
    ``replace_id`` together with the id, is stored in the entry under
    ``'xml'``.

    Args:
        voicefile: Path of the voice file handed to the engine.
        licensefile: Path of the license file handed to the engine.
        text_dict: Mapping of spurt id to a dict with a ``'spurt'`` string;
            each inner dict is updated in place with an ``'xml'`` entry.

    Exits the process with status 1 if the voice cannot be loaded.
    """
    eng = cerevoice_eng.CPRCEN_engine_new()

    # Load the voice with the CPRC_VOICE_LOAD mode; bail out on failure.
    mode = cerevoice_eng.CPRC_VOICE_LOAD
    loaded = cerevoice_eng.CPRCEN_engine_load_voice(
        eng, licensefile, "", voicefile, mode)
    if not loaded:
        sys.stderr.write("ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    chan = cerevoice_eng.CPRCEN_engine_open_default_channel(eng)

    # One speak call per entry, then read back the XML of what was spoken.
    for spurt_id in text_dict.keys():
        text = text_dict[spurt_id]['spurt']
        cerevoice_eng.CPRCEN_engine_channel_speak(
            eng, chan, text, len(text), 1)
        last_xml = cerevoice_eng.CPRCEN_engine_chan_get_last_spurt(eng, chan)
        text_dict[spurt_id]['xml'] = replace_id(last_xml, spurt_id)

    cerevoice_eng.CPRCEN_engine_delete(eng)
Exemple #5
0
def main():
    """Command-line entry point: synthesise each input file to a wav file.

    Options:
        -L/--licensefile  CereProc license file (required, must be readable)
        -V/--voicefile    voice file (required, must be readable)
        -o/--outdir       output directory (defaults to the current working
                          directory, must be writable)
        -d/--ondisk       load the voice keeping audio and index data on disk

    Every positional argument is an input file; its contents are synthesised
    to ``<outdir>/<input basename without extension>.wav``.

    Invalid arguments are reported through ``parser.error`` (usage message,
    exit status 2).  Exits with status 1 if the voice cannot be loaded.
    """
    from optparse import OptionParser

    # Default input/output directory
    cwd = os.getcwd()

    # Setup option parsing
    usage = "usage: %prog [options] -L licensefile -V voicefile infile1 [infile2...]\nSynthesise an xml or text file to a wave file and transcription."
    parser = OptionParser(usage=usage)

    parser.add_option("-L",
                      "--licensefile",
                      dest="licensefile",
                      help="CereProc license file")
    parser.add_option("-V", "--voicefile", dest="voicefile", help="Voice file")
    parser.add_option("-o",
                      "--outdir",
                      dest="outdir",
                      default=cwd,
                      help="Output directory, defaults to '%s'" % cwd)
    parser.add_option("-d",
                      "--ondisk",
                      dest="ondisk",
                      action="store_true",
                      default=False,
                      help="Load keeping audio and index data on disk")

    opts, args = parser.parse_args()

    # Check correct info supplied.  Every failure goes through parser.error
    # so the user gets the usage text instead of a traceback.
    if not args:
        parser.error("at least one input file must be supplied")
    if not opts.voicefile:
        parser.error("a voice file must be supplied")
    if not opts.licensefile:
        parser.error("a license file must be supplied")
    if not os.access(opts.voicefile, os.R_OK):
        parser.error("can't access voice file '%s'" % opts.voicefile)
    if not os.access(opts.licensefile, os.R_OK):
        parser.error("can't access license file '%s'" % opts.licensefile)
    if opts.outdir and not os.access(opts.outdir, os.W_OK):
        parser.error("can't write to output directory '%s'" % opts.outdir)

    # Create an engine
    engine = cerevoice_eng.CPRCEN_engine_new()

    # Set the loading mode - all data to RAM or with audio and indexes on disk
    loadmode = cerevoice_eng.CPRC_VOICE_LOAD
    if opts.ondisk:
        loadmode = cerevoice_eng.CPRC_VOICE_LOAD_EMB

    # Load the voice
    ret = cerevoice_eng.CPRCEN_engine_load_voice(engine, opts.licensefile, "",
                                                 opts.voicefile, loadmode)
    if not ret:
        sys.stderr.write(
            "ERROR: could not load the voice, check license integrity\n")
        sys.exit(1)

    # From here on the engine is released even if an input file fails.
    try:
        # Get some information about the first loaded voice (index 0)
        name = cerevoice_eng.CPRCEN_engine_get_voice_info(engine, 0,
                                                          "VOICE_NAME")
        srate = cerevoice_eng.CPRCEN_engine_get_voice_info(engine, 0,
                                                           "SAMPLE_RATE")
        sys.stderr.write("INFO: voice name is '%s', sample rate '%s'\n" %
                         (name, srate))

        # Process the input files
        for f in args:
            with open(f) as infile:
                indata = infile.read()
            # Synthesise to a file named after the input, in the output dir
            wavout = os.path.join(
                opts.outdir, os.path.basename(os.path.splitext(f)[0])) + ".wav"
            cerevoice_eng.CPRCEN_engine_speak_to_file(engine, indata, wavout)
            sys.stderr.write("INFO: wrote wav file '%s'\n" % wavout)
    finally:
        # Clean up
        cerevoice_eng.CPRCEN_engine_delete(engine)