Example #1
0
def train(opts, dirs):
    """Gather the corpus files selected by *opts* and train a voice on them.

    Corpus directories come from ``opts.command_line_corpus`` when it is
    set; otherwise the speaker's ``txt`` and ``wav`` directories under
    ``dirs['CORPUS']`` are used, plus the optional extra text corpus named
    by ``opts.text_corpus_name``.

    Within each directory the entries are sorted (and shuffled with a fixed
    seed when ``opts.shuffle`` is set), entries whose name contains ``'._'``
    are skipped, and reading of that directory stops once the running count
    reaches ``opts.file_num`` (no limit when that option is unset).
    """
    print(" -- Gather corpus")

    ## Directories containing corpus data (all txt and wav):
    corpora = []
    if opts.command_line_corpus:
        for location in opts.command_line_corpus:
            assert os.path.isdir(location)
            corpora.append(location)
    else:
        for subdir in ("txt", "wav"):
            corpora.append(
                os.path.join(dirs['CORPUS'], opts.lang, fname.SPEAKERS,
                             opts.speaker, subdir))

        # additional large text corpus:
        if opts.text_corpus_name:
            corpora.append(
                os.path.join(dirs['CORPUS'], opts.lang, fname.TEXT_CORPORA,
                             opts.text_corpus_name))

    # Per-directory cap on the number of files; unlimited when not given.
    file_num = int(opts.file_num) if opts.file_num else float("inf")

    ## Individual txt and wav files:
    voice_data = []
    for corpus_dir in corpora:
        entries = os.listdir(corpus_dir)
        entries.sort()
        if opts.shuffle:
            # Fixed seed so that the shuffled order is repeatable.
            random.seed(1)
            random.shuffle(entries)

        taken = 0
        for entry in entries:
            if '._' not in entry:
                voice_data.append(os.path.join(corpus_dir, entry))
                taken += 1
            # Checked on every entry, whether or not it was kept.
            if taken >= file_num:
                break

    corpus = Corpus.Corpus(voice_data)

    print(" -- Train voice")
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage,
                  dirs, clear_old_data=opts.clear, max_cores=opts.max_cores)

    ## Train the voice (i.e. train processors in pipeline context):
    voice.train(corpus)
Example #2
0
def train(opts, dirs):
    """Gather the corpus files selected by *opts* and train a voice on them.

    Corpus directories come from ``opts.command_line_corpus`` when it is
    set; otherwise the speaker's ``txt`` and ``wav`` directories under
    ``dirs['CORPUS']`` are used, plus the optional extra text corpus named
    by ``opts.text_corpus_name``. Every entry of every directory is handed
    to ``Corpus.Corpus`` and the resulting corpus is passed to
    ``Voice.train``.

    Raises:
        AssertionError: if a directory given on the command line does not
            exist.
    """
    ## Handle corpus:
    # Parenthesised single-argument form prints identically on Python 2
    # and is valid syntax on Python 3.
    print(" -- Gather corpus")

    ## Get names of directories containing corpus data (all txt and wav):
    corpora = []

    if opts.command_line_corpus:
        for location in opts.command_line_corpus:
            assert os.path.isdir(location), \
                "corpus location is not a directory: %s" % location
            corpora.append(location)

    else:
        for subdir in ("txt", "wav"):
            corpora.append(
                os.path.join(dirs['CORPUS'], opts.lang, fname.SPEAKERS,
                             opts.speaker, subdir))

        # additional large text corpus:
        if opts.text_corpus_name:
            corpora.append(
                os.path.join(dirs['CORPUS'], opts.lang, fname.TEXT_CORPORA,
                             opts.text_corpus_name))

    ## Get names of individual txt and wav files:
    voice_data = []
    for c in corpora:
        # os.listdir returns entries in arbitrary order; sort them so the
        # corpus is assembled in the same order on every run and machine.
        voice_data.extend(os.path.join(c, f) for f in sorted(os.listdir(c)))

    corpus = Corpus.Corpus(voice_data)

    print(" -- Train voice")
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage,
                  dirs, clear_old_data=opts.clear, max_cores=opts.max_cores)

    ## Train the voice (i.e. train processors in pipeline context):
    voice.train(corpus)
Example #3
0
def main_work():
    """Parse the command line, load a voice and synthesise the input text.

    Text is read with ``fileinput`` from the files named on the command
    line (stdin when none are given). Plain text is spoken one paragraph at
    a time, paragraphs being separated by blank lines; a file whose first
    line starts with ``<speak`` or ``<xml`` is instead parsed with
    ``etree`` and handed to ``parseSSML``.

    Every paragraph is written to the same wave file (``-o``, or
    ``temp.wav`` under the voice's output directory) and, when ``-play`` is
    given, played with the external ``play`` command after synthesis.
    """
    # Local import: only needed for the optional playback step.
    import subprocess

    #################################################

    # root is one level below this file in directory structure, ie. below the 'scripts' folder
    ROOT = os.path.split(
        os.path.realpath(
            os.path.abspath(
                os.path.dirname(inspect.getfile(
                    inspect.currentframe())))))[0] + '/'

    dirs = {
        'ROOT': ROOT,
        'CONFIG': ROOT + "configs/",
        'VOICES': ROOT + "voices/",
        'TRAIN': ROOT + "train/",
        'RULES': ROOT + "rules/",
        'CORPUS': ROOT + "corpus/",
        'BIN': ROOT + "/tools/bin/"
    }

    # ======== Get stuff from command line ==========

    a = ArgumentParser()
    a.add_argument('-s', dest='speaker', required=True, \
                    help= "the name of the speaker: <ROOT>/corpus/<LANG>/<SPEAKER>")
    a.add_argument('-l', dest='lang', required=True, \
                    help= "the language of the speaker: <ROOT>/corpus/<LANG>")
    a.add_argument('-o', dest='output', required=False, default=False, \
                    help= "output audio here")
    a.add_argument('-t', dest='stage', required=False, default="runtime", \
                    help=""" defines the current usage stage 
                            (definitions of stages should by found in <config>/recipe.cfg""")
    a.add_argument('-play', dest='play', action="store_true", required=False, default=False, \
                    help=" play audio after synthesis")
    a.add_argument('-lab', dest='make_label', action="store_true", default=False, \
                    help= "make label file as well as wave in output location")
    a.add_argument('config',
                   help="""configuration to use: naive, semi-naive, gold, 
                                    as defined in <ROOT>/recipes/<config> -directory"""
                   )
    a.add_argument('-bin', dest='custom_bindir')
    a.add_argument('files',
                   nargs='*',
                   help="text files to speak, reading from stdin by default")
    a.add_argument('-m',
                   dest='model_dir',
                   required=True,
                   type=str,
                   help="model directory")
    opts = a.parse_args()

    # Trained models and voices live under the user-supplied model directory.
    dirs['TRAIN'] = opts.model_dir + "/train/"
    dirs['VOICES'] = opts.model_dir + "/voices/"

    if opts.custom_bindir is not None:
        dirs['BIN'] = opts.custom_bindir

    voice_location = os.path.join(dirs['VOICES'], opts.lang, opts.speaker,
                                  opts.config)

    ## Make Voice object to contain voice elements trained on this corpus:
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage, dirs)

    # A label file is only written when an explicit output path was given.
    if not opts.output:
        output_wavefile = os.path.join(voice_location, 'output', 'wav',
                                       'temp.wav')
        output_labfile = None
    else:
        output_wavefile = opts.output
        output_labfile = output_wavefile.replace('.wav', '.lab')

    def speak_paragraph(lines):
        """Synthesise the given lines as one utterance and optionally play it."""
        # Lines have had their trailing newline stripped, so join with a
        # space to keep words on adjacent lines apart.
        voice.synth_utterance(' '.join(lines), output_wavefile=output_wavefile,
                              output_labfile=output_labfile)
        if opts.play:
            # Argument list rather than a shell string, so that paths with
            # spaces or shell metacharacters are passed through intact.
            subprocess.call(['play', output_wavefile])

    prevspace = False
    para = []
    t = None  # stays None when there is no input at all
    # Go through the files a paragraph at a time, unless it's SSML in which case we parse it
    # An empty line marks the change of paragraphs in plain text files
    for line in fileinput.input(opts.files):
        # Python 2 yields byte strings here; Python 3 already yields text.
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        line = line.rstrip()
        t = start_clock('Synthesise sentence')
        print(line)
        if fileinput.isfirstline():
            # A new file starts: flush whatever is left of the previous one.
            if para:
                speak_paragraph(para)
                para = []
            prevspace = False
            line = line.lstrip()
            if line.startswith(('<speak', '<xml')):
                tree = etree.parse(fileinput.filename())
                parseSSML(tree, voice)
                fileinput.nextfile()
            else:
                para.append(line)
        elif not line:
            # The line was stripped above, so a blank line is now the empty
            # string (str.isspace() is False for '').
            prevspace = True
        elif prevspace and para:
            speak_paragraph(para)
            prevspace = False
            para = [line]
        else:
            prevspace = False
            para.append(line)

    if para:
        speak_paragraph(para)
    if t is not None:
        stop_clock(t)
Example #4
0
def main_work():
    """Add synthesis processors to an existing voice and speak a test phrase.

    Expects five positional command-line arguments: the voice config file,
    the directory holding the component config files, the engine binary,
    the resynthesis binary and the trained model directory. Prints a usage
    message and exits with status 1 when fewer are given; extra arguments
    are ignored.

    An acoustic-model parameter generator, a waveform synthesiser and a
    wave player are created and saved, registered with the voice, the voice
    is saved, and a fixed Spanish test sentence is synthesised.
    """

    #################################################

    # ======== Get stuff from command line ==========

    def usage():
        print("Usage: %s <voice_config> <voice_components> <ENGINE_BIN> "
              "<RESYNTH_BIN> <trained_model_dir>" % sys.argv[0])
        sys.exit(1)

    # Check the argument count explicitly instead of wrapping the reads in
    # a catch-all except, which would also hide unrelated errors.
    if len(sys.argv) < 6:
        usage()

    (voice_config, voice_components, ENGINE_BIN, RESYNTH_BIN,
     trained_model_dir) = sys.argv[1:6]

    #################################################
    sys.path.append("/afs/inf.ed.ac.uk/user/o/owatts/naive/script/")
    #################################################

    ## Lots of these paths should be interpolated from system-wide options (e.g. bin dir etc).
    ## Absolute paths for now.
    ## NOTE: context_file_location and ESTDIR are not used below.
    context_file_location = "/afs/inf.ed.ac.uk/user/o/owatts/naive/context_files/"
    ESTDIR = "/group/project/nlp-speech/bin/"
    HTSDIR = "/afs/inf.ed.ac.uk/user/o/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin/"
    SCRIPT = "/afs/inf.ed.ac.uk/user/o/owatts/naive/script"
    GENSIM_LOCATION = "%s/gensim-0.5.0/src/" % (SCRIPT)
    #################################################

    sys.path.append(GENSIM_LOCATION)  ## add gensim to path
    # The imported name is not referenced below; the import is kept in case
    # loading the voice relies on the module being importable -- TODO confirm.
    from VSMTagger import VSMTagger

    print(" -- Open the existing voice")

    voice = Voice(config_file=voice_config)

    print(" -- Make an utterance processor from a (trained) acoustic model   ")

    ### This will only perform work where an utt does not have a wavefile attached:
    parameter_generator = AcousticModel(config_file=voice_components +
                                        "/parameter_generator.cfg",
                                        processor_name="parameter_generator",
                                        ENGINE_BIN=ENGINE_BIN,
                                        model_location=trained_model_dir,
                                        HTSDIR=HTSDIR)
    parameter_generator.save()

    ### WAVESYNTH
    waveform_synthesiser = WaveSynthesiser(
        config_file=voice_components + "/waveform_synthesiser.cfg",
        processor_name="waveform_synthesiser",
        RESYNTH_BIN=RESYNTH_BIN,
        HTSDIR=HTSDIR)
    waveform_synthesiser.save()

    ### WAVE PLAYER (call e.g. sox etc)
    wave_player = WavePlayer(config_file=voice_components + "/wave_player.cfg",
                             processor_name="wave_player")
    wave_player.save()

    # Register the saved processor configs with the voice, in pipeline order.
    voice.add_processor(voice_components + "/parameter_generator.cfg")
    voice.add_processor(voice_components + "/waveform_synthesiser.cfg")
    voice.add_processor(voice_components + "/wave_player.cfg")

    print(" -- Save voice")
    voice.save()

    print(" -- Synthesize a test utterance (from some Spanish text...)")
    ## Use the voice to synth a test utterance:
    voice.synth_utterance("Esto es: una prueba.")
Example #5
0
def main_work():
    """Parse the command line, load a voice and synthesise each input file.

    Each file named on the command line is read with ``readlist``, its
    lines are joined with spaces, and the result is synthesised to
    ``<output dir>/<basename>.wav``. The output directory is ``-o`` when
    given, otherwise ``output/wav`` under the voice's directory. With
    ``-u`` an utterance file ``<basename>.utt`` is also written to that
    directory (created if missing); with ``-lab`` the ``dnn_lab`` extension
    is requested from ``synth_utterance`` as well.
    """

    #################################################

    # root is one level below this file in directory structure, ie. below the 'scripts' folder
    ROOT = os.path.split(
        os.path.realpath(
            os.path.abspath(
                os.path.dirname(inspect.getfile(
                    inspect.currentframe())))))[0] + '/'

    dirs = {
        'ROOT': ROOT,
        'CONFIG': ROOT + "configs/",
        'VOICES': ROOT + "voices/",
        'TRAIN': ROOT + "train/",
        'RULES': ROOT + "rules/",
        'CORPUS': ROOT + "corpus/",
        'BIN': ROOT + "/tools/bin/"
    }

    # ======== Get stuff from command line ==========

    a = ArgumentParser()
    a.add_argument('-s', dest='speaker', required=True, \
                    help= "the name of the speaker: <ROOT>/corpus/<LANG>/<SPEAKER>")
    a.add_argument('-l', dest='lang', required=True, \
                    help= "the language of the speaker: <ROOT>/corpus/<LANG>")
    a.add_argument('-o', dest='output', required=False, default=False, \
                    help= "output audio here")
    a.add_argument('-t', dest='stage', required=False, default="runtime", \
                    help=""" defines the current usage stage 
                            (definitions of stages should by found in <config>/recipe.cfg""")
    a.add_argument('-u', dest='output_utt', required=False, default=False, \
                    help= "output utt files here")
    a.add_argument('-play', dest='play', action="store_true", required=False, default=False, \
                    help=" play audio after synthesis")
    a.add_argument('-lab', dest='make_label', action="store_true", default=False, \
                    help= "make label file as well as wave in output location")
    a.add_argument('config',
                   help="""configuration to use: naive, semi-naive, gold, 
                                    as defined in <ROOT>/recipes/<config> -directory"""
                   )
    a.add_argument('files',
                   nargs='*',
                   help="text files to speak, reading from stdin by default")
    opts = a.parse_args()

    voice_location = os.path.join(dirs['VOICES'], opts.lang, opts.speaker,
                                  opts.config)

    ## Make Voice object to contain voice elements trained on this corpus:
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage, dirs)

    if not opts.output:
        output_dir = os.path.join(voice_location, 'output', 'wav')
    else:
        output_dir = opts.output

    if opts.output_utt and not os.path.isdir(opts.output_utt):
        os.makedirs(opts.output_utt)

    output_extensions = []
    if opts.make_label:
        output_extensions.append('dnn_lab')

    for filename in opts.files:
        base = get_basename(filename)
        output_wavefile = os.path.join(output_dir, base + '.wav')
        text = ' '.join(readlist(filename))
        try:
            print(text)
        except UnicodeError:
            # The text has characters the console encoding cannot represent;
            # echoing it is only informational, so print a blank instead.
            print('  ')
        print(base)

        # The utterance file is only requested when -u was given.
        synth_kwargs = {
            'output_wavefile': output_wavefile,
            'output_extensions': output_extensions,
        }
        if opts.output_utt:
            synth_kwargs['output_uttfile'] = os.path.join(
                opts.output_utt, base + '.utt')
        voice.synth_utterance(text, **synth_kwargs)