def train(opts, dirs):
    """Gather corpus files for a speaker and train a voice on them.

    Corpus directories come either from opts.command_line_corpus or from the
    conventional <CORPUS>/<lang>/speakers/<speaker>/{txt,wav} layout, plus an
    optional large text corpus.  File names are sorted (optionally shuffled
    with a fixed seed), capped per directory at opts.file_num, then handed to
    Corpus/Voice for training.
    """
    ## Handle corpus:
    print(" -- Gather corpus")

    ## Directories containing corpus data (all txt and wav):
    if opts.command_line_corpus:
        corpora = []
        for location in opts.command_line_corpus:
            assert os.path.isdir(location)
            corpora.append(location)
    else:
        speaker_root = os.path.join(dirs['CORPUS'], opts.lang,
                                    fname.SPEAKERS, opts.speaker)
        corpora = [os.path.join(speaker_root, "txt"),
                   os.path.join(speaker_root, "wav")]
        # additional large text corpus:
        if opts.text_corpus_name:
            corpora.append(os.path.join(dirs['CORPUS'], opts.lang,
                                        fname.TEXT_CORPORA, opts.text_corpus_name))

    # Per-directory cap on number of files; unlimited when unset.
    file_num = int(opts.file_num) if opts.file_num else float("inf")

    ## Collect individual txt and wav file paths:
    voice_data = []
    for corpus_dir in corpora:
        file_list = sorted(os.listdir(corpus_dir))
        if opts.shuffle:
            random.seed(1)  # fixed seed: shuffle order is reproducible across runs
            random.shuffle(file_list)
        taken = 0
        for entry in file_list:
            if '._' in entry:  # skip OS X resource-fork junk files
                continue
            voice_data.append(os.path.join(corpus_dir, entry))
            taken += 1
            # Stop appending voice data once the cap is reached
            if taken >= file_num:
                break

    corpus = Corpus.Corpus(voice_data)

    print(" -- Train voice")
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage,
                  dirs, clear_old_data=opts.clear, max_cores=opts.max_cores)

    ## Train the voice (i.e. train processors in pipeline context):
    voice.train(corpus)
def train(opts, dirs):
    """Gather corpus files for a speaker and train a voice on them.

    Simpler variant: no per-directory file cap, no shuffling, no filtering.
    Corpus directories come either from opts.command_line_corpus or from the
    conventional <CORPUS>/<lang>/speakers/<speaker>/{txt,wav} layout, plus an
    optional large text corpus.
    """
    ## Handle corpus:
    # NOTE(fix): original used Python 2 print statements ("print ..."), which
    # are syntax errors under Python 3 and inconsistent with the sibling
    # train() that already uses print(); converted to the function form.
    print(" -- Gather corpus")

    ## Get names of directories containing corpus data (all txt and wav):
    corpora = []
    if opts.command_line_corpus:
        for location in opts.command_line_corpus:
            assert os.path.isdir(location)
            corpora.append(location)
    else:
        corpora.append(os.path.join(dirs['CORPUS'], opts.lang,
                                    fname.SPEAKERS, opts.speaker, "txt"))
        corpora.append(os.path.join(dirs['CORPUS'], opts.lang,
                                    fname.SPEAKERS, opts.speaker, "wav"))
        # additional large text corpus:
        if opts.text_corpus_name:
            corpora.append(os.path.join(dirs['CORPUS'], opts.lang,
                                        fname.TEXT_CORPORA, opts.text_corpus_name))

    ## Get names of individual txt and wav files:
    voice_data = []
    for c in corpora:
        for f in os.listdir(c):
            voice_data.append(os.path.join(c, f))

    corpus = Corpus.Corpus(voice_data)

    print(" -- Train voice")
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage,
                  dirs, clear_old_data=opts.clear, max_cores=opts.max_cores)

    ## Train the voice (i.e. train processors in pipeline context):
    voice.train(corpus)
def main_work():
    """Command-line entry point: load a trained voice and synthesise text.

    Reads text files (or stdin) a paragraph at a time — an empty line marks a
    paragraph boundary in plain text; files starting with <speak>/<xml> are
    parsed as SSML instead — and synthesises each paragraph to a wave file,
    optionally playing it and writing a label file.

    Fixes vs. original: Python 2 `print line` and `line.decode('utf-8')`
    converted to Python 3 (decoding is done once at the I/O boundary via
    fileinput's openhook); `!= None` -> `is not None`; 'BIN' path no longer
    contains a double slash (ROOT already ends with '/').
    """
    #################################################

    # root is one level below this file in directory structure,
    # ie. below the 'scripts' folder
    ROOT = os.path.split(
        os.path.realpath(
            os.path.abspath(
                os.path.dirname(inspect.getfile(inspect.currentframe())))))[0] + '/'

    dirs = {
        'ROOT': ROOT,
        'CONFIG': ROOT + "configs/",
        'VOICES': ROOT + "voices/",
        'TRAIN': ROOT + "train/",
        'RULES': ROOT + "rules/",
        'CORPUS': ROOT + "corpus/",
        'BIN': ROOT + "tools/bin/",  # was ROOT + "/tools/bin/": double slash
    }

    # ======== Get stuff from command line ==========
    a = ArgumentParser()
    a.add_argument('-s', dest='speaker', required=True,
                   help="the name of the speaker: <ROOT>/corpus/<LANG>/<SPEAKER>")
    a.add_argument('-l', dest='lang', required=True,
                   help="the language of the speaker: <ROOT>/corpus/<LANG>")
    a.add_argument('-o', dest='output', required=False, default=False,
                   help="output audio here")
    a.add_argument('-t', dest='stage', required=False, default="runtime",
                   help="""defines the current usage stage
                           (definitions of stages should by found in <config>/recipe.cfg""")
    a.add_argument('-play', dest='play', action="store_true", required=False, default=False,
                   help=" play audio after synthesis")
    a.add_argument('-lab', dest='make_label', action="store_true", default=False,
                   help="make label file as well as wave in output location")
    a.add_argument('config',
                   help="""configuration to use: naive, semi-naive, gold,
                           as defined in <ROOT>/recipes/<config> -directory""")
    a.add_argument('-bin', dest='custom_bindir')
    a.add_argument('files', nargs='*',
                   help="text files to speak, reading from stdin by default")
    a.add_argument('-m', dest='model_dir', required=True, type=str,
                   help="model directory")
    opts = a.parse_args()

    # Model directory overrides the default train/voices locations.
    dirs['TRAIN'] = opts.model_dir + "/train/"
    dirs['VOICES'] = opts.model_dir + "/voices/"

    if opts.custom_bindir is not None:
        dirs['BIN'] = opts.custom_bindir

    voice_location = os.path.join(dirs['VOICES'], opts.lang, opts.speaker, opts.config)
    train_location = os.path.join(dirs['TRAIN'], opts.lang,
                                  "speakers", opts.speaker, opts.config)
    config_path = os.path.join(dirs['CONFIG'], opts.config)
    voice_config = os.path.join(config_path, fname.RECIPE)

    ## Make Voice object to contain voice elements trained on this corpus:
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage, dirs)

    # Without -o, synthesise to a scratch file in the voice directory and
    # produce no label file; with -o, derive the label name from the wave name.
    if not opts.output:
        output_wavefile = os.path.join(voice_location, 'output', 'wav', 'temp.wav')
        output_labfile = None
    else:
        output_wavefile = opts.output
        output_labfile = output_wavefile.replace('.wav', '.lab')

    prevspace = False
    para = []
    # Go through the files a paragraph at a time, unless it's SSML in which
    # case we parse it.  An empty line marks the change of paragraphs in
    # plain text files.
    # Decode once at the I/O boundary (replaces per-line .decode('utf-8')).
    for line in fileinput.input(opts.files, openhook=fileinput.hook_encoded("utf-8")):
        line = line.rstrip()
        t = start_clock('Synthesise sentence')
        print(line)
        if fileinput.isfirstline():
            # Starting a new input file: flush any pending paragraph first.
            if para != []:
                voice.synth_utterance(''.join(para),
                                      output_wavefile=output_wavefile,
                                      output_labfile=output_labfile)
                if opts.play:
                    os.system('play ' + output_wavefile)
                para = []
            line = line.lstrip()
            if line.startswith('<speak') or line.startswith('<xml'):
                # SSML input: hand the whole file to the SSML parser and skip
                # the rest of its lines.
                tree = etree.parse(fileinput.filename())
                parseSSML(tree, voice)
                fileinput.nextfile()
            else:
                para.append(line)
        elif line.isspace():
            prevspace = True
        elif prevspace and para != []:
            # Blank line ended the previous paragraph: synthesise it and start
            # a new one with the current line.
            voice.synth_utterance(''.join(para),
                                  output_wavefile=output_wavefile,
                                  output_labfile=output_labfile)
            prevspace = False
            para = [line]
        else:
            para.append(line)

    # Flush the final paragraph.
    if para != []:
        voice.synth_utterance(''.join(para),
                              output_wavefile=output_wavefile,
                              output_labfile=output_labfile)
        if opts.play:
            os.system('play ' + output_wavefile)
    # NOTE(review): t is the timer of the *last* line only, and is undefined
    # when the input is empty — preserved from the original; confirm intent.
    stop_clock(t)
def main_work():
    """Build a runtime voice from trained components and synth a test sentence.

    Reads five positional command-line arguments (voice config, components
    dir, engine binary, resynthesis binary, trained model dir), constructs and
    saves the parameter generator / waveform synthesiser / wave player
    processors, attaches them to the voice, saves it, and synthesises a test
    utterance.

    Fix vs. original: the bare `except:` around the sys.argv unpacking caught
    every exception (including SystemExit and unrelated NameErrors), masking
    real errors as a usage message; narrowed to IndexError (missing args).
    """
    #################################################

    # ======== Get stuff from command line ==========
    def usage():
        print("Usage: ...... ")
        sys.exit(1)

    try:
        voice_config = sys.argv[1]
        voice_components = sys.argv[2]
        ENGINE_BIN = sys.argv[3]
        RESYNTH_BIN = sys.argv[4]
        trained_model_dir = sys.argv[5]
    except IndexError:  # too few command-line arguments
        usage()

    #################################################

    sys.path.append("/afs/inf.ed.ac.uk/user/o/owatts/naive/script/")

    #################################################
    ## Lots of these paths should be interpolated from system-wide options
    ## (e.g. bin dir etc).  Absolute paths for now.
    context_file_location = "/afs/inf.ed.ac.uk/user/o/owatts/naive/context_files/"
    ESTDIR = "/group/project/nlp-speech/bin/"
    HTSDIR = "/afs/inf.ed.ac.uk/user/o/owatts/repos/simple4all/CSTRVoiceClone/trunk/bin/"
    SCRIPT = "/afs/inf.ed.ac.uk/user/o/owatts/naive/script"
    GENSIM_LOCATION = "%s/gensim-0.5.0/src/" % (SCRIPT)

    #################################################

    sys.path.append(GENSIM_LOCATION)  ## add gensim to path
    from VSMTagger import VSMTagger

    print(" -- Open the existing voice")
    voice = Voice(config_file=voice_config)

    print(" -- Make an utterance processor from a (trained) acoustic model ")
    ### This will only perform work where an utt does not have a wavefile attached:
    parameter_generator = AcousticModel(
        config_file=voice_components + "/parameter_generator.cfg",
        processor_name="parameter_generator",
        ENGINE_BIN=ENGINE_BIN,
        model_location=trained_model_dir,
        HTSDIR=HTSDIR)
    parameter_generator.save()

    ### WAVESYNTH
    waveform_synthesiser = WaveSynthesiser(
        config_file=voice_components + "/waveform_synthesiser.cfg",
        processor_name="waveform_synthesiser",
        RESYNTH_BIN=RESYNTH_BIN,
        HTSDIR=HTSDIR)
    waveform_synthesiser.save()

    ### WAVE PLAYER (call e.g. sox etc)
    wave_player = WavePlayer(
        config_file=voice_components + "/wave_player.cfg",
        processor_name="wave_player")
    wave_player.save()

    # Register the three processors with the voice pipeline.
    voice.add_processor(voice_components + "/parameter_generator.cfg")
    voice.add_processor(voice_components + "/waveform_synthesiser.cfg")
    voice.add_processor(voice_components + "/wave_player.cfg")

    print(" -- Save voice")
    voice.save()

    print(" -- Synthesize a test utterance (from some Spanish text...)")
    ## Use the voice to synth a test utterance:
    voice.synth_utterance("Esto es: una prueba.")
def main_work():
    """Command-line entry point: batch-synthesise a list of text files.

    Each input file is read whole, joined into one text, and synthesised to
    <output_dir>/<basename>.wav; optionally also writes utt files (-u) and
    DNN label files (-lab).

    Fixes vs. original: bare `except:` around print(text) narrowed to
    UnicodeEncodeError (it existed only to survive console-unencodable
    characters, but would also have hidden KeyboardInterrupt etc.); 'BIN'
    path no longer contains a double slash (ROOT already ends with '/');
    dead commented-out labfile code removed.
    """
    #################################################

    # root is one level below this file in directory structure,
    # ie. below the 'scripts' folder
    ROOT = os.path.split(
        os.path.realpath(
            os.path.abspath(
                os.path.dirname(inspect.getfile(inspect.currentframe())))))[0] + '/'

    dirs = {
        'ROOT': ROOT,
        'CONFIG': ROOT + "configs/",
        'VOICES': ROOT + "voices/",
        'TRAIN': ROOT + "train/",
        'RULES': ROOT + "rules/",
        'CORPUS': ROOT + "corpus/",
        'BIN': ROOT + "tools/bin/",  # was ROOT + "/tools/bin/": double slash
    }

    # ======== Get stuff from command line ==========
    a = ArgumentParser()
    a.add_argument('-s', dest='speaker', required=True,
                   help="the name of the speaker: <ROOT>/corpus/<LANG>/<SPEAKER>")
    a.add_argument('-l', dest='lang', required=True,
                   help="the language of the speaker: <ROOT>/corpus/<LANG>")
    a.add_argument('-o', dest='output', required=False, default=False,
                   help="output audio here")
    a.add_argument('-t', dest='stage', required=False, default="runtime",
                   help="""defines the current usage stage
                           (definitions of stages should by found in <config>/recipe.cfg""")
    a.add_argument('-u', dest='output_utt', required=False, default=False,
                   help="output utt files here")
    a.add_argument('-play', dest='play', action="store_true", required=False, default=False,
                   help=" play audio after synthesis")
    a.add_argument('-lab', dest='make_label', action="store_true", default=False,
                   help="make label file as well as wave in output location")
    a.add_argument('config',
                   help="""configuration to use: naive, semi-naive, gold,
                           as defined in <ROOT>/recipes/<config> -directory""")
    a.add_argument('files', nargs='*',
                   help="text files to speak, reading from stdin by default")
    opts = a.parse_args()

    voice_location = os.path.join(dirs['VOICES'], opts.lang, opts.speaker, opts.config)
    train_location = os.path.join(dirs['TRAIN'], opts.lang,
                                  "speakers", opts.speaker, opts.config)
    config_path = os.path.join(dirs['CONFIG'], opts.config)
    voice_config = os.path.join(config_path, fname.RECIPE)

    ## Make Voice object to contain voice elements trained on this corpus:
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage, dirs)

    # Default output directory lives inside the voice directory.
    if not opts.output:
        output_dir = os.path.join(voice_location, 'output', 'wav')
    else:
        output_dir = opts.output

    if opts.output_utt:
        if not os.path.isdir(opts.output_utt):
            os.makedirs(opts.output_utt)

    output_extensions = []
    if opts.make_label:
        output_extensions.append('dnn_lab')

    for filename in opts.files:
        base = get_basename(filename)
        output_wavefile = os.path.join(output_dir, base + '.wav')
        text = ' '.join(readlist(filename))
        try:
            print(text)
        except UnicodeEncodeError:  # console can't encode some characters
            print(' ')
        print(base)
        if opts.output_utt:
            voice.synth_utterance(text, output_wavefile=output_wavefile,
                                  output_uttfile=os.path.join(opts.output_utt, base + '.utt'),
                                  output_extensions=output_extensions)
        else:
            voice.synth_utterance(text, output_wavefile=output_wavefile,
                                  output_extensions=output_extensions)