logging.info("%s written." % fn) fn = '%s/local/lm/wordlist.txt' % data_dir with open(fn, 'w') as f: for token in sorted(utt_dict): f.write((u'%s\n' % token).encode('utf8')) logging.info("%s written." % fn) # # copy scripts and config files # misc.copy_file('data/src/speech/kaldi-run-lm.sh', '%s/run-lm.sh' % work_dir) # misc.copy_file ('data/src/speech/kaldi-run-am.sh', '%s/run-am.sh' % work_dir) # misc.copy_file ('data/src/speech/kaldi-run-nnet3.sh', '%s/run-nnet3.sh' % work_dir) misc.copy_file('data/src/speech/kaldi-run-chain.sh', '%s/run-chain.sh' % work_dir) misc.copy_file('data/src/speech/kaldi-cmd.sh', '%s/cmd.sh' % work_dir) misc.copy_file('data/src/speech/kaldi-path.sh', '%s/path.sh' % work_dir) misc.mkdirs('%s/conf' % work_dir) misc.copy_file('data/src/speech/kaldi-mfcc.conf', '%s/conf/mfcc.conf' % work_dir) misc.copy_file('data/src/speech/kaldi-mfcc-hires.conf', '%s/conf/mfcc_hires.conf' % work_dir) misc.copy_file('data/src/speech/kaldi-online-cmvn.conf', '%s/conf/online_cmvn.conf' % work_dir) misc.mkdirs('%s/local' % work_dir) misc.copy_file('data/src/speech/kaldi-score.sh',
# Create the valid and train directories below data_dir.
for subdir_fmt in ('%s/valid', '%s/train'):
    misc.mkdirs(subdir_fmt % data_dir)

#
# load dict
#

logging.info("loading lexicon...")
lex = Lexicon(file_name=dictionary)
logging.info("loading lexicon...done.")

#
# language model
#

# The ARPA file keeps its name; only the containing directory changes.
lm_arpa_src = '%s/lm.arpa' % language_model_dir
lm_arpa_dst = '%s/lm.arpa' % data_dir
misc.copy_file(lm_arpa_src, lm_arpa_dst)

#
# scripts
#

# Training launcher, rendered with the environment activation command and
# the training executable supplied by the caller.
run_train_kwargs = {
    'w2l_env_activate': w2l_env_activate,
    'w2l_train': w2l_train,
}
misc.render_template(
    'data/src/speech/w2l_run_train.sh.template',
    '%s/run_train.sh' % work_dir,
    **run_train_kwargs
)

# conv_glu configuration: a rendered train.cfg plus the network
# architecture file copied as-is.
misc.mkdirs('%s/config/conv_glu' % work_dir)
misc.render_template(
    'data/src/speech/w2l_config_conv_glu_train.cfg.template',
    '%s/config/conv_glu/train.cfg' % work_dir,
    runname=model_name
)
misc.copy_file(
    'data/src/speech/w2l_config_conv_glu_network.arch',
    '%s/config/conv_glu/network.arch' % work_dir
)

#
# export audio
#