def main():
    """Skeleton module entry point: option parsing, configuration loading
    and build-environment setup with an empty module-specific section."""
    # Build the default configuration and an option parser for this script.
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME,
                                                        DESCRIPTION,
                                                        SCRIPT_NAME)
    # Parse the command line only when executed as a script.
    # NOTE(review): `opts` is referenced below even when this guard is false
    # (module import) — presumably these scripts are only ever run directly.
    if __name__ == '__main__':
        opts, args = parser.parse_args()
    # Layer any custom configuration files on top of the defaults.
    if opts.bldconf:
        build_conf.parse(opts.bldconf)
    if opts.spkconf:
        build_conf.parse(opts.spkconf)
    else:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    build_conf.updatefromopts(opts)
    # Set up logging, check idlak-scratch, check dependencies and build as required.
    build_conf.set_build_environment(SCRIPT_NAME)
    # MODULE SPECIFIC CODE
    # get required input files from idlak-data
    # get required directories from dependent modules
    # examine general settings and set as appropriate
    # process data
    # END OF MODULE SPECIFIC CODE
    build_conf.end_processing(SCRIPT_NAME)
def main():
    """Convert word-alignment (.wrd) files from the align module into
    per-phone duration (.dur) files, one output line per phone."""
    # Process the options based on the default build configuration.
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME,
                                                        DESCRIPTION,
                                                        SCRIPT_NAME)
    # Parse command line.
    if __name__ == '__main__':
        opts, args = parser.parse_args()
    # And load custom configurations.
    if opts.bldconf:
        build_conf.parse(opts.bldconf)
    if opts.spkconf:
        build_conf.parse(opts.spkconf)
    else:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    build_conf.updatefromopts(opts)
    # Set up logging, check idlak-scratch, check dependencies and build as required.
    build_conf.set_build_environment(SCRIPT_NAME)
    # MODULE SPECIFIC CODE
    # Get required directories from dependent modules.
    aligndir = build_conf.get_input_dir('align_def')
    outdir = os.path.join(build_conf.outdir, 'output')
    # FIX: ensure the output directory exists before opening files in it.
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    # Process data: each .wrd file holds two time columns per phone.
    wrds_dir = os.path.join(aligndir, 'wrds')
    file_list = glob.glob('%s/*.wrd' % (wrds_dir))
    for f in file_list:
        # Get the input file's name stem so we can use it for the output filename.
        filename_stem = os.path.split(os.path.splitext(f)[0])[1]
        output_filename = os.path.join(outdir, '%s.dur' % (filename_stem))
        # FIX: context managers close both handles even if a parse error occurs.
        with open(f, 'r') as wrd_file, open(output_filename, 'w') as output_file:
            for line in wrd_file:
                columns = line.split()
                # Column #0 is the allotted time before the given phone,
                # column #1 the allotted time after it; the difference is
                # the phone duration.
                phone_dur = float(columns[1]) - float(columns[0])
                output_file.write(str(phone_dur) + '\n')
    # END OF MODULE SPECIFIC CODE
    build_conf.end_processing(SCRIPT_NAME)
def main():
    """Normalise the speaker's script with idlaktxp and build the kaldi
    input files / training directory from the result."""
    # Process the options based on the default build configuration.
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME,
                                                        DESCRIPTION,
                                                        SCRIPT_NAME)
    # print 'SEQ', build_conf.dataseq
    # Parse the command line when run as a script.
    if __name__ == '__main__':
        opts, args = parser.parse_args()
    # Load custom configurations on top of the defaults.
    if opts.bldconf:
        build_conf.parse(opts.bldconf)
    if opts.spkconf:
        build_conf.parse(opts.spkconf)
    else:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    build_conf.updatefromopts(opts)
    # Set up logging, check idlak-scratch, check dependencies and build as required.
    logger = build_conf.set_build_environment(SCRIPT_NAME)
    # MODULE SPECIFIC CODE
    # Required input locations from idlak-data.
    kaldisrcdir = os.path.join(build_conf.kaldidir, 'src')
    accdir = os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc)
    spkdir = os.path.join(accdir, build_conf.spk)
    outdir = build_conf.outdir
    # No directories required from dependent modules; no module options.
    # Run the text through the idlak text processing module.
    com = '%s/idlaktxpbin/idlaktxp --pretty --tpdb=%s %s %s\n' % (
        kaldisrcdir,
        accdir,
        os.path.join(spkdir, "text.xml"),
        os.path.join(outdir, "output", "text_norm.xml"))
    logger.log('Info', 'Running normalisation on input xml text: %s' % (com))
    os.system(com)
    # Create kaldi required input files (modified from egs/arctic/s1/run.py).
    logger.log('Info', 'Creating kaldi input files and train dir')
    wavdir = os.path.join(build_conf.idlakwav, build_conf.lang,
                          build_conf.acc, build_conf.spk, build_conf.srate)
    # NOTE(review): use of relative and absolute paths here appears broken MA140305.
    kaldidata(os.path.join(outdir, "output"), wavdir, build_conf.spk,
              build_conf.flist, True)
    # END OF MODULE SPECIFIC CODE
    build_conf.end_processing(SCRIPT_NAME)
def main():
    """Extract pitch features from the speaker's audio using get_f0/pplain."""
    # Process the options based on the default build configuration.
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME,
                                                        DESCRIPTION,
                                                        SCRIPT_NAME)
    # Parse the command line when run as a script.
    if __name__ == '__main__':
        opts, args = parser.parse_args()
    # Load custom configurations on top of the defaults.
    if opts.bldconf:
        build_conf.parse(opts.bldconf)
    if opts.spkconf:
        build_conf.parse(opts.spkconf)
    else:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    build_conf.updatefromopts(opts)
    # Set up logging, check idlak-scratch, check dependencies and build as required.
    build_conf.set_build_environment(SCRIPT_NAME)
    # MODULE SPECIFIC CODE
    outdir = build_conf.outdir
    # Get required directories from dependent modules.
    kaldisrcdir = os.path.join(build_conf.kaldidir, 'src')
    # Locations of the external pitch tools from the module settings.
    getf0_path = build_conf.getval('pitch_def', 'getf0')
    pplain_path = build_conf.getval('pitch_def', 'pplain')
    # NOTE(review): existence checks for getf0_path/pplain_path were
    # deliberately disabled here (previously raised IOError when the supplied
    # locations did not exist) — presumably to allow commands found on PATH;
    # confirm before re-enabling.
    # Process data.
    wavdir = os.path.join(build_conf.idlakwav, build_conf.lang,
                          build_conf.acc, build_conf.spk, build_conf.srate)
    outdir_data = os.path.join(outdir, "data")
    if not os.path.isdir(outdir_data):
        os.mkdir(outdir_data)
    process_data(outdir, wavdir, getf0_path, pplain_path,
                 build_conf.flist, kaldisrcdir)
    # END OF MODULE SPECIFIC CODE
    build_conf.end_processing(SCRIPT_NAME)
def main():
    """Build pitch and duration decision trees and GMM models from the
    full-context alignment, context features and pitch features."""
    # Process the options based on the default build configuration.
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME,
                                                        DESCRIPTION,
                                                        SCRIPT_NAME)
    # Parse the command line when run as a script.
    if __name__ == '__main__':
        opts, args = parser.parse_args()
    # Load custom configurations on top of the defaults.
    if opts.bldconf:
        build_conf.parse(opts.bldconf)
    if opts.spkconf:
        build_conf.parse(opts.spkconf)
    else:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    build_conf.updatefromopts(opts)
    # Set up logging, check idlak-scratch, check dependencies and build as required.
    build_conf.set_build_environment(SCRIPT_NAME)
    # MODULE SPECIFIC CODE
    # Get required directories from dependent modules.
    aligndir = build_conf.get_input_dir('align_def')
    cexdir = build_conf.get_input_dir('cex_def')
    pitchdir = build_conf.get_input_dir('pitch_def')
    outdir_data = os.path.join(build_conf.outdir, 'data')
    if not os.path.isdir(outdir_data):
        os.mkdir(outdir_data)
    # Merge full context information with the quinphone alignment.
    build_conf.logger.log('info',
                          'Merging full context information with quinphone alignment')
    makefullctx = os.path.join(build_conf.kaldidir, 'src', 'bin',
                               'make-fullctx-ali')
    quinphonemodel = os.path.join(aligndir, 'kaldidelta_quin_output', 'final.mdl')
    quinphonealign = os.path.join(aligndir, 'kaldidelta_quin_output', 'ali.1.gz')
    contextdata = os.path.join(cexdir, 'cex.ark')
    fullctxali = os.path.join(build_conf.outdir, 'output', 'ali')
    cmd = '%s %s "ark:gunzip -c %s|" ark,t:%s ark,t:%s' % (
        makefullctx, quinphonemodel, quinphonealign, contextdata, fullctxali)
    os.system(cmd)
    # Compile context question sets from cex_def.
    compilequestions = os.path.join(build_conf.kaldidir, 'src', 'bin',
                                    'compile-questions')
    ctxqset = os.path.join(cexdir, 'qset.dat')
    # Dummy (empty) questions.int file.
    dummyqset = os.path.join(build_conf.outdir, 'output', 'questions.int')
    os.system("touch %s" % (dummyqset))
    topo = os.path.join(aligndir, 'data', 'lang', 'topo')
    ctxqsetbin = os.path.join(build_conf.outdir, 'output', 'qset_binary.dat')
    # Unclear how the topology affects the pdf generation here.
    cmd = ("%s --central-position=2 --binary=false --context-width=5 "
           "--keyed-questions=%s %s %s %s") % (
        compilequestions, ctxqset, topo, dummyqset, ctxqsetbin)
    os.system(cmd)
    # Accumulate statistics for pitch.
    fullctxacc = os.path.join(build_conf.kaldidir, 'src', 'bin',
                              'acc-fullctx-stats')
    pitchfeatures = os.path.join(pitchdir, 'lf0.ark')
    pitchacc = os.path.join(build_conf.outdir, 'output', 'pitch_acc.dat')
    cmd = '%s --binary=false 2 ark:%s ark:%s %s' % (
        fullctxacc, pitchfeatures, fullctxali, pitchacc)
    os.system(cmd)
    # Build a pitch tree.
    buildtree = os.path.join(build_conf.kaldidir, 'src', 'bin', 'build-tree')
    roots = os.path.join(aligndir, 'data', 'lang', 'phones', 'roots.int')
    # NOTE(review): hardcoded absolute path into a user's AFS home directory;
    # this should come from configuration or idlak-data.
    rootsdummy = ('/afs/inf.ed.ac.uk/user/m/matthewa/kaldi/matthewa/'
                  'kaldi-idlak/idlak-voice-build/dummy.int')
    treeout = os.path.join(build_conf.outdir, 'output', 'pitch.tree')
    cmd = ("%s --binary=false --verbose=1 --context-width=5 "
           "--central-position=2 %s %s %s %s %s") % (
        buildtree, pitchacc, roots, ctxqsetbin, topo, treeout)
    os.system(cmd)
    # Make a model from the tree and the statistics.
    gmminitmodel = os.path.join(build_conf.kaldidir, 'src', 'gmmbin',
                                'gmm-init-model')
    modelout = os.path.join(build_conf.outdir, 'output', 'pitch.mdl')
    cmd = "%s --binary=false %s %s %s %s" % (
        gmminitmodel, treeout, pitchacc, topo, modelout)
    os.system(cmd)
    ######################################################################
    # DURATION MODELLING
    ######################################################################
    # Rewrite the full context alignment to one line per phone and generate
    # duration parameters for state durations.
    convert_ali_durations_data(NOSTATES, fullctxali, outdir_data)
    # Build the duration context question set (against the '2' topology).
    ctxqsetbin = os.path.join(build_conf.outdir, 'output', 'qset_binary_dur.dat')
    cmd = ("%s --central-position=2 --binary=false --context-width=5 "
           "--keyed-questions=%s %s %s %s") % (
        compilequestions, ctxqset, topo + '2', dummyqset, ctxqsetbin)
    os.system(cmd)
    # Accumulate statistics for state duration.
    stateduracc = os.path.join(build_conf.outdir, 'output', 'statedur_acc.dat')
    cmd = '%s --binary=false --var-floor=20.0 2 ark:%s ark:%s %s' % (
        fullctxacc,
        os.path.join(outdir_data, 'durations_states.ark'),
        os.path.join(outdir_data, 'durationali.ark'),
        stateduracc)
    os.system(cmd)
    # Accumulate statistics for phone durations.
    phoneduracc = os.path.join(build_conf.outdir, 'output', 'phonedur_acc.dat')
    cmd = '%s --binary=false --var-floor=20.0 2 ark:%s ark:%s %s' % (
        fullctxacc,
        os.path.join(outdir_data, 'durations_phones.ark'),
        os.path.join(outdir_data, 'durationali.ark'),
        phoneduracc)
    os.system(cmd)
    # Build duration trees.
    # For Interspeech 15 work we have the following duration trees and models:
    #   1. Kaldi out of the box
    #   2. Kaldi with same number of leaves and no initial roots questions
    #   3. As 2 but using 5 dim state duration data
    #   4. As 3 but with no post processing
    treeout1 = os.path.join(build_conf.outdir, 'output', 'dur_1.tree')
    treeout2 = os.path.join(build_conf.outdir, 'output', 'dur_2.tree')
    treeout3 = os.path.join(build_conf.outdir, 'output', 'dur_3.tree')
    treeout4 = os.path.join(build_conf.outdir, 'output', 'dur_4.tree')
    # 11.3 thresh for statedur stats -> 511 leaves
    # 8.3 thresh for phonedur stats -> 518 leaves
    cmd = ("%s --binary=false --verbose=1 --context-width=5 "
           "--central-position=2 %s %s %s %s %s") % (
        buildtree, phoneduracc, roots, ctxqsetbin, topo + '2', treeout1)
    os.system(cmd)
    cmd = ("%s --binary=false --max-leaves=513 --thresh=0 --verbose=1 "
           "--context-width=5 --central-position=2 %s %s %s %s %s") % (
        buildtree, phoneduracc, rootsdummy, ctxqsetbin, topo + '2', treeout2)
    os.system(cmd)
    cmd = ("%s --binary=false --max-leaves=513 --thresh=0 --verbose=1 "
           "--context-width=5 --central-position=2 %s %s %s %s %s") % (
        buildtree, stateduracc, rootsdummy, ctxqsetbin, topo, treeout3)
    os.system(cmd)
    cmd = ("%s --binary=false --max-leaves=513 --cluster-thresh=0 --thresh=0 "
           "--verbose=1 --context-width=5 --central-position=2 %s %s %s %s %s") % (
        buildtree, stateduracc, rootsdummy, ctxqsetbin, topo, treeout4)
    os.system(cmd)
    # Make a model from each tree and the state statistics.
    modelout1 = os.path.join(build_conf.outdir, 'output', 'dur_1.mdl')
    modelout2 = os.path.join(build_conf.outdir, 'output', 'dur_2.mdl')
    modelout3 = os.path.join(build_conf.outdir, 'output', 'dur_3.mdl')
    modelout4 = os.path.join(build_conf.outdir, 'output', 'dur_4.mdl')
    cmd = "%s --binary=false %s %s %s %s" % (
        gmminitmodel, treeout1, stateduracc, topo, modelout1)
    os.system(cmd)
    cmd = "%s --binary=false %s %s %s %s" % (
        gmminitmodel, treeout2, stateduracc, topo, modelout2)
    os.system(cmd)
    cmd = "%s --binary=false %s %s %s %s" % (
        gmminitmodel, treeout3, stateduracc, topo, modelout3)
    os.system(cmd)
    cmd = "%s --binary=false %s %s %s %s" % (
        gmminitmodel, treeout4, stateduracc, topo, modelout4)
    os.system(cmd)
    # END OF MODULE SPECIFIC CODE
    build_conf.end_processing(SCRIPT_NAME)
def main(): # process the options based on the default build configuration build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME) # parse commamd line if __name__ == '__main__': opts, args = parser.parse_args() # and load custom configurations if opts.bldconf: build_conf.parse(opts.bldconf) if opts.spkconf: build_conf.parse(opts.spkconf) else: parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml") build_conf.updatefromopts(opts) # set up logging, check idlak-scratch, check dependencies and build as required logger = build_conf.set_build_environment(SCRIPT_NAME) # MODULE SPECIFIC CODE # get required input files from idlak-data # text for testing voice after build (same as original HTSDEMO test text alicetxtfile = os.path.join(build_conf.idlakdata, build_conf.lang, 'testdata', 'alice.xml') # Maping from original arctic corpus id to idlak corpus id corpusid2idlakidfile = os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc, build_conf.spk, 'corpusid2idlakid.txt') # get required directories from dependent modules cexdir = build_conf.get_input_dir('cex_def') # examine general settings and set as appropriate htsdemodir = build_conf.getval('hts_test', 'htsdemodir') if not os.path.isdir(htsdemodir): logger.log('critical', '[%s] does not exist' % (htsdemodir)) if not os.path.isdir(os.path.join(htsdemodir, 'HTS-demo_CMU-ARCTIC-SLT')): logger.log('critical', '[%s] does not contain an HTS demo' % (htsdemodir)) if not build_conf.spk == 'slt': logger.log('critical', 'This test currently only setup to work with en/ga/slt') # get info to cut slt raw audio in HTSDEMO into spurts spttimesfile = os.path.join(cexdir, 'spt_times.dat') # get directory for full model files htsmodeldir = os.path.join(cexdir, 'htslab') # new question set for data qsetfile = os.path.join(cexdir, 'questions-kaldi-en-ga.hed') # process data # create or replace label file directories htsdatadir = os.path.join(htsdemodir, 
'HTS-demo_CMU-ARCTIC-SLT', 'data') # full models if os.path.isdir(os.path.join(htsdatadir, 'labels', 'full')): if not os.path.isdir(os.path.join(htsdatadir, 'labels', 'full_orig')): os.system('mv %s %s' % (os.path.join(htsdatadir, 'labels', 'full'), os.path.join(htsdatadir, 'labels', 'full_orig'))) os.mkdir(os.path.join(htsdatadir, 'labels', 'full')) else: os.mkdir(os.path.join(htsdatadir, 'labels', 'full')) # mono models if os.path.isdir(os.path.join(htsdatadir, 'labels', 'mono')): if not os.path.isdir(os.path.join(htsdatadir, 'labels', 'mono_orig')): os.system('mv %s %s' % (os.path.join(htsdatadir, 'labels', 'mono'), os.path.join(htsdatadir, 'labels', 'mono_orig'))) os.mkdir(os.path.join(htsdatadir, 'labels', 'mono')) else: os.mkdir(os.path.join(htsdatadir, 'labels', 'mono')) # label files compatible with wavesurfer if not os.path.isdir(os.path.join(htsdatadir, 'labels', 'wsurf')): os.mkdir(os.path.join(htsdatadir, 'labels', 'wsurf')) # create full, mono and wavesurfer label files labfiles = glob.glob(htsmodeldir + "/*.lab") labfiles.sort() for f in labfiles: stem = os.path.split(f)[1] fp1 = open(os.path.join(htsdatadir, 'labels', 'full', 'cmu_us_arctic_' + stem), 'w') fp2 = open(os.path.join(htsdatadir, 'labels', 'mono', 'cmu_us_arctic_' + stem), 'w') fp3 = open(os.path.join(htsdatadir, 'labels', 'wsurf', 'cmu_us_arctic_' + stem), 'w') for l in open(f).readlines(): fp1.write(l) pat = re.match('^([0-9]+)\s+([0-9]+)\s\S+\-(.*?)\+.*$', l) fp2.write("%s %s %s\n" % pat.groups()) fp3.write("%.3f %.3f %s\n" % (float(pat.group(1))/10000000.0, float(pat.group(2))/10000000.0, pat.group(3))) fp1.close() fp2.close() fp3.close() # copy question file oldqset = os.path.join(htsdatadir, 'questions', 'questions_qst001.hed') olduttqset = os.path.join(htsdatadir, 'questions', 'questions_utt_qst001.hed') if not os.path.isfile(oldqset + '.orig'): os.system('mv %s %s.orig' % (oldqset, oldqset)) if not os.path.isfile(olduttqset + '.orig'): os.system('mv %s %s.orig' % (olduttqset, 
olduttqset)) os.system('cp %s %s' % (qsetfile, oldqset)) # construct utterance qset from qset lines = open(qsetfile).readlines() fp = open(olduttqset, 'w') for l in lines: uttqs = False for name in UTTQSET: if l.find(name) > -1: uttqs = True break if uttqs: fp.write(l) # cut up audio to correct spt sized chunks if not os.path.isdir(os.path.join(htsdatadir, 'kaldiraw')): os.system('mv %s %s.orig' % (os.path.join(htsdatadir, 'raw'), os.path.join(htsdatadir, 'raw'))) os.mkdir(os.path.join(htsdatadir, 'kaldiraw')) os.system('ln -s %s %s' % (os.path.join(htsdatadir, 'kaldiraw'), os.path.join(htsdatadir, 'raw'))) # load lookup between arcti ids and kaldi ids idlak2corpus = {} lines = open(corpusid2idlakidfile).readlines() for l in lines: ll = l.split() idlak2corpus[ll[1]] = ll[0] # open spt times lines = open(spttimesfile).readlines() for l in lines: ll = l.split() origwav = 'cmu_us_arctic_slt_' + idlak2corpus[ll[0][4:-8]].split('_')[1] # currently use ch_wave change to kaldi style MA070314 cmd = '%s -o %s/cmu_us_arctic_%s.raw -f 48000 -itype raw -otype raw -start %s -end %s %s/%s.raw' % ( os.path.join(htsdemodir, 'speech_tools/bin/ch_wave'), os.path.join(htsdatadir, 'kaldiraw'), ll[0][:-4], ll[1], ll[2], os.path.join(htsdatadir, 'raw.orig'), origwav) print cmd os.system(cmd) #TODO create gen labels using script in utils # END OF MODULE SPECIFIC CODE build_conf.end_processing(SCRIPT_NAME)
def main():
    """Drive the full kaldi forced-alignment recipe: prepare data, extract
    mfccs, train mono/tri/quin models and write phone/state/word labels."""
    # Process the options based on the default build configuration.
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME,
                                                        DESCRIPTION,
                                                        SCRIPT_NAME)
    # print 'SEQ', build_conf.dataseq
    # Parse the command line when run as a script.
    if __name__ == "__main__":
        opts, args = parser.parse_args()
    # Load custom configurations on top of the defaults.
    if opts.bldconf:
        build_conf.parse(opts.bldconf)
    if opts.spkconf:
        build_conf.parse(opts.spkconf)
    else:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    build_conf.updatefromopts(opts)
    # Set up logging, check idlak-scratch, check dependencies and build as required.
    build_conf.set_build_environment(SCRIPT_NAME)
    if opts.flist:
        build_conf.logger.log("warn", "flist does NOT currently work in align_def.py")
    # MODULE SPECIFIC CODE
    # Required input locations from idlak-data.
    spkdir = os.path.join(build_conf.idlakdata, build_conf.lang,
                          build_conf.acc, build_conf.spk)
    # Required directories from dependent modules.
    alignsetupdir = build_conf.get_input_dir("alignsetup_def")
    # Module specific settings.
    breaktype = build_conf.getval("align_def", "break")
    breakdef = build_conf.getval("align_def", "breakdef")
    # Remove old setup data.
    cmd = "rm -rf %s" % (os.path.join(build_conf.outdir, "output", "data"))
    build_conf.logger.log("info", "Removing old alignsetup information: %s" % (cmd))
    os.system(cmd)
    # Copy setup data.
    cmd = "cp -R %s %s" % (alignsetupdir,
                           os.path.join(build_conf.outdir, "output", "data"))
    build_conf.logger.log("info", "Copying alignsetup information: %s" % (cmd))
    os.system(cmd)
    # Link conf, steps and utils directories from egs/wsj/s5.
    cmd = "ln -s %s %s" % (
        os.path.join(build_conf.kaldidir, "egs", "wsj", "s5", "conf"),
        os.path.join(build_conf.outdir, "output", "conf"))
    build_conf.logger.log("info", "Linking wsj s5 conf: %s" % (cmd))
    os.system(cmd)
    cmd = "ln -s %s %s" % (
        os.path.join(build_conf.kaldidir, "egs", "wsj", "s5", "utils"),
        os.path.join(build_conf.outdir, "output", "utils"))
    build_conf.logger.log("info", "Linking wsj s5 utils: %s" % (cmd))
    os.system(cmd)
    cmd = "ln -s %s %s" % (
        os.path.join(build_conf.kaldidir, "egs", "wsj", "s5", "steps"),
        os.path.join(build_conf.outdir, "output", "steps"))
    build_conf.logger.log("info", "Linking wsj s5 steps: %s" % (cmd))
    os.system(cmd)
    # Update PATH so the kaldi scripts can find all the binaries.
    pathlist = [
        os.path.join(build_conf.outdir, "output", "utils"),
        os.path.join(build_conf.kaldidir, "src", "featbin"),
        os.path.join(build_conf.kaldidir, "src", "bin"),
        os.path.join(build_conf.kaldidir, "src", "fstbin"),
        os.path.join(build_conf.kaldidir, "tools", "openfst", "bin"),
        os.path.join(build_conf.kaldidir, "src", "latbin"),
        os.path.join(build_conf.kaldidir, "src", "lm"),
        os.path.join(build_conf.kaldidir, "src", "sgmmbin"),
        os.path.join(build_conf.kaldidir, "src", "sgmm2bin"),
        os.path.join(build_conf.kaldidir, "src", "fgmmbin"),
        os.path.join(build_conf.kaldidir, "src", "nnetbin"),
        os.path.join(build_conf.kaldidir, "src", "nnet-cpubin"),
        os.path.join(build_conf.kaldidir, "src", "kwsbin"),
        os.path.join(build_conf.kaldidir, "src", "gmmbin"),
    ]
    os.environ["PATH"] += os.pathsep + os.pathsep.join(pathlist)
    datadir = os.path.join(build_conf.outdir, "output", "data")
    # Create the lang directory using the kaldi script.
    cmd = ("cd %s/output; utils/prepare_lang.sh --num-nonsil-states %d "
           "data '<OOV>' data/lang data/lang") % (build_conf.outdir, NOSTATES)
    build_conf.logger.log("info", "running kaldi script to build lang subdir")
    os.system(cmd)
    # Extract mfccs.
    # com = "cd %s/output; steps/make_mfcc.sh --nj 1 data/train data/mfcc_log data/mfcc" % (build_conf.outdir)
    # build_conf.logger.log('info', 'running kaldi script to extract mfccs')
    build_conf.logger.log("info", "making mfcc directory")
    mfccdir = os.path.join(build_conf.outdir, "output", "data", "mfcc")
    if not os.path.isdir(mfccdir):
        os.mkdir(mfccdir)
    build_conf.logger.log("info", "extracting mfccs")
    cmd = ("cd %s/output; compute-mfcc-feats --frame-shift=%d --verbose=0 "
           "--config=%s scp:%s ark:- | copy-feats --compress=false ark:- "
           "ark,scp:%s,%s") % (
        build_conf.outdir,
        int(FRAMESHIFT * 1000),
        "conf/mfcc.conf",
        "data/train/wav.scp",
        "data/mfcc/raw_mfcc_train.1.ark",
        "data/mfcc/raw_mfcc_train.1.scp")
    os.system(cmd)
    # Build a dummy spk to utt file.
    cmd = ("cd %s/output; utt2spk_to_spk2utt.pl data/train/utt2spk > "
           "data/train/spk2utt") % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute dummy spk2utt file")
    os.system(cmd)
    # Compute feature stats: first copy the scp file to train/feats.scp.
    build_conf.logger.log("info", "copying mfcc scp to feats scp")
    cmd = ("cd %s/output; cp data/mfcc/raw_mfcc_train.1.scp "
           "data/train/feats.scp") % (build_conf.outdir)
    os.system(cmd)
    cmd = ("cd %s/output; steps/compute_cmvn_stats.sh data/train data/mfcc "
           "data/mfcc") % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute feature statistics")
    os.system(cmd)
    # Monophone training (flat start).
    cmd = ("cd %s/output; steps/train_mono.sh --nj 1 data/train data/lang "
           "kaldimono_output") % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute flat start monophone models")
    os.system(cmd)
    # Delta training (triphone).
    cmd = ("cd %s/output; steps/train_deltas.sh 2000 10000 3 data/train "
           "data/lang kaldimono_output kaldidelta_tri_output") % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute flat start triphone models")
    os.system(cmd)
    # Delta training (quinphone).
    cmd = ("cd %s/output; steps/train_deltas.sh 2000 10000 5 data/train "
           "data/lang kaldidelta_tri_output kaldidelta_quin_output") % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to compute flat start quinphone models")
    os.system(cmd)
    # Extract the phone alignment.
    cmd = ('cd %s/output; ali-to-phones --per-frame kaldidelta_quin_output/35.mdl '
           '"ark:gunzip -c kaldidelta_quin_output/ali.1.gz|" ark,t:- | '
           'utils/int2sym.pl -f 2- data/lang/phones.txt > align.dat') % (build_conf.outdir)
    # com = 'cd %s/output; show-alignments data/lang/phones.txt kaldidelta_quin_output/35.mdl "ark:gunzip -c kaldidelta_quin_output/ali.1.gz|" > align.dat' % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to extract alignment")
    os.system(cmd)
    # Extract the state alignment.
    cmd = ('cd %s/output; ali-to-hmmstate kaldidelta_quin_output/35.mdl '
           '"ark:gunzip -c kaldidelta_quin_output/ali.1.gz|" ark,t:- > '
           'sttalign.dat') % (build_conf.outdir)
    build_conf.logger.log("info", "running kaldi script to extract state alignment")
    os.system(cmd)
    # Extract the word alignment.
    cmd = ("cd %s/output; linear-to-nbest \"ark:gunzip -c kaldidelta_quin_output/ali.1.gz|\" "
           "\"ark:utils/sym2int.pl --map-oov 1669 -f 2- data/lang/words.txt < data/train/text |\" "
           "'' '' ark:- | lattice-align-words data/lang/phones/word_boundary.int "
           "kaldidelta_quin_output/35.mdl ark:- ark:- | nbest-to-ctm --frame-shift=%f "
           "--precision=3 ark:- - | utils/int2sym.pl -f 5 data/lang/words.txt > "
           "wrdalign.dat") % (build_conf.outdir, FRAMESHIFT)
    build_conf.logger.log("info", "running kaldi scripts to extract word alignment")
    os.system(cmd)
    # Get actual duration times of all wav files.
    build_conf.logger.log("info", "Collecting wav file durations")
    wavdurations = get_wav_durations(
        build_conf.kaldidir,
        os.path.join(build_conf.outdir, "output", "data", "train", "wav.scp"))
    # Write alignment as files that are readable by wavesurfer etc for checking.
    build_conf.logger.log("info", "Writing lab and wrd files")
    labdir = os.path.join(build_conf.outdir, "output", "labs")
    if not os.path.isdir(labdir):
        os.mkdir(labdir)
    write_as_labs(os.path.join(build_conf.outdir, "output", "align.dat"),
                  FRAMESHIFT, wavdurations, labdir)
    wrddir = os.path.join(build_conf.outdir, "output", "wrds")
    if not os.path.isdir(wrddir):
        os.mkdir(wrddir)
    write_as_wrdlabs(os.path.join(build_conf.outdir, "output", "wrdalign.dat"),
                     wavdurations, labdir, wrddir)
    statedir = os.path.join(build_conf.outdir, "output", "stts")
    if not os.path.isdir(statedir):
        os.mkdir(statedir)
    write_as_statelabs(os.path.join(build_conf.outdir, "output", "sttalign.dat"),
                       FRAMESHIFT, NOSTATES, wavdurations, labdir, statedir)
    # Write an alignment based xml text file.
    write_xml_textalign(breaktype, breakdef, labdir, wrddir,
                        os.path.join(build_conf.outdir, "output", "text.xml"))
    # END OF MODULE SPECIFIC CODE
    build_conf.end_processing(SCRIPT_NAME)
def main():
    """Compute mel-cepstral (mcep) features for each input wav via an SPTK
    pipeline (wav-data | x2x | frame | window | mcep | x2x)."""
    # Process the options based on the default build configuration.
    build_conf, parser = build_configuration.get_config(SCRIPT_NAME,
                                                        DESCRIPTION,
                                                        SCRIPT_NAME)
    # Parse the command line when run as a script.
    if __name__ == "__main__":
        opts, args = parser.parse_args()
    # Load custom configurations on top of the defaults.
    if opts.bldconf:
        build_conf.parse(opts.bldconf)
    if opts.spkconf:
        build_conf.parse(opts.spkconf)
    else:
        parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml")
    build_conf.updatefromopts(opts)
    # Set up logging, check idlak-scratch, check dependencies and build as required.
    build_conf.set_build_environment(SCRIPT_NAME)
    # MODULE SPECIFIC CODE
    # Get required directories from dependent modules.
    kaldisrcdir = os.path.join(build_conf.kaldidir, "src")
    # Examine general settings and set as appropriate.
    sptk_root = build_conf.getval("mcep_def", "sptk_root")
    if not os.path.isdir(sptk_root):
        build_conf.logger.log("error", "Supplied sptk_root location %s does not exist!" % (sptk_root))
        raise IOError("Supplied sptk_root location %s does not exist!" % (sptk_root))
    # Process data.
    wavdir = os.path.join(build_conf.idlakwav, build_conf.lang,
                          build_conf.acc, build_conf.spk, build_conf.srate)
    valid_ids = load_input_wavs(wavdir, build_conf.flist)
    sptk_bin_root = os.path.join(sptk_root, "bin")
    # FIX: analysis parameters and the wav-independent pipeline stages are
    # loop invariants — build them once instead of on every iteration.
    window_length = 400
    frame_shift = 80
    # All-pass constant.
    alpha = 0.42
    # Order of mel-generalised cepstrum.
    order = 12
    # Converts data from short to float (+sf).
    x2x_com = "%s/x2x/x2x +sf" % (sptk_bin_root)
    frame_com = "%s/frame/frame -l %s -p %s" % (sptk_bin_root, window_length, frame_shift)
    # '-L 512' is the output frame length.
    # '-w 1' refers to the usage of a Hamming window.
    # '-n 1' is sigma(n=0,L-1)(w2(n)=1) normalisation.
    window_com = "%s/window/window -l %s -L 512 -w 1 -n 1" % (sptk_bin_root, window_length)
    # '-e 0.001' is a small value added to periodogram
    # '-l 512' is frame length.
    mcep_com = "%s/mcep/mcep -a %s -e 0.001 -m %s -l 512" % (sptk_bin_root, alpha, order)
    for wavfile in valid_ids:
        # Strips headers from RIFF wav file (only this stage depends on the wav).
        wavdata_com = "%s/featbin/wav-data %s/%s.wav" % (kaldisrcdir, wavdir, wavfile)
        com = "%s | %s | %s | %s | %s | %s/x2x/x2x +fa" % (
            wavdata_com,
            x2x_com,
            frame_com,
            window_com,
            mcep_com,
            sptk_bin_root,
        )
        build_conf.logger.log("info", com)
        # Return code was previously captured but never used.
        os.system(com)
    # END OF MODULE SPECIFIC CODE
    build_conf.end_processing(SCRIPT_NAME)
def main(): # process the options based on the default build configuration build_conf, parser = build_configuration.get_config(SCRIPT_NAME, DESCRIPTION, SCRIPT_NAME) #print 'SEQ', build_conf.dataseq # parse commamd line if __name__ == '__main__': opts, args = parser.parse_args() # and load custom configurations if opts.bldconf: build_conf.parse(opts.bldconf) if opts.spkconf: build_conf.parse(opts.spkconf) else: parser.error("Speaker configuration is required e.g. speaker_conf/bdl.xml") build_conf.updatefromopts(opts) # set up logging, check idlak-scratch, check dependencies and build as required build_conf.set_build_environment(SCRIPT_NAME) # ADD MODULE SPECIFIC CODE HERE # get required input files from idlak-data tpdbdir = os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc) qset = os.path.join(build_conf.idlakdata, build_conf.lang, build_conf.acc, "qset-default.xml") outdir = build_conf.outdir # get audio directory wavdir = os.path.join(build_conf.idlakwav, build_conf.lang, build_conf.acc, build_conf.spk, build_conf.srate) if not os.path.isabs(wavdir): wavdir = os.path.realpath(os.path.join(os.path.curdir, wavdir)) # get required directories from dependent modules aligndir = build_conf.get_input_dir('align_def') # Check to see if we generate HTS style context models as well hts = build_conf.getval('cex_def', 'hts') # examine modulespecific settings and set as appropriate # process data # get path to txpbin pathlist = [os.path.join(build_conf.kaldidir, 'src', 'idlaktxpbin')] os.environ["PATH"] += os.pathsep + os.pathsep.join(pathlist) # Process script through txp and cex output_filename = os.path.join(outdir, 'output', 'cex.xml') cmd = "idlaktxp --pretty --tpdb=%s %s - | " % (tpdbdir, os.path.join(aligndir, "text.xml")) + \ "idlakcex --pretty --tpdb=%s - %s" % (tpdbdir, output_filename) os.system(cmd) # read in the cex xml output and generate kaldi files for tree building dom = parse(output_filename) cexs, output_contexts, freqtables, cexheader = 
output_kaldicex(build_conf.logger, dom, outdir) # write out script to split original wavs into spts if required (i.e for HTS test) phon_labs = write_spt_times(build_conf.logger, dom, os.path.join(aligndir, 'labs'), os.path.join(outdir, 'output', 'spt_times.dat')) # generate HTS style context model names cexheaderhts = None if hts == "True": output_filename = os.path.join(outdir, 'output', 'cex_hts.xml') cmd = "idlaktxp --pretty --tpdb=%s %s - | " % (tpdbdir, os.path.join(aligndir, "text.xml")) + \ "idlakcex --pretty --cex-arch=hts --tpdb=%s - %s" % (tpdbdir, output_filename) os.system(cmd) dom = parse(output_filename) filecontexts, cexheaderhts = output_htscex(build_conf.logger, dom, outdir, phon_labs) htsqset = os.path.join(outdir, 'output', 'questions-kaldi-%s-%s.hed' % (build_conf.lang, build_conf.acc)) write_htsqset(build_conf.logger, qset, htsqset, cexheaderhts) # # write frequency tables of contexts for audit purposes # for ftable in freqtables.keys(): # fp = open(os.path.join(outdir, 'output', ftable + '_freq.txt'), 'w') # vals = freqtables[ftable].keys() # vals.sort() # for v in vals: # fp.write("%s %d\n" % (v, freqtables[ftable][v])) # fp.close() # create lookup tables if required lookuptables = {} for i in range(len(cexs)): key = 'cex' + ('000' + str(i))[-3:] vals = freqtables[key].keys() vals.sort() for v in vals: if not re.match('[0-9]+', v): # found a non integer value create a lookup table lookuptables[key] = {} mapping = 1 for v in vals: if v == '0': lookuptables[key][v] = 0 else: lookuptables[key][v] = mapping mapping += 1 break # # output lookup tables # for table in lookuptables.keys(): # fp = open(os.path.join(outdir, 'output', table + '_lkp.txt'), 'w') # vals = lookuptables[table].keys() # vals.sort() # for v in vals: # fp.write("%s %d\n" % (v, lookuptables[table][v])) # fp.close() # Output the context information used in an XMl readable form. # incudes frequency/lookup tables field names etc. 
write_kaldi_context_setup(cexheader, cexheaderhts, freqtables, lookuptables, outdir) # write kaldi style archive replacing symbols with lookup output_filename = os.path.join(outdir, 'output', 'cex.ark') fp = open(output_filename, 'w') for f in output_contexts: key = f[0] fp.write(key + ' ') for p in f[1]: for i, v in enumerate(p): # replace symbols with integers table = 'cex' + ('000' + str(i))[-3:] if lookuptables.has_key(table): v = str(lookuptables[table][v]) fp.write(v + ' ') fp.write('; ') fp.write('\n') fp.close() kaldiqset = os.path.join(outdir, 'output', 'qset.dat') write_kaldiqset(build_conf.logger, qset, kaldiqset, cexheader, lookuptables) print filecontexts # END OF MODULE SPECIFIC CODE build_conf.end_processing(SCRIPT_NAME)