def estimate_weights(p_lists, ths=0.000001, max_iter=500):
    '''
    use EM algorithm to estimate the optimal interpolated weights

    Starts from uniform weights (one per entry of p_lists) and repeatedly
    re-estimates them from weighted_p(weights, p_lists). Iteration stops
    as soon as the first weight changes by less than ths, or after
    max_iter rounds.

    p_lists  -- one sequence per language model, handed to weighted_p
                (presumably per-word probabilities -- confirm with caller)
    ths      -- convergence threshold on the change of weights[0]
    max_iter -- maximum number of re-estimation rounds

    Returns the list of estimated weights, one per entry of p_lists.
    '''
    # print log
    h.print_log('Estimate weights ...')

    lm_num = len(p_lists)
    weights = [1.0 / lm_num for _ in range(lm_num)]
    w_1_pre = weights[0]

    # Bind iter_num up front: with max_iter <= 0 the loop body never runs
    # and the summary log below would otherwise raise NameError.
    iter_num = 0
    for iter_num in range(max_iter):
        weighted_p_lists = weighted_p(weights, p_lists)
        for lm_id in range(lm_num):
            lm_values = weighted_p_lists[lm_id]
            word_num = len(lm_values)
            # new weight: sum of the entries divided by (count + 1)
            weights[lm_id] = 1.0 / (word_num + 1) * sum(lm_values)

        # convergence is judged on the first weight only
        if abs(w_1_pre - weights[0]) < ths:
            break
        w_1_pre = weights[0]

    # print log (iter_num is the zero-based index of the last round run)
    h.print_log('... {iter_num} iteration used'.format(iter_num=iter_num))
    return weights
def estimate_weights(p_lists, ths=0.000001, max_iter=500):
    '''
    use EM algorithm to estimate the optimal interpolated weights

    The weights start out uniform over the len(p_lists) entries. Each
    round calls weighted_p(weights, p_lists) and replaces every weight by
    the sum of its returned list divided by (length of that list + 1).
    The loop ends when weights[0] moves by less than ths between rounds,
    or once max_iter rounds have been performed.

    p_lists  -- one sequence per language model, forwarded to weighted_p
                (presumably per-word probabilities -- confirm with caller)
    ths      -- stop threshold for the change of the first weight
    max_iter -- upper bound on the number of rounds

    Returns the list of weights, in the same order as p_lists.
    '''
    # print log
    log_txt = 'Estimate weights ...'
    h.print_log(log_txt)

    lm_num = len(p_lists)
    weights = [1.0 / lm_num for _ in range(lm_num)]
    previous_first = weights[0]

    # Default for the summary log: without it, max_iter <= 0 skips the
    # loop entirely and iter_num would be undefined (NameError).
    iter_num = 0
    for iter_num in range(max_iter):
        weighted_p_lists = weighted_p(weights, p_lists)
        for lm_id in range(lm_num):
            word_num = len(weighted_p_lists[lm_id])
            weights[lm_id] = 1.0 / (word_num + 1) * sum(
                weighted_p_lists[lm_id])

        # only the first weight is monitored for convergence
        if abs(previous_first - weights[0]) < ths:
            break
        previous_first = weights[0]

    # print log (reports the zero-based index of the last round executed)
    log_txt = '... {iter_num} iteration used'.format(iter_num=iter_num)
    h.print_log(log_txt)
    return weights
def lmrescore(show, lm, sys):
    '''
    Run the LMRESCORE shell command for a single show.

    The LMRESCORE template is filled in with show, lm and sys; the
    resulting command line is logged via h.print_log and then executed
    with os.system. The exit status is not inspected and nothing is
    returned.
    '''
    # NOTE: the parameter name `sys` hides the stdlib module of the same
    # name inside this function.
    command = LMRESCORE.format(sys=sys, lm=lm, show=show)
    h.print_log('Running command:\n {cmd}'.format(cmd=command))
    os.system(command)
def lmrescore_batch(show_set, lm, sys):
    '''
    Run lmrescore on every show of a show set with the given LM.

    The shows are looked up in h.SHOWLIST under show_set. After all of
    them have been processed, one summary line is written to the log.
    '''
    shows = h.SHOWLIST[show_set]
    for current_show in shows:
        lmrescore(current_show, lm, sys)

    # summary log for the whole batch
    summary = 'LMRESCORE: show_set = {show_set}, lm = {lm}, sys = {sys}'.format(
        sys=sys, lm=lm, show_set=show_set)
    h.print_log(summary)
def lmerge(weights, lm):
    '''
    Run the LMERGE shell command to merge language models.

    weights and lm are substituted into the LMERGE template; the command
    line is logged through h.print_log and executed with os.system.
    Nothing is returned.
    '''
    command = LMERGE.format(lm=lm, weights=weights)
    h.print_log('Running command:\n {cmd}'.format(cmd=command))
    os.system(command)
def lplex(stream, lm, dat_file):
    '''
    Run the LPLEX shell command.

    stream, lm and dat_file are substituted into the LPLEX template; the
    command line is logged through h.print_log and executed with
    os.system. Nothing is returned.
    '''
    command = LPLEX.format(dat_file=dat_file, lm=lm, stream=stream)
    h.print_log('Running command:\n {cmd}'.format(cmd=command))
    os.system(command)
def score(sys, show_set):
    '''
    Run the SCORE shell command and return what it prints.

    sys and show_set are substituted into the SCORE template; the command
    line is logged through h.print_log and run via os.popen.

    Returns the text read from the command's output pipe.
    '''
    cmd = SCORE.format(sys=sys, show_set=show_set)

    # print log
    h.print_log('Running command:\n {cmd}'.format(cmd=cmd))

    # Close the pipe explicitly once the output has been read, so the
    # file descriptor is released instead of lingering until garbage
    # collection.
    pipe = os.popen(cmd)
    try:
        return pipe.read()
    finally:
        pipe.close()
def lplex2(lm, dat_file):
    '''
    Run the LPLEX2 shell command and return what it prints.

    lm and dat_file are substituted into the LPLEX2 template; the command
    line is logged through h.print_log and run via os.popen.

    Returns the text read from the command's output pipe.
    '''
    cmd = LPLEX2.format(lm=lm, dat_file=dat_file)

    # print log
    h.print_log('Running command:\n {cmd}'.format(cmd=cmd))

    # Close the pipe explicitly once the output has been read, so the
    # file descriptor is released instead of lingering until garbage
    # collection.
    pipe = os.popen(cmd)
    try:
        return pipe.read()
    finally:
        pipe.close()
def lmrescore_batch(show_set, lm, sys):
    '''
    Apply lmrescore to each show listed in h.SHOWLIST[show_set] using
    the given LM, then write one summary line to the log.
    '''
    for one_show in h.SHOWLIST[show_set]:
        lmrescore(one_show, lm, sys)

    # batch summary
    template = 'LMRESCORE: show_set = {show_set}, lm = {lm}, sys = {sys}'
    h.print_log(template.format(show_set=show_set, lm=lm, sys=sys))
def lmerge(weights, lm):
    '''
    Merge language models by running the LMERGE shell command.

    The LMERGE template receives weights and lm. The formatted command
    is logged with h.print_log before os.system executes it. There is no
    return value.
    '''
    shell_cmd = LMERGE.format(lm=lm, weights=weights)
    message = 'Running command:\n {cmd}'.format(cmd=shell_cmd)
    h.print_log(message)
    os.system(shell_cmd)
def lplex2(lm, dat_file):
    '''
    Run the LPLEX2 shell command and hand back its output.

    The LPLEX2 template is filled with lm and dat_file, the command line
    is logged through h.print_log, and the command is started with
    os.popen.

    Returns the text read from the command's output pipe.
    '''
    cmd = LPLEX2.format(
        lm=lm,
        dat_file=dat_file
    )

    # print log
    log_txt = 'Running command:\n {cmd}'.format(cmd=cmd)
    h.print_log(log_txt)

    # The pipe must be closed after reading; leaving it open leaks the
    # descriptor until the object happens to be garbage collected.
    output_pipe = os.popen(cmd)
    try:
        return output_pipe.read()
    finally:
        output_pipe.close()
def lplex(stream, lm, dat_file):
    '''
    Execute the LPLEX shell command.

    The LPLEX template receives stream, lm and dat_file. The formatted
    command is logged with h.print_log before os.system executes it.
    There is no return value.
    '''
    shell_cmd = LPLEX.format(dat_file=dat_file, lm=lm, stream=stream)
    message = 'Running command:\n {cmd}'.format(cmd=shell_cmd)
    h.print_log(message)
    os.system(shell_cmd)
def lmrescore(show, lm, sys):
    '''
    Execute the LMRESCORE shell command for one show.

    The LMRESCORE template receives show, lm and sys. The formatted
    command is logged with h.print_log before os.system executes it.
    There is no return value.
    '''
    # NOTE: within this function the name `sys` refers to the argument,
    # not to the stdlib module.
    shell_cmd = LMRESCORE.format(sys=sys, lm=lm, show=show)
    message = 'Running command:\n {cmd}'.format(cmd=shell_cmd)
    h.print_log(message)
    os.system(shell_cmd)
def score(sys, show_set):
    '''
    Run the SCORE shell command and hand back its output.

    The SCORE template is filled with sys and show_set, the command line
    is logged through h.print_log, and the command is started with
    os.popen.

    Returns the text read from the command's output pipe.
    '''
    cmd = SCORE.format(
        sys=sys,
        show_set=show_set
    )

    # print log
    log_txt = 'Running command:\n {cmd}'.format(cmd=cmd)
    h.print_log(log_txt)

    # The pipe must be closed after reading; leaving it open leaks the
    # descriptor until the object happens to be garbage collected.
    output_pipe = os.popen(cmd)
    try:
        return output_pipe.read()
    finally:
        output_pipe.close()
def batch_mlf2dat(sys, show_set):
    '''
    convert all .mlf files in a show set to .dat for a given system

    For every show in h.SHOWLIST[show_set] this calls mlf2dat on
    "<sys>/<show>/rescore/rescore.mlf", writing to
    "<sys>/<show>/rescore/rescore.dat". One log line is emitted before
    the conversions start. Nothing is returned.
    '''
    # print log
    log_txt = 'mlf2dat: sys = {sys}, show_set = {show_set}'.format(
        sys=sys, show_set=show_set)
    h.print_log(log_txt)

    # (the unused per-show `sentences` list of the old version is gone)
    for show in h.SHOWLIST[show_set]:
        mlf_file = "{sys}/{show}/rescore/rescore.mlf".format(sys=sys, show=show)
        dat_file = "{sys}/{show}/rescore/rescore.dat".format(sys=sys, show=show)
        mlf2dat(mlf_file, dat_file)
def batch_mlf2dat(sys, show_set):
    '''
    convert all .mlf files in a show set to .dat for a given system

    Logs the request once, then walks h.SHOWLIST[show_set] and calls
    mlf2dat for each show with the input path
    "<sys>/<show>/rescore/rescore.mlf" and the output path
    "<sys>/<show>/rescore/rescore.dat". Nothing is returned.
    '''
    # print log
    log_txt = 'mlf2dat: sys = {sys}, show_set = {show_set}'.format(
        sys=sys,
        show_set=show_set
    )
    h.print_log(log_txt)

    # No per-show scratch list is kept: the former `sentences = []` was
    # never read.
    for show in h.SHOWLIST[show_set]:
        mlf_file = "{sys}/{show}/rescore/rescore.mlf".format(
            sys=sys,
            show=show
        )
        dat_file = "{sys}/{show}/rescore/rescore.dat".format(
            sys=sys,
            show=show
        )
        mlf2dat(mlf_file, dat_file)
def main():
    '''
    Drive the acoustic model adaptation pipeline for the 'dev03' show set.

    Stages, in order: LM rescoring, lattice merging, acoustic-model
    rescoring, transform generation (HMMADAPT) and transform application
    (ADAPTRESCORE). Every command is logged and run through os.system;
    h.wait_qsub() is called after each stage except the last.
    '''
    def run_logged(command):
        # log the command line, then hand it to the shell
        h.print_log('Running command:\n {cmd}'.format(cmd=command))
        os.system(command)

    # Single-argument print(...) produces the same output as the bare
    # print statement under Python 2.
    print('Acoustic Model Adaptation')
    lm = 'my_lms/lm_int_dev03'  # LM to be used
    showset = 'dev03'
    print('working on show set: {showset}'.format(showset=showset))

    shows = h.SHOWLIST[showset]

    # rescore using the interpolated language model
    for show in shows:
        run_logged(LMRESCORE.format(show=show, lm=lm))
    h.wait_qsub()

    # merge lattices
    for show in shows:
        run_logged(MERGELATS.format(show=show))
    h.wait_qsub()

    # rescore using acoustic models
    for smodel, smodeltype in zip(MODELS, MODELTYPES):
        for show in shows:
            run_logged(HMMRESCORE.format(
                show=show, smodel=smodel, smodeltype=smodeltype))
    h.wait_qsub()

    # generate transformations
    for model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            # for smodel, smodeltype in zip(['hybrid-int'], ['hybrid']):
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            for show in shows:
                run_logged(HMMADAPT.format(
                    adapt=adapt, show=show, smodel=smodel,
                    amodel=amodel, modeltype=modeltype))
    h.wait_qsub()

    # apply transformations
    for model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            # for smodel, smodeltype in zip(['hybrid-int'], ['hybrid']):
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            decode = 'decode-{smodeltype}'.format(smodeltype=smodeltype)
            for show in shows:
                run_logged(ADAPTRESCORE.format(
                    amodel=amodel, show=show, model=model,
                    modeltype=modeltype, adapt=adapt, decode=decode))
def main():
    '''
    Run the acoustic model adaptation steps on the 'dev03' show set.

    The steps are: rescore with the interpolated LM, merge lattices,
    rescore with each acoustic model, generate adaptation transforms and
    finally apply them. Each shell command is logged with h.print_log
    and executed with os.system; h.wait_qsub() is called between steps
    (not after the final one).
    '''
    def log_and_run(shell_cmd):
        # print log, then execute
        message = 'Running command:\n {cmd}'.format(cmd=shell_cmd)
        h.print_log(message)
        os.system(shell_cmd)

    # print(...) with one argument prints exactly what the Python 2
    # print statement did.
    print('Acoustic Model Adaptation')
    lm = 'my_lms/lm_int_dev03'  # LM to be used
    showset = 'dev03'
    print('working on show set: {showset}'.format(showset=showset))

    show_names = h.SHOWLIST[showset]

    # rescore using the interpolated language model
    for show in show_names:
        log_and_run(LMRESCORE.format(
            show=show,
            lm=lm
        ))
    h.wait_qsub()

    # merge lattices
    for show in show_names:
        log_and_run(MERGELATS.format(show=show))
    h.wait_qsub()

    # rescore using acoustic models
    for smodel, smodeltype in zip(MODELS, MODELTYPES):
        for show in show_names:
            log_and_run(HMMRESCORE.format(
                show=show,
                smodel=smodel,
                smodeltype=smodeltype
            ))
    h.wait_qsub()

    # generate transformations
    for model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            # for smodel, smodeltype in zip(['hybrid-int'], ['hybrid']):
            amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            for show in show_names:
                log_and_run(HMMADAPT.format(
                    adapt=adapt,
                    show=show,
                    smodel=smodel,
                    amodel=amodel,
                    modeltype=modeltype
                ))
    h.wait_qsub()

    # apply transformations
    for model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            # for smodel, smodeltype in zip(['hybrid-int'], ['hybrid']):
            decode = 'decode-{smodeltype}'.format(smodeltype=smodeltype)
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
            for show in show_names:
                log_and_run(ADAPTRESCORE.format(
                    amodel=amodel,
                    show=show,
                    model=model,
                    modeltype=modeltype,
                    adapt=adapt,
                    decode=decode
                ))