Beispiel #1
0
def estimate_weights(p_lists, ths=0.000001, max_iter=500):
    '''
    Use the EM algorithm to estimate the optimal interpolation weights.

    Starting from uniform weights, each iteration calls
    ``weighted_p(weights, p_lists)`` and re-estimates every weight from the
    corresponding returned list.  Iteration stops once the first weight
    changes by less than ``ths`` or after ``max_iter`` iterations.

    Args:
        p_lists: one entry per language model, forwarded unchanged to
            ``weighted_p`` (presumably per-word probabilities -- confirm
            against ``weighted_p``).
        ths: convergence threshold on the change of the first weight.
        max_iter: maximum number of iterations to run.

    Returns:
        A list with one weight per language model (empty when ``p_lists``
        is empty).
    '''
    # print log
    h.print_log('Estimate weights ...')

    lm_num = len(p_lists)
    weights = [1.0 / lm_num for dummy_i in range(lm_num)]

    # Counted explicitly so the final log line is well defined even when the
    # loop body never runs (max_iter <= 0 or no language models at all).
    iterations_used = 0
    if weights:
        w_1_pre = weights[0]
        for dummy_i in range(max_iter):
            iterations_used += 1
            weighted_p_lists = weighted_p(weights, p_lists)
            for lm_id in range(lm_num):
                weighted = weighted_p_lists[lm_id]
                # NOTE(review): the denominator is len + 1 rather than len;
                # kept exactly as before -- confirm this is intended.
                weights[lm_id] = 1.0 / (len(weighted) + 1) * sum(weighted)
            # convergence is judged on the first weight only
            if abs(w_1_pre - weights[0]) < ths:
                break
            w_1_pre = weights[0]

    # print log
    h.print_log(
        '... {iter_num} iteration used'.format(iter_num=iterations_used))

    return weights
Beispiel #2
0
def estimate_weights(p_lists, ths=0.000001, max_iter=500):
    '''
    Use the EM algorithm to estimate the optimal interpolation weights.

    The weights start out uniform.  Every iteration asks
    ``weighted_p(weights, p_lists)`` for one list per language model and
    replaces each weight by ``sum(list) / (len(list) + 1)``.  The loop ends
    when the first weight moves by less than ``ths`` or when ``max_iter``
    iterations have been performed.

    Args:
        p_lists: one entry per language model, handed to ``weighted_p``
            as-is (presumably per-word probabilities -- confirm against
            ``weighted_p``).
        ths: convergence threshold on the change of the first weight.
        max_iter: maximum number of iterations to run.

    Returns:
        A list with one weight per language model (empty when ``p_lists``
        is empty).
    '''
    # print log
    h.print_log('Estimate weights ...')

    lm_num = len(p_lists)
    weights = [1.0 / lm_num for dummy_i in range(lm_num)]

    # Tracked separately from the loop variable: the loop variable does not
    # exist when the loop never runs, and it is one less than the number of
    # iterations actually performed.
    iterations_used = 0
    if lm_num > 0:
        w_1_pre = weights[0]
        while iterations_used < max_iter:
            iterations_used += 1
            weighted_p_lists = weighted_p(weights, p_lists)
            for lm_id in range(lm_num):
                word_num = len(weighted_p_lists[lm_id])
                # NOTE(review): divides by word_num + 1, not word_num; kept
                # unchanged -- confirm this is intended.
                weights[lm_id] = 1.0 / (word_num + 1) * sum(
                    weighted_p_lists[lm_id])
            # only the first weight is monitored for convergence
            if abs(w_1_pre - weights[0]) < ths:
                break
            w_1_pre = weights[0]

    # print log
    log_txt = '... {iter_num} iteration used'.format(
        iter_num=iterations_used)
    h.print_log(log_txt)

    return weights
Beispiel #3
0
def lmrescore(show, lm, sys):
    '''
    Fill the LMRESCORE command template and run the result.

    The template receives ``show``, ``lm`` and ``sys``; the resulting
    command line is logged and then executed with ``os.system``.
    '''
    command = LMRESCORE.format(sys=sys, lm=lm, show=show)

    # log the exact command line before executing it
    h.print_log('Running command:\n  {cmd}'.format(cmd=command))

    os.system(command)
Beispiel #4
0
def lmrescore_batch(show_set, lm, sys):
    '''
    Call ``lmrescore`` for each show of a show set, then log a summary.

    The shows are taken from ``h.SHOWLIST[show_set]``; ``lm`` and ``sys``
    are passed through to every ``lmrescore`` call.
    '''
    shows = h.SHOWLIST[show_set]
    for current_show in shows:
        lmrescore(current_show, lm, sys)

    # summary line, written once all shows have been submitted
    summary = 'LMRESCORE: show_set = {show_set}, lm = {lm}, sys = {sys}'
    h.print_log(summary.format(sys=sys, lm=lm, show_set=show_set))
Beispiel #5
0
def lmerge(weights, lm):
    '''
    Fill the LMERGE command template with ``weights`` and ``lm`` and run it.

    The command line is logged first and then executed with ``os.system``.
    '''
    command = LMERGE.format(lm=lm, weights=weights)

    # log the exact command line before executing it
    h.print_log('Running command:\n  {cmd}'.format(cmd=command))

    os.system(command)
Beispiel #6
0
def lplex(stream, lm, dat_file):
    '''
    Fill the LPLEX command template and run the result.

    The template receives ``stream``, ``lm`` and ``dat_file``; the command
    line is logged and then executed with ``os.system``.
    '''
    command = LPLEX.format(dat_file=dat_file, lm=lm, stream=stream)

    # log the exact command line before executing it
    h.print_log('Running command:\n  {cmd}'.format(cmd=command))

    os.system(command)
Beispiel #7
0
def score(sys, show_set):
    '''
    Run the SCORE command and return what it printed.

    The SCORE template is filled with ``sys`` and ``show_set``, logged, and
    executed through ``os.popen``.

    Returns:
        Everything read from the command's pipe, as one string.
    '''
    cmd = SCORE.format(sys=sys, show_set=show_set)

    # print log
    log_txt = 'Running command:\n  {cmd}'.format(cmd=cmd)
    h.print_log(log_txt)

    pipe = os.popen(cmd)
    try:
        return pipe.read()
    finally:
        # always release the pipe, even if reading fails
        pipe.close()
Beispiel #8
0
def lplex2(lm, dat_file):
    '''
    Run the LPLEX2 command and return what it printed.

    The LPLEX2 template is filled with ``lm`` and ``dat_file``, logged, and
    executed through ``os.popen``.

    Returns:
        Everything read from the command's pipe, as one string.
    '''
    cmd = LPLEX2.format(lm=lm, dat_file=dat_file)

    # print log
    log_txt = 'Running command:\n  {cmd}'.format(cmd=cmd)
    h.print_log(log_txt)

    pipe = os.popen(cmd)
    try:
        return pipe.read()
    finally:
        # always release the pipe, even if reading fails
        pipe.close()
Beispiel #9
0
def lmrescore_batch(show_set, lm, sys):
    '''
    Run ``lmrescore`` once per show in ``h.SHOWLIST[show_set]``.

    ``lm`` and ``sys`` are forwarded unchanged; a single summary line is
    logged after the last show has been handled.
    '''
    for one_show in h.SHOWLIST[show_set]:
        lmrescore(one_show, lm, sys)

    # print log
    template = 'LMRESCORE: show_set = {show_set}, lm = {lm}, sys = {sys}'
    values = {'show_set': show_set, 'lm': lm, 'sys': sys}
    h.print_log(template.format(**values))
Beispiel #10
0
def lmerge(weights, lm):
    '''
    Merge language models: build the LMERGE command line and execute it.

    ``weights`` and ``lm`` are substituted into the LMERGE template; the
    command is logged and then run via ``os.system``.
    '''
    shell_cmd = LMERGE.format(weights=weights, lm=lm)

    # print log
    h.print_log('Running command:\n  {cmd}'.format(cmd=shell_cmd))

    os.system(shell_cmd)
Beispiel #11
0
def lplex2(lm, dat_file):
    '''
    Wrapper for the LPLEX2 command used in this task.

    Substitutes ``lm`` and ``dat_file`` into the LPLEX2 template, logs the
    command line, runs it through ``os.popen`` and returns its output.

    Returns:
        The text read from the command's pipe.
    '''
    cmd = LPLEX2.format(
        lm=lm,
        dat_file=dat_file
    )

    # print log
    log_txt = 'Running command:\n  {cmd}'.format(cmd=cmd)
    h.print_log(log_txt)

    f = os.popen(cmd)
    try:
        output = f.read()
    finally:
        # close the pipe explicitly instead of leaking it until GC
        f.close()
    return output
Beispiel #12
0
def lplex(stream, lm, dat_file):
    '''
    Build the LPLEX command line and execute it.

    ``stream``, ``lm`` and ``dat_file`` are substituted into the LPLEX
    template; the command is logged and then run via ``os.system``.
    '''
    shell_cmd = LPLEX.format(stream=stream, lm=lm, dat_file=dat_file)

    # print log
    h.print_log('Running command:\n  {cmd}'.format(cmd=shell_cmd))

    os.system(shell_cmd)
Beispiel #13
0
def lmrescore(show, lm, sys):
    '''
    Build the LMRESCORE command line for one show and execute it.

    ``show``, ``lm`` and ``sys`` are substituted into the LMRESCORE
    template; the command is logged and then run via ``os.system``.
    '''
    shell_cmd = LMRESCORE.format(show=show, lm=lm, sys=sys)

    # print log
    h.print_log('Running command:\n  {cmd}'.format(cmd=shell_cmd))

    os.system(shell_cmd)
Beispiel #14
0
def score(sys, show_set):
    '''
    Wrapper for the SCORE command used in this task.

    Substitutes ``sys`` and ``show_set`` into the SCORE template, logs the
    command line, runs it through ``os.popen`` and returns its output.

    Returns:
        The text read from the command's pipe.
    '''
    cmd = SCORE.format(
        sys=sys,
        show_set=show_set
    )

    # print log
    log_txt = 'Running command:\n  {cmd}'.format(cmd=cmd)
    h.print_log(log_txt)

    f = os.popen(cmd)
    try:
        output = f.read()
    finally:
        # close the pipe explicitly instead of leaking it until GC
        f.close()
    return output
Beispiel #15
0
def batch_mlf2dat(sys, show_set):
    '''
    Convert all .mlf files in a show set to .dat for a given system.

    For every show in ``h.SHOWLIST[show_set]`` this calls
    ``mlf2dat(mlf_file, dat_file)`` with the paths
    ``{sys}/{show}/rescore/rescore.mlf`` and
    ``{sys}/{show}/rescore/rescore.dat``.
    '''
    # print log
    log_txt = 'mlf2dat: sys = {sys}, show_set = {show_set}'.format(
        sys=sys, show_set=show_set)
    h.print_log(log_txt)

    for show in h.SHOWLIST[show_set]:
        mlf_file = "{sys}/{show}/rescore/rescore.mlf".format(sys=sys,
                                                             show=show)
        dat_file = "{sys}/{show}/rescore/rescore.dat".format(sys=sys,
                                                             show=show)
        mlf2dat(mlf_file, dat_file)
Beispiel #16
0
def batch_mlf2dat(sys, show_set):
    '''
    Convert all .mlf files in a show set to .dat for a given system.

    Each show listed in ``h.SHOWLIST[show_set]`` gets one
    ``mlf2dat(mlf_file, dat_file)`` call, where both paths live in
    ``{sys}/{show}/rescore/`` and differ only in their extension.
    '''
    # print log
    log_txt = 'mlf2dat: sys = {sys}, show_set = {show_set}'.format(
        sys=sys,
        show_set=show_set
    )
    h.print_log(log_txt)

    for show in h.SHOWLIST[show_set]:
        mlf_file = "{sys}/{show}/rescore/rescore.mlf".format(
            sys=sys,
            show=show
        )
        dat_file = "{sys}/{show}/rescore/rescore.dat".format(
            sys=sys,
            show=show
        )
        mlf2dat(mlf_file, dat_file)
Beispiel #17
0
def main():
    '''
    Run the acoustic model adaptation pipeline on the 'dev03' show set.

    Stages, in order:
      1. LMRESCORE for every show with the interpolated language model
      2. MERGELATS for every show
      3. HMMRESCORE for every (MODELS, MODELTYPES) pair and show
      4. HMMADAPT for every sub-model / model pair and show
      5. ADAPTRESCORE for every sub-model / model pair and show

    ``h.wait_qsub()`` is called after each of stages 1-4 and before the
    next stage starts.
    '''
    def run(cmd):
        # log the exact command line, then execute it
        h.print_log('Running command:\n  {cmd}'.format(cmd=cmd))
        os.system(cmd)

    # Parenthesised single-argument print behaves the same on Python 2
    # (print statement) and Python 3 (print function).
    print('Acoustic Model Adaptation')
    lm = 'my_lms/lm_int_dev03'  # LM to be used
    showset = 'dev03'
    print('working on show set: {showset}'.format(showset=showset))

    # rescore using the interpolated language model
    for show in h.SHOWLIST[showset]:
        run(LMRESCORE.format(show=show, lm=lm))

    h.wait_qsub()

    # merge lattices
    for show in h.SHOWLIST[showset]:
        run(MERGELATS.format(show=show))

    h.wait_qsub()

    # rescore using acoustic models
    for smodel, smodeltype in zip(MODELS, MODELTYPES):
        for show in h.SHOWLIST[showset]:
            run(HMMRESCORE.format(show=show,
                                  smodel=smodel,
                                  smodeltype=smodeltype))

    h.wait_qsub()

    # generate transformations
    # NOTE: to restrict a run to the hybrid system only, iterate over
    # zip(['hybrid-int'], ['hybrid']) in place of zip(MODELS, MODELTYPES).
    for _model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        # depends on the outer loop only, so build it once per sub-model
        amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            for show in h.SHOWLIST[showset]:
                run(HMMADAPT.format(adapt=adapt,
                                    show=show,
                                    smodel=smodel,
                                    amodel=amodel,
                                    modeltype=modeltype))

    h.wait_qsub()

    # apply transformations
    for model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            decode = 'decode-{smodeltype}'.format(smodeltype=smodeltype)
            for show in h.SHOWLIST[showset]:
                run(ADAPTRESCORE.format(amodel=amodel,
                                        show=show,
                                        model=model,
                                        modeltype=modeltype,
                                        adapt=adapt,
                                        decode=decode))
Beispiel #18
0
def main():
    '''
    Run the acoustic model adaptation pipeline on the 'dev03' show set.

    The pipeline issues, per show: LMRESCORE, then MERGELATS, then
    HMMRESCORE for each (MODELS, MODELTYPES) pair, then HMMADAPT and
    finally ADAPTRESCORE for each sub-model / model combination.
    ``h.wait_qsub()`` is called between consecutive stages.
    '''
    def log_and_run(cmd):
        # log the exact command line, then execute it
        log_txt = 'Running command:\n  {cmd}'.format(cmd=cmd)
        h.print_log(log_txt)
        os.system(cmd)

    # Parenthesised single-argument print behaves the same on Python 2
    # (print statement) and Python 3 (print function).
    print('Acoustic Model Adaptation')
    lm = 'my_lms/lm_int_dev03'          # LM to be used
    showset = 'dev03'
    print('working on show set: {showset}'.format(showset=showset))

    # rescore using the interpolated language model
    for show in h.SHOWLIST[showset]:
        log_and_run(LMRESCORE.format(
            show=show,
            lm=lm
        ))

    h.wait_qsub()

    # merge lattices
    for show in h.SHOWLIST[showset]:
        log_and_run(MERGELATS.format(show=show))

    h.wait_qsub()

    # rescore using acoustic models
    for smodel, smodeltype in zip(MODELS, MODELTYPES):
        for show in h.SHOWLIST[showset]:
            log_and_run(HMMRESCORE.format(
                show=show,
                smodel=smodel,
                smodeltype=smodeltype
            ))

    h.wait_qsub()

    # generate transformations
    # NOTE: to restrict a run to the hybrid system only, iterate over
    # zip(['hybrid-int'], ['hybrid']) in place of zip(MODELS, MODELTYPES).
    for _model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        # depends on the outer loop only, so build it once per sub-model
        amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            for show in h.SHOWLIST[showset]:
                log_and_run(HMMADAPT.format(
                    adapt=adapt,
                    show=show,
                    smodel=smodel,
                    amodel=amodel,
                    modeltype=modeltype
                ))

    h.wait_qsub()

    # apply transformations
    for model, modeltype in zip(SUBMODELS, SUBMODELTYPES):
        amodel = '{modeltype}-adapt-int'.format(modeltype=modeltype)
        for smodel, smodeltype in zip(MODELS, MODELTYPES):
            adapt = 'adapt-{smodeltype}'.format(smodeltype=smodeltype)
            decode = 'decode-{smodeltype}'.format(smodeltype=smodeltype)
            for show in h.SHOWLIST[showset]:
                log_and_run(ADAPTRESCORE.format(
                    amodel=amodel,
                    show=show,
                    model=model,
                    modeltype=modeltype,
                    adapt=adapt,
                    decode=decode
                ))