def train_alignment(corpus='cmudict',
                    stress="unstressed",
                    subset=False,
                    delete_prob=0.01,
                    insert_prob=0.01,
                    kerberos_cmd=None):
    """Train a Viterbi EM aligner, checkpointing after every iteration.

    NOTE(review): a second definition of ``train_alignment`` with the same
    signature appears later in this file and replaces this one when the
    module is loaded -- confirm which copy is meant to be kept.

    Args:
        corpus: Passed to ``construct_model_fname`` to build the checkpoint
            file name.  It is not passed to ``load_pronunciations``.
        stress: Passed to ``construct_model_fname``.
        subset: If true, train on a random sample of 1/1000th of the loaded
            pairs.  Also passed to ``construct_model_fname``.
        delete_prob: Forwarded to ``load_allowables``.
        insert_prob: Forwarded to ``load_allowables``.
        kerberos_cmd: Optional shell command string run once through
            ``os.system`` before the training loop starts.  Its exit status
            is not checked.

    Returns:
        The ``ViterbiEM`` instance after training stops.
    """
    ### load the corpus and dict of allowables
    ab_pairs = load_pronunciations()
    alignment_scores = load_allowables(delete_prob=delete_prob,
                                       insert_prob=insert_prob)

    ### are we testing with a subset?
    if subset:
        # test with 0.1% of the corpus.  Floor division keeps the sample
        # size an integer regardless of the active division semantics;
        # random.sample does not accept a float count.
        # NOTE(review): fewer than 1000 pairs gives a sample size of 0.
        ab_pairs = random.sample(ab_pairs, len(ab_pairs) // 1000)

    # Order the pairs by their first element.  A key function gives the same
    # (stable) ordering as the old cmp-based comparator without relying on
    # the cmp builtin / sort(cmp=...) argument.
    ab_pairs.sort(key=lambda pair: pair[0])

    # initialize the EM with the corpus and allowables
    em = ViterbiEM(ab_pairs, alignment_scores, max_iterations=100)

    # check to see if we've got a saved model
    em_fname = construct_model_fname(corpus, stress, subset)
    try:
        em.load(em_fname)
    except IOError:
        # Deliberate best-effort: if the load raises IOError, carry on with
        # the freshly constructed model.
        pass

    if kerberos_cmd is not None:
        os.system(kerberos_cmd)

    # run the Viterbi aligner EM, saving as we go
    while True:
        em.run_EM(1)

        # Save after every single iteration so the latest state is on disk.
        em.save(em_fname)

        # Stop on convergence, or once the iteration counter has passed the
        # configured maximum.
        if em.converged or em.iteration_number > em.max_iterations:
            break

    return em
def train_alignment(corpus='cmudict',
                    stress="unstressed",
                    subset=False,
                    delete_prob=0.01,
                    insert_prob=0.01,
                    kerberos_cmd=None):
    """Train a Viterbi EM aligner, checkpointing after every iteration.

    NOTE(review): an earlier definition of ``train_alignment`` with the same
    signature exists in this file; this later one is the definition that
    stays bound after the module is loaded -- confirm the duplicate is
    intentional.

    Args:
        corpus: Passed to ``construct_model_fname`` to build the checkpoint
            file name.  It is not passed to ``load_pronunciations``.
        stress: Passed to ``construct_model_fname``.
        subset: If true, train on a random sample of 1/1000th of the loaded
            pairs.  Also passed to ``construct_model_fname``.
        delete_prob: Forwarded to ``load_allowables``.
        insert_prob: Forwarded to ``load_allowables``.
        kerberos_cmd: Optional shell command string run once through
            ``os.system`` before the training loop starts.  Its exit status
            is not checked.

    Returns:
        The ``ViterbiEM`` instance after training stops.
    """
    ### load the corpus and dict of allowables
    ab_pairs = load_pronunciations()
    alignment_scores = load_allowables(delete_prob=delete_prob,
                                       insert_prob=insert_prob)

    ### are we testing with a subset?
    if subset:
        # test with 0.1% of the corpus.  Floor division keeps the sample
        # size an integer regardless of the active division semantics;
        # random.sample does not accept a float count.
        # NOTE(review): fewer than 1000 pairs gives a sample size of 0.
        sample_size = len(ab_pairs) // 1000
        ab_pairs = random.sample(ab_pairs, sample_size)

    # Order the pairs by their first element.  A key function gives the same
    # (stable) ordering as the old cmp-based comparator without relying on
    # the cmp builtin / sort(cmp=...) argument.
    ab_pairs.sort(key=lambda pair: pair[0])

    # initialize the EM with the corpus and allowables
    em = ViterbiEM(ab_pairs, alignment_scores, max_iterations=100)

    # check to see if we've got a saved model
    em_fname = construct_model_fname(corpus, stress, subset)
    try:
        em.load(em_fname)
    except IOError:
        # Deliberate best-effort: if the load raises IOError, carry on with
        # the freshly constructed model.
        pass

    if kerberos_cmd is not None:
        os.system(kerberos_cmd)

    # run the Viterbi aligner EM, saving as we go
    while True:
        em.run_EM(1)

        # Save after every single iteration so the latest state is on disk.
        em.save(em_fname)

        # Stop on convergence, or once the iteration counter has passed the
        # configured maximum.
        if em.converged or em.iteration_number > em.max_iterations:
            break

    return em