def train_alignment(corpus='cmudict', stress="unstressed", subset=False,
                    delete_prob=0.01, insert_prob=0.01, kerberos_cmd=None):
    """Train a Viterbi EM aligner, checkpointing after every iteration.

    Loads the pronunciation pairs and the allowable-alignment scores, resumes
    from a previously saved model when one can be read, and then runs EM one
    iteration at a time until the model reports convergence or the iteration
    counter exceeds ``em.max_iterations``.

    Args:
        corpus: Corpus name; in this function it is only passed to
            ``construct_model_fname`` to build the checkpoint filename.
        stress: Stress setting; likewise only used for the checkpoint
            filename here.
        subset: When true, train on a random 0.1% sample of the pairs
            (sorted by their first element) instead of the full corpus.
        delete_prob: Forwarded to ``load_allowables``.
        insert_prob: Forwarded to ``load_allowables``.
        kerberos_cmd: Optional shell command string executed once via
            ``os.system`` before training starts; skipped when ``None``.

    Returns:
        The ``ViterbiEM`` instance after training stops.
    """
    # Load the corpus and the dict of allowables.
    ab_pairs = load_pronunciations()
    alignment_scores = load_allowables(delete_prob=delete_prob,
                                       insert_prob=insert_prob)

    # Are we testing with a subset?
    if subset:
        # Test with 0.1% of the corpus.  Floor division keeps the sample
        # size an int on Python 3, where ``/`` yields a float that
        # random.sample rejects.
        ab_pairs = random.sample(ab_pairs, len(ab_pairs) // 1000)
        # ``key=`` replaces the Python-2-only ``cmp=`` argument; ordering by
        # the first element of each pair is unchanged (the sort is stable).
        ab_pairs.sort(key=lambda pair: pair[0])

    # Initialize the EM with the corpus and allowables.
    em = ViterbiEM(ab_pairs, alignment_scores, max_iterations=100)

    # Check to see if we've got a saved model.
    em_fname = construct_model_fname(corpus, stress, subset)
    try:
        em.load(em_fname)
    except IOError:
        # Deliberate best-effort: no readable checkpoint means we start
        # training from the freshly initialized model.
        pass

    if kerberos_cmd is not None:
        # NOTE(review): the command string is handed to the shell as-is;
        # callers must not pass untrusted input here.
        os.system(kerberos_cmd)

    # Run the Viterbi aligner EM, saving as we go so an interrupted run can
    # be resumed from the last completed iteration.
    while True:
        em.run_EM(1)
        em.save(em_fname)
        if em.converged:
            break
        if em.iteration_number > em.max_iterations:
            break

    return em
def train_alignment(corpus='cmudict', stress="unstressed", subset=False,
                    delete_prob=0.01, insert_prob=0.01, kerberos_cmd=None):
    """Run Viterbi EM alignment training, saving the model each iteration.

    The pronunciation pairs and allowable-alignment scores are loaded, an
    existing saved model is loaded when one is readable, and EM is then
    stepped one iteration at a time until ``em.converged`` is set or
    ``em.iteration_number`` exceeds ``em.max_iterations``.

    Args:
        corpus: Corpus name; used in this function only to construct the
            saved-model filename.
        stress: Stress setting; used in this function only to construct the
            saved-model filename.
        subset: If true, replace the corpus with a random 0.1% sample of its
            pairs, sorted by the first element of each pair.
        delete_prob: Passed through to ``load_allowables``.
        insert_prob: Passed through to ``load_allowables``.
        kerberos_cmd: Optional shell command string run once with
            ``os.system`` before the training loop; ``None`` skips it.

    Returns:
        The trained ``ViterbiEM`` object.
    """
    # Load the corpus and the dict of allowables.
    ab_pairs = load_pronunciations()
    alignment_scores = load_allowables(delete_prob=delete_prob,
                                       insert_prob=insert_prob)

    # Are we testing with a subset?
    if subset:
        # Test with 0.1% of the corpus.  ``//`` guarantees an integer sample
        # size; true division on Python 3 would hand random.sample a float.
        sample_size = len(ab_pairs) // 1000
        ab_pairs = random.sample(ab_pairs, sample_size)
        # Sort on the first element of each pair.  ``key=`` works on both
        # Python 2 and 3, unlike the removed ``cmp=`` argument.
        ab_pairs.sort(key=lambda pair: pair[0])

    # Initialize the EM with the corpus and allowables.
    em = ViterbiEM(ab_pairs, alignment_scores, max_iterations=100)

    # Check to see if we've got a saved model.
    em_fname = construct_model_fname(corpus, stress, subset)
    try:
        em.load(em_fname)
    except IOError:
        # Best-effort resume: an unreadable or missing model file simply
        # means training starts from scratch.
        pass

    if kerberos_cmd is not None:
        # NOTE(review): executed through the shell unmodified; do not pass
        # untrusted strings.
        os.system(kerberos_cmd)

    # Run the Viterbi aligner EM, saving as we go.
    while True:
        em.run_EM(1)
        em.save(em_fname)
        # Stop on convergence, or once the iteration budget is exceeded.
        if em.converged or em.iteration_number > em.max_iterations:
            break

    return em