Esempio n. 1
0
def augment_language_model(target, source, env):
    """
    Add new words to an existing language model and pronunciation lexicon.

    Sources (in the order this function reads them):
      source[0]  -- old pronunciations
      source[1]  -- old language model
      source[2]  -- new pronunciations (the words to add)
      source[3]  -- per-word probabilities; only read when exactly five
                    sources are given
      source[-1] -- node whose ``read()`` value is passed as ``mass`` to the
                    language-model update

    Targets (in the order this function opens them):
      target[0] -- new vocabulary
      target[1] -- new pronunciations
      target[2] -- new language model

    ``env`` is accepted but not used here.  Always returns None.
    """
    # Pronunciations, Arpabo, ProbabilityList and meta_open are expected to be
    # available at module scope (historically: from arpabo import ...).

    def load(parser, node):
        # Close each input as soon as it has been parsed instead of leaking
        # the handle.
        # NOTE(review): assumes the parser consumes the whole stream inside
        # its constructor -- confirm for all three parser classes.
        with meta_open(node.rstr()) as handle:
            return parser(handle)

    # Five sources means an explicit probability list accompanies the words.
    weighted = len(source) == 5

    old_prons = load(Pronunciations, source[0])
    old_lm = load(Arpabo, source[1])
    words_to_add = load(Pronunciations, source[2])
    mass = source[-1].read()

    logging.info("Old LM: %s", old_lm)
    logging.info("Old Pronunciations: %s", old_prons)
    logging.info("Words to add: %s", words_to_add)

    new_probs = None
    if weighted:
        new_probs = load(ProbabilityList, source[3])
        logging.info("Words to add (probabilities): %s", new_probs)

    # Both updates happen in place: old_prons / old_lm become the new models.
    old_prons.add_entries(words_to_add)
    if weighted:
        old_lm.add_unigrams_with_probs(new_probs, mass)
    else:
        old_lm.add_unigrams(words_to_add.get_words(), mass)

    logging.info("New Pronunciations: %s", old_prons)
    logging.info("New LM: %s", old_lm)
    logging.info("New words have weight %s", old_lm.get_probability_of_words(words_to_add.get_words()))
    logging.info("Old words have weight %s", old_lm.get_probability_of_not_words(words_to_add.get_words()))

    # Output handles get their own names so they no longer shadow the parsed
    # pronunciation object used above.
    with meta_open(target[0].rstr(), "w") as vocab_out, \
            meta_open(target[1].rstr(), "w") as prons_out, \
            meta_open(target[2].rstr(), "w") as lm_out:
        lm_out.write(old_lm.format())
        vocab_out.write(old_prons.format_vocabulary())
        prons_out.write(old_prons.format())
    return None