def probability_list_to_vocabulary(target, source, env):
    """SCons builder: derive a vocabulary file from a probability list.

    Reads a ProbabilityList from source[0], builds a Vocabulary from its
    word set, and writes the formatted vocabulary to target[0].
    """
    with meta_open(source[0].rstr()) as prob_fd:
        word_probs = ProbabilityList(prob_fd)
    vocabulary = Vocabulary.from_set(word_probs.get_words())
    with meta_open(target[0].rstr(), "w") as out_fd:
        out_fd.write(vocabulary.format())
    return None
def top_words(target, source, env):
    """SCons builder: keep only the top-N words and their pronunciations.

    source[0] is a probability list, source[1] a pronunciation file, and
    source[-1] a Value node whose dict supplies "COUNT".  Writes the
    top-COUNT probability list to target[0] and the pronunciations
    restricted to those words to target[1].
    """
    # Builder arguments come from the trailing SCons Value node.
    args = source[-1].read()
    with meta_open(source[0].rstr()) as prob_fd, meta_open(source[1].rstr()) as pron_fd:
        best = ProbabilityList(prob_fd).get_top_n(args["COUNT"])
        pronunciations = Pronunciations(pron_fd)
        # Drop pronunciations for words outside the top-N set.
        pronunciations.filter_by(best)
    with meta_open(target[0].rstr(), "w") as prob_out, meta_open(target[1].rstr(), "w") as pron_out:
        prob_out.write(best.format())
        pron_out.write(pronunciations.format())
    return None
def filter_babel_gum(target, source, env):
    """SCons builder: restrict pronunciations and probabilities to a vocabulary.

    source[0] holds pronunciations, source[1] word probabilities, and
    source[2] the limiting vocabulary.  Both data sets are filtered to
    that vocabulary and written to target[0] and target[1] respectively.
    """
    with meta_open(source[0].rstr()) as pron_fd, meta_open(source[1].rstr()) as prob_fd, meta_open(source[2].rstr()) as vocab_fd:
        pronunciations = Pronunciations(pron_fd)
        logging.info("Old pronunciations: %s", pronunciations)
        probabilities = ProbabilityList(prob_fd)
        logging.info("Old probabilities: %s", probabilities)
        keep = Vocabulary(vocab_fd)
        logging.info("Correct words: %s", keep)
        # Restrict both data sets to the limiting vocabulary.
        pronunciations.filter_by(keep)
        logging.info("New pronunciations: %s", pronunciations)
        probabilities.filter_by(keep)
        logging.info("New probabilities: %s", probabilities)
    with meta_open(target[0].rstr(), "w") as pron_out, meta_open(target[1].rstr(), "w") as prob_out:
        pron_out.write(pronunciations.format())
        prob_out.write(probabilities.format())
    return None
def plot_probabilities(target, source, env):
    """SCons builder: plot sorted word probabilities to an image file.

    Reads a ProbabilityList from source[0], sorts the probability values
    ascending, plots them, and saves the figure to target[0].
    """
    # Fix: the original passed meta_open(...) directly to ProbabilityList,
    # leaking the file handle; every sibling builder uses a with-block.
    with meta_open(source[0].rstr()) as ifd:
        probs = ProbabilityList(ifd)
    values = sorted(entry.prob() for entry in probs.values())
    pyplot.plot(values)
    pyplot.savefig(target[0].rstr())
    # Fix: close the figure so repeated builder calls in one SCons process
    # don't accumulate open matplotlib figures (memory growth / overplotting).
    pyplot.close()
    return None