def probability_list_to_vocabulary(target, source, env):
    """Write the vocabulary of the words contained in a probability list.

    The file named by ``source[0].rstr()`` is parsed into a
    ``ProbabilityList``; the words it reports via ``get_words()`` are turned
    into a ``Vocabulary`` whose ``format()`` output is written to the file
    named by ``target[0].rstr()``.

    Args:
        target: Sequence whose first item provides ``rstr()`` for the output
            path.
        source: Sequence whose first item provides ``rstr()`` for the input
            path.
        env: Not used by this function.
            NOTE(review): the (target, source, env) signature looks like an
            SCons-style builder action -- confirm against the caller.

    Returns:
        None, unconditionally.
    """
    input_path = source[0].rstr()
    with meta_open(input_path) as in_stream:
        probabilities = ProbabilityList(in_stream)

    output_path = target[0].rstr()
    with meta_open(output_path, "w") as out_stream:
        # The vocabulary is built only after the output has been opened,
        # so the ordering of side effects is unchanged.
        formatted = Vocabulary.from_set(probabilities.get_words()).format()
        out_stream.write(formatted)
    return None
def text_to_vocabulary(target, source, env):
    """Write the vocabulary of the whitespace-separated words in a text file.

    The text read from ``source[0].rstr()`` is optionally lowercased, split
    on whitespace and de-duplicated. Words that contain an underscore, or
    that start or end with a hyphen, are discarded. The remaining words are
    turned into a ``Vocabulary`` whose ``format()`` output is written to the
    file named by ``target[0].rstr()``.

    Args:
        target: Sequence whose first item provides ``rstr()`` for the output
            path.
        source: Sequence whose first item provides ``rstr()`` for the input
            path. When a second item is present, the truthiness of its
            ``read()`` result decides whether the text is lowercased; when
            ``source`` has exactly one item the text is always lowercased.
        env: Not used by this function.
            NOTE(review): the (target, source, env) signature looks like an
            SCons-style builder action -- confirm against the caller.

    Returns:
        None, unconditionally.
    """
    # Lowercasing is the default; only an explicit second source can
    # switch it off.
    if len(source) == 1:
        fold_case = True
    else:
        fold_case = source[1].read()

    with meta_open(source[0].rstr()) as in_stream:
        text = in_stream.read()
        if fold_case:
            text = text.lower()
        tokens = set(text.split())

    # Keep only tokens free of underscores and of leading/trailing hyphens.
    kept = [
        token
        for token in tokens
        if not ("_" in token or token.startswith("-") or token.endswith("-"))
    ]
    vocabulary = Vocabulary.from_set(kept)

    with meta_open(target[0].rstr(), "w") as out_stream:
        out_stream.write(vocabulary.format())
    return None