def __init__(self, content, index=0, name=None, score=0):
    """Build the word list from a file path or from an iterable of words.

    ``content`` is either a string, in which case it is treated as a path
    and loaded through ``read_wordlist``, or an iterable whose items are
    bare words or ``(word, score)`` tuples. Bare words receive the default
    ``score``.

    Spaces are stripped from every word, words containing anything other
    than the letters A-Z / a-z are discarded, and the remaining entries
    are passed to ``cPalabra.preprocess`` together with ``index``.
    """
    if isinstance(content, str):
        entries = list(read_wordlist(content, score))
        self.path = content
    else:
        self.path = None
        entries = []
        for entry in content:
            # Bare words get paired with the default score.
            if not isinstance(entry, tuple):
                entry = (entry, score)
            entries.append(entry)

    # Compound words are simply joined together for now.
    entries = [(text.replace(" ", ''), rank) for text, rank in entries]

    # Only plain alphabetic characters are accepted.
    upper_lo, upper_hi = ord("A"), ord("Z")
    lower_lo, lower_hi = ord("a"), ord("z")

    def is_alphabetic(text):
        return all(
            upper_lo <= code <= upper_hi or lower_lo <= code <= lower_hi
            for code in map(ord, text)
        )

    entries = [entry for entry in entries if is_alphabetic(entry[0])]

    # self.words maps a word length to the list of (word, score) entries
    # of that length.
    self.words = cPalabra.preprocess(entries, index)
    self.index = index
    self.name = name
def remove_words(self, words):
    """Remove the given entries from the word list and rebuild it.

    Each item in ``words`` is looked up in the bucket stored under
    ``len(item[0])`` in ``self.words`` and removed when present (only the
    first matching occurrence, as with ``list.remove``). Items that are
    not present are skipped silently; this includes items whose length
    has no bucket at all.

    The word list is passed through ``cPalabra.preprocess`` again only
    when at least one entry was actually removed.
    """
    removed_any = False
    for item in words:
        # A length without a bucket means the item cannot be present;
        # treat it like any other absent item rather than raising KeyError.
        bucket = self.words.get(len(item[0]))
        if bucket is None or item not in bucket:
            continue
        bucket.remove(item)
        removed_any = True

    if not removed_any:
        return

    remaining = []
    for bucket in self.words.values():
        remaining.extend(bucket)
    self.words = cPalabra.preprocess(remaining, self.index)