def _load_dic(self, uid, aff, dic):
    """
    Fill self.dics[uid] with its affix rules and its base words.

    Each of the two parts is taken from cache.cache when DO_CACHE is set and an entry exists for the
    matching hash ("aff_hash" / "dic_hash") stored in self.dics[uid]; otherwise it is parsed from
    aff / dic (through self.parse_aff / self.parse_dic) and, when DO_CACHE is set, stored back in the cache.
    """
    def cache_key(hash_field):
        # A cache entry is identified by the dictionary id and one of its stored hashes.
        return (CACHE_PREFIX, uid, self.dics[uid][hash_field])

    # Fields making up the cached affix data, in the order they are stored in the cached tuple.
    aff_fields = ("flag_mode", "af_map", "af_classes")

    # Affix (rules) part.
    aff_key = cache_key("aff_hash")
    if not DO_CACHE or aff_key not in cache.cache:
        utils.printf("Parsing {}’s aff rules file... ".format(uid), end="")
        # Presumably parse_aff sets the aff_fields entries of the dict it is given (they are read just below).
        self.parse_aff(self.dics[uid], aff)
        if DO_CACHE:
            parsed = tuple(self.dics[uid][field] for field in aff_fields)
            store_key = cache_key("aff_hash")
            cache.cache[store_key] = parsed
        print("Done.")
    else:
        cached = cache.cache[aff_key]
        for idx, field in enumerate(aff_fields):
            self.dics[uid][field] = cached[idx]

    # Dic (base words) part.
    words_key = cache_key("dic_hash")
    if not DO_CACHE or words_key not in cache.cache:
        utils.printf("Parsing {}’s dic base words file... ".format(uid), end="")
        # Presumably parse_dic sets the "base_words" entry of the dict it is given (it is read just below).
        self.parse_dic(self.dics[uid], dic)
        if DO_CACHE:
            store_key = cache_key("dic_hash")
            cache.cache[store_key] = self.dics[uid]["base_words"]
        print("Done.")
    else:
        self.dics[uid]["base_words"] = cache.cache[words_key]
def init(self, ids=None, charset=None, charmap=None, minlen=None, maxlen=None):
    """
    Init this MatchDic object, by getting all words generated from the word generator (self.word_gen)
    for the given ids (or all of them, if ids is None).

    Every generated word goes through self.preprocess() before being stored. The settings below are
    saved on self; they are intended to be optionally applied to each word:
    * charset and charmap operations (i.e. removing all chars not in charset, and then
      calling str.translate with charmap).
    * length operation (i.e. rejecting words smaller than minlen (defaults to 1)
      or longer than maxlen (defaults to 32767)). The defaults are only applied when at least
      one of minlen/maxlen is given.

    The result for each id is stored in self.ids[uid] as a list of sets, where the set at
    index (n - 1) holds all kept words of length n. When DO_CACHE is set, that list is looked up in
    (and stored into) cache.cache, keyed by a hash of the generator state for uid, the parameters
    given here, and self._hash_salt.
    """
    self.charset = set(charset) if charset else None
    self.charmap = str.maketrans(charmap) if charmap else None

    if minlen or maxlen:
        minlen = minlen or 1
        maxlen = maxlen or 32767  # XXX Arbitrary high value.
    self.minlen = minlen
    self.maxlen = maxlen
    self.do_minmax_len = bool(minlen or maxlen)

    if DO_CACHE:
        # Serialize the charset in sorted order: iteration order of a set of str changes from one
        # interpreter run to the next (hash randomization), which would make the cache key unstable.
        charset_param = sorted(self.charset) if self.charset is not None else None
        hsh_param = pickle.dumps((charset_param, self.charmap, minlen, maxlen))

    if ids is None:
        ids = self.word_gen.ids

    for uid in ids:
        if DO_CACHE:
            hsh = self.word_gen.get_hash(uid)
            hsh.update(hsh_param)
            hsh.update(self._hash_salt)
            key = (CACHE_PREFIX, uid, hsh.hexdigest())
            if key in cache.cache:
                self.ids[uid] = cache.cache[key]
                continue

        utils.printf("Building {}’s list of words... ".format(uid), end="")
        lst = self.ids[uid] = []
        for w in map(self.preprocess, self.word_gen.gen_words(dics=(uid,))):
            # preprocess() may return a falsy value (e.g. an empty word); such words are skipped.
            if not w:
                continue
            ln = len(w)
            # Grow the list so that there is one set per word length, up to the current one.
            if ln > len(lst):
                lst.extend(set() for _ in range(ln - len(lst)))
            lst[ln - 1].add(w)
        if DO_CACHE:
            cache.cache[key] = lst
        print("Done.")
def init(self, ids=None, charset=None, charmap=None, func=None, minlen=None, maxlen=None):
    """
    Init this MatchDic object, by getting all words generated from the word generator (self.word_gen)
    for the given ids (or all of them, if ids is None).

    Every generated word goes through self.preprocess() before being stored. The settings below are
    saved on self; they are intended to be optionally applied to each word:
    * charset and charmap operations (i.e. removing all chars not in charset, and then
      calling str.translate with charmap).
    * If func is not None, it must be a callable taking one str arg, and returning an str
      (can be e.g. str.lower...).
    * length operation (i.e. rejecting words smaller than minlen (defaults to 1)
      or longer than maxlen (defaults to 32767)). The defaults are only applied when at least
      one of minlen/maxlen is given.

    The result for each id is stored in self.ids[uid] as a list of sets, where the set at
    index (n - 1) holds all kept words of length n. When DO_CACHE is set, that list is looked up in
    (and stored into) cache.cache, keyed by a hash of the generator state for uid, the parameters
    given here, and self._hash_salt. Note that func only takes part in that key through its __name__,
    so two different callables sharing the same name are not told apart by the cache.
    """
    self.charset = set(charset) if charset else None
    self.charmap = str.maketrans(charmap) if charmap else None
    self.func = func

    if minlen or maxlen:
        minlen = minlen or 1
        maxlen = maxlen or 32767  # XXX Arbitrary high value.
    self.minlen = minlen
    self.maxlen = maxlen
    self.do_minmax_len = bool(minlen or maxlen)

    if DO_CACHE:
        # Serialize the charset in sorted order: iteration order of a set of str changes from one
        # interpreter run to the next (hash randomization), which would make the cache key unstable.
        charset_param = sorted(self.charset) if self.charset is not None else None
        # XXX Eeek! func.__name__ is weak. :(
        func_param = func.__name__ if func else None
        hsh_param = pickle.dumps((charset_param, self.charmap, func_param, minlen, maxlen))

    if ids is None:
        ids = self.word_gen.ids

    for uid in ids:
        if DO_CACHE:
            hsh = self.word_gen.get_hash(uid)
            hsh.update(hsh_param)
            hsh.update(self._hash_salt)
            key = (CACHE_PREFIX, uid, hsh.hexdigest())
            if key in cache.cache:
                self.ids[uid] = cache.cache[key]
                continue

        utils.printf("Building {}’s list of words... ".format(uid), end="")
        lst = self.ids[uid] = []
        for w in map(self.preprocess, self.word_gen.gen_words(dics=(uid,))):
            # preprocess() may return a falsy value (e.g. an empty word); such words are skipped.
            if not w:
                continue
            ln = len(w)
            # Grow the list so that there is one set per word length, up to the current one.
            if ln > len(lst):
                lst.extend(set() for _ in range(ln - len(lst)))
            lst[ln - 1].add(w)
        if DO_CACHE:
            cache.cache[key] = lst
        print("Done.")