Beispiel #1
0
 def _load_dic(self, uid, aff, dic):
     """
     Fill self.dics[uid] with its affix rules and its base words.

     Each of the two parts is looked up in cache.cache (only when DO_CACHE
     is set) under the key (CACHE_PREFIX, uid, <stored hash>), the hash being
     the entry's "aff_hash" or "dic_hash" value. On a miss, the matching
     file is parsed instead (parse_aff() with aff, parse_dic() with dic)
     and, when caching is enabled, the parsed data is written to the cache.
     """
     entry = self.dics[uid]
     aff_fields = ("flag_mode", "af_map", "af_classes")

     # Affix rules (aff) part.
     aff_key = (CACHE_PREFIX, uid, entry["aff_hash"])
     if not DO_CACHE or aff_key not in cache.cache:
         utils.printf("Parsing {}’s aff rules file... ".format(uid), end="")
         self.parse_aff(entry, aff)
         if DO_CACHE:
             parsed = tuple(entry[name] for name in aff_fields)
             # The key is rebuilt from the hash as it stands after parsing.
             aff_key = (CACHE_PREFIX, uid, entry["aff_hash"])
             cache.cache[aff_key] = parsed
         print("Done.")
     else:
         cached = cache.cache[aff_key]
         for pos, name in enumerate(aff_fields):
             entry[name] = cached[pos]

     # Base words (dic) part.
     dic_key = (CACHE_PREFIX, uid, entry["dic_hash"])
     if not DO_CACHE or dic_key not in cache.cache:
         utils.printf("Parsing {}’s dic base words file... ".format(uid), end="")
         self.parse_dic(entry, dic)
         if DO_CACHE:
             dic_key = (CACHE_PREFIX, uid, entry["dic_hash"])
             cache.cache[dic_key] = entry["base_words"]
         print("Done.")
     else:
         entry["base_words"] = cache.cache[dic_key]
Beispiel #2
0
 def _load_dic(self, uid, aff, dic):
     """
     Load the rules (aff) and base words (dic) of dictionary uid.

     Results are stored in self.dics[uid]. With DO_CACHE enabled, each part
     is first searched in cache.cache under (CACHE_PREFIX, uid, hash), where
     hash is the "aff_hash" (resp. "dic_hash") value of that dictionary;
     when it is not found the file is parsed and the result is cached.
     """
     data = self.dics[uid]

     # --- aff (rules) ---
     rules_key = (CACHE_PREFIX, uid, data["aff_hash"])
     rules_hit = DO_CACHE and rules_key in cache.cache
     if rules_hit:
         flag_mode_etc = cache.cache[rules_key]
         data["flag_mode"] = flag_mode_etc[0]
         data["af_map"] = flag_mode_etc[1]
         data["af_classes"] = flag_mode_etc[2]
     else:
         message = "Parsing {}’s aff rules file... ".format(uid)
         utils.printf(message, end="")
         self.parse_aff(data, aff)
         if DO_CACHE:
             to_store = (
                 data["flag_mode"],
                 data["af_map"],
                 data["af_classes"],
             )
             # Recompute the key from the hash present once parsing is done.
             rules_key = (CACHE_PREFIX, uid, data["aff_hash"])
             cache.cache[rules_key] = to_store
         print("Done.")

     # --- dic (base words) ---
     words_key = (CACHE_PREFIX, uid, data["dic_hash"])
     words_hit = DO_CACHE and words_key in cache.cache
     if words_hit:
         data["base_words"] = cache.cache[words_key]
     else:
         message = "Parsing {}’s dic base words file... ".format(uid)
         utils.printf(message, end="")
         self.parse_dic(data, dic)
         if DO_CACHE:
             words_key = (CACHE_PREFIX, uid, data["dic_hash"])
             cache.cache[words_key] = data["base_words"]
         print("Done.")
Beispiel #3
0
    def init(self, ids=None, charset=None, charmap=None, minlen=None,
             maxlen=None):
        """
        Init this MatchDic object by collecting all words generated by the
        word generator for the given ids (or for all its ids, if None).

        It will optionally apply to each generated word, before storing it:
            * charset and charmap operations (i.e. removing all chars not in
              charset, and then calling str.translate with charmap).
            * a length operation (i.e. rejecting words shorter than minlen
              (defaults to 1) or longer than maxlen (defaults to 32767)).
              It is only enabled when at least one of minlen/maxlen is given.

        For each uid, self.ids[uid] ends up as a list of sets in which the
        set at index n - 1 holds the stored words of length n. When DO_CACHE
        is set, that list is read from / written to cache.cache under a key
        derived from the generator's hash for uid, the parameters above and
        self._hash_salt.
        """
        self.charset = set(charset) if charset else None
        self.charmap = str.maketrans(charmap) if charmap else None

        if minlen or maxlen:
            # Only one bound may have been given: default the other one.
            if not minlen:
                minlen = 1
            if not maxlen:
                maxlen = 32767  # XXX Arbitrary high value.
        self.minlen = minlen
        self.maxlen = maxlen
        self.do_minmax_len = bool(minlen or maxlen)

        if DO_CACHE:
            # A set of str iterates in an order that changes from one
            # interpreter run to the next (hash randomization), so pickling
            # it directly would give a different cache key on every run.
            # Pickle a sorted list of the chars instead (assumes charset
            # elements are mutually comparable, i.e. plain characters).
            if self.charset is None:
                stable_charset = None
            else:
                stable_charset = sorted(self.charset)
            hsh_param = pickle.dumps((stable_charset, self.charmap,
                                      minlen, maxlen))

        if ids is None:
            ids = self.word_gen.ids
        for uid in ids:
            if DO_CACHE:
                hsh = self.word_gen.get_hash(uid)
                hsh.update(hsh_param)
                hsh.update(self._hash_salt)
                key = (CACHE_PREFIX, uid, hsh.hexdigest())
                if key in cache.cache:
                    self.ids[uid] = cache.cache[key]
                    continue
            utils.printf("Building {}’s list of words... ".format(uid), end="")
            lst = self.ids[uid] = []
            for w in map(self.preprocess,
                         self.word_gen.gen_words(dics=(uid,))):
                if not w:
                    # Falsy result from preprocess(): nothing to store.
                    continue
                ln = len(w)
                if ln > len(lst):
                    # Grow the list so that a set exists for this length.
                    lst.extend(set() for _ in range(ln - len(lst)))
                lst[ln - 1].add(w)
            if DO_CACHE:
                cache.cache[key] = lst
            print("Done.")
Beispiel #4
0
    def init(self,
             ids=None,
             charset=None,
             charmap=None,
             func=None,
             minlen=None,
             maxlen=None):
        """
        Init this MatchDic object by collecting all words generated by the
        word generator for the given ids (or for all its ids, if None).

        It will optionally apply to each generated word, before storing it:
            * charset and charmap operations (i.e. removing all chars not in
              charset, and then calling str.translate with charmap).
            * If func is not None, it must be a callable taking one str arg,
              and returning a str (can be e.g. str.lower...).
            * a length operation (i.e. rejecting words shorter than minlen
              (defaults to 1) or longer than maxlen (defaults to 32767)).
              It is only enabled when at least one of minlen/maxlen is given.

        For each uid, self.ids[uid] ends up as a list of sets in which the
        set at index n - 1 holds the stored words of length n. When DO_CACHE
        is set, that list is read from / written to cache.cache under a key
        derived from the generator's hash for uid, the parameters above and
        self._hash_salt.
        """
        self.charset = set(charset) if charset else None
        self.charmap = str.maketrans(charmap) if charmap else None
        self.func = func

        if minlen or maxlen:
            # Only one bound may have been given: default the other one.
            if not minlen:
                minlen = 1
            if not maxlen:
                maxlen = 32767  # XXX Arbitrary high value.
        self.minlen = minlen
        self.maxlen = maxlen
        self.do_minmax_len = bool(minlen or maxlen)

        if DO_CACHE:
            # A set of str iterates in an order that changes from one
            # interpreter run to the next (hash randomization), so pickling
            # it directly would give a different cache key on every run.
            # Pickle a sorted list of the chars instead (assumes charset
            # elements are mutually comparable, i.e. plain characters).
            if self.charset is None:
                stable_charset = None
            else:
                stable_charset = sorted(self.charset)
            # XXX Eeek! func.__name__ is weak: two different callables
            #     sharing a name produce the same cache key. :(
            func_name = func.__name__ if func else None
            hsh_param = pickle.dumps(
                (stable_charset, self.charmap, func_name, minlen, maxlen))

        if ids is None:
            ids = self.word_gen.ids
        for uid in ids:
            if DO_CACHE:
                hsh = self.word_gen.get_hash(uid)
                hsh.update(hsh_param)
                hsh.update(self._hash_salt)
                key = (CACHE_PREFIX, uid, hsh.hexdigest())
                if key in cache.cache:
                    self.ids[uid] = cache.cache[key]
                    continue
            utils.printf("Building {}’s list of words... ".format(uid), end="")
            lst = self.ids[uid] = []
            for w in map(self.preprocess,
                         self.word_gen.gen_words(dics=(uid, ))):
                if not w:
                    # Falsy result from preprocess(): nothing to store.
                    continue
                ln = len(w)
                if ln > len(lst):
                    # Grow the list so that a set exists for this length.
                    lst.extend(set() for _ in range(ln - len(lst)))
                lst[ln - 1].add(w)
            if DO_CACHE:
                cache.cache[key] = lst
            print("Done.")