Esempio n. 1
0
 def word_values(self, value, **kwargs):
     """Tally the tokens of ``value`` and yield one entry per distinct text.

     The analyzer output is passed through ``unstopped`` (presumably to
     drop tokens flagged as stop words -- confirm against its definition).
     When ``self.boost_as_freq`` is true the analyzer is asked for boosts
     and each token adds ``int(t.boost)`` to the count for its text;
     otherwise each token adds 1.

     :param value: the value handed to ``self.analyzer``.
     :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
     :returns: a generator of ``(text, freq, encode(freq))`` tuples, where
         ``encode`` is ``self.encode``.
     """
     seen = defaultdict(int)
     if self.boost_as_freq:
         for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
             # The boost is truncated to an int so it can act as a count.
             seen[t.text] += int(t.boost)
     else:
         for t in unstopped(self.analyzer(value, **kwargs)):
             seen[t.text] += 1

     encode = self.encode
     # dict.iteritems() was removed in Python 3; items() exists on both
     # Python 2 and 3 and produces the same (text, freq) pairs.
     return ((w, freq, encode(freq)) for w, freq in seen.items())
Esempio n. 2
0
    def word_values(self, value, **kwargs):
        """Count token texts in ``value`` and yield one entry per text.

        Tokens come from ``unstopped(self.analyzer(...))``.  With
        ``self.boost_as_freq`` set, the analyzer is called with
        ``boosts=True`` and every token contributes ``int(token.boost)``;
        without it, every token contributes 1.

        :param value: the value handed to ``self.analyzer``.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of ``(text, count, encode(count))`` tuples.
        """
        counts = defaultdict(int)
        if not self.boost_as_freq:
            plain_tokens = unstopped(self.analyzer(value, **kwargs))
            for token in plain_tokens:
                counts[token.text] += 1
        else:
            boosted_tokens = unstopped(
                self.analyzer(value, boosts=True, **kwargs))
            for token in boosted_tokens:
                counts[token.text] += int(token.boost)

        # Bind the encoder once, before the lazy generator is handed out.
        encoder = self.encode
        return (
            (text, count, encoder(count))
            for text, count in six.iteritems(counts)
        )
Esempio n. 3
0
    def word_values(self, value, doc_boost=1.0, **kwargs):
        """Count token texts in ``value`` and pair each count with a boost.

        Tokens come from ``unstopped(self.analyzer(value, **kwargs))`` and
        each one adds 1 to the count kept for its text.

        :param value: the value handed to ``self.analyzer``.
        :param doc_boost: value encoded alongside every count.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of
            ``(text, count, encode((count, doc_boost)))`` tuples.
        """
        counts = defaultdict(int)
        token_stream = unstopped(self.analyzer(value, **kwargs))
        for token in token_stream:
            counts[token.text] += 1

        # Bind the encoder once, before the lazy generator is handed out.
        encoder = self.encode
        return (
            (text, count, encoder((count, doc_boost)))
            for text, count in six.iteritems(counts)
        )
Esempio n. 4
0
 def word_values(self, value, doc_boost=1.0, **kwargs):
     """Count token texts in ``value`` and pair each count with a boost.

     Tokens come from ``unstopped(self.analyzer(value, **kwargs))`` and
     each one adds 1 to the count kept for its text.

     :param value: the value handed to ``self.analyzer``.
     :param doc_boost: value encoded alongside every count.
     :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
     :returns: a generator of ``(text, freq, encode((freq, doc_boost)))``
         tuples, where ``encode`` is ``self.encode``.
     """
     seen = defaultdict(int)
     for t in unstopped(self.analyzer(value, **kwargs)):
         seen[t.text] += 1

     encode = self.encode
     # dict.iteritems() was removed in Python 3; items() exists on both
     # Python 2 and 3 and produces the same (text, freq) pairs.
     return ((w, freq, encode((freq, doc_boost)))
             for w, freq in seen.items())
Esempio n. 5
0
 def word_values(self, value, start_pos=0, **kwargs):
     """Collect the positions of every token text found in ``value``.

     The analyzer is called with ``positions=True`` and the given
     ``start_pos``; its output is passed through ``unstopped``.  For each
     token, ``start_pos + t.pos`` is appended to the list kept for the
     token's text.

     :param value: the value handed to ``self.analyzer``.
     :param start_pos: offset forwarded to the analyzer and added to every
         recorded position.
     :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
     :returns: a generator of ``(text, len(poslist), encode(poslist))``
         tuples, where ``encode`` is ``self.encode``.
     """
     seen = defaultdict(list)
     for t in unstopped(self.analyzer(value, positions=True,
                                      start_pos=start_pos, **kwargs)):
         # NOTE(review): start_pos is also passed to the analyzer; if the
         # analyzer already offsets t.pos by it, the offset is applied
         # twice here -- confirm against the analyzer before changing.
         seen[t.text].append(start_pos + t.pos)

     encode = self.encode
     # dict.iteritems() was removed in Python 3; items() exists on both
     # Python 2 and 3 and produces the same (text, poslist) pairs.
     return ((w, len(poslist), encode(poslist))
             for w, poslist in seen.items())
Esempio n. 6
0
 def word_values(self, value, start_pos=0, start_char=0, **kwargs):
     """Collect position and character spans of every token in ``value``.

     The analyzer is called with ``positions=True``, ``chars=True`` and the
     given ``start_pos`` / ``start_char``; its output is passed through
     ``unstopped``.  For each token the tuple
     ``(t.pos, start_char + t.startchar, start_char + t.endchar)`` is
     appended to the list kept for the token's text.

     :param value: the value handed to ``self.analyzer``.
     :param start_pos: position offset forwarded to the analyzer.
     :param start_char: character offset forwarded to the analyzer and
         added to every recorded start/end character.
     :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
     :returns: a generator of ``(text, len(ls), encode(ls))`` tuples,
         where ``encode`` is ``self.encode``.
     """
     seen = defaultdict(list)

     for t in unstopped(self.analyzer(value, positions=True, chars=True,
                                      start_pos=start_pos,
                                      start_char=start_char, **kwargs)):
         # NOTE(review): start_char is added here as well as being passed
         # to the analyzer, while t.pos is stored without adding start_pos
         # -- confirm this asymmetry is intended.
         seen[t.text].append((t.pos, start_char + t.startchar,
                              start_char + t.endchar))

     encode = self.encode
     # dict.iteritems() was removed in Python 3; items() exists on both
     # Python 2 and 3 and produces the same (text, ls) pairs.
     return ((w, len(ls), encode(ls)) for w, ls in seen.items())
Esempio n. 7
0
    def word_values(self, value, start_pos=0, **kwargs):
        """Collect ``(position, boost)`` pairs for every token in ``value``.

        The analyzer is called with ``positions=True``, ``boosts=True`` and
        the given ``start_pos``; its output is passed through
        ``unstopped``.  For each token, ``(t.pos, t.boost)`` is appended to
        the list kept for the token's text.

        :param value: the value handed to ``self.analyzer``.
        :param start_pos: position offset forwarded to the analyzer.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of ``(text, len(poslist), encode(poslist))``
            tuples, where ``encode`` is ``self.encode``.
        """
        # The default factory must be ``list``: a defaultdict calls its
        # factory with no arguments, and ``iter()`` raises TypeError, so
        # the previous ``defaultdict(iter)`` failed on the first token.
        seen = defaultdict(list)
        for t in unstopped(
                self.analyzer(value,
                              positions=True,
                              boosts=True,
                              start_pos=start_pos,
                              **kwargs)):
            seen[t.text].append((t.pos, t.boost))

        encode = self.encode
        return ((w, len(poslist), encode(poslist))
                for w, poslist in six.iteritems(seen))
Esempio n. 8
0
 def word_values(self, value, **kwargs):
     """Yield ``(text, 1, '')`` once for each distinct token text.

     Tokens come from ``unstopped(self.analyzer(value, **kwargs))``; only
     their ``text`` attribute is used, and duplicates are collapsed.

     :param value: the value handed to ``self.analyzer``.
     :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
     :returns: a generator of ``(text, 1, '')`` tuples.
     """
     distinct_texts = set()
     for token in unstopped(self.analyzer(value, **kwargs)):
         distinct_texts.add(token.text)
     return ((text, 1, '') for text in distinct_texts)
Esempio n. 9
0
 def word_values(self, value, **kwargs):
     """Yield ``(text, 1, '')`` once for each distinct token text.

     The analyzer output for ``value`` is passed through ``unstopped`` and
     the ``text`` of each resulting token is gathered into a set, so every
     text is reported a single time.

     :param value: the value handed to ``self.analyzer``.
     :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
     :returns: a generator of ``(text, 1, '')`` tuples.
     """
     token_stream = unstopped(self.analyzer(value, **kwargs))
     unique_texts = {token.text for token in token_stream}
     return ((text, 1, '') for text in unique_texts)