def word_values(self, value, **kwargs):
    """Return ``(text, freq, encoded_freq)`` tuples for the tokens of *value*.

    *value* is run through ``self.analyzer`` and the resulting token stream
    is passed through ``unstopped`` (presumably dropping stopped tokens --
    confirm against its definition). Tokens are grouped by ``t.text``.

    When ``self.boost_as_freq`` is true the analyzer is called with
    ``boosts=True`` and each token contributes ``int(t.boost)`` to its
    text's total; otherwise each token contributes 1.

    :param value: the value handed to ``self.analyzer``.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of ``(text, freq, self.encode(freq))`` tuples,
        one per distinct token text.
    """
    seen = defaultdict(int)
    if self.boost_as_freq:
        for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
            seen[t.text] += int(t.boost)
    else:
        for t in unstopped(self.analyzer(value, **kwargs)):
            seen[t.text] += 1

    encode = self.encode
    # dict.iteritems() only exists on Python 2; items() works on 2 and 3.
    return ((w, freq, encode(freq)) for w, freq in seen.items())
def word_values(self, value, **kwargs):
    """Tally token texts in *value* and yield them with encoded totals.

    The token stream comes from ``self.analyzer`` wrapped in ``unstopped``.
    If ``self.boost_as_freq`` is false, every token adds 1 to the total
    for its text. If it is true, the analyzer is asked for boosts
    (``boosts=True``) and every token adds ``int(token.boost)`` instead.

    :param value: the value handed to ``self.analyzer``.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of ``(text, total, self.encode(total))`` tuples.
    """
    totals = defaultdict(int)

    if not self.boost_as_freq:
        # Plain occurrence counting.
        for token in unstopped(self.analyzer(value, **kwargs)):
            totals[token.text] += 1
    else:
        # The truncated boost of each token stands in for its frequency.
        stream = self.analyzer(value, boosts=True, **kwargs)
        for token in unstopped(stream):
            totals[token.text] += int(token.boost)

    encoder = self.encode
    return (
        (text, total, encoder(total))
        for text, total in six.iteritems(totals)
    )
def word_values(self, value, doc_boost=1.0, **kwargs):
    """Count token texts in *value* and encode each count with *doc_boost*.

    The token stream comes from ``self.analyzer`` wrapped in ``unstopped``;
    every token adds 1 to the count for its text.

    :param value: the value handed to ``self.analyzer``.
    :param doc_boost: value paired with each count in the tuple passed to
        ``self.encode``.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of
        ``(text, count, self.encode((count, doc_boost)))`` tuples.
    """
    counts = defaultdict(int)
    token_stream = unstopped(self.analyzer(value, **kwargs))
    for token in token_stream:
        counts[token.text] += 1

    encoder = self.encode
    return (
        (text, count, encoder((count, doc_boost)))
        for text, count in six.iteritems(counts)
    )
def word_values(self, value, doc_boost=1.0, **kwargs):
    """Return ``(text, freq, encoded)`` tuples for the tokens of *value*.

    *value* is run through ``self.analyzer`` and the resulting token stream
    is passed through ``unstopped``. Each token adds 1 to the count kept
    for its ``t.text``.

    :param value: the value handed to ``self.analyzer``.
    :param doc_boost: value paired with each frequency in the tuple passed
        to ``self.encode``.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of
        ``(text, freq, self.encode((freq, doc_boost)))`` tuples, one per
        distinct token text.
    """
    seen = defaultdict(int)
    for t in unstopped(self.analyzer(value, **kwargs)):
        seen[t.text] += 1

    encode = self.encode
    # dict.iteritems() only exists on Python 2; items() works on 2 and 3.
    return ((w, freq, encode((freq, doc_boost)))
            for w, freq in seen.items())
def word_values(self, value, start_pos=0, **kwargs):
    """Return ``(text, count, encoded_positions)`` tuples for *value*.

    *value* is run through ``self.analyzer`` with ``positions=True`` and
    the resulting token stream is passed through ``unstopped``. For every
    token, ``start_pos + t.pos`` is appended to the list kept for its
    ``t.text``.

    :param value: the value handed to ``self.analyzer``.
    :param start_pos: forwarded to the analyzer and also added to each
        token position before it is recorded.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of
        ``(text, len(positions), self.encode(positions))`` tuples, one per
        distinct token text.
    """
    seen = defaultdict(list)
    for t in unstopped(self.analyzer(value, positions=True,
                                     start_pos=start_pos, **kwargs)):
        # NOTE(review): start_pos is passed to the analyzer AND added here.
        # If the analyzer already offsets t.pos by start_pos, the offset is
        # applied twice -- confirm against the analyzer implementation.
        seen[t.text].append(start_pos + t.pos)

    encode = self.encode
    # dict.iteritems() only exists on Python 2; items() works on 2 and 3.
    return ((w, len(poslist), encode(poslist))
            for w, poslist in seen.items())
def word_values(self, value, start_pos=0, start_char=0, **kwargs):
    """Return ``(text, count, encoded)`` tuples with positions and chars.

    *value* is run through ``self.analyzer`` with ``positions=True`` and
    ``chars=True`` and the resulting token stream is passed through
    ``unstopped``. For every token, the tuple
    ``(t.pos, start_char + t.startchar, start_char + t.endchar)`` is
    appended to the list kept for its ``t.text``.

    :param value: the value handed to ``self.analyzer``.
    :param start_pos: forwarded to the analyzer.
    :param start_char: forwarded to the analyzer and also added to each
        token's start and end character offsets before they are recorded.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of
        ``(text, len(entries), self.encode(entries))`` tuples, one per
        distinct token text.
    """
    seen = defaultdict(list)
    for t in unstopped(self.analyzer(value, positions=True, chars=True,
                                     start_pos=start_pos,
                                     start_char=start_char, **kwargs)):
        # NOTE(review): start_char is passed to the analyzer AND added here.
        # If the analyzer already offsets startchar/endchar, the offset is
        # applied twice -- confirm against the analyzer implementation.
        seen[t.text].append((t.pos,
                             start_char + t.startchar,
                             start_char + t.endchar))

    encode = self.encode
    # dict.iteritems() only exists on Python 2; items() works on 2 and 3.
    return ((w, len(ls), encode(ls)) for w, ls in seen.items())
def word_values(self, value, start_pos=0, **kwargs):
    """Return ``(text, count, encoded)`` tuples with positions and boosts.

    *value* is run through ``self.analyzer`` with ``positions=True`` and
    ``boosts=True`` and the resulting token stream is passed through
    ``unstopped``. For every token, ``(t.pos, t.boost)`` is appended to
    the list kept for its ``t.text``.

    :param value: the value handed to ``self.analyzer``.
    :param start_pos: forwarded to the analyzer.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of
        ``(text, len(entries), self.encode(entries))`` tuples, one per
        distinct token text.
    """
    # The factory must be ``list``: the previous ``defaultdict(iter)``
    # called iter() with no arguments on the first missing key, which
    # raises TypeError, and the entries need .append() anyway.
    seen = defaultdict(list)
    for t in unstopped(self.analyzer(value, positions=True, boosts=True,
                                     start_pos=start_pos, **kwargs)):
        seen[t.text].append((t.pos, t.boost))

    encode = self.encode
    return ((w, len(poslist), encode(poslist))
            for w, poslist in six.iteritems(seen))
def word_values(self, value, **kwargs):
    """Yield one ``(text, 1, '')`` tuple per distinct token text in *value*.

    The token stream comes from ``self.analyzer`` wrapped in ``unstopped``.
    Only the set of distinct ``text`` values is kept; the second and third
    tuple fields are always ``1`` and the empty string.

    :param value: the value handed to ``self.analyzer``.
    :param kwargs: extra keyword arguments forwarded to ``self.analyzer``.
    :returns: a generator of ``(text, 1, '')`` tuples.
    """
    token_stream = unstopped(self.analyzer(value, **kwargs))
    distinct_texts = {token.text for token in token_stream}
    return ((text, 1, '') for text in distinct_texts)