def word_datas(self, value, **kwargs):
    """Yield (word, frequency, frequency) triples for the given value.

    When ``self.boost_as_freq`` is set, each token's boost (truncated to
    an int) is added to the count instead of 1.
    """
    counts = defaultdict(int)
    if self.boost_as_freq:
        stream = unstopped(self.analyzer(value, boosts=True))
        for token in stream:
            counts[token.text] += int(token.boost)
    else:
        stream = unstopped(self.analyzer(value))
        for token in stream:
            counts[token.text] += 1
    return ((word, count, count) for word, count in counts.iteritems())
def word_values(self, value, **kwargs):
    """Yield (word, freq, float weight, encoded freq) tuples for the value.

    When ``self.boost_as_freq`` is set, each token's boost (truncated to
    an int) is added to the count instead of 1.
    """
    counts = defaultdict(int)
    if self.boost_as_freq:
        for token in unstopped(self.analyzer(value, boosts=True, **kwargs)):
            counts[token.text] += int(token.boost)
    else:
        for token in unstopped(self.analyzer(value, **kwargs)):
            counts[token.text] += 1
    # Hoist the bound method out of the generator expression.
    encoder = self.encode
    return ((word, count, float(count), encoder(count))
            for word, count in counts.iteritems())
def word_values(self, value, doc_boost=1.0, **kwargs):
    """Yield (word, freq, encoded (freq, doc_boost)) tuples for the value."""
    counts = defaultdict(int)
    for token in unstopped(self.analyzer(value, **kwargs)):
        counts[token.text] += 1
    encoder = self.encode
    return ((word, count, encoder((count, doc_boost)))
            for word, count in counts.iteritems())
def word_datas(self, value, start_pos=0, **kwargs):
    """Yield (word, frequency, position-list) triples for the value."""
    positions = defaultdict(list)
    stream = unstopped(self.analyzer(value, positions=True,
                                     start_pos=start_pos))
    for token in stream:
        # NOTE(review): token.pos may already include start_pos if the
        # tokenizer honors the start_pos kwarg -- confirm this is not a
        # double offset.
        positions[token.text].append(start_pos + token.pos)
    return ((word, len(posns), posns)
            for word, posns in positions.iteritems())
def word_values(self, value, start_pos=0, **kwargs):
    """Yield (word, freq, encoded position list) tuples for the value."""
    positions = defaultdict(list)
    stream = unstopped(self.analyzer(value, positions=True,
                                     start_pos=start_pos, **kwargs))
    for token in stream:
        # NOTE(review): token.pos may already include start_pos if the
        # tokenizer honors the start_pos kwarg -- confirm this is not a
        # double offset.
        positions[token.text].append(start_pos + token.pos)
    encoder = self.encode
    return ((word, len(posns), encoder(posns))
            for word, posns in positions.iteritems())
def word_values(self, value, doc_boost=1.0, **kwargs):
    """Yield (word, freq, encoded (freq, doc_boost)) tuples for the value."""
    freq_map = defaultdict(int)
    for tok in unstopped(self.analyzer(value, **kwargs)):
        freq_map[tok.text] += 1
    enc = self.encode
    return ((word, n, enc((n, doc_boost))) for word, n in freq_map.iteritems())
def word_datas(self, value, start_pos=0, **kwargs):
    """Yield (word, frequency, [(pos, boost), ...]) triples for the value.

    Bug fix: the accumulator was ``defaultdict(iter)``. ``iter`` cannot be
    called with zero arguments, so the first missing key raised TypeError.
    The values are appended to, so ``list`` is the correct default factory
    (matching the sibling position-based methods).
    """
    seen = defaultdict(list)
    for t in unstopped(self.analyzer(value, positions=True, boosts=True,
                                     start_pos=start_pos)):
        seen[t.text].append((t.pos, t.boost))
    return ((w, len(poslist), poslist) for w, poslist in seen.iteritems())
def word_datas(self, value, start_pos=0, start_char=0, **kwargs):
    """Yield (word, freq, [(pos, startchar, endchar), ...]) triples."""
    occurrences = defaultdict(list)
    stream = unstopped(self.analyzer(value, positions=True, chars=True,
                                     start_pos=start_pos,
                                     start_char=start_char))
    for token in stream:
        entry = (token.pos,
                 start_char + token.startchar,
                 start_char + token.endchar)
        occurrences[token.text].append(entry)
    return ((word, len(entries), entries)
            for word, entries in occurrences.iteritems())
def word_values(self, value, doc_boost=1.0, **kwargs):
    """Yield (word, freq, summed boost * doc_boost, encoded (freq, doc_boost))."""
    freq_map = defaultdict(int)
    weight_map = defaultdict(float)
    for tok in unstopped(self.analyzer(value, boosts=True, **kwargs)):
        text = tok.text
        weight_map[text] += tok.boost
        freq_map[text] += 1
    enc = self.encode
    return ((word, n, weight_map[word] * doc_boost, enc((n, doc_boost)))
            for word, n in freq_map.iteritems())
def word_values(self, value, start_pos=0, **kwargs):
    """Yield (word, freq, summed boost, encoded [(pos, boost), ...]) tuples.

    Bug fix: the accumulator was ``defaultdict(iter)``. ``iter`` cannot be
    called with zero arguments, so the first missing key raised TypeError.
    The values are appended to, so ``list`` is the correct default factory.
    """
    seen = defaultdict(list)
    for t in unstopped(self.analyzer(value, positions=True, boosts=True,
                                     start_pos=start_pos, **kwargs)):
        seen[t.text].append((t.pos, t.boost))
    encode = self.encode
    # Third element is the total boost weight for the word.
    return ((w, len(poslist), sum(p[1] for p in poslist), encode(poslist))
            for w, poslist in seen.iteritems())
def word_values(self, value, start_pos=0, **kwargs):
    """Yield (word, freq, summed boost, encoded position list) tuples."""
    position_map = defaultdict(list)
    weight_map = defaultdict(float)
    stream = unstopped(self.analyzer(value, positions=True,
                                     start_pos=start_pos, **kwargs))
    for token in stream:
        text = token.text
        position_map[text].append(start_pos + token.pos)
        weight_map[text] += token.boost
    enc = self.encode
    return ((word, len(posns), weight_map[word], enc(posns))
            for word, posns in position_map.iteritems())
def word_values(self, value, **kwargs):
    """Yield (word, freq, summed boost * field_boost, encoded freq) tuples."""
    boost_factor = self.field_boost
    freq_map = defaultdict(int)
    weight_map = defaultdict(float)
    for tok in unstopped(self.analyzer(value, boosts=True, **kwargs)):
        text = tok.text
        freq_map[text] += 1
        weight_map[text] += tok.boost
    enc = self.encode
    return ((word, n, weight_map[word] * boost_factor, enc(n))
            for word, n in freq_map.iteritems())
def word_values(self, value, start_pos=0, **kwargs):
    """Yield (word, freq, float weight, encoded position list) tuples."""
    position_map = defaultdict(list)
    stream = unstopped(self.analyzer(value, positions=True,
                                     start_pos=start_pos, **kwargs))
    for token in stream:
        position_map[token.text].append(start_pos + token.pos)
    enc = self.encode
    # Weight here is simply the frequency as a float.
    return ((word, len(posns), float(len(posns)), enc(posns))
            for word, posns in position_map.iteritems())
def word_values(self, value, start_pos=0, start_char=0, **kwargs):
    """Yield (word, freq, encoded [(pos, startchar, endchar, boost), ...]).

    Bug fix: the accumulator was ``defaultdict(iter)``. ``iter`` cannot be
    called with zero arguments, so the first missing key raised TypeError.
    The values are appended to, so ``list`` is the correct default factory.
    """
    seen = defaultdict(list)
    # NOTE(review): sibling methods pass chars=True; confirm the analyzer
    # accepts characters=True as used here.
    for t in unstopped(self.analyzer(value, positions=True, characters=True,
                                     boosts=True, start_pos=start_pos,
                                     start_char=start_char, **kwargs)):
        seen[t.text].append((t.pos, start_char + t.startchar,
                             start_char + t.endchar, t.boost))
    encode = self.encode
    return ((w, len(poslist), encode(poslist))
            for w, poslist in seen.iteritems())
def word_values(self, value, start_pos=0, start_char=0, **kwargs):
    """Yield (word, freq, summed boost, encoded
    [(pos, startchar, endchar, boost), ...]) tuples.

    Bug fix: the accumulator was ``defaultdict(iter)``. ``iter`` cannot be
    called with zero arguments, so the first missing key raised TypeError.
    The values are appended to, so ``list`` is the correct default factory.
    """
    seen = defaultdict(list)
    # NOTE(review): sibling methods pass chars=True; confirm the analyzer
    # accepts characters=True as used here.
    for t in unstopped(self.analyzer(value, positions=True, characters=True,
                                     boosts=True, start_pos=start_pos,
                                     start_char=start_char, **kwargs)):
        seen[t.text].append((t.pos, start_char + t.startchar,
                             start_char + t.endchar, t.boost))
    encode = self.encode
    # Third element sums the per-occurrence boosts (index 3 of each entry).
    return ((w, len(poslist), sum(p[3] for p in poslist), encode(poslist))
            for w, poslist in seen.iteritems())
def word_values(self, value, start_pos=0, start_char=0, **kwargs):
    """Yield (word, freq, summed boost * field_boost,
    encoded [(pos, startchar, endchar), ...]) tuples.
    """
    boost_factor = self.field_boost
    occurrences = defaultdict(list)
    weight_map = defaultdict(float)
    stream = unstopped(self.analyzer(value, positions=True, chars=True,
                                     boosts=True, start_pos=start_pos,
                                     start_char=start_char, **kwargs))
    for token in stream:
        occurrences[token.text].append((token.pos,
                                        start_char + token.startchar,
                                        start_char + token.endchar))
        weight_map[token.text] += token.boost
    enc = self.encode
    return ((word, len(entries), weight_map[word] * boost_factor,
             enc(entries))
            for word, entries in occurrences.iteritems())
def word_values(self, value, start_pos=0, start_char=0, **kwargs):
    """Yield (word, freq, summed boost * field_boost,
    encoded [(pos, startchar, endchar), ...]) tuples.
    """
    fb = self.field_boost
    char_lists = defaultdict(list)
    weights = defaultdict(float)
    tokens = unstopped(self.analyzer(value, positions=True, chars=True,
                                     boosts=True, start_pos=start_pos,
                                     start_char=start_char, **kwargs))
    for tok in tokens:
        text = tok.text
        record = (tok.pos, start_char + tok.startchar,
                  start_char + tok.endchar)
        char_lists[text].append(record)
        weights[text] += tok.boost
    enc = self.encode
    return ((word, len(records), weights[word] * fb, enc(records))
            for word, records in char_lists.iteritems())
def tokens(value, analyzer, kwargs):
    """Return an unstopped token stream for *value*.

    Pre-tokenized input (a list or tuple) is wrapped with ``entoken``;
    any other value is run through *analyzer*. *kwargs* is a dict of
    extra keyword arguments forwarded to either path.
    """
    if isinstance(value, (tuple, list)):
        stream = entoken(value, **kwargs)
    else:
        stream = analyzer(value, **kwargs)
    return unstopped(stream)
def word_values(self, value, **kwargs):
    """Yield (word, 1, field_boost, '') for each unique term in the value."""
    boost_factor = self.field_boost
    unique_terms = set()
    for token in unstopped(self.analyzer(value, **kwargs)):
        unique_terms.add(token.text)
    return ((word, 1, boost_factor, '') for word in unique_terms)
def word_values(self, value, **kwargs):
    """Yield (word, 1, '') for each unique term in the value."""
    unique_terms = set()
    for token in unstopped(self.analyzer(value, **kwargs)):
        unique_terms.add(token.text)
    return ((word, 1, '') for word in unique_terms)
def word_datas(self, value, doc_boost=1.0, **kwargs):
    """Yield (word, freq, (freq, doc_boost)) triples for the value."""
    counts = defaultdict(int)
    for token in unstopped(self.analyzer(value)):
        counts[token.text] += 1
    return ((word, n, (n, doc_boost)) for word, n in counts.iteritems())
def word_values(self, value, **kwargs):
    """Yield (word, 1, 1.0, '') for each unique term in the value."""
    terms = set()
    for token in unstopped(self.analyzer(value, **kwargs)):
        terms.add(token.text)
    return ((word, 1, 1.0, '') for word in terms)
def word_datas(self, value, **kwargs):
    """Yield (word, 1, None) for each unique term in the value."""
    unique_terms = set(t.text for t in unstopped(self.analyzer(value)))
    return ((word, 1, None) for word in unique_terms)
def word_values(self, value, **kwargs):
    """Yield (word, 1, '') for each unique term in the value."""
    seen = set()
    for token in unstopped(self.analyzer(value, **kwargs)):
        seen.add(token.text)
    return ((word, 1, '') for word in seen)