def word_values(self, value, analyzer, **kwargs):
    """Yield one ``(text, frequency, weight, encoded)`` tuple per distinct token.

    ``value`` is tokenized through ``tokens(value, analyzer, kwargs)`` with the
    ``positions``, ``chars`` and ``boosts`` options forced on. Tokens are
    grouped by their text; for each group:

    * ``frequency`` is the number of occurrences,
    * ``weight`` is the summed token boost multiplied by ``self.field_boost``,
    * ``encoded`` is ``pack_uint(frequency) + pack_float(weight)`` followed by
      the serialized list of ``(position delta, start-char delta, length,
      boost)`` tuples.
    """
    field_boost = self.field_boost
    occurrences = defaultdict(list)

    # Force the token attributes this format needs to record.
    kwargs.update(positions=True, chars=True, boosts=True)

    for token in tokens(value, analyzer, kwargs):
        occurrences[token.text].append(
            (token.pos, token.startchar, token.endchar, token.boost)
        )

    for text, hits in iteritems(occurrences):
        # hits = [(pos, startchar, endchar, boost), ...]
        deltas = []
        prev_pos = 0
        prev_end = 0
        boost_total = 0
        for pos, startchar, endchar, boost in hits:
            # Positions are stored relative to the previous position, start
            # offsets relative to the previous end offset, and the end offset
            # as a length from its own start.
            deltas.append(
                (pos - prev_pos, startchar - prev_end, endchar - startchar, boost)
            )
            prev_pos, prev_end = pos, endchar
            boost_total += boost

        frequency = len(hits)
        weight = boost_total * field_boost
        # NOTE(review): the [2:-1] slice drops the first two bytes and the last
        # byte of the serialized list -- presumably pickle's protocol header
        # and STOP opcode; confirm against the matching decoder.
        encoded = pack_uint(frequency) + pack_float(weight) + dumps(deltas, -1)[2:-1]
        yield (text, frequency, weight, encoded)
def encode(self, poses):
    """Encode a sequence of ``(position, boost)`` pairs.

    The result is ``pack_uint(len(poses))``, then ``pack_float`` of the summed
    boosts, then the serialized list of ``(position delta, boost)`` tuples
    with its first two bytes and last byte removed.
    """
    deltas = []
    previous = 0
    boost_total = 0
    for position, boost in poses:
        # Each position is stored as the gap from the one before it.
        deltas.append((position - previous, boost))
        previous = position
        boost_total += boost

    header = pack_uint(len(poses)) + pack_float(boost_total)
    # NOTE(review): the [2:-1] slice presumably strips pickle's protocol
    # header and STOP opcode -- confirm against the matching decoder.
    payload = dumps(deltas, -1)[2:-1]
    return header + payload
def encode(self, posns_chars_boosts):
    """Encode ``(pos, startchar, endchar, boost)`` tuples.

    ``posns_chars_boosts`` has the form
    ``[(pos, startchar, endchar, boost), ...]``.

    The result is ``pack_uint`` of the number of entries, then ``pack_float``
    of the summed boosts, then the serialized list of ``(position delta,
    start-char delta, length, boost)`` tuples with its first two bytes and
    last byte removed.
    """
    deltas = []
    prev_pos = 0
    prev_end = 0
    boost_total = 0
    for pos, startchar, endchar, boost in posns_chars_boosts:
        # Position is relative to the previous position; the start offset is
        # relative to the previous end offset; the end offset is kept as a
        # length measured from its own start.
        deltas.append(
            (pos - prev_pos, startchar - prev_end, endchar - startchar, boost)
        )
        prev_pos, prev_end = pos, endchar
        boost_total += boost

    header = pack_uint(len(posns_chars_boosts)) + pack_float(boost_total)
    # NOTE(review): the [2:-1] slice presumably strips pickle's protocol
    # header and STOP opcode -- confirm against the matching decoder.
    payload = dumps(deltas, -1)[2:-1]
    return header + payload
def encode(self, poses):
    """Encode ``(pos, startchar, endchar, boost)`` tuples; also return the boost sum.

    ``poses`` has the form ``[(pos, startchar, endchar, boost), ...]``.

    Returns a 2-tuple ``(encoded, summed_boost)``:

    * ``encoded`` is ``pack_uint(len(poses))``, then ``pack_float`` of the
      summed boosts multiplied by ``self.field_boost``, then
      ``dumps(deltas, 2)`` where ``deltas`` is the list of ``(position delta,
      start-char delta, length, boost)`` tuples.
    * ``summed_boost`` is the plain sum of the boosts; it is NOT multiplied by
      the field boost.
    """
    field_boost = self.field_boost

    deltas = []
    prev_pos = 0
    prev_end = 0
    boost_total = 0
    for pos, startchar, endchar, boost in poses:
        # Position is relative to the previous position; the start offset is
        # relative to the previous end offset; the end offset is kept as a
        # length measured from its own start.
        deltas.append(
            (pos - prev_pos, startchar - prev_end, endchar - startchar, boost)
        )
        prev_pos, prev_end = pos, endchar
        boost_total += boost

    encoded = (
        pack_uint(len(poses))
        + pack_float(boost_total * field_boost)
        + dumps(deltas, 2)
    )
    return (encoded, boost_total)
def word_values(self, value, analyzer, **kwargs):
    """Yield one ``(text, frequency, weight, encoded)`` tuple per distinct token.

    ``value`` is tokenized through ``tokens(value, analyzer, kwargs)`` with the
    ``positions`` and ``boosts`` options forced on. Tokens are grouped by
    their text; for each group:

    * ``frequency`` is the number of occurrences,
    * ``weight`` is the summed token boost multiplied by ``self.field_boost``,
    * ``encoded`` is ``pack_uint(frequency)``, then ``pack_float`` of the
      summed boost (not multiplied by the field boost), then the serialized
      list of ``(position delta, boost)`` tuples.
    """
    fb = self.field_boost
    seen = defaultdict(list)

    # Force the token attributes this format needs to record.
    kwargs["positions"] = True
    kwargs["boosts"] = True
    for t in tokens(value, analyzer, kwargs):
        seen[t.text].append((t.pos, t.boost))

    for w, poses in iteritems(seen):
        codes = []
        base = 0
        summedboost = 0
        for pos, boost in poses:
            summedboost += boost
            # Each position is stored as the gap from the one before it.
            codes.append((pos - base, boost))
            base = pos

        # NOTE(review): the [2:-1] slice drops the first two bytes and the last
        # byte of the serialized list -- presumably pickle's protocol header
        # and STOP opcode; confirm against the matching decoder.
        encoded = (pack_uint(len(poses)) + pack_float(summedboost)
                   + dumps(codes, -1)[2:-1])

        # Reuse the total accumulated above rather than re-summing the boosts:
        # this avoids a second pass over ``poses`` and keeps the yielded
        # weight derived from exactly the same total that was packed into
        # ``encoded`` (the built-in ``sum`` may round floats differently from
        # a plain ``+=`` loop on newer Python versions).
        yield (w, len(poses), summedboost * fb, encoded)
def write_float(self, n):
    """Pack ``n`` with ``pack_float`` and write the result to ``self.file``."""
    write = self.file.write
    write(pack_float(n))