def stored_fields(self, docnum):
    """Return the stored fields of document ``docnum`` as a new dict.

    The raw mapping comes from ``self._perdoc.stored_fields(docnum)``; any
    entry whose field name is not contained in ``self.schema`` is left out
    of the result.

    :param docnum: document number; asserted to be non-negative.
    :returns: a fresh ``dict`` mapping field name to stored value.
    :raises ReaderClosed: if ``self.is_closed`` is true.
    """
    if self.is_closed:
        raise ReaderClosed
    assert docnum >= 0

    schema = self.schema
    stored = self._perdoc.stored_fields(docnum)
    # Double-check with schema to filter out removed fields
    return {name: value for name, value in stored.items() if name in schema}
def __init__(self, codec, dbfile, length, postfile):
    """Open the term index in ``dbfile`` and build the field lookups.

    :param codec: stored on the instance as ``_codec``.
    :param dbfile: file object handed to ``filetables.OrderedHashReader``.
    :param length: passed to ``OrderedHashReader`` together with ``dbfile``.
    :param postfile: stored on the instance as ``_postfile``.
    """
    self._codec = codec
    self._dbfile = dbfile
    self._tindex = filetables.OrderedHashReader(dbfile, length)
    self._fieldmap = self._tindex.extras["fieldmap"]
    self._postfile = postfile

    # Invert the name -> number map into a list indexed by field number.
    # NOTE(review): this assumes the numbers are exactly 0..len-1 -- a gap
    # would raise IndexError; confirm against the writer side.
    self._fieldunmap = [None] * len(self._fieldmap)
    for fieldname, num in self._fieldmap.items():
        self._fieldunmap[num] = fieldname
def word_values(self, value, analyzer, **kwargs):
    """Yield one ``(text, count, summed_boost, encoded)`` tuple per term.

    ``value`` is tokenized with ``tokens(value, analyzer, kwargs)`` after
    the ``positions``, ``chars`` and ``boosts`` options are forced on.
    For every distinct token text the ``(pos, startchar, endchar, boost)``
    tuples are collected in token order and handed to ``self.encode``,
    which returns the encoded value and the summed boost.

    :param value: the field value to tokenize.
    :param analyzer: passed through to ``tokens``.
    :param kwargs: extra tokenizing options, passed through to ``tokens``.
    """
    kwargs["positions"] = True
    kwargs["chars"] = True
    kwargs["boosts"] = True

    occurrences = defaultdict(list)
    for t in tokens(value, analyzer, kwargs):
        occurrences[t.text].append((t.pos, t.startchar, t.endchar, t.boost))

    for text, entries in occurrences.items():
        # Use a separate name so the ``value`` parameter is not rebound.
        encoded, summedboost = self.encode(entries)
        yield (text, len(entries), summedboost, encoded)
def word_values(self, value, analyzer, **kwargs):
    """Yield one ``(text, count, weight, encoded)`` tuple per term.

    ``value`` is tokenized with ``tokens(value, analyzer, kwargs)`` after
    the ``positions`` and ``boosts`` options are forced on. For every
    distinct token text:

    * ``count`` is the number of occurrences,
    * ``weight`` is the sum of the token boosts times ``self.field_boost``,
    * ``encoded`` is ``self.encode`` applied to the list of positions.

    :param value: the field value to tokenize.
    :param analyzer: passed through to ``tokens``.
    :param kwargs: extra tokenizing options, passed through to ``tokens``.
    """
    fb = self.field_boost
    positions = defaultdict(list)
    weights = defaultdict(float)

    kwargs["positions"] = True
    kwargs["boosts"] = True
    for t in tokens(value, analyzer, kwargs):
        positions[t.text].append(t.pos)
        weights[t.text] += t.boost

    for text, poslist in positions.items():
        # Use a separate name so the ``value`` parameter is not rebound.
        encoded = self.encode(poslist)
        yield (text, len(poslist), weights[text] * fb, encoded)
def word_values(self, value, analyzer, **kwargs):
    """Yield one ``(text, count, weight, encoded)`` tuple per term.

    ``value`` is tokenized with ``tokens(value, analyzer, kwargs)`` after
    the ``positions`` and ``boosts`` options are forced on. For every
    distinct token text the ``(pos, boost)`` pairs are collected in token
    order; ``weight`` is the sum of those boosts times ``self.field_boost``
    and ``encoded`` is ``self.encode`` applied to the list of pairs.

    :param value: the field value to tokenize.
    :param analyzer: passed through to ``tokens``.
    :param kwargs: extra tokenizing options, passed through to ``tokens``.
    """
    fb = self.field_boost
    seen = defaultdict(list)

    kwargs["positions"] = True
    kwargs["boosts"] = True
    for t in tokens(value, analyzer, kwargs):
        seen[t.text].append((t.pos, t.boost))

    for text, pairs in seen.items():
        # Use a separate name so the ``value`` parameter is not rebound.
        encoded = self.encode(pairs)
        total_boost = sum(boost for _, boost in pairs)
        yield (text, len(pairs), total_boost * fb, encoded)
def word_values(self, value, analyzer, **kwargs):
    """Return a generator of ``(text, freq, weight, encoded)`` per term.

    ``value`` is tokenized with ``tokens(value, analyzer, kwargs)`` after
    the ``boosts`` option is forced on. Tokenizing happens immediately
    when this method is called; only the construction of the result
    tuples is deferred to the returned generator.

    For every distinct token text, ``freq`` is the number of occurrences,
    ``weight`` is the sum of the token boosts times ``self.field_boost``
    and ``encoded`` is ``pack_uint(freq)``.

    :param value: the field value to tokenize.
    :param analyzer: passed through to ``tokens``.
    :param kwargs: extra tokenizing options, passed through to ``tokens``.
    """
    fb = self.field_boost
    freqs = defaultdict(int)
    weights = defaultdict(float)

    kwargs["boosts"] = True
    for t in tokens(value, analyzer, kwargs):
        freqs[t.text] += 1
        weights[t.text] += t.boost

    return (
        (text, freq, weights[text] * fb, pack_uint(freq))
        for text, freq in freqs.items()
    )
def word_values(self, value, analyzer, **kwargs):
    """Yield one ``(text, count, weight, encoded)`` tuple per term.

    ``value`` is tokenized with ``tokens(value, analyzer, kwargs)`` after
    the ``positions``, ``chars`` and ``boosts`` options are forced on.
    For every distinct token text a list of
    ``(pos, startchar, endchar, meta)`` tuples is collected, where ``meta``
    is a tuple of ``"key=value"`` strings built from the token's ``meta``
    mapping, or ``()`` when the token has no ``meta`` attribute.

    ``weight`` is the sum of the token boosts times ``self.field_boost``
    and ``encoded`` is ``self.encode`` applied to the collected list.

    :param value: the field value to tokenize.
    :param analyzer: passed through to ``tokens``.
    :param kwargs: extra tokenizing options, passed through to ``tokens``.
    """
    fb = self.field_boost
    seen = defaultdict(list)
    weights = defaultdict(float)

    kwargs["positions"] = True
    kwargs["chars"] = True
    kwargs["boosts"] = True
    for t in tokens(value, analyzer, kwargs):
        if hasattr(t, "meta"):
            # NOTE(review): this flag is set after ``tokens`` has already
            # received ``kwargs``; it only has an effect if the tokenizer
            # reads the dict lazily -- confirm the intent.
            kwargs["meta"] = True
            meta = tuple(f"{k}={v}" for k, v in t.meta.items())
        else:
            meta = ()
        seen[t.text].append((t.pos, t.startchar, t.endchar, meta))
        # NOTE(review): the boost is accumulated for every token, with or
        # without ``meta`` -- confirm this matches the intended nesting.
        weights[t.text] += t.boost

    for text, poslist in seen.items():
        # Use a separate name so the ``value`` parameter is not rebound.
        encoded = self.encode(poslist)
        yield (text, len(poslist), weights[text] * fb, encoded)
def __init__(self, B=0.75, K1=1.2, **kwargs):
    """Store the global ``B``/``K1`` parameters and per-field ``B`` values.

    >>> from whoosh import scoring
    >>> # Set a custom B value for the "content" field
    >>> w = scoring.BM25F(B=0.75, content_B=1.0, K1=1.5)

    :param B: free parameter, see the BM25 literature. Keyword arguments
        of the form ``fieldname_B`` (for example, ``body_B``) set field-
        specific values for B.
    :param K1: free parameter, see the BM25 literature.

    Keyword arguments whose name does not end in ``_B`` are ignored.
    """
    self.B = B
    self.K1 = K1
    # Strip the two-character "_B" suffix to recover the field name.
    self._field_B = {
        key[:-2]: val for key, val in kwargs.items() if key.endswith("_B")
    }
def collect(self, sub_docnum):
    """Collect ``sub_docnum`` in the child and file it under every facet.

    The document is first collected by ``self.child``; the sort key it
    returns is recorded with each facet group entry and is also this
    method's return value. For every ``(name, categorizer)`` in
    ``self.categorizers`` the document is added to ``self.facetmaps[name]``
    under the name of each key the categorizer produces for it.

    :param sub_docnum: document number relative to the current
        sub-searcher; ``self.child.offset`` is added to form the number
        stored in the facet maps.
    :returns: the sort key returned by ``self.child.collect(sub_docnum)``.
    """
    matcher = self.child.matcher
    global_docnum = sub_docnum + self.child.offset

    # We want the sort key for the document so we can (by default) sort
    # the facet groups
    sortkey = self.child.collect(sub_docnum)

    # For each facet we're grouping by
    for name, categorizer in self.categorizers.items():
        add = self.facetmaps[name].add

        # A facet that allows overlapping groups yields several keys per
        # document; otherwise there is exactly one.
        if categorizer.allow_overlap:
            keys = categorizer.keys_for(matcher, sub_docnum)
        else:
            keys = (categorizer.key_for(matcher, sub_docnum),)

        for key in keys:
            add(categorizer.key_to_name(key), global_docnum, sortkey)

    return sortkey
def __repr__(self):
    """Return ``ClassName(attr=value, ...)`` built from ``self.__dict__``.

    Attributes appear in ``__dict__`` iteration order and each value is
    rendered with ``repr``.
    """
    parms = ", ".join(
        f"{name}={value!r}" for name, value in self.__dict__.items()
    )
    return f"{self.__class__.__name__}({parms})"
def __repr__(self):
    """Return ``ClassName(attr=value, ...)`` built from ``self.__dict__``.

    Attributes appear in ``__dict__`` iteration order and each value is
    rendered with ``repr``. An instance without attributes renders as
    ``ClassName()``.
    """
    # Joining an empty sequence already yields "", so no emptiness guard
    # is needed.
    attrs = ", ".join(
        f"{key}={value!r}" for key, value in self.__dict__.items()
    )
    return f"{self.__class__.__name__}({attrs})"