def __unicode__(self): r = u("%s:%s") % (self.fieldname, self.text) + u("~") if self.maxdist > 1: r += u("%d") % self.maxdist if self.boost != 1.0: r += u("^%f") % self.boost return r
def __unicode__(self): r = u("DisMax(") r += " ".join(sorted(text_type(s) for s in self.subqueries)) r += u(")") if self.tiebreak: r += u("~") + text_type(self.tiebreak) return r
def __unicode__(self): r = u("(") r += (self.JOINT).join([text_type(s) for s in self.subqueries]) r += u(")") if self.minmatch: r += u(">%s") % self.minmatch return r
def __unicode__(self):
    text = self.text
    if isinstance(text, bytes_type):
        try:
            text = text.decode("ascii")
        except UnicodeDecodeError:
            text = repr(text)

    t = u("%s:%s") % (self.fieldname, text)
    if self.boost != 1:
        t += u("^") + text_type(self.boost)
    return t

def __unicode__(self): startchar = "{" if self.startexcl else "[" endchar = "}" if self.endexcl else "]" start = "" if self.start is None else self.start end = "" if self.end is None else self.end return u("%s:%s%s TO %s%s") % ( self.fieldname, startchar, start, end, endchar, )
class WildcardPlugin(whoosh.qparser.plugins.TaggingPlugin):
    # \u055E = Armenian question mark
    # \u061F = Arabic question mark
    # \u1367 = Ethiopic question mark
    qmarks = u("?\u055E\u061F\u1367")
    expr = "(?P<text>[*%s])" % qmarks

    def filters(self, parser):
        # Run early, but definitely before the multifield plugin
        return [(self.do_wildcards, 50)]

    def do_wildcards(self, parser, group):
        i = 0
        while i < len(group):
            node = group[i]
            if isinstance(node, self.WildcardNode):
                # Merge adjacent text nodes into the wildcard's text
                if i < len(group) - 1 and group[i + 1].is_text():
                    nextnode = group.pop(i + 1)
                    node.text += nextnode.text
                if i > 0 and group[i - 1].is_text():
                    prevnode = group.pop(i - 1)
                    node.text = prevnode.text + node.text
                else:
                    i += 1
            else:
                # Recurse into sub-groups
                if isinstance(node, CylleneusGroupNode):
                    self.do_wildcards(parser, node)
                i += 1

        for i in xrange(len(group)):
            node = group[i]
            if isinstance(node, self.WildcardNode):
                text = node.text
                if (
                    len(text) > 1
                    and all(qm not in text for qm in self.qmarks)
                    and text.find("*") == len(text) - 1
                ):
                    # The pattern is "foo*" with no other wildcard
                    # characters, so downgrade it to a cheaper prefix node
                    newnode = PrefixPlugin.PrefixNode(text[:-1])
                    newnode.startchar = node.startchar
                    newnode.endchar = node.endchar
                    group[i] = newnode

        return group

    class WildcardNode(whoosh.qparser.syntax.TextNode):
        # Note that this node inherits tokenize = False from TextNode,
        # so the text in this node will not be analyzed... just passed
        # straight to the query
        qclass = cylleneus.engine.query.terms.Wildcard

        def r(self):
            return "Wild %r" % self.text

    nodetype = WildcardNode

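# A minimal usage sketch for the plugin above, assuming a standard Whoosh
# QueryParser; the schema and field name here are illustrative, not part of
# the plugin's API. A pattern that ends in "*" and contains no question
# marks is downgraded by do_wildcards() to a cheaper prefix query; anything
# else stays a full wildcard query.
#
#     parser = whoosh.qparser.QueryParser("text", schema)
#     parser.replace_plugin(WildcardPlugin())
#     parser.parse(u("amic*"))   # becomes a Prefix query on "amic"
#     parser.parse(u("am?c*"))   # stays a Wildcard query
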
def normalize(self):
    if self.start in ("", None) and self.end in (u("\uffff"), None):
        from whoosh.query import Every

        return Every(self.fieldname, boost=self.boost)
    elif self.start == self.end:
        if self.startexcl or self.endexcl:
            return qcore.NullQuery
        return terms.Term(self.fieldname, self.start, boost=self.boost)
    else:
        return TermRange(
            self.fieldname,
            self.start,
            self.end,
            self.startexcl,
            self.endexcl,
            boost=self.boost,
        )

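# A short sketch of the rules in normalize() above, assuming a field named
# "tag": a fully open range collapses to Every, a degenerate inclusive range
# to a single Term, and a degenerate exclusive range to the null query.
#
#     TermRange("tag", None, None).normalize()      # -> Every("tag")
#     TermRange("tag", u("b"), u("b")).normalize()  # -> Term("tag", "b")
#     TermRange("tag", u("b"), u("b"), startexcl=True).normalize()
#     # -> NullQuery
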
def iter_all_terms(self, ixreader, phrases=True):
    """Returns an iterator of (fieldname, text) pairs for all terms in
    this query tree.

    >>> qp = qparser.QueryParser("text", myindex.schema)
    >>> q = qp.parse("alfa bravo title:charlie")
    >>> r = myindex.reader()
    >>> # List the terms in a query
    >>> list(q.iter_all_terms(r))
    [("text", "alfa"), ("text", "bravo"), ("title", "charlie")]
    >>> # Get a set of all terms in the query that don't exist in the index
    >>> missing = set(t for t in q.iter_all_terms(r) if t not in r)
    >>> missing
    set([("text", "alfa"), ("title", "charlie")])
    >>> # All terms in the query that occur in fewer than 5 documents in
    >>> # the index
    >>> [t for t in q.iter_all_terms(r) if r.doc_frequency(t[0], t[1]) < 5]
    [("title", "charlie")]

    :param ixreader: a reader to use when expanding the query's terms.
    :param phrases: Whether to add words found in Phrase queries.
    """

    for btext in self._btexts(ixreader):
        yield (self.fieldname, u(btext))

def __unicode__(self): return u("NOT ") + text_type(self.query)
def __unicode__(self): return u("%s:<%s>") % (self.fieldname, self.text)
def __unicode__(self): r = u("(") r += self.JOINT.join([text_type(s) for s in self.subqueries]) r += u(")") return r
def __call__(
    self,
    value,
    positions=False,
    chars=False,
    keeporiginal=False,
    removestops=True,
    start_pos=0,
    start_char=0,
    tokenize=True,
    mode="",
    **kwargs
):
    """
    :param value: The unicode string to tokenize.
    :param positions: Whether to record token positions in the token.
    :param chars: Whether to record character offsets in the token.
    :param start_pos: The position number of the first token. For example,
        if you set start_pos=2, the tokens will be numbered 2,3,4,...
        instead of 0,1,2,...
    :param start_char: The offset of the first character of the first
        token. For example, if you set start_char=2, the text "aaa bbb"
        will have chars (2,5),(6,9) instead of (0,3),(4,7).
    :param tokenize: if True, the text should be tokenized.
    """
    assert isinstance(value, text_type), "%r is not unicode" % value

    t = CylleneusToken(
        positions, chars, removestops=removestops, mode=mode, **kwargs
    )
    if not tokenize:
        # Emit the whole value as a single token
        t.original = t.text = value
        t.boost = 1.0
        if positions:
            t.pos = start_pos
        if chars:
            t.startchar = start_char
            t.endchar = start_char + len(value)
        yield t
    else:
        text = u("")
        charmap = self.charmap
        pos = start_pos
        startchar = currentchar = start_char
        for char in value:
            tchar = charmap[ord(char)]
            if tchar:
                # A mapped (word) character: accumulate it
                text += tchar
            else:
                # An unmapped character ends the current token
                if currentchar > startchar:
                    t.text = text
                    t.boost = 1.0
                    if keeporiginal:
                        t.original = t.text
                    if positions:
                        t.pos = pos
                        pos += 1
                    if chars:
                        t.startchar = startchar
                        t.endchar = currentchar
                    yield t
                startchar = currentchar + 1
                text = u("")
            currentchar += 1

        # Emit any trailing token left over at the end of the input
        if currentchar > startchar:
            t.text = value[startchar:currentchar]
            t.boost = 1.0
            if keeporiginal:
                t.original = t.text
            if positions:
                t.pos = pos
            if chars:
                t.startchar = startchar
                t.endchar = currentchar
            yield t

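# Hedged usage sketch for the tokenizer above: "my_tokenizer" stands in for
# an instance whose charmap maps word characters to themselves and
# separators (such as spaces) to zero. With positions=True and chars=True
# each token records its position and its (startchar, endchar) span in the
# original string, e.g. for "aaa bbb":
#
#     for tok in my_tokenizer(u("aaa bbb"), positions=True, chars=True):
#         print(tok.text, tok.pos, tok.startchar, tok.endchar)
#     # aaa 0 0 3
#     # bbb 1 4 7
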
def __unicode__(self): return u("%s:*") % self.fieldname
def __unicode__(self): return u("<_NullQuery>")