def __init__(self, expr=r"(?P<rel>(<=|>=|<|>|=<|=>))"):
    """Compile and remember the pattern that recognizes a relation operator.

    :param expr: a regular expression that must capture a "rel" group
        (which contains <, >, >=, <=, =>, or =<). It is passed through
        ``rcompile`` and the result is stored on ``self.expr``.
    """
    relation_pattern = rcompile(expr)
    self.expr = relation_pattern
class Word(BasicSyntax):
    """Syntax object representing a single term.

    The pattern matches one or more characters that are not a space, tab,
    carriage return, newline, or closing parenthesis.
    """

    # NOTE(review): these two flags are only set here; presumably the base
    # class or the parser reads them when analyzing the term text -- confirm.
    removestops = True
    tokenize = True

    expr = rcompile("[^ \t\r\n)]+")
def __init__(self, expr, grouptype, left_assoc=True):
    """Store the operator's token pattern, group type, and associativity.

    :param expr: a pattern string or compiled expression of the token text;
        it is passed through ``rcompile`` before being stored.
    :param grouptype: a :class:`Group` subclass that should be created to
        contain objects affected by the operator.
    :param left_assoc: stored unchanged on the instance. NOTE(review):
        presumably selects left-associative grouping -- confirm against the
        code that reads ``left_assoc``.
    """
    self.left_assoc = left_assoc
    self.grouptype = grouptype
    self.expr = rcompile(expr)
class Prefix(BasicSyntax):
    """Syntax object for a prefix term: text ending in a single ``*``.

    The pattern requires the star to be followed by a space, the end of the
    input, or a closing parenthesis.
    """

    expr = rcompile("[^ \t\r\n*]+\\*(?= |$|\\))")
    qclass = query.Prefix

    def __repr__(self):
        """Return ``field:pre('text')``, plus ``^boost`` when boost is not 1.0."""
        boost_suffix = "^%s" % self.boost if self.boost != 1.0 else ""
        return "%s:pre(%r)" % (self.fieldname, self.text) + boost_suffix

    @classmethod
    def create(cls, parser, match):
        """Build a Prefix from the matched text with the trailing star removed."""
        matched_text = match.group(0)
        return cls(matched_text[:-1])
class Field(Token):
    """Token for a field specifier such as ``name:``.

    Keeps both the captured field name and the full matched text.
    """

    expr = rcompile(r"(?P<fieldname>\w+):")

    def __init__(self, fieldname, original):
        """
        :param fieldname: the name captured before the colon.
        :param original: the complete text that was matched.
        """
        self.original = original
        self.fieldname = fieldname

    def __repr__(self):
        return "<%s:>" % self.fieldname

    @classmethod
    def create(cls, parser, match):
        """Build a Field from the "fieldname" group and the whole match."""
        return cls(match.group("fieldname"), match.group(0))
class Boost(Token):
    """Token for a boost suffix such as ``^2`` or ``^1.5``.

    The pattern matches a caret, an integer part, an optional decimal part,
    and then requires end of input or a following whitespace character.
    """

    # The decimal point is escaped on purpose. An unescaped "." matches any
    # character (including a space), which let input such as "^1 5" be
    # consumed as a single boost token and then fail float conversion.
    expr = rcompile("\\^([0-9]+(\\.[0-9]+)?)($|(?=[ \t\r\n]))")

    def __init__(self, original, boost):
        """
        :param original: the complete text that was matched (e.g. "^1.5").
        :param boost: the boost factor as a float.
        """
        self.original = original
        self.boost = boost

    def __repr__(self):
        return "<^%s>" % self.boost

    @classmethod
    def create(cls, parser, match):
        """Build a Boost from a match of ``expr``.

        :returns: a Boost holding the matched text and its float value, or a
            ``Word`` holding the matched text if the number cannot be
            converted.
        """
        try:
            return cls(match.group(0), float(match.group(1)))
        except ValueError:
            # Kept as a safety net: treat an unparseable boost as plain text.
            return Word(match.group(0))
class Wild(BasicSyntax):
    """Syntax object for a wildcard term.

    The pattern matches any run of characters that are not whitespace or a
    wildcard character, then one wildcard character, then any run of
    non-whitespace characters.
    """

    # Wildcard characters besides "*" and "?":
    # \u055E = Armenian question mark
    # \u061F = Arabic question mark
    # \u1367 = Ethiopic question mark
    expr = rcompile(
        u"[^ \t\r\n*?\u055E\u061F\u1367]*[*?\u055E\u061F\u1367]\\S*")
    qclass = query.Wildcard

    def __repr__(self):
        """Return ``field:wild('text')``, plus ``^boost`` when boost is not 1.0."""
        pieces = ["%s:wild(%r)" % (self.fieldname, self.text)]
        if self.boost != 1.0:
            pieces.append("^%s" % self.boost)
        return "".join(pieces)

    @classmethod
    def create(cls, parser, match):
        """Build a Wild from the complete matched text."""
        whole_match = match.group(0)
        return cls(whole_match)
class Field(Token):
    """Token for a field specifier such as ``name:``.

    ``create`` only produces a token when the parser has no schema, or the
    captured name is ``"*"``, or the name is present in the parser's schema.
    """

    # Backslashes are doubled so the literal does not rely on invalid string
    # escape sequences ("\w", "\d"); the resulting pattern text is unchanged.
    expr = rcompile(u"(\\w[\\w\\d]*):")

    def __init__(self, fieldname):
        """
        :param fieldname: the field name captured before the colon.
        """
        self.fieldname = fieldname

    def __repr__(self):
        return "<%s:>" % self.fieldname

    def set_fieldname(self, fieldname, force=False):
        """Return a new token of the same class for ``fieldname``.

        :param fieldname: the field name for the new token.
        :param force: accepted but not used by this implementation.
        """
        return self.__class__(fieldname)

    @classmethod
    def create(cls, parser, match):
        """Build a Field from the first captured group, if it is acceptable.

        :returns: a new Field, or ``None`` when the parser has a schema that
            does not contain the name (and the name is not ``"*"``).
        """
        fieldname = match.group(1)
        if (not parser.schema
                or fieldname == "*"
                or fieldname in parser.schema):
            return cls(fieldname)
        return None
class Wild(BasicSyntax):
    """Syntax object for a wildcard term made of word characters and
    wildcard characters.
    """

    # Pattern: any number of word chars, followed by at least one question
    # mark or star, followed by any number of word chars, question marks,
    # or stars.
    # \u055E = Armenian question mark
    # \u061F = Arabic question mark
    # \u1367 = Ethiopic question mark
    expr = rcompile(
        u"\\w*[*?\u055E\u061F\u1367](\\w|[*?\u055E\u061F\u1367])*")
    qclass = query.Wildcard

    def __repr__(self):
        """Return ``field:wild('text')``, plus ``^boost`` when boost is not 1.0."""
        boost_suffix = "^%s" % self.boost if self.boost != 1.0 else ""
        return "%s:wild(%r)" % (self.fieldname, self.text) + boost_suffix

    @classmethod
    def create(cls, parser, match):
        """Build a Wild from the complete matched text."""
        whole_match = match.group(0)
        return cls(whole_match)
class Quotes(BasicSyntax):
    """Syntax object for a double-quoted phrase.

    The pattern captures, non-greedily, the text between two double quotes.
    """

    expr = rcompile('"(.*?)"')

    def __init__(self, text, fieldname=None, boost=1.0, slop=1):
        """
        :param text: the text found between the quotes.
        :param fieldname: passed to the base class initializer.
        :param boost: passed to the base class initializer.
        :param slop: stored on the instance and later handed to the
            parser's phrase class.
        """
        super(PhrasePlugin.Quotes, self).__init__(text, fieldname=fieldname,
                                                  boost=boost)
        self.slop = slop

    def __repr__(self):
        """Return ``field:q('text')``, plus ``^boost`` when boost is not 1.0."""
        pieces = ["%s:q(%r)" % (self.fieldname, self.text)]
        if self.boost != 1.0:
            pieces.append("^%s" % self.boost)
        return "".join(pieces)

    @classmethod
    def create(cls, parser, match):
        """Build a Quotes object from the text captured inside the quotes."""
        # The pattern has no slop group, so slop is always 1 here.
        return cls(match.group(1), slop=1)

    def query(self, parser):
        """Build a phrase query from this object's text.

        When the target field is in the parser's schema, the field's
        ``process_text`` (in "query" mode) produces the words; otherwise the
        text is split on single spaces.
        """
        fieldname = self.fieldname or parser.fieldname
        if not (parser.schema and fieldname in parser.schema):
            phrase_words = self.text.split(" ")
        else:
            schema_field = parser.schema[fieldname]
            phrase_words = list(
                schema_field.process_text(self.text, mode="query"))
        return parser.phraseclass(fieldname, phrase_words, boost=self.boost,
                                  slop=self.slop)
class Not(Singleton):
    """Singleton token whose pattern is compiled from ``token``."""

    # NOTE(review): ``token`` is not defined in this block; presumably it is
    # the operator pattern supplied by the enclosing scope -- confirm.
    expr = rcompile(token)
class White(Singleton):
    """Singleton token matching a run of one or more whitespace characters."""

    expr = rcompile("\\s+")
class SingleQuotes(Token):
    """Token for single-quoted text, which is turned into a ``Word``.

    The opening quote must be at the start of the input or preceded by a
    non-word character; the closing quote must be followed by whitespace,
    ``]``, ``)``, ``}``, or the end of the input.
    """

    expr = rcompile(r"(^|(?<=\W))'(.*?)'(?=\s|\]|[)}]|$)")

    @classmethod
    def create(cls, parser, match):
        """Return a ``Word`` holding the text found between the quotes."""
        quoted_text = match.group(2)
        return Word(quoted_text)
class Open(Singleton):
    """Singleton token matching a literal opening parenthesis, ``(``."""

    expr = rcompile("\\(")
class Minus(Singleton):
    """Singleton token matching a literal ``-`` character."""

    expr = rcompile("-")
class Plus(Singleton):
    """Singleton token matching a literal ``+`` character."""

    expr = rcompile("\\+")
class Close(Singleton):
    """Singleton token matching a literal closing parenthesis, ``)``."""

    expr = rcompile("\\)")
class Range(Token):
    """Token for a range expression such as ``[a to b]`` or ``{a to b}``.

    Both the start and the end are optional in the pattern. A curly bracket
    on either side marks that side as exclusive.
    """

    # re.VERBOSE pattern: each "#" comment must end at a line break, so this
    # literal has to stay laid out one element per line.
    expr = rcompile(
        r"""
        (?P<open>\{|\[)               # Open paren
        (                             # Begin optional "start"
          (                           # Begin choice between start1 and start2
            ('(?P<start2>[^']+)')     # Quoted start
            | (?P<start1>[^ ]+)       # ...or regular start
          )                           # End choice
        [ ]+)?                        # Space at end of optional "start"
        [Tt][Oo]                      # "to" between start and end
        ([ ]+                         # Space at start of optional "end"
          (                           # Begin choice between end1 and end2
            ('(?P<end2>[^']+)')       # Quoted end
            | (?P<end1>[^\]\}]*)      # ...or normal end
          )                           # End choice
        )?                            # End of optional "end
        (?P<close>\}|\])              # Close paren
        """, re.VERBOSE)

    def __init__(self, start, end, startexcl, endexcl, fieldname=None,
                 boost=1.0):
        """
        :param start: text of the lower bound, or None if it was omitted.
        :param end: text of the upper bound, or None if it was omitted.
        :param startexcl: True if the lower bound is exclusive.
        :param endexcl: True if the upper bound is exclusive.
        :param fieldname: the field the range applies to, if already known.
        :param boost: boost factor passed on to the generated query.
        """
        self.fieldname = fieldname
        self.start = start
        self.end = end
        self.startexcl = startexcl
        self.endexcl = endexcl
        self.boost = boost

    def set_boost(self, b):
        """Return a copy of this range with its boost replaced by ``b``."""
        return self.__class__(self.start, self.end, self.startexcl,
                              self.endexcl, fieldname=self.fieldname, boost=b)

    def set_fieldname(self, name, force=False):
        """Return a copy of this range bound to field ``name``.

        The existing field name is kept (and ``self`` returned) unless it is
        None or ``force`` is true.
        """
        if force or self.fieldname is None:
            return self.__class__(self.start, self.end, self.startexcl,
                                  self.endexcl, fieldname=name,
                                  boost=self.boost)
        else:
            return self

    def __repr__(self):
        r = "%s:(%r, %r, %s, %s)" % (self.fieldname, self.start, self.end,
                                     self.startexcl, self.endexcl)
        if self.boost != 1.0:
            r += "^%s" % self.boost
        return r

    @classmethod
    def create(cls, parser, match):
        """Build a Range from a match of ``expr``.

        The quoted form of each bound is preferred over the unquoted form.
        """
        start = match.group("start2") or match.group("start1")
        end = match.group("end2") or match.group("end1")
        return cls(start, end, startexcl=match.group("open") == "{",
                   endexcl=match.group("close") == "}")

    def query(self, parser):
        """Build the query object for this range.

        If the field is in the parser's schema and is self-parsing, its
        ``parse_range`` result is returned when it is not None, and
        ``query.NullQuery`` is returned if it raises ``QueryParserError``.
        Otherwise a ``query.TermRange`` is returned, with bounds passed
        through ``get_single_text`` for schema fields and missing bounds
        replaced by ``u''`` (start) and ``u'\\uFFFF'`` (end).
        """
        fieldname = self.fieldname or parser.fieldname
        start, end = self.start, self.end

        if parser.schema and fieldname in parser.schema:
            field = parser.schema[fieldname]
            if field.self_parsing():
                try:
                    rangeq = field.parse_range(fieldname, start, end,
                                               self.startexcl, self.endexcl,
                                               boost=self.boost)
                    if rangeq is not None:
                        return rangeq
                except QueryParserError:
                    return query.NullQuery

            # These calls need ``field``, so they belong to the schema branch.
            if start:
                start = get_single_text(field, start, tokenize=False,
                                        removestops=False)
            if end:
                end = get_single_text(field, end, tokenize=False,
                                      removestops=False)

        # Open-ended bounds: substitute the lowest and a very high string.
        if start is None:
            start = u''
        if end is None:
            end = u'\uFFFF'

        return query.TermRange(fieldname, start, end, self.startexcl,
                               self.endexcl, boost=self.boost)