def __init__(self, *patterns):
    """Store the given patterns and compile them individually and combined.

    :param patterns: regular expression patterns; each is compiled on its
        own (case-insensitively) and also wrapped in a group and joined
        with ``|`` inside a single ``month`` named group.
    """
    self.patterns = patterns
    self.exprs = [rcompile(p, re.IGNORECASE) for p in patterns]

    # One alternation of all the patterns, each in its own group
    alternatives = ["(%s)" % p for p in patterns]
    self.pattern = "(?P<month>" + "|".join(alternatives) + ")"
    self.expr = rcompile(self.pattern, re.IGNORECASE)
def __init__(self, next, last, daynames):
    """Build the expression matching a direction word followed by a day.

    :param next: pattern accepted as the first alternative of the ``dir``
        group.
    :param last: pattern accepted as the second alternative of the ``dir``
        group.
    :param daynames: patterns for the day names; each one is compiled
        separately and all of them are joined with ``|`` to form the
        ``day`` group.
    """
    self.next_pattern, self.last_pattern = next, last

    compiled = [rcompile(name, re.IGNORECASE) for name in daynames]
    self._dayname_exprs = tuple(compiled)

    day_alternatives = "|".join(daynames)
    template = "(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))"
    self.pattern = template % (next, last, day_alternatives)
    self.expr = rcompile(self.pattern, re.IGNORECASE)
def __init__(self, next, last, daynames):
    """Compile the "direction + day name" expression.

    :param next: pattern used as the first ``dir`` alternative.
    :param last: pattern used as the second ``dir`` alternative.
    :param daynames: day name patterns. They are compiled one by one into
        ``_dayname_exprs`` and also combined with ``|`` into the ``day``
        group of the overall pattern.
    """
    self.next_pattern = next
    self.last_pattern = last

    individual = []
    for dayname in daynames:
        individual.append(rcompile(dayname, re.IGNORECASE))
    self._dayname_exprs = tuple(individual)

    # The match must be followed by a non-word character or end of input
    self.pattern = ("(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))"
                    % (next, last, "|".join(daynames)))
    self.expr = rcompile(self.pattern, re.IGNORECASE)
def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", onceper=True,
             requireall=False, allof=None, anyof=None, name=None):
    """
    :param elements: the sub-elements to parse.
    :param sep: a separator regular expression to match between elements,
        or None to not have separators.
    :param onceper: only allow each element to match once.
    :param requireall: if True, the sub-elements can match in any order,
        but they must all match.
    :param allof: a list of indexes into the list of elements. When this
        argument is not None, this element matches only if all the
        indicated sub-elements match.
    :param anyof: a list of indexes into the list of elements. When this
        argument is not None, this element matches only if any of the
        indicated sub-elements match.
    :param name: a name for this element (for debugging purposes only).
    """

    super(Bag, self).__init__(elements, name)
    # NOTE(review): sep is handed to rcompile even when it is None --
    # confirm rcompile passes non-string values through unchanged.
    self.sep_expr = rcompile(sep, re.IGNORECASE)
    self.onceper = onceper
    self.requireall = requireall
    self.allof = allof
    self.anyof = anyof
class RangePlugin(Plugin):
    """Adds the ability to specify term ranges.

    The opening and closing bracket characters that mark an exclusive
    endpoint are configurable through ``excl_start`` and ``excl_end``.
    """

    # Verbose-mode pattern: whitespace and "#" comments inside it are
    # ignored by the regex engine, so it must stay on separate lines.
    expr = rcompile(r"""
    (?P<open>\{|\[)               # Open paren
    (?P<start>
        ('[^']*?'\s+)             # single-quoted
        |                         # or
        (.+?(?=[Tt][Oo]))         # everything until "to"
    )?
    [Tt][Oo]                      # "to"
    (?P<end>
        (\s+'[^']*?')             # single-quoted
        |                         # or
        ((.+?)(?=]|}))            # everything until "]" or "}"
    )?
    (?P<close>}|])                # Close paren
    """, verbose=True)

    class RangeTagger(RegexTagger):
        """Tagger that turns a match of the range expression into a node."""

        def __init__(self, expr, excl_start, excl_end):
            self.expr = expr
            self.excl_start = excl_start
            self.excl_end = excl_end

        def create(self, parser, match):
            """Build a ``syntax.RangeNode`` from the matched groups."""
            lower = match.group("start")
            upper = match.group("end")

            if lower:
                # Drop the whitespace between the start term and "to",
                # then unwrap a single-quoted term
                lower = lower.rstrip()
                if lower.startswith("'") and lower.endswith("'"):
                    lower = lower[1:-1]

            if upper:
                # Drop the whitespace between "to" and the end term,
                # then unwrap a single-quoted term
                upper = upper.lstrip()
                if upper.startswith("'") and upper.endswith("'"):
                    upper = upper[1:-1]

            # An endpoint is exclusive when its bracket is the configured
            # exclusive character
            lower_exclusive = match.group("open") == self.excl_start
            upper_exclusive = match.group("close") == self.excl_end

            return syntax.RangeNode(lower, upper, lower_exclusive,
                                    upper_exclusive)

    def __init__(self, expr=None, excl_start="{", excl_end="}"):
        """
        :param expr: replacement for the class-level range expression; the
            class-level one is used when this is falsy.
        :param excl_start: opening bracket that marks an exclusive start.
        :param excl_end: closing bracket that marks an exclusive end.
        """
        self.expr = expr or self.expr
        self.excl_start = excl_start
        self.excl_end = excl_end

    def taggers(self, parser):
        """Return the range tagger paired with the value 1."""
        range_tagger = self.RangeTagger(self.expr, self.excl_start,
                                        self.excl_end)
        return [(range_tagger, 1)]
def __init__(self, years, months, weeks, days, hours, minutes, seconds):
    """Build the expression matching a signed relative offset.

    Each argument is a pattern that is interpolated after the digits of
    the corresponding component. Every component is optional, and they
    must appear in the order years, months, weeks, days, hours, minutes,
    seconds.
    """
    units = (("years", years), ("months", months), ("weeks", weeks),
             ("days", days), ("hours", hours), ("mins", minutes),
             ("secs", seconds))

    # One optional "<digits> <unit>" group per component
    components = [("((?P<" + group + ">[0-9]+) *(%s))?") % unit
                  for group, unit in units]

    self.pattern = ("(?P<dir>[+-]) *" + " *".join(components)
                    + "(?=(\\W|$))")
    self.expr = rcompile(self.pattern, re.IGNORECASE)
def __init__(self, years, months, weeks, days, hours, minutes, seconds):
    """Compile the pattern for a "+"/"-" sign followed by optional parts.

    :param years: pattern placed after the digits of the ``years`` group.
    :param months: pattern placed after the digits of the ``months`` group.
    :param weeks: pattern placed after the digits of the ``weeks`` group.
    :param days: pattern placed after the digits of the ``days`` group.
    :param hours: pattern placed after the digits of the ``hours`` group.
    :param minutes: pattern placed after the digits of the ``mins`` group.
    :param seconds: pattern placed after the digits of the ``secs`` group.
    """
    pieces = []
    for group, unit in (("years", years), ("months", months),
                        ("weeks", weeks), ("days", days),
                        ("hours", hours), ("mins", minutes),
                        ("secs", seconds)):
        # Optional "<digits> <unit>" component
        pieces.append(("((?P<" + group + ">[0-9]+) *(%s))?") % unit)

    sign = "(?P<dir>[+-]) *"
    boundary = "(?=(\\W|$))"
    self.pattern = sign + " *".join(pieces) + boundary
    self.expr = rcompile(self.pattern, re.IGNORECASE)
def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", name=None,
             progressive=False):
    """
    :param elements: the sequence of sub-elements to parse.
    :param sep: a separator regular expression to match between elements,
        or None to not have separators.
    :param name: a name for this element (for debugging purposes only).
    :param progressive: if True, elements after the first do not need to
        match. That is, for elements (a, b, c) and progressive=True, the
        sequence matches like ``a[b[c]]``.
    """

    super(Sequence, self).__init__(elements, name)
    self.sep_pattern = sep
    # Only compile a separator expression when a separator was given
    self.sep_expr = rcompile(sep, re.IGNORECASE) if sep else None
    self.progressive = progressive
def __init__(self, expr=None):
    """Compile the given expression, or the existing ``self.expr``.

    :param expr: expression to use; when falsy, the ``expr`` attribute
        already reachable through ``self`` is compiled instead.
    """
    chosen = expr or self.expr
    self.expr = rcompile(chosen)
class PhrasePlugin(Plugin):
    """Adds the ability to specify phrase queries inside double quotes.
    """

    # Didn't use TaggingPlugin because I need to add slop parsing at some
    # point

    # Expression used to find words if a schema isn't available
    wordexpr = rcompile(r'\S+')

    class PhraseNode(syntax.TextNode):
        """Syntax node holding the text of a quoted phrase.

        :param text: the text found between the quotes.
        :param textstartchar: character offset at which ``text`` starts;
            added to token/word offsets when computing character ranges.
        :param slop: slop value passed on to the phrase query.
        """

        def __init__(self, text, textstartchar, slop=1):
            syntax.TextNode.__init__(self, text)
            self.textstartchar = textstartchar
            self.slop = slop

        def r(self):
            return "%s %r~%s" % (self.__class__.__name__, self.text,
                                 self.slop)

        def apply(self, fn):
            """Return this node unchanged.

            A phrase node holds text rather than child nodes, so there is
            nothing to apply ``fn`` to. Rebuilding the node from
            ``type``/``nodes``/``boost`` arguments is not possible because
            ``__init__`` only accepts ``text``, ``textstartchar`` and
            ``slop``.
            """
            return self

        def query(self, parser):
            """Build a phrase query (``parser.phraseclass``) for this node."""
            text = self.text
            fieldname = self.fieldname or parser.fieldname

            # We want to process the text of the phrase into "words" (tokens),
            # and also record the startchar and endchar of each word

            sc = self.textstartchar
            if parser.schema and fieldname in parser.schema:
                field = parser.schema[fieldname]
                if field.analyzer:
                    # We have a field with an analyzer, so use it to parse
                    # the phrase into tokens
                    tokens = field.tokenize(text, mode="query", chars=True)
                    words = []
                    char_ranges = []
                    for t in tokens:
                        words.append(t.text)
                        char_ranges.append((sc + t.startchar,
                                            sc + t.endchar))
                else:
                    # We have a field but it doesn't have an analyzer, for
                    # some reason (it's self-parsing?), so use process_text
                    # to get the texts (we won't know the start/end chars)
                    words = list(field.process_text(text, mode="query"))
                    char_ranges = [(None, None)] * len(words)
            else:
                # We're parsing without a schema, so just use the default
                # regular expression to break the text into words
                words = []
                char_ranges = []
                for match in PhrasePlugin.wordexpr.finditer(text):
                    words.append(match.group(0))
                    char_ranges.append((sc + match.start(),
                                        sc + match.end()))

            qclass = parser.phraseclass
            q = qclass(fieldname, words, slop=self.slop, boost=self.boost,
                       char_ranges=char_ranges)
            return attach(q, self)

    class PhraseTagger(RegexTagger):
        def create(self, parser, match):
            # Record where the phrase text starts so character ranges can
            # be computed relative to the whole query string
            return PhrasePlugin.PhraseNode(match.group("text"),
                                           match.start("text"))

    def __init__(self, expr='"(?P<text>.*?)"'):
        """
        :param expr: expression used to find phrases; it must define a
            ``text`` named group.
        """
        self.expr = expr

    def taggers(self, parser):
        """Return the phrase tagger paired with the value 0."""
        return [(self.PhraseTagger(self.expr), 0)]
def __init__(self, pattern, fn=None, modify=None):
    """Keep the pattern, its compiled form and the two optional callbacks.

    :param pattern: regular expression pattern; stored as given and also
        compiled case-insensitively into ``self.expr``.
    :param fn: stored as ``self.fn``.
    :param modify: stored as ``self.modify``.
    """
    self.pattern = pattern
    self.expr = rcompile(pattern, re.IGNORECASE)
    self.fn, self.modify = fn, modify
def __init__(self):
    """Compile the expression for a 12-hour time followed by am/pm.

    The pattern has an ``hour`` group (1-12), optional ``mins``, ``secs``
    and ``usecs`` groups, and a required ``ampm`` group.
    """
    hour = "(?P<hour>[1-9]|10|11|12)"
    # Seconds, with an optional fractional part of one to five digits
    seconds = "(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?"
    # Minutes are optional and wrap the optional seconds
    minutes = "(:(?P<mins>[0-5][0-9])" + seconds + ")?"
    meridiem = "\\s*(?P<ampm>am|pm)"
    boundary = "(?=(\\W|$))"

    self.pattern = "".join((hour, minutes, meridiem, boundary))
    self.expr = rcompile(self.pattern, re.IGNORECASE)
def __init__(self, plugin, expr):
    """Remember the owning plugin and compile the expression.

    :param plugin: stored as ``self.plugin``.
    :param expr: expression compiled case-insensitively into ``self.expr``.
    """
    self.plugin = plugin
    compiled = rcompile(expr, re.IGNORECASE)
    self.expr = compiled
def __init__(self, expression="[^/]+"):
    """Compile the expression with the ``re.UNICODE`` flag.

    :param expression: regular expression to compile; the default matches
        a run of one or more characters other than ``/``.
    """
    compiled = rcompile(expression, re.UNICODE)
    self.expr = compiled
def __init__(self, expr):
    """Compile ``expr`` with ``rcompile`` and store it as ``self.expr``.

    :param expr: the expression to compile.
    """
    compiled = rcompile(expr)
    self.expr = compiled