Example #1
0
    def __init__(self, *patterns):
        """Build one expression per pattern plus a combined alternation.

        :param patterns: regular expression pattern strings, given as
            positional arguments. NOTE(review): presumably one pattern per
            month name -- confirm against the callers.
        """

        flags = re.IGNORECASE
        self.patterns = patterns

        # One expression for each individual pattern
        self.exprs = [rcompile(single, flags) for single in patterns]

        # A single alternation of all the patterns (each wrapped in its own
        # group), captured under the group name "month"
        alternatives = ["(%s)" % single for single in patterns]
        self.pattern = "(?P<month>" + "|".join(alternatives) + ")"
        self.expr = rcompile(self.pattern, flags)
Example #2
0
    def __init__(self, *patterns):
        """Store the patterns and prepare individual and combined expressions.

        :param patterns: regular expression pattern strings, given as
            positional arguments. NOTE(review): presumably one pattern per
            month name -- confirm against the callers.
        """

        self.patterns = patterns

        # Individual expressions, in the same order as the patterns
        compiled = []
        for source in patterns:
            compiled.append(rcompile(source, re.IGNORECASE))
        self.exprs = compiled

        # Every pattern gets its own group inside one "month" named group
        grouped = "|".join("(%s)" % source for source in patterns)
        self.pattern = "(?P<month>" + grouped + ")"
        self.expr = rcompile(self.pattern, re.IGNORECASE)
Example #3
0
 def __init__(self, next, last, daynames):
     """Set up the expression for a direction word followed by a day name.

     :param next: pattern used as the first alternative of the ``dir``
         group; also kept as ``self.next_pattern``.
     :param last: pattern used as the second alternative of the ``dir``
         group; also kept as ``self.last_pattern``.
     :param daynames: the day name patterns. Each one is compiled on its
         own, and all of them are joined with ``|`` to form the ``day``
         group.
     """

     self.next_pattern = next
     self.last_pattern = last

     # Individual day expressions, kept in the order they were given
     compiled_days = [rcompile(day, re.IGNORECASE) for day in daynames]
     self._dayname_exprs = tuple(compiled_days)

     # Direction, one or more spaces, a day name, then a lookahead for a
     # non-word character or the end of the text
     day_alternation = "|".join(daynames)
     template = "(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))"
     self.pattern = template % (next, last, day_alternation)
     self.expr = rcompile(self.pattern, re.IGNORECASE)
Example #4
0
 def __init__(self, next, last, daynames):
     """Prepare the "<direction> <day name>" expression.

     :param next: pattern for the first alternative of the ``dir`` group;
         stored as ``self.next_pattern``.
     :param last: pattern for the second alternative of the ``dir`` group;
         stored as ``self.last_pattern``.
     :param daynames: the day name patterns; compiled one by one and also
         combined into the alternation used for the ``day`` group.
     """

     flags = re.IGNORECASE
     self.next_pattern, self.last_pattern = next, last

     # One expression per day name pattern
     self._dayname_exprs = tuple([rcompile(name, flags) for name in daynames])

     # The combined pattern ends with a lookahead for a non-word character
     # or the end of the text
     any_day = "|".join(daynames)
     self.pattern = "(?P<dir>%s|%s) +(?P<day>%s)(?=(\\W|$))" % (
         next, last, any_day)
     self.expr = rcompile(self.pattern, flags)
Example #5
0
    def __init__(self,
                 elements,
                 sep="(\\s+|\\s*,\\s*)",
                 onceper=True,
                 requireall=False,
                 allof=None,
                 anyof=None,
                 name=None):
        """
        :param elements: the sub-elements to parse.
        :param sep: a separator regular expression to match between elements,
            or None to not have separators.
        :param onceper: only allow each element to match once.
        :param requireall: if True, the sub-elements can match in any order,
            but they must all match.
        :param allof: a list of indexes into the list of elements. When this
            argument is not None, this element matches only if all the
            indicated sub-elements match.
        :param anyof: a list of indexes into the list of elements. When this
            argument is not None, this element matches only if any of the
            indicated sub-elements match.
        :param name: a name for this element (for debugging purposes only).
        """

        super(Bag, self).__init__(elements, name)

        # Matching constraints are stored exactly as given
        self.onceper = onceper
        self.requireall = requireall
        self.allof = allof
        self.anyof = anyof

        # The separator is handed to rcompile with the IGNORECASE flag
        separator = rcompile(sep, re.IGNORECASE)
        self.sep_expr = separator
Example #6
0
class RangePlugin(Plugin):
    """Adds the ability to specify term ranges.

    A range is written as an opening ``[`` or ``{``, an optional start term,
    the word "to" (in any letter case), an optional end term, and a closing
    ``]`` or ``}``. Either term may be wrapped in single quotes. By default
    the curly brackets mark the corresponding end of the range as exclusive.
    """

    expr = rcompile(r"""
    (?P<open>\{|\[)               # Open paren
    (?P<start>
        ('[^']*?'\s+)             # single-quoted 
        |                         # or
        (.+?(?=[Tt][Oo]))         # everything until "to"
    )?
    [Tt][Oo]                      # "to"
    (?P<end>
        (\s+'[^']*?')             # single-quoted
        |                         # or
        ((.+?)(?=]|}))            # everything until "]" or "}"
    )?
    (?P<close>}|])                # Close paren
    """,
                    verbose=True)

    class RangeTagger(RegexTagger):
        """Turns a match of the range expression into a ``RangeNode``."""

        def __init__(self, expr, excl_start, excl_end):
            """
            :param expr: the range expression; stored as given.
            :param excl_start: the opening bracket that marks the start of
                the range as exclusive.
            :param excl_end: the closing bracket that marks the end of the
                range as exclusive.
            """
            self.expr = expr
            self.excl_start = excl_start
            self.excl_end = excl_end

        @staticmethod
        def _unquote(text):
            # Remove one pair of surrounding single quotes, if present
            if text.startswith("'") and text.endswith("'"):
                return text[1:-1]
            return text

        def create(self, parser, match):
            """Build a ``syntax.RangeNode`` from the groups of ``match``.

            :param parser: not used by this method.
            :param match: a match object produced by the range expression.
            """
            lower = match.group("start")
            upper = match.group("end")

            if lower:
                # Drop the space between the start term and the "to"
                lower = self._unquote(lower.rstrip())
            if upper:
                # Drop the space between the "to" and the end term
                upper = self._unquote(upper.lstrip())

            # The kind of bracket decides whether each end is exclusive
            opener = match.group("open")
            closer = match.group("close")
            return syntax.RangeNode(lower, upper,
                                    opener == self.excl_start,
                                    closer == self.excl_end)

    def __init__(self, expr=None, excl_start="{", excl_end="}"):
        """
        :param expr: an expression to use in place of the class-level
            default; any falsy value keeps the default.
        :param excl_start: the opening bracket that marks an exclusive start.
        :param excl_end: the closing bracket that marks an exclusive end.
        """
        self.expr = expr if expr else self.expr
        self.excl_start = excl_start
        self.excl_end = excl_end

    def taggers(self, parser):
        """Return a one-item list pairing a new ``RangeTagger`` with ``1``.

        NOTE(review): the number is presumably the tagger's priority --
        confirm against the Plugin base class.
        """
        return [(self.RangeTagger(self.expr, self.excl_start, self.excl_end),
                 1)]
Example #7
0
    def __init__(self, years, months, weeks, days, hours, minutes, seconds):
        """Build the expression for a signed, relative amount of time.

        Each argument is the pattern for the word(s) naming that unit. The
        resulting pattern starts with a ``+`` or ``-`` (group ``dir``),
        followed by optional "<digits> <unit>" parts in the fixed order
        years, months, weeks, days, hours, minutes, seconds.

        :param years: pattern for the years unit (digits in group ``years``).
        :param months: pattern for the months unit (group ``months``).
        :param weeks: pattern for the weeks unit (group ``weeks``).
        :param days: pattern for the days unit (group ``days``).
        :param hours: pattern for the hours unit (group ``hours``).
        :param minutes: pattern for the minutes unit (group ``mins``).
        :param seconds: pattern for the seconds unit (group ``secs``).
        """

        # Every part is optional as a whole: digits, optional spaces, unit
        parts = (
            "((?P<years>[0-9]+) *(%s))?" % years,
            "((?P<months>[0-9]+) *(%s))?" % months,
            "((?P<weeks>[0-9]+) *(%s))?" % weeks,
            "((?P<days>[0-9]+) *(%s))?" % days,
            "((?P<hours>[0-9]+) *(%s))?" % hours,
            "((?P<mins>[0-9]+) *(%s))?" % minutes,
            "((?P<secs>[0-9]+) *(%s))?" % seconds,
        )

        # Sign first, the parts separated by optional spaces, then a
        # lookahead for a non-word character or the end of the text
        self.pattern = "(?P<dir>[+-]) *" + " *".join(parts) + "(?=(\\W|$))"
        self.expr = rcompile(self.pattern, re.IGNORECASE)
Example #8
0
    def __init__(self, years, months, weeks, days, hours, minutes, seconds):
        """Prepare the pattern for a "+"/"-" prefixed relative time.

        Every argument is the pattern for the word(s) naming one unit. Each
        unit contributes an optional "<digits> <unit>" part whose digits are
        captured under a named group: ``years``, ``months``, ``weeks``,
        ``days``, ``hours``, ``mins`` and ``secs`` respectively. The sign is
        captured under the group name ``dir``.
        """

        year_part = "((?P<years>[0-9]+) *(%s))?" % years
        month_part = "((?P<months>[0-9]+) *(%s))?" % months
        week_part = "((?P<weeks>[0-9]+) *(%s))?" % weeks
        day_part = "((?P<days>[0-9]+) *(%s))?" % days
        hour_part = "((?P<hours>[0-9]+) *(%s))?" % hours
        minute_part = "((?P<mins>[0-9]+) *(%s))?" % minutes
        second_part = "((?P<secs>[0-9]+) *(%s))?" % seconds

        # Largest unit first; optional spaces are allowed between the parts
        ordered = [year_part, month_part, week_part, day_part, hour_part,
                   minute_part, second_part]
        sign = "(?P<dir>[+-]) *"
        # The match must be followed by a non-word character or the end
        boundary = "(?=(\\W|$))"

        self.pattern = sign + " *".join(ordered) + boundary
        self.expr = rcompile(self.pattern, re.IGNORECASE)
Example #9
0
    def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", name=None,
                 progressive=False):
        """
        :param elements: the sequence of sub-elements to parse.
        :param sep: a separator regular expression to match between elements,
            or None to not have separators.
        :param name: a name for this element (for debugging purposes only).
        :param progressive: if True, elements after the first do not need to
            match. That is, for elements (a, b, c) and progressive=True, the
            sequence matches like ``a[b[c]]``.
        """

        super(Sequence, self).__init__(elements, name)
        self.progressive = progressive

        # Keep the raw separator; only a truthy separator gets an expression
        self.sep_pattern = sep
        self.sep_expr = rcompile(sep, re.IGNORECASE) if sep else None
Example #10
0
    def __init__(self,
                 elements,
                 sep="(\\s+|\\s*,\\s*)",
                 name=None,
                 progressive=False):
        """
        :param elements: the sequence of sub-elements to parse.
        :param sep: a separator regular expression to match between elements,
            or None to not have separators.
        :param name: a name for this element (for debugging purposes only).
        :param progressive: if True, elements after the first do not need to
            match. That is, for elements (a, b, c) and progressive=True, the
            sequence matches like ``a[b[c]]``.
        """

        super(Sequence, self).__init__(elements, name)

        # Without a (truthy) separator there is no separator expression
        separator = None
        if sep:
            separator = rcompile(sep, re.IGNORECASE)

        self.sep_pattern = sep
        self.sep_expr = separator
        self.progressive = progressive
Example #11
0
    def __init__(self, elements, sep="(\\s+|\\s*,\\s*)", onceper=True,
                 requireall=False, allof=None, anyof=None, name=None):
        """
        :param elements: the sub-elements to parse.
        :param sep: a separator regular expression to match between elements,
            or None to not have separators.
        :param onceper: only allow each element to match once.
        :param requireall: if True, the sub-elements can match in any order,
            but they must all match.
        :param allof: a list of indexes into the list of elements. When this
            argument is not None, this element matches only if all the
            indicated sub-elements match.
        :param anyof: a list of indexes into the list of elements. When this
            argument is not None, this element matches only if any of the
            indicated sub-elements match.
        :param name: a name for this element (for debugging purposes only).
        """

        super(Bag, self).__init__(elements, name)

        # Store the matching constraints as they were passed in
        self.onceper, self.requireall = onceper, requireall
        self.allof, self.anyof = allof, anyof

        # The separator goes through rcompile with the IGNORECASE flag
        self.sep_expr = rcompile(sep, re.IGNORECASE)
Example #12
0
 def __init__(self, expr=None):
     """
     :param expr: the expression to use. When it is omitted (or otherwise
         falsy), the ``expr`` attribute already reachable through ``self``
         is used instead.
     """

     chosen = expr if expr else self.expr
     self.expr = rcompile(chosen)
Example #13
0
class PhrasePlugin(Plugin):
    """Adds the ability to specify phrase queries inside double quotes.
    """

    # Didn't use TaggingPlugin because I need to add slop parsing at some
    # point

    # Expression used to find words if a schema isn't available
    wordexpr = rcompile(r'\S+')

    class PhraseNode(syntax.TextNode):
        """Syntax node holding the text found between the double quotes."""

        def __init__(self, text, textstartchar, slop=1):
            """
            :param text: the text of the phrase (without the quotes).
            :param textstartchar: the offset at which ``text`` starts; it is
                added to the word offsets when computing character ranges.
            :param slop: the slop value handed to the phrase query.
            """
            syntax.TextNode.__init__(self, text)
            self.textstartchar = textstartchar
            self.slop = slop

        def r(self):
            """Return a short description of this node: class name, text
            and slop.
            """
            return "%s %r~%s" % (self.__class__.__name__, self.text, self.slop)

        def apply(self, fn):
            """Return this node unchanged.

            This class keeps a phrase as text and defines no child nodes, so
            there is nothing to map ``fn`` over. The previous body called
            the constructor with ``(type, nodes, slop=..., boost=...)``,
            which ``__init__(text, textstartchar, slop)`` does not accept,
            so calling it could only raise.

            NOTE(review): confirm that returning ``self`` is what the base
            node class expects from nodes without children.
            """
            return self

        def query(self, parser):
            """Build a phrase query from this node's text.

            The text is split into words, and for each word the start and
            end character positions are recorded where they are known. The
            query is created with ``parser.phraseclass`` and passed through
            ``attach`` together with this node.

            :param parser: supplies the default field name, the schema (if
                any) and the phrase query class.
            """
            text = self.text
            fieldname = self.fieldname or parser.fieldname

            # We want to process the text of the phrase into "words" (tokens),
            # and also record the startchar and endchar of each word

            sc = self.textstartchar
            if parser.schema and fieldname in parser.schema:
                field = parser.schema[fieldname]
                if field.analyzer:
                    # We have a field with an analyzer, so use it to parse
                    # the phrase into tokens
                    tokens = field.tokenize(text, mode="query", chars=True)
                    words = []
                    char_ranges = []
                    for t in tokens:
                        words.append(t.text)
                        char_ranges.append((sc + t.startchar, sc + t.endchar))
                else:
                    # We have a field but it doesn't have an analyzer, for
                    # some reason (it's self-parsing?), so use process_text
                    # to get the texts (we won't know the start/end chars)
                    words = list(field.process_text(text, mode="query"))
                    char_ranges = [(None, None)] * len(words)
            else:
                # We're parsing without a schema, so just use the default
                # regular expression to break the text into words
                words = []
                char_ranges = []
                for match in PhrasePlugin.wordexpr.finditer(text):
                    words.append(match.group(0))
                    char_ranges.append((sc + match.start(), sc + match.end()))

            qclass = parser.phraseclass
            q = qclass(fieldname,
                       words,
                       slop=self.slop,
                       boost=self.boost,
                       char_ranges=char_ranges)
            return attach(q, self)

    class PhraseTagger(RegexTagger):
        """Creates a ``PhraseNode`` from a match of the phrase expression."""

        def create(self, parser, match):
            """Return a ``PhraseNode`` for the ``text`` group of ``match``,
            remembering the position at which that group starts.
            """
            return PhrasePlugin.PhraseNode(match.group("text"),
                                           match.start("text"))

    def __init__(self, expr='"(?P<text>.*?)"'):
        """
        :param expr: the expression used to find phrases. The tagger reads
            the phrase from a group named ``text``.
        """
        self.expr = expr

    def taggers(self, parser):
        """Return a one-item list pairing a new ``PhraseTagger`` with ``0``.

        NOTE(review): the number is presumably the tagger's priority --
        confirm against the Plugin base class.
        """
        return [(self.PhraseTagger(self.expr), 0)]
Example #14
0
 def __init__(self, pattern, fn=None, modify=None):
     """
     :param pattern: the regular expression pattern; kept as
         ``self.pattern`` and also passed to ``rcompile`` with the
         IGNORECASE flag to produce ``self.expr``.
     :param fn: stored as ``self.fn``.
     :param modify: stored as ``self.modify``.
     """

     self.fn, self.modify = fn, modify
     self.pattern = pattern
     self.expr = rcompile(pattern, re.IGNORECASE)
Example #15
0
 def __init__(self):
     """Build the expression for a 12-hour time followed by "am" or "pm".

     The hour (1-12) is required; minutes, seconds and a fractional part
     are each optional and nested in that order.
     """

     hour = "(?P<hour>[1-9]|10|11|12)"
     # The fraction is only allowed after seconds, seconds only after minutes
     fraction = "(\\.(?P<usecs>[0-9]{1,5}))?"
     seconds = "(:(?P<secs>[0-5][0-9])" + fraction + ")?"
     minutes = "(:(?P<mins>[0-5][0-9])" + seconds + ")?"
     meridiem = "\\s*(?P<ampm>am|pm)"
     # Must be followed by a non-word character or the end of the text
     boundary = "(?=(\\W|$))"

     self.pattern = hour + minutes + meridiem + boundary
     self.expr = rcompile(self.pattern, re.IGNORECASE)
Example #16
0
 def __init__(self, plugin, expr):
     """
     :param plugin: stored as ``self.plugin``.
     :param expr: the expression; passed to ``rcompile`` with the
         IGNORECASE flag and stored as ``self.expr``.
     """

     compiled = rcompile(expr, re.IGNORECASE)
     self.plugin = plugin
     self.expr = compiled
Example #17
0
 def __init__(self):
     """Prepare the pattern for times such as "<hour>[:mm[:ss[.f]]] am|pm".

     Only the hour (1-12) and the am/pm marker are required.
     """

     pieces = [
         "(?P<hour>[1-9]|10|11|12)",
         # Optional minutes, opening a group that is closed further down
         "(:(?P<mins>[0-5][0-9])",
         # Optional seconds with an optional fraction, then the closing of
         # the minutes group
         "(:(?P<secs>[0-5][0-9])(\\.(?P<usecs>[0-9]{1,5}))?)?)?",
         # Optional whitespace, then the marker, then a lookahead for a
         # non-word character or the end of the text
         "\\s*(?P<ampm>am|pm)(?=(\\W|$))",
     ]
     self.pattern = "".join(pieces)
     self.expr = rcompile(self.pattern, re.IGNORECASE)
Example #18
0
 def __init__(self, plugin, expr):
     """
     :param plugin: kept on the instance as ``self.plugin``.
     :param expr: the expression; handed to ``rcompile`` together with the
         IGNORECASE flag, with the result kept as ``self.expr``.
     """

     flags = re.IGNORECASE
     self.plugin = plugin
     self.expr = rcompile(expr, flags)
Example #19
0
 def __init__(self, expr=None):
     """
     :param expr: the expression to use; a falsy value (such as the default
         None) falls back to the ``expr`` attribute already reachable
         through ``self``.
     """

     source = expr or self.expr
     compiled = rcompile(source)
     self.expr = compiled
Example #20
0
 def __init__(self, pattern, fn=None, modify=None):
     """
     :param pattern: the regular expression pattern. The raw value is kept
         as ``self.pattern``; the result of ``rcompile`` (called with the
         IGNORECASE flag) is kept as ``self.expr``.
     :param fn: kept on the instance as ``self.fn``.
     :param modify: kept on the instance as ``self.modify``.
     """

     flags = re.IGNORECASE
     self.pattern = pattern
     self.expr = rcompile(self.pattern, flags)
     self.modify = modify
     self.fn = fn
Example #21
0
 def __init__(self, expression="[^/]+"):
     """
     :param expression: the pattern passed to ``rcompile`` with the UNICODE
         flag. The default matches a run of one or more characters other
         than ``/``.
     """

     flags = re.UNICODE
     self.expr = rcompile(expression, flags)
Example #22
0
 def __init__(self, expr):
     """
     :param expr: the expression; passed to ``rcompile`` and the result is
         stored as ``self.expr``.
     """

     compiled = rcompile(expr)
     self.expr = compiled
Example #23
0
 def __init__(self, expression="[^/]+"):
     """
     :param expression: the pattern handed to ``rcompile`` together with
         the UNICODE flag. By default it matches one or more characters
         that are not ``/``.
     """

     compiled = rcompile(expression, re.UNICODE)
     self.expr = compiled