def tag(self, text, pos=0, debug=False):
    """Returns a group of syntax nodes corresponding to the given text,
    created by matching the Taggers provided by the parser's plugins.

    At each position the taggers are tried in the order returned by
    ``self.taggers()``; the first one whose ``match()`` returns a node wins
    and the cursor jumps to that node's ``endchar``. Text that no tagger
    claims is wrapped in ``syntax.WordNode`` objects placed between the
    matched nodes.

    :param text: the text to tag.
    :param pos: the position in the text to start tagging at.
    :param debug: if true, progress messages are emitted through
        ``print_debug``.
    :returns: the result of calling ``self.group()`` on the list of nodes.
    :raises Exception: if a tagger returns a node whose ``endchar`` is not
        past the current position (the cursor would never advance).
    """

    # The list of output nodes
    stack = []
    # End position of the previous match
    prev = pos
    # Prioritized list of taggers provided by the parser's plugins
    taggers = self.taggers()
    if debug:
        print_debug(debug, "Taggers: %r" % (taggers,))

    # Makes a WordNode from the "interstitial" text between matches
    def inter(startchar, endchar):
        n = syntax.WordNode(text[startchar:endchar])
        n.startchar = startchar
        n.endchar = endchar
        return n

    textlen = len(text)
    while pos < textlen:
        node = None
        # Try each tagger to see if it matches at the current position
        for tagger in taggers:
            node = tagger.match(self, text, pos)
            if node is None:
                continue

            if node.endchar <= pos:
                raise Exception("Token %r did not move cursor forward."
                                " (%r, %s)" % (tagger, text, pos))

            # Emit any unmatched text between the previous match and here
            if prev < pos:
                tween = inter(prev, pos)
                if debug:
                    print_debug(debug, "Tween: %r" % (tween,))
                stack.append(tween)

            if debug:
                print_debug(debug, "Tagger: %r at %s: %r"
                            % (tagger, pos, node))
            stack.append(node)
            prev = pos = node.endchar
            break

        # Compare against None rather than testing truthiness: a matched
        # node may evaluate as false (e.g. an empty container-like node),
        # and the cursor has already been moved to its endchar above.
        if node is None:
            # No taggers matched, move forward
            pos += 1

    # If there's unmatched text left over on the end, put it in a WordNode
    if prev < textlen:
        stack.append(inter(prev, textlen))

    # Wrap the list of nodes in a group node
    group = self.group(stack)
    if debug:
        print_debug(debug, "Tagged group: %r" % (group,))
    return group
def parse(self, text, normalize=True, debug=False):
    """Parses the input string and returns a :class:`whoosh.query.Query`
    object/tree.

    :param text: the unicode string to parse. A value that is not an
        instance of ``text_type`` is decoded as Latin-1 first.
    :param normalize: whether to call normalize() on the query object/tree
        before returning it. This should be left on unless you're trying to
        debug the parser output.
    :param debug: if true, the intermediate syntax tree and queries are
        emitted through ``print_debug``; the value is also passed on to
        ``self.process()``.
    :rtype: :class:`whoosh.query.Query`
    """

    if not isinstance(text, text_type):
        text = text.decode("latin1")

    nodes = self.process(text, debug=debug)
    # Only build the (potentially large) repr strings when debugging is
    # actually on, as tag() in this file already does.
    if debug:
        print_debug(debug, "Syntax tree: %r" % (nodes,))

    q = nodes.query(self)
    if not q:
        # The syntax tree produced nothing usable; fall back to NullQuery
        q = query.NullQuery
    if debug:
        print_debug(debug, "Pre-normalized query: %r" % (q,))

    if normalize:
        q = q.normalize()
        if debug:
            print_debug(debug, "Normalized query: %r" % (q,))
    return q
def filterize(self, nodes, debug=False):
    """Takes a group of nodes and runs the filters provided by the parser's
    plugins.

    Each filter returned by ``self.filters()`` is called as
    ``f(self, nodes)``, and its return value is fed to the next filter.

    :param nodes: the group of nodes to run through the filters.
    :param debug: if true, the group is emitted through ``print_debug``
        before filtering and after each filter.
    :returns: the value returned by the last filter, or ``nodes`` unchanged
        when there are no filters.
    :raises Exception: if a filter returns ``None``.
    """

    # Debug output is guarded so the repr of the whole group is not built
    # for every filter when debugging is off. Operands are wrapped in
    # 1-tuples so a filter returning a tuple cannot break the formatting.
    if debug:
        print_debug(debug, "Pre-filtered group: %r" % (nodes,))

    # Call each filter in the prioritized list of plugin filters
    for f in self.filters():
        if debug:
            print_debug(debug, "..Applying: %r" % (f,))
        nodes = f(self, nodes)
        if debug:
            print_debug(debug, "..Result: %r" % (nodes,))
        if nodes is None:
            raise Exception("Filter %r did not return anything" % (f,))
    return nodes