def _path_trigram_filter(path, is_case_sensitive):
    """Build an ES filter clause for docs whose paths end with ``path``.

    The match always extends to the very end of the stored path. A leading
    ``/`` means the user explicitly wants the match to begin at the root
    level; without it, the match may begin at any path segment.

    :arg path: The path, or trailing portion of a path, to look for
    :arg is_case_sensitive: Handed straight through to ``es_regex_filter``

    """
    rooted = path.startswith('/')
    if rooted:
        # Leading slashes aren't stored in the index, so drop this one.
        path = path[1:]

    # A rooted search must start at the beginning; otherwise any segment
    # boundary will do.
    template = '^{0}$' if rooted else '(/|^){0}$'

    # Non-ASCII characters become backslash escapes before the whole path is
    # escaped for use as a regex literal.
    ascii_path = path.encode('ascii', 'backslashreplace')
    pattern = template.format(re.escape(ascii_path))

    return es_regex_filter(regex_grammar.parse(pattern),
                           'path',
                           is_case_sensitive)
def test_python_visitor():
    """Check that a parse tree can be rendered back out as a Python regex.

    So far there is only a single difference from the JS rendering.

    """
    visitor = PythonRegexVisitor()
    tree = regex_grammar.parse(r'\a')
    rendered = visitor.visit(tree)
    eq_(rendered, r'\a')
def filter(self):
    """Return an ES regex filter matching this term's glob against "path".

    The glob is converted to a regex, parsed, and handed to
    ``es_regex_filter`` together with the term's case-sensitivity flag.

    Raise BadTerm if building the filter raises NoTrigrams.

    """
    term = self._term
    pattern = term["arg"]
    try:
        tree = regex_grammar.parse(glob_to_regex(pattern))
        return es_regex_filter(
            tree,
            "path",
            is_case_sensitive=term["case_sensitive"],
        )
    except NoTrigrams:
        message = ("Path globs need at least 3 literal characters in a row "
                   "for speed.")
        raise BadTerm(message)
def js_eq(regex, expected):
    """Assert that a regex survives a trip through the AST as expected JS.

    The pattern is taken apart into a parse tree, a JS regex is rebuilt from
    that tree, and the rebuilt text is compared with ``expected``.

    :arg regex: A string representing a regex pattern
    :arg expected: What to compare the reconstructed regex against

    """
    visitor = JsRegexVisitor()
    tree = regex_grammar.parse(regex)
    rebuilt = visitor.visit(tree)
    eq_(rebuilt, expected)
def filter(self):
    """Turn this term's path glob into an ES regex filter on 'path'.

    Case sensitivity comes from the term itself. If the filter can't be
    built because NoTrigrams is raised, report it to the user as a BadTerm.

    """
    query = self._term
    glob = query['arg']
    try:
        parsed_glob = regex_grammar.parse(glob_to_regex(glob))
        es_filter = es_regex_filter(
            parsed_glob,
            'path',
            is_case_sensitive=query['case_sensitive'])
    except NoTrigrams:
        raise BadTerm('Path globs need at least 3 literal characters in a row '
                      'for speed.')
    return es_filter
def _regex_filter(self, path_seg_property_name, no_trigrams_error_text):
    """Return an ES regex filter matching this filter's glob against the
    path segment stored at ``path_seg_property_name``.

    :arg path_seg_property_name: The property the regex filter is applied to
    :arg no_trigrams_error_text: Message for the BadTerm raised when
        building the filter raises NoTrigrams

    """
    term = self._term
    glob = term['arg']
    try:
        parsed_glob = regex_grammar.parse(glob_to_regex(glob))
        return es_regex_filter(
            parsed_glob,
            path_seg_property_name,
            is_case_sensitive=term['case_sensitive'],
        )
    except NoTrigrams:
        raise BadTerm(no_trigrams_error_text)
def _path_trigram_filter(path, is_case_sensitive):
    """Return an ES filter clause selecting docs whose paths match ``path``
    all the way to their ends.

    A path that begins with ``/`` is an explicit request to match from the
    root level; any other path may match starting at any path segment.

    :arg path: The path, or trailing portion of a path, to look for
    :arg is_case_sensitive: Handed straight through to ``es_regex_filter``

    """
    if path.startswith('/'):
        # Leading slashes aren't stored in the index.
        path = path[1:]
        # Insist the match start at the beginning.
        template = '^{0}$'
    else:
        # Let the match start at any path segment.
        template = '(/|^){0}$'

    escaped_path = re.escape(path)
    tree = regex_grammar.parse(template.format(escaped_path))
    return es_regex_filter(tree, 'path', is_case_sensitive)
def __init__(self, term, enabled_plugins):
    """Parse the term's regex and precompile its Python equivalent.

    Compiling up front means highlighting doesn't have to lean on Python's
    regex cache, which is naive: once it hits 100 entries it simply clears
    itself rather than evicting LRU-style.

    Raise BadTerm if the regex can't be parsed.

    """
    super(RegexpFilter, self).__init__(term, enabled_plugins)

    try:
        parsed = regex_grammar.parse(term['arg'])
    except ParseError:
        raise BadTerm('Invalid regex.')
    self._parsed_regex = parsed

    # Render the pattern first, then work out the flags, so any failure
    # surfaces in the same order as before.
    python_pattern = PythonRegexVisitor().visit(self._parsed_regex)
    if self._term['case_sensitive']:
        flags = 0
    else:
        flags = re.I
    self._compiled_regex = re.compile(python_pattern, flags=flags)
def test_parse_regexp():
    """Exercise the regex grammar on patterns it must accept and reject."""
    # Each of these must parse without raising.
    accepted_patterns = (
        'hello+ dolly',
        'hello+|hi',
        r'(hello|hi) dolly',
        r'(hello|hi|) dolly',
        r'(hello||hi) dolly',
        r'|hello|hi',
        # Non-ASCII input. Written as u'...' with an escaped backslash
        # instead of ur'...': the value is the same on Python 2, and the
        # ``ur`` prefix is a syntax error on Python 3.
        u'aböut \\d{2}',
    )
    for pattern in accepted_patterns:
        regex_grammar.parse(pattern)

    # An unterminated character class is malformed.
    assert_raises(ParseError, regex_grammar.parse, '[smoo')
    # This isn't supported yet, so it's better to throw an error than to
    # quietly misinterpret the user's intent:
    assert_raises(ParseError, regex_grammar.parse, '(?:hi)')
def visit_regex(regex):
    """Parse ``regex`` and return the result of running a
    SubstringTreeVisitor over the resulting tree.
    """
    visitor = SubstringTreeVisitor()
    tree = regex_grammar.parse(regex)
    return visitor.visit(tree)