def test_comment():
    """A ``comment`` operator compiles to an empty literal and vanishes
    from an enclosing concatenation."""
    nothing = asm.Literal('')
    commented_bodies = [
        Either([]),
        Literal('a'),
        Either([Literal('a'), Literal('b')]),
    ]
    for body in commented_bodies:
        assert compile(Operator('comment', body)) == nothing

    # Between two boundary macros the comment leaves no trace: only the
    # two boundary nodes remain in the compiled concatenation.
    surrounded = Concat([
        Macro('#sl'),
        Operator('comment', Literal('yo')),
        Macro('#el'),
    ])
    expected = asm.Concat([asm.Boundary('^', None), asm.Boundary('$', None)])
    assert compile(surrounded) == expected
def test_builtin_macros():
    """Check the compiled form of several built-in macros, including the
    short aliases ``#nlf``, ``#crlf``, ``#sl`` and ``#el``."""
    # NOTE(review): a second ``test_builtin_macros`` follows this one in the
    # visible source; if both live in the same module the later definition
    # shadows this one and it never runs -- confirm.
    assert compile(Macro('#any')) == asm.ANY

    # The long name and its alias must yield the same inverted class.
    not_linefeed = asm.CharacterClass([r'\n'], inverted=True)
    for macro_name in ('#not_linefeed', '#nlf'):
        assert compile(Macro(macro_name)) == not_linefeed

    assert compile(Macro('#crlf')) == asm.Literal('\r\n')

    anchored = Concat([Macro('#sl'), Literal('yo'), Macro('#el')])
    expected = asm.Concat([
        asm.Boundary('^', None),
        asm.Literal('yo'),
        asm.Boundary('$', None),
    ])
    assert compile(anchored) == expected
def test_builtin_macros():
    """Check the compiled form of the built-in macros.

    Covers ``#any``, ``#not_linefeed``, ``#windows_newline``, the line
    boundary macros ``#sl``/``#el`` inside a concatenation, and the macros
    that stand for single punctuation characters.
    """
    assert compile(Macro('#any')) == asm.ANY

    # The original repeated this assertion twice verbatim; once is enough.
    not_linefeed = asm.CharacterClass([r'\n'], inverted=True)
    assert compile(Macro('#not_linefeed')) == not_linefeed

    assert compile(Macro('#windows_newline')) == asm.Literal('\r\n')

    assert compile(Concat([Macro('#sl'), Literal('yo'), Macro('#el')])) == asm.Concat([
        asm.Boundary('^', None),
        asm.Literal('yo'),
        asm.Boundary('$', None),
    ])

    # Macros that compile to a single literal punctuation character.
    assert compile(Macro('#quote')) == asm.Literal("'")
    assert compile(Macro('#double_quote')) == asm.Literal('"')
    assert compile(Macro('#left_brace')) == asm.Literal("[")
    assert compile(Macro('#right_brace')) == asm.Literal(']')
def compile_concat(concat, macros):
    """Compile a concatenation node, honouring macro definitions local to it.

    ``Def`` items found in ``concat.items`` are compiled first, in order,
    and registered in ``macros`` so that the remaining items (and later
    definitions) can refer to them. The registrations are removed again
    before returning, so ``macros`` ends up as it was passed in.

    Args:
        concat: node whose ``items`` attribute is an iterable of sub-nodes.
        macros: mutable mapping of macro name to compiled macro. It is
            extended temporarily while this node is being compiled.

    Returns:
        ``EMPTY`` if no non-empty compiled item remains, the single compiled
        item if exactly one remains, otherwise ``asm.Concat`` wrapping the
        list of compiled items.

    Raises:
        KeyError: if a ``Def`` uses a name that is already in ``macros``.
    """
    # Materialise once: ``items`` is scanned twice below and may be a
    # one-shot iterator (e.g. the result of ``map`` on Python 3).
    items = list(concat.items)
    # Real lists rather than ``filter`` objects: these are iterated more
    # than once, truth-tested and passed to ``len``, none of which works
    # on Python 3's lazy ``filter``.
    defs = [item for item in items if isinstance(item, Def)]
    regexes = [item for item in items if not isinstance(item, Def)]

    registered = []
    try:
        for d in defs:
            if d.name in macros:
                raise KeyError('Macro %s already defined' % d.name)
            macros[d.name] = compile_ast(d.subregex, macros)
            registered.append(d.name)
        compiled = [compile_ast(s, macros) for s in regexes]
        compiled = [c for c in compiled if is_not_empty(c)]
    finally:
        # Drop only the names this call added, even when compilation
        # failed part-way, so local macros never leak into the caller.
        for name in registered:
            del macros[name]

    if not compiled:
        return EMPTY
    if len(compiled) == 1:
        return compiled[0]
    return asm.Concat(compiled)
import re

from re2.parser import (
    Concat,
    Either,
    Def,
    Operator,
    Macro,
    Literal,
    Nothing,
)
from re2 import asm

# Reusable empty asm nodes.
EMPTY = asm.Literal('')
EMPTY_CONCAT = asm.Concat([])


class CompileError(Exception):
    """Exception type declared by the compiler module."""


# Macro name -> asm node it stands for. Section headings below follow the
# macro names only.
builtin_macros = {
    # Any character, line terminators and tab.
    '#any': asm.ANY,
    '#linefeed': asm.LINEFEED,
    '#carriage_return': asm.CARRIAGE_RETURN,
    '#windows_newline': asm.Literal('\r\n'),
    '#tab': asm.TAB,

    # Kinds of characters.
    '#digit': asm.DIGIT,
    '#letter': asm.LETTER,
    '#lowercase': asm.LOWERCASE,
    '#uppercase': asm.UPPERCASE,
    '#space': asm.SPACE,
    '#word_character': asm.WORD_CHARACTER,

    # String, line and word boundaries.
    '#start_string': asm.START_STRING,
    '#end_string': asm.END_STRING,
    '#start_line': asm.START_LINE,
    '#end_line': asm.END_LINE,
    '#word_boundary': asm.WORD_BOUNDARY,

    # Quote characters.
    '#quote': asm.Literal("'"),
    '#double_quote': asm.Literal('"'),
}
def test_concat():
    """A ``Concat`` of literals compiles to an ``asm.Concat`` holding the
    matching ``asm.Literal`` nodes in the same order."""
    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # which would be exhausted after a single pass over the node's items.
    literals = [Literal(char) for char in 'abc']
    expected = asm.Concat(
        [asm.Literal('a'), asm.Literal('b'), asm.Literal('c')])
    assert compile(Concat(literals)) == expected