def test_invert():
    """Check that ``not`` over single-character operands compiles to an inverted class."""
    # Each case pairs the operand of the ``not`` operator with the members
    # expected in the resulting inverted character class.
    cases = [
        (Literal('a'), ['a']),
        (Either([Literal('a'), Literal('b')]), ['a', 'b']),
        (Either([Literal('a'), Macro('#d')]), ['a', r'\d']),
        (Either([Literal('a'), Macro('#l')]), ['a', ['a', 'z'], ['A', 'Z']]),
    ]
    for operand, members in cases:
        negated = Operator('not', operand)
        assert compile(negated) == asm.CharacterClass(members, inverted=True)
def invert_operator(expr):
    """Return the inverted form of the compiled expression ``expr``.

    A one-character ``asm.Literal`` is turned directly into an inverted
    ``asm.CharacterClass`` containing that character.  Any other expression
    is asked to invert itself through its ``invert()`` method.

    Raises:
        CompileError: if an ``AttributeError`` occurs while calling
            ``expr.invert()``.
    """
    is_one_char_literal = isinstance(expr, asm.Literal) and len(expr.string) == 1
    if is_one_char_literal:
        return asm.CharacterClass([expr.string], True)

    try:
        inverted = expr.invert()
    except AttributeError:
        # NOTE(review): this also catches an AttributeError raised *inside*
        # invert(), not only a missing invert attribute.
        raise CompileError('Expression %s cannot be inverted' % expr.to_regex())
    return inverted
def test_range_macros():
    """Check compilation of ``Range`` nodes, both valid and rejected ones."""
    # A valid range compiles to a non-inverted class holding one (start, end) pair.
    for start, end in [('a', 'f'), ('B', 'Z'), ('2', '6')]:
        expected = asm.CharacterClass([(start, end)], False)
        assert compile(Range(start, end)) == expected

    # A range merges with a macro when both sit inside an Either.
    range_or_digit = Either([Range('a', 'f'), Macro('#digit')])
    assert compile(range_or_digit) == asm.CharacterClass([('a', 'f'), r'\d'], False)

    # These must be rejected with CompileError: the first two presumably mix
    # character categories, the last two do not have start strictly before end.
    for start, end in [('a', '5'), ('a', 'F'), ('c', 'a'), ('a', 'a')]:
        with pytest.raises(CompileError):
            compile(Range(start, end))

    # Punctuation endpoints fail with an AssertionError rather than a
    # CompileError (presumably raised while categorising the characters).
    with pytest.raises(AssertionError):
        compile(Range('!', ','))
def test_character_class():
    """Check that an ``Either`` of single characters compiles to a character class."""

    def either(*items):
        # Build a fresh Either node for each assertion.
        return Either(list(items))

    plain = compile(either(Literal('a'), Literal('b')))
    assert plain == asm.CharacterClass(['a', 'b'], False)

    negated = compile(Operator('not', either(Literal('a'), Literal('b'))))
    assert negated == asm.CharacterClass(['a', 'b'], inverted=True)

    # An alternative longer than one character cannot be inverted.
    with pytest.raises(CompileError):
        compile(Operator('not', either(Literal('a'), Literal('bc'))))

    three_chars = compile(either(Literal('a'), Literal('b'), Literal('0')))
    assert three_chars == asm.CharacterClass(['a', 'b', '0'], False)

    with_macro = compile(either(Literal('a'), Macro('#d')))
    assert with_macro == asm.CharacterClass(['a', r'\d'], False)
def compile_range(range, _):
    """Compile a range node into a non-inverted ``asm.CharacterClass``.

    Args:
        range: node exposing ``start`` and ``end`` attributes.
        _: unused second argument.

    Returns:
        ``asm.CharacterClass`` containing the single pair ``(start, end)``.

    Raises:
        CompileError: if the two endpoints fall in different character
            categories, or if ``start`` is not strictly less than ``end``.
    """
    first, last = range.start, range.end
    first_category = character_category(first)
    last_category = character_category(last)

    if first_category != last_category:
        raise CompileError(
            "Range start and end not of the same category: '%s' is a %s but '%s' is a %s"
            % (first, first_category, last, last_category))

    if not first < last:
        raise CompileError(
            "Range start not before range end: '%s' >= '%s'" % (first, last))

    return asm.CharacterClass([(first, last)], False)
def compile_either(e, macros):
    """Compile an alternation node.

    Every item of ``e.items`` is compiled with ``compile_ast``.  When all
    compiled items satisfy ``is_single_char`` they are merged into one
    non-inverted ``asm.CharacterClass``; otherwise an ``asm.Either`` of the
    compiled items is returned.

    Args:
        e: node exposing an ``items`` iterable of sub-expressions.
        macros: passed through unchanged to ``compile_ast``.
    """
    alternatives = [compile_ast(item, macros) for item in e.items]

    if not all(is_single_char(alt) for alt in alternatives):
        return asm.Either(alternatives)

    members = []
    for alt in alternatives:
        if isinstance(alt, asm.Literal) and len(alt.string) == 1:
            members.append(alt.string)
        elif isinstance(alt, asm.CharacterClass):
            # Flatten nested classes into the combined member list.
            members.extend(alt.characters)
    return asm.CharacterClass(members, False)
def test_builtin_macros():
    """Check the compiled form of several built-in macros (short-name variant)."""
    # NOTE(review): a second ``test_builtin_macros`` is defined later in this
    # source; if both live in the same module the later one replaces this one.
    assert compile(Macro('#any')) == asm.ANY

    # Both the long and the short macro name give the same inverted class.
    expected_not_linefeed = asm.CharacterClass([r'\n'], inverted=True)
    for macro_name in ('#not_linefeed', '#nlf'):
        assert compile(Macro(macro_name)) == expected_not_linefeed

    assert compile(Macro('#crlf')) == asm.Literal('\r\n')

    # '#sl' and '#el' compile to the '^' and '$' boundaries around the literal.
    anchored = Concat([Macro('#sl'), Literal('yo'), Macro('#el')])
    expected_anchored = asm.Concat([
        asm.Boundary('^', None),
        asm.Literal('yo'),
        asm.Boundary('$', None),
    ])
    assert compile(anchored) == expected_anchored
def test_builtin_macros():
    """Check the compiled form of several built-in macros (long-name variant)."""
    # NOTE(review): an earlier ``test_builtin_macros`` is defined in this
    # source; if both live in the same module this definition replaces it.
    assert compile(Macro('#any')) == asm.ANY

    # The original asserted '#not_linefeed' twice in a row with identical
    # operands; the redundant copy is dropped.
    not_linefeed = asm.CharacterClass([r'\n'], inverted=True)
    assert compile(Macro('#not_linefeed')) == not_linefeed

    # '#sl' and '#el' compile to the '^' and '$' boundaries around the literal.
    assert compile(Concat([Macro('#sl'), Literal('yo'), Macro('#el')])) == asm.Concat([
        asm.Boundary('^', None),
        asm.Literal('yo'),
        asm.Boundary('$', None),
    ])

    # Macros that compile to a fixed literal string.
    literal_macros = [
        ('#windows_newline', '\r\n'),
        ('#quote', "'"),
        ('#double_quote', '"'),
        ('#left_brace', "["),
        ('#right_brace', ']'),
    ]
    for macro_name, text in literal_macros:
        assert compile(Macro(macro_name)) == asm.Literal(text)