Beispiel #1
0
def test_invert():
    """Check that ``not`` applied to single-character expressions compiles
    to an inverted ``asm.CharacterClass``."""
    # A lone one-character literal.
    negated_literal = Operator('not', Literal('a'))
    assert compile(negated_literal) == asm.CharacterClass(['a'], inverted=True)

    # An alternation of one-character literals.
    negated_literals = Operator('not', Either([Literal('a'), Literal('b')]))
    expected_literals = asm.CharacterClass(['a', 'b'], inverted=True)
    assert compile(negated_literals) == expected_literals

    # A literal mixed with the '#d' macro, expected to show up as r'\d'.
    negated_with_digit = Operator('not', Either([Literal('a'), Macro('#d')]))
    expected_with_digit = asm.CharacterClass(['a', r'\d'], inverted=True)
    assert compile(negated_with_digit) == expected_with_digit

    # A literal mixed with the '#l' macro, expected to contribute the
    # ['a', 'z'] and ['A', 'Z'] entries.
    negated_with_letter = Operator('not', Either([Literal('a'), Macro('#l')]))
    expected_with_letter = asm.CharacterClass(
        ['a', ['a', 'z'], ['A', 'Z']], inverted=True)
    assert compile(negated_with_letter) == expected_with_letter
Beispiel #2
0
def invert_operator(expr):
    """Return the inverted form of a compiled expression.

    A one-character ``asm.Literal`` becomes an inverted
    ``asm.CharacterClass`` holding that character. Any other expression is
    asked to invert itself through its ``invert()`` method.

    Raises:
        CompileError: if the ``invert()`` call raises ``AttributeError``.
    """
    is_one_char_literal = (isinstance(expr, asm.Literal)
                           and len(expr.string) == 1)
    if is_one_char_literal:
        return asm.CharacterClass([expr.string], True)

    try:
        inverted = expr.invert()
    except AttributeError:
        raise CompileError('Expression %s cannot be inverted' % expr.to_regex())
    return inverted
Beispiel #3
0
def test_range_macros():
    """Check compilation of ``Range`` nodes, both accepted and rejected."""
    # Well-formed ranges compile to a non-inverted class holding one
    # (start, end) pair.
    accepted = [('a', 'f'), ('B', 'Z'), ('2', '6')]
    for start, end in accepted:
        expected = asm.CharacterClass([(start, end)], False)
        assert compile(Range(start, end)) == expected

    # A range combined with a macro inside an alternation is merged into a
    # single class.
    range_or_digit = Either([Range('a', 'f'), Macro('#digit')])
    expected_merged = asm.CharacterClass([('a', 'f'), r'\d'], False)
    assert compile(range_or_digit) == expected_merged

    # Rejected ranges: endpoints of different kinds (letter/digit,
    # lower/upper case), reversed endpoints, and identical endpoints.
    rejected = [('a', '5'), ('a', 'F'), ('c', 'a'), ('a', 'a')]
    for start, end in rejected:
        with pytest.raises(CompileError):
            compile(Range(start, end))

    # Punctuation endpoints are expected to trip an assertion rather than a
    # CompileError.
    with pytest.raises(AssertionError):
        compile(Range('!', ','))
Beispiel #4
0
def test_character_class():
    """Check that alternations of single characters collapse into one
    ``asm.CharacterClass``."""
    # Two one-character literals merge into a plain class.
    a_or_b = Either([Literal('a'), Literal('b')])
    assert compile(a_or_b) == asm.CharacterClass(['a', 'b'], False)

    # Negating the same alternation yields the inverted class.
    not_a_or_b = Operator('not', Either([Literal('a'), Literal('b')]))
    assert compile(not_a_or_b) == asm.CharacterClass(['a', 'b'],
                                                     inverted=True)

    # An alternation containing a multi-character literal cannot be negated.
    not_a_or_bc = Operator('not', Either([Literal('a'), Literal('bc')]))
    with pytest.raises(CompileError):
        compile(not_a_or_bc)

    # Letters and digits may be mixed freely.
    letters_and_digit = Either([Literal('a'), Literal('b'), Literal('0')])
    expected_mixed = asm.CharacterClass(['a', 'b', '0'], False)
    assert compile(letters_and_digit) == expected_mixed

    # A macro contributes its own entry (r'\d' for '#d') to the class.
    a_or_digit = Either([Literal('a'), Macro('#d')])
    assert compile(a_or_digit) == asm.CharacterClass(['a', r'\d'], False)
Beispiel #5
0
def compile_range(range, _):
    """Compile a range node into a non-inverted ``asm.CharacterClass``.

    Args:
        range: node exposing ``start`` and ``end`` attributes.
        _: unused second argument.

    Returns:
        ``asm.CharacterClass`` containing the single ``(start, end)`` pair.

    Raises:
        CompileError: if ``character_category`` differs between the two
            endpoints, or if ``start`` is not strictly below ``end``.
    """
    start, end = range.start, range.end
    start_category = character_category(start)
    end_category = character_category(end)

    if start_category != end_category:
        template = "Range start and end not of the same category: '%s' is a %s but '%s' is a %s"
        raise CompileError(
            template % (start, start_category, end, end_category))

    if start >= end:
        raise CompileError("Range start not before range end: '%s' >= '%s'" %
                           (start, end))

    return asm.CharacterClass([(start, end)], False)
Beispiel #6
0
def compile_either(e, macros):
    """Compile an alternation node, folding single characters into one class.

    Every item of ``e.items`` is compiled with ``compile_ast``. If all
    compiled items pass ``is_single_char`` and each one can be merged without
    changing its meaning, the result is a single non-inverted
    ``asm.CharacterClass``. Otherwise the compiled items are wrapped in
    ``asm.Either`` unchanged.

    Args:
        e: alternation node exposing an ``items`` sequence.
        macros: macro table forwarded to ``compile_ast``.

    Returns:
        ``asm.CharacterClass`` when the items could be merged, else
        ``asm.Either``.
    """
    compiled = [compile_ast(s, macros) for s in e.items]
    if all(is_single_char(c) for c in compiled):
        characters = []
        for c in compiled:
            if isinstance(c, asm.Literal) and len(c.string) == 1:
                characters.append(c.string)
            elif (isinstance(c, asm.CharacterClass)
                  and not getattr(c, 'inverted', False)):
                # Only a non-inverted class can be merged by concatenating
                # its members; an inverted class would silently lose its
                # negation inside the non-inverted result.
                characters += c.characters
            else:
                # This item cannot be represented inside a merged class, so
                # abandon the merge instead of dropping or misrepresenting it.
                break
        else:
            # Reached only when the loop finished without hitting ``break``.
            return asm.CharacterClass(characters, False)
    return asm.Either(compiled)
Beispiel #7
0
def test_builtin_macros():
    """Check the compiled form of several built-in macros."""
    assert compile(Macro('#any')) == asm.ANY

    # The long and short spellings must compile to the same inverted class.
    expected_not_linefeed = asm.CharacterClass([r'\n'], inverted=True)
    for macro_name in ('#not_linefeed', '#nlf'):
        assert compile(Macro(macro_name)) == expected_not_linefeed

    assert compile(Macro('#crlf')) == asm.Literal('\r\n')

    # '#sl' and '#el' are expected to become the '^' and '$' boundaries
    # around the literal they enclose.
    anchored = Concat([Macro('#sl'), Literal('yo'), Macro('#el')])
    expected_anchored = asm.Concat([
        asm.Boundary('^', None),
        asm.Literal('yo'),
        asm.Boundary('$', None),
    ])
    assert compile(anchored) == expected_anchored
Beispiel #8
0
def test_builtin_macros():
    """Check the compiled form of several built-in macros.

    Covers the any-character macro, the inverted line-feed class, the
    start/end-of-line boundaries, and the macros that expand to a fixed
    literal string.
    """
    assert compile(Macro('#any')) == asm.ANY

    not_linefeed = asm.CharacterClass([r'\n'], inverted=True)
    assert compile(Macro('#not_linefeed')) == not_linefeed

    # '#sl' and '#el' are expected to become the '^' and '$' boundaries
    # around the literal they enclose.
    anchored = Concat([Macro('#sl'), Literal('yo'), Macro('#el')])
    assert compile(anchored) == asm.Concat([
        asm.Boundary('^', None),
        asm.Literal('yo'),
        asm.Boundary('$', None),
    ])

    # Macros that expand to a fixed literal. The "brace" macros are expected
    # to yield square brackets.
    literal_macros = [
        ('#windows_newline', '\r\n'),
        ('#quote', "'"),
        ('#double_quote', '"'),
        ('#left_brace', "["),
        ('#right_brace', ']'),
    ]
    for macro_name, expected_text in literal_macros:
        assert compile(Macro(macro_name)) == asm.Literal(expected_text)