def test_doubled_quotes_count_as_an_escaped_quote():
    raw = '''
"encoded "" string",
'encoded '' string',
'''
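    # A doubled quote inside a quoted string escapes the quote instead of
    # terminating the string, so each line should lex as a single String token.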
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.String, '"encoded "" string"'), tokens)
    assert token_is_parsed((Token.String, "'encoded '' string'"), tokens)
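# These snippets assume `Lexer` and a Pygments-style `Token` hierarchy imported
# from the module under test (the import is not shown here). Below is a minimal
# sketch of the `token_is_parsed` helper they all rely on, assuming the lexer
# yields (token_type, value) pairs; the real helper may differ.
def token_is_parsed(expected, tokens):
    """Return True if the expected (token_type, value) pair occurs in the token stream."""
    return expected in tokens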
def test_punctuation_is_tokenized():
    raw = '''
dot. comma, semicolon;
'''
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.Dot, '.'), tokens)
    assert token_is_parsed((Token.Comma, ','), tokens)
    assert token_is_parsed((Token.Semicolon, ';'), tokens)
def test_dash_comment_is_tokenized():
    raw = '''
-- this is a comment
--this is not a comment
'''
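    # MySQL only treats "--" as a comment starter when the dashes are followed by
    # whitespace, so "--this" on the second line must not produce a Comment token.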
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.Comment, '-- this is a comment\n'), tokens)
    assert not token_is_parsed(
        (Token.Comment, '--this is not a comment\n'), tokens)
def test_schema_object_is_tokenized():
    raw = '''
`schema`.`object`
@`not a schema object`
'''
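    # Backtick-quoted identifiers should come out as Name tokens, but the backtick
    # string after "@" belongs to a quoted user variable, not a schema object.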
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.Name, '`schema`'), tokens)
    assert token_is_parsed((Token.Name, '`object`'), tokens)
    assert not token_is_parsed((Token.Name, '`not a schema object`'), tokens)
def test_c_style_comment_is_tokenized():
    raw = '''
/* this is a c style comment */
/* unterminated comment
'''
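    # The second comment is never closed, so it is expected to be captured
    # through the end of the input.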
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.Comment, '/* this is a c style comment */'),
                           tokens)
    assert token_is_parsed((Token.Comment, '/* unterminated comment\n'),
                           tokens)
def test_multiple_strings_are_tokenized():
    raw = '''
"first string",
'second string' 'third string'|"fourth string" '
last string
'''
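    # The lone quote before "last string" opens a string that is never closed;
    # the final assertion expects it to run to the end of the input.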
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.String, '"first string"'), tokens)
    assert token_is_parsed((Token.String, "'second string'"), tokens)
    assert token_is_parsed((Token.String, "'third string'"), tokens)
    assert token_is_parsed((Token.String, '"fourth string"'), tokens)
    assert token_is_parsed((Token.String, "'\nlast string\n"), tokens)
def test_decimal_numbers_are_tokenized():
    raw = '''
-1,
20.321,
102e10
2312.123e-15,
-123312.123e-1
102e,
.203
1e20e30
120a30
'''
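    # The leading "-" is tokenized separately, so the numeric values are '1' and
    # '123312.123e-1'; malformed forms such as '102e', '1e20e30' and '120a30'
    # must not come out as single decimal tokens.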
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.Number.Dec, '1'), tokens)
    assert token_is_parsed((Token.Number.Dec, '20.321'), tokens)
    assert token_is_parsed((Token.Number.Dec, '102e10'), tokens)
    assert token_is_parsed((Token.Number.Dec, '2312.123e-15'), tokens)
    assert token_is_parsed((Token.Number.Dec, '123312.123e-1'), tokens)
    assert not token_is_parsed((Token.Number.Dec, '102e'), tokens)
    assert token_is_parsed((Token.Number.Dec, '.203'), tokens)
    assert not token_is_parsed((Token.Number.Dec, '1e20e30'), tokens)
    assert not token_is_parsed((Token.Number.Dec, '120a30'), tokens)
def test_variables_are_tokenized():
    raw = '''
@@GLOBAL.g,
@@SESSION.s,
@@LOCAL.l,
@variable,
@'single quoted variable',
@"double quoted variable",
@`backtick quoted variable`
@@global
'''
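    # Covers @@-prefixed system variables with GLOBAL/SESSION/LOCAL scopes as
    # well as plain and quoted @ user variables.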
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.Variable, '@@GLOBAL.g'), tokens)
    assert token_is_parsed((Token.Variable, '@@SESSION.s'), tokens)
    assert token_is_parsed((Token.Variable, '@@LOCAL.l'), tokens)
    assert token_is_parsed((Token.Variable, '@variable'), tokens)
    assert token_is_parsed((Token.Variable, "@'single quoted variable'"),
                           tokens)
    assert token_is_parsed((Token.Variable, '@"double quoted variable"'),
                           tokens)
    assert token_is_parsed((Token.Variable, '@`backtick quoted variable`'),
                           tokens)
    assert token_is_parsed((Token.Variable, '@@global'), tokens)
def test_operators_are_tokenized():
    raw = '''
    ->>,
    <=>,
    >>,
    >=,
    <>,
    !=,
    <<,
    <=,
    ->,
    :=,
    ||,
    &&,
    &,
    >,
    <,
    %,
    *,
    +,
    -,
    -,
    /,
    =,
    =,
    ^,
    |,
    ~

    and
    between
    binary
    case
    div

    is
    not
    like
    mod
    regexp

    or
    rlike
    sounds
    xor

    # invalid operators
    :==
    *=
    >>>

'''
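    # The raw input mixes symbolic operators, word operators, and three sequences
    # (":==", "*=", ">>>") that must not be tokenized as single operator symbols.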
    tokens = list(Lexer(raw).tokenize())
    assert token_is_parsed((Token.Operator.Symbol, '->>'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '<=>'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '>>'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '>='), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '<>'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '!='), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '<<'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '<='), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '->'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, ':='), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '||'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '&&'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '&'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '>'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '<'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '%'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '*'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '+'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '-'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '-'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '/'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '='), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '='), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '^'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '|'), tokens)
    assert token_is_parsed((Token.Operator.Symbol, '~'), tokens)

    assert not token_is_parsed((Token.Operator.Symbol, ':=='), tokens)
    assert not token_is_parsed((Token.Operator.Symbol, '*='), tokens)
    assert not token_is_parsed((Token.Operator.Symbol, '>>>'), tokens)

    assert token_is_parsed((Token.Operator, 'and'), tokens)
    assert token_is_parsed((Token.Operator, 'between'), tokens)
    assert token_is_parsed((Token.Operator, 'case'), tokens)
    assert token_is_parsed((Token.Operator, 'div'), tokens)
    assert token_is_parsed((Token.Operator, 'is'), tokens)
    assert token_is_parsed((Token.Operator, 'not'), tokens)
    assert token_is_parsed((Token.Operator, 'like'), tokens)
    assert token_is_parsed((Token.Operator, 'mod'), tokens)
    assert token_is_parsed((Token.Operator, 'regexp'), tokens)
    assert token_is_parsed((Token.Operator, 'or'), tokens)
    assert token_is_parsed((Token.Operator, 'rlike'), tokens)
    assert token_is_parsed((Token.Operator, 'sounds'), tokens)
    assert token_is_parsed((Token.Operator, 'xor'), tokens)
def test_hex_numbers_are_tokenized():
    raw = '''
-1,
20.321,
102e10
0x12af
0X12af
0x12gh
x'fff'
x"aaa"
x'123j'
X'fff'
X"aaa"
X'123j'
x'fff
'''
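    # MySQL hex literals are written as 0x... or X'...'/x'...'; the 0x prefix is
    # case-sensitive (0X is invalid) and the digits must be hexadecimal.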
    tokens = list(Lexer(raw).tokenize())
    assert not token_is_parsed((Token.Number.Hex, '1'), tokens)
    assert not token_is_parsed((Token.Number.Hex, '20.321'), tokens)
    assert not token_is_parsed((Token.Number.Hex, '102e10'), tokens)
    assert token_is_parsed((Token.Number.Hex, '0x12af'), tokens)
    assert not token_is_parsed((Token.Number.Hex, '0X12af'), tokens)
    assert not token_is_parsed((Token.Number.Hex, '0x12gh'), tokens)
    assert token_is_parsed((Token.Number.Hex, "x'fff'"), tokens)
    assert not token_is_parsed((Token.Number.Hex, 'x"aaa"'), tokens)
    assert not token_is_parsed((Token.Number.Hex, "x'123j'"), tokens)
    assert token_is_parsed((Token.Number.Hex, "X'fff'"), tokens)
    assert not token_is_parsed((Token.Number.Hex, 'X"aaa"'), tokens)
    assert not token_is_parsed((Token.Number.Hex, "X'123j'"), tokens)
    assert not token_is_parsed((Token.Number.Hex, "x'fff"), tokens)
def test_bit_numbers_are_tokenized():
    raw = '''
-1,
20.321,
102e10
0b01
0b02
0b111
0B111
b'01'
B'01'
B"01"
0b11b1
b'2'
'''
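    # MySQL bit literals are written as 0b... or b'...'/B'...'; the 0b prefix is
    # case-sensitive (0B is invalid) and only the digits 0 and 1 are allowed.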
    tokens = list(Lexer(raw).tokenize())
    assert not token_is_parsed((Token.Number.Bit, '1'), tokens)
    assert not token_is_parsed((Token.Number.Bit, '20.321'), tokens)
    assert not token_is_parsed((Token.Number.Bit, '102e10'), tokens)
    assert token_is_parsed((Token.Number.Bit, '0b01'), tokens)
    assert not token_is_parsed((Token.Number.Bit, '0b02'), tokens)
    assert token_is_parsed((Token.Number.Bit, '0b111'), tokens)
    assert not token_is_parsed((Token.Number.Bit, '0B111'), tokens)
    assert token_is_parsed((Token.Number.Bit, "b'01'"), tokens)
    assert token_is_parsed((Token.Number.Bit, "B'01'"), tokens)
    assert not token_is_parsed((Token.Number.Bit, 'B"01"'), tokens)
    assert not token_is_parsed((Token.Number.Bit, '0b11b1'), tokens)
    assert not token_is_parsed((Token.Number.Bit, "b'2'"), tokens)
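# A minimal usage sketch (not part of the test suite): it assumes the same `Lexer`
# import as the tests above and that `tokenize()` yields (token_type, value) pairs,
# which is how the assertions treat it.
if __name__ == '__main__':
    sample = "SELECT `schema`.`object`, 0x1f, @variable; -- trailing comment\n"
    for token_type, value in Lexer(sample).tokenize():
        print(token_type, repr(value))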