Example #1
def test_invalid_number_literal(tokenize, literal, error):
    tokens = tokenize(literal.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.ERROR, error, 1, 0, 1, len(error)),
    ]
    assert tokens == expected
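Most tests in this listing take a tokenize fixture plus extra parameters (literal, error, quote, newline, ...) supplied by @pytest.mark.parametrize decorators that the listing omits; Example #29 below shows the tokenize helpers. The sketch below is a hypothetical wiring of the test above: the (literal, error) pairs are made-up placeholders, not the pairs used by the woosh test suite.

import pytest
import woosh


def tokenize_bytes(source):
    return list(woosh.tokenize(source))


@pytest.mark.parametrize('tokenize', [tokenize_bytes])
@pytest.mark.parametrize('literal, error', [
    ('0b', '0b'),  # hypothetical pair: incomplete binary literal
    ('0x', '0x'),  # hypothetical pair: incomplete hex literal
])
def test_invalid_number_literal(tokenize, literal, error):
    tokens = tokenize(literal.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.ERROR, error, 1, 0, 1, len(error)),
    ]
    assert tokens == expected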
Example #2
def test_black_1012(tokenize):
    tokens = tokenize('\\'.encode('utf-8'))
    assert(tokens == [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NEWLINE, '', 1, 1, 1, 1),
        woosh.Token(woosh.EOF, '', 1, 1, 1, 1),
    ])
Example #3
def test_name_split_by_token(tokenize, code, name):
    tokens = tokenize(code.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, name, 1, 0, 1, len(name)),
    ]
    assert tokens[:2] == expected
Example #4
def test_number_split_by_token(tokenize, code, number):
    tokens = tokenize(code.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NUMBER, number, 1, 0, 1, len(number)),
    ]
    assert tokens[:2] == expected
Example #5
def test_null_byte(tokenize):
    tokens = tokenize(b'\x00')
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.ERROR, '\x00', 1, 0, 1, 1),
    ]
    assert tokens == expected
Example #6
def test_utf8_bom_encoding(tokenize):
    tokens = tokenize(UTF8_BOM)
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NEWLINE, '', 1, 0, 1, 0),
        woosh.Token(woosh.EOF, '', 1, 0, 1, 0),
    ]
    assert tokens == expected
Example #7
def test_empty(tokenize):
    tokens = tokenize(''.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NEWLINE, '', 1, 0, 1, 0),
        woosh.Token(woosh.EOF, '', 1, 0, 1, 0),
    ]
    assert tokens == expected
Example #8
def test_black_970(tokenize):
    tokens = tokenize('pass #\r#\n'.encode('utf-8'))
    assert(tokens == [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, 'pass', 1, 0, 1, 4),
        woosh.Token(woosh.COMMENT, '#\r#', 1, 5, 1, 8),
        woosh.Token(woosh.NEWLINE, '\n', 1, 8, 2, 0),
        woosh.Token(woosh.EOF, '', 2, 0, 2, 0),
    ])
Example #9
def test_bpo_40661(tokenize):
    tokens = tokenize('import äˆ ð£„¯ð¢·žð±‹á”€ð””ð‘©±å®ä±¬ð©¾\n𗶽'.encode('utf-8'))
    assert(tokens == [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, 'import', 1, 0, 1, 6),
        woosh.Token(woosh.NAME, 'äˆ', 1, 7, 1, 9),
        woosh.Token(woosh.NAME, 'ð', 1, 10, 1, 11),
        woosh.Token(woosh.ERROR, '£', 1, 11, 1, 12),
    ])
Example #10
def test_valid_number_literal(tokenize, literal):
    tokens = tokenize(literal.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NUMBER, literal, 1, 0, 1, len(literal)),
        woosh.Token(woosh.NEWLINE, '', 1, len(literal), 1, len(literal)),
        woosh.Token(woosh.EOF, '', 1, len(literal), 1, len(literal)),
    ]
    assert tokens == expected
Example #11
def test_line_continuation(tokenize, newline, post_whitespace):
    tokens = tokenize(f'xx\\{post_whitespace}{newline}yy'.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, 'xx', 1, 0, 1, 2),
        woosh.Token(woosh.NAME, 'yy', 2, 0, 2, 2),
        woosh.Token(woosh.NEWLINE, '', 2, 2, 2, 2),
        woosh.Token(woosh.EOF, '', 2, 2, 2, 2),
    ]
    assert tokens == expected
Example #12
def test_unterminated_one_line_string(tokenize, quote):
    tokens = tokenize(f'{quote}hello\nx'.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.ERROR, f'{quote}hello', 1, 0, 1, 6),
        woosh.Token(woosh.NAME, 'x', 2, 0, 2, 1),
        woosh.Token(woosh.NEWLINE, '', 2, 1, 2, 1),
        woosh.Token(woosh.EOF, '', 2, 1, 2, 1),
    ]
    assert tokens == expected
Example #13
def test_non_equality_operators():
    a = woosh.Token(woosh.OP, '>', 0, 1, 2, 3)
    b = woosh.Token(woosh.OP, '>', 0, 1, 2, 3)

    with pytest.raises(TypeError):
        a > b
    with pytest.raises(TypeError):
        a >= b
    with pytest.raises(TypeError):
        a < b
    with pytest.raises(TypeError):
        a <= b
Example #14
def test_comment(tokenize, literal, newline):
    tokens = tokenize(f'{literal}{newline}'.encode('utf-8'))
    end_comment = newline_end = 1, len(literal)
    if newline:
        newline_end = 2, 0
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.COMMENT, literal, 1, 0, *end_comment),
        woosh.Token(woosh.NEWLINE, '', *newline_end, *newline_end),
        woosh.Token(woosh.EOF, '', *newline_end, *newline_end),
    ]
    assert tokens == expected
Example #15
def test_equality():
    a = woosh.Token(woosh.OP, '>', 0, 1, 2, 3)
    b = woosh.Token(woosh.OP, '>', 0, 1, 2, 3)
    assert a == b
    assert not (a != b)

    b = woosh.Token(woosh.EOF, '>', 0, 1, 2, 3)
    assert not (a == b)
    assert a != b

    b = woosh.Token(woosh.OP, '<', 0, 1, 2, 3)
    assert not (a == b)
    assert a != b

    b = woosh.Token(woosh.OP, '>', 100, 1, 2, 3)
    assert not (a == b)
    assert a != b

    b = woosh.Token(woosh.OP, '>', 0, 100, 2, 3)
    assert not (a == b)
    assert a != b

    b = woosh.Token(woosh.OP, '>', 0, 1, 100, 3)
    assert not (a == b)
    assert a != b

    b = woosh.Token(woosh.OP, '>', 0, 1, 2, 100)
    assert not (a == b)
    assert a != b
Example #16
def test_unterminated_string(tokenize, quote):
    tokens = tokenize(f'{quote}hello'.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.ERROR, f'{quote}hello', 1, 0, 1,
                    len(quote) + 5),
        woosh.Token(woosh.NEWLINE, '', 1,
                    len(quote) + 5, 1,
                    len(quote) + 5),
        woosh.Token(woosh.EOF, '', 1,
                    len(quote) + 5, 1,
                    len(quote) + 5),
    ]
    assert tokens == expected
Example #17
def test_indentation_continuation(tokenize, indent, newline) -> None:
    tokens = tokenize(
        textwrap.dedent(f"""
    one\\
    {indent}two
    """).replace('\n', newline).encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, 'one', 2, 0, 2, 3),
        woosh.Token(woosh.NAME, 'two', 3, len(indent), 3,
                    len(indent) + 3),
        woosh.Token(woosh.NEWLINE, newline, 3,
                    len(indent) + 3, 4, 0),
        woosh.Token(woosh.EOF, '', 4, 0, 4, 0),
    ]
    assert tokens == expected
Example #18
def test_repr(type, value, start_line, start_column, end_line, end_column):
    token = woosh.Token(type, value, start_line, start_column, end_line,
                        end_column)
    expected_repr = (
        f'<Token {type!r} {value!r} '
        f'{start_line!r}:{start_column!r}-{end_line!r}:{end_column!r}>')
    assert repr(token) == expected_repr
Example #19
def test_utf8_bom_encoding_utf8_comment_encoding(
    tokenize,
    encoding,
    pre_space,
    space_count
):
    comment = f'#{pre_space * space_count}coding={encoding}'
    source = UTF8_BOM + comment.encode('utf-8')
    tokens = tokenize(source)
    expected = [
        woosh.Token(woosh.ENCODING, encoding, 1, 0, 1, 0),
        woosh.Token(woosh.COMMENT, comment, 1, 0, 1, len(comment)),
        woosh.Token(woosh.NEWLINE, '', 1, len(comment), 1, len(comment)),
        woosh.Token(woosh.EOF, '', 1, len(comment), 1, len(comment)),
    ]
    assert tokens == expected
Example #20
def test_utf8_bom_encoding_non_utf8_comment_encoding(tokenize, encoding):
    source = UTF8_BOM + f'# coding={encoding}'.encode('utf-8')
    tokens = tokenize(source)
    expected = [
        woosh.Token(woosh.ERROR, f"encoding comment '{encoding}' does not match BOM (utf-8)", 1, 0, 1, 0),
    ]
    assert tokens == expected
Example #21
def test_mismatched_dedent(tokenize):
    tokens = tokenize('''
    indent
  dedent
    '''.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.INDENT, '    ', 2, 0, 2, 4),
        woosh.Token(woosh.NAME, 'indent', 2, 4, 2, 10),
        woosh.Token(woosh.NEWLINE, '\n', 2, 10, 3, 0),
        woosh.Token(woosh.ERROR, '  ', 3, 0, 3, 2),
        woosh.Token(woosh.NAME, 'dedent', 3, 2, 3, 8),
        woosh.Token(woosh.NEWLINE, '\n', 3, 8, 4, 0),
        woosh.Token(woosh.DEDENT, '', 4, 4, 4, 4),
        woosh.Token(woosh.EOF, '', 4, 4, 4, 4),
    ]
    assert tokens == expected
Example #22
def test_double_zero(tokenize):
    tokens = tokenize('hello 00123 world'.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, 'hello', 1, 0, 1, 5),
        woosh.Token(woosh.ERROR, '00', 1, 6, 1, 8),
        woosh.Token(woosh.NUMBER, '123', 1, 8, 1, 11),
        woosh.Token(woosh.NAME, 'world', 1, 12, 1, 17),
        woosh.Token(woosh.NEWLINE, '', 1, 17, 1, 17),
        woosh.Token(woosh.EOF, '', 1, 17, 1, 17),
    ]
    assert tokens == expected
Example #23
def test_tab_size(tokenize):
    tokens = tokenize(
        textwrap.dedent(f"""
            spaces
    \ttab
    """).encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.INDENT, '        ', 2, 0, 2, 8),
        woosh.Token(woosh.NAME, 'spaces', 2, 8, 2, 14),
        woosh.Token(woosh.NEWLINE, '\n', 2, 14, 3, 0),
        woosh.Token(woosh.NAME, 'tab', 3, 1, 3, 4),
        woosh.Token(woosh.NEWLINE, '\n', 3, 4, 4, 0),
        woosh.Token(woosh.DEDENT, '', 4, 0, 4, 0),
        woosh.Token(woosh.EOF, '', 4, 0, 4, 0),
    ]
    assert tokens == expected
Example #24
def test_name_non_starter(tokenize):
    tokens = tokenize('yin☯yang'.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, 'yin', 1, 0, 1, 3),
        woosh.Token(woosh.ERROR, '☯', 1, 3, 1, 4),
        woosh.Token(woosh.NAME, 'yang', 1, 4, 1, 8),
        woosh.Token(woosh.NEWLINE, '', 1, 8, 1, 8),
        woosh.Token(woosh.EOF, '', 1, 8, 1, 8),
    ]
    assert tokens == expected
Example #25
def test_incomplete_sigil_number(tokenize, sigil):
    tokens = tokenize(f'hello 0{sigil} world'.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.NAME, 'hello', 1, 0, 1, 5),
        woosh.Token(woosh.ERROR, f'0{sigil}', 1, 6, 1, 7 + len(sigil)),
        woosh.Token(woosh.NAME, 'world', 1, 8 + len(sigil), 1,
                    13 + len(sigil)),
        woosh.Token(woosh.NEWLINE, '', 1, 13 + len(sigil), 1, 13 + len(sigil)),
        woosh.Token(woosh.EOF, '', 1, 13 + len(sigil), 1, 13 + len(sigil)),
    ]
    assert tokens == expected
Example #26
def test_new_token():
    with pytest.raises(TypeError):
        woosh.Token()

    with pytest.raises(TypeError):
        woosh.Token(woosh.OP, '', 0, 0, 0, 0, None)

    with pytest.raises(TypeError):
        woosh.Token(type=woosh.OP,
                    value='',
                    start_line=0,
                    start_column=0,
                    end_line=0,
                    end_column=0)

    with pytest.raises(TypeError) as exinfo:
        woosh.Token(None, '', 0, 0, 0, 0)
    assert exinfo.value.args[0] == 'type must be woosh.Type'

    with pytest.raises(TypeError) as exinfo:
        woosh.Token(woosh.OP, None, 0, 0, 0, 0)
    assert exinfo.value.args[0] == 'value must be str'

    with pytest.raises(TypeError) as exinfo:
        woosh.Token(woosh.OP, '', None, 0, 0, 0)
    assert exinfo.value.args[0] == 'start_line must be int'

    with pytest.raises(TypeError) as exinfo:
        woosh.Token(woosh.OP, '', 0, None, 0, 0)
    assert exinfo.value.args[0] == 'start_column must be int'

    with pytest.raises(TypeError) as exinfo:
        woosh.Token(woosh.OP, '', 0, 0, None, 0)
    assert exinfo.value.args[0] == 'end_line must be int'

    with pytest.raises(TypeError) as exinfo:
        woosh.Token(woosh.OP, '', 0, 0, 0, None)
    assert exinfo.value.args[0] == 'end_column must be int'
Example #27
def test_indentation_groups(tokenize, indent, newline, open, close):
    tokens = tokenize(
        textwrap.dedent(f"""
    {open}
    {indent}foo
    {close}
    """).replace('\n', newline).encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.OP, open, 2, 0, 2, 1),
        woosh.Token(woosh.NAME, 'foo', 3, len(indent), 3,
                    len(indent) + 3),
        woosh.Token(woosh.OP, close, 4, 0, 4, 1),
        woosh.Token(woosh.NEWLINE, newline, 4, 1, 5, 0),
        woosh.Token(woosh.EOF, '', 5, 0, 5, 0),
    ]
    assert tokens == expected
Example #28
def test_byte_string_unicode_character(tokenize, quote):
    tokens = tokenize(f'b{quote}abcü123{quote}'.encode('utf-8'))
    expected = [
        woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
        woosh.Token(woosh.ERROR, f'b{quote}abc', 1, 0, 1,
                    len(quote) + 4),
        woosh.Token(woosh.NAME, 'ü123', 1,
                    len(quote) + 4, 1,
                    len(quote) + 8),
        woosh.Token(woosh.ERROR, quote, 1,
                    len(quote) + 8, 1,
                    len(quote) * 2 + 8),
        woosh.Token(woosh.NEWLINE, '', 1,
                    len(quote) * 2 + 8, 1,
                    len(quote) * 2 + 8),
        woosh.Token(woosh.EOF, '', 1,
                    len(quote) * 2 + 8, 1,
                    len(quote) * 2 + 8),
    ]
    assert tokens == expected
Example #29
# python
import io
import pathlib
# pytest
import pytest
# woosh
import woosh


def tokenize_file_like(source):
    return list(woosh.tokenize(io.BytesIO(source)))


def tokenize_bytes(source):
    return list(woosh.tokenize(source))


SAMPLE_DIR = pathlib.Path(
    __file__).parent.absolute() / '../../' / '../../' / 'sample'


@pytest.mark.parametrize('tokenize', [tokenize_file_like, tokenize_bytes])
def test(tokenize):
    with open(SAMPLE_DIR / 'contrived/empty.py', 'rb') as f:
        tokens = tokenize(f.read())
    for token, expected in zip(tokens, EXPECTED):
        assert token == expected


EXPECTED = [
    woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
    woosh.Token(woosh.NEWLINE, '', 1, 0, 1, 0),
    woosh.Token(woosh.EOF, '', 1, 0, 1, 0),
]
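The two helpers above exercise both input forms accepted by woosh.tokenize: raw bytes and a binary file-like object. Below is a minimal standalone sketch, reusing the empty-source expectation from Example #7.

import io
import woosh

# Empty source yields an ENCODING token, an implicit NEWLINE, and EOF
# (see Example #7); both input forms should produce the same tokens.
expected = [
    woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
    woosh.Token(woosh.NEWLINE, '', 1, 0, 1, 0),
    woosh.Token(woosh.EOF, '', 1, 0, 1, 0),
]
assert list(woosh.tokenize(b'')) == expected
assert list(woosh.tokenize(io.BytesIO(b''))) == expected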
Example #30

# python
import io
import pathlib
# pytest
import pytest
# woosh
import woosh


def tokenize_file_like(source):
    return list(woosh.tokenize(io.BytesIO(source)))


def tokenize_bytes(source):
    return list(woosh.tokenize(source))


SAMPLE_DIR = pathlib.Path(
    __file__).parent.absolute() / '../../' / '../../' / 'sample'


@pytest.mark.parametrize('tokenize', [tokenize_file_like, tokenize_bytes])
def test(tokenize):
    with open(SAMPLE_DIR / 'stdlib/codeop.py', 'rb') as f:
        tokens = tokenize(f.read())
    for token, expected in zip(tokens, EXPECTED):
        assert token == expected


EXPECTED = [
    woosh.Token(woosh.ENCODING, 'utf-8', 1, 0, 1, 0),
    woosh.Token(
        woosh.STRING,
        'r"""Utilities to compile possibly incomplete Python source code.\r\n\r\nThis module provides two interfaces, broadly similar to the builtin\r\nfunction compile(), which take program text, a filename and a \'mode\'\r\nand:\r\n\r\n- Return code object if the command is complete and valid\r\n- Return None if the command is incomplete\r\n- Raise SyntaxError, ValueError or OverflowError if the command is a\r\n  syntax error (OverflowError and ValueError can be produced by\r\n  malformed literals).\r\n\r\nApproach:\r\n\r\nFirst, check if the source consists entirely of blank lines and\r\ncomments; if so, replace it with \'pass\', because the built-in\r\nparser doesn\'t always do the right thing for these.\r\n\r\nCompile three times: as is, with \\n, and with \\n\\n appended.  If it\r\ncompiles as is, it\'s complete.  If it compiles with one \\n appended,\r\nwe expect more.  If it doesn\'t compile either way, we compare the\r\nerror we get when compiling with \\n or \\n\\n appended.  If the errors\r\nare the same, the code is broken.  But if the errors are different, we\r\nexpect more.  Not intuitive; not even guaranteed to hold in future\r\nreleases; but this matches the compiler\'s behavior from Python 1.4\r\nthrough 2.2, at least.\r\n\r\nCaveat:\r\n\r\nIt is possible (but not likely) that the parser stops parsing with a\r\nsuccessful outcome before reaching the end of the source; in this\r\ncase, trailing symbols may be ignored instead of causing an error.\r\nFor example, a backslash followed by two newlines may be followed by\r\narbitrary garbage.  This will be fixed once the API for the parser is\r\nbetter.\r\n\r\nThe two interfaces are:\r\n\r\ncompile_command(source, filename, symbol):\r\n\r\n    Compiles a single command in the manner described above.\r\n\r\nCommandCompiler():\r\n\r\n    Instances of this class have __call__ methods identical in\r\n    signature to compile_command; the difference is that if the\r\n    instance compiles program text containing a __future__ statement,\r\n    the instance \'remembers\' and compiles all subsequent program texts\r\n    with the statement in force.\r\n\r\nThe module also provides another class:\r\n\r\nCompile():\r\n\r\n    Instances of this class act like the built-in function compile,\r\n    but with \'memory\' in the sense described above.\r\n"""',
        1, 0, 57, 3),
    woosh.Token(woosh.NEWLINE, '\r\n', 57, 3, 58, 0),
    woosh.Token(woosh.NAME, 'import', 59, 0, 59, 6),
    woosh.Token(woosh.NAME, '__future__', 59, 7, 59, 17),
    woosh.Token(woosh.NEWLINE, '\r\n', 59, 17, 60, 0),
    woosh.Token(woosh.NAME, 'import', 60, 0, 60, 6),
    woosh.Token(woosh.NAME, 'warnings', 60, 7, 60, 15),
    woosh.Token(woosh.NEWLINE, '\r\n', 60, 15, 61, 0),
    woosh.Token(woosh.NAME, '_features', 62, 0, 62, 9),
    woosh.Token(woosh.OP, '=', 62, 10, 62, 11),
    woosh.Token(woosh.OP, '[', 62, 12, 62, 13),
    woosh.Token(woosh.NAME, 'getattr', 62, 13, 62, 20),