Ejemplo n.º 1
0
def test_no_readline():
    """A source exposing tell/seek but no readline must raise TypeError."""
    class SeekOnlySource:
        def tell(self):
            return 0

        def seek(self, value):
            pass

    with pytest.raises(TypeError):
        list(woosh.tokenize(SeekOnlySource()))
Ejemplo n.º 2
0
def test_no_seek():
    """A source exposing tell/readline but no seek must raise TypeError."""
    class ReadOnlySource:
        def tell(self):
            return 0

        def readline(self, size=-1):
            return b''

    with pytest.raises(TypeError):
        list(woosh.tokenize(ReadOnlySource()))
Ejemplo n.º 3
0
def test_readline_incorrect_type(invalid):
    """readline() returning a non-bytes value must raise TypeError.

    ``invalid`` is the bogus readline return value supplied by the fixture.
    """
    class InvalidSource:
        def tell(self):
            return 0

        def readline(self, size=-1):
            return invalid

        def seek(self, value):
            pass

    # The exception info was previously bound to an unused ``exinfo``
    # variable; nothing inspects it, so do not capture it.
    with pytest.raises(TypeError):
        list(woosh.tokenize(InvalidSource()))
Ejemplo n.º 4
0
def test_readline_error(error):
    """An exception raised inside readline() must propagate out of the
    tokenizer unchanged — the very same exception object, not a copy."""
    class RaisingSource:
        def tell(self):
            return 0

        def readline(self, size=-1):
            raise error

        def seek(self, value):
            pass

    with pytest.raises(type(error)) as exinfo:
        list(woosh.tokenize(RaisingSource()))
    # Identity check: woosh must re-raise the original exception object.
    assert exinfo.value is error
Ejemplo n.º 5
0
def test_invalid_args():
    """tokenize() rejects missing arguments, an extra positional argument,
    and keyword use of positional-only parameters with TypeError."""
    bad_calls = (
        lambda: woosh.tokenize(),
        lambda: woosh.tokenize(b'', True),
        lambda: woosh.tokenize(source=b'', continue_on_error=True),
    )
    for bad_call in bad_calls:
        with pytest.raises(TypeError):
            bad_call()
Ejemplo n.º 6
0
def test_weird_readline(good_line_count, weird):
    """After ``good_line_count`` well-formed lines, a bogus readline()
    result (``weird``) must make the tokenizer raise TypeError."""
    class InvalidSource:
        def __init__(self):
            # Queue of readline results: valid bytes first, then the
            # offending value last.
            self.q = ([b'123'] * good_line_count) + [weird]

        def tell(self):
            return 0

        def readline(self, size=-1):
            return self.q.pop(0)

        def seek(self, value):
            pass

    # The exception info was previously bound to an unused ``exinfo``
    # variable; nothing inspects it, so do not capture it.
    with pytest.raises(TypeError):
        list(woosh.tokenize(InvalidSource()))
Ejemplo n.º 7
0
def test_no_cycle():
    """Tokens and the tokenizer must be garbage-collectable once dropped."""
    tok_stream = woosh.tokenize(b'hello world')
    stream_ref = weakref.ref(tok_stream)

    first = next(tok_stream)
    token_ref = weakref.ref(first)
    type_ref = weakref.ref(first.type)

    # Dropping the token frees it; its (shared) type object stays alive.
    del first
    gc.collect()
    assert token_ref() is None
    assert type_ref() is not None

    # Holding a live token must not keep the tokenizer itself alive.
    keep = next(tok_stream)
    del tok_stream
    gc.collect()
    assert stream_ref() is None
Ejemplo n.º 8
0
def test_source_tokenizer_readline_cycle():
    """A reference cycle source -> readline -> tokenizer must not leak."""
    class FakeSource:
        def tell(self):
            return 0

        def seek(self, index):
            pass

    class QueueReadline:
        def __init__(self):
            # First call yields an empty line, second the real data,
            # every later call EOF.
            self.data = [b'', b'hello world']

        def __call__(self, bytes=0):
            try:
                return self.data.pop(0)
            except IndexError:
                return b''

    src = FakeSource()
    src.readline = QueueReadline()
    tok_stream = woosh.tokenize(src)
    # Close the cycle: source -> readline callable -> tokenizer -> source.
    src.readline.tokenizer = tok_stream
    del src
    stream_ref = weakref.ref(tok_stream)

    first = next(tok_stream)
    token_ref = weakref.ref(first)
    type_ref = weakref.ref(first.type)

    # The token is freed on drop; its (shared) type object stays alive.
    del first
    gc.collect()
    assert token_ref() is None
    assert type_ref() is not None

    # Despite the cycle, dropping the tokenizer must free it.
    keep = next(tok_stream)
    del tok_stream
    gc.collect()
    assert stream_ref() is None
Ejemplo n.º 9
0
SAMPLE_DIR = ROOT / 'sample'

test_files = []

for directory, _, files in os.walk(SAMPLE_DIR):
    directory = pathlib.Path(directory)
    for sample_file_name in files:
        sample_file = (directory / sample_file_name).resolve()
        rel = len(
            str(pathlib.PurePosixPath(directory.relative_to(ROOT))).split('/'))
        sample_file_relative_sample = pathlib.PurePosixPath(
            sample_file.relative_to(SAMPLE_DIR))
        if sample_file.suffix != '.py':
            continue
        with open(sample_file, 'rb') as f:
            tokens = list(woosh.tokenize(f))
        expected = '\n'.join(
            f'            woosh.Token(woosh.{token.type}, {token.value!r}, {token.start_line}, {token.start_column}, {token.end_line}, {token.end_column}),'
            for token in tokens)
        template = textwrap.dedent(f"""
            # this file was generated using test/python/sample/generate.py
        
            # python
            import io
            import pathlib
            # pytest
            import pytest
            # woosh
            import woosh
            
            def tokenize_file_like(source):
Ejemplo n.º 10
0
# this script is used as the data set to generate the profile data for profile
# guided optimization of the c extension

# woosh
import woosh
# python
import io
import os
import pathlib

DATA = (pathlib.Path(__file__).parent.absolute() / 'sample').resolve()

for directory, _, files in os.walk(DATA):
    # the contrived samples are deliberately excluded from the profile run
    if directory.endswith('contrived'):
        continue
    directory = pathlib.Path(directory)
    for name in (f for f in files if f.endswith('.py')):
        data_file = directory / name
        with open(data_file, 'rb') as stream:
            raw = stream.read()
        buffered = io.BytesIO(raw)
        print(data_file.relative_to(DATA))
        # exercise both the bytes path and the file-like path of the tokenizer
        list(woosh.tokenize(raw))
        list(woosh.tokenize(buffered))
Ejemplo n.º 11
0
def tokenize_bytes(source, continue_on_error=False):
    """Tokenize *source* with woosh and return all tokens as a list."""
    token_iter = woosh.tokenize(source, continue_on_error=continue_on_error)
    return list(token_iter)
Ejemplo n.º 12
0
def tokenize_file_like(source, continue_on_error=False):
    """Wrap *source* bytes in a BytesIO and return all woosh tokens."""
    stream = io.BytesIO(source)
    return list(woosh.tokenize(stream, continue_on_error=continue_on_error))
Ejemplo n.º 13
0
def _(source, source_file):
    """Consume every token produced for *source_file*; *source* is unused."""
    for _token in tokenize(source_file):
        pass
Ejemplo n.º 14
0
def test_incorrect_type(bad_source):
    """tokenize() must reject an unsupported source type with TypeError."""
    with pytest.raises(TypeError):
        tokens = woosh.tokenize(bad_source)
        list(tokens)
Ejemplo n.º 15
0
def tokenize_file_like(source):
    """Wrap *source* bytes in a BytesIO and return all woosh tokens."""
    stream = io.BytesIO(source)
    return list(woosh.tokenize(stream))
Ejemplo n.º 16
0
def tokenize_bytes(source):
    """Tokenize *source* with woosh and return all tokens as a list."""
    token_iter = woosh.tokenize(source)
    return list(token_iter)
Ejemplo n.º 17
0
def tokenize_bytes(source):
    """Tokenize *source*, continuing past errors, and return all tokens."""
    token_iter = woosh.tokenize(source, continue_on_error=True)
    return list(token_iter)