Esempio n. 1
0
def test_deterministic_generation():
    """Check ``generate_string`` and ``matching_strings_iter`` on small languages."""
    ab_gen = dgen(r'(ab)*')
    for length, expected in ((0, ''), (2, 'ab'), (4, 'abab')):
        assert ab_gen.generate_string(length) == expected
    # No string of length 3 is produced for (ab)*.
    assert ab_gen.generate_string(3) is None

    # Intersecting with a bounded repeat leaves only finitely many strings.
    bounded = revex.compile('(aa)*') & revex.compile('a{0,7}')
    enumerated = list(dgen(bounded).matching_strings_iter())
    assert enumerated == ['', 'aa', 'aaaa', 'aaaaaa']

    cases = (
        (r'abc', {'abc'}),
        (r'abc|def', {'abc', 'def'}),
    )
    for pattern, expected_strings in cases:
        assert set(dgen(pattern).matching_strings_iter()) == expected_strings
Esempio n. 2
0
def test_overflow_example():
    """Regression test for float overflow in computing the probability
    distribution.
    """
    max_exponent = int(math.ceil(math.log(float_info.max) / math.log(2))) - 1
    # Sanity check: doubling 2.0 ** max_exponent overflows to infinity.
    assert (2. ** max_exponent) * 2. == float('inf')

    binary_string = re.compile(r'^[01]+$')
    generator = rgen(revex.compile(r'[01]+'), alphabet=list('01'))
    # Both requested lengths exceed the exponent checked above.
    for length in (max_exponent + 1, max_exponent * 2):
        assert binary_string.match(generator.generate_string(length))
Esempio n. 3
0
def test_random_walk_matches_regex(regex):
    """Check that randomly generated strings are matched by ``regex``.

    For each of the first ten valid lengths reported by the generator, ten
    strings are generated and checked against both the builtin ``re`` engine
    and the revex expression compiled from the same pattern.

    :param regex: pattern source, accepted by both ``re`` and ``revex``.
    """
    # Group the pattern before anchoring it.  Without the group, a top-level
    # alternation such as ``a|b`` turns into ``^a|b$``, which anchors each
    # branch on one side only and therefore accepts strings outside the
    # language.  ``\A``/``\Z`` are used instead of ``^``/``$`` so a trailing
    # newline is not silently tolerated.
    actual = re.compile(r'\A(%s)\Z' % regex)
    revex_regex = revex.compile(regex)
    gen = rgen(revex_regex, alphabet=list(set(regex)))
    for length in islice(gen.valid_lengths_iter(), 10):
        for _ in range(10):
            rand_string = gen.generate_string(length)
            failure_message = '%s should match %s' % (regex, rand_string)
            assert actual.match(rand_string), failure_message
            assert revex_regex.match(rand_string), failure_message
Esempio n. 4
0
def test_repeat():
    """Exercise counted repetition and patterns containing stray braces."""
    bounded = RE('a{0,2}[a-z]')
    # Zero, one or two leading 'a's are accepted; three are not.
    for count in range(3):
        assert bounded.match('a' * count + 'q')
    assert not bounded.match('a' * 3 + 'q')

    # A counted repeat compares equal to the spelled-out expression.
    equivalent_pairs = (
        ('a{3}', 'aaa'),
        ('ba{3}', 'baaa'),
        ('(ba){3}', 'bababa'),
    )
    for counted, expanded in equivalent_pairs:
        assert compile(counted) == compile(expanded)

    # These brace-containing patterns match their own text.
    for literal in ('{', 'a{}'):
        assert RE(literal).match(literal)
Esempio n. 5
0
def test_valid_lengths_iter():
    """Check the lengths yielded by ``valid_lengths_iter`` for several languages."""
    alphabet = 'abc'

    def first_fifty(expression):
        # First 50 valid lengths for ``expression`` over the shared alphabet.
        generator = RandomRegularLanguageGenerator(expression.as_dfa(alphabet))
        return list(islice(generator.valid_lengths_iter(), 0, 50))

    ab_lengths = first_fifty(revex.compile('(ab)*'))
    assert [2 * k for k in range(50)] == ab_lengths

    aabb_lengths = first_fifty(revex.compile('(aa)*(bb)*'))
    assert [2 * k for k in range(50)] == aabb_lengths

    # Strings of 'a' whose length is a multiple of both 2 and 3.
    sixes_lengths = first_fifty(
        revex.compile('(aa)*') & revex.compile('(aaa)*'))
    assert [6 * k for k in range(50)] == sixes_lengths

    finite_dfa = (revex.compile('(aa)*') & revex.compile('a{0,16}')).as_dfa('a')
    finite_gen = RandomRegularLanguageGenerator(finite_dfa)
    assert set(finite_gen.valid_lengths_iter()) == set(range(0, 17, 2))

    empty_gen = RandomRegularLanguageGenerator(EMPTY.as_dfa())
    assert [] == list(empty_gen.valid_lengths_iter())
Esempio n. 6
0
def test_longest_string():
    """Check ``has_finite_language`` and ``longest_string`` on several DFAs."""
    ip_alphabet = '0123456789.'
    ip = revex.compile(
        r'((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)')

    def intersection_dfa(left, right):
        # DFA over 'ab' for the intersection of the two patterns.
        return (revex.compile(left) & revex.compile(right)).as_dfa('ab')

    assert ip.as_dfa(ip_alphabet).has_finite_language
    assert len(ip.as_dfa(ip_alphabet).longest_string) == 15
    assert ip.match(ip.as_dfa(ip_alphabet).longest_string)

    # Accessing ``longest_string`` is expected to raise here.
    with pytest.raises(InfiniteLanguageError):
        intersection_dfa(r'([ab]{4})*', r'([ab]{3})*').longest_string

    assert intersection_dfa(r'(ab)*', r'(ba)*').has_finite_language
    assert intersection_dfa(r'(ab)*', r'(ba)*').longest_string == ''

    assert intersection_dfa(r'(ab)+', r'(ba)+').has_finite_language
    with pytest.raises(EmptyLanguageError):
        intersection_dfa(r'(ab)+', r'(ba)+').longest_string

    assert EPSILON.as_dfa().longest_string == ''
Esempio n. 7
0
def test_is_empty():
    """Check ``is_empty`` on DFAs built from several expressions."""
    for expression in (~EPSILON, EPSILON):
        assert not expression.as_dfa().is_empty
    assert EMPTY.as_dfa().is_empty

    disjoint = revex.compile('a*|b*') & revex.compile('c+')
    assert disjoint.as_dfa('abc').is_empty
Esempio n. 8
0
import re
from typing import Set  # noqa
from typing import Tuple  # noqa

import pytest
import six  # noqa
from hypothesis import given, example
from hypothesis import strategies as st

import revex
from revex.derivative import EPSILON, EMPTY
from revex.dfa import DFA, get_equivalent_states, minimize_dfa, \
    InfiniteLanguageError, EmptyLanguageError


# Shared fixtures: the same pattern compiled three ways so the tests below can
# compare the revex results against the builtin ``re`` engine.
example_regex = revex.compile(r'a[abc]*b[abc]*c')
# The DFA alphabet includes 'd', a character the pattern itself never uses.
example_dfa = revex.build_dfa(r'a[abc]*b[abc]*c', alphabet='abcd')
# Anchored on both ends for use with ``re``'s ``match``.
example_builtin_regex = re.compile(r'^a[abc]*b[abc]*c$')


@given(st.text(alphabet='abcd'))
@example('abbbbc')
def test_derivative_matches_builtin(s):
    """``example_regex`` and the builtin regex agree on whether ``s`` matches."""
    revex_result = example_regex.match(s)
    builtin_result = bool(example_builtin_regex.match(s))
    assert revex_result == builtin_result


@given(st.text(alphabet='abcd'))
@example('abbbbc')
def test_dfa_matches_builtin(s):
    """``example_dfa`` and the builtin regex agree on whether ``s`` matches."""
    dfa_result = example_dfa.match(s)
    builtin_result = bool(example_builtin_regex.match(s))
    assert dfa_result == builtin_result
Esempio n. 9
0
def test_longest_string():
    """Exercise ``has_finite_language`` and ``longest_string`` on several DFAs."""
    ip = revex.compile(
        r'((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    )

    def ip_dfa():
        # Fresh DFA for the IP pattern over digits and the dot.
        return ip.as_dfa('0123456789.')

    def both(left, right):
        # DFA over 'ab' accepting strings matched by both patterns.
        combined = revex.compile(left) & revex.compile(right)
        return combined.as_dfa('ab')

    assert ip_dfa().has_finite_language
    assert len(ip_dfa().longest_string) == 15
    assert ip.match(ip_dfa().longest_string)

    # Reading ``longest_string`` is expected to raise for this intersection.
    with pytest.raises(InfiniteLanguageError):
        both(r'([ab]{4})*', r'([ab]{3})*').longest_string

    assert both(r'(ab)*', r'(ba)*').has_finite_language
    assert both(r'(ab)*', r'(ba)*').longest_string == ''

    assert both(r'(ab)+', r'(ba)+').has_finite_language
    with pytest.raises(EmptyLanguageError):
        both(r'(ab)+', r'(ba)+').longest_string

    assert EPSILON.as_dfa().longest_string == ''
Esempio n. 10
0
def test_is_empty():
    """Verify which DFAs report an empty language through ``is_empty``."""
    complement_of_epsilon = (~EPSILON).as_dfa()
    assert not complement_of_epsilon.is_empty

    epsilon_dfa = EPSILON.as_dfa()
    assert not epsilon_dfa.is_empty

    empty_dfa = EMPTY.as_dfa()
    assert empty_dfa.is_empty

    intersection = revex.compile('a*|b*') & revex.compile('c+')
    assert intersection.as_dfa('abc').is_empty
Esempio n. 11
0
import re
from typing import Set  # noqa
from typing import Tuple  # noqa

import pytest
import six  # noqa
from hypothesis import given, example
from hypothesis import strategies as st

import revex
from revex.derivative import EPSILON, EMPTY
from revex.dfa import DFA, get_equivalent_states, minimize_dfa, \
    InfiniteLanguageError, EmptyLanguageError

# Shared fixtures: one pattern compiled with revex, built as a revex DFA, and
# compiled with the builtin ``re`` module for cross-checking in the tests below.
example_regex = revex.compile(r'a[abc]*b[abc]*c')
# The DFA alphabet includes 'd', a character the pattern itself never uses.
example_dfa = revex.build_dfa(r'a[abc]*b[abc]*c', alphabet='abcd')
# Anchored on both ends for use with ``re``'s ``match``.
example_builtin_regex = re.compile(r'^a[abc]*b[abc]*c$')


@given(st.text(alphabet='abcd'))
@example('abbbbc')
def test_derivative_matches_builtin(s):
    """Compare ``example_regex.match`` with the builtin regex on ``s``."""
    observed = example_regex.match(s)
    expected = bool(example_builtin_regex.match(s))
    assert observed == expected


@given(st.text(alphabet='abcd'))
@example('abbbbc')
def test_dfa_matches_builtin(s):
    """Compare ``example_dfa.match`` with the builtin regex on ``s``."""
    observed = example_dfa.match(s)
    expected = bool(example_builtin_regex.match(s))
    assert observed == expected
Esempio n. 12
0
def test():
    """Check random generation for ``(a|bb|ccc)*`` and for its complement.

    Generated strings are compared against expected distributions for a few
    small lengths, then checked against the builtin ``re`` engine for
    lengths 1 through 14.
    """
    dfa = revex.build_dfa(r'(a|bb|ccc)*', alphabet='abc')
    gen = RandomRegularLanguageGenerator(dfa)

    neg_dfa = (~revex.compile(r'(a|bb|ccc)*')).as_dfa(alphabet='abc')
    neg_gen = RandomRegularLanguageGenerator(neg_dfa)

    regex = re.compile(r'^(a|bb|ccc)*$')

    def sample(generator, length, count):
        # Tally ``count`` strings of the given length drawn from ``generator``.
        return Counter(generator.generate_string(length) for _ in range(count))

    # These assertions are mostly probabilistic, so the numbers are chosen so
    # as to make the tests quite likely to pass.
    for length, only_string in ((0, ''), (1, 'a')):
        drawn = {gen.generate_string(length) for _ in range(10)}
        assert drawn == {only_string}

    negs_1 = sample(neg_gen, 1, 1000)
    assert_dist_approximately_equal(negs_1, {'b': 0.5, 'c': 0.5})

    pos_2 = sample(gen, 2, 1000)
    negs_2 = sample(neg_gen, 2, 1000)
    assert_dist_approximately_equal(pos_2, {'aa': 0.5, 'bb': 0.5})
    non_matching_pairs = ['ab', 'ba', 'cc', 'ca', 'cb', 'bc', 'ac']
    assert_dist_approximately_equal(
        negs_2, dict.fromkeys(non_matching_pairs, 1 / 7))

    pos_6 = sample(gen, 6, 10000)
    possibilities = [
        'aaaaaa', 'cccccc', 'bbbbbb', 'cccaaa', 'aaaccc',
        'abbccc', 'acccbb', 'bbaccc', 'bbccca', 'cccabb',
        'cccbba', 'bbaaaa', 'abbaaa', 'aabbaa', 'aaabba',
        'aaaabb', 'aabbbb', 'bbaabb', 'bbbbaa',
    ]
    assert_dist_approximately_equal(
        pos_6, dict.fromkeys(possibilities, 1 / len(possibilities)))

    for length in range(1, 15):
        for _ in range(100):
            pos = gen.generate_string(length)
            neg = neg_gen.generate_string(length)
            assert regex.match(pos), pos
            assert not regex.match(neg), neg
Esempio n. 13
0
def dgen(regex, alphabet=None):
    """Build a ``DeterministicRegularLanguageGenerator`` for ``regex``.

    :param regex: a ``RegularExpression`` instance, or anything accepted by
        ``revex.compile``.
    :param alphabet: alphabet passed to ``as_dfa``; when falsy, the distinct
        characters of the text form of ``regex`` are used.
    """
    if not alphabet:
        alphabet = list(set(six.text_type(regex)))
    if isinstance(regex, RegularExpression):
        compiled = regex
    else:
        compiled = revex.compile(regex)
    dfa = compiled.as_dfa(alphabet)
    return DeterministicRegularLanguageGenerator(dfa)
Esempio n. 14
0
def rgen(regex, alphabet=None):
    """Build a ``RandomRegularLanguageGenerator`` for ``regex``.

    :param regex: a ``RegularExpression`` instance, or anything accepted by
        ``revex.compile``.
    :param alphabet: alphabet passed to ``as_dfa``; when falsy, the distinct
        characters of ``str(regex)`` are used.
    """
    if not alphabet:
        alphabet = list(set(str(regex)))
    if isinstance(regex, RegularExpression):
        compiled = regex
    else:
        compiled = revex.compile(regex)
    dfa = compiled.as_dfa(alphabet)
    return RandomRegularLanguageGenerator(dfa)
Esempio n. 15
0
def test_empty():
    """Check the empty pattern and alternations with an empty branch."""
    assert compile('') == EPSILON
    # Each pattern should match both the empty string and 'a'.
    for pattern in ('(a|)', 'a|'):
        for text in ('', 'a'):
            assert RE(pattern).match(text)
Esempio n. 16
0
 def __init__(self, pattern):
     """Compile ``pattern`` with ``re`` and with the in-scope ``compile``.

     ``base_re`` wraps the pattern in a group anchored with ``\\A``/``\\Z``.
     NOTE(review): ``compile`` is presumably the project's regex compiler
     rather than the builtin -- confirm against the module's imports.
     """
     anchored_source = r'\A(%s)\Z' % pattern
     self.base_re = re.compile(anchored_source)
     self.re = compile(pattern)