Ejemplo n.º 1
0
def test_learning_always_changes_generation(chars, order):
    """Learning a string the DFA currently gets wrong must bump the generation.

    ``chars`` is the collection of accepted byte values and ``order`` is the
    sequence of byte values tried in turn.
    """
    learner = LStar(lambda s: len(s) == 1 and s[0] in chars)
    for byte in order:
        candidate = bytes([byte])
        generation_before = learner.generation
        # Only strings on which the current DFA disagrees with the
        # membership function are expected to change anything.
        if learner.dfa.matches(candidate) == learner.member(candidate):
            continue
        learner.learn(candidate)
        assert learner.generation > generation_before
Ejemplo n.º 2
0
def test_relearning_does_not_change_generation():
    """Learning the same string twice only changes the generation once."""
    learner = LStar(lambda s: len(s) >= 3)

    # First time round the example is new, so the generation moves on.
    generation_before = learner.generation
    learner.learn(bytes(3))
    assert learner.generation != generation_before

    # Second time round there is nothing left to learn from it.
    generation_before = learner.generation
    learner.learn(bytes(3))
    assert learner.generation == generation_before
Ejemplo n.º 3
0
def test_can_learn_varint_predicate(varints):
    """After learning to a fixed point, the DFA matches every given varint."""
    learner = LStar(varint_predicate)

    # Keep feeding the examples in until a full pass leaves the
    # generation unchanged.
    last_generation = -1
    while last_generation != learner.generation:
        last_generation = learner.generation

        for example in varints:
            learner.learn(example)

    for example in varints:
        assert learner.dfa.matches(example)
Ejemplo n.º 4
0
def test_learning_large_dfa():
    """Check that enumerating a DFA for "length is exactly 20" is fast.

    The main point of this test is that it finishes in reasonable time: a
    naive breadth first search would have to explore every string of
    length 19 before finding any of length 20, which would take ~forever.
    """
    target_length = 20

    learner = LStar(lambda s: len(s) == target_length)
    learner.learn(bytes(target_length))

    first_matches = itertools.islice(learner.dfa.all_matching_strings(), 500)
    for index, string in enumerate(first_matches):
        assert len(string) == target_length
        # The index-th match is expected to be the big-endian encoding
        # of index.
        assert int.from_bytes(string, "big") == index
Ejemplo n.º 5
0
def test_can_learn_simple_predicate():
    """Learning ``len(s) >= 3`` from one example gives a four-state chain."""
    learner = LStar(lambda s: len(s) >= 3)
    learner.learn(bytes(3))

    dfa = learner.dfa
    assert dfa.start == 0

    # Reading a zero byte advances one state at a time until state 3,
    # which loops back to itself.
    expected_transitions = [(0, 1), (1, 2), (2, 3), (3, 3)]
    for source, target in expected_transitions:
        assert dfa.transition(source, 0) == target

    # Only the final state accepts.
    for state in (0, 1, 2):
        assert not dfa.is_accepting(state)
    assert dfa.is_accepting(3)
Ejemplo n.º 6
0
def test_can_learn_dead_nodes():
    """Bytes above 1 at the start must lead to a dead state."""
    learner = LStar(lambda s: len(s) == 4 and max(s) <= 1)

    learner.learn(bytes(4))

    for fill in (0, 1):
        assert learner.dfa.matches(bytes([fill] * 4))

    # A string of length 5 is required to tell this apart from a DFA
    # that simply loops back to zero.
    learner.learn([2, 0, 0, 0, 0])

    dfa = learner.dfa

    for label in (2, 3):
        assert dfa.is_dead(dfa.transition(dfa.start, label))
Ejemplo n.º 7
0
def test_canonicalises_values_to_zero_where_appropriate():
    """Matching an all-ones string needs no further membership queries."""
    membership_queries = 0

    def member(s):
        nonlocal membership_queries
        membership_queries += 1
        return len(s) == 10

    learner = LStar(member)

    learner.learn(bytes(10))
    learner.learn(bytes(11))

    queries_before = membership_queries

    assert learner.dfa.matches(bytes([1] * 10))

    # Matching must have been answered without consulting ``member``.
    assert membership_queries == queries_before
Ejemplo n.º 8
0
def test_iterates_over_learned_strings():
    """The learned DFA enumerates exactly nine matching strings."""
    upper_bound = bytes([1, 2])
    learner = LStar(lambda s: len(s) == 2 and max(s) <= 5 and s <= upper_bound)

    learner.learn(upper_bound)
    # Corrections, applied in this order.
    for example in ([1, 2, 0], [6, 1, 2], [1, 3], [0, 5], [0, 6]):
        learner.learn(example)

    expected_count = 9
    # Ask for one more than expected so that any surplus match is noticed.
    found = list(
        itertools.islice(
            learner.dfa.all_matching_strings(), expected_count + 1
        )
    )
    assert len(found) == expected_count
Ejemplo n.º 9
0
def test_learning_is_just_checking_when_fully_explored():
    """Once the DFA is fully explored, learning costs one membership call."""
    membership_queries = 0

    def accept(s):
        nonlocal membership_queries
        membership_queries += 1
        return len(s) <= 5 and all(c == 0 for c in s)

    learner = LStar(accept)

    for byte in (0, 1):
        for length in range(10):
            learner.learn(bytes([byte]) * length)

    expected = [bytes(length) for length in range(6)]
    assert list(learner.dfa.all_matching_strings()) == expected

    queries_before = membership_queries

    learner.learn([2] * 11)

    # Exactly one extra query: the check of the new string itself.
    assert membership_queries - queries_before == 1
Ejemplo n.º 10
0
def test_iteration_with_dead_nodes():
    """Enumeration skips dead states and yields the four valid strings."""
    learner = LStar(lambda s: len(s) == 3 and max(s) <= 1 and s[1] == 0)

    # Examples are fed in this exact order.
    examples = (
        [1, 0, 1],
        [1, 1, 1],
        [0, 1, 1],
        [1, 1, 0],
        [1, 1, 1, 0, 1],
        [0, 0, 4],
    )
    for example in examples:
        learner.learn(example)

    dfa = learner.dfa
    after_one = dfa.transition(dfa.start, 1)
    assert not dfa.is_dead(after_one)
    assert dfa.is_dead(dfa.transition(after_one, 2))

    # First and last byte range over {0, 1}; the middle byte must be 0.
    expected = [
        bytes([first, 0, last]) for first in (0, 1) for last in (0, 1)
    ]
    assert list(learner.dfa.all_matching_strings()) == expected
Ejemplo n.º 11
0
def learner_for(strategy):
    """Return an LStar learner that predicts whether a buffer
    corresponds to a discard free choice sequence leading to
    a valid value for this strategy.

    Learners are memoised in the module-level ``LEARNERS`` mapping keyed
    by ``strategy``, so repeated calls with the same strategy return the
    same learner object without re-running any of the training below.
    """
    try:
        return LEARNERS[strategy]
    except KeyError:
        pass

    def test_function(data):
        # Every buffer that manages to draw a value is marked interesting,
        # so the runner ends up with a single interesting example.
        data.draw(strategy)
        data.mark_interesting()

    runner = ConjectureRunner(
        test_function,
        settings=settings(
            database=None,
            verbosity=Verbosity.quiet,
            # ``list(HealthCheck)`` is the non-deprecated spelling of
            # ``HealthCheck.all()`` and yields the same members.
            suppress_health_check=list(HealthCheck),
        ),
        # Fixed seed so that the learned DFA is reproducible.
        random=Random(0),
        ignore_limits=True,
    )

    def predicate(s):
        # Accept only buffers that are valid, discard free, and consumed
        # exactly (the recorded buffer equals the input).
        result = runner.cached_test_function(s)
        if result.status < Status.VALID:
            return False
        if result.has_discards:
            return False
        return result.buffer == s

    learner = LStar(predicate)

    runner.run()

    # Exactly one interesting example is expected here; the unpacking
    # fails loudly otherwise.
    (v,) = runner.interesting_examples.values()

    # We make sure the learner has properly learned small examples.
    # This is all fairly ad hoc but is mostly designed to get it
    # to understand what the smallest example is and avoid any
    # loops at the beginning of the DFA that don't really exist.
    learner.learn(v.buffer)

    for n in [1, 2, 3]:
        for _ in range(5):
            learner.learn(uniform(runner.random, n) + v.buffer)

    # Keep training on random buffers until a whole round of ten
    # attempts leaves the learner's generation unchanged.
    prev = -1
    while learner.generation != prev:
        prev = learner.generation

        for _ in range(10):
            s = uniform(runner.random, len(v.buffer)) + bytes(BUFFER_SIZE)
            learner.learn(s)
            data = runner.cached_test_function(s)
            if data.status >= Status.VALID:
                # Also learn the buffer the run actually recorded.
                learner.learn(data.buffer)

    LEARNERS[strategy] = learner
    return learner
Ejemplo n.º 12
0
def learn_a_new_dfa(runner, u, v, predicate):
    """Given two buffers ``u`` and ``v``, learn a DFA that will
    allow the shrinker to normalise them better. ``u`` and ``v``
    should not currently shrink to the same test case when calling
    this function.

    ``runner`` must provide ``cached_test_function`` and ``new_shrinker``
    (presumably a ConjectureRunner - confirm against callers).
    ``predicate`` is called on results of ``runner.cached_test_function``
    and must hold for both ``u`` and ``v``.

    Returns the canonicalised DFA that was learned. Along the way this
    asserts that the DFA matches the differing region of the larger
    buffer and that running a ``dfa_replacement`` shrink pass with it
    produces something smaller than that buffer.
    """
    from hypothesis.internal.conjecture.shrinker import dfa_replacement, sort_key

    assert predicate(runner.cached_test_function(u))
    assert predicate(runner.cached_test_function(v))

    u_shrunk = fully_shrink(runner, u, predicate)
    v_shrunk = fully_shrink(runner, v, predicate)

    # From here on ``u`` is the smaller and ``v`` the larger of the two
    # fully shrunk buffers, as ordered by ``sort_key``.
    u, v = sorted((u_shrunk.buffer, v_shrunk.buffer), key=sort_key)

    assert u != v

    # Note: the common-prefix scan below would index past the end of
    # ``u`` if ``u`` were a prefix of ``v``.
    assert not v.startswith(u)

    # We would like to avoid using LStar on large strings as its
    # behaviour can be quadratic or worse. In order to help achieve
    # this we peel off a common prefix and suffix of the two final
    # results and just learn the internal bit where they differ.
    #
    # This potentially reduces the length quite far if there's
    # just one tricky bit of control flow we're struggling to
    # reduce inside a strategy somewhere and the rest of the
    # test function reduces fine.
    if v.endswith(u):
        # All of ``u`` is a shared suffix, so its core is empty and the
        # core of ``v`` is whatever precedes that suffix.
        prefix = b""
        suffix = u
        u_core = b""
        assert len(u) > 0
        v_core = v[:-len(u)]
    else:
        # Length of the longest common prefix.
        i = 0
        while u[i] == v[i]:
            i += 1
        prefix = u[:i]
        assert u.startswith(prefix)
        assert v.startswith(prefix)

        # Scan backwards from the end for the first position where
        # the two buffers differ.
        i = 1
        while u[-i] == v[-i]:
            i += 1

        # The ``max`` stops the suffix from starting inside the prefix
        # that has already been claimed from ``u``.
        suffix = u[max(len(prefix), len(u) + 1 - i):]
        assert u.endswith(suffix)
        assert v.endswith(suffix)

        u_core = u[len(prefix):len(u) - len(suffix)]
        v_core = v[len(prefix):len(v) - len(suffix)]

    assert u == prefix + u_core + suffix, (list(u), list(v))
    assert v == prefix + v_core + suffix, (list(u), list(v))

    better = runner.cached_test_function(u)
    worse = runner.cached_test_function(v)

    allow_discards = worse.has_discards or better.has_discards

    def is_valid_core(s):
        # Membership function for the language being learned: ``s`` is
        # spliced between the shared prefix and suffix and then run.
        if not (len(u_core) <= len(s) <= len(v_core)):
            return False
        buf = prefix + s + suffix
        result = runner.cached_test_function(buf)
        return (
            predicate(result)
            # Because we're often using this to learn strategies
            # rather than entire complex test functions, it's
            # important that our replacements are precise and
            # don't leave the rest of the test case in a weird
            # state.
            and result.buffer == buf
            # Because the shrinker is good at removing discarded
            # data, unless we need discards to allow one or both
            # of u and v to result in valid shrinks, we don't
            # count attempts that have them as valid. This will
            # cause us to match fewer strings, which will make
            # the resulting shrink pass more efficient when run
            # on test functions it wasn't really intended for.
            and (allow_discards or not result.has_discards))

    assert sort_key(u_core) < sort_key(v_core)

    assert is_valid_core(u_core)
    assert is_valid_core(v_core)

    learner = LStar(is_valid_core)

    # Iterate until a full pass leaves the learner's generation unchanged.
    prev = -1
    while learner.generation != prev:
        prev = learner.generation
        learner.learn(u_core)
        learner.learn(v_core)

        # L* has a tendency to learn DFAs which wrap around to
        # the beginning. We don't want it to do that unless
        # it's accurate, so we use these as examples to check
        # going around the DFA twice.
        learner.learn(u_core * 2)
        learner.learn(v_core * 2)

        if learner.dfa.max_length(learner.dfa.start) > len(v_core):
            # The language we learn is finite and bounded above
            # by the length of v_core. This is important in order
            # to keep our shrink passes reasonably efficient -
            # otherwise they can match far too much. So whenever
            # we learn a DFA that could match a string longer
            # than len(v_core) we fix it by finding the first
            # string longer than v_core and learning that as
            # a correction.
            x = next(
                learner.dfa.all_matching_strings(min_length=len(v_core) + 1))
            assert not is_valid_core(x)
            learner.learn(x)
            assert not learner.dfa.matches(x)
            assert learner.generation != prev
        else:
            # We mostly care about getting the right answer on the
            # minimal test case, but because we're doing this offline
            # anyway we might as well spend a little more time trying
            # small examples to make sure the learner gets them right.
            for x in islice(learner.dfa.all_matching_strings(), 100):
                if not is_valid_core(x):
                    learner.learn(x)
                    assert learner.generation != prev
                    # The DFA has changed, so restart from the outer loop.
                    break

    # We've now successfully learned a DFA that works for shrinking
    # our failed normalisation further. Canonicalise it into a concrete
    # DFA so we can save it for later.
    new_dfa = learner.dfa.canonicalise()

    assert math.isfinite(new_dfa.max_length(new_dfa.start))

    shrinker = runner.new_shrinker(runner.cached_test_function(v), predicate)

    # The DFA must match the region of ``v`` between the shared prefix
    # and suffix, i.e. exactly where ``v_core`` sits.
    assert (len(prefix),
            len(v) - len(suffix)) in shrinker.matching_regions(new_dfa)

    name = "tmp-dfa-" + repr(new_dfa)

    shrinker.extra_dfas[name] = new_dfa

    shrinker.fixate_shrink_passes([dfa_replacement(name)])

    # Sanity check: with the new DFA available the shrinker must make
    # progress on ``v``.
    assert sort_key(shrinker.buffer) < sort_key(v)

    return new_dfa
Ejemplo n.º 13
0
def test_iterates_over_learned_strings():
    """After learning to a fixed point, exactly nine valid strings match."""
    upper_bound = bytes([1, 2])
    learner = LStar(lambda s: len(s) == 2 and max(s) <= 5 and s <= upper_bound)

    learner.learn(upper_bound)

    # The training examples, in the exact order they are fed to the
    # learner on each pass.
    examples = [[1, 2, 0], [6, 1, 2], [1, 3]]
    for second in range(7):
        examples.append([0, second])
        examples.append([1, second])
    examples.append([2, 0])
    examples += [
        [2, 0, 0, 0],
        [2, 0, 0],
        [0, 6, 0, 0],
        [1, 3, 0, 0],
        [1, 6, 0, 0],
        [0, 0, 0, 0, 0],
    ]

    # Repeat whole passes until one leaves the generation unchanged.
    last_generation = -1
    while last_generation != learner.generation:
        last_generation = learner.generation
        for example in examples:
            learner.learn(example)

    dfa = learner.dfa

    expected_count = 9
    # Ask for one more than expected so that any surplus match is noticed.
    found = list(
        itertools.islice(dfa.all_matching_strings(), expected_count + 1)
    )
    for string in found:
        assert learner.member(string), list(string)
    assert len(found) == expected_count
Ejemplo n.º 14
0
def test_cannot_reuse_dfa():
    """A DFA fetched before further learning raises InvalidState when used."""
    learner = LStar(lambda s: len(s) == 3)
    stale_dfa = learner.dfa
    learner.learn(bytes(3))
    # Merely reading an attribute of the outdated DFA must fail.
    with pytest.raises(InvalidState):
        stale_dfa.start