Example #1
0
class ConjectureRunner(object):
    def __init__(
        self,
        test_function,
        settings=None,
        random=None,
        database_key=None,
    ):
        self._test_function = test_function
        self.settings = settings or Settings()
        self.last_data = None
        self.shrinks = 0
        self.call_count = 0
        self.event_call_counts = Counter()
        self.valid_examples = 0
        self.start_time = time.time()
        self.random = random or Random(getrandbits(128))
        self.database_key = database_key
        self.status_runtimes = {}
        self.events_to_strings = WeakKeyDictionary()

        self.target_selector = TargetSelector(self.random)

        # Tree nodes are stored in an array to prevent heavy nesting of data
        # structures. Branches are dicts mapping bytes to child nodes (which
        # will in general only be partially populated). Leaves are
        # ConjectureData objects that have been previously seen as the result
        # of following that path.
        self.tree = [{}]

        # A node is dead if there is nothing left to explore past that point.
        # Recursively, a node is dead if either it is a leaf or every byte
        # leads to a dead node when starting from here.
        self.dead = set()
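
        # Hedged illustration (not part of the original code): after running
        # the two buffers b'\x00' and b'\x01\x02' to completion, these
        # structures would plausibly look like
        #
        #   self.tree == [{0: 1, 1: 2}, <ConjectureData>, {2: 3},
        #                 <ConjectureData>]
        #   self.dead == {1, 3}
        #
        # i.e. both leaves are dead, while the root and the interior node at
        # index 2 still have unexplored byte values.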

        # We rewrite the byte stream at various points during parsing, to one
        # that will produce an equivalent result but is in some sense more
        # canonical. We keep track of these so that when walking the tree we
        # can identify nodes where the exact byte value doesn't matter and
        # treat all bytes there as equivalent. This significantly reduces the
        # size of the search space and removes a lot of redundant examples.

        # Maps tree indices to the unique byte that is valid at that point.
        # Corresponds to data.write() calls.
        self.forced = {}

        # Maps tree indices to the maximum byte that is valid at that point.
        # Currently this is only used inside draw_bits, but it potentially
        # could get used elsewhere.
        self.capped = {}

        # Where a tree node consists of the beginning of a block we track the
        # size of said block. This allows us to tell when an example is too
        # short even if it goes off the unexplored region of the tree - if it
        # is at the beginning of a block of size 4 but only has 3 bytes left,
        # it's going to overrun the end of the buffer regardless of the
        # buffer contents.
        self.block_sizes = {}

        self.interesting_examples = {}
        self.covering_examples = {}

        self.shrunk_examples = set()

        self.tag_intern_table = {}

    def __tree_is_exhausted(self):
        return 0 in self.dead

    def test_function(self, data):
        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise e
        except BaseException:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)

        self.debug_data(data)

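        # Intern the (frequently repeated) tag sets so that equal tag
        # combinations share a single frozenset instance across examples.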
        tags = frozenset(data.tags)
        data.tags = self.tag_intern_table.setdefault(tags, tags)

        if data.status == Status.VALID:
            self.valid_examples += 1
            for t in data.tags:
                existing = self.covering_examples.get(t)
                if (existing is None
                        or sort_key(data.buffer) < sort_key(existing.buffer)):
                    self.covering_examples[t] = data
                    if self.database is not None:
                        self.database.save(self.covering_key, data.buffer)
                        if existing is not None:
                            self.database.delete(self.covering_key,
                                                 existing.buffer)

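        # Walk the radix tree along data.buffer, creating nodes as we go and
        # recording any forced or capped byte positions we encounter.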
        tree_node = self.tree[0]
        indices = []
        node_index = 0
        for i, b in enumerate(data.buffer):
            indices.append(node_index)
            if i in data.forced_indices:
                self.forced[node_index] = b
            try:
                self.capped[node_index] = data.capped_indices[i]
            except KeyError:
                pass
            try:
                node_index = tree_node[b]
            except KeyError:
                node_index = len(self.tree)
                self.tree.append({})
                tree_node[b] = node_index
            tree_node = self.tree[node_index]
            if node_index in self.dead:
                break

        for u, v in data.blocks:
            # This can happen if we hit a dead node when walking the buffer.
            # In that case we already have this section of the tree mapped.
            if u >= len(indices):
                break
            self.block_sizes[indices[u]] = v - u

        if data.status != Status.OVERRUN and node_index not in self.dead:
            self.dead.add(node_index)
            self.tree[node_index] = data

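            # Walk back towards the root, marking each ancestor dead once
            # every child byte it can ever have (allowing for caps and
            # forced bytes) leads to a dead node.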
            for j in reversed(indices):
                if (len(self.tree[j]) < self.capped.get(j, 255) + 1
                        and j not in self.forced):
                    break
                if set(self.tree[j].values()).issubset(self.dead):
                    self.dead.add(j)
                else:
                    break

        last_data_is_interesting = (self.last_data is not None
                                    and self.last_data.status
                                    == Status.INTERESTING)

        if data.status == Status.INTERESTING:
            first_call = len(self.interesting_examples) == 0

            key = data.interesting_origin
            changed = False
            try:
                existing = self.interesting_examples[key]
            except KeyError:
                changed = True
            else:
                if sort_key(data.buffer) < sort_key(existing.buffer):
                    self.downgrade_buffer(existing.buffer)
                    changed = True

            if changed:
                self.interesting_examples[key] = data
                self.shrunk_examples.discard(key)
                if last_data_is_interesting and not first_call:
                    self.shrinks += 1

            if not last_data_is_interesting or (
                    sort_key(data.buffer) < sort_key(self.last_data.buffer)
                    and data.interesting_origin
                    == self.last_data.interesting_origin):
                self.last_data = data

            if self.shrinks >= self.settings.max_shrinks:
                self.exit_with(ExitReason.max_shrinks)
        elif (self.last_data is None
              or self.last_data.status < Status.INTERESTING):
            self.last_data = data
        if (self.settings.timeout > 0
                and time.time() >= self.start_time + self.settings.timeout):
            self.exit_with(ExitReason.timeout)

        if not self.interesting_examples:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)
            if self.call_count >= max(self.settings.max_iterations,
                                      self.settings.max_examples):
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

    def save_buffer(self, buffer, key=None):
        if self.settings.database is not None:
            if key is None:
                key = self.database_key
            if key is None:
                return
            self.settings.database.save(key, hbytes(buffer))

    def downgrade_buffer(self, buffer):
        if self.settings.database is not None:
            self.settings.database.move(self.database_key, self.secondary_key,
                                        buffer)

    @property
    def secondary_key(self):
        return b'.'.join((self.database_key, b"secondary"))

    @property
    def covering_key(self):
        return b'.'.join((self.database_key, b"coverage"))

    def note_details(self, data):
        if data.status == Status.INTERESTING:
            if (self.last_data is None
                    or self.last_data.status != Status.INTERESTING
                    or self.last_data.interesting_origin
                    == data.interesting_origin):
                self.save_buffer(data.buffer)
            else:
                self.save_buffer(data.buffer, self.secondary_key)
        runtime = max(data.finish_time - data.start_time, 0.0)
        self.status_runtimes.setdefault(data.status, []).append(runtime)
        for event in set(map(self.event_to_string, data.events)):
            self.event_call_counts[event] += 1

    def debug(self, message):
        with self.settings:
            debug_report(message)

    def debug_data(self, data):
        buffer_parts = [u"["]
        for i, (u, v) in enumerate(data.blocks):
            if i > 0:
                buffer_parts.append(u" || ")
            buffer_parts.append(u', '.join(
                int_to_text(int(i)) for i in data.buffer[u:v]))
        buffer_parts.append(u']')

        status = unicode_safe_repr(data.status)

        if data.status == Status.INTERESTING:
            status = u'%s (%s)' % (
                status, unicode_safe_repr(data.interesting_origin))

        self.debug(u'%d bytes %s -> %s, %s' % (
            data.index,
            u''.join(buffer_parts),
            status,
            data.output,
        ))

    def prescreen_buffer(self, buffer):
        """Attempt to rule out buffer as a possible interesting candidate.

        Returns False if we know for sure that running this buffer will not
        produce an interesting result. Returns True if it might (because it
        explores territory we have not previously tried).

        This is purely an optimisation to try to reduce the number of tests we
        run. "return True" would be a valid but inefficient implementation.

        """
        node_index = 0
        n = len(buffer)
        for k, b in enumerate(buffer):
            if node_index in self.dead:
                return False
            try:
                # The block size at that point provides a lower bound on how
                # many more bytes are required. If the buffer does not have
                # enough bytes to fulfill that block size then we can rule out
                # this buffer.
                if k + self.block_sizes[node_index] > n:
                    return False
            except KeyError:
                pass
            try:
                b = self.forced[node_index]
            except KeyError:
                pass
            try:
                b = min(b, self.capped[node_index])
            except KeyError:
                pass
            try:
                node_index = self.tree[node_index][b]
            except KeyError:
                return True
        else:
            return False

    def incorporate_new_buffer(self, buffer):
        assert self.last_data.status == Status.INTERESTING
        start = self.last_data.interesting_origin

        buffer = hbytes(buffer[:self.last_data.index])
        assert sort_key(buffer) < sort_key(self.last_data.buffer)

        if not self.prescreen_buffer(buffer):
            return False

        assert sort_key(buffer) <= sort_key(self.last_data.buffer)
        data = ConjectureData.for_buffer(buffer)
        self.test_function(data)
        assert self.last_data.interesting_origin == start
        return data is self.last_data

    def run(self):
        with self.settings:
            try:
                self._run()
            except RunIsComplete:
                pass
            if self.interesting_examples:
                self.last_data = max(self.interesting_examples.values(),
                                     key=lambda d: sort_key(d.buffer))
            if self.last_data is not None:
                self.debug_data(self.last_data)
            self.debug(
                u'Run complete after %d examples (%d valid) and %d shrinks' % (
                    self.call_count,
                    self.valid_examples,
                    self.shrinks,
                ))

    def _new_mutator(self):
        def draw_new(data, n):
            return uniform(self.random, n)

        def draw_existing(data, n):
            return self.last_data.buffer[data.index:data.index + n]

        def draw_smaller(data, n):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r <= existing:
                return r
            return _draw_predecessor(self.random, existing)

        def draw_larger(data, n):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r >= existing:
                return r
            return _draw_successor(self.random, existing)

        def reuse_existing(data, n):
            choices = (data.block_starts.get(n, [])
                       or self.last_data.block_starts.get(n, []))
            if choices:
                i = self.random.choice(choices)
                return self.last_data.buffer[i:i + n]
            else:
                result = uniform(self.random, n)
                assert isinstance(result, hbytes)
                return result

        def flip_bit(data, n):
            buf = bytearray(self.last_data.buffer[data.index:data.index + n])
            i = self.random.randint(0, n - 1)
            k = self.random.randint(0, 7)
            buf[i] ^= (1 << k)
            return hbytes(buf)

        def draw_zero(data, n):
            return hbytes(b'\0' * n)

        def draw_max(data, n):
            return hbytes([255]) * n

        def draw_constant(data, n):
            return bytes_from_list([self.random.randint(0, 255)] * n)

        def redraw_last(data, n):
            u = self.last_data.blocks[-1][0]
            if data.index + n <= u:
                return self.last_data.buffer[data.index:data.index + n]
            else:
                return uniform(self.random, n)

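        # Some strategies appear more than once in the list below; the
        # duplication is a cheap way of weighting random.choice towards
        # them. Each mutator then commits to a random sample of three
        # strategies (``bits``) and picks among those for every draw.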
        options = [
            draw_new,
            redraw_last,
            redraw_last,
            reuse_existing,
            reuse_existing,
            draw_existing,
            draw_smaller,
            draw_larger,
            flip_bit,
            draw_zero,
            draw_max,
            draw_zero,
            draw_max,
            draw_constant,
        ]

        bits = [self.random.choice(options) for _ in hrange(3)]

        def draw_mutated(data, n):
            if data.index + n > len(self.last_data.buffer):
                result = uniform(self.random, n)
            else:
                result = self.random.choice(bits)(data, n)

            return self.__rewrite_for_novelty(data,
                                              self.__zero_bound(data, result))

        return draw_mutated

    def __rewrite(self, data, result):
        return self.__rewrite_for_novelty(data,
                                          self.__zero_bound(data, result))

    def __zero_bound(self, data, result):
        """This tries to get the size of the generated data under control by
        replacing the result with zero if we are too deep or have already
        generated too much data.

        This causes us to enter "shrinking mode" there and thus reduce
        the size of the generated data.

        """
        if (data.depth * 2 >= MAX_DEPTH or
            (data.index + len(result)) * 2 >= self.settings.buffer_size):
            if any(result):
                data.hit_zero_bound = True
            return hbytes(len(result))
        else:
            return result

    def __rewrite_for_novelty(self, data, result):
        """Take a block that is about to be added to data as the result of a
        draw_bytes call and rewrite it a small amount to ensure that the result
        will be novel: that is, not hit a part of the tree that we have fully
        explored.

        This is mostly useful for test functions which draw a small
        number of blocks.

        """
        assert isinstance(result, hbytes)
        try:
            node_index = data.__current_node_index
        except AttributeError:
            node_index = 0
            data.__current_node_index = node_index
            data.__hit_novelty = False
            data.__evaluated_to = 0

        if data.__hit_novelty:
            return result

        node = self.tree[node_index]

        for i in hrange(data.__evaluated_to, len(data.buffer)):
            node = self.tree[node_index]
            try:
                node_index = node[data.buffer[i]]
                assert node_index not in self.dead
                node = self.tree[node_index]
            except KeyError:
                data.__hit_novelty = True
                return result

        for i, b in enumerate(result):
            assert isinstance(b, int)
            try:
                new_node_index = node[b]
            except KeyError:
                data.__hit_novelty = True
                return result

            new_node = self.tree[new_node_index]

            if new_node_index in self.dead:
                if isinstance(result, hbytes):
                    result = bytearray(result)
                for c in range(256):
                    if c not in node:
                        assert c <= self.capped.get(node_index, c)
                        result[i] = c
                        data.__hit_novelty = True
                        return hbytes(result)
                    else:
                        new_node_index = node[c]
                        new_node = self.tree[new_node_index]
                        if new_node_index not in self.dead:
                            result[i] = c
                            break
                else:  # pragma: no cover
                    assert False, (
                        'Found a tree node which is live despite all its '
                        'children being dead.')
            node_index = new_node_index
            node = new_node
        assert node_index not in self.dead
        data.__current_node_index = node_index
        data.__evaluated_to = data.index + len(result)
        return hbytes(result)

    @property
    def database(self):
        if self.database_key is None:
            return None
        return self.settings.database

    def has_existing_examples(self):
        return (self.database is not None
                and Phase.reuse in self.settings.phases)

    def reuse_existing_examples(self):
        """If appropriate (we have a database and have been told to use it),
        try to reload existing examples from the database.

        If there are a lot we don't try all of them. We always try the
        smallest example in the database (which is guaranteed to be the
        last failure) and the largest (which is usually the seed example
        which the last failure came from but we don't enforce that). We
        then take a random sampling of the remainder and try those. Any
        examples that are no longer interesting are cleared out.

        """
        if self.has_existing_examples():
            self.debug('Reusing examples from database')
            # We have to do some careful juggling here. We have two database
            # corpora: The primary and secondary. The primary corpus is a
            # small set of minimized examples each of which has at one point
            # demonstrated a distinct bug. We want to retry all of these.

            # We also have a secondary corpus of examples that have at some
            # point demonstrated interestingness (currently only ones that
            # were previously non-minimal examples of a bug, but this will
            # likely expand in future). These are a good source of potentially
            # interesting examples, but there are a lot of them, so we down
            # sample the secondary corpus to a more manageable size.

            corpus = sorted(self.settings.database.fetch(self.database_key),
                            key=sort_key)
            desired_size = max(2, ceil(0.1 * self.settings.max_examples))

            for extra_key in [self.secondary_key, self.covering_key]:
                if len(corpus) < desired_size:
                    extra_corpus = list(
                        self.settings.database.fetch(extra_key))

                    shortfall = desired_size - len(corpus)

                    if len(extra_corpus) <= shortfall:
                        extra = extra_corpus
                    else:
                        extra = self.random.sample(extra_corpus, shortfall)
                    extra.sort(key=sort_key)
                    corpus.extend(extra)

            for existing in corpus:
                self.last_data = ConjectureData.for_buffer(existing)
                try:
                    self.test_function(self.last_data)
                finally:
                    if self.last_data.status != Status.INTERESTING:
                        self.settings.database.delete(self.database_key,
                                                      existing)
                        self.settings.database.delete(self.secondary_key,
                                                      existing)

    def exit_with(self, reason):
        self.exit_reason = reason
        raise RunIsComplete()

    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=lambda data, n: self.__rewrite_for_novelty(
                data, hbytes(n)))
        self.test_function(zero_data)

        count = 0
        while count < 10 and not self.interesting_examples:

            def draw_bytes(data, n):
                return self.__rewrite_for_novelty(
                    data, self.__zero_bound(data, uniform(self.random, n)))

            targets_found = len(self.covering_examples)

            self.last_data = ConjectureData(
                max_length=self.settings.buffer_size, draw_bytes=draw_bytes)
            self.test_function(self.last_data)
            self.last_data.freeze()

            if len(self.covering_examples) > targets_found:
                count = 0
            else:
                count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever we generate an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data: shuffling it randomly spreads the zero bytes
                # evenly throughout the buffer. Hopefully the shrinking
                # this produces will cause us to naturally stop hitting
                # the bound.
                # If it doesn't, then we will queue the new version up
                # again (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These positions will have values written over them anyway,
                # so what's currently there doesn't really "count" for
                # distributional purposes; if we left them in, they could
                # cause the fraction of non-zero bytes to increase on redraw
                # instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(
                    draw_bytes=draw_bytes,
                    max_length=self.settings.buffer_size,
                )
                self.test_function(data)
                data.freeze()
            else:
                target, last_data = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                prev_data = self.last_data
                data = ConjectureData(draw_bytes=mutator,
                                      max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
                if (data.status > prev_data.status
                        or len(self.covering_examples) > targets_found):
                    mutations = 0
                elif (data.status < prev_data.status
                      or not self.target_selector.has_tag(target, data)
                      or mutations >= self.settings.max_mutations):
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, 'hit_zero_bound', False):
                zero_bound_queue.append(data)
            mutations += 1

    def _run(self):
        self.last_data = None
        self.start_time = time.time()

        self.reuse_existing_examples()
        self.generate_new_examples()

        if (Phase.shrink not in self.settings.phases
                or not self.interesting_examples):
            self.exit_with(ExitReason.finished)

        for prev_data in sorted(self.interesting_examples.values(),
                                key=lambda d: sort_key(d.buffer)):
            assert prev_data.status == Status.INTERESTING
            data = ConjectureData.for_buffer(prev_data.buffer)
            self.test_function(data)
            if data.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        while len(self.shrunk_examples) < len(self.interesting_examples):
            target, self.last_data = min(
                [(k, v) for k, v in self.interesting_examples.items()
                 if k not in self.shrunk_examples],
                key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
            )
            self.debug('Shrinking %r' % (target, ))
            assert self.last_data.interesting_origin == target
            self.shrink()
            self.shrunk_examples.add(target)
        self.exit_with(ExitReason.finished)

    def try_buffer_with_rewriting_from(self, initial_attempt, v):
        initial_data = None
        node_index = 0
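        # First check whether this prefix already leads to a recorded leaf
        # in the tree; if so we can reuse the cached result instead of
        # re-running the test function.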
        for c in initial_attempt:
            try:
                node_index = self.tree[node_index][c]
            except KeyError:
                break
            node = self.tree[node_index]
            if isinstance(node, ConjectureData):
                initial_data = node
                break

        if initial_data is None:
            initial_data = ConjectureData.for_buffer(initial_attempt)
            self.test_function(initial_data)

        if initial_data.status == Status.INTERESTING:
            return initial_data is self.last_data

        # If this produced something completely invalid we ditch it
        # here rather than trying to persevere.
        if initial_data.status < Status.VALID:
            return False

        if len(initial_data.buffer) < v:
            return False

        lost_data = len(self.last_data.buffer) - len(initial_data.buffer)

        # If this did not in fact cause the data size to shrink we
        # bail here because it's not worth trying to delete stuff from
        # the remainder.
        if lost_data <= 0:
            return False

        try_with_deleted = bytearray(initial_attempt)
        del try_with_deleted[v:v + lost_data]
        try_with_deleted.extend(hbytes(lost_data - 1))
        if self.incorporate_new_buffer(try_with_deleted):
            return True

        for r, s in self.last_data.intervals:
            if (r >= v and s - r <= lost_data
                    and r < len(initial_data.buffer)):
                try_with_deleted = bytearray(initial_attempt)
                del try_with_deleted[r:s]
                try_with_deleted.extend(hbytes(s - r - 1))
                if self.incorporate_new_buffer(try_with_deleted):
                    return True
        return False

    def delta_interval_deletion(self):
        """Attempt to delete every interval in the example."""

        self.debug('delta interval deletes')

        # We do a delta-debugging style thing here where we initially try to
        # delete many intervals at once and prune it down exponentially to
        # eventually only trying to delete one interval at a time.

        # I'm a little skeptical that this is helpful in general, but we've
        # got at least one benchmark where it does help.
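        # Concrete sketch: with 8 intervals we first try deleting runs of
        # k=4 adjacent intervals, then k=2, then k=1, halving k after each
        # full pass over the interval list.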
        k = len(self.last_data.intervals) // 2
        while k > 0:
            i = 0
            while i + k <= len(self.last_data.intervals):
                bitmask = [True] * len(self.last_data.buffer)

                for u, v in self.last_data.intervals[i:i + k]:
                    for t in range(u, v):
                        bitmask[t] = False

                if not self.incorporate_new_buffer(
                        hbytes(b
                               for b, v in zip(self.last_data.buffer, bitmask)
                               if v)):
                    i += k
            k //= 2

    def greedy_interval_deletion(self):
        """Attempt to delete every interval in the example."""

        self.debug('greedy interval deletes')
        i = 0
        while i < len(self.last_data.intervals):
            u, v = self.last_data.intervals[i]
            if not self.incorporate_new_buffer(self.last_data.buffer[:u] +
                                               self.last_data.buffer[v:]):
                i += 1

    def coarse_block_replacement(self):
        """Attempts to zero every block. This is a very coarse pass that we
        only run once to attempt to remove some irrelevant detail. The main
        purpose of it is that if we manage to zero a lot of data then many
        attempted deletes become duplicates of each other, so we run fewer
        tests.

        If more blocks become possible to zero later that will be
        handled by minimize_individual_blocks. The point of this is
        simply to provide a fairly fast initial pass.

        """
        self.debug('Zeroing blocks')
        i = 0
        while i < len(self.last_data.blocks):
            buf = self.last_data.buffer
            u, v = self.last_data.blocks[i]
            assert u < v
            block = buf[u:v]
            if any(block):
                self.incorporate_new_buffer(buf[:u] + hbytes(v - u) + buf[v:])
            i += 1

    def minimize_duplicated_blocks(self):
        """Find blocks that have been duplicated in multiple places and attempt
        to minimize all of the duplicates simultaneously."""
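
        # Hedged example: if the buffer's blocks are [b'\x07', b'\x01',
        # b'\x07'], then b'\x07' is duplicated, so both occurrences are
        # replaced together and shrink in lockstep, e.g. trying
        # [b'\x00', b'\x01', b'\x00'] as a single candidate.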

        self.debug('Simultaneous shrinking of duplicated blocks')
        counts = Counter(self.last_data.buffer[u:v]
                         for u, v in self.last_data.blocks)
        blocks = [k for k, count in counts.items() if count > 1]

        thresholds = {}
        for u, v in self.last_data.blocks:
            b = self.last_data.buffer[u:v]
            thresholds[b] = v

        blocks.sort(reverse=True)
        blocks.sort(key=lambda b: counts[b] * len(b), reverse=True)
        for block in blocks:
            parts = [
                self.last_data.buffer[r:s] for r, s in self.last_data.blocks
            ]

            def replace(b):
                return hbytes(
                    EMPTY_BYTES.join(
                        hbytes(b if c == block else c) for c in parts))

            threshold = thresholds[block]

            minimize(block,
                     lambda b: self.try_buffer_with_rewriting_from(
                         replace(b), threshold),
                     random=self.random,
                     full=False)

    def minimize_individual_blocks(self):
        self.debug('Shrinking of individual blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            minimize(
                self.last_data.buffer[u:v],
                lambda b: self.try_buffer_with_rewriting_from(
                    self.last_data.buffer[:u] + b + self.last_data.buffer[v:],
                    v),
                random=self.random,
                full=False,
            )
            i += 1

    def reorder_blocks(self):
        self.debug('Reordering blocks')
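        # Insertion-sort style pass: for each block length (largest first),
        # repeatedly swap adjacent out-of-order blocks of that length
        # whenever the resulting lexicographically smaller buffer is still
        # interesting.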
        block_lengths = sorted(self.last_data.block_starts, reverse=True)
        for n in block_lengths:
            i = 1
            while i < len(self.last_data.block_starts.get(n, ())):
                j = i
                while j > 0:
                    buf = self.last_data.buffer
                    blocks = self.last_data.block_starts[n]
                    a_start = blocks[j - 1]
                    b_start = blocks[j]
                    a = buf[a_start:a_start + n]
                    b = buf[b_start:b_start + n]
                    if a <= b:
                        break
                    swapped = (buf[:a_start] + b + buf[a_start + n:b_start] +
                               a + buf[b_start + n:])
                    assert len(swapped) == len(buf)
                    assert swapped < buf
                    if self.incorporate_new_buffer(swapped):
                        j -= 1
                    else:
                        break
                i += 1

    def shrink(self):
        # We assume that if an all-zero block of bytes is an interesting
        # example then we're not going to do better than that.
        # This might not technically be true: e.g. for integers() | booleans()
        # the simplest example is actually [1, 0]. Missing this case is fairly
        # harmless and this allows us to make various simplifying assumptions
        # about the structure of the data (principally that we're never
        # operating on a block of all zero bytes so can use non-zeroness as a
        # signpost of complexity).
        if (not any(self.last_data.buffer) or self.incorporate_new_buffer(
                hbytes(len(self.last_data.buffer)))):
            return

        if self.has_existing_examples():
            # If we have any smaller examples in the secondary corpus, now is
            # a good time to try them to see if they work as shrinks. They
            # probably won't, but it's worth a shot and gives us a good
            # opportunity to clear out the database.

            # It's not worth trying the primary corpus because we already
            # tried all of those in the initial phase.
            corpus = sorted(self.settings.database.fetch(self.secondary_key),
                            key=sort_key)
            for c in corpus:
                if sort_key(c) >= sort_key(self.last_data.buffer):
                    break
                elif self.incorporate_new_buffer(c):
                    break
                else:
                    self.settings.database.delete(self.secondary_key, c)

        # Coarse passes that are worth running once when the example is likely
        # to be "far from shrunk" but not worth repeating in a loop because
        # they are subsumed by more fine grained passes.
        self.delta_interval_deletion()
        self.coarse_block_replacement()

        change_counter = -1

        while self.shrinks > change_counter:
            change_counter = self.shrinks

            self.minimize_duplicated_blocks()
            self.minimize_individual_blocks()
            self.reorder_blocks()
            self.greedy_interval_deletion()

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
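        # Memoize str(event) per event object; a WeakKeyDictionary is used
        # so cached entries disappear once the event itself is garbage
        # collected.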
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
Example #2
0
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=lambda data, n: self.__rewrite_for_novelty(
                data, hbytes(n)))
        self.test_function(zero_data)

        count = 0
        while count < 10 and not self.interesting_examples:

            def draw_bytes(data, n):
                return self.__rewrite_for_novelty(
                    data, self.__zero_bound(data, uniform(self.random, n)))

            targets_found = len(self.covering_examples)

            self.last_data = ConjectureData(
                max_length=self.settings.buffer_size, draw_bytes=draw_bytes)
            self.test_function(self.last_data)
            self.last_data.freeze()

            if len(self.covering_examples) > targets_found:
                count = 0
            else:
                count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever we generate an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data: shuffling it randomly spreads the zero bytes
                # evenly throughout the buffer. Hopefully the shrinking
                # this produces will cause us to naturally stop hitting
                # the bound.
                # If it doesn't, then we will queue the new version up
                # again (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These positions will have values written over them anyway,
                # so what's currently there doesn't really "count" for
                # distributional purposes; if we left them in, they could
                # cause the fraction of non-zero bytes to increase on redraw
                # instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(
                    draw_bytes=draw_bytes,
                    max_length=self.settings.buffer_size,
                )
                self.test_function(data)
                data.freeze()
            else:
                target, last_data = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                prev_data = self.last_data
                data = ConjectureData(draw_bytes=mutator,
                                      max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
                if (data.status > prev_data.status
                        or len(self.covering_examples) > targets_found):
                    mutations = 0
                elif (data.status < prev_data.status
                      or not self.target_selector.has_tag(target, data)
                      or mutations >= self.settings.max_mutations):
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, 'hit_zero_bound', False):
                zero_bound_queue.append(data)
            mutations += 1
Example #3
0
    def _run(self):
        self.last_data = None
        mutations = 0
        start_time = time.time()

        self.reuse_existing_examples()

        if (
            Phase.generate in self.settings.phases
            and not self.__tree_is_exhausted()
        ):
            if (
                self.last_data is None or
                self.last_data.status < Status.INTERESTING
            ):
                self.new_buffer()

            mutator = self._new_mutator()

            zero_bound_queue = []

            while (
                self.last_data.status != Status.INTERESTING and
                not self.__tree_is_exhausted()
            ):
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                if (
                    self.settings.timeout > 0 and
                    time.time() >= start_time + self.settings.timeout
                ):
                    self.exit_reason = ExitReason.timeout
                    return
                if zero_bound_queue:
                    # Whenever we generate an example and it hits a bound
                    # which forces zero blocks into it, this creates a weird
                    # distortion effect by making certain parts of the data
                    # stream (especially ones to the right) much more likely
                    # to be zero. We fix this by redistributing the generated
                    # data: shuffling it randomly spreads the zero bytes
                    # evenly throughout the buffer. Hopefully the shrinking
                    # this produces will cause us to naturally stop hitting
                    # the bound.
                    # If it doesn't, then we will queue the new version up
                    # again (now with more zeros) and try again.
                    overdrawn = zero_bound_queue.pop()
                    buffer = bytearray(overdrawn.buffer)
                    self.random.shuffle(buffer)
                    buffer = hbytes(buffer)

                    if buffer == overdrawn.buffer:
                        continue

                    def draw_bytes(data, n, distribution):
                        result = buffer[data.index:data.index + n]
                        if len(result) < n:
                            result += hbytes(n - len(result))
                        return self.__rewrite(data, result)

                    data = ConjectureData(
                        draw_bytes=draw_bytes,
                        max_length=self.settings.buffer_size,
                    )
                    self.test_function(data)
                    data.freeze()
                elif mutations >= self.settings.max_mutations:
                    mutations = 0
                    data = self.new_buffer()
                    mutator = self._new_mutator()
                else:
                    data = ConjectureData(
                        draw_bytes=mutator,
                        max_length=self.settings.buffer_size
                    )
                    self.test_function(data)
                    data.freeze()
                    prev_data = self.last_data
                    if self.consider_new_test_data(data):
                        self.last_data = data
                        if data.status > prev_data.status:
                            mutations = 0
                    else:
                        mutator = self._new_mutator()
                if getattr(data, 'hit_zero_bound', False):
                    zero_bound_queue.append(data)
                mutations += 1

        if self.__tree_is_exhausted():
            self.exit_reason = ExitReason.finished
            return

        data = self.last_data
        if data is None:
            self.exit_reason = ExitReason.finished
            return
        assert isinstance(data.output, text_type)

        if self.settings.max_shrinks <= 0:
            self.exit_reason = ExitReason.max_shrinks
            return

        if Phase.shrink not in self.settings.phases:
            self.exit_reason = ExitReason.finished
            return

        data = ConjectureData.for_buffer(self.last_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_reason = ExitReason.flaky
            return

        self.shrink()
Example #4
0
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = self.cached_test_function(hbytes(
            self.settings.buffer_size))
        if zero_data.status == Status.OVERRUN or (
                zero_data.status == Status.VALID
                and len(zero_data.buffer) * 2 > self.settings.buffer_size):
            fail_health_check(
                self.settings,
                "The smallest natural example for your test is extremely "
                "large. This makes it difficult for Hypothesis to generate "
                "good examples, especially when trying to reduce failing ones "
                "at the end. Consider reducing the size of your data if it is "
                "of a fixed size. You could also fix this by improving how "
                "your data shrinks (see https://hypothesis.readthedocs.io/en/"
                "latest/data.html#shrinking for details), or by introducing "
                "default values inside your strategy. e.g. could you replace "
                "some arguments with their defaults by using "
                "one_of(none(), some_complex_strategy)?",
                HealthCheck.large_base_example,
            )

        if zero_data is not Overrun:
            # If the language starts with writes of length >= cap then there
            # is only one string in it: everything after cap is forced to be
            # zero (or to be whatever value is written there). That means
            # that once we've tried the zero value, there's nothing left for
            # us to do, so we exit early here.
            for i in hrange(self.cap):
                if i not in zero_data.forced_indices:
                    break
            else:
                self.exit_with(ExitReason.finished)

        self.health_check_state = HealthCheckState()

        count = 0
        while not self.interesting_examples and (
                count < 10 or self.health_check_state is not None):
            prefix = self.generate_novel_prefix()

            def draw_bytes(data, n):
                if data.index < len(prefix):
                    result = prefix[data.index:data.index + n]
                    if len(result) < n:
                        result += uniform(self.random, n - len(result))
                else:
                    result = uniform(self.random, n)
                return self.__zero_bound(data, result)

            targets_found = len(self.covering_examples)

            last_data = ConjectureData(max_length=self.settings.buffer_size,
                                       draw_bytes=draw_bytes)
            self.test_function(last_data)
            last_data.freeze()

            count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever we generate an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data: shuffling it randomly spreads the zero bytes
                # evenly throughout the buffer. Hopefully the shrinking
                # this produces will cause us to naturally stop hitting
                # the bound.
                # If it doesn't, then we will queue the new version up
                # again (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These positions will have values written over them anyway,
                # so what's currently there doesn't really "count" for
                # distributional purposes; if we left them in, they could
                # cause the fraction of non-zero bytes to increase on redraw
                # instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(draw_bytes=draw_bytes,
                                      max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
            else:
                origin = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                data = ConjectureData(draw_bytes=mutator(origin),
                                      max_length=self.settings.buffer_size)
                self.test_function(data)
                data.freeze()
                if (data.status > origin.status
                        or len(self.covering_examples) > targets_found):
                    mutations = 0
                elif data.status < origin.status or mutations >= 10:
                    # Cap the variations of a single example and move on to
                    # an entirely fresh start.  Ten is an entirely arbitrary
                    # constant, but it's been working well for years.
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, "hit_zero_bound", False):
                zero_bound_queue.append(data)
            mutations += 1
Example #5
0
    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=lambda data, n: self.__rewrite_for_novelty(
                data, hbytes(n)))
        self.test_function(zero_data)

        count = 0
        while count < 10 and not self.interesting_examples:
            def draw_bytes(data, n):
                return self.__rewrite_for_novelty(
                    data, self.__zero_bound(data, uniform(self.random, n))
                )

            targets_found = len(self.covering_examples)

            self.last_data = ConjectureData(
                max_length=self.settings.buffer_size,
                draw_bytes=draw_bytes
            )
            self.test_function(self.last_data)
            self.last_data.freeze()

            if len(self.covering_examples) > targets_found:
                count = 0
            else:
                count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever we generate an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data: shuffling it randomly spreads the zero bytes
                # evenly throughout the buffer. Hopefully the shrinking
                # this produces will cause us to naturally stop hitting
                # the bound.
                # If it doesn't, then we will queue the new version up
                # again (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These positions will have values written over them anyway,
                # so what's currently there doesn't really "count" for
                # distributional purposes; if we left them in, they could
                # cause the fraction of non-zero bytes to increase on redraw
                # instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(
                    draw_bytes=draw_bytes,
                    max_length=self.settings.buffer_size,
                )
                self.test_function(data)
                data.freeze()
            else:
                target, last_data = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                prev_data = self.last_data
                data = ConjectureData(
                    draw_bytes=mutator,
                    max_length=self.settings.buffer_size
                )
                self.test_function(data)
                data.freeze()
                if (
                    data.status > prev_data.status or
                    len(self.covering_examples) > targets_found
                ):
                    mutations = 0
                elif (
                    data.status < prev_data.status or
                    not self.target_selector.has_tag(target, data) or
                    mutations >= self.settings.max_mutations
                ):
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, 'hit_zero_bound', False):
                zero_bound_queue.append(data)
            mutations += 1
Example #6
0
class ConjectureRunner(object):

    def __init__(
        self, test_function, settings=None, random=None,
        database_key=None,
    ):
        self._test_function = test_function
        self.settings = settings or Settings()
        self.last_data = None
        self.changed = 0
        self.shrinks = 0
        self.call_count = 0
        self.event_call_counts = Counter()
        self.valid_examples = 0
        self.start_time = time.time()
        self.random = random or Random(getrandbits(128))
        self.database_key = database_key
        self.status_runtimes = {}
        self.events_to_strings = WeakKeyDictionary()

        # Tree nodes are stored in an array to prevent heavy nesting of data
        # structures. Branches are dicts mapping bytes to child nodes (which
        # will in general only be partially populated). Leaves are
        # ConjectureData objects that have been previously seen as the result
        # of following that path.
        self.tree = [{}]

        # A node is dead if there is nothing left to explore past that point.
        # Recursively, a node is dead if either it is a leaf or every byte
        # leads to a dead node when starting from here.
        self.dead = set()
        self.forced = {}

    def __tree_is_exhausted(self):
        return 0 in self.dead

    def new_buffer(self):
        assert not self.__tree_is_exhausted()

        def draw_bytes(data, n, distribution):
            return self.__rewrite_for_novelty(
                data, self.__zero_bound(data, distribution(self.random, n))
            )

        self.last_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=draw_bytes
        )
        self.test_function(self.last_data)
        self.last_data.freeze()

    def test_function(self, data):
        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise e
        except:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.debug_data(data)
        if data.status >= Status.VALID:
            self.valid_examples += 1

        tree_node = self.tree[0]
        indices = []
        node_index = 0
        for i, b in enumerate(data.buffer):
            indices.append(node_index)
            if i in data.forced_indices:
                self.forced[node_index] = b
            try:
                node_index = tree_node[b]
            except KeyError:
                node_index = len(self.tree)
                self.tree.append({})
                tree_node[b] = node_index
            tree_node = self.tree[node_index]
            if node_index in self.dead:
                break

        if data.status != Status.OVERRUN and node_index not in self.dead:
            self.dead.add(node_index)
            self.tree[node_index] = data

            for j in reversed(indices):
                if len(self.tree[j]) < 256 and j not in self.forced:
                    break
                if set(self.tree[j].values()).issubset(self.dead):
                    self.dead.add(j)
                else:
                    break

    def consider_new_test_data(self, data):
        # Transition rules:
        #   1. Transition cannot decrease the status
        #   2. Any transition which increases the status is valid
        #   3. If the previous status was interesting, only shrinking
        #      transitions are allowed.
        if data.buffer == self.last_data.buffer:
            return False
        if self.last_data.status < data.status:
            return True
        if self.last_data.status > data.status:
            return False
        if data.status == Status.INVALID:
            return data.index >= self.last_data.index
        if data.status == Status.OVERRUN:
            return data.overdraw <= self.last_data.overdraw
        if data.status == Status.INTERESTING:
            assert len(data.buffer) <= len(self.last_data.buffer)
            if len(data.buffer) == len(self.last_data.buffer):
                return data.buffer < self.last_data.buffer
            return True
        return True
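
    # Worked example of the rules above (illustrative values only): when the
    # previous status was INTERESTING and the buffers have equal length, the
    # new data wins only if its buffer sorts strictly lower, e.g. b'\x00\x01'
    # replaces b'\x00\x02', while b'\x00\x03' is rejected.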

    def save_buffer(self, buffer):
        if (
            self.settings.database is not None and
            self.database_key is not None
        ):
            self.settings.database.save(self.database_key, hbytes(buffer))

    def note_details(self, data):
        if data.status == Status.INTERESTING:
            self.save_buffer(data.buffer)
        runtime = max(data.finish_time - data.start_time, 0.0)
        self.status_runtimes.setdefault(data.status, []).append(runtime)
        for event in set(map(self.event_to_string, data.events)):
            self.event_call_counts[event] += 1

    def debug(self, message):
        with self.settings:
            debug_report(message)

    def debug_data(self, data):
        self.debug(u'%d bytes %s -> %s, %s' % (
            data.index,
            unicode_safe_repr(list(data.buffer[:data.index])),
            unicode_safe_repr(data.status),
            data.output,
        ))

    def prescreen_buffer(self, buffer):
        i = 0
        for b in buffer:
            if i in self.dead:
                return False
            try:
                b = self.forced[i]
            except KeyError:
                pass
            try:
                i = self.tree[i][b]
            except KeyError:
                return True
        else:
            return False

    def incorporate_new_buffer(self, buffer):
        assert self.last_data.status == Status.INTERESTING
        if (
            self.settings.timeout > 0 and
            time.time() >= self.start_time + self.settings.timeout
        ):
            self.exit_reason = ExitReason.timeout
            raise RunIsComplete()

        buffer = hbytes(buffer[:self.last_data.index])
        if sort_key(buffer) >= sort_key(self.last_data.buffer):
            return False

        if not self.prescreen_buffer(buffer):
            return False

        assert sort_key(buffer) <= sort_key(self.last_data.buffer)
        data = ConjectureData.for_buffer(buffer)
        self.test_function(data)
        if self.consider_new_test_data(data):
            self.shrinks += 1
            self.last_data = data
            if self.shrinks >= self.settings.max_shrinks:
                self.exit_reason = ExitReason.max_shrinks
                raise RunIsComplete()
            self.changed += 1
            return True
        return False

    def run(self):
        with self.settings:
            try:
                self._run()
            except RunIsComplete:
                pass
            self.debug(
                u'Run complete after %d examples (%d valid) and %d shrinks' % (
                    self.call_count, self.valid_examples, self.shrinks,
                ))

    def _new_mutator(self):
        def draw_new(data, n, distribution):
            return distribution(self.random, n)

        def draw_existing(data, n, distribution):
            return self.last_data.buffer[data.index:data.index + n]

        def draw_smaller(data, n, distribution):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = distribution(self.random, n)
            if r <= existing:
                return r
            return _draw_predecessor(self.random, existing)

        def draw_larger(data, n, distribution):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = distribution(self.random, n)
            if r >= existing:
                return r
            return _draw_successor(self.random, existing)

        def reuse_existing(data, n, distribution):
            choices = data.block_starts.get(n, []) or \
                self.last_data.block_starts.get(n, [])
            if choices:
                i = self.random.choice(choices)
                return self.last_data.buffer[i:i + n]
            else:
                result = distribution(self.random, n)
                assert isinstance(result, hbytes)
                return result

        def flip_bit(data, n, distribution):
            buf = bytearray(
                self.last_data.buffer[data.index:data.index + n])
            i = self.random.randint(0, n - 1)
            k = self.random.randint(0, 7)
            buf[i] ^= (1 << k)
            return hbytes(buf)

        def draw_zero(data, n, distribution):
            return hbytes(b'\0' * n)

        def draw_max(data, n, distribution):
            return hbytes([255]) * n

        def draw_constant(data, n, distribution):
            return bytes_from_list([
                self.random.randint(0, 255)
            ] * n)

        options = [
            draw_new,
            reuse_existing, reuse_existing,
            draw_existing, draw_smaller, draw_larger,
            flip_bit,
            draw_zero, draw_max, draw_zero, draw_max,
            draw_constant,
        ]

        bits = [
            self.random.choice(options) for _ in hrange(3)
        ]

        def draw_mutated(data, n, distribution):
            if (
                data.index + n > len(self.last_data.buffer)
            ):
                result = distribution(self.random, n)
            else:
                result = self.random.choice(bits)(data, n, distribution)

            return self.__rewrite_for_novelty(
                data, self.__zero_bound(data, result))

        return draw_mutated

    def __rewrite(self, data, result):
        return self.__rewrite_for_novelty(
            data, self.__zero_bound(data, result)
        )

    def __zero_bound(self, data, result):
        """This tries to get the size of the generated data under control by
        replacing the result with zero if we are too deep or have already
        generated too much data.

        This causes us to enter "shrinking mode" there and thus reduce
        the size of the generated data.

        """
        if (
            data.depth * 2 >= MAX_DEPTH or
            (data.index + len(result)) * 2 >= self.settings.buffer_size
        ):
            if any(result):
                data.hit_zero_bound = True
            return hbytes(len(result))
        else:
            return result

    def __rewrite_for_novelty(self, data, result):
        """Take a block that is about to be added to data as the result of a
        draw_bytes call and rewrite it a small amount to ensure that the result
        will be novel: that is, not hit a part of the tree that we have fully
        explored.

        This is mostly useful for test functions which draw a small
        number of blocks.

        """
        assert isinstance(result, hbytes)
        try:
            node_index = data.__current_node_index
        except AttributeError:
            node_index = 0
            data.__current_node_index = node_index
            data.__hit_novelty = False
            data.__evaluated_to = 0

        if data.__hit_novelty:
            return result

        node = self.tree[node_index]

        for i in hrange(data.__evaluated_to, len(data.buffer)):
            node = self.tree[node_index]
            try:
                node_index = node[data.buffer[i]]
                assert node_index not in self.dead
                node = self.tree[node_index]
            except KeyError:
                data.__hit_novelty = True
                return result

        for i, b in enumerate(result):
            assert isinstance(b, int)
            try:
                new_node_index = node[b]
            except KeyError:
                data.__hit_novelty = True
                return result

            new_node = self.tree[new_node_index]

            if new_node_index in self.dead:
                if isinstance(result, hbytes):
                    result = bytearray(result)
                for c in range(256):
                    if c not in node:
                        result[i] = c
                        data.__hit_novelty = True
                        return hbytes(result)
                    else:
                        new_node_index = node[c]
                        new_node = self.tree[new_node_index]
                        if new_node_index not in self.dead:
                            result[i] = c
                            break
                else:  # pragma: no cover
                    assert False, (
                        'Found a tree node which is live despite all its '
                        'children being dead.')
            node_index = new_node_index
            node = new_node
        assert node_index not in self.dead
        data.__current_node_index = node_index
        data.__evaluated_to = data.index + len(result)
        return hbytes(result)
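
    # Example of the rewrite (illustrative): if the drawn block's first byte
    # leads to a dead subtree, but some byte c has never been tried at this
    # node, the block is patched to use c instead, guaranteeing this run
    # explores new territory.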

    def has_existing_examples(self):
        return (
            self.settings.database is not None and
            self.database_key is not None and
            Phase.reuse in self.settings.phases
        )

    def reuse_existing_examples(self):
        """If appropriate (we have a database and have been told to use it),
        try to reload existing examples from the database.

        If there are a lot we don't try all of them. We always try the
        smallest example in the database (which is guaranteed to be the
        last failure) and the largest (which is usually the seed example
        which the last failure came from but we don't enforce that). We
        then take a random sampling of the remainder and try those. Any
        examples that are no longer interesting are cleared out.

        """
        if self.has_existing_examples():
            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=sort_key
            )

            desired_size = max(2, ceil(0.1 * self.settings.max_examples))

            if desired_size < len(corpus):
                new_corpus = [corpus[0], corpus[-1]]
                n_boost = max(desired_size - 2, 0)
                new_corpus.extend(self.random.sample(corpus[1:-1], n_boost))
                corpus = new_corpus
                corpus.sort(key=sort_key)

            for existing in corpus:
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_with(ExitReason.max_examples)
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_with(ExitReason.max_iterations)
                data = ConjectureData.for_buffer(existing)
                self.test_function(data)
                data.freeze()
                self.last_data = data
                self.consider_new_test_data(data)
                if data.status == Status.INTERESTING:
                    self.last_data = data
                    break
                else:
                    self.settings.database.delete(
                        self.database_key, existing)

    def exit_with(self, reason):
        self.exit_reason = reason
        raise RunIsComplete()

    def _run(self):
        self.last_data = None
        mutations = 0
        start_time = time.time()

        self.reuse_existing_examples()

        if (
            Phase.generate in self.settings.phases and not
            self.__tree_is_exhausted()
        ):
            if (
                self.last_data is None or
                self.last_data.status < Status.INTERESTING
            ):
                self.new_buffer()

            mutator = self._new_mutator()

            zero_bound_queue = []

            while (
                self.last_data.status != Status.INTERESTING and
                not self.__tree_is_exhausted()
            ):
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                if (
                    self.settings.timeout > 0 and
                    time.time() >= start_time + self.settings.timeout
                ):
                    self.exit_reason = ExitReason.timeout
                    return
                if zero_bound_queue:
                    # Whenever a generated example hits a bound that forces
                    # zero blocks into it, this distorts the distribution by
                    # making certain parts of the data stream (especially
                    # ones towards the right) much more likely to be zero.
                    # We fix this by shuffling the generated data, which
                    # spreads the zero bytes evenly throughout the buffer.
                    # Hopefully the resulting shrinking will let us avoid
                    # hitting the bound naturally; if it doesn't, we queue
                    # the new version up again (now with more zeros) and
                    # try once more.
                    overdrawn = zero_bound_queue.pop()
                    buffer = bytearray(overdrawn.buffer)
                    self.random.shuffle(buffer)
                    buffer = hbytes(buffer)

                    if buffer == overdrawn.buffer:
                        continue

                    def draw_bytes(data, n, distribution):
                        result = buffer[data.index:data.index + n]
                        if len(result) < n:
                            result += hbytes(n - len(result))
                        return self.__rewrite(data, result)

                    data = ConjectureData(
                        draw_bytes=draw_bytes,
                        max_length=self.settings.buffer_size,
                    )
                    self.test_function(data)
                    data.freeze()
                elif mutations >= self.settings.max_mutations:
                    mutations = 0
                    data = self.new_buffer()
                    mutator = self._new_mutator()
                else:
                    data = ConjectureData(
                        draw_bytes=mutator,
                        max_length=self.settings.buffer_size
                    )
                    self.test_function(data)
                    data.freeze()
                    prev_data = self.last_data
                    if self.consider_new_test_data(data):
                        self.last_data = data
                        if data.status > prev_data.status:
                            mutations = 0
                    else:
                        mutator = self._new_mutator()
                if getattr(data, 'hit_zero_bound', False):
                    zero_bound_queue.append(data)
                mutations += 1

        if self.__tree_is_exhausted():
            self.exit_reason = ExitReason.finished
            return

        data = self.last_data
        if data is None:
            self.exit_reason = ExitReason.finished
            return
        assert isinstance(data.output, text_type)

        if self.settings.max_shrinks <= 0:
            self.exit_reason = ExitReason.max_shrinks
            return

        if Phase.shrink not in self.settings.phases:
            self.exit_reason = ExitReason.finished
            return

        data = ConjectureData.for_buffer(self.last_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_reason = ExitReason.flaky
            return

        self.shrink()

    def zero_blocks(self):
        """Try replacing blocks with zero blocks, starting from the right and
        proceeding leftwards.

        Normally we would proceed from left to right, in keeping with
        our policy of lexicographic minimization - making shrinks to the
        right seems like it should be "wasted work" which we might undo
        later.

        The motivation for doing it this way is that it can unlock
        shrinks that would otherwise be impossible: if we shrink moving
        strictly rightwards, we end up with a lot of the complexity of an
        example "trapped at the end", leaving a lot of dead space in the
        middle. An example of where this can happen is with lists or
        matrices defined by a length parameter, where only one or two of
        the values actually matter: if we start from the left, we replace
        all the early values with zero, leave the later values as the
        ones that matter, and then can't shrink the length parameter.

        """

        self.debug('Zeroing individual blocks')

        # We first do a binary search in the hope that a lot of blocks are
        # replaceable. If not, we only pay a log(n) cost, so it's no big deal.

        # We can replace all blocks >= hi with zero. We cannot replace
        # all blocks >= lo with zero.
        lo = 0
        hi = len(self.last_data.blocks)
        while lo + 1 < hi:
            mid = (lo + hi) // 2
            try:
                u = self.last_data.blocks[mid][0]
            except IndexError:
                # This shouldn't really happen, but may in the presence of a
                # bad test function whose block structure varies based on some
                # sort of external data. We could possibly detect this better
                # and signal an error, but it's hard to do so reliably so
                # instead we just try to be robust in the face of it.
                break
            if self.incorporate_new_buffer(
                self.last_data.buffer[:u] +
                hbytes(len(self.last_data.buffer) - u),
            ):
                hi = mid
            else:
                lo = mid

        for i in hrange(len(self.last_data.blocks) - 1, -1, -1):
            # The case where this is not true is hard to hit reliably, and only
            # exists for similar reasons to the above: It guards against
            # invalid data generation.
            if i < len(self.last_data.blocks):  # pragma: no branch
                u, v = self.last_data.blocks[i]
                self.incorporate_new_buffer(
                    self.last_data.buffer[:u] + hbytes(v - u) +
                    self.last_data.buffer[v:],
                )
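
    # E.g. with blocks [b'\x03', b'\x00\x02', b'\x01'] (illustrative), the
    # binary search above first tries zeroing everything from a midpoint
    # block onwards, and the final loop then walks the blocks right-to-left,
    # zeroing each one individually.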

    def shrink(self):
        # We assume that if an all-zero block of bytes is an interesting
        # example then we're not going to do better than that.
        # This might not technically be true: e.g. for integers() | booleans()
        # the simplest example is actually [1, 0]. Missing this case is fairly
        # harmless and this allows us to make various simplifying assumptions
        # about the structure of the data (principally that we're never
        # operating on a block of all zero bytes so can use non-zeroness as a
        # signpost of complexity).
        if (
            not any(self.last_data.buffer) or
            self.incorporate_new_buffer(hbytes(len(self.last_data.buffer)))
        ):
            self.exit_reason = ExitReason.finished
            return

        if self.has_existing_examples():
            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=sort_key
            )
            # We always have self.last_data.buffer in the database because
            # we save every interesting example. This means we will always
            # trigger the first break and thus never exit the loop normally.
            for c in corpus:  # pragma: no branch
                if sort_key(c) >= sort_key(self.last_data.buffer):
                    break
                elif self.incorporate_new_buffer(c):
                    break
                else:
                    self.settings.database.delete(self.database_key, c)

        change_counter = -1

        while self.changed > change_counter:
            change_counter = self.changed

            self.debug('Structured interval deletes')

            k = len(self.last_data.intervals) // 2
            while k > 0:
                i = 0
                while i + k <= len(self.last_data.intervals):
                    bitmask = [True] * len(self.last_data.buffer)

                    for u, v in self.last_data.intervals[i:i + k]:
                        for t in range(u, v):
                            bitmask[t] = False

                    u, v = self.last_data.intervals[i]
                    if not self.incorporate_new_buffer(hbytes(
                        b for b, v in zip(self.last_data.buffer, bitmask)
                        if v
                    )):
                        i += k
                k //= 2

            self.zero_blocks()

            minimize(
                self.last_data.buffer, self.incorporate_new_buffer,
                cautious=True, random=self.random,
            )

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Bulk replacing blocks with simpler blocks')
            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                buf = self.last_data.buffer
                block = buf[u:v]
                n = v - u

                buffer = bytearray()
                for r, s in self.last_data.blocks:
                    if s - r == n and self.last_data.buffer[r:s] > block:
                        buffer.extend(block)
                    else:
                        buffer.extend(self.last_data.buffer[r:s])
                self.incorporate_new_buffer(hbytes(buffer))
                i += 1

            self.debug('Simultaneous shrinking of duplicated blocks')
            block_counter = -1
            while block_counter < self.changed:
                block_counter = self.changed
                blocks = [
                    k for k, count in
                    Counter(
                        self.last_data.buffer[u:v]
                        for u, v in self.last_data.blocks).items()
                    if count > 1
                ]
                for block in blocks:
                    parts = [
                        self.last_data.buffer[r:s]
                        for r, s in self.last_data.blocks
                    ]

                    def replace(b):
                        return hbytes(EMPTY_BYTES.join(
                            hbytes(b if c == block else c) for c in parts
                        ))
                    minimize(
                        block,
                        lambda b: self.incorporate_new_buffer(replace(b)),
                        random=self.random,
                    )

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Shrinking of individual blocks')
            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                minimize(
                    self.last_data.buffer[u:v],
                    lambda b: self.incorporate_new_buffer(
                        self.last_data.buffer[:u] + b +
                        self.last_data.buffer[v:],
                    ),
                    random=self.random,
                )
                i += 1

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Reordering blocks')
            block_lengths = sorted(self.last_data.block_starts, reverse=True)
            for n in block_lengths:
                i = 1
                while i < len(self.last_data.block_starts.get(n, ())):
                    j = i
                    while j > 0:
                        buf = self.last_data.buffer
                        blocks = self.last_data.block_starts[n]
                        a_start = blocks[j - 1]
                        b_start = blocks[j]
                        a = buf[a_start:a_start + n]
                        b = buf[b_start:b_start + n]
                        if a <= b:
                            break
                        swapped = (
                            buf[:a_start] + b + buf[a_start + n:b_start] +
                            a + buf[b_start + n:])
                        assert len(swapped) == len(buf)
                        assert swapped < buf
                        if self.incorporate_new_buffer(swapped):
                            j -= 1
                        else:
                            break
                    i += 1

            self.debug('Shuffling suffixes while shrinking %r' % (
                self.last_data.bind_points,
            ))
            b = 0
            while b < len(self.last_data.bind_points):
                cutoff = sorted(self.last_data.bind_points)[b]

                def test_value(prefix):
                    for t in hrange(5):
                        alphabet = {}
                        for i, j in self.last_data.blocks[b:]:
                            alphabet.setdefault(j - i, []).append((i, j))
                        if t > 0:
                            for v in alphabet.values():
                                self.random.shuffle(v)
                        buf = bytearray(prefix)
                        for i, j in self.last_data.blocks[b:]:
                            u, v = alphabet[j - i].pop()
                            buf.extend(self.last_data.buffer[u:v])
                        if self.incorporate_new_buffer(hbytes(buf)):
                            return True
                    return False
                minimize(
                    self.last_data.buffer[:cutoff], test_value, cautious=True,
                    random=self.random,
                )
                b += 1

        self.exit_reason = ExitReason.finished

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
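
Both runners above record every executed buffer in a byte trie stored as a flat list of dicts, pruning exhausted subtrees via a set of dead node indices. A stripped-down sketch of that bookkeeping, independent of the runner (the class name and its small API are invented for illustration, and the forced/capped refinements are omitted):

class ByteTrie(object):
    """Flat-array byte trie: node 0 is the root, and each branch is a
    dict mapping a byte to the index of its child node."""

    def __init__(self):
        self.tree = [{}]
        self.dead = set()

    def record(self, buffer):
        # Walk the buffer, creating nodes as needed, then mark the final
        # node dead: replaying this exact path teaches us nothing new.
        node_index = 0
        indices = []
        for b in buffer:
            indices.append(node_index)
            node = self.tree[node_index]
            if b not in node:
                node[b] = len(self.tree)
                self.tree.append({})
            node_index = node[b]
        self.dead.add(node_index)
        # Propagate death upwards: a fully-branched node all of whose
        # children are dead is itself dead.
        for j in reversed(indices):
            if len(self.tree[j]) < 256:
                break
            if set(self.tree[j].values()).issubset(self.dead):
                self.dead.add(j)
            else:
                break

trie = ByteTrie()
trie.record(b'\x00\x01')
assert 0 not in trie.dead  # the root still has 255 unexplored branches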
Example #7
    def _run(self):
        self.last_data = None
        mutations = 0
        start_time = time.time()

        self.reuse_existing_examples()

        if (
            Phase.generate in self.settings.phases and not
            self.__tree_is_exhausted()
        ):
            if (
                self.last_data is None or
                self.last_data.status < Status.INTERESTING
            ):
                self.new_buffer()

            mutator = self._new_mutator()

            zero_bound_queue = []

            while (
                self.last_data.status != Status.INTERESTING and
                not self.__tree_is_exhausted()
            ):
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                if (
                    self.settings.timeout > 0 and
                    time.time() >= start_time + self.settings.timeout
                ):
                    self.exit_reason = ExitReason.timeout
                    return
                if zero_bound_queue:
                    # Whenever a generated example hits a bound that forces
                    # zero blocks into it, this distorts the distribution by
                    # making certain parts of the data stream (especially
                    # ones towards the right) much more likely to be zero.
                    # We fix this by shuffling the generated data, which
                    # spreads the zero bytes evenly throughout the buffer.
                    # Hopefully the resulting shrinking will let us avoid
                    # hitting the bound naturally; if it doesn't, we queue
                    # the new version up again (now with more zeros) and
                    # try once more.
                    overdrawn = zero_bound_queue.pop()
                    buffer = bytearray(overdrawn.buffer)

                    # These will have values written over them anyway, so the
                    # values there don't really "count" for distributional
                    # purposes, and if we leave them in they can cause the
                    # fraction of non-zero bytes to increase rather than
                    # decrease on redraw.
                    for i in overdrawn.forced_indices:
                        buffer[i] = 0

                    self.random.shuffle(buffer)
                    buffer = hbytes(buffer)

                    if buffer == overdrawn.buffer:
                        continue

                    def draw_bytes(data, n):
                        result = buffer[data.index:data.index + n]
                        if len(result) < n:
                            result += hbytes(n - len(result))
                        return self.__rewrite(data, result)

                    data = ConjectureData(
                        draw_bytes=draw_bytes,
                        max_length=self.settings.buffer_size,
                    )
                    self.test_function(data)
                    data.freeze()
                elif mutations >= self.settings.max_mutations:
                    mutations = 0
                    data = self.new_buffer()
                    mutator = self._new_mutator()
                else:
                    data = ConjectureData(
                        draw_bytes=mutator,
                        max_length=self.settings.buffer_size
                    )
                    self.test_function(data)
                    data.freeze()
                    prev_data = self.last_data
                    if self.consider_new_test_data(data):
                        self.last_data = data
                        if data.status > prev_data.status:
                            mutations = 0
                    else:
                        mutator = self._new_mutator()
                if getattr(data, 'hit_zero_bound', False):
                    zero_bound_queue.append(data)
                mutations += 1

        if self.__tree_is_exhausted():
            self.exit_reason = ExitReason.finished
            return

        data = self.last_data
        if data is None:
            self.exit_reason = ExitReason.finished
            return
        assert isinstance(data.output, text_type)

        if Phase.shrink not in self.settings.phases:
            self.exit_reason = ExitReason.finished
            return

        data = ConjectureData.for_buffer(self.last_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_reason = ExitReason.flaky
            return

        while len(self.shrunk_examples) < len(self.interesting_examples):
            target, d = min([
                (k, v) for k, v in self.interesting_examples.items()
                if k not in self.shrunk_examples],
                key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
            )
            self.debug('Shrinking %r' % (target,))
            self.last_data = d
            assert self.last_data.interesting_origin == target
            self.shrink()
            self.shrunk_examples.add(target)
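
The closing loop of this _run picks the next shrink target as the not-yet-shrunk interesting example with the shortlex-smallest buffer. A self-contained sketch of that selection with a stand-in sort_key and invented bug labels (the real sort_key is defined elsewhere in the module):

def sort_key(buffer):
    # Shortlex order: shorter buffers first, ties broken bytewise.
    return (len(buffer), buffer)

interesting = {'bug-a': b'\x05\x05', 'bug-b': b'\x00', 'bug-c': b'\x01\x02\x03'}
shrunk = {'bug-b'}

target, buf = min(
    ((k, v) for k, v in interesting.items() if k not in shrunk),
    key=lambda kv: (sort_key(kv[1]), repr(kv[0])),
)
assert target == 'bug-a'  # two bytes beat three; 'bug-b' is already shrunk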
Example #8
class ConjectureRunner(object):

    def __init__(
        self, test_function, settings=None, random=None,
        database_key=None,
    ):
        self._test_function = test_function
        self.settings = settings or Settings()
        self.last_data = None
        self.shrinks = 0
        self.call_count = 0
        self.event_call_counts = Counter()
        self.valid_examples = 0
        self.start_time = time.time()
        self.random = random or Random(getrandbits(128))
        self.database_key = database_key
        self.status_runtimes = {}
        self.events_to_strings = WeakKeyDictionary()

        self.target_selector = TargetSelector(self.random)

        # Tree nodes are stored in an array to prevent heavy nesting of data
        # structures. Branches are dicts mapping bytes to child nodes (which
        # will in general only be partially populated). Leaves are
        # ConjectureData objects that have been previously seen as the result
        # of following that path.
        self.tree = [{}]

        # A node is dead if there is nothing left to explore past that point.
        # Recursively, a node is dead if either it is a leaf or every byte
        # leads to a dead node when starting from here.
        self.dead = set()

        # We rewrite the byte stream at various points during parsing, to one
        # that will produce an equivalent result but is in some sense more
        # canonical. We keep track of these so that when walking the tree we
        # can identify nodes where the exact byte value doesn't matter and
        # treat all bytes there as equivalent. This significantly reduces the
        # size of the search space and removes a lot of redundant examples.

        # Maps tree indices to the unique byte that is valid at that
        # point. Corresponds to data.write() calls.
        self.forced = {}

        # Maps tree indices to the maximum byte that is valid at that point.
        # Currently this is only used inside draw_bits, but it potentially
        # could get used elsewhere.
        self.capped = {}

        # Where a tree node consists of the beginning of a block we track the
        # size of said block. This allows us to tell when an example is too
        # short even if it goes off the unexplored region of the tree - if it
        # is at the beginning of a block of size 4 but only has 3 bytes left,
        # it's going to overrun the end of the buffer regardless of the
        # buffer contents.
        self.block_sizes = {}

        self.interesting_examples = {}
        self.covering_examples = {}

        self.shrunk_examples = set()

        self.tag_intern_table = {}

    def __tree_is_exhausted(self):
        return 0 in self.dead

    def test_function(self, data):
        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise e
        except:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.target_selector.add(data)

        self.debug_data(data)

        tags = frozenset(
            self.tag_intern_table.setdefault(t, t)
            for t in data.tags
        )
        data.tags = self.tag_intern_table.setdefault(tags, tags)

        if data.status == Status.VALID:
            self.valid_examples += 1
            for t in data.tags:
                existing = self.covering_examples.get(t)
                if (
                    existing is None or
                    sort_key(data.buffer) < sort_key(existing.buffer)
                ):
                    self.covering_examples[t] = data
                    if self.database is not None:
                        self.database.save(self.covering_key, data.buffer)
                        if existing is not None:
                            self.database.delete(
                                self.covering_key, existing.buffer)

        tree_node = self.tree[0]
        indices = []
        node_index = 0
        for i, b in enumerate(data.buffer):
            indices.append(node_index)
            if i in data.forced_indices:
                self.forced[node_index] = b
            try:
                self.capped[node_index] = data.capped_indices[i]
            except KeyError:
                pass
            try:
                node_index = tree_node[b]
            except KeyError:
                node_index = len(self.tree)
                self.tree.append({})
                tree_node[b] = node_index
            tree_node = self.tree[node_index]
            if node_index in self.dead:
                break

        for u, v in data.blocks:
            # This can happen if we hit a dead node when walking the buffer.
            # In that case we already have this section of the tree mapped.
            if u >= len(indices):
                break
            self.block_sizes[indices[u]] = v - u

        if data.status != Status.OVERRUN and node_index not in self.dead:
            self.dead.add(node_index)
            self.tree[node_index] = data

            for j in reversed(indices):
                if (
                    len(self.tree[j]) < self.capped.get(j, 255) + 1 and
                    j not in self.forced
                ):
                    break
                if set(self.tree[j].values()).issubset(self.dead):
                    self.dead.add(j)
                else:
                    break

        last_data_is_interesting = (
            self.last_data is not None and
            self.last_data.status == Status.INTERESTING
        )

        if data.status == Status.INTERESTING:
            first_call = len(self.interesting_examples) == 0

            key = data.interesting_origin
            changed = False
            try:
                existing = self.interesting_examples[key]
            except KeyError:
                changed = True
            else:
                if sort_key(data.buffer) < sort_key(existing.buffer):
                    self.downgrade_buffer(existing.buffer)
                    changed = True

            if changed:
                self.interesting_examples[key] = data
                self.shrunk_examples.discard(key)
                if last_data_is_interesting and not first_call:
                    self.shrinks += 1

            if not last_data_is_interesting or (
                sort_key(data.buffer) < sort_key(self.last_data.buffer) and
                data.interesting_origin ==
                self.last_data.interesting_origin
            ):
                self.last_data = data

            if self.shrinks >= self.settings.max_shrinks:
                self.exit_with(ExitReason.max_shrinks)
        elif (
            self.last_data is None or
            self.last_data.status < Status.INTERESTING
        ):
            self.last_data = data
        if (
            self.settings.timeout > 0 and
            time.time() >= self.start_time + self.settings.timeout
        ):
            self.exit_with(ExitReason.timeout)

        if not self.interesting_examples:
            if self.valid_examples >= self.settings.max_examples:
                self.exit_with(ExitReason.max_examples)
            if self.call_count >= max(
                self.settings.max_iterations, self.settings.max_examples
            ):
                self.exit_with(ExitReason.max_iterations)

        if self.__tree_is_exhausted():
            self.exit_with(ExitReason.finished)

    def save_buffer(self, buffer, key=None):
        if self.settings.database is not None:
            if key is None:
                key = self.database_key
            if key is None:
                return
            self.settings.database.save(key, hbytes(buffer))

    def downgrade_buffer(self, buffer):
        if self.settings.database is not None:
            self.settings.database.move(
                self.database_key, self.secondary_key, buffer)

    @property
    def secondary_key(self):
        return b'.'.join((self.database_key, b"secondary"))

    @property
    def covering_key(self):
        return b'.'.join((self.database_key, b"coverage"))

    def note_details(self, data):
        if data.status == Status.INTERESTING:
            if (
                self.last_data is None or
                self.last_data.status != Status.INTERESTING or
                self.last_data.interesting_origin == data.interesting_origin
            ):
                self.save_buffer(data.buffer)
            else:
                self.save_buffer(data.buffer, self.secondary_key)
        runtime = max(data.finish_time - data.start_time, 0.0)
        self.status_runtimes.setdefault(data.status, []).append(runtime)
        for event in set(map(self.event_to_string, data.events)):
            self.event_call_counts[event] += 1

    def debug(self, message):
        with self.settings:
            debug_report(message)

    def debug_data(self, data):
        buffer_parts = [u"["]
        for i, (u, v) in enumerate(data.blocks):
            if i > 0:
                buffer_parts.append(u" || ")
            buffer_parts.append(
                u', '.join(int_to_text(int(i)) for i in data.buffer[u:v]))
        buffer_parts.append(u']')

        status = unicode_safe_repr(data.status)

        if data.status == Status.INTERESTING:
            status = u'%s (%s)' % (
                status, unicode_safe_repr(data.interesting_origin,))

        self.debug(u'%d bytes %s -> %s, %s' % (
            data.index,
            u''.join(buffer_parts),
            status,
            data.output,
        ))

    def prescreen_buffer(self, buffer):
        """Attempt to rule out buffer as a possible interesting candidate.

        Returns False if we know for sure that running this buffer will not
        produce an interesting result. Returns True if it might (because it
        explores territory we have not previously tried).

        This is purely an optimisation to try to reduce the number of tests we
        run. "return True" would be a valid but inefficient implementation.

        """
        node_index = 0
        n = len(buffer)
        for k, b in enumerate(buffer):
            if node_index in self.dead:
                return False
            try:
                # The block size at that point provides a lower bound on how
                # many more bytes are required. If the buffer does not have
                # enough bytes to fulfill that block size then we can rule out
                # this buffer.
                if k + self.block_sizes[node_index] > n:
                    return False
            except KeyError:
                pass
            try:
                b = self.forced[node_index]
            except KeyError:
                pass
            try:
                b = min(b, self.capped[node_index])
            except KeyError:
                pass
            try:
                node_index = self.tree[node_index][b]
            except KeyError:
                return True
        else:
            return False
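
    # Illustrative walk: with tree == [{0: 1}, {}] and dead == {1}, a buffer
    # whose first byte is 0 either walks into the dead node or exhausts a
    # fully-known path, so it is rejected; any other first byte misses the
    # branch lookup (KeyError) and is let through as potentially novel.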

    def incorporate_new_buffer(self, buffer):
        assert self.last_data.status == Status.INTERESTING
        start = self.last_data.interesting_origin

        buffer = hbytes(buffer[:self.last_data.index])
        assert sort_key(buffer) < sort_key(self.last_data.buffer)

        if not self.prescreen_buffer(buffer):
            return False

        assert sort_key(buffer) <= sort_key(self.last_data.buffer)
        data = ConjectureData.for_buffer(buffer)
        self.test_function(data)
        assert self.last_data.interesting_origin == start
        return data is self.last_data

    def run(self):
        with self.settings:
            try:
                self._run()
            except RunIsComplete:
                pass
            if self.interesting_examples:
                self.last_data = max(
                    self.interesting_examples.values(),
                    key=lambda d: sort_key(d.buffer))
            if self.last_data is not None:
                self.debug_data(self.last_data)
            self.debug(
                u'Run complete after %d examples (%d valid) and %d shrinks' % (
                    self.call_count, self.valid_examples, self.shrinks,
                ))

    def _new_mutator(self):
        def draw_new(data, n):
            return uniform(self.random, n)

        def draw_existing(data, n):
            return self.last_data.buffer[data.index:data.index + n]

        def draw_smaller(data, n):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r <= existing:
                return r
            return _draw_predecessor(self.random, existing)

        def draw_larger(data, n):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = uniform(self.random, n)
            if r >= existing:
                return r
            return _draw_successor(self.random, existing)

        def reuse_existing(data, n):
            choices = data.block_starts.get(n, []) or \
                self.last_data.block_starts.get(n, [])
            if choices:
                i = self.random.choice(choices)
                return self.last_data.buffer[i:i + n]
            else:
                result = uniform(self.random, n)
                assert isinstance(result, hbytes)
                return result

        def flip_bit(data, n):
            buf = bytearray(
                self.last_data.buffer[data.index:data.index + n])
            i = self.random.randint(0, n - 1)
            k = self.random.randint(0, 7)
            buf[i] ^= (1 << k)
            return hbytes(buf)

        def draw_zero(data, n):
            return hbytes(b'\0' * n)

        def draw_max(data, n):
            return hbytes([255]) * n

        def draw_constant(data, n):
            return bytes_from_list([
                self.random.randint(0, 255)
            ] * n)

        def redraw_last(data, n):
            u = self.last_data.blocks[-1][0]
            if data.index + n <= u:
                return self.last_data.buffer[data.index:data.index + n]
            else:
                return uniform(self.random, n)

        options = [
            draw_new,
            redraw_last, redraw_last,
            reuse_existing, reuse_existing,
            draw_existing, draw_smaller, draw_larger,
            flip_bit,
            draw_zero, draw_max, draw_zero, draw_max,
            draw_constant,
        ]

        bits = [
            self.random.choice(options) for _ in hrange(3)
        ]

        def draw_mutated(data, n):
            if (
                data.index + n > len(self.last_data.buffer)
            ):
                result = uniform(self.random, n)
            else:
                result = self.random.choice(bits)(data, n)

            return self.__rewrite_for_novelty(
                data, self.__zero_bound(data, result))

        return draw_mutated

    def __rewrite(self, data, result):
        return self.__rewrite_for_novelty(
            data, self.__zero_bound(data, result)
        )

    def __zero_bound(self, data, result):
        """This tries to get the size of the generated data under control by
        replacing the result with zero if we are too deep or have already
        generated too much data.

        This causes us to enter "shrinking mode" there and thus reduce
        the size of the generated data.

        """
        if (
            data.depth * 2 >= MAX_DEPTH or
            (data.index + len(result)) * 2 >= self.settings.buffer_size
        ):
            if any(result):
                data.hit_zero_bound = True
            return hbytes(len(result))
        else:
            return result

    def __rewrite_for_novelty(self, data, result):
        """Take a block that is about to be added to data as the result of a
        draw_bytes call and rewrite it a small amount to ensure that the result
        will be novel: that is, not hit a part of the tree that we have fully
        explored.

        This is mostly useful for test functions which draw a small
        number of blocks.

        """
        assert isinstance(result, hbytes)
        try:
            node_index = data.__current_node_index
        except AttributeError:
            node_index = 0
            data.__current_node_index = node_index
            data.__hit_novelty = False
            data.__evaluated_to = 0

        if data.__hit_novelty:
            return result

        node = self.tree[node_index]

        for i in hrange(data.__evaluated_to, len(data.buffer)):
            node = self.tree[node_index]
            try:
                node_index = node[data.buffer[i]]
                assert node_index not in self.dead
                node = self.tree[node_index]
            except KeyError:
                data.__hit_novelty = True
                return result

        for i, b in enumerate(result):
            assert isinstance(b, int)
            try:
                new_node_index = node[b]
            except KeyError:
                data.__hit_novelty = True
                return result

            new_node = self.tree[new_node_index]

            if new_node_index in self.dead:
                if isinstance(result, hbytes):
                    result = bytearray(result)
                for c in range(256):
                    if c not in node:
                        assert c <= self.capped.get(node_index, c)
                        result[i] = c
                        data.__hit_novelty = True
                        return hbytes(result)
                    else:
                        new_node_index = node[c]
                        new_node = self.tree[new_node_index]
                        if new_node_index not in self.dead:
                            result[i] = c
                            break
                else:  # pragma: no cover
                    assert False, (
                        'Found a tree node which is live despite all its '
                        'children being dead.')
            node_index = new_node_index
            node = new_node
        assert node_index not in self.dead
        data.__current_node_index = node_index
        data.__evaluated_to = data.index + len(result)
        return hbytes(result)

    @property
    def database(self):
        if self.database_key is None:
            return None
        return self.settings.database

    def has_existing_examples(self):
        return (
            self.database is not None and
            Phase.reuse in self.settings.phases
        )

    def reuse_existing_examples(self):
        """If appropriate (we have a database and have been told to use it),
        try to reload existing examples from the database.

        If there are a lot we don't try all of them. We always try the
        smallest example in the database (which is guaranteed to be the
        last failure) and the largest (which is usually the seed example
        which the last failure came from but we don't enforce that). We
        then take a random sampling of the remainder and try those. Any
        examples that are no longer interesting are cleared out.

        """
        if self.has_existing_examples():
            self.debug('Reusing examples from database')
            # We have to do some careful juggling here. We have two database
            # corpora: The primary and secondary. The primary corpus is a
            # small set of minimized examples each of which has at one point
            # demonstrated a distinct bug. We want to retry all of these.

            # We also have a secondary corpus of examples that have at some
            # point demonstrated interestingness (currently only ones that
            # were previously non-minimal examples of a bug, but this will
            # likely expand in future). These are a good source of potentially
            # interesting examples, but there are a lot of them, so we
            # downsample the secondary corpus to a more manageable size.

            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=sort_key
            )
            desired_size = max(2, ceil(0.1 * self.settings.max_examples))

            for extra_key in [self.secondary_key, self.covering_key]:
                if len(corpus) < desired_size:
                    extra_corpus = list(
                        self.settings.database.fetch(extra_key),
                    )

                    shortfall = desired_size - len(corpus)

                    if len(extra_corpus) <= shortfall:
                        extra = extra_corpus
                    else:
                        extra = self.random.sample(extra_corpus, shortfall)
                    extra.sort(key=sort_key)
                    corpus.extend(extra)

            for existing in corpus:
                self.last_data = ConjectureData.for_buffer(existing)
                try:
                    self.test_function(self.last_data)
                finally:
                    if self.last_data.status != Status.INTERESTING:
                        self.settings.database.delete(
                            self.database_key, existing)
                        self.settings.database.delete(
                            self.secondary_key, existing)

    def exit_with(self, reason):
        self.exit_reason = reason
        raise RunIsComplete()

    def generate_new_examples(self):
        if Phase.generate not in self.settings.phases:
            return

        zero_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=lambda data, n: self.__rewrite_for_novelty(
                data, hbytes(n)))
        self.test_function(zero_data)

        count = 0
        while count < 10 and not self.interesting_examples:
            def draw_bytes(data, n):
                return self.__rewrite_for_novelty(
                    data, self.__zero_bound(data, uniform(self.random, n))
                )

            targets_found = len(self.covering_examples)

            self.last_data = ConjectureData(
                max_length=self.settings.buffer_size,
                draw_bytes=draw_bytes
            )
            self.test_function(self.last_data)
            self.last_data.freeze()

            if len(self.covering_examples) > targets_found:
                count = 0
            else:
                count += 1

        mutations = 0
        mutator = self._new_mutator()

        zero_bound_queue = []

        while not self.interesting_examples:
            if zero_bound_queue:
                # Whenever we generate an example and it hits a bound
                # which forces zero blocks into it, this creates a weird
                # distortion effect by making certain parts of the data
                # stream (especially ones to the right) much more likely
                # to be zero. We fix this by redistributing the generated
                # data, shuffling it randomly. This spreads the zero
                # bytes evenly throughout the buffer. Hopefully the
                # shrinking this causes will make us naturally fail to
                # hit the bound; if it doesn't, we queue the new version
                # up again (now with more zeros) and try again.
                overdrawn = zero_bound_queue.pop()
                buffer = bytearray(overdrawn.buffer)

                # These indices will have values written to them that are
                # different from what's in them anyway, so the value there
                # doesn't really "count" for distributional purposes, and
                # if we left them in they could cause the fraction of
                # non-zero bytes to increase on redraw instead of decrease.
                for i in overdrawn.forced_indices:
                    buffer[i] = 0

                self.random.shuffle(buffer)
                buffer = hbytes(buffer)

                def draw_bytes(data, n):
                    result = buffer[data.index:data.index + n]
                    if len(result) < n:
                        result += hbytes(n - len(result))
                    return self.__rewrite(data, result)

                data = ConjectureData(
                    draw_bytes=draw_bytes,
                    max_length=self.settings.buffer_size,
                )
                self.test_function(data)
                data.freeze()
            else:
                target, last_data = self.target_selector.select()
                mutations += 1
                targets_found = len(self.covering_examples)
                data = ConjectureData(
                    draw_bytes=mutator,
                    max_length=self.settings.buffer_size
                )
                self.test_function(data)
                data.freeze()
                # Compare against the example we actually mutated: an
                # improvement resets the mutation budget, while a
                # regression (or drifting off the target tag, or running
                # out of budget) discards this mutator.
                if (
                    data.status > last_data.status or
                    len(self.covering_examples) > targets_found
                ):
                    mutations = 0
                elif (
                    data.status < last_data.status or
                    not self.target_selector.has_tag(target, data) or
                    mutations >= self.settings.max_mutations
                ):
                    mutations = 0
                    mutator = self._new_mutator()
            if getattr(data, 'hit_zero_bound', False):
                zero_bound_queue.append(data)

    def _run(self):
        self.last_data = None
        self.start_time = time.time()

        self.reuse_existing_examples()
        self.generate_new_examples()

        if (
            Phase.shrink not in self.settings.phases or
            not self.interesting_examples
        ):
            self.exit_with(ExitReason.finished)

        for prev_data in sorted(
            self.interesting_examples.values(),
            key=lambda d: sort_key(d.buffer)
        ):
            assert prev_data.status == Status.INTERESTING
            data = ConjectureData.for_buffer(prev_data.buffer)
            self.test_function(data)
            if data.status != Status.INTERESTING:
                self.exit_with(ExitReason.flaky)

        while len(self.shrunk_examples) < len(self.interesting_examples):
            target, self.last_data = min([
                (k, v) for k, v in self.interesting_examples.items()
                if k not in self.shrunk_examples],
                key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
            )
            self.debug('Shrinking %r' % (target,))
            assert self.last_data.interesting_origin == target
            self.shrink()
            self.shrunk_examples.add(target)
        self.exit_with(ExitReason.finished)

    def try_buffer_with_rewriting_from(self, initial_attempt, v):
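        # Run the candidate buffer (reusing a cached result from the tree
        # if we already know the outcome). If it is interesting we are
        # done; if it is valid but reads fewer bytes than the current best
        # example, try deleting the difference at or after index v so that
        # subsequent draws line up with the shrunk prefix again.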
        initial_data = None
        node_index = 0
        for c in initial_attempt:
            try:
                node_index = self.tree[node_index][c]
            except KeyError:
                break
            node = self.tree[node_index]
            if isinstance(node, ConjectureData):
                initial_data = node
                break

        if initial_data is None:
            initial_data = ConjectureData.for_buffer(initial_attempt)
            self.test_function(initial_data)

        if initial_data.status == Status.INTERESTING:
            return initial_data is self.last_data

        # If this produced something completely invalid we ditch it
        # here rather than trying to persevere.
        if initial_data.status < Status.VALID:
            return False

        if len(initial_data.buffer) < v:
            return False

        lost_data = len(self.last_data.buffer) - \
            len(initial_data.buffer)

        # If this did not in fact cause the data size to shrink we
        # bail here because it's not worth trying to delete stuff from
        # the remainder.
        if lost_data <= 0:
            return False

        try_with_deleted = bytearray(initial_attempt)
        del try_with_deleted[v:v + lost_data]
        try_with_deleted.extend(hbytes(lost_data - 1))
        if self.incorporate_new_buffer(try_with_deleted):
            return True

        for r, s in self.last_data.intervals:
            if (
                r >= v and
                s - r <= lost_data and
                r < len(initial_data.buffer)
            ):
                try_with_deleted = bytearray(initial_attempt)
                del try_with_deleted[r:s]
                try_with_deleted.extend(hbytes(s - r - 1))
                if self.incorporate_new_buffer(try_with_deleted):
                    return True
        return False

    def delta_interval_deletion(self):
        """Attempt to delete every interval in the example."""

        self.debug('delta interval deletes')

        # We do a delta-debugging style thing here where we initially try to
        # delete many intervals at once and prune it down exponentially to
        # eventually only trying to delete one interval at a time.

        # I'm a little skeptical that this is helpful in general, but we've
        # got at least one benchmark where it does help.
        k = len(self.last_data.intervals) // 2
        while k > 0:
            i = 0
            while i + k <= len(self.last_data.intervals):
                bitmask = [True] * len(self.last_data.buffer)

                for u, v in self.last_data.intervals[i:i + k]:
                    for t in range(u, v):
                        bitmask[t] = False

                if not self.incorporate_new_buffer(hbytes(
                    b for b, v in zip(self.last_data.buffer, bitmask)
                    if v
                )):
                    i += k
            k //= 2

    def greedy_interval_deletion(self):
        """Attempt to delete every interval in the example."""

        self.debug('greedy interval deletes')
        i = 0
        while i < len(self.last_data.intervals):
            u, v = self.last_data.intervals[i]
            if not self.incorporate_new_buffer(
                self.last_data.buffer[:u] + self.last_data.buffer[v:]
            ):
                i += 1

    def coarse_block_replacement(self):
        """Attempts to zero every block. This is a very coarse pass that we
        only run once to attempt to remove some irrelevant detail. The main
        purpose of it is that if we manage to zero a lot of data then many
        attempted deletes become duplicates of each other, so we run fewer
        tests.

        If more blocks become possible to zero later that will be
        handled by minimize_individual_blocks. The point of this is
        simply to provide a fairly fast initial pass.

        """
        self.debug('Zeroing blocks')
        i = 0
        while i < len(self.last_data.blocks):
            buf = self.last_data.buffer
            u, v = self.last_data.blocks[i]
            assert u < v
            block = buf[u:v]
            if any(block):
                self.incorporate_new_buffer(
                    buf[:u] + hbytes(v - u) + buf[v:]
                )
            i += 1

    def minimize_duplicated_blocks(self):
        """Find blocks that have been duplicated in multiple places and attempt
        to minimize all of the duplicates simultaneously."""

        self.debug('Simultaneous shrinking of duplicated blocks')
        counts = Counter(
            self.last_data.buffer[u:v] for u, v in self.last_data.blocks
        )
        blocks = [
            k for k, count in
            counts.items()
            if count > 1
        ]

        thresholds = {}
        for u, v in self.last_data.blocks:
            b = self.last_data.buffer[u:v]
            thresholds[b] = v

        blocks.sort(reverse=True)
        blocks.sort(key=lambda b: counts[b] * len(b), reverse=True)
        for block in blocks:
            parts = [
                self.last_data.buffer[r:s]
                for r, s in self.last_data.blocks
            ]

            def replace(b):
                return hbytes(EMPTY_BYTES.join(
                    hbytes(b if c == block else c) for c in parts
                ))

            threshold = thresholds[block]

            minimize(
                block,
                lambda b: self.try_buffer_with_rewriting_from(
                    replace(b), threshold),
                random=self.random, full=False
            )

    def minimize_individual_blocks(self):
        self.debug('Shrinking of individual blocks')
        i = 0
        while i < len(self.last_data.blocks):
            u, v = self.last_data.blocks[i]
            minimize(
                self.last_data.buffer[u:v],
                lambda b: self.try_buffer_with_rewriting_from(
                    self.last_data.buffer[:u] + b +
                    self.last_data.buffer[v:], v
                ),
                random=self.random, full=False,
            )
            i += 1

    def reorder_blocks(self):
        self.debug('Reordering blocks')
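        # A bubble-sort style pass: swapping a lexicographically larger
        # block ahead of a smaller one of the same size always yields a
        # smaller buffer of the same length, so each accepted swap is a
        # valid shrink.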
        block_lengths = sorted(self.last_data.block_starts, reverse=True)
        for n in block_lengths:
            i = 1
            while i < len(self.last_data.block_starts.get(n, ())):
                j = i
                while j > 0:
                    buf = self.last_data.buffer
                    blocks = self.last_data.block_starts[n]
                    a_start = blocks[j - 1]
                    b_start = blocks[j]
                    a = buf[a_start:a_start + n]
                    b = buf[b_start:b_start + n]
                    if a <= b:
                        break
                    swapped = (
                        buf[:a_start] + b + buf[a_start + n:b_start] +
                        a + buf[b_start + n:])
                    assert len(swapped) == len(buf)
                    assert swapped < buf
                    if self.incorporate_new_buffer(swapped):
                        j -= 1
                    else:
                        break
                i += 1

    def shrink(self):
        # We assume that if an all-zero block of bytes is an interesting
        # example then we're not going to do better than that.
        # This might not technically be true: e.g. for integers() | booleans()
        # the simplest example is actually [1, 0]. Missing this case is fairly
        # harmless and this allows us to make various simplifying assumptions
        # about the structure of the data (principally that we're never
        # operating on a block of all zero bytes so can use non-zeroness as a
        # signpost of complexity).
        if (
            not any(self.last_data.buffer) or
            self.incorporate_new_buffer(hbytes(len(self.last_data.buffer)))
        ):
            return

        if self.has_existing_examples():
            # If we have any smaller examples in the secondary corpus, now is
            # a good time to try them to see if they work as shrinks. They
            # probably won't, but it's worth a shot and gives us a good
            # opportunity to clear out the database.

            # It's not worth trying the primary corpus because we already
            # tried all of those in the initial phase.
            corpus = sorted(
                self.settings.database.fetch(self.secondary_key),
                key=sort_key
            )
            for c in corpus:
                if sort_key(c) >= sort_key(self.last_data.buffer):
                    break
                elif self.incorporate_new_buffer(c):
                    break
                else:
                    self.settings.database.delete(self.secondary_key, c)

        # Coarse passes that are worth running once when the example is likely
        # to be "far from shrunk" but not worth repeating in a loop because
        # they are subsumed by more fine grained passes.
        self.delta_interval_deletion()
        self.coarse_block_replacement()

        change_counter = -1

        while self.shrinks > change_counter:
            change_counter = self.shrinks

            self.minimize_duplicated_blocks()
            self.minimize_individual_blocks()
            self.reorder_blocks()
            self.greedy_interval_deletion()

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
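A note on the delta-debugging pass in delta_interval_deletion above: it deletes k intervals at a time and halves k whenever a batch size stops making progress. The standalone sketch below shows just that control flow; the buffer, intervals, and is_interesting predicate are invented for illustration, and unlike the real pass (which goes through incorporate_new_buffer and recomputes intervals after each successful shrink) it keeps the original, possibly stale, interval list.

def delta_delete(buffer, intervals, is_interesting):
    # Try to delete k intervals at a time, halving k whenever a batch
    # size stops making progress, down to single-interval deletes.
    k = len(intervals) // 2
    while k > 0:
        i = 0
        while i + k <= len(intervals):
            keep = [True] * len(buffer)
            for u, v in intervals[i:i + k]:
                for t in range(u, min(v, len(buffer))):
                    keep[t] = False
            candidate = bytes(b for b, f in zip(buffer, keep) if f)
            # Only accept strictly smaller buffers that stay interesting,
            # mirroring the sort_key check in incorporate_new_buffer.
            if len(candidate) < len(buffer) and is_interesting(candidate):
                buffer = candidate
            else:
                i += k
        k //= 2
    return buffer

# Toy predicate: a buffer stays "interesting" while it contains a 7.
print(delta_delete(
    bytes([1, 2, 7, 3, 4]),
    [(0, 1), (1, 2), (3, 4), (4, 5)],
    lambda b: 7 in b,
))  # -> b'\x07'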
Example #9
class ConjectureRunner(object):

    def __init__(
        self, test_function, settings=None, random=None,
        database_key=None,
    ):
        self._test_function = test_function
        self.settings = settings or Settings()
        self.last_data = None
        self.changed = 0
        self.shrinks = 0
        self.call_count = 0
        self.event_call_counts = Counter()
        self.valid_examples = 0
        self.start_time = time.time()
        self.random = random or Random(getrandbits(128))
        self.database_key = database_key
        self.seen = set()
        self.duplicates = 0
        self.status_runtimes = {}
        self.events_to_strings = WeakKeyDictionary()

    def new_buffer(self):
        self.last_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=lambda data, n, distribution:
            distribution(self.random, n)
        )
        self.test_function(self.last_data)
        self.last_data.freeze()

    def test_function(self, data):
        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise e
        except:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)
        if (
            data.status == Status.INTERESTING and (
                self.last_data is None or
                data.buffer != self.last_data.buffer
            )
        ):
            self.debug_data(data)
        if data.status >= Status.VALID:
            self.valid_examples += 1

    def consider_new_test_data(self, data):
        # Transition rules:
        #   1. Transition cannot decrease the status
        #   2. Any transition which increases the status is valid
        #   3. If the previous status was interesting, only shrinking
        #      transitions are allowed.
        key = hbytes(data.buffer)
        if key in self.seen:
            self.duplicates += 1
            return False
        self.seen.add(key)
        if data.buffer == self.last_data.buffer:
            return False
        if self.last_data.status < data.status:
            return True
        if self.last_data.status > data.status:
            return False
        if data.status == Status.INVALID:
            return data.index >= self.last_data.index
        if data.status == Status.OVERRUN:
            return data.overdraw <= self.last_data.overdraw
        if data.status == Status.INTERESTING:
            assert len(data.buffer) <= len(self.last_data.buffer)
            if len(data.buffer) == len(self.last_data.buffer):
                assert data.buffer < self.last_data.buffer
            return True
        return True

    def save_buffer(self, buffer):
        if (
            self.settings.database is not None and
            self.database_key is not None and
            Phase.reuse in self.settings.phases
        ):
            self.settings.database.save(
                self.database_key, hbytes(buffer)
            )

    def note_details(self, data):
        if data.status == Status.INTERESTING:
            self.save_buffer(data.buffer)
        runtime = max(data.finish_time - data.start_time, 0.0)
        self.status_runtimes.setdefault(data.status, []).append(runtime)
        for event in set(map(self.event_to_string, data.events)):
            self.event_call_counts[event] += 1

    def debug(self, message):
        with self.settings:
            debug_report(message)

    def debug_data(self, data):
        self.debug(u'%d bytes %s -> %s, %s' % (
            data.index,
            unicode_safe_repr(list(data.buffer[:data.index])),
            unicode_safe_repr(data.status),
            data.output,
        ))

    def incorporate_new_buffer(self, buffer):
        if buffer in self.seen:
            return False
        assert self.last_data.status == Status.INTERESTING
        if (
            self.settings.timeout > 0 and
            time.time() >= self.start_time + self.settings.timeout
        ):
            self.exit_reason = ExitReason.timeout
            raise RunIsComplete()
        buffer = buffer[:self.last_data.index]
        if sort_key(buffer) >= sort_key(self.last_data.buffer):
            return False
        assert sort_key(buffer) <= sort_key(self.last_data.buffer)
        data = ConjectureData.for_buffer(buffer)
        self.test_function(data)
        if self.consider_new_test_data(data):
            self.shrinks += 1
            self.last_data = data
            if self.shrinks >= self.settings.max_shrinks:
                self.exit_reason = ExitReason.max_shrinks
                raise RunIsComplete()
            self.last_data = data
            self.changed += 1
            return True
        return False

    def run(self):
        with self.settings:
            try:
                self._run()
            except RunIsComplete:
                pass
            self.debug(
                u'Run complete after %d examples (%d valid) and %d shrinks' % (
                    self.call_count, self.valid_examples, self.shrinks,
                ))

    def _new_mutator(self):
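        # A mutator is a random cocktail of byte-level strategies: reuse,
        # nudge, zero, constant-fill, or bit-flip bytes from the last
        # buffer, or draw fresh bytes from the distribution.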
        def draw_new(data, n, distribution):
            return distribution(self.random, n)

        def draw_existing(data, n, distribution):
            return self.last_data.buffer[data.index:data.index + n]

        def draw_smaller(data, n, distribution):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = distribution(self.random, n)
            if r <= existing:
                return r
            return _draw_predecessor(self.random, existing)

        def draw_larger(data, n, distribution):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = distribution(self.random, n)
            if r >= existing:
                return r
            return _draw_successor(self.random, existing)

        def reuse_existing(data, n, distribution):
            choices = data.block_starts.get(n, []) or \
                self.last_data.block_starts.get(n, [])
            if choices:
                i = self.random.choice(choices)
                return self.last_data.buffer[i:i + n]
            else:
                return distribution(self.random, n)

        def flip_bit(data, n, distribution):
            buf = bytearray(
                self.last_data.buffer[data.index:data.index + n])
            i = self.random.randint(0, n - 1)
            k = self.random.randint(0, 7)
            buf[i] ^= (1 << k)
            return hbytes(buf)

        def draw_zero(data, n, distribution):
            return b'\0' * n

        def draw_constant(data, n, distribution):
            return bytes_from_list([
                self.random.randint(0, 255)
            ] * n)

        options = [
            draw_new,
            reuse_existing, reuse_existing,
            draw_existing, draw_smaller, draw_larger,
            flip_bit, draw_zero, draw_constant,
        ]

        bits = [
            self.random.choice(options) for _ in hrange(3)
        ]

        def draw_mutated(data, n, distribution):
            if (
                data.index + n > len(self.last_data.buffer)
            ):
                return distribution(self.random, n)
            return self.random.choice(bits)(data, n, distribution)
        return draw_mutated

    def _run(self):
        self.last_data = None
        mutations = 0
        start_time = time.time()

        if (
            self.settings.database is not None and
            self.database_key is not None
        ):
            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=lambda d: (len(d), d)
            )
            for existing in corpus:
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                data = ConjectureData.for_buffer(existing)
                self.test_function(data)
                data.freeze()
                self.last_data = data
                if data.status < Status.VALID:
                    self.settings.database.delete(
                        self.database_key, existing)
                elif data.status == Status.VALID:
                    # Incremental garbage collection! We store a lot of
                    # examples in the DB as we shrink: those that stay
                    # interesting are kept, those that become invalid are
                    # dropped, and those that are merely valid gradually
                    # go away over time.
                    if self.random.randint(0, 2) == 0:
                        self.settings.database.delete(
                            self.database_key, existing)
                else:
                    assert data.status == Status.INTERESTING
                    self.last_data = data
                    break

        if Phase.generate in self.settings.phases:
            if (
                self.last_data is None or
                self.last_data.status < Status.INTERESTING
            ):
                self.new_buffer()

            mutator = self._new_mutator()
            while self.last_data.status != Status.INTERESTING:
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                if (
                    self.settings.timeout > 0 and
                    time.time() >= start_time + self.settings.timeout
                ):
                    self.exit_reason = ExitReason.timeout
                    return
                if mutations >= self.settings.max_mutations:
                    mutations = 0
                    self.new_buffer()
                    mutator = self._new_mutator()
                else:
                    data = ConjectureData(
                        draw_bytes=mutator,
                        max_length=self.settings.buffer_size
                    )
                    self.test_function(data)
                    data.freeze()
                    prev_data = self.last_data
                    if self.consider_new_test_data(data):
                        self.last_data = data
                        if data.status > prev_data.status:
                            mutations = 0
                    else:
                        mutator = self._new_mutator()

                mutations += 1

        data = self.last_data
        if data is None:
            self.exit_reason = ExitReason.finished
            return
        assert isinstance(data.output, text_type)

        if self.settings.max_shrinks <= 0:
            self.exit_reason = ExitReason.max_shrinks
            return

        if Phase.shrink not in self.settings.phases:
            self.exit_reason = ExitReason.finished
            return

        if not self.last_data.buffer:
            self.exit_reason = ExitReason.finished
            return

        data = ConjectureData.for_buffer(self.last_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_reason = ExitReason.flaky
            return

        change_counter = -1

        while self.changed > change_counter:
            change_counter = self.changed

            self.debug('Random interval deletes')
            failed_deletes = 0
            while self.last_data.intervals and failed_deletes < 10:
                if self.random.randint(0, 1):
                    u, v = self.random.choice(self.last_data.intervals)
                else:
                    n = len(self.last_data.buffer) - 1
                    u, v = sorted((
                        self.random.randint(0, n), self.random.randint(0, n)
                    ))
                if (
                    v < len(self.last_data.buffer)
                ) and self.incorporate_new_buffer(
                    self.last_data.buffer[:u] +
                    self.last_data.buffer[v:]
                ):
                    failed_deletes = 0
                else:
                    failed_deletes += 1

            self.debug('Structured interval deletes')
            i = 0
            while i < len(self.last_data.intervals):
                u, v = self.last_data.intervals[i]
                if not self.incorporate_new_buffer(
                    self.last_data.buffer[:u] +
                    self.last_data.buffer[v:]
                ):
                    i += 1

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Lexicographical minimization of whole buffer')
            minimize(
                self.last_data.buffer, self.incorporate_new_buffer,
                cautious=True
            )

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Replacing blocks with simpler blocks')
            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                buf = self.last_data.buffer
                block = buf[u:v]
                n = v - u
                all_blocks = sorted(set([bytes(n)] + [
                    buf[a:a + n]
                    for a in self.last_data.block_starts[n]
                ]))
                better_blocks = all_blocks[:all_blocks.index(block)]
                for b in better_blocks:
                    if self.incorporate_new_buffer(
                        buf[:u] + b + buf[v:]
                    ):
                        break
                i += 1

            self.debug('Simultaneous shrinking of duplicated blocks')
            block_counter = -1
            while block_counter < self.changed:
                block_counter = self.changed
                blocks = [
                    k for k, count in
                    Counter(
                        self.last_data.buffer[u:v]
                        for u, v in self.last_data.blocks).items()
                    if count > 1
                ]
                for block in blocks:
                    parts = [
                        self.last_data.buffer[r:s]
                        for r, s in self.last_data.blocks
                    ]

                    def replace(b):
                        return b''.join(
                            bytes(b if c == block else c) for c in parts
                        )
                    minimize(
                        block,
                        lambda b: self.incorporate_new_buffer(replace(b)),
                        self.random
                    )

            self.debug('Shrinking of individual blocks')
            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                minimize(
                    self.last_data.buffer[u:v],
                    lambda b: self.incorporate_new_buffer(
                        self.last_data.buffer[:u] + b +
                        self.last_data.buffer[v:],
                    ), self.random
                )
                i += 1

            self.debug('Replacing intervals with simpler intervals')

            interval_counter = -1
            while interval_counter != self.changed:
                interval_counter = self.changed
                i = 0
                alternatives = None
                while i < len(self.last_data.intervals):
                    if alternatives is None:
                        alternatives = sorted(set(
                            self.last_data.buffer[u:v]
                            for u, v in self.last_data.intervals), key=len)
                    u, v = self.last_data.intervals[i]
                    for a in alternatives:
                        buf = self.last_data.buffer
                        if (
                            len(a) < v - u or
                            (len(a) == (v - u) and a < buf[u:v])
                        ):
                            if self.incorporate_new_buffer(
                                buf[:u] + a + buf[v:]
                            ):
                                alternatives = None
                                break
                    i += 1

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Shuffling suffixes while shrinking %r' % (
                self.last_data.bind_points,
            ))
            b = 0
            while b < len(self.last_data.bind_points):
                cutoff = sorted(self.last_data.bind_points)[b]

                def test_value(prefix):
                    for t in hrange(5):
                        alphabet = {}
                        for i, j in self.last_data.blocks[b:]:
                            alphabet.setdefault(j - i, []).append((i, j))
                        if t > 0:
                            for v in alphabet.values():
                                self.random.shuffle(v)
                        buf = bytearray(prefix)
                        for i, j in self.last_data.blocks[b:]:
                            u, v = alphabet[j - i].pop()
                            buf.extend(self.last_data.buffer[u:v])
                        if self.incorporate_new_buffer(hbytes(buf)):
                            return True
                    return False
                minimize(
                    self.last_data.buffer[:cutoff], test_value, cautious=True
                )
                b += 1

        self.exit_reason = ExitReason.finished

    def event_to_string(self, event):
        if isinstance(event, str):
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
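The transition rules documented in consider_new_test_data above define a strict preference order over test runs. Below is a minimal sketch of that ordering; the Status enum and Result tuple are stand-ins for illustration, and the INVALID/OVERRUN tie-breakers (the index and overdraw comparisons) are simplified away.

from collections import namedtuple
from enum import IntEnum


class Status(IntEnum):
    # Stand-in for Hypothesis's Status: higher is always better.
    OVERRUN = 0
    INVALID = 1
    VALID = 2
    INTERESTING = 3


Result = namedtuple('Result', ['status', 'buffer'])


def prefer(new, old):
    # Rules 1 and 2: never decrease the status; any increase is accepted.
    if new.status != old.status:
        return new.status > old.status
    # Rule 3: between interesting examples, only shrinks are accepted
    # (shorter buffer, or same length but lexicographically smaller).
    if new.status == Status.INTERESTING:
        return (len(new.buffer), new.buffer) < (len(old.buffer), old.buffer)
    return True  # simplified: the real rules also compare index/overdraw


print(prefer(Result(Status.VALID, b''), Result(Status.INVALID, b'')))  # True
print(prefer(Result(Status.INTERESTING, b'\x00'),
             Result(Status.INTERESTING, b'\x00\x01')))                 # True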
Example #11
class ConjectureRunner(object):
    def __init__(
        self,
        test_function,
        settings=None,
        random=None,
        database_key=None,
    ):
        self._test_function = test_function
        self.settings = settings or Settings()
        self.last_data = None
        self.changed = 0
        self.shrinks = 0
        self.call_count = 0
        self.event_call_counts = Counter()
        self.valid_examples = 0
        self.start_time = time.time()
        self.random = random or Random(getrandbits(128))
        self.database_key = database_key
        self.status_runtimes = {}
        self.events_to_strings = WeakKeyDictionary()

        # Tree nodes are stored in an array to prevent heavy nesting of data
        # structures. Branches are dicts mapping bytes to child nodes (which
        # will in general only be partially populated). Leaves are
        # ConjectureData objects that have been previously seen as the result
        # of following that path.
        self.tree = [{}]

        # A node is dead if there is nothing left to explore past that point.
        # Recursively, a node is dead if either it is a leaf or every byte
        # leads to a dead node when starting from here.
        self.dead = set()

    def __tree_is_exhausted(self):
        return 0 in self.dead

    def new_buffer(self):
        assert not self.__tree_is_exhausted()
        self.last_data = ConjectureData(
            max_length=self.settings.buffer_size,
            draw_bytes=lambda data, n, distribution:
                self.__rewrite_for_novelty(
                    data, distribution(self.random, n)))
        self.test_function(self.last_data)
        self.last_data.freeze()

    def test_function(self, data):
        self.call_count += 1
        try:
            self._test_function(data)
            data.freeze()
        except StopTest as e:
            if e.testcounter != data.testcounter:
                self.save_buffer(data.buffer)
                raise e
        except:
            self.save_buffer(data.buffer)
            raise
        finally:
            data.freeze()
            self.note_details(data)

        self.debug_data(data)
        if data.status >= Status.VALID:
            self.valid_examples += 1

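        # Walk the tree along this buffer's bytes, appending a fresh branch
        # node for any byte not seen at that position before, and remember
        # the path taken so deadness can be propagated back up afterwards.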
        tree_node = self.tree[0]
        indices = []
        i = 0
        for b in data.buffer:
            indices.append(i)
            try:
                i = tree_node[b]
            except KeyError:
                i = len(self.tree)
                self.tree.append({})
                tree_node[b] = i
            tree_node = self.tree[i]
            if i in self.dead:
                break

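        # A non-overrun result means this path was explored to completion:
        # store the result as a leaf, mark it dead, then mark each ancestor
        # dead while all 256 of its children are dead.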
        if data.status != Status.OVERRUN and i not in self.dead:
            self.dead.add(i)
            self.tree[i] = data

            for j in reversed(indices):
                if len(self.tree[j]) < 256:
                    break
                if set(self.tree[j].values()).issubset(self.dead):
                    self.dead.add(j)
                else:
                    break

    def consider_new_test_data(self, data):
        # Transition rules:
        #   1. Transition cannot decrease the status
        #   2. Any transition which increases the status is valid
        #   3. If the previous status was interesting, only shrinking
        #      transitions are allowed.
        if data.buffer == self.last_data.buffer:
            return False
        if self.last_data.status < data.status:
            return True
        if self.last_data.status > data.status:
            return False
        if data.status == Status.INVALID:
            return data.index >= self.last_data.index
        if data.status == Status.OVERRUN:
            return data.overdraw <= self.last_data.overdraw
        if data.status == Status.INTERESTING:
            assert len(data.buffer) <= len(self.last_data.buffer)
            if len(data.buffer) == len(self.last_data.buffer):
                assert data.buffer < self.last_data.buffer
            return True
        return True

    def save_buffer(self, buffer):
        if (self.settings.database is not None
                and self.database_key is not None
                and Phase.reuse in self.settings.phases):
            self.settings.database.save(self.database_key, hbytes(buffer))

    def note_details(self, data):
        if data.status == Status.INTERESTING:
            self.save_buffer(data.buffer)
        runtime = max(data.finish_time - data.start_time, 0.0)
        self.status_runtimes.setdefault(data.status, []).append(runtime)
        for event in set(map(self.event_to_string, data.events)):
            self.event_call_counts[event] += 1

    def debug(self, message):
        with self.settings:
            debug_report(message)

    def debug_data(self, data):
        self.debug(u'%d bytes %s -> %s, %s' % (
            data.index,
            unicode_safe_repr(list(data.buffer[:data.index])),
            unicode_safe_repr(data.status),
            data.output,
        ))

    def incorporate_new_buffer(self, buffer):
        assert self.last_data.status == Status.INTERESTING
        if (self.settings.timeout > 0
                and time.time() >= self.start_time + self.settings.timeout):
            self.exit_reason = ExitReason.timeout
            raise RunIsComplete()

        buffer = buffer[:self.last_data.index]
        if sort_key(buffer) >= sort_key(self.last_data.buffer):
            return False

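        # Check the tree before running anything: if the buffer's prefix
        # walks entirely through known nodes (the loop never breaks), or
        # hits a dead subtree, rerunning it cannot tell us anything new.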
        i = 0
        for b in buffer:
            if i in self.dead:
                return False
            try:
                i = self.tree[i][b]
            except KeyError:
                break
        else:
            return False

        assert sort_key(buffer) <= sort_key(self.last_data.buffer)
        data = ConjectureData.for_buffer(buffer)
        self.test_function(data)
        if self.consider_new_test_data(data):
            self.shrinks += 1
            self.last_data = data
            if self.shrinks >= self.settings.max_shrinks:
                self.exit_reason = ExitReason.max_shrinks
                raise RunIsComplete()
            self.last_data = data
            self.changed += 1
            return True
        return False

    def run(self):
        with self.settings:
            try:
                self._run()
            except RunIsComplete:
                pass
            self.debug(
                u'Run complete after %d examples (%d valid) and %d shrinks' % (
                    self.call_count,
                    self.valid_examples,
                    self.shrinks,
                ))

    def _new_mutator(self):
        def draw_new(data, n, distribution):
            return distribution(self.random, n)

        def draw_existing(data, n, distribution):
            return self.last_data.buffer[data.index:data.index + n]

        def draw_smaller(data, n, distribution):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = distribution(self.random, n)
            if r <= existing:
                return r
            return _draw_predecessor(self.random, existing)

        def draw_larger(data, n, distribution):
            existing = self.last_data.buffer[data.index:data.index + n]
            r = distribution(self.random, n)
            if r >= existing:
                return r
            return _draw_successor(self.random, existing)

        def reuse_existing(data, n, distribution):
            choices = data.block_starts.get(n, []) or \
                self.last_data.block_starts.get(n, [])
            if choices:
                i = self.random.choice(choices)
                return self.last_data.buffer[i:i + n]
            else:
                result = distribution(self.random, n)
                assert isinstance(result, hbytes)
                return result

        def flip_bit(data, n, distribution):
            buf = bytearray(self.last_data.buffer[data.index:data.index + n])
            i = self.random.randint(0, n - 1)
            k = self.random.randint(0, 7)
            buf[i] ^= (1 << k)
            return hbytes(buf)

        def draw_zero(data, n, distribution):
            return hbytes(b'\0' * n)

        def draw_constant(data, n, distribution):
            return bytes_from_list([self.random.randint(0, 255)] * n)

        options = [
            draw_new,
            reuse_existing,
            reuse_existing,
            draw_existing,
            draw_smaller,
            draw_larger,
            flip_bit,
            draw_zero,
            draw_constant,
        ]

        bits = [self.random.choice(options) for _ in hrange(3)]

        def draw_mutated(data, n, distribution):
            if (data.index + n > len(self.last_data.buffer)):
                result = distribution(self.random, n)
            else:
                result = self.random.choice(bits)(data, n, distribution)

            return self.__rewrite_for_novelty(data, result)

        return draw_mutated

    def __rewrite_for_novelty(self, data, result):
        assert isinstance(result, hbytes)
        try:
            node_index = data.__current_node_index
        except AttributeError:
            assert len(data.buffer) == 0
            node_index = 0
            data.__current_node_index = node_index
            data.__hit_novelty = False

        if data.__hit_novelty:
            return result

        node = self.tree[node_index]
        assert node_index not in self.dead

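        # Walk the proposed bytes through the tree; whenever a byte would
        # descend into a dead (fully explored) subtree, rewrite it to the
        # smallest byte that leads somewhere still live or entirely novel.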
        for i, b in enumerate(result):
            assert isinstance(b, int)
            try:
                new_node_index = node[b]
            except KeyError:
                data.__hit_novelty = True
                return result

            new_node = self.tree[new_node_index]

            if new_node_index in self.dead:
                if isinstance(result, hbytes):
                    result = bytearray(result)
                for c in range(256):
                    if c not in node:
                        result[i] = c
                        data.__hit_novelty = True
                        return hbytes(result)
                    else:
                        new_node_index = node[c]
                        new_node = self.tree[new_node_index]
                        if new_node_index not in self.dead:
                            result[i] = c
                            break
                else:  # pragma: no cover
                    assert False, (
                        'Found a tree node which is live despite all its '
                        'children being dead.')
            node_index = new_node_index
            node = new_node
        assert node_index not in self.dead
        data.__current_node_index = node_index
        return hbytes(result)

    def _run(self):
        self.last_data = None
        mutations = 0
        start_time = time.time()

        if (self.settings.database is not None
                and self.database_key is not None):
            corpus = sorted(self.settings.database.fetch(self.database_key),
                            key=lambda d: (len(d), d))
            for existing in corpus:
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(self.settings.max_iterations,
                                          self.settings.max_examples):
                    self.exit_reason = ExitReason.max_iterations
                    return
                data = ConjectureData.for_buffer(existing)
                self.test_function(data)
                data.freeze()
                self.last_data = data
                self.consider_new_test_data(data)
                if data.status < Status.VALID:
                    self.settings.database.delete(self.database_key, existing)
                elif data.status == Status.VALID:
                    # Incremental garbage collection! We store a lot of
                    # examples in the DB as we shrink: those that stay
                    # interesting are kept, those that become invalid are
                    # dropped, and those that are merely valid gradually
                    # go away over time.
                    if self.random.randint(0, 2) == 0:
                        self.settings.database.delete(self.database_key,
                                                      existing)
                else:
                    assert data.status == Status.INTERESTING
                    self.last_data = data
                    break

        if (Phase.generate in self.settings.phases
                and not self.__tree_is_exhausted()):
            if (self.last_data is None
                    or self.last_data.status < Status.INTERESTING):
                self.new_buffer()

            mutator = self._new_mutator()
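            # Each mutator gets up to settings.max_mutations attempts: an
            # exhausted budget starts a fresh buffer, while a rejected
            # mutation swaps in a new mutator (the attempt still counts).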
            while (self.last_data.status != Status.INTERESTING
                   and not self.__tree_is_exhausted()):
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(self.settings.max_iterations,
                                          self.settings.max_examples):
                    self.exit_reason = ExitReason.max_iterations
                    return
                if (self.settings.timeout > 0
                        and time.time() >= start_time + self.settings.timeout):
                    self.exit_reason = ExitReason.timeout
                    return
                if mutations >= self.settings.max_mutations:
                    mutations = 0
                    self.new_buffer()
                    mutator = self._new_mutator()
                else:
                    data = ConjectureData(draw_bytes=mutator,
                                          max_length=self.settings.buffer_size)
                    self.test_function(data)
                    data.freeze()
                    prev_data = self.last_data
                    if self.consider_new_test_data(data):
                        self.last_data = data
                        if data.status > prev_data.status:
                            mutations = 0
                    else:
                        mutator = self._new_mutator()

                mutations += 1

        if self.__tree_is_exhausted():
            self.exit_reason = ExitReason.finished
            return

        data = self.last_data
        if data is None:
            self.exit_reason = ExitReason.finished
            return
        assert isinstance(data.output, text_type)

        if self.settings.max_shrinks <= 0:
            self.exit_reason = ExitReason.max_shrinks
            return

        if Phase.shrink not in self.settings.phases:
            self.exit_reason = ExitReason.finished
            return

        data = ConjectureData.for_buffer(self.last_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_reason = ExitReason.flaky
            return

        change_counter = -1

        while self.changed > change_counter:
            change_counter = self.changed

            self.debug('Structured interval deletes')
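            # Delta-debugging style deletion: each attempt drops k
            # consecutive intervals at once, and k halves after every sweep,
            # so large chunks of the buffer are removed first.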

            k = len(self.last_data.intervals) // 2
            while k > 0:
                i = 0
                while i + k <= len(self.last_data.intervals):
                    bitmask = [True] * len(self.last_data.buffer)

                    for u, v in self.last_data.intervals[i:i + k]:
                        for t in range(u, v):
                            bitmask[t] = False

                    if not self.incorporate_new_buffer(hbytes(
                            b for b, keep in zip(self.last_data.buffer, bitmask)
                            if keep)):
                        i += k
                k //= 2

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Bulk replacing blocks with simpler blocks')
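            # Every block of the same length whose bytes sort above the
            # current block is rewritten to it, so a single test call can
            # simplify many sites at once.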
            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                buf = self.last_data.buffer
                block = buf[u:v]
                n = v - u

                buffer = bytearray()
                for r, s in self.last_data.blocks:
                    if s - r == n and self.last_data.buffer[r:s] > block:
                        buffer.extend(block)
                    else:
                        buffer.extend(self.last_data.buffer[r:s])
                self.incorporate_new_buffer(hbytes(buffer))
                i += 1

            self.debug('Replacing individual blocks with simpler blocks')
            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                buf = self.last_data.buffer
                block = buf[u:v]
                n = v - u
                all_blocks = sorted(
                    set([hbytes(n)] +
                        [buf[a:a + n]
                         for a in self.last_data.block_starts[n]]))
                better_blocks = all_blocks[:all_blocks.index(block)]
                for b in better_blocks:
                    if self.incorporate_new_buffer(buf[:u] + b + buf[v:]):
                        break
                i += 1

            self.debug('Simultaneous shrinking of duplicated blocks')
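            # Duplicated blocks are shrunk in lockstep: replace() swaps every
            # copy at once, so values that must stay equal for the failure to
            # reproduce can still get smaller.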
            block_counter = -1
            while block_counter < self.changed:
                block_counter = self.changed
                blocks = [
                    k for k, count in Counter(
                        self.last_data.buffer[u:v]
                        for u, v in self.last_data.blocks).items() if count > 1
                ]
                for block in blocks:
                    parts = [
                        self.last_data.buffer[r:s]
                        for r, s in self.last_data.blocks
                    ]

                    def replace(b):
                        return hbytes(
                            EMPTY_BYTES.join(
                                hbytes(b if c == block else c) for c in parts))

                    minimize(block,
                             lambda b: self.incorporate_new_buffer(replace(b)),
                             self.random)

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Lexicographical minimization of whole buffer')
            minimize(self.last_data.buffer,
                     self.incorporate_new_buffer,
                     cautious=True)

            self.debug('Shrinking of individual blocks')
            i = 0
            while i < len(self.last_data.blocks):
                u, v = self.last_data.blocks[i]
                minimize(
                    self.last_data.buffer[u:v],
                    lambda b: self.incorporate_new_buffer(
                        self.last_data.buffer[:u] + b +
                        self.last_data.buffer[v:]),
                    self.random)
                i += 1

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Reordering blocks')
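            # Equal-length blocks are bubble-sorted into lexicographic order
            # via adjacent swaps; each accepted swap strictly reduces the
            # buffer (see the asserts below).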
            block_lengths = sorted(self.last_data.block_starts, reverse=True)
            for n in block_lengths:
                i = 1
                while i < len(self.last_data.block_starts.get(n, ())):
                    j = i
                    while j > 0:
                        buf = self.last_data.buffer
                        blocks = self.last_data.block_starts[n]
                        a_start = blocks[j - 1]
                        b_start = blocks[j]
                        a = buf[a_start:a_start + n]
                        b = buf[b_start:b_start + n]
                        if a <= b:
                            break
                        swapped = (buf[:a_start] + b +
                                   buf[a_start + n:b_start] + a +
                                   buf[b_start + n:])
                        assert len(swapped) == len(buf)
                        assert swapped < buf
                        if self.incorporate_new_buffer(swapped):
                            j -= 1
                        else:
                            break
                    i += 1

            if change_counter != self.changed:
                self.debug('Restarting')
                continue

            self.debug('Shuffling suffixes while shrinking %r' %
                       (self.last_data.bind_points, ))
            b = 0
            while b < len(self.last_data.bind_points):
                cutoff = sorted(self.last_data.bind_points)[b]

                def test_value(prefix):
                    for t in hrange(5):
                        alphabet = {}
                        for i, j in self.last_data.blocks[b:]:
                            alphabet.setdefault(j - i, []).append((i, j))
                        if t > 0:
                            for v in alphabet.values():
                                self.random.shuffle(v)
                        buf = bytearray(prefix)
                        for i, j in self.last_data.blocks[b:]:
                            u, v = alphabet[j - i].pop()
                            buf.extend(self.last_data.buffer[u:v])
                        if self.incorporate_new_buffer(hbytes(buf)):
                            return True
                    return False

                minimize(self.last_data.buffer[:cutoff],
                         test_value,
                         cautious=True)
                b += 1

        self.exit_reason = ExitReason.finished
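
Every pass above feeds one fixed-point scheme: progress, observed through self.changed, forces another full round, and the loop exits only once a round completes without change. A stripped-down model of that control flow, with passes as hypothetical callables that return True on progress:

def run_until_fixed_point(passes):
    progressed = True
    rounds = 0
    while progressed:
        progressed = False
        rounds += 1
        for shrink_pass in passes:
            if shrink_pass():
                progressed = True
    return rounds

# A pass that makes progress twice and then stabilises: two productive
# rounds plus one quiet round to confirm the fixed point.
state = {'budget': 2}

def fake_pass():
    if state['budget'] > 0:
        state['budget'] -= 1
        return True
    return False

assert run_until_fixed_point([fake_pass]) == 3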

    def event_to_string(self, event):
        if isinstance(event, str):
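            # str objects cannot be weakly referenced, so plain strings
            # bypass the cache below.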
            return event
        try:
            return self.events_to_strings[event]
        except KeyError:
            pass
        result = str(event)
        self.events_to_strings[event] = result
        return result
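
The cache behind event_to_string is a WeakKeyDictionary, so memoized strings vanish as soon as the event objects they describe are garbage collected. A self-contained sketch of the same pattern (Event and to_string are illustrative names, not part of the original API):

import weakref

class Event(object):
    def __str__(self):
        return 'rendered event %d' % (id(self),)

_cache = weakref.WeakKeyDictionary()

def to_string(event):
    try:
        return _cache[event]
    except KeyError:
        result = str(event)
        _cache[event] = result
        return result

e = Event()
assert to_string(e) is to_string(e)  # the second call is a cache hit
del e  # with CPython refcounting the cache entry is dropped immediately
assert len(_cache) == 0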
Example #13
0
    def _run(self):
        self.last_data = None
        mutations = 0
        start_time = time.time()

        if (
            self.settings.database is not None and
            self.database_key is not None
        ):
            corpus = sorted(
                self.settings.database.fetch(self.database_key),
                key=lambda d: (len(d), d)
            )
            for existing in corpus:
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                data = ConjectureData.for_buffer(existing)
                self.test_function(data)
                data.freeze()
                self.last_data = data
                self.consider_new_test_data(data)
                if data.status < Status.VALID:
                    self.settings.database.delete(
                        self.database_key, existing)
                elif data.status == Status.VALID:
                    # Incremental garbage collection! We store a lot of
                    # examples in the DB as we shrink: those that stay
                    # interesting are kept, those that become invalid are
                    # dropped, and those that are merely valid gradually go
                    # away over time.
                    if self.random.randint(0, 2) == 0:
                        self.settings.database.delete(
                            self.database_key, existing)
                else:
                    assert data.status == Status.INTERESTING
                    self.last_data = data
                    break

        if (
            Phase.generate in self.settings.phases and not
            self.__tree_is_exhausted()
        ):
            if (
                self.last_data is None or
                self.last_data.status < Status.INTERESTING
            ):
                self.new_buffer()

            mutator = self._new_mutator()
            while (
                self.last_data.status != Status.INTERESTING and
                not self.__tree_is_exhausted()
            ):
                if self.valid_examples >= self.settings.max_examples:
                    self.exit_reason = ExitReason.max_examples
                    return
                if self.call_count >= max(
                    self.settings.max_iterations, self.settings.max_examples
                ):
                    self.exit_reason = ExitReason.max_iterations
                    return
                if (
                    self.settings.timeout > 0 and
                    time.time() >= start_time + self.settings.timeout
                ):
                    self.exit_reason = ExitReason.timeout
                    return
                if mutations >= self.settings.max_mutations:
                    mutations = 0
                    self.new_buffer()
                    mutator = self._new_mutator()
                else:
                    data = ConjectureData(
                        draw_bytes=mutator,
                        max_length=self.settings.buffer_size
                    )
                    self.test_function(data)
                    data.freeze()
                    prev_data = self.last_data
                    if self.consider_new_test_data(data):
                        self.last_data = data
                        if data.status > prev_data.status:
                            mutations = 0
                    else:
                        mutator = self._new_mutator()

                mutations += 1

        if self.__tree_is_exhausted():
            self.exit_reason = ExitReason.finished
            return

        data = self.last_data
        if data is None:
            self.exit_reason = ExitReason.finished
            return
        assert isinstance(data.output, text_type)

        if self.settings.max_shrinks <= 0:
            self.exit_reason = ExitReason.max_shrinks
            return

        if Phase.shrink not in self.settings.phases:
            self.exit_reason = ExitReason.finished
            return

        data = ConjectureData.for_buffer(self.last_data.buffer)
        self.test_function(data)
        if data.status != Status.INTERESTING:
            self.exit_reason = ExitReason.flaky
            return

        self.shrink()
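
Both versions of _run replay the saved corpus in shortlex order, i.e. shortest buffer first with lexicographic tie-breaking, so the cheapest reproductions are tried before longer ones:

corpus = [b'\x02', b'\x00\x01', b'\x01']
assert sorted(corpus, key=lambda d: (len(d), d)) == [
    b'\x01', b'\x02', b'\x00\x01']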