Exemple #1
0
    def sort_regions_with_gaps(self):
        """Guarantees that for each i we have tried to swap index i with
        index i + 2.

        This uses an adaptive algorithm that works by sorting contiguous
        regions centered on each element, where that element is treated as
        fixed and the elements around it are sorted..
        """
        for i in range(1, len(self.current) - 1):
            if self.current[i - 1] <= self.current[i] <= self.current[i + 1]:
                # The `continue` line is optimised out of the bytecode on
                # CPython >= 3.7 (https://bugs.python.org/issue2506) and on
                # PyPy, and so coverage cannot tell that it has been taken.
                continue  # pragma: no cover

            def can_sort(a, b):
                if a < 0 or b > len(self.current):
                    return False
                assert a <= i < b
                split = i - a
                values = sorted(self.current[a:i] + self.current[i + 1:b])
                return self.consider(
                    list(self.current[:a]) + values[:split] +
                    [self.current[i]] + values[split:] +
                    list(self.current[b:]))

            left = i
            right = i + 1
            right += find_integer(lambda k: can_sort(left, right + k))
            find_integer(lambda k: can_sort(left - k, right))
Exemple #2
0
    def sort_regions_with_gaps(self):
        """Guarantees that for each i we have tried to swap index i with
        index i + 2.

        This uses an adaptive algorithm that works by sorting contiguous
        regions centered on each element, where that element is treated as
        fixed and the elements around it are sorted..
        """
        for i in range(1, len(self.current) - 1):
            if self.current[i - 1] <= self.current[i] <= self.current[i + 1]:
                continue

            def can_sort(a, b):
                if a < 0 or b > len(self.current):
                    return False
                assert a <= i < b
                split = i - a
                values = sorted(self.current[a:i] + self.current[i + 1:b])
                return self.consider(
                    list(self.current[:a]) + values[:split] +
                    [self.current[i]] + values[split:] +
                    list(self.current[b:]))

            left = i
            right = i + 1
            right += find_integer(lambda k: can_sort(left, right + k))
            find_integer(lambda k: can_sort(left - k, right))
Exemple #3
0
    def distinguish(self, value, test):
        """Checks whether ``test`` gives the same answer for
        ``value`` and ``self.normalize(value)``. If it does
        not, updates the list of canonical values so that
        it does.

        Returns True if and only if this makes a change to
        the underlying canonical values."""
        canonical = self.normalize(value)
        if canonical == value:
            return False

        value_test = test(value)
        if test(canonical) == value_test:
            return False

        def can_lower(k):
            new_canon = value - k
            if new_canon <= canonical:
                return False
            return test(new_canon) == value_test

        new_canon = value - find_integer(can_lower)

        assert new_canon not in self.__values

        insort(self.__values, new_canon)

        assert self.normalize(value) == new_canon
        return True
Exemple #4
0
    def sort_regions(self):
        """Guarantees that for each i we have tried to swap index i with
        index i + 1.

        This uses an adaptive algorithm that works by sorting contiguous
        regions starting from each element.
        """
        i = 0
        while i + 1 < len(self.current):
            prefix = list(self.current[:i])
            k = find_integer(
                lambda k: i + k <= len(self.current) and self.consider(
                    prefix + sorted(self.current[i:i + k], key=self.key
                                    ) + list(self.current[i + k:])))
            i += k
    def hill_climb(self):
        """The main hill climbing loop where we actually do the work: Take
        data, and attempt to improve its score for target. select_example takes
        a data object and returns an index to an example where we should focus
        our efforts."""

        blocks_examined = set()

        prev = None
        i = len(self.current_data.blocks) - 1
        while i >= 0 and self.improvements <= self.max_improvements:
            if prev is not self.current_data:
                i = len(self.current_data.blocks) - 1
                prev = self.current_data

            if i in blocks_examined:
                i -= 1
                continue

            blocks_examined.add(i)
            data = self.current_data
            block = data.blocks[i]
            prefix = data.buffer[:block.start]

            existing = data.buffer[block.start:block.end]
            existing_as_int = int_from_bytes(existing)
            max_int_value = (256**len(existing)) - 1

            if existing_as_int == max_int_value:
                continue

            def attempt_replace(v):
                """Try replacing the current block in the current best test case
                 with an integer of value i. Note that we use the *current*
                best and not the one we started with. This helps ensure that
                if we luck into a good draw when making random choices we get
                to keep the good bits."""
                if v < 0 or v > max_int_value:
                    return False
                v_as_bytes = int_to_bytes(v, len(existing))

                # We make a couple attempts at replacement. This only matters
                # if we end up growing the buffer - otherwise we exit the loop
                # early - but in the event that there *is* some randomized
                # component we want to give it a couple of tries to succeed.
                for _ in range(3):
                    attempt = self.engine.cached_test_function(
                        prefix + v_as_bytes +
                        self.current_data.buffer[block.end:] +
                        bytes(BUFFER_SIZE), )

                    if self.consider_new_test_data(attempt):
                        return True

                    if attempt.status < Status.INVALID or len(
                            attempt.buffer) == len(self.current_data.buffer):
                        return False

                    for i, ex in enumerate(self.current_data.examples):
                        if ex.start >= block.end:
                            break
                        if ex.end <= block.start:
                            continue
                        ex_attempt = attempt.examples[i]
                        if ex.length == ex_attempt.length:
                            continue
                        replacement = attempt.buffer[ex_attempt.
                                                     start:ex_attempt.end]
                        if self.consider_new_test_data(
                                self.engine.cached_test_function(
                                    prefix + replacement +
                                    self.current_data.buffer[ex.end:])):
                            return True
                return False

            # We unconditionally scan both upwards and downwards. The reason
            # for this is that we allow "lateral" moves that don't increase the
            # score but instead leave it constant. All else being equal we'd
            # like to leave the test case closer to shrunk, so afterwards we
            # try lowering the value towards zero even if we've just raised it.

            if not attempt_replace(max_int_value):
                find_integer(lambda k: attempt_replace(k + existing_as_int))

            existing = self.current_data.buffer[block.start:block.end]
            existing_as_int = int_from_bytes(existing)
            if not attempt_replace(0):
                find_integer(lambda k: attempt_replace(existing_as_int - k))
Exemple #6
0
    def learn(self, s):
        """Learn to give the correct answer on this string.
        That is, after this method completes we will have
        ``self.dfa.matches(s) == self.member(s)``.

        Note that we do not guarantee that this will remain
        true in the event that learn is called again with
        a different string. It is in principle possible that
        future learning will cause us to make a mistake on
        this string. However, repeatedly calling learn on
        each of a set of strings until the generation stops
        changing is guaranteed to terminate.
        """
        s = bytes(s)
        correct_outcome = self.member(s)

        # We don't want to check this inside the loop because it potentially
        # causes us to evaluate more of the states than we actually need to,
        # but if our model is mostly correct then this will be faster because
        # we only need to evaluate strings that are of the form
        # ``state + experiment``, which will generally be cached and/or needed
        # later.
        if self.dfa.matches(s) == correct_outcome:
            return

        # In the papers they assume that we only run this process
        # once, but this is silly - often when you've got a messy
        # string it will be wrong for many different reasons.
        #
        # Thus we iterate this to a fixed point where we repair
        # the DFA by repeatedly adding experiments until the DFA
        # agrees with the membership function on this string.
        while True:
            dfa = self.dfa
            states = [dfa.start]

            def seems_right(n):
                """After reading n characters from s, do we seem to be
                in the right state?

                We determine this by replacing the first n characters
                of s with the label of the state we expect to be in.
                If we are in the right state, that will replace a substring
                with an equivalent one so must produce the same answer.
                """
                if n > len(s):
                    return False

                # Populate enough of the states list to know where we are.
                while n >= len(states):
                    states.append(dfa.transition(states[-1], s[len(states) - 1]))

                return self.member(dfa.label(states[n]) + s[n:]) == correct_outcome

            n = find_integer(seems_right)

            # We got to the end without ever finding ourself in a bad
            # state, so we must correctly match this string.
            if n == len(s):
                assert dfa.matches(s) == correct_outcome
                break

            # Reading n characters does not put us in a bad state but
            # reading n + 1 does. This means that the remainder of
            # the string that we have not read yet is an experiment
            # that allows us to distinguish the state that we ended
            # up in from the state that we should have ended up in.
            #
            # There are two possibilities here: Either we have badly
            # normalised the byte that lead to this transition, or
            # we ended up in the wrong state because we could not
            # distinguish the state we eneded up infrom the correct
            # one.

            prefix = s[:n]
            suffix = s[n + 1 :]
            if self.__normalizer.distinguish(
                s[n], lambda x: self.member(prefix + bytes([x]) + suffix)
            ):
                self.__dfa = None
                continue

            self.__add_experiment(suffix)
Exemple #7
0
    def learn(self, string):
        """Learn to give the correct answer on this string.
        That is, after this method completes we will have
        ``self.dfa.matches(s) == self.member(s)``.

        Note that we do not guarantee that this will remain
        true in the event that learn is called again with
        a different string. It is in principle possible that
        future learning will cause us to make a mistake on
        this string. However, repeatedly calling learn on
        each of a set of strings until the generation stops
        changing is guaranteed to terminate.
        """
        string = bytes(string)
        correct_outcome = self.member(string)

        # We don't want to check this inside the loop because it potentially
        # causes us to evaluate more of the states than we actually need to,
        # but if our model is mostly correct then this will be faster because
        # we only need to evaluate strings that are of the form
        # ``state + experiment``, which will generally be cached and/or needed
        # later.
        if self.dfa.matches(string) == correct_outcome:
            return

        # In the papers they assume that we only run this process
        # once, but this is silly - often when you've got a messy
        # string it will be wrong for many different reasons.
        #
        # Thus we iterate this to a fixed point where we repair
        # the DFA by repeatedly adding experiments until the DFA
        # agrees with the membership function on this string.

        # First we make sure that normalization is not the source of the
        # failure to match.
        while True:
            normalized = bytes(self.normalizer.normalize(c) for c in string)
            # We can correctly replace the string with its normalized version
            # so normalization is not the problem here.
            if self.member(normalized) == correct_outcome:
                string = normalized
                break
            alphabet = sorted(set(string), reverse=True)
            target = string
            for a in alphabet:

                def replace(b):
                    if a == b:
                        return target
                    return bytes(b if c == a else c for c in target)

                self.normalizer.distinguish(a,
                                            lambda x: self.member(replace(x)))
                target = replace(self.normalizer.normalize(a))
                assert self.member(target) == correct_outcome
            assert target != normalized
            self.__dfa_changed()

        if self.dfa.matches(string) == correct_outcome:
            return

        # Now we know normalization is correct we can attempt to determine if
        # any of our transitions are wrong.
        while True:
            dfa = self.dfa

            states = [dfa.start]

            def seems_right(n):
                """After reading n characters from s, do we seem to be
                in the right state?

                We determine this by replacing the first n characters
                of s with the label of the state we expect to be in.
                If we are in the right state, that will replace a substring
                with an equivalent one so must produce the same answer.
                """
                if n > len(string):
                    return False

                # Populate enough of the states list to know where we are.
                while n >= len(states):
                    states.append(
                        dfa.transition(states[-1], string[len(states) - 1]))

                return self.member(dfa.label(states[n]) +
                                   string[n:]) == correct_outcome

            assert seems_right(0)

            n = find_integer(seems_right)

            # We got to the end without ever finding ourself in a bad
            # state, so we must correctly match this string.
            if n == len(string):
                assert dfa.matches(string) == correct_outcome
                break

            # Reading n characters does not put us in a bad state but
            # reading n + 1 does. This means that the remainder of
            # the string that we have not read yet is an experiment
            # that allows us to distinguish the state that we ended
            # up in from the state that we should have ended up in.

            source = states[n]
            character = string[n]
            wrong_destination = states[n + 1]

            # We've made an error in transitioning from ``source`` to
            # ``wrong_destination`` via ``character``. We now need to update
            # the DFA so that this transition no longer occurs. Note that we
            # do not guarantee that the transition is *correct* after this,
            # only that we don't make this particular error.
            assert self.transition(source, character) == wrong_destination

            labels_wrong_destination = self.dfa.label(wrong_destination)
            labels_correct_destination = self.dfa.label(source) + bytes(
                [character])

            ex = string[n + 1:]

            assert self.member(labels_wrong_destination +
                               ex) != self.member(labels_correct_destination +
                                                  ex)

            # Adding this experiment causes us to distinguish the wrong
            # destination from the correct one.
            self.__states[wrong_destination].experiments[ex] = self.member(
                labels_wrong_destination + ex)

            # We now clear the cached details that caused us to make this error
            # so that when we recalculate this transition we get to a
            # (hopefully now correct) different state.
            del self.__states[source].transitions[character]
            self.__dfa_changed()

            # We immediately recalculate the transition so that we can check
            # that it has changed as we expect it to have.
            new_destination = self.transition(source, string[n])
            assert new_destination != wrong_destination
 def shift_right(self):
     base = self.current
     find_integer(lambda k: k <= self.size and self.consider(base >> k))