def generate_corpus(experiment, seed, buffer_size, count):
    """Generate ``count`` buffers for an experiment and pack them into a tar.

    ``experiment`` is the key into ``EXPERIMENTS``. Any previous archive and
    corpus directory for that key are removed first. Each successful
    generation is written to a file named after the first 16 hex digits of
    the SHA-1 of its buffer. Once ``count`` files exist the directory is
    archived with ``apack`` and then deleted.

    ``seed`` seeds the module-level ``random`` generator; ``buffer_size`` is
    passed to ``ConjectureData`` as ``max_length``.
    """
    random.seed(seed)

    stale_archive = os.path.join(CORPORA, experiment + '.tar')
    corpus_dir = os.path.join(CORPORA, experiment)

    # Start from a clean slate: a missing archive is fine.
    try:
        os.unlink(stale_archive)
    except FileNotFoundError:
        pass
    shutil.rmtree(corpus_dir, ignore_errors=True)
    mkdirp(corpus_dir)

    # From here on ``experiment`` is the experiment object, not its key.
    experiment = EXPERIMENTS[experiment]

    def draw_uniform(data, n):
        return uniform(random, n)

    written = 0
    while written < count:
        try:
            data = ConjectureData(
                draw_bytes=draw_uniform,
                max_length=buffer_size,
            )
            generated = experiment.generator(data)
            info = experiment.calculate_info(generated)
            # Result is unused; NOTE(review): presumably called so that a
            # failure here causes the sample to be retried - confirm.
            experiment.calculate_error_predicate(info)
        except (StopTest, Exception):
            # Any failure while generating just means "try again".
            continue

        print(info)
        file_name = hashlib.sha1(data.buffer).hexdigest()[:16]
        with open(os.path.join(CORPORA, experiment.name, file_name), "wb") as out:
            out.write(data.buffer)
        written += 1

    subprocess.check_call(
        ["apack", experiment.name + ".tar", experiment.name],
        cwd=CORPORA,
    )
    shutil.rmtree(os.path.join(CORPORA, experiment.name))
def draw_bits(self, n, *, forced=None):
    """Draw an ``n``-bit integer from the underlying byte source.

    When ``forced`` is given as an integer the byte source is bypassed and
    the draw is recorded as though the source had produced that value.
    """
    self.__assert_not_frozen("draw_bits")
    if n == 0:
        return 0
    assert n > 0

    size = bits_to_bytes(n)
    self.__check_capacity(size)

    is_forced = forced is not None
    if is_forced:
        raw = int_to_bytes(forced, size)
    elif self.__bytes_drawn >= len(self.__prefix):
        # Prefix exhausted: every byte comes from the random source.
        raw = uniform(self.__random, size)
    else:
        offset = self.__bytes_drawn
        raw = self.__prefix[offset:offset + size]
        shortfall = size - len(raw)
        if shortfall > 0:
            # The prefix ran out part-way through; top up the remainder.
            raw += uniform(self.__random, shortfall)

    scratch = bytearray(raw)
    self.__bytes_drawn += size
    assert len(scratch) == size

    # When n is not a whole number of bytes, the surplus high bits of the
    # leading byte have to be cleared.
    scratch[0] &= BYTE_MASKS[n % 8]
    block = bytes(scratch)
    result = int_from_bytes(block)

    self.observer.draw_bits(n, is_forced, result)
    self.__example_record.draw_bits(n, forced)

    block_start = self.index
    self.buffer.extend(block)
    self.index = len(self.buffer)

    if is_forced:
        self.forced_indices.update(range(block_start, self.index))

    self.blocks.add_endpoint(self.index)

    assert bit_length(result) <= n
    return result
def draw_bytes(data, n):
    """Return the next ``n`` bytes for ``data``, replaying ``prefix`` first.

    While ``data.index`` is still inside ``prefix`` the bytes are copied from
    it; afterwards they come from ``uniform(self.random, n)``. Either way the
    result is passed through ``self.__zero_bound`` before being returned.
    """
    position = data.index
    if position >= len(prefix):
        chunk = uniform(self.random, n)
    else:
        chunk = prefix[position:position + n]
        # We always draw prefixes as a whole number of blocks
        assert len(chunk) == n
    return self.__zero_bound(data, chunk)
def __draw_without_alphabet(self, n):
    """Produce ``n`` bytes, occasionally choosing an extreme value.

    Two independent rolls of ``self.__random.random()`` are compared, in
    order, against ``self.zero_chance`` and ``self.max_chance``:

    * first roll ``<= zero_chance``: return ``hbytes(n)``;
    * otherwise second roll ``<= max_chance``: return ``n`` copies of 255;
    * otherwise: return ``uniform(self.__random, n)``.
    """
    if self.__random.random() <= self.zero_chance:
        result = hbytes(n)
    elif self.__random.random() <= self.max_chance:
        result = hbytes([255]) * n
    else:
        result = uniform(self.__random, n)
    return result
def draw_bytes(data, n):
    """Return the next ``n`` bytes for ``data``, replaying ``prefix`` first.

    While ``data.index`` is still inside ``prefix`` the bytes are copied from
    it; afterwards they come from ``uniform(self.random, n)``. Either way the
    result is passed through ``self.__zero_bound`` before being returned.
    """
    position = data.index
    if position >= len(prefix):
        chunk = uniform(self.random, n)
    else:
        chunk = prefix[position:position + n]
        # We always draw prefixes as a whole number of blocks
        assert len(chunk) == n
    return self.__zero_bound(data, chunk)
def reuse_existing(data, n):
    """Return ``n`` bytes, copying an earlier block of ``data`` if one exists.

    ``data.block_starts`` is looked up by ``n``. If it lists any start
    offsets, one is chosen with ``self.random.choice`` and the ``n`` bytes of
    ``data.buffer`` at that offset are returned as ``hbytes``. Otherwise the
    bytes come from ``uniform(self.random, n)``.
    """
    candidates = data.block_starts.get(n, [])
    if not candidates:
        fresh = uniform(self.random, n)
        assert isinstance(fresh, hbytes)
        return fresh

    start = self.random.choice(candidates)
    stop = start + n
    assert stop <= len(data.buffer)
    return hbytes(data.buffer[start:stop])
def reuse_existing(data, n):
    """Return ``n`` bytes, copying an earlier block of ``data`` if one exists.

    ``data.block_starts`` is looked up by ``n``. If it lists any start
    offsets, one is chosen with ``self.random.choice`` and the ``n`` bytes of
    ``data.buffer`` at that offset are returned as ``hbytes``. Otherwise the
    bytes come from ``uniform(self.random, n)``.
    """
    candidates = data.block_starts.get(n, [])
    if not candidates:
        fresh = uniform(self.random, n)
        assert isinstance(fresh, hbytes)
        return fresh

    start = self.random.choice(candidates)
    stop = start + n
    assert stop <= len(data.buffer)
    return hbytes(data.buffer[start:stop])
def draw_mutated(data, n):
    """Return ``n`` bytes for a mutated copy of ``target_data[0]``.

    If the draw would run past the end of the target buffer the bytes come
    from ``uniform(self.random, n)``; otherwise one of the callables in
    ``bits`` is picked at random and asked for them. Any part of the draw
    that still falls inside ``prefix[0]`` is then overwritten with the
    prefix bytes, and the result goes through ``self.__zero_bound``.
    """
    if data.index + n <= len(target_data[0].buffer):
        mutator = self.random.choice(bits)
        drawn = mutator(data, n)
    else:
        drawn = uniform(self.random, n)

    fixed = prefix[0]
    position = data.index
    if position < len(fixed):
        # Keep the fixed prefix intact and only use the tail of the draw.
        head = fixed[position:position + n]
        drawn = head + drawn[len(head):]

    assert len(drawn) == n
    return self.__zero_bound(data, drawn)
def draw_mutated(data, n):
    """Return ``n`` bytes for a mutated copy of ``target_data[0]``.

    If the draw would run past the end of the target buffer the bytes come
    from ``uniform(self.random, n)``; otherwise one of the callables in
    ``bits`` is picked at random and asked for them. Any part of the draw
    that still falls inside ``prefix[0]`` is then overwritten with the
    prefix bytes, and the result goes through ``self.__zero_bound``.
    """
    if data.index + n <= len(target_data[0].buffer):
        mutator = self.random.choice(bits)
        drawn = mutator(data, n)
    else:
        drawn = uniform(self.random, n)

    fixed = prefix[0]
    position = data.index
    if position < len(fixed):
        # Keep the fixed prefix intact and only use the tail of the draw.
        head = fixed[position:position + n]
        drawn = head + drawn[len(head):]

    assert len(drawn) == n
    return self.__zero_bound(data, drawn)
def redraw_last(data, n):
    """Copy the target's bytes until its last block, then draw fresh ones.

    A draw that ends at or before the start of the final block of
    ``target_data[0]`` returns the matching slice of the target buffer. Any
    other draw returns ``uniform(self.random, n)``.
    """
    target = target_data[0]
    last_block_start = target.blocks[-1].start
    stop = data.index + n
    if stop > last_block_start:
        return uniform(self.random, n)
    return target.buffer[data.index:stop]
def draw_larger(data, n):
    """Return ``n`` bytes comparing ``>=`` the target's bytes at this index.

    A candidate from ``uniform(self.random, n)`` is returned if it is not
    smaller than the corresponding slice of ``target_data[0].buffer``.
    Otherwise the result is delegated to ``_draw_successor``.
    """
    position = data.index
    current = target_data[0].buffer[position:position + n]
    candidate = uniform(self.random, n)
    if candidate < current:
        return _draw_successor(self.random, current)
    return candidate
def draw_new(data, n):
    """Return ``uniform(self.random, n)``; ``data`` is not consulted."""
    fresh = uniform(self.random, n)
    return fresh
def draw_bytes(data, n):
    """Replay ``initial`` while it lasts, then fall back to random bytes.

    While ``data.index`` lies inside ``initial`` the slice starting there is
    returned (it may be shorter than ``n`` near the end of ``initial``).
    After that the bytes come from ``uniform(random, n)``.
    """
    position = data.index
    if position >= len(initial):
        return uniform(random, n)
    return initial[position:position + n]
def redraw_last(data, n):
    """Copy the target's bytes until its last block, then draw fresh ones.

    A draw that ends at or before the start of the final block of
    ``target_data[0]`` returns the matching slice of the target buffer. Any
    other draw returns ``uniform(self.random, n)``.
    """
    target = target_data[0]
    last_block_start = target.blocks[-1].start
    stop = data.index + n
    if stop > last_block_start:
        return uniform(self.random, n)
    return target.buffer[data.index:stop]
def draw_larger(data, n):
    """Return ``n`` bytes comparing ``>=`` the target's bytes at this index.

    A candidate from ``uniform(self.random, n)`` is returned if it is not
    smaller than the corresponding slice of ``target_data[0].buffer``.
    Otherwise the result is delegated to ``_draw_successor``.
    """
    position = data.index
    current = target_data[0].buffer[position:position + n]
    candidate = uniform(self.random, n)
    if candidate < current:
        return _draw_successor(self.random, current)
    return candidate
def draw_new(data, n):
    """Return ``uniform(self.random, n)``; ``data`` is not consulted."""
    fresh = uniform(self.random, n)
    return fresh
def learner_for(strategy):
    """Return the cached LStar learner for ``strategy``, building it if needed.

    The learner predicts whether a buffer corresponds to a discard free
    choice sequence leading to a valid value for this strategy. Newly built
    learners are stored in ``LEARNERS`` so later calls reuse them.
    """
    try:
        cached = LEARNERS[strategy]
    except KeyError:
        pass
    else:
        return cached

    def test_function(data):
        data.draw(strategy)
        data.mark_interesting()

    runner_settings = settings(
        database=None,
        verbosity=Verbosity.quiet,
        suppress_health_check=HealthCheck.all(),
    )
    runner = ConjectureRunner(
        test_function,
        settings=runner_settings,
        random=Random(0),
        ignore_limits=True,
    )

    def predicate(s):
        # Accept only buffers that are valid, discard free, and are
        # reproduced unchanged by the runner.
        outcome = runner.cached_test_function(s)
        if outcome.status < Status.VALID or outcome.has_discards:
            return False
        return outcome.buffer == s

    learner = LStar(predicate)

    runner.run()

    (smallest,) = runner.interesting_examples.values()
    seed_buffer = smallest.buffer

    # We make sure the learner has properly learned small examples.
    # This is all fairly ad hoc but is mostly designed to get it
    # to understand what the smallest example is and avoid any
    # loops at the beginning of the DFA that don't really exist.
    learner.learn(seed_buffer)

    for extra in (1, 2, 3):
        for _ in range(5):
            learner.learn(uniform(runner.random, extra) + smallest.buffer)

    # Keep feeding random buffers until a full round of ten leaves the
    # learner's generation counter unchanged.
    previous_generation = -1
    while learner.generation != previous_generation:
        previous_generation = learner.generation

        for _ in range(10):
            candidate = (
                uniform(runner.random, len(smallest.buffer)) + bytes(BUFFER_SIZE)
            )
            learner.learn(candidate)
            outcome = runner.cached_test_function(candidate)
            if outcome.status >= Status.VALID:
                learner.learn(outcome.buffer)

    LEARNERS[strategy] = learner
    return learner
def __draw_without_alphabet(self, n):
    """Return ``uniform(self.__random, n)``, i.e. ``n`` bytes from the RNG."""
    drawn = uniform(self.__random, n)
    return drawn