def do_draw(self, data):
    # 1 - Select a valid top-level domain (TLD) name
    # 2 - Check that the number of characters in our selected TLD won't
    #     prevent us from generating at least a 1 character subdomain.
    # 3 - Randomize the TLD between upper and lower case characters.
    domain = data.draw(
        st.sampled_from(TOP_LEVEL_DOMAINS)
        .filter(lambda tld: len(tld) + 2 <= self.max_length)
        .flatmap(
            lambda tld: st.tuples(
                *[st.sampled_from([c.lower(), c.upper()]) for c in tld]
            ).map(u"".join)
        )
    )
    # The maximum possible number of subdomains is 126,
    # 1 character subdomain + 1 '.' character, * 126 = 252,
    # with a max of 255, that leaves 3 characters for a TLD.
    # Allowing any more subdomains would not leave enough
    # characters for even the shortest possible TLDs.
    elements = cu.many(data, min_size=1, average_size=1, max_size=126)
    while elements.more():
        # Generate a new valid subdomain using the regex strategy.
        sub_domain = data.draw(st.from_regex(self.label_regex, fullmatch=True))
        if len(domain) + len(sub_domain) >= self.max_length:
            data.stop_example(discard=True)
            break
        domain = sub_domain + "." + domain
    return domain

def __init__(self, grammar, start, explicit):
    assert isinstance(grammar, lark.lark.Lark)
    if start is None:
        start = grammar.options.start
    if not isinstance(start, list):
        start = [start]
    self.grammar = grammar
    if "start" in getfullargspec(grammar.grammar.compile).args:
        terminals, rules, ignore_names = grammar.grammar.compile(start)
    else:  # pragma: no cover
        # This branch is to support lark <= 0.7.1, without the start argument.
        terminals, rules, ignore_names = grammar.grammar.compile()
    self.names_to_symbols = {}
    for r in rules:
        t = r.origin
        self.names_to_symbols[t.name] = t
    for t in terminals:
        self.names_to_symbols[t.name] = Terminal(t.name)
    self.start = st.sampled_from([self.names_to_symbols[s] for s in start])
    self.ignored_symbols = (
        st.sampled_from([self.names_to_symbols[n] for n in ignore_names])
        if ignore_names
        else st.nothing()
    )
    self.terminal_strategies = {
        t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
        for t in terminals
    }
    unknown_explicit = set(explicit) - get_terminal_names(
        terminals, rules, ignore_names
    )
    if unknown_explicit:
        raise InvalidArgument(
            "The following arguments were passed as explicit_strategies, "
            "but there is no such terminal production in this grammar: %r"
            % (sorted(unknown_explicit),)
        )
    self.terminal_strategies.update(explicit)
    nonterminals = {}
    for rule in rules:
        nonterminals.setdefault(rule.origin.name, []).append(tuple(rule.expansion))
    for v in nonterminals.values():
        v.sort(key=len)
    self.nonterminal_strategies = {
        k: st.sampled_from(v) for k, v in nonterminals.items()
    }
    self.__rule_labels = {}

def __init__(self, grammar, start=None):
    check_type(lark.lark.Lark, grammar, "grammar")
    if start is None:
        start = grammar.options.start
    if not isinstance(start, list):
        start = [start]
    self.grammar = grammar
    if "start" in getfullargspec(grammar.grammar.compile).args:
        terminals, rules, ignore_names = grammar.grammar.compile(start)
    else:  # pragma: no cover
        # This branch is to support lark <= 0.7.1, without the start argument.
        terminals, rules, ignore_names = grammar.grammar.compile()
    self.names_to_symbols = {}
    for r in rules:
        t = r.origin
        self.names_to_symbols[t.name] = t
    for t in terminals:
        self.names_to_symbols[t.name] = Terminal(t.name)
    self.start = st.sampled_from([self.names_to_symbols[s] for s in start])
    self.ignored_symbols = (
        st.sampled_from([self.names_to_symbols[n] for n in ignore_names])
        if ignore_names
        else st.nothing()
    )
    self.terminal_strategies = {
        t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
        for t in terminals
    }
    nonterminals = {}
    for rule in rules:
        nonterminals.setdefault(rule.origin.name, []).append(tuple(rule.expansion))
    for v in nonterminals.values():
        v.sort(key=len)
    self.nonterminal_strategies = {
        k: st.sampled_from(v) for k, v in nonterminals.items()
    }
    self.__rule_labels = {}

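# Usage sketch (illustrative, not from the original source): constructors
# like the ones above back hypothesis.extra.lark.from_lark, so a natural
# property test is that every generated string parses under the grammar
# that produced it. The tiny grammar here is an assumption for the example.
import lark
from hypothesis import given
from hypothesis.extra.lark import from_lark

NUMBER_GRAMMAR = r"""
start: NUMBER
NUMBER: /[0-9]+/
"""


@given(from_lark(lark.Lark(NUMBER_GRAMMAR)))
def test_generated_text_parses(text):
    lark.Lark(NUMBER_GRAMMAR).parse(text)
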
def dtype_factory(kind, sizes, valid_sizes, endianness):
    # Utility function, shared logic for most integer and string types
    valid_endian = ("?", "<", "=", ">")
    check_argument(
        endianness in valid_endian,
        u"Unknown endianness: was {}, must be in {}",
        endianness,
        valid_endian,
    )
    if valid_sizes is not None:
        if isinstance(sizes, int):
            sizes = (sizes,)
        check_argument(sizes, "Dtype must have at least one possible size.")
        check_argument(
            all(s in valid_sizes for s in sizes),
            u"Invalid sizes: was {} must be an item or sequence in {}",
            sizes,
            valid_sizes,
        )
        if all(isinstance(s, int) for s in sizes):
            sizes = sorted({s // 8 for s in sizes})
    strat = st.sampled_from(sizes)
    if "{}" not in kind:
        kind += "{}"
    if endianness == "?":
        return strat.map(("<" + kind).format) | strat.map((">" + kind).format)
    return strat.map((endianness + kind).format)

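# Worked example for dtype_factory above (values illustrative): integer
# sizes are given in bits and converted to bytes, and endianness '?' means
# "either", so the call below would yield a strategy drawing from the dtype
# codes '<i4', '<i8', '>i4', and '>i8'.
#
#     dtype_factory("i", (32, 64), (8, 16, 32, 64), "?")
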
class RunAnalysisSuccess(TestCase):
    @given(
        output_location=text(min_size=1, max_size=10, alphabet=string.ascii_letters),
        log_location=text(min_size=1, max_size=10, alphabet=string.ascii_letters),
        traceback_location=text(min_size=1, max_size=10, alphabet=string.ascii_letters),
        return_code=sampled_from([0, 1]),
    )
    def test_output_file_and_status_are_updated(
        self, output_location, log_location, traceback_location, return_code
    ):
        expected_status = (
            Analysis.status_choices.RUN_COMPLETED
            if return_code == 0
            else Analysis.status_choices.RUN_ERROR
        )
        with TemporaryDirectory() as d:
            with override_settings(MEDIA_ROOT=d):
                initiator = fake_user()
                analysis = fake_analysis()

                Path(d, output_location).touch()
                Path(d, log_location).touch()
                Path(d, traceback_location).touch()

                record_run_analysis_result(
                    (
                        os.path.join(d, output_location),
                        os.path.join(d, traceback_location),
                        os.path.join(d, log_location),
                        return_code,
                    ),
                    analysis.pk,
                    initiator.pk,
                )

                analysis.refresh_from_db()

                if return_code == 0:
                    self.assertEqual(analysis.output_file.file.name, output_location)
                    self.assertEqual(analysis.output_file.content_type, 'application/gzip')
                    self.assertEqual(analysis.output_file.creator, initiator)
                else:
                    self.assertEqual(analysis.output_file, None)

                self.assertEqual(analysis.run_log_file.file.name, log_location)
                self.assertEqual(analysis.run_log_file.content_type, 'application/gzip')
                self.assertEqual(analysis.run_log_file.creator, initiator)
                self.assertEqual(analysis.run_traceback_file.file.name, traceback_location)
                self.assertEqual(analysis.run_traceback_file.content_type, 'text/plain')
                self.assertEqual(analysis.run_traceback_file.creator, initiator)
                self.assertEqual(analysis.status, expected_status)

def combaination(draw):
    from wifi.config import Config

    packets = draw(integers(min_value=0, max_value=64))
    rate = draw(sampled_from([6, 9, 12, 18, 24, 36, 48, 54]))
    conf = Config.from_data_rate(rate)
    lim = packets * conf.coded_bits_per_ofdm_symbol
    data = draw(binary(min_size=lim, max_size=lim))
    data = bitstr.from_bytes(data)
    return data, conf.coded_bits_per_ofdm_symbol, conf.coded_bits_per_carrier_symbol

def __init__(self, grammar, start=None):
    check_type(lark.lark.Lark, grammar, "grammar")
    if start is None:
        start = grammar.options.start
    self.grammar = grammar
    terminals, rules, ignore_names = grammar.grammar.compile()
    self.names_to_symbols = {}
    for r in rules:
        t = r.origin
        self.names_to_symbols[t.name] = t
    for t in terminals:
        self.names_to_symbols[t.name] = Terminal(t.name)
    self.start = self.names_to_symbols[start]
    self.ignored_symbols = (
        st.sampled_from([self.names_to_symbols[n] for n in ignore_names])
        if ignore_names
        else st.nothing()
    )
    self.terminal_strategies = {
        t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
        for t in terminals
    }
    nonterminals = {}
    for rule in rules:
        nonterminals.setdefault(rule.origin.name, []).append(tuple(rule.expansion))
    for v in nonterminals.values():
        v.sort(key=len)
    self.nonterminal_strategies = {
        k: st.sampled_from(v) for k, v in nonterminals.items()
    }
    self.__rule_labels = {}

def from_dtype(dtype):
    # type: (np.dtype) -> st.SearchStrategy[Any]
    """Creates a strategy which can generate any value of the given dtype."""
    check_type(np.dtype, dtype, "dtype")
    # Compound datatypes, eg 'f4,f4,f4'
    if dtype.names is not None:
        # mapping np.void.type over a strategy is nonsense, so return now.
        return st.tuples(*[from_dtype(dtype.fields[name][0]) for name in dtype.names])
    # Subarray datatypes, eg '(2, 3)i4'
    if dtype.subdtype is not None:
        subtype, shape = dtype.subdtype
        return arrays(subtype, shape)
    # Scalar datatypes
    if dtype.kind == u"b":
        result = st.booleans()  # type: SearchStrategy[Any]
    elif dtype.kind == u"f":
        if dtype.itemsize == 2:
            result = st.floats(width=16)
        elif dtype.itemsize == 4:
            result = st.floats(width=32)
        else:
            result = st.floats()
    elif dtype.kind == u"c":
        if dtype.itemsize == 8:
            float32 = st.floats(width=32)
            result = st.builds(complex, float32, float32)
        else:
            result = st.complex_numbers()
    elif dtype.kind in (u"S", u"a"):
        # Numpy strings are null-terminated; only allow round-trippable values.
        # `itemsize == 0` means 'fixed length determined at array creation'
        result = st.binary(max_size=dtype.itemsize or None).filter(
            lambda b: b[-1:] != b"\0"
        )
    elif dtype.kind == u"u":
        result = st.integers(min_value=0, max_value=2 ** (8 * dtype.itemsize) - 1)
    elif dtype.kind == u"i":
        overflow = 2 ** (8 * dtype.itemsize - 1)
        result = st.integers(min_value=-overflow, max_value=overflow - 1)
    elif dtype.kind == u"U":
        # Encoded in UTF-32 (four bytes/codepoint) and null-terminated
        result = st.text(max_size=(dtype.itemsize or 0) // 4 or None).filter(
            lambda b: b[-1:] != u"\0"
        )
    elif dtype.kind in (u"m", u"M"):
        if "[" in dtype.str:
            res = st.just(dtype.str.split("[")[-1][:-1])
        else:
            res = st.sampled_from(TIME_RESOLUTIONS)
        result = st.builds(dtype.type, st.integers(-(2 ** 63), 2 ** 63 - 1), res)
    else:
        raise InvalidArgument(u"No strategy inference for {}".format(dtype))
    return result.map(dtype.type)

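# Usage sketch, assuming the public entry point hypothesis.extra.numpy.from_dtype
# (where the function above lives upstream): values drawn for a dtype should
# survive a round-trip through that dtype unchanged. The alias avoids clashing
# with the definition above.
import numpy as np
from hypothesis import given
from hypothesis.extra.numpy import from_dtype as public_from_dtype


@given(public_from_dtype(np.dtype("int16")))
def test_int16_values_round_trip(x):
    # Casting to the dtype is a no-op for values the strategy produced.
    assert np.dtype("int16").type(x) == x
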
def domains():
    """A strategy for :rfc:`1035` fully qualified domain names."""
    atoms = st.text(
        string.ascii_letters + "0123456789-", min_size=1, max_size=63
    ).filter(lambda s: "-" not in s[0] + s[-1])
    return st.builds(
        lambda x, y: ".".join(x + [y]),
        st.lists(atoms, min_size=1),
        # TODO: be more devious about top-level domains
        st.sampled_from(["com", "net", "org", "biz", "info"]),
    ).filter(lambda url: len(url) <= 255)

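# Usage sketch for domains() above (upstream this is provisional API, e.g.
# hypothesis.provisional.domains): the RFC 1035 limits it enforces can be
# checked directly on the generated values.
from hypothesis import given


@given(domains())
def test_domains_respect_rfc1035_limits(domain):
    assert len(domain) <= 255
    assert all(1 <= len(label) <= 63 for label in domain.split("."))
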
def urls():
    # type: () -> SearchStrategy[Text]
    """A strategy for :rfc:`3986`, generating http/https URLs."""

    def url_encode(s):
        return "".join(
            c if c in URL_SAFE_CHARACTERS else "%%%02X" % ord(c) for c in s
        )

    schemes = st.sampled_from(["http", "https"])
    ports = st.integers(min_value=0, max_value=2**16 - 1).map(":{}".format)
    paths = st.lists(st.text(string.printable).map(url_encode)).map("/".join)
    return st.builds(
        u"{}://{}{}/{}".format, schemes, domains(), st.just(u"") | ports, paths
    )

def urls():
    """A strategy for :rfc:`3986`, generating http/https URLs."""

    def url_encode(s):
        safe_chars = set(string.ascii_letters + string.digits + "$-_.+!*'(),")
        return "".join(c if c in safe_chars else "%%%02X" % ord(c) for c in s)

    schemes = st.sampled_from(["http", "https"])
    ports = st.integers(min_value=0, max_value=2 ** 16 - 1).map(":{}".format)
    paths = st.lists(st.text(string.printable).map(url_encode)).map(
        lambda path: "/".join([""] + path)
    )
    return st.builds(
        "{}://{}{}{}".format, schemes, domains(), st.one_of(st.just(""), ports), paths
    )

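# Usage sketch for urls() above: generated strings should parse cleanly with
# the standard library. The properties checked here are an illustration, not
# a test from the original source.
from urllib.parse import urlparse

from hypothesis import given


@given(urls())
def test_urls_parse(url):
    parsed = urlparse(url)
    assert parsed.scheme in ("http", "https")
    assert parsed.hostname is not None
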
def timezones():
    # type: () -> st.SearchStrategy[dt.tzinfo]
    """Any timezone in the Olsen database, as a pytz tzinfo object.

    This strategy minimises to UTC, or the smallest possible fixed offset,
    and is designed for use with :py:func:`hypothesis.strategies.datetimes`.
    """
    all_timezones = [pytz.timezone(tz) for tz in pytz.all_timezones]
    # Some timezones have always had a constant offset from UTC.  This makes
    # them simpler than timezones with daylight savings, and the smaller the
    # absolute offset the simpler they are.  Of course, UTC is even simpler!
    static = [pytz.UTC]  # type: list
    static += sorted(
        (t for t in all_timezones if isinstance(t, StaticTzInfo)),
        key=lambda tz: abs(tz.utcoffset(dt.datetime(2000, 1, 1))),
    )
    # Timezones which have changed UTC offset; best ordered by name.
    dynamic = [tz for tz in all_timezones if tz not in static]
    return st.sampled_from(static + dynamic)

def timezones():
    # type: () -> st.SearchStrategy[dt.tzinfo]
    """Any timezone in dateutil.

    This strategy minimises to UTC, or the timezone with the smallest offset
    from UTC as of 2000-01-01, and is designed for use with
    :py:func:`~hypothesis.strategies.datetimes`.

    Note that the timezones generated by the strategy may vary depending on
    the configuration of your machine. See the dateutil documentation for
    more information.
    """
    reference_date = dt.datetime(2000, 1, 1)
    tz_names = zoneinfo.get_zonefile_instance().zones
    all_timezones = [tz.UTC]  # type: ignore
    all_timezones += sorted(
        [tz.gettz(t) for t in tz_names],
        key=lambda zone: abs(zone.utcoffset(reference_date)),
    )
    return st.sampled_from(all_timezones)

def timezones():
    # type: () -> st.SearchStrategy[dt.tzinfo]
    """Any timezone in dateutil.

    This strategy minimises to UTC, or the timezone with the smallest offset
    from UTC as of 2000-01-01, and is designed for use with
    :py:func:`~hypothesis.strategies.datetimes`.

    Note that the timezones generated by the strategy may vary depending on
    the configuration of your machine. See the dateutil documentation for
    more information.
    """
    all_timezones = sorted(
        [tz.gettz(t) for t in zoneinfo.get_zonefile_instance().zones],
        key=__zone_sort_key,
    )
    all_timezones.insert(0, tz.UTC)
    # We discard Nones in the list comprehension because Mypy knows that
    # tz.gettz may return None.  However this should never happen for known
    # zone names, so we assert that it's impossible first.
    assert None not in all_timezones
    return st.sampled_from([z for z in all_timezones if z is not None])

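# Usage sketch showing the pairing the docstrings above recommend: feeding
# timezones() into st.datetimes() to get timezone-aware values. This example
# is illustrative and works with either variant defined above.
import hypothesis.strategies as st
from hypothesis import given


@given(st.datetimes(timezones=timezones()))
def test_generated_datetimes_are_aware(value):
    assert value.tzinfo is not None
    assert value.utcoffset() is not None
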
    # test reverse
    rev = undo(symbols, bits_per_symbol=4)
    assert rev == input


def test_i144():
    # Table I-9—SIGNAL field bits after interleaving
    input = bits('100101001101000000010100100000110010010010010100')

    # Table I-10—Frequency domain representation of SIGNAL field
    expected = [(1 + 0j), (-1 + 0j), (-1 + 0j), (1 + 0j), (-1 + 0j), (1 + 0j),
                (-1 + 0j), (-1 + 0j), (1 + 0j), (1 + 0j), (-1 + 0j), (1 + 0j),
                (-1 + 0j), (-1 + 0j), (-1 + 0j), (-1 + 0j), (-1 + 0j), (-1 + 0j),
                (-1 + 0j), (1 + 0j), (-1 + 0j), (1 + 0j), (-1 + 0j), (-1 + 0j),
                (1 + 0j), (-1 + 0j), (-1 + 0j), (-1 + 0j), (-1 + 0j), (-1 + 0j),
                (1 + 0j), (1 + 0j), (-1 + 0j), (-1 + 0j), (1 + 0j), (-1 + 0j),
                (-1 + 0j), (1 + 0j), (-1 + 0j), (-1 + 0j), (1 + 0j), (-1 + 0j),
                (-1 + 0j), (1 + 0j), (-1 + 0j), (1 + 0j), (-1 + 0j), (-1 + 0j)]

    symbols = do(input, bits_per_symbol=1)
    np.testing.assert_equal(expected, np.round(symbols, 3))

    # test reverse
    rev = undo(symbols, bits_per_symbol=1)
    assert rev == input


@given(binary(), sampled_from([1, 2, 4, 6]))
def test_hypothesis(data, bits_per_symbol):
    data = bitstr.from_bytes(data)
    assert undo(do(data, bits_per_symbol), bits_per_symbol) == data

def do_draw(self, data):
    # All shapes are handled in column-major order; i.e. they are reversed
    base_shape = self.base_shape[::-1]
    result_shape = list(base_shape)
    shapes = [[] for _ in range(self.num_shapes)]
    use = [True for _ in range(self.num_shapes)]

    for dim_count in range(1, self.max_dims + 1):
        dim = dim_count - 1

        # We begin by drawing a valid dimension-size for the given
        # dimension. This restricts the variability across the shapes
        # at this dimension such that they can only choose between
        # this size and a singleton dimension.
        if len(base_shape) < dim_count or base_shape[dim] == 1:
            # dim is unrestricted by the base-shape: shrink to min_side
            dim_side = data.draw(self.side_strat)
        elif base_shape[dim] <= self.max_side:
            # dim is aligned with non-singleton base-dim
            dim_side = base_shape[dim]
        else:
            # only a singleton is valid in alignment with the base-dim
            dim_side = 1

        for shape_id, shape in enumerate(shapes):
            # Populating this dimension-size for each shape, either
            # the drawn size is used or, if permitted, a singleton
            # dimension.
            if dim_count <= len(base_shape) and self.size_one_allowed:
                # aligned: shrink towards size 1
                side = data.draw(st.sampled_from([1, dim_side]))
            else:
                side = dim_side

            # Use a trick where a biased coin is queried to see
            # if the given shape-tuple will continue to be grown. All
            # of the relevant draws will still be made for the given
            # shape-tuple even if it is no longer being added to.
            # This helps to ensure more stable shrinking behavior.
            if self.min_dims < dim_count:
                use[shape_id] &= cu.biased_coin(
                    data, 1 - 1 / (1 + self.max_dims - dim)
                )

            if use[shape_id]:
                shape.append(side)
                if len(result_shape) < len(shape):
                    result_shape.append(shape[-1])
                elif shape[-1] != 1 and result_shape[dim] == 1:
                    result_shape[dim] = shape[-1]

        if not any(use):
            break

    result_shape = result_shape[: max(map(len, [self.base_shape] + shapes))]

    assert len(shapes) == self.num_shapes
    assert all(self.min_dims <= len(s) <= self.max_dims for s in shapes)
    assert all(self.min_side <= s <= self.max_side for shape in shapes for s in shape)

    return BroadcastableShapes(
        input_shapes=tuple(tuple(reversed(shape)) for shape in shapes),
        result_shape=tuple(reversed(result_shape)),
    )

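# Usage sketch via the public wrapper this do_draw backs upstream,
# hypothesis.extra.numpy.mutually_broadcastable_shapes: the reported
# result_shape should agree with NumPy's own broadcasting rules. The
# concrete base_shape is illustrative.
import numpy as np
from hypothesis import given
from hypothesis.extra.numpy import mutually_broadcastable_shapes


@given(mutually_broadcastable_shapes(num_shapes=2, base_shape=(3, 1)))
def test_result_shape_matches_numpy(shapes):
    assert np.broadcast_shapes((3, 1), *shapes.input_shapes) == shapes.result_shape
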
from typing import Tuple

import numpy as np

from hypothesis import given
from hypothesis._strategies import binary, sampled_from
from wifi import bits, bitstr


def do(data: bits, data_bits_per_ofdm_symbol: int) -> Tuple[bits, int]:
    service = '0' * 16
    tail = '0' * 6
    data = service + data + tail

    n_symbols = int(np.ceil(len(data) / data_bits_per_ofdm_symbol))
    n_data = n_symbols * data_bits_per_ofdm_symbol
    n_pad = int(n_data - len(data))
    pad = '0' * n_pad
    data = data + pad
    return data, n_pad


def undo(data: bits, length_bytes: int) -> bits:
    return data[16:16 + length_bytes * 8]


@given(binary(), sampled_from([48, 96, 192, 288]))
def test_hypothesis(data, data_bits_per_ofdm_symbol):
    data = bitstr.from_bytes(data)
    done_data, n_pad = do(data, data_bits_per_ofdm_symbol)
    assert undo(done_data, len(data) // 8) == data

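# Worked example for the padding arithmetic in do() above (values
# illustrative): a 100-bit payload plus the 16 SERVICE bits and 6 tail bits
# is 122 bits; at 48 data bits per OFDM symbol that rounds up to 3 symbols
# (144 bits), so 22 pad bits are appended.
assert int(np.ceil((16 + 100 + 6) / 48)) * 48 - (16 + 100 + 6) == 22
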
    data_rate_bits = data[:4]
    data_rate = [key for key, value in RATE_LUT.items() if value == data_rate_bits][0]
    length_bytes = bitstr.to_int(bitstr.reverse(data[5:17]))
    return data_rate, length_bytes


def test_signal_field():
    """ IEEE Std 802.11-2016: I.1.4.1 SIGNAL field bit assignment """

    # IEEE Std 802.11-2016: Table I-7—Bit assignment for SIGNAL field
    # expect = '101100010011000000000000'
    data_rate = 36
    length_bytes = 100
    output = do(data_rate, length_bytes)
    # assert output == expect

    # test decode
    dec_data_rate, dec_length_bytes = undo(output)
    assert dec_data_rate == data_rate
    assert dec_length_bytes == length_bytes


@given(sampled_from(list(RATE_LUT.keys())),
       integers(min_value=0, max_value=(2**12) - 1))
def test_hypothesis(data_rate: int, length_bytes: int):
    assert undo(do(data_rate, length_bytes)) == (data_rate, length_bytes)

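# How the commented-out Table I-7 vector above decomposes (layout per the
# SIGNAL field definition in IEEE Std 802.11-2016; the split shown here is
# an illustration, not code from the original source):
#
#   '1011' (RATE, 36 Mb/s) + '0' (reserved)
#   + '001001100000' (LENGTH = 100, LSB first) + '0' (even parity)
#   + '000000' (tail)
#   = '101100010011000000000000'
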
        assume(bitstr.is_divisible(data, by=4))
        data = [bit for i, bit in enumerate(data) if (i % 4) != 3]
    elif coding_rate == '3/4':
        # throw out the bits at 0-indexed positions 3 and 4 of each 6-bit group
        assume(bitstr.is_divisible(data, by=6))
        data = [bit for i, bit in enumerate(data) if (i % 6) != 3 and (i % 6) != 4]
    return bitstr.merge(data)


def undo(data: bits, coding_rate='1/2') -> bits:
    # un-puncturing process i.e. add 'X' bits, which are basically just
    # ignored by the conv decoder
    if coding_rate == '3/4':
        data = [d[:3] + '??' + d[3] for d in bitstr.split(data, 4)]
    elif coding_rate == '2/3':
        data = [d + '?' for d in bitstr.split(data, 3)]
    return bitstr.merge(data)


@given(binary(), sampled_from(['1/2', '2/3', '3/4']))
def test_hypothesis(data, coding_rate):
    data = bits(data)

    # can't test equality because 'do' throws away data
    do1 = do(data, coding_rate)
    undo1 = undo(do1, coding_rate)
    assert len(undo1) == len(data)

    do2 = do(undo1, coding_rate)
    assert do1 == do2

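# Worked example of the puncturing pattern (illustrative bits): at rate 3/4,
# one 6-bit encoder group 'abcdef' is punctured by do() to 'abcf' (the bits
# at 0-indexed positions 3 and 4 are dropped), and undo() rebuilds it as
# 'abc??f', with '?' marking the erasures the decoder ignores. At rate 2/3,
# each 4-bit group 'abcd' becomes 'abc' and is rebuilt as 'abc?'.
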
def gen_ignore(self, data, draw_state):
    if self.ignored_symbols and data.draw_bits(2) == 3:
        emit = data.draw(st.sampled_from(self.ignored_symbols))
        self.draw_symbol(data, emit, draw_state)

def random_packet(draw):
    elements = draw(integers(min_value=0, max_value=(2**12) - 1))
    data = draw(binary(min_size=elements, max_size=elements))
    data = bitstr.from_bytes(data)
    rate = draw(sampled_from([6, 9, 12, 18, 24, 36, 48, 54]))
    return data, rate