def do_draw(self, data):
    """Draw a domain name: a case-randomised TLD with subdomain labels prepended."""

    def leaves_room_for_label(tld):
        # A usable TLD must leave space for a "." plus at least one
        # character of subdomain within ``self.max_length``.
        return len(tld) + 2 <= self.max_length

    def randomise_case(tld):
        # Each character of the TLD is independently upper- or lower-cased.
        per_char = [st.sampled_from([c.lower(), c.upper()]) for c in tld]
        return st.tuples(*per_char).map(u"".join)

    tld_strategy = (
        st.sampled_from(TOP_LEVEL_DOMAINS)
        .filter(leaves_room_for_label)
        .flatmap(randomise_case)
    )
    domain = data.draw(tld_strategy)

    # At most 126 subdomains are possible: a one-character label plus its
    # "." separator, 126 times, is 252 characters, and with a maximum of 255
    # that leaves 3 characters for the TLD.  Any more subdomains would not
    # leave room for even the shortest possible TLDs.
    label_strategy = st.from_regex(self.label_regex, fullmatch=True)
    elements = cu.many(data, min_size=1, average_size=1, max_size=126)
    while elements.more():
        # Each label is a fresh match of the label regex.
        label = data.draw(label_strategy)
        if len(domain) + len(label) >= self.max_length:
            # Adding this label (plus its separator) would exceed the
            # limit, so the draw is discarded and generation stops.
            data.stop_example(discard=True)
            break
        domain = label + "." + domain
    return domain
def __init__(self, grammar, start, explicit):
    """Build the per-symbol strategies for a compiled Lark grammar.

    :param grammar: a ``lark.lark.Lark`` instance.
    :param start: a start symbol name, a list of them, or ``None`` to use
        ``grammar.options.start``.
    :param explicit: mapping of terminal name to the strategy to use for
        that terminal instead of the regex-derived default.
    :raises InvalidArgument: if ``explicit`` names something that
        ``get_terminal_names`` does not report for this grammar.
    """
    assert isinstance(grammar, lark.lark.Lark)
    if start is None:
        start = grammar.options.start
    if not isinstance(start, list):
        start = [start]
    self.grammar = grammar

    # The signature of ``Grammar.compile`` differs between Lark releases, so
    # inspect it and call it in whichever form this installation accepts.
    # Without the ``terminals_to_keep`` branch, releases that require that
    # argument fail with a TypeError on ``compile(start)``.
    compile_args = getfullargspec(grammar.grammar.compile).args
    if "terminals_to_keep" in compile_args:
        terminals, rules, ignore_names = grammar.grammar.compile(start, ())
    elif "start" in compile_args:  # pragma: no cover
        # Releases without the terminals_to_keep argument.
        terminals, rules, ignore_names = grammar.grammar.compile(start)
    else:  # pragma: no cover
        # This branch is to support lark <= 0.7.1, without the start argument.
        terminals, rules, ignore_names = grammar.grammar.compile()

    # Map every symbol name to its symbol object.  Terminals are registered
    # after rule origins, so a terminal wins if the names ever coincide.
    self.names_to_symbols = {}
    for r in rules:
        t = r.origin
        self.names_to_symbols[t.name] = t
    for t in terminals:
        self.names_to_symbols[t.name] = Terminal(t.name)

    self.start = st.sampled_from([self.names_to_symbols[s] for s in start])
    self.ignored_symbols = tuple(self.names_to_symbols[n] for n in ignore_names)

    # Default: each terminal is generated from its own regular expression.
    self.terminal_strategies = {
        t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
        for t in terminals
    }
    unknown_explicit = set(explicit) - get_terminal_names(
        terminals, rules, ignore_names
    )
    if unknown_explicit:
        raise InvalidArgument(
            "The following arguments were passed as explicit_strategies, "
            "but there is no such terminal production in this grammar: %r"
            % (sorted(unknown_explicit),)
        )
    # User-supplied strategies override the regex-derived defaults.
    self.terminal_strategies.update(explicit)

    # Group the expansions of each nonterminal, shortest expansion first.
    nonterminals = {}
    for rule in rules:
        nonterminals.setdefault(rule.origin.name, []).append(tuple(rule.expansion))
    for v in nonterminals.values():
        v.sort(key=len)

    self.nonterminal_strategies = {
        k: st.sampled_from(v) for k, v in nonterminals.items()
    }
    self.__rule_labels = {}
def ip_addresses(
    *, v: int = None, network: Union[str, IPv4Network, IPv6Network] = None
) -> SearchStrategy[Union[IPv4Address, IPv6Address]]:
    r"""Generate IP addresses - ``v=4`` for :class:`~python:ipaddress.IPv4Address`\ es,
    ``v=6`` for :class:`~python:ipaddress.IPv6Address`\ es, or leave unspecified
    to allow both versions.

    ``network`` may be an :class:`~python:ipaddress.IPv4Network` or
    :class:`~python:ipaddress.IPv6Network`, or a string representing a network such as
    ``"127.0.0.0/24"`` or ``"2001:db8::/32"``.  As well as generating addresses within
    a particular routable network, this can be used to generate addresses from a
    reserved range listed in the
    `IANA <https://www.iana.org/assignments/iana-ipv4-special-registry/>`__
    `registries <https://www.iana.org/assignments/iana-ipv6-special-registry/>`__.

    If you pass both ``v`` and ``network``, they must be for the same version.
    """
    if v is not None:
        check_type(int, v, "v")
        if v not in (4, 6):
            raise InvalidArgument("v=%r, but only v=4 or v=6 are valid" % (v,))

    if network is None:
        # Mix fully arbitrary addresses with addresses drawn from the
        # reserved-address registries, to boost the chance of generating
        # one of the various special types of address.
        four = binary(min_size=4, max_size=4).map(IPv4Address) | sampled_from(
            SPECIAL_IPv4_RANGES
        ).flatmap(lambda net: ip_addresses(network=net))
        six = binary(min_size=16, max_size=16).map(IPv6Address) | sampled_from(
            SPECIAL_IPv6_RANGES
        ).flatmap(lambda net: ip_addresses(network=net))
        if v is None:
            return four | six
        # v has already been validated as either 4 or 6.
        return four if v == 4 else six

    if isinstance(network, str):
        network = ip_network(network)
    check_type((IPv4Network, IPv6Network), network, "network")
    assert isinstance(network, (IPv4Network, IPv6Network))  # for Mypy
    if v not in (None, network.version):
        raise InvalidArgument("v=%r is incompatible with network=%r" % (v, network))

    # Draw an integer between the first and last address of the network
    # and convert it to the matching address class.
    addr_type = IPv4Address if network.version == 4 else IPv6Address
    lowest = int(network[0])
    highest = int(network[-1])
    return integers(lowest, highest).map(addr_type)  # type: ignore
def test_renaming(cl_and_vals, data):
    """A field renamed to ``"class"`` round-trips through generated dict hooks."""
    cl, vals = cl_and_vals
    converter = Converter()

    # Pick one attribute of the class to be exposed under the key "class".
    renamed = data.draw(sampled_from(fields(cl)))

    unstructure_hook = make_dict_unstructure_fn(
        cl, converter, **{renamed.name: override(rename="class")}
    )
    structure_hook = make_dict_structure_fn(
        cl, converter, **{renamed.name: override(rename="class")}
    )
    converter.register_structure_hook(cl, structure_hook)
    converter.register_unstructure_hook(cl, unstructure_hook)

    original = cl(*vals)
    as_dict = converter.unstructure(original)
    assert "class" in as_dict

    round_tripped = converter.structure(as_dict, cl)
    assert original == round_tripped
def urls() -> SearchStrategy[str]:
    """A strategy for :rfc:`3986`, generating http/https URLs."""

    def url_encode(s):
        # Percent-encode every character outside URL_SAFE_CHARACTERS.
        pieces = []
        for c in s:
            if c in URL_SAFE_CHARACTERS:
                pieces.append(c)
            else:
                pieces.append("%%%02X" % ord(c))
        return "".join(pieces)

    schemes = st.sampled_from(["http", "https"])
    # Either no port at all, or ":<port>" for any 16-bit port number.
    ports = st.integers(min_value=0, max_value=2**16 - 1).map(":{}".format)
    optional_port = st.just("") | ports
    # A path is a list of encoded printable-text segments joined by "/".
    segments = st.lists(st.text(string.printable).map(url_encode))
    paths = segments.map("/".join)

    return st.builds(
        "{}://{}{}/{}".format, schemes, domains(), optional_port, paths
    )
def timezones() -> st.SearchStrategy[dt.tzinfo]:
    """Any timezone in the Olson database, as a pytz tzinfo object.

    This strategy minimises to UTC, or the smallest possible fixed
    offset, and is designed for use with
    :py:func:`hypothesis.strategies.datetimes`.
    """
    every_zone = list(map(pytz.timezone, pytz.all_timezones))

    def offset_magnitude(zone):
        # Absolute UTC offset of the zone, evaluated at 2000-01-01.
        return abs(zone.utcoffset(dt.datetime(2000, 1, 1)))

    # Zones that have always had a constant offset from UTC are simpler than
    # zones with daylight savings, and the smaller the absolute offset the
    # simpler they are.  UTC itself is simplest of all, so it goes first.
    fixed_offset = (z for z in every_zone if isinstance(z, StaticTzInfo))
    static = [pytz.UTC, *sorted(fixed_offset, key=offset_magnitude)]  # type: list

    # Everything else has changed UTC offset at some point; these keep the
    # order in which pytz lists them.
    dynamic = [z for z in every_zone if z not in static]
    return st.sampled_from(static + dynamic)
def timezones() -> st.SearchStrategy[dt.tzinfo]:
    """Any timezone from :pypi:`dateutil <python-dateutil>`.

    This strategy minimises to UTC, or the timezone with the smallest offset
    from UTC as of 2000-01-01, and is designed for use with
    :py:func:`~hypothesis.strategies.datetimes`.

    Note that the timezones generated by the strategy may vary depending on the
    configuration of your machine. See the dateutil documentation for more
    information.
    """
    zone_names = zoneinfo.get_zonefile_instance().zones
    ordered_zones = sorted(map(tz.gettz, zone_names), key=__zone_sort_key)
    # UTC always comes first, ahead of the sorted zones.
    all_timezones = [tz.UTC, *ordered_zones]

    # Mypy knows that tz.gettz may return None, so Nones are filtered out of
    # the final list.  That should never happen for known zone names, which
    # is why it is asserted to be impossible first.
    assert None not in all_timezones
    return st.sampled_from([z for z in all_timezones if z is not None])
def _hypothesis_do_random(self, method, kwargs):
    """Produce the result for one call to a random-number method.

    ``method`` is the method name as a string and ``kwargs`` a mapping of that
    call's arguments.  A cache key is derived from the call; if the current
    state already records a result for that key, it is replayed.  Otherwise a
    value is drawn from ``self.__data``, recorded against the key together
    with a fresh ``RandomState``, and returned via ``self.__convert_result``.

    Raises ``ValueError`` for an empty ``randrange`` or an out-of-range
    ``sample`` size, and ``NotImplementedError`` for an unrecognised method.
    """
    # Sequence-based methods are keyed on the *length* of the sequence rather
    # than on the argument names alone.
    if method == "choices":
        key = (method, len(kwargs["population"]), kwargs.get("k"))
    elif method == "choice":
        key = (method, len(kwargs["seq"]))
    elif method == "shuffle":
        key = (method, len(kwargs["x"]))
    else:
        # Note: sorted(kwargs) yields the argument *names*, not their values.
        key = (method, ) + tuple(sorted(kwargs))

    # Replay a previously recorded result for this key, advancing to the
    # state that was stored alongside it.
    try:
        result, self.__state = self.__state.next_states[key]
    except KeyError:
        pass
    else:
        return self.__convert_result(method, kwargs, result)

    # No recorded result: draw a raw value appropriate to the method.
    if method == "_randbelow":
        result = cu.integer_range(self.__data, 0, kwargs["n"] - 1)
    elif method in ("betavariate", "random"):
        result = self.__data.draw(UNIFORM)
    elif method == "uniform":
        a = normalize_zero(kwargs["a"])
        b = normalize_zero(kwargs["b"])
        result = self.__data.draw(st.floats(a, b))
    elif method in ("weibullvariate", "gammavariate"):
        result = self.__data.draw(
            st.floats(min_value=0.0, allow_infinity=False))
    elif method in ("gauss", "normalvariate"):
        # Only ``mu`` is read here: the result is mu plus any finite float.
        mu = kwargs["mu"]
        result = mu + self.__data.draw(
            st.floats(allow_nan=False, allow_infinity=False))
    elif method == "vonmisesvariate":
        result = self.__data.draw(st.floats(0, 2 * math.pi))
    elif method == "randrange":
        # With no ``stop`` given, ``start`` is treated as the stop and the
        # range begins at 0.
        if kwargs["stop"] is None:
            stop = kwargs["start"]
            start = 0
        else:
            start = kwargs["start"]
            stop = kwargs["stop"]

        step = kwargs["step"]
        if start == stop:
            raise ValueError("empty range for randrange(%d, %d, %d)" %
                             (start, stop, step))

        if step != 1:
            # Draw a step index, then scale it back into the range.  When
            # the span is an exact multiple of the step, the last index
            # would land on ``stop`` itself, so it is excluded.
            endpoint = (stop - start) // step
            if (start - stop) % step == 0:
                endpoint -= 1

            i = cu.integer_range(self.__data, 0, endpoint)
            result = start + i * step
        else:
            result = cu.integer_range(self.__data, start, stop - 1)
    elif method == "randint":
        result = cu.integer_range(self.__data, kwargs["a"], kwargs["b"])
    elif method == "choice":
        # The raw result is an index into ``seq``, not an element.
        # NOTE(review): presumably __convert_result maps it to the element
        # - confirm against that method.
        seq = kwargs["seq"]
        result = cu.integer_range(self.__data, 0, len(seq) - 1)
    elif method == "choices":
        # Exactly ``k`` indices into the population, repeats allowed.
        k = kwargs["k"]
        result = self.__data.draw(
            st.lists(
                st.integers(0, len(kwargs["population"]) - 1),
                min_size=k,
                max_size=k,
            ))
    elif method == "sample":
        k = kwargs["k"]
        seq = kwargs["population"]

        if k > len(seq) or k < 0:
            raise ValueError(
                "Sample size %d not in expected range 0 <= k <= %d" %
                (k, len(seq)))

        # Exactly ``k`` distinct indices into the population.
        result = self.__data.draw(
            st.lists(
                st.sampled_from(range(len(seq))),
                min_size=k,
                max_size=k,
                unique=True,
            ))
    elif method == "getrandbits":
        result = self.__data.draw_bits(kwargs["n"])
    elif method == "triangular":
        low = normalize_zero(kwargs["low"])
        high = normalize_zero(kwargs["high"])
        mode = normalize_zero(kwargs["mode"])
        if mode is None:
            result = self.__data.draw(st.floats(low, high))
        elif self.__data.draw_bits(1):
            # One drawn bit selects which side of the mode to draw from.
            result = self.__data.draw(st.floats(mode, high))
        else:
            result = self.__data.draw(st.floats(low, mode))
    elif method in ("paretovariate", "expovariate", "lognormvariate"):
        result = self.__data.draw(st.floats(min_value=0.0))
    elif method == "shuffle":
        # The raw result is a permutation of the indices of ``x``.
        result = self.__data.draw(st.permutations(range(len(kwargs["x"]))))
    # This is tested for but only appears in 3.9 so doesn't appear in coverage.
    elif method == "randbytes":  # pragma: no cover
        n = kwargs["n"]
        result = self.__data.draw(st.binary(min_size=n, max_size=n))
    else:
        raise NotImplementedError(method)

    # Record the new result under this key with a fresh successor state,
    # and make that successor the current state.
    new_state = RandomState()
    self.__state.next_states[key] = (result, new_state)
    self.__state = new_state

    return self.__convert_result(method, kwargs, result)
def gen_ignore(self, data, draw_state):
    """Occasionally emit one of the grammar's ignored symbols.

    Nothing is drawn at all when there are no ignored symbols.  Otherwise
    two bits are drawn, and only when they equal 3 is an ignored symbol
    chosen and passed to ``self.draw_symbol``.
    """
    if not self.ignored_symbols:
        return
    if data.draw_bits(2) != 3:
        return
    chosen = data.draw(st.sampled_from(self.ignored_symbols))
    self.draw_symbol(data, chosen, draw_state)
def sample_with_prefixes(zone):
    """Sample from ``zone`` and its ``posix/`` and ``right/`` forms that pass ``valid_key``."""
    candidates = [zone, f"posix/{zone}", f"right/{zone}"]
    usable = list(filter(valid_key, candidates))
    return sampled_from(usable)
def timezone_keys(
    *,
    # allow_alias: bool = True,
    # allow_deprecated: bool = True,
    allow_prefix: bool = True,
) -> SearchStrategy[str]:
    """A strategy for :wikipedia:`IANA timezone names <List_of_tz_database_time_zones>`.

    As well as timezone names like ``"UTC"``, ``"Australia/Sydney"``, or
    ``"America/New_York"``, this strategy can generate:

    - Aliases such as ``"Antarctica/McMurdo"``, which links to ``"Pacific/Auckland"``.
    - Deprecated names such as ``"Antarctica/South_Pole"``, which *also* links to
      ``"Pacific/Auckland"``.  Note that most but not all deprecated timezone names
      are also aliases.
    - Timezone names with the ``"posix/"`` or ``"right/"`` prefixes, unless
      ``allow_prefix=False``.

    These strings are provided separately from Tzinfo objects - such as ZoneInfo
    instances from the timezones() strategy - to facilitate testing of timezone
    logic without needing workarounds to access non-canonical names.

    .. note::

        The :mod:`python:zoneinfo` module is new in Python 3.9, so you will need
        to install the :pypi:`backports.zoneinfo` module on earlier versions, and
        the :pypi:`importlib_resources` backport on Python 3.6.

        ``pip install hypothesis[zoneinfo]`` will install these conditional
        dependencies if and only if they are needed.

    On Windows, you may need to access IANA timezone data via the :pypi:`tzdata`
    package.  For non-IANA timezones, such as Windows-native names or GNU TZ
    strings, we recommend using :func:`~hypothesis.strategies.sampled_from` with
    the :pypi:`dateutil` package, e.g. :meth:`dateutil:dateutil.tz.tzwin.list`.
    """
    # check_type(bool, allow_alias, "allow_alias")
    # check_type(bool, allow_deprecated, "allow_deprecated")
    check_type(bool, allow_prefix, "allow_prefix")
    if zoneinfo is None:  # pragma: no cover
        raise ModuleNotFoundError(
            "The zoneinfo module is required, but could not be imported.  "
            "Run `pip install hypothesis[zoneinfo]` and try again."
        )

    # "UTC" is always listed first, followed by every available name in
    # sorted order.
    available_timezones = ("UTC", *sorted(zoneinfo.available_timezones()))

    # TODO: filter out alias and deprecated names if disallowed

    # With prefixes allowed, a key is chosen first and then flatmapped to one
    # of the prefixed forms available for it, which requires a way to decide
    # whether a given (possibly prefixed) key is available:
    def valid_key(key):
        return key == "UTC" or _valid_key_cacheable(zoneinfo.TZPATH, key)

    def sample_with_prefixes(zone):
        keys_with_prefixes = (zone, f"posix/{zone}", f"right/{zone}")
        return sampled_from([key for key in keys_with_prefixes if valid_key(key)])

    # TODO: work out how to place a higher priority on "weird" timezones
    # For details see https://github.com/HypothesisWorks/hypothesis/issues/2414
    usable_keys = [key for key in available_timezones if valid_key(key)]
    strategy = sampled_from(usable_keys)

    if allow_prefix:
        return strategy.flatmap(sample_with_prefixes)
    return strategy
def __init__(self, grammar, start, explicit):
    """Prepare terminal and nonterminal strategies for a compiled Lark grammar.

    ``start`` may be a single symbol name, a list of names, or ``None`` to
    fall back to ``grammar.options.start``.  ``explicit`` maps terminal names
    to strategies that replace the regex-derived defaults; names that
    ``get_terminal_names`` does not report raise ``InvalidArgument``.
    """
    assert isinstance(grammar, lark.lark.Lark)
    if start is None:
        start = grammar.options.start
    if not isinstance(start, list):
        start = [start]
    self.grammar = grammar

    # This is a total hack, but working around the changes is a nicer user
    # experience than breaking for anyone who doesn't instantly update their
    # installation of Lark alongside Hypothesis.
    compile_grammar = grammar.grammar.compile
    accepted = getfullargspec(compile_grammar).args
    if "terminals_to_keep" in accepted:
        compiled = compile_grammar(start, ())
    elif "start" in accepted:  # pragma: no cover
        # Support lark <= 0.10.0, without the terminals_to_keep argument.
        compiled = compile_grammar(start)
    else:  # pragma: no cover
        # This branch is to support lark <= 0.7.1, without the start argument.
        compiled = compile_grammar()
    terminals, rules, ignore_names = compiled

    # Rule origins are registered first and terminals second, so a terminal
    # takes precedence if a name appears in both.
    symbols = {rule.origin.name: rule.origin for rule in rules}
    symbols.update({term.name: Terminal(term.name) for term in terminals})
    self.names_to_symbols = symbols

    self.start = st.sampled_from([symbols[name] for name in start])
    self.ignored_symbols = tuple(symbols[name] for name in ignore_names)

    # By default every terminal is generated from its own regex.
    self.terminal_strategies = {
        term.name: st.from_regex(term.pattern.to_regexp(), fullmatch=True)
        for term in terminals
    }
    known_names = get_terminal_names(terminals, rules, ignore_names)
    unknown_explicit = set(explicit) - known_names
    if unknown_explicit:
        raise InvalidArgument(
            "The following arguments were passed as explicit_strategies, "
            "but there is no such terminal production in this grammar: "
            + repr(sorted(unknown_explicit))
        )
    self.terminal_strategies.update(explicit)

    # Collect the expansions of each nonterminal, shortest first.
    nonterminals = {}
    for rule in rules:
        expansions = nonterminals.setdefault(rule.origin.name, [])
        expansions.append(tuple(rule.expansion))
    for expansions in nonterminals.values():
        expansions.sort(key=len)

    self.nonterminal_strategies = {
        name: st.sampled_from(expansions)
        for name, expansions in nonterminals.items()
    }
    self.__rule_labels = {}
def test_Chain_bg(chain: Chain): chain.fg updated = chain.bg assert updated._background @given(chain()) def test_Chain_fg(chain: Chain): chain.bg updated = chain.fg assert updated._background == False @given(chain(), sampled_from(Style)) def test_Chain_applies_Style(chain: Chain, style: Style): updated = chain._handle_style(style) assert style in updated.chalk.style @given(chain(), one_of(sampled_from(Color), true_color()), booleans()) def test_Chain_applies_Color(chain: Chain, color: Color_T, background: bool): if background: chain.bg else: chain.fg updated = chain._handle_color(color) if background: