Example #1
0
 def do_draw(self, data):
     """Draw one domain name: a case-randomized TLD prefixed by labels."""
     # Choose a TLD that still leaves room within self.max_length for at
     # least a one-character label plus the separating '.', then randomize
     # the case of each of its characters independently.
     tld_strategy = (
         st.sampled_from(TOP_LEVEL_DOMAINS)
         .filter(lambda tld: len(tld) + 2 <= self.max_length)
         .flatmap(
             lambda tld: st.tuples(
                 *[st.sampled_from([c.lower(), c.upper()]) for c in tld]
             ).map(u"".join)
         )
     )
     domain = data.draw(tld_strategy)
     # At most 126 labels: a 1-character label plus its '.' costs 2
     # characters, so 126 of them use 252 of the 255-character maximum,
     # leaving just 3 characters for a TLD.  Any more labels would not
     # leave enough room for even the shortest possible TLDs.
     label_counter = cu.many(data, min_size=1, average_size=1, max_size=126)
     while label_counter.more():
         # Each label is generated directly from the label regex.
         label = data.draw(st.from_regex(self.label_regex, fullmatch=True))
         # The >= comparison reserves one character for the joining '.'.
         if len(domain) + len(label) >= self.max_length:
             data.stop_example(discard=True)
             break
         domain = label + "." + domain
     return domain
Example #2
0
    def __init__(self, grammar, start, explicit):
        """Build drawing state for generating strings from ``grammar``.

        ``grammar`` must be a ``lark.lark.Lark`` instance.  ``start`` is
        the symbol (or list of symbols) to generate; ``None`` falls back
        to the grammar's own start option.  ``explicit`` maps terminal
        names to caller-supplied strategies that override the
        regex-derived ones.
        """
        # Validate with a real exception rather than ``assert``: asserts
        # are stripped under ``python -O`` and raise an unhelpful
        # AssertionError.  InvalidArgument is already the error type this
        # method raises for bad arguments below.
        if not isinstance(grammar, lark.lark.Lark):
            raise InvalidArgument(
                "Expected a lark.lark.Lark instance for grammar, got %r"
                % (grammar, ))
        if start is None:
            start = grammar.options.start
        if not isinstance(start, list):
            start = [start]
        self.grammar = grammar

        # Newer lark versions take the start symbols as an argument to
        # compile(); lark <= 0.7.1 does not accept it.
        if "start" in getfullargspec(grammar.grammar.compile).args:
            terminals, rules, ignore_names = grammar.grammar.compile(start)
        else:  # pragma: no cover
            # This branch is to support lark <= 0.7.1, without the start argument.
            terminals, rules, ignore_names = grammar.grammar.compile()

        # Symbol objects indexed by name, used to resolve the requested
        # start symbols; terminals written last so they win on collision.
        self.names_to_symbols = {}

        for r in rules:
            t = r.origin
            self.names_to_symbols[t.name] = t

        for t in terminals:
            self.names_to_symbols[t.name] = Terminal(t.name)

        self.start = st.sampled_from([self.names_to_symbols[s] for s in start])

        # Symbols the grammar ignores (e.g. whitespace) may be drawn
        # between tokens; st.nothing() when there are none.
        self.ignored_symbols = (st.sampled_from(
            [self.names_to_symbols[n]
             for n in ignore_names]) if ignore_names else st.nothing())

        # Each terminal generates strings matching its regex...
        self.terminal_strategies = {
            t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
            for t in terminals
        }
        # ...unless explicitly overridden; reject overrides that name no
        # terminal in this grammar.
        unknown_explicit = set(explicit) - get_terminal_names(
            terminals, rules, ignore_names)
        if unknown_explicit:
            raise InvalidArgument(
                "The following arguments were passed as explicit_strategies, "
                "but there is no such terminal production in this grammar: %r"
                % (sorted(unknown_explicit), ))
        self.terminal_strategies.update(explicit)

        # Group each rule's possible expansions by origin name, sorted
        # shortest-first as a heuristic for shrinking order.
        nonterminals = {}

        for rule in rules:
            nonterminals.setdefault(rule.origin.name,
                                    []).append(tuple(rule.expansion))

        for v in nonterminals.values():
            v.sort(key=len)

        self.nonterminal_strategies = {
            k: st.sampled_from(v)
            for k, v in nonterminals.items()
        }

        self.__rule_labels = {}
Example #3
0
    def __init__(self, grammar, start=None):
        """Prepare strategy state for drawing strings from ``grammar``."""
        check_type(lark.lark.Lark, grammar, "grammar")
        # Default to the grammar's own start symbol(s), normalized to a list.
        start = grammar.options.start if start is None else start
        start = start if isinstance(start, list) else [start]
        self.grammar = grammar

        # Newer lark versions take the start symbols as a compile() argument.
        if "start" in getfullargspec(grammar.grammar.compile).args:
            terminals, rules, ignore_names = grammar.grammar.compile(start)
        else:  # pragma: no cover
            # This branch is to support lark <= 0.7.1, without the start argument.
            terminals, rules, ignore_names = grammar.grammar.compile()

        # Symbol objects indexed by name; rules first, then terminals, so
        # a terminal wins if a name appears in both.
        self.names_to_symbols = {}
        for rule in rules:
            origin = rule.origin
            self.names_to_symbols[origin.name] = origin
        for term in terminals:
            self.names_to_symbols[term.name] = Terminal(term.name)

        self.start = st.sampled_from([self.names_to_symbols[s] for s in start])

        # Symbols the grammar ignores may be interleaved between tokens.
        if ignore_names:
            self.ignored_symbols = st.sampled_from(
                [self.names_to_symbols[n] for n in ignore_names])
        else:
            self.ignored_symbols = st.nothing()

        # Each terminal generates strings matching its defining regex.
        self.terminal_strategies = {
            term.name: st.from_regex(term.pattern.to_regexp(), fullmatch=True)
            for term in terminals
        }

        # Collect each rule's expansions, shortest-first as a heuristic
        # for shrinking order.
        nonterminals = {}
        for rule in rules:
            nonterminals.setdefault(rule.origin.name, []).append(
                tuple(rule.expansion))
        for expansions in nonterminals.values():
            expansions.sort(key=len)
        self.nonterminal_strategies = {
            name: st.sampled_from(expansions)
            for name, expansions in nonterminals.items()
        }

        self.__rule_labels = {}
Example #4
0
    def __init__(self, grammar, start=None):
        """Set up strategy state for the given Lark grammar."""
        check_type(lark.lark.Lark, grammar, "grammar")
        if start is None:
            start = grammar.options.start
        self.grammar = grammar

        # Compile the EBNF grammar down to terminals, BNF rules, and the
        # names of ignored symbols.
        terminals, rules, ignore_names = grammar.grammar.compile()

        # Symbol objects indexed by name; terminals are written after
        # rules, so they win when a name appears in both.
        self.names_to_symbols = {}
        for rule in rules:
            self.names_to_symbols[rule.origin.name] = rule.origin
        for terminal in terminals:
            self.names_to_symbols[terminal.name] = Terminal(terminal.name)

        self.start = self.names_to_symbols[start]

        # Symbols the grammar ignores may be drawn between tokens.
        if ignore_names:
            self.ignored_symbols = st.sampled_from(
                [self.names_to_symbols[name] for name in ignore_names]
            )
        else:
            self.ignored_symbols = st.nothing()

        # Each terminal generates strings matching its regular expression.
        self.terminal_strategies = {
            terminal.name: st.from_regex(
                terminal.pattern.to_regexp(), fullmatch=True
            )
            for terminal in terminals
        }

        # Group each rule's expansions, sorted shortest-first so that
        # shrinking prefers simpler productions.
        grouped = {}
        for rule in rules:
            grouped.setdefault(rule.origin.name, []).append(tuple(rule.expansion))
        for expansions in grouped.values():
            expansions.sort(key=len)
        self.nonterminal_strategies = {
            name: st.sampled_from(expansions) for name, expansions in grouped.items()
        }

        self.__rule_labels = {}
Example #5
0
def from_lark(grammar, start=None):
    # type: (lark.lark.Lark, Text) -> st.SearchStrategy[Text]
    """A strategy for strings accepted by the given context-free grammar.

    ``grammar`` must be a ``Lark`` object, which wraps an EBNF specification.
    The Lark EBNF grammar reference can be found
    `here <https://lark-parser.readthedocs.io/en/latest/grammar/>`_.

    ``from_lark`` will automatically generate strings matching the
    nonterminal ``start`` symbol in the grammar, which was supplied as an
    argument to the Lark class.  To generate strings matching a different
    symbol, including terminals, you can override this by passing the
    ``start`` argument to ``from_lark``.
    """
    check_type(lark.lark.Lark, grammar, "grammar")
    if start is None:
        start = grammar.options.start

    # Compiling the EBNF grammar to a sanitised and canonicalised BNF
    # format makes further transformations much easier.
    terminals, rules, ignore_names = grammar.grammar.compile()

    # Map all terminals to the corresponding regular expression, and
    # thence to a strategy for producing matching strings.
    # We'll add strategies for non-terminals to this mapping later.
    strategies = {
        t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
        for t in terminals
    }
    if start in strategies:
        # The start symbol is itself a terminal, so its regex strategy
        # already produces exactly the accepted strings.
        return strategies[start]

    # Reshape our flat list of rules into a dict of rulename to list of
    # possible productions for that rule.  We sort productions by increasing
    # number of parts as a heuristic for shrinking order.
    nonterminals = {
        origin.name:
        sorted([rule.expansion for rule in rules if rule.origin == origin],
               key=len)
        for origin in set(rule.origin for rule in rules)
    }

    @st.cacheable
    @st.defines_strategy_with_reusable_values
    def convert(expansion):
        # Turn one production (a sequence of symbols) into a strategy
        # that generates the concatenation of its parts.
        parts = []
        for p in expansion:
            if parts and ignore_names:
                # Chance to insert ignored substrings between meaningful
                # tokens, e.g. whitespace between values in JSON.
                parts.append(
                    st.just(u"")
                    | st.one_of([strategies[name] for name in ignore_names]))
            if p.name in strategies:
                # This might be a Terminal, or it might be a NonTerminal
                # that we've previously handled.
                parts.append(strategies[p.name])
            else:
                # It must be the first time we've encountered this NonTerminal.
                # Recurse to handle it, relying on lazy strategy instantiation
                # to allow forward references, then add it to the strategies
                # cache to avoid infinite loops.
                assert isinstance(p, lark.grammar.NonTerminal)
                s = st.one_of([convert(ex) for ex in nonterminals[p.name]])
                parts.append(s)
                strategies[p.name] = s
        # Special-case rules with only one expansion; it's worthwhile being
        # efficient when this includes terminals!  Otherwise, join the parts.
        if len(parts) == 1:
            return parts[0]
        return st.tuples(*parts).map(u"".join)

    # Most grammars describe several production rules, so we check the start
    # option passed to Lark to see which nonterminal we're going to produce.
    return st.one_of([convert(ex) for ex in nonterminals[start]])