class TestCheckURL(TestCase):
    """Property-based checks for ``config._check_url``."""

    @given(urls())
    @example("http://example.com/some/path?x=2")
    @example("http://example.com/some/path")
    @example("http://example.com/")
    @example("http://example.com")
    def test_full_url_is_ok(self, url):
        """A generated (or listed) full URL comes back unchanged."""
        checked = config._check_url(url)
        self.assertEqual(checked, url)

    @given(domains())
    @example(None)
    @example("example.com")
    @example("://example.com")
    def test_raises_for_missing_or_wrong_scheme(self, url):
        """Bare domains, ``None`` and scheme-less inputs raise ValueError."""
        with self.assertRaises(ValueError):
            config._check_url(url)

    @given(_urls_with_out_of_bounds_port())
    def test_raises_for_out_of_bounds_port_number(self, url):
        """URLs from the out-of-bounds-port strategy raise ValueError."""
        with self.assertRaises(ValueError):
            config._check_url(url)

    @given(domain=domains(), port=_everything_except(int))
    def test_raises_for_bogus_port_number(self, domain, port):
        """A non-integer value in the port position raises ValueError."""
        # Skip the two renderings affected by a
        # urllib bug: https://bugs.python.org/issue36338
        rendered_port = str(port)
        assume(rendered_port != "[]" and rendered_port != "")
        url = "http://%s:%s" % (domain, port)
        with self.assertRaises(ValueError):
            config._check_url(url)
예제 #2
0
def repo_url_strategy():
    """Return a strategy producing repository URL strings.

    Each value is the concatenation, in order, of: a scheme, an optional
    ``auth@`` prefix, a lower-cased domain, either ``:port`` or a ``"|"``
    placeholder, a ``/``-joined path, an optional ``@ref`` and an optional
    ``#egg=...`` fragment. Every ``"|"`` in the result is finally replaced
    by ``":"`` when the URL contains ``git+git@`` and by ``"/"`` otherwise.
    """
    ref_alphabet = "abcdefghijklmnopqrstuvwxyz0123456789_-"
    egg_alphabet = ref_alphabet + "."

    def url_encode(s):
        # Keep characters found in URL_SAFE_CHARACTERS; percent-encode the rest.
        encoded = []
        for c in s:
            if c in URL_SAFE_CHARACTERS:
                encoded.append(c)
            else:
                encoded.append("%%%02X" % ord(c))
        return "".join(encoded)

    def blank_if(bare):
        # Build a mapper that collapses a decoration with no payload to "".
        def mapper(value):
            return "" if value == bare else value

        return mapper

    def drop_empty_password(value):
        # "user:@" -> "user@"; anything else is left alone.
        if value.endswith(":@"):
            return value.replace(":@", "@")
        return value

    def resolve_placeholder(candidate):
        # Swap the "|" placeholder for the separator matching the URL flavour.
        if "git+git@" in candidate:
            return candidate.replace("|", ":")
        return candidate.replace("|", "/")

    scheme = (
        st.sampled_from(vcs_schemes)
        .filter(lambda name: "+" in name)
        .map("{}://".format)
        .map(lambda prefix: prefix.replace("file://", "file:///"))
    )
    auth = (
        auth_strings()
        .map("{}@".format)
        .map(blank_if("@"))
        .map(drop_empty_password)
    )
    domain = domains().map(lambda name: name.lower())
    ports = st.integers(min_value=0, max_value=65535).map(":{}".format)
    port_or_placeholder = st.just("|") | ports
    paths = st.lists(st.text(string.printable).map(url_encode)).map("/".join)
    refs = (
        st.text(alphabet=ref_alphabet)
        .map(url_encode)
        .map("@{}".format)
        .map(blank_if("@"))
    )
    fragments = (
        st.text(alphabet=egg_alphabet)
        .map(url_encode)
        .map("#egg={}".format)
        .map(blank_if("#egg="))
    )

    assembled = st.builds(
        "{}{}{}{}{}{}{}".format,
        scheme,
        auth,
        domain,
        port_or_placeholder,
        paths,
        refs,
        fragments,
    )
    return assembled.map(resolve_placeholder)
예제 #3
0
def urls():
    """
    Return a strategy that builds ``URI`` objects from generated parts.

    The keyword arguments passed to ``URI`` are: ``scheme`` (sampled from
    ``uri_schemes``), ``host``, ``port`` (1-65535), ``path``, ``query``
    (two short texts joined by ``=``), ``ref``, ``subdirectory`` and
    ``extras`` (a list of at most ten short names).
    """
    lowercase_alnum = "abcdefghijklmnopqrstuvwxyz0123456789"

    def url_encode(s):
        # Keep characters found in URL_SAFE_CHARACTERS; percent-encode the rest.
        pieces = []
        for c in s:
            pieces.append(c if c in URL_SAFE_CHARACTERS else "%%%02X" % ord(c))
        return "".join(pieces)

    def short_token():
        # Fresh strategy per call: up to 64 lowercase letters/digits.
        return st.text(max_size=64, alphabet=lowercase_alnum)

    path_strategy = st.lists(
        st.text(string.printable).map(url_encode)).map("/".join)

    # Query halves exclude "/", "?", "#" and surrogate code points.
    query_half = st.text(
        max_size=10,
        alphabet=st.characters(blacklist_characters="/?#",
                               blacklist_categories=("Cs", )),
    )
    query_strategy = (
        st.lists(query_half, min_size=2, max_size=2)
        .map("=".join)
        .map(vistir.misc.to_text)
    )

    extras_strategy = st.lists(
        st.text(max_size=20, alphabet=lowercase_alnum + "_-."),
        min_size=0,
        max_size=10,
    )

    return st.builds(
        URI,
        scheme=st.sampled_from(uri_schemes),
        host=domains(),
        port=st.integers(min_value=1, max_value=65535),
        path=path_strategy,
        query=query_strategy,
        ref=short_token(),
        subdirectory=short_token(),
        extras=extras_strategy,
    )
예제 #4
0
def vcs_requirements():
    """Return a strategy building ``parsed_url`` values for VCS requirements.

    The scheme is sampled from ``vcs_schemes``, the netloc comes from
    ``domains()``, the path is a ``/``-joined list of percent-encoded
    printable texts and the fragment comes from ``valid_names()``.
    """

    def url_encode(s):
        # Keep characters found in URL_SAFE_CHARACTERS; percent-encode the rest.
        out = []
        for c in s:
            out.append(c if c in URL_SAFE_CHARACTERS else "%%%02X" % ord(c))
        return "".join(out)

    encoded_segment = st.text(string.printable).map(url_encode)
    path_strategy = st.lists(encoded_segment).map("/".join)

    return st.builds(
        parsed_url,
        scheme=st.sampled_from(vcs_schemes),
        netloc=domains(),
        path=path_strategy,
        fragment=valid_names(),
    )
def string_schema(schema: dict) -> st.SearchStrategy[str]:
    """Handle schemata for strings.

    Picks a base strategy from the schema's ``format`` (preferred) or
    ``pattern`` keyword, falling back to plain text, and then filters the
    result by ``minLength`` / ``maxLength``.

    Raises:
        InvalidArgument: if ``format`` is present but not a known format.
    """
    # also https://json-schema.org/latest/json-schema-validation.html#rfc.section.7
    min_size = schema.get("minLength", 0)
    max_size = schema.get("maxLength", float("inf"))

    def has_valid_length(s):
        # TODO: mypy should be able to tell that this is returning a bool
        # without the explicit cast, but can't as of v 0.720 - report upstream.
        return bool(min_size <= len(s) <= max_size)

    def matches_pattern(s):
        return re.search(schema["pattern"], string=s) is not None

    strategy = st.text(min_size=min_size, max_size=schema.get("maxLength"))

    if "format" in schema:
        requested = schema["format"]
        hostnames = prov.domains()  # type: ignore
        url_synonyms = [
            "uri", "uri-reference", "iri", "iri-reference", "uri-template"
        ]
        formats = {name: rfc3339(name) for name in RFC3339_FORMATS}
        formats.update({
            "date": rfc3339("full-date"),
            "time": rfc3339("full-time"),
            # Hypothesis' provisional strategies are not type-annotated.
            # We should get a principled plan for them at some point I guess...
            "email": st.emails(),  # type: ignore
            "idn-email": st.emails(),  # type: ignore
            "hostname": hostnames,
            "idn-hostname": hostnames,
            "ipv4": prov.ip4_addr_strings(),  # type: ignore
            "ipv6": prov.ip6_addr_strings(),  # type: ignore
            # Every URL-like format is approximated as "https://<domain>".
            **{
                name: hostnames.map("https://{}".format)
                for name in url_synonyms
            },
            "json-pointer": st.just(""),
            "relative-json-pointer": st.just(""),
            "regex": REGEX_PATTERNS,
        })
        if requested not in formats:
            raise InvalidArgument(f"Unsupported string format={requested}")
        strategy = formats[requested]
        if "pattern" in schema:  # pragma: no cover
            # This isn't really supported, but we'll do our best.
            strategy = strategy.filter(matches_pattern)
    elif "pattern" in schema:
        try:
            re.compile(schema["pattern"])
            strategy = st.from_regex(schema["pattern"])
        except re.error:
            # Patterns that are invalid in Python, or just malformed
            return st.nothing()

    return strategy.filter(has_valid_length)
예제 #6
0
def urls():
    """Return a strategy producing http/https URLs via ``URL.build``."""

    def with_leading_slash(text):
        # Path must start with a slash
        return '/' + text

    def build_url(parts):
        return URL.build(**parts)

    port_strategy = st.one_of(
        st.none(),
        st.integers(min_value=1, max_value=2 ** 16 - 1),
    )
    parts = st.fixed_dictionaries({
        'scheme': st.sampled_from(['http', 'https']),
        'host': domains(),
        'port': port_strategy,
        'path': st.builds(with_leading_slash, st.text()),
        'query_string': st.text(),
        'fragment': st.text(),
    })
    return st.builds(build_url, parts)
예제 #7
0
def string_schema(schema: dict) -> st.SearchStrategy[str]:
    """Handle schemata for strings.

    Picks a base strategy from the schema's ``format`` (preferred) or
    ``pattern`` keyword, falling back to plain text, and then filters the
    result by ``minLength`` / ``maxLength``. A ``pattern`` that Python's
    ``re`` module cannot compile yields a strategy that generates nothing.

    Raises:
        InvalidArgument: if ``format`` is present but not a known format.
    """
    # also https://json-schema.org/latest/json-schema-validation.html#rfc.section.7
    min_size = schema.get("minLength", 0)
    max_size = schema.get("maxLength", float("inf"))
    # This local holds a strategy, not a string, so annotate it as such.
    strategy: st.SearchStrategy[str] = st.text(
        min_size=min_size, max_size=schema.get("maxLength"))
    if "format" in schema:
        url_synonyms = [
            "uri", "uri-reference", "iri", "iri-reference", "uri-template"
        ]
        domains = prov.domains()
        formats = {
            **{name: rfc3339(name)
               for name in RFC3339_FORMATS},
            "date": rfc3339("full-date"),
            "time": rfc3339("full-time"),
            "email": st.emails(),
            "idn-email": st.emails(),
            "hostname": domains,
            "idn-hostname": domains,
            "ipv4": prov.ip4_addr_strings(),
            "ipv6": prov.ip6_addr_strings(),
            # Every URL-like format is approximated as "https://<domain>".
            **{
                name: domains.map("https://{}".format)
                for name in url_synonyms
            },
            "json-pointer": st.just(""),
            "relative-json-pointer": st.just(""),
            "regex": REGEX_PATTERNS,
        }
        # Keep the Optional lookup result in its own variable so `strategy`
        # is only ever bound to an actual strategy.
        chosen = formats.get(schema["format"])
        if chosen is None:
            raise InvalidArgument(
                f"Unsupported string format={schema['format']}")
        strategy = chosen
        if "pattern" in schema:  # pragma: no cover
            # This isn't really supported, but we'll do our best.
            strategy = strategy.filter(
                lambda s: re.search(schema["pattern"], string=s) is not None)
    elif "pattern" in schema:
        try:
            re.compile(schema["pattern"])
            strategy = st.from_regex(schema["pattern"])
        except re.error:
            # Patterns that are invalid in Python, or just malformed
            strategy = st.nothing()
    return strategy.filter(lambda s: min_size <= len(s) <= max_size)
예제 #8
0
def string_schema(schema: dict) -> st.SearchStrategy[str]:
    """Handle schemata for strings.

    Uses ``pattern`` if present, otherwise ``format`` if present, otherwise
    plain text; the chosen strategy is then filtered by ``minLength`` /
    ``maxLength``. Supplying both ``format`` and ``pattern`` trips an
    assertion.

    Raises:
        InvalidArgument: if ``format`` is present but not a known format.
    """
    # also https://json-schema.org/latest/json-schema-validation.html#rfc.section.7
    min_size = schema.get("minLength", 0)
    max_size = schema.get("maxLength", float("inf"))

    def has_valid_length(s):
        return min_size <= len(s) <= max_size

    strategy: Any = st.text(min_size=min_size,
                            max_size=schema.get("maxLength"))
    assert (
        "format" not in schema or "pattern" not in schema
    ), "format and regex constraints are supported, but not both at once."

    if "pattern" in schema:
        strategy = st.from_regex(schema["pattern"])
    elif "format" in schema:
        hostnames = prov.domains()  # type: ignore
        url_synonyms = [
            "uri", "uri-reference", "iri", "iri-reference", "uri-template"
        ]
        known_formats = {name: rfc3339(name) for name in RFC3339_FORMATS}
        known_formats.update({
            "date": rfc3339("full-date"),
            "time": rfc3339("full-time"),
            "email": st.emails(),  # type: ignore
            "idn-email": st.emails(),  # type: ignore
            "hostname": hostnames,
            "idn-hostname": hostnames,
            "ipv4": prov.ip4_addr_strings(),  # type: ignore
            "ipv6": prov.ip6_addr_strings(),  # type: ignore
            # Every URL-like format is approximated as "https://<domain>".
            **{
                name: hostnames.map("https://{}".format)
                for name in url_synonyms
            },
            "json-pointer": st.just(""),
            "relative-json-pointer": st.just(""),
            "regex": REGEX_PATTERNS,
        })
        strategy = known_formats.get(schema["format"])
        if strategy is None:
            raise InvalidArgument(
                f"Unsupported string format={schema['format']}")

    return strategy.filter(has_valid_length)  # type: ignore
예제 #9
0
def url():
    """Return a strategy producing http/https URLs via ``URL.build``."""

    def prepend_slash(text):
        # Path must start with a slash
        return "/" + text

    def build_from(parts):
        return URL.build(**parts)

    highest_port = 2**16 - 1
    optional_port = st.one_of(
        st.none(),
        st.integers(min_value=10, max_value=highest_port),
    )
    components = st.fixed_dictionaries({
        "scheme": st.sampled_from(["http", "https"]),
        "host": domains(),
        "port": optional_port,
        "path": st.builds(prepend_slash, st.text()),
        "query_string": st.text(),
        "fragment": st.text(),
    })
    return st.builds(build_from, components)
예제 #10
0
def auth_url_strategy():
    """Return a strategy building ``AuthUrl`` values from generated parts.

    Schemes are every entry of ``uri_schemes`` except ``"file"`` with
    ``://`` appended, plus ``file:///``. Auth strings equal to ``":"`` and
    empty domains are filtered out; path segments exclude ``""``, ``"."``
    and ``".."``.
    """

    # taken from the hypothesis provisional url generation strategy
    def url_encode(s):
        encoded = []
        for c in s:
            encoded.append(c if c in URL_SAFE_CHARACTERS else "%%%02X" % ord(c))
        return "".join(encoded)

    def as_auth_prefix(credentials):
        # Empty credentials contribute nothing; otherwise append "@".
        if not credentials:
            return ""
        return "{0}@".format(credentials)

    def is_real_segment(segment):
        return segment not in ["", ".", ".."]

    schemes = []
    for scheme in uri_schemes:
        if scheme != "file":
            schemes.append("{0}://".format(scheme))
    schemes.append("file:///")

    auth = auth_strings().filter(lambda text: text != ":").map(as_auth_prefix)
    domain = domains().filter(lambda name: name != "").map(
        lambda name: name.lower())
    segments = st.text(string.printable).map(url_encode).filter(is_real_segment)

    return st.builds(
        AuthUrl,
        scheme=st.sampled_from(schemes),
        auth=auth,
        domain=domain,
        port=st.integers(min_value=0, max_value=65535),
        path=st.lists(segments).map("/".join),
    )
@pytest.mark.parametrize("max_length", [-1, 0, 3, 4.0, 256])
@pytest.mark.parametrize("max_element_length", [-1, 0, 4.0, 64, 128])
def test_invalid_domain_arguments(max_length, max_element_length):
    """Each parametrized limit combination must raise InvalidArgument."""
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    with pytest.raises(InvalidArgument):
        strategy = domains(**limits)
        strategy.example()


@pytest.mark.parametrize("max_length", [None, 4, 8, 255])
@pytest.mark.parametrize("max_element_length", [None, 1, 2, 4, 8, 63])
def test_valid_domains_arguments(max_length, max_element_length):
    """Each parametrized limit combination must yield an example without error."""
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    strategy = domains(**limits)
    strategy.example()


@pytest.mark.parametrize("strategy", [domains(), urls()])
def test_find_any_non_empty(strategy):
    """Each listed strategy can produce at least one non-empty value."""

    def is_non_empty(value):
        return len(value) > 0

    find_any(strategy, is_non_empty)


@given(_url_fragments_strategy)
# There's a lambda in the implementation that only gets run if we generate at
# least one percent-escape sequence, so we derandomize to ensure that coverage
# isn't flaky.
@settings(derandomize=True)
def test_url_fragments_contain_legal_chars(fragment):
    """Generated fragments start with ``#`` and contain only legal escapes."""
    assert fragment.startswith("#")

    # Strip all legal escape sequences. Any remaining % characters were not
    # part of a legal escape sequence.
    without_escapes = re.sub(r"(?ai)%[0-9a-f][0-9a-f]", "", fragment[1:])

    # The stripped text was computed but never checked; verify that no stray
    # percent sign is left behind.
    assert "%" not in without_escapes
@given(ip6_addr_strings())
def test_is_IP6_addr(address):
    """Generated addresses are upper-case, eight groups of four hex digits."""
    # Works for non-normalised addresses produced by this strategy, but not
    # a particularly general test
    assert address.upper() == address

    groups = address.split(":")
    assert len(groups) == 8
    for group in groups:
        assert len(group) == 4

    # Joining the groups drops the separators, leaving 32 hex digits.
    packed = unhexlify("".join(groups).encode("ascii"))
    assert len(packed) == 16


@pytest.mark.parametrize("max_length", [-1, 0, 3, 4.0, 256])
@pytest.mark.parametrize("max_element_length", [-1, 0, 4.0, 64, 128])
def test_invalid_domain_arguments(max_length, max_element_length):
    """Each parametrized limit combination must raise InvalidArgument."""
    with pytest.raises(InvalidArgument):
        bad_strategy = domains(
            max_length=max_length,
            max_element_length=max_element_length,
        )
        bad_strategy.example()


@pytest.mark.parametrize("max_length", [None, 4, 8, 255])
@pytest.mark.parametrize("max_element_length", [None, 1, 2, 4, 8, 63])
def test_valid_domains_arguments(max_length, max_element_length):
    """Each parametrized limit combination must yield an example without error."""
    good_strategy = domains(
        max_length=max_length,
        max_element_length=max_element_length,
    )
    good_strategy.example()


@pytest.mark.parametrize(
    "strategy", [domains(), ip4_addr_strings(), ip6_addr_strings(), urls()]
)
def test_find_any_non_empty(strategy):
    """Each listed strategy can produce at least one non-empty value."""
    def has_content(candidate):
        return len(candidate) > 0
    find_any(strategy, has_content)
    url = url.lower()
    iocs = find_iocs(url)
    failure = False

    try:
        assert len(iocs['urls']) == 1
        assert iocs['urls'][0] == url
    except AssertionError as e:
        failure = True
        print('Failed on url: {}'.format(url))

    if failure:
        raise AssertionError('Error parsing urls')


@given(domains())
@settings(deadline=None)
def test_domain_parsing(domain):
    """``find_iocs`` finds exactly the lower-cased domain it was given.

    On failure the offending domain is printed and an ``AssertionError``
    with a fixed message is raised, chained to the assertion that failed.
    """
    domain = domain.lower()
    iocs = find_iocs(domain)

    try:
        assert len(iocs['domains']) == 1
        assert iocs['domains'][0] == domain
    except AssertionError as e:
        print('Failed on domain: {}'.format(domain))
        # Raise from the original error so the failing check stays visible
        # in the traceback instead of being discarded.
        raise AssertionError('Error parsing domains') from e
예제 #14
0
def collector_fqdns(draw):
    """Draw a domain and append ``.routeviews.org`` or ``.ripe.net``."""
    host = draw(domains())
    suffix = draw(sampled_from([".routeviews.org", ".ripe.net"]))
    return host + suffix
예제 #15
0
        re.compile(result)
    except re.error:
        assume(False)
    return result


# Strategy for regex pattern strings, instantiated once at import time.
REGEX_PATTERNS = regex_patterns()

# Maps each supported string format name to the strategy used to generate it.
STRING_FORMATS = {
    **{fmt: rfc3339(fmt) for fmt in RFC3339_FORMATS},
    "date": rfc3339("full-date"),
    "time": rfc3339("full-time"),
    "email": st.emails(),
    "idn-email": st.emails(),
    "hostname": prov.domains(),
    "idn-hostname": prov.domains(),
    "ipv4": st.ip_addresses(v=4).map(str),
    "ipv6": st.ip_addresses(v=6).map(str),
    # Every URL-like format is approximated as "https://<domain>".
    **{
        fmt: prov.domains().map("https://{}".format)
        for fmt in (
            "uri",
            "uri-reference",
            "iri",
            "iri-reference",
            "uri-template",
        )
    },
    "json-pointer": st.just(""),
    "relative-json-pointer": st.just(""),
    "regex": REGEX_PATTERNS,
}

def _urls_with_bogus_port(draw):
    """Draw an ``http://`` URL whose port part is a non-``int`` value."""
    # The tuple is evaluated left to right, so the domain is drawn first.
    parts = (draw(domains()), draw(_everything_except(int)))
    return "http://%s:%s" % parts
def test_invalid_domain_arguments(max_length, max_element_length):
    """Building and sampling ``domains`` with these limits must raise."""
    kwargs = dict(max_length=max_length,
                  max_element_length=max_element_length)
    with pytest.raises(InvalidArgument):
        rejected = domains(**kwargs)
        rejected.example()
예제 #18
0
        REGEX_PATTERNS.map("{}+".format),
        REGEX_PATTERNS.map("{}?".format),
        REGEX_PATTERNS.map("{}*".format),
    )
    result = draw(st.lists(fragments, min_size=1, max_size=3).map("".join))
    try:
        re.compile(result)
    except re.error:
        assume(False)
    # The @composite decorator *is* well-typed, but expressing this is painful :/
    return result  # type: ignore


REGEX_PATTERNS = regex_patterns()

_domains = prov.domains()  # type: ignore
STRING_FORMATS = {
    # A value of None indicates a known but unsupported format.
    **{name: rfc3339(name)
       for name in RFC3339_FORMATS},
    "date": rfc3339("full-date"),
    "time": rfc3339("full-time"),
    # Hypothesis' provisional strategies are not type-annotated.
    # We should get a principled plan for them at some point I guess...
    "email": st.emails(),  # type: ignore
    "idn-email": st.emails(),  # type: ignore
    "hostname": _domains,
    "idn-hostname": _domains,
    "ipv4": prov.ip4_addr_strings(),  # type: ignore
    "ipv6": prov.ip6_addr_strings(),  # type: ignore
    **{
def test_valid_domains_arguments(max_length, max_element_length):
    """Building and sampling ``domains`` with these limits must succeed."""
    kwargs = dict(max_length=max_length,
                  max_element_length=max_element_length)
    accepted = domains(**kwargs)
    accepted.example()
def _urls_with_out_of_bounds_port(draw):
    """Draw an ``http://`` URL whose port is taken from ``PORTS_INVALID``."""
    # The tuple is evaluated left to right, so the domain is drawn first.
    parts = (draw(domains()), draw(PORTS_INVALID))
    return "http://%s:%d" % parts