Beispiel #1
0
    def test_parse_web_page(self):
        """Check that ``WebPage.parse()`` builds the expected element tree.

        The markup used here is malformed (unclosed ``<title>``/``<body>``
        and a mismatched ``<h1>``/``</h3>`` pair).  The tree stored on the
        page is compared, in serialized form, with the result of parsing the
        same markup directly through ``etree.parse`` using the parser that
        ``Parser().get`` returns for the same content type.
        """
        url = "https://yolaw-tokeep-hiring-env.herokuapp.com"
        content = "<html><head><title>test<body><h1>page title</h3>"
        content_type = "text/html"

        html_parser = Parser().get(content_type)
        web_page = WebPage(url, content, content_type)
        web_page.parse()

        # isinstance() already yields a bool, so assert it directly.
        assert isinstance(web_page._tree, etree._ElementTree)

        # Reference tree: the same markup parsed straight through etree.
        # NOTE(review): unicode() is the Python 2 text type; this call needs
        # replacing if the suite is ever run under Python 3.
        expected_tree = etree.parse(StringIO(unicode(content)), html_parser)
        assert etree.tostring(web_page._tree) == etree.tostring(expected_tree)
Beispiel #2
0
    def test_evaluate_query(self):
        """Check ``WebPage.evaluate_query`` for a valid and an empty query.

        ``//text()`` must return the text nodes of the (malformed) markup in
        document order, and an empty query string must raise
        ``EvaluateQueryException``.
        """
        url = "https://yolaw-tokeep-hiring-env.herokuapp.com"
        content = "<html><head><title>test<body><h1>page title</h3>"
        content_type = "text/html"

        # parse() is not called explicitly here; the query is evaluated on a
        # freshly constructed page.
        web_page = WebPage(url, content, content_type)

        assert web_page.evaluate_query("//text()") == ["test", "page title"]

        with pytest.raises(EvaluateQueryException):
            web_page.evaluate_query("")
Beispiel #3
0
    def test_webpage_repr(self):
        """``repr()`` of a page is ``WebPage(...)`` around its first ten
        content characters."""
        markup = "<html></html>"
        page = WebPage(
            "https://yolaw-tokeep-hiring-env.herokuapp.com",
            markup,
            "text/html",
        )

        expected = "WebPage({})".format(markup[:10])
        assert repr(page) == expected
Beispiel #4
0
    def test_webpage_instance_if_url_invalid(self):
        """Constructing a ``WebPage`` with an empty URL must raise
        ``InvalidURLException``."""
        empty_url = ""

        with pytest.raises(InvalidURLException):
            WebPage(empty_url, "<html></html>", "text/html")
Beispiel #5
0
    def test_webpage_instance_default_values(self):
        """Check the attributes of a ``WebPage`` built from a URL only.

        The URL is stored as given, content and content type are empty
        strings, and no tree has been built yet.
        """
        url = "https://yolaw-tokeep-hiring-env.herokuapp.com"

        web_page = WebPage(url)
        assert web_page._url == url
        assert web_page._content == ""
        assert web_page._content_type == ""
        # Identity check: None is a singleton, so compare with "is".
        assert web_page._tree is None
Beispiel #6
0
    def test_webpage_instance(self):
        """Check the attributes of a fully specified ``WebPage``.

        URL, content and content type are stored as passed to the
        constructor, and no tree exists before ``parse()`` is called.
        """
        url = "https://yolaw-tokeep-hiring-env.herokuapp.com"
        content = "<html></html>"
        content_type = "text/html"

        web_page = WebPage(url, content, content_type)
        assert web_page._url == url
        assert web_page._content == content
        assert web_page._content_type == content_type
        # Identity check: None is a singleton, so compare with "is".
        assert web_page._tree is None