Beispiel #1
0
and they lived at the bottom of a well.</p>

<p class="story">...</p>
</body>
</html>
"""

# Fixture table driving the checks below: each entry pairs an HTML document
# with the PageTestResult the page built from it is expected to match.
# NOTE(review): the positional arguments are presumably (indexable, children),
# matching the attributes read by the loop -- confirm against PageTestResult.
html_results = [
    (simple_html, PageTestResult(True, [])),
    (
        html_doc,
        PageTestResult(
            False,
            [
                'http://example.com/elsie',
                'http://example.com/lacie',
                'http://example.com/tillie',
            ],
        ),
    ),
]

# Run two checks per fixture and print one "[ OK ]" / "[FAIL]" line for each.
for i, (html_text, page_test_result) in enumerate(html_results):
    # Build the page and hand it an already-parsed document by assigning the
    # private `_soup` attribute directly.
    page = Page('http://example.com', '/index.html')
    page._soup = BeautifulSoup(html_text, 'lxml')
    # NOTE(review): modified() presumably makes the page pick up the injected
    # soup before it is queried -- confirm against the Page class.
    page.modified()

    # Check 1: can_be_stored() must agree with the fixture's `indexable` flag.
    if page_test_result.indexable:
        expected = "should be stored"
    else:
        expected = "should not be stored"
    storable_mismatch = page.can_be_stored() != page_test_result.indexable
    if storable_mismatch:
        print("[FAIL] page #{} {}".format(i, expected))
    else:
        print("[ OK ] page #{} {}".format(i, expected))

    # Check 2: the URLs of the page's children must equal the expected list,
    # element for element and in the same order.
    expected = "should be followed by {}".format(page_test_result.children)
    children_urls = [child.url() for child in page.children()]
    children_mismatch = children_urls != page_test_result.children
    if children_mismatch:
        print("[FAIL] page #{} {} but has {}".format(i, expected, children_urls))
    else:
        print("[ OK ] page #{} {}".format(i, expected))