def test_mongin(caplog):
    """Check detection and stripping of the ``jsessionid`` session variable.

    Builds a ``Webpage`` for the Mongin page from the HTML returned by
    ``source('mongin.html')``, asserts that ``'jsessionid'`` is among the
    page's session variables, and then asserts that stripping session
    variables from a link carrying ``jsessionid=123456`` leaves the URL up
    to and including the ``;`` separator.
    """
    page_url = 'https://studies2.hec.fr/jahia/Jahia/cache/offonce/lang/en/mongin/pid/1072'
    webpage = Webpage(page_url, html=source('mongin.html'))

    assert 'jsessionid' in webpage.session_variables()

    link_with_session = 'https://studies2.hec.fr/jahia/webdav/site/hec/shared/sites/mongin/foo.pdf;jsessionid=123456'
    # Only the "jsessionid=..." part is expected to go; the ";" stays.
    expected = 'https://studies2.hec.fr/jahia/webdav/site/hec/shared/sites/mongin/foo.pdf;'
    assert webpage.strip_session_variables(link_with_session) == expected
def test_broken_html():
    """Check that PDF links can still be extracted from 'healey.html'.

    Builds a ``Webpage`` from the HTML returned by ``source('healey.html')``
    (presumably malformed markup, going by the test name -- confirm against
    the fixture), collects every ``<a href>`` value ending in a ``.pdf``
    extension (case-insensitively), and asserts at least one was found.
    """
    pagename = 'healey.html'
    url = 'https://blah.org'
    page = Webpage(url, html=source(pagename))
    # The dot must be escaped: an unescaped '.pdf$' would also accept hrefs
    # such as '/xpdf' that merely end in "pdf" preceded by any character.
    pdf_suffix = re.compile(r'\.pdf$', re.I)
    targets = {u for u in page.xpath('//a/@href') if pdf_suffix.search(u)}
    assert targets
def test_utf8(caplog):
    """Check that the text of 'philpapers-rec.html' contains 'Analytic'.

    Builds a ``Webpage`` from the HTML returned by
    ``source('philpapers-rec.html')`` and asserts that the substring
    ``'Analytic'`` appears in the result of ``page.text()``.
    """
    webpage = Webpage('https://blah.org', html=source('philpapers-rec.html'))
    extracted_text = webpage.text()
    assert 'Analytic' in extracted_text