def test_Link(testdb):
    """Round-trip a Link through the database.

    Stores a link with a file size, checks that it ends up with a positive
    ``link_id``, then builds a second Link with the same ``source_id`` and
    ``url`` and checks that ``load_from_db`` brings back the stored size.
    """
    paper_url = 'http://umsu.de/papers/magnetism2.pdf'

    stored = Link(source_id=1, url=paper_url)
    stored.update_db(filesize=1234)
    assert stored.link_id > 0

    # A fresh object with the same identifying fields should pick up the
    # values written above.
    reloaded = Link(source_id=1, url=paper_url)
    reloaded.load_from_db()
    assert reloaded.filesize == 1234
def test_process_link(testdb, caplog):
    """Process a single link found on a source page and check the log output.

    Opens the source page in a scraper browser, locates the anchor pointing
    to ``options.pdf``, wraps it in a Link and runs ``scraper.process_link``
    on it with ``force_reprocess`` and ``keep_tempfiles`` enabled. The test
    passes when two expected phrases show up in the captured log text.
    """
    source = Source(url='http://umsu.de/papers/')
    source.load_from_db()

    browser = scraper.Browser(use_virtual_display=VDISPLAY)
    browser.goto(source.url)
    source.set_html(browser.page_source)

    link = 'options.pdf'
    el = browser.find_element_by_xpath("//a[@href='{}']".format(link))
    url = source.make_absolute(link)
    li = Link(url=url, source=source, element=el)
    li.load_from_db()

    debuglevel(2)
    try:
        scraper.process_link(li, force_reprocess=True, keep_tempfiles=True)
    finally:
        # Always switch the debug level back, even when processing raises,
        # so the level set above does not carry over into later tests.
        # NOTE(review): 5 is presumably the suite's normal level - confirm.
        debuglevel(5)

    assert 'Options and Actions' in caplog.text()
    assert 'But even if we know' in caplog.text()