def test_process_file():
    """Check that ``scraper.process_file`` sets the expected title on a PDF doc.

    Builds a ``Doc`` of filetype ``'pdf'`` whose link context, link anchor
    text and source HTML all carry the same phrase, points its ``tempfile``
    at the ``simple.pdf`` fixture under ``testdir``, runs the scraper on it,
    and asserts that ``doc.title`` equals that phrase.
    """
    # NOTE(review): presumably simple.pdf itself contains this phrase as its
    # title -- confirm against the fixture.
    expected_title = 'Lorem ipsum dolor sit amet'

    # Arrange: document with link metadata, source page and local PDF path.
    doc = Doc(filetype='pdf')
    doc.link = Link(url='foo')
    link = doc.link
    link.context = expected_title
    link.anchortext = expected_title
    doc.source = Source(
        url='foo',
        html='<b>Lorem ipsum dolor sit amet</b>',
    )
    doc.tempfile = os.path.join(testdir, 'simple.pdf')

    # Act
    scraper.process_file(doc)

    # Assert
    assert doc.title == expected_title