def test_should_fetch_article_using_native_extractor(self):
    """Check the title and body produced by ``native.extract`` for a mocked page.

    The mocked page carries raw HTML containing a ``<script>`` element and a
    paragraph. The expected body, once newlines are removed, holds only the
    paragraph wrapped in ``<body>``; the script element is not part of it.
    """
    # Fixture markup handed to the extractor through the mocked page.
    raw_html = """ <html> <title>Title of Content</title> <body> <script src="malicious.js"/> <p>Hello there!</p> </body> </html> """
    page = mock('page').with_children(
        content_url='http://example.com/example',
        raw_content=raw_html,
    )

    extracted = native.extract(page)

    self.assertEqual(extracted.title, 'Title of Content')

    # Newlines are dropped before comparing so that line breaks in the
    # extractor's output do not affect the assertion.
    body_without_newlines = "".join(extracted.body.split('\n'))
    self.assertEqual(body_without_newlines, '<body><p>Hello there!</p></body>')
def extract(self, content, url=some_url):
    """Build a ``Page`` around ``content`` and return ``native.extract`` of it.

    Args:
        content: Value passed to ``Page`` as ``_raw_content``.
        url: Value passed to ``Page`` as ``url``; defaults to ``some_url``.

    Returns:
        Whatever ``native.extract`` returns for the constructed page.
    """
    # The page is always created with ``a_user`` as its owner.
    return native.extract(
        Page(url=url, _raw_content=content, owner=a_user)
    )