def test_download(self):
    """Check DownloadAgent.download() for a reachable and an unresolvable URL.

    Each case builds a DownloadAgent for its url and calls download().
    When 'expect' is set, the downloaded content must be non-empty and
    contain that text literally; when it is None, the agent must end up
    with no content.
    """
    tests = [
        {
            'label': 'valid url',
            'url': 'http://www.example.com',
            'expect': '<title>IANA — Example domains</title>',
        },
        {
            # We're using openDNS that redirects invalid urls to notice
            # page so it will return content.
            # NOTE(review): the comment above conflicts with expecting no
            # content below -- confirm which one reflects the intended
            # environment.
            'label': 'invalid url',
            'url': 'http://www.xxxxxxxxxxfakeurlxxxxxxxxxxx.com',
            'expect': None,
        },
    ]
    for test in tests:
        # The label is included in every failure message so a failing case
        # can be identified without a debugger.
        label = test['label']
        da = DownloadAgent(url=test['url'])
        da.download()

        if not test['expect']:
            self.assertFalse(
                da.content,
                '%s: expected no content' % label,
            )
            continue

        self.assertTrue(
            da.content,
            '%s: expected non-empty content' % label,
        )
        # The expected value is literal text, not a pattern: escape it so
        # regex metacharacters in it can never change what is matched.
        p_title = re.compile(re.escape(test['expect']))
        self.assertTrue(
            p_title.search(da.content),
            '%s: expected text not found in content' % label,
        )
def get(
    self,
    cookie_jar=None,
    cookie=None,
    cookie_filename=None,
    referrer=None,
    request_headers=None,
):
    """Get the content of the webpage.

    Note:
        The downloaded content is stored in the "raw_content" property
        and the result of self.scrub_content() in the "content" property.
        Nothing is returned.

    Args:
        cookie_jar: cookielib.CookieJar object instance. When None and
            cookie_filename is given, a DynamicMozillaCookieJar is created
            for that file.
        cookie: cookielib.Cookie object instance
        cookie_filename: string, name of the cookie file used to create a
            cookie jar when none is supplied.
        referrer: string, Url of referrer. Defaults to self.url.
        request_headers: passed through to the DownloadAgent unchanged.

    Raises:
        DownloadError if the download yields no content and the agent
        reports errors. If there is no content and no errors, no exception
        is raised and the empty content is stored as-is.
    """
    if not referrer:
        referrer = self.url

    # Compare against None rather than testing truthiness: an empty
    # CookieJar is falsy (it defines __len__), so "not cookie_jar" would
    # discard a jar the caller supplied on purpose.
    if cookie_jar is None and cookie_filename:
        cookie_jar = DynamicMozillaCookieJar(filename=cookie_filename)
        cookie_jar.create_file()

    agent = DownloadAgent(
        accept_encoding='gzip,deflate',
        cookie_jar=cookie_jar,
        cookie=cookie,
        post_data=self.post_data,
        referrer=referrer,
        request_headers=request_headers,
        retries=self.retries,
        url=self.url,
    )
    agent.download()

    if not agent.content and agent.errors:
        raise DownloadError(messages=agent.errors)

    # Keep the unmodified download before storing the scrubbed version.
    self.raw_content = agent.content
    self.content = self.scrub_content()