def get(
        self,
        cookie_jar=None,
        cookie=None,
        cookie_filename=None,
        referrer=None,
        request_headers=None,
):
    """Get the content of the webpage.

    Note:
        The downloaded data is stored in the "raw_content" property and
        the result of scrub_content() is stored in the "content" property.

    Args:
        cookie_jar: cookielib.CookieJar object instance. When given, it is
            always used as is, even if it holds no cookies yet.
        cookie: cookielib.Cookie object instance
        cookie_filename: string, name of a cookie file. Only used when no
            cookie_jar is given: a DynamicMozillaCookieJar is created for
            the file and its create_file() method is called.
        referrer: string, Url of referrer. Falls back to self.url when
            not provided.
        request_headers: passed through unchanged to DownloadAgent.

    Raises:
        DownloadError if the download produced no content and the agent
            reported errors. An empty download without reported errors
            does not raise.
    """
    if not referrer:
        referrer = self.url

    # Compare against None rather than relying on truthiness: a cookie jar
    # holding no cookies has a length of zero and is therefore falsy, which
    # would cause a caller-supplied (still empty) jar to be discarded.
    if cookie_jar is None and cookie_filename:
        cookie_jar = DynamicMozillaCookieJar(filename=cookie_filename)
        cookie_jar.create_file()

    agent = DownloadAgent(
        accept_encoding='gzip,deflate',
        cookie_jar=cookie_jar,
        cookie=cookie,
        post_data=self.post_data,
        referrer=referrer,
        request_headers=request_headers,
        retries=self.retries,
        url=self.url,
    )
    agent.download()

    # Only a download that is both empty and has reported errors is treated
    # as a failure.
    if not agent.content and agent.errors:
        raise DownloadError(messages=agent.errors)

    self.raw_content = agent.content
    self.content = self.scrub_content()
def test__create_file(self):
    """Check DynamicMozillaCookieJar.create_file() for several filenames.

    Covers a jar without a filename, a jar with an empty filename, a
    filename whose file does not exist yet, and a filename whose parent
    directory does not exist yet.
    """
    # A missing or empty filename must be handled gracefully.
    DynamicMozillaCookieJar().create_file()
    DynamicMozillaCookieJar(filename='').create_file()

    scratch_file = '/tmp/__test_download_agent__.txt'
    scratch_dir = '/tmp/__test_download_agent__'

    # Each scenario: (cookie file, path wiped before and after, wiper).
    scenarios = (
        # Non-existent file in an existing directory.
        (scratch_file, scratch_file, os.remove),
        # Non-existent file in a non-existent directory.
        (os.path.join(scratch_dir, '__cookie__.txt'), scratch_dir,
         shutil.rmtree),
    )

    for cookie_file, wipe_target, wipe in scenarios:
        # Start from a clean slate.
        if os.path.exists(wipe_target):
            wipe(wipe_target)

        jar = DynamicMozillaCookieJar(filename=cookie_file)
        self.assertTrue(jar is not None)
        # Nothing on disk yet, so loading has to fail.
        self.assertRaises(IOError, jar.load)

        # create_file() should produce a cookie file (and any missing
        # directory) that can subsequently be loaded.
        jar.create_file()
        self.assertTrue(os.path.exists(cookie_file))
        try:
            jar.load()
        except IOError:
            self.assertFalse('Unexpected IOError raised')

        # Leave nothing behind.
        if os.path.exists(wipe_target):
            wipe(wipe_target)

    return