Example #1
0
 def setUp(self):
     """Build the fixtures shared by the tests of this case.

     Creates a config for the local test server, the context/session
     produced by that config, a scheduler, a ``VoidResource`` wired to all
     four of them, and a blank ``Response`` pre-filled with a url and an
     encoding.
     """
     self.config = config = get_config('http://localhost:5000', debug=False)
     self.context = context = config.create_context()
     self.session = session = config.create_session()
     self.scheduler = scheduler = Scheduler()
     self.resource = VoidResource(session, config, scheduler, context)

     self.response = response = Response()
     # Unlike the url handed to get_config, this one ends with a '/'.
     response.url = 'http://localhost:5000/'
     response.encoding = 'utf-8'
Example #2
0
 def test_simple(self):
     """Check the config produced when only the project url is supplied.

     The result must be a ``ConfigHandler`` whose ``project_url`` echoes the
     argument, whose ``project_name`` is ``'http_localhost_5000'`` and whose
     ``project_folder`` is that name joined onto the system temp directory.
     Every remaining key must equal its entry in ``configs.default_config``.
     """
     ans = configs.get_config('http://localhost:5000')
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)) which only says "False is not true".
     self.assertIsInstance(ans, configs.ConfigHandler)
     self.assertEqual(ans.get('project_url'), 'http://localhost:5000')
     self.assertEqual(ans.get('project_name'), 'http_localhost_5000')
     self.assertEqual(
         ans.get('project_folder'),
         os.path.join(tempfile.gettempdir(), ans.get('project_name')))

     # These keys were asserted explicitly above, so they are excluded from
     # the comparison against the defaults.
     checked_above = ('project_url', 'project_name', 'project_folder')
     for k, v in configs.default_config.items():
         if k in checked_above:
             continue
         # Name the key in the message so a failure inside the loop is
         # attributable without re-running under a debugger.
         self.assertEqual(
             ans.get(k), v,
             'config key %r does not carry its default value' % (k,))
Example #3
0
 def test_simple_page_creation(self):
     """Check that ``create_page()`` on a default config yields a wired-up page.

     The page must be a ``WebPage`` exposing a ``Session``, a ``Context`` and
     a ``Scheduler``, and its session must reflect the config's
     ``bypass_robots`` and ``delay`` values plus the safe default headers.
     """
     ans = configs.get_config('http://localhost:5000')
     page = ans.create_page()

     # assertIsInstance reports the offending type on failure, which
     # assertTrue(isinstance(...)) does not.
     self.assertIsInstance(page, WebPage)
     self.assertIsInstance(page.session, Session)
     self.assertIsInstance(page.context, Context)
     self.assertIsInstance(page.scheduler, Scheduler)

     # The session flag is the logical inverse of the 'bypass_robots' option.
     self.assertEqual(page.session.obey_robots_txt,
                      not ans.get('bypass_robots'))
     self.assertEqual(page.session.delay, ans.get('delay'))
     self.assertEqual(page.session.headers,
                      configs.default_headers(**configs.safe_http_headers))
Example #4
0
 def test_session_creation(self):
     """Check that ``create_session()`` honours overridden config options.

     Overrides ``bypass_robots``, ``http_cache``, ``delay`` and
     ``http_headers`` on a fresh config, then verifies the resulting
     ``Session``: robots.txt is not obeyed, every mounted adapter is a
     ``CacheControlAdapter``, and the delay and headers match the overrides.
     """
     ans = configs.get_config('http://localhost:5000')
     # Subscript assignment instead of calling the __setitem__ dunder directly.
     ans['bypass_robots'] = True
     ans['http_cache'] = True
     ans['delay'] = 1
     ans['http_headers'] = {'User-Agent': 'test-bot'}

     sess = ans.create_session()
     self.assertIsInstance(sess, Session)
     self.assertEqual(sess.obey_robots_txt, False)

     # Imported locally, as in the original test, so the third-party
     # dependency is only required when this test actually runs.
     from cachecontrol import CacheControlAdapter
     for adapter in sess.adapters.values():
         self.assertIsInstance(adapter, CacheControlAdapter)

     self.assertEqual(sess.delay, 1)
     self.assertEqual(sess.headers, {'User-Agent': 'test-bot'})
Example #5
0
 def test_all_arguments(self):
     """Check that explicit keyword arguments to ``get_config`` are honoured.

     ``project_folder`` is expected to become the absolute form of the given
     folder joined with the project name; ``project_name``,
     ``bypass_robots`` and ``delay`` must be stored as passed.
     """
     ans = configs.get_config('http://localhost:5000',
                              project_folder='home/user/',
                              project_name='my_project',
                              bypass_robots=True,
                              delay=1)
     # Register the cleanup right away so the project folder is removed even
     # when one of the assertions below fails; previously the removal only
     # ran at the very end and a failing test left the directories behind.
     # os.removedirs also prunes parent directories that become empty.
     self.addCleanup(os.removedirs, ans.get('project_folder'))

     self.assertIsInstance(ans, configs.ConfigHandler)
     self.assertEqual(ans.get('project_url'), 'http://localhost:5000')
     self.assertEqual(ans.get('project_name'), 'my_project')
     # 'home/user/' is relative, so the expected folder is resolved against
     # the current working directory.
     self.assertEqual(
         ans.get('project_folder'),
         os.path.join(os.path.abspath('home/user/'),
                      ans.get('project_name')))
     self.assertEqual(ans.get('bypass_robots'), True)
     self.assertEqual(ans.get('delay'), 1)
Example #6
0
 def test_context_creation(self):
     """Check that ``create_context()`` carries over the ``tree_type`` option.

     Sets ``tree_type`` to ``'HIERARCHY'`` on a fresh config and verifies the
     created object is a ``Context`` exposing that same value.
     """
     ans = configs.get_config('http://localhost:5000')
     # Subscript assignment instead of calling the __setitem__ dunder directly.
     ans['tree_type'] = 'HIERARCHY'

     context = ans.create_context()
     # assertIsInstance reports the actual type when the check fails.
     self.assertIsInstance(context, Context)
     self.assertEqual(context.tree_type, 'HIERARCHY')