def webui(ctx, host, port, cdn, scheduler_rpc, fetcher_rpc, max_rate, max_burst,
          username, password, need_auth, app=app):
    """Configure the web UI application from the CLI context and serve it.

    Database handles found on ``ctx.obj`` and the options given here are
    copied into ``app.config``; a ``fetch`` callable and a scheduler RPC
    client are installed as well.

    :param ctx: context whose ``obj`` supplies ``taskdb``, ``projectdb``,
        ``resultdb``, ``phantomjs_proxy``, ``debug`` and ``instances``.
    :param host: forwarded to ``app.run``.
    :param port: forwarded to ``app.run``.
    :param cdn: stored as ``app.config['cdn']``.
    :param scheduler_rpc: an RPC client, a string handed to ``connect_rpc``,
        or ``None``.  With ``None`` the address is taken from the
        ``SCHEDULER_PORT_23333_TCP`` environment variable when
        ``SCHEDULER_NAME`` is set, else ``http://localhost:23333/`` is used.
    :param fetcher_rpc: an RPC client, a string handed to ``connect_rpc``,
        or ``None`` to fetch through a locally constructed ``Fetcher``.
    :param max_rate: stored as ``app.config['max_rate']`` when truthy.
    :param max_burst: stored as ``app.config['max_burst']`` when truthy.
    :param username: stored as ``app.config['webui_username']`` when truthy.
    :param password: stored as ``app.config['webui_password']`` when truthy.
    :param need_auth: NOTE(review): accepted but never read by this
        function -- confirm whether it should be written to ``app.config``.
    :param app: the application object to configure and run.
    :returns: ``app`` when ``g.get('testing_mode')`` is truthy; otherwise
        ``app.run`` is called and nothing is returned.
    """
    g = ctx.obj

    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings are only written when a truthy value was supplied,
    # so whatever the app already has configured stays in place otherwise.
    optional_settings = (
        ('max_rate', max_rate),
        ('max_burst', max_burst),
        ('webui_username', username),
        ('webui_password', password),
    )
    for key, value in optional_settings:
        if value:
            app.config[key] = value

    # fetcher rpc
    if isinstance(fetcher_rpc, six.string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument no longer parses.  Dict unpacking passes
        # the very same keyword to the callee.
        # NOTE(review): if Fetcher has renamed this parameter, update the key.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        # Only the second element of what ``fetch`` returns is exposed.
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(
            fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, six.string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None:
        if os.environ.get('SCHEDULER_NAME'):
            # Strip the leading 'tcp://' to obtain the bare address.
            address = os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]
            scheduler_rpc = connect_rpc(ctx, None, 'http://%s/' % address)
        else:
            scheduler_rpc = connect_rpc(ctx, None, 'http://localhost:23333/')
    app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    g.instances.append(app)
    if g.get('testing_mode'):
        return app

    app.run(host=host, port=port)
def webui(ctx, host, port, cdn, scheduler_rpc, fetcher_rpc, max_rate, max_burst,
          username, password):
    """Configure the pyspider web UI application and serve it.

    Database handles found on ``ctx.obj`` and the options given here are
    copied into ``app.config``; a ``fetch`` callable and a scheduler RPC
    client are installed as well.

    :param ctx: context whose ``obj`` supplies ``taskdb``, ``projectdb``,
        ``resultdb``, ``phantomjs_proxy`` and ``debug``.
    :param host: forwarded to ``app.run``.
    :param port: forwarded to ``app.run``.
    :param cdn: stored as ``app.config['cdn']``.
    :param scheduler_rpc: an RPC client, a string handed to ``connect_rpc``,
        or ``None``.  With ``None`` the address is taken from the
        ``SCHEDULER_PORT_23333_TCP`` environment variable when
        ``SCHEDULER_NAME`` is set, else ``http://localhost:23333/`` is used.
    :param fetcher_rpc: an RPC client, a string handed to ``connect_rpc``,
        or ``None`` to fetch through a locally constructed ``Fetcher``.
    :param max_rate: stored as ``app.config['max_rate']`` when truthy.
    :param max_burst: stored as ``app.config['max_burst']`` when truthy.
    :param username: stored as ``app.config['webui_username']`` when truthy.
    :param password: stored as ``app.config['webui_password']`` when truthy.
    :returns: ``app`` when ``g.get('testing_mode')`` is truthy; otherwise
        ``app.run`` is called and nothing is returned.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # isinstance checks below do not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    g = ctx.obj

    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings are only written when a truthy value was supplied.
    if max_rate:
        app.config['max_rate'] = max_rate
    if max_burst:
        app.config['max_burst'] = max_burst
    if username:
        app.config['webui_username'] = username
    if password:
        app.config['webui_password'] = password

    # fetcher rpc
    if isinstance(fetcher_rpc, string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        from pyspider.fetcher.tornado_fetcher import Fetcher
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument no longer parses.  Dict unpacking passes
        # the very same keyword to the callee.
        # NOTE(review): if Fetcher has renamed this parameter, update the key.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        # Only the second element of what ``fetch`` returns is exposed.
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(
            fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None:
        if os.environ.get('SCHEDULER_NAME'):
            # Strip the leading 'tcp://' to obtain the bare address.
            address = os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]
            scheduler_rpc = connect_rpc(ctx, None, 'http://%s/' % address)
        else:
            scheduler_rpc = connect_rpc(ctx, None, 'http://localhost:23333/')
    app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    if g.get('testing_mode'):
        return app

    app.run(host=host, port=port)
class TestResponse(unittest.TestCase):
    """Fetch pages from a local httpbin server and check the rebuilt response."""

    # Template task; ``get`` deep-copies it and fills in the URL.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
    }

    @classmethod
    def setUpClass(cls):
        """Create the shared fetcher and start httpbin on port 14887."""
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument no longer parses.  Dict unpacking passes
        # the very same keyword to the callee.
        # NOTE(review): if Fetcher has renamed this parameter, update the key.
        cls.fetcher = Fetcher(None, None, **{'async': False})
        cls.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
        cls.httpbin = 'http://127.0.0.1:14887'
        # Presumably gives the httpbin server time to start listening.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(cls):
        """Terminate the httpbin server started in ``setUpClass``."""
        cls.httpbin_thread.terminate()

    def get(self, url, **kwargs):
        """Fetch ``url`` and return the rebuilt response.

        URLs that do not start with ``http://`` are taken relative to the
        local httpbin server.  Extra keyword arguments are merged into the
        task dict.  The fetch must come back with status code 200.
        """
        if not url.startswith('http://'):
            url = self.httpbin + url
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = url
        request.update(kwargs)
        task, result = self.fetcher.fetch(request)
        response = rebuild_response(result)
        # The raw result is used as the failure message to aid debugging.
        self.assertEqual(response.status_code, 200, result)
        return response

    def test_10_html(self):
        response = self.get('/html')
        self.assertIsNotNone(response.doc('h1'))

    def test_20_xml(self):
        response = self.get('/xml')
        self.assertIsNotNone(response.doc('item'))

    def test_30_gzip(self):
        response = self.get('/gzip')
        self.assertIn('gzipped', response.text)

    def test_40_deflate(self):
        response = self.get('/deflate')
        self.assertIn('deflated', response.text)
def run_webui(g=g):
    """Configure the pyspider web UI application from ``g`` and run it.

    :param g: settings object supplying ``phantomjs_proxy``, ``taskdb``,
        ``projectdb``, ``resultdb``, ``scheduler_rpc``, ``demo_mode``,
        ``debug``, ``webui_host`` and ``webui_port``; an optional
        ``all_in_one`` attribute suppresses the debug setting.

    The ``WEBUI_USERNAME`` / ``WEBUI_PASSWORD`` environment variables, when
    the former is present, are copied into the app configuration.
    """
    # The unused ``import cPickle as pickle`` was dropped: nothing in this
    # function referenced it and the module does not exist on Python 3.
    from pyspider.fetcher.tornado_fetcher import Fetcher
    # ``async`` is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument no longer parses.  Dict unpacking passes the
    # very same keyword to the callee.
    # NOTE(review): if Fetcher has renamed this parameter, update the key.
    fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    # Only the second element of what ``fetch`` returns is exposed.
    app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    app.config['scheduler_rpc'] = g.scheduler_rpc
    # app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/'

    if g.demo_mode:
        app.config['max_rate'] = 0.2
        app.config['max_burst'] = 3.0

    if 'WEBUI_USERNAME' in os.environ:
        app.config['webui_username'] = os.environ['WEBUI_USERNAME']
        # A missing password variable is treated as an empty password.
        app.config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '')

    # ``app.debug`` is left untouched when ``g.all_in_one`` is truthy.
    if not getattr(g, 'all_in_one', False):
        app.debug = g.debug

    app.run(host=g.webui_host, port=g.webui_port)
def run_webui(g=g):
    """Configure the pyspider web UI application from ``g`` and run it.

    :param g: settings object supplying ``phantomjs_proxy``, ``taskdb``,
        ``projectdb``, ``resultdb``, ``scheduler_rpc``, ``demo_mode``,
        ``debug``, ``webui_host`` and ``webui_port``; an optional
        ``all_in_one`` attribute suppresses the debug setting.

    The ``WEBUI_USERNAME`` / ``WEBUI_PASSWORD`` environment variables, when
    the former is present, are copied into the app configuration.
    """
    # The unused ``import cPickle as pickle`` was dropped: nothing in this
    # function referenced it and the module does not exist on Python 3.
    from pyspider.fetcher.tornado_fetcher import Fetcher
    # ``async`` is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument no longer parses.  Dict unpacking passes the
    # very same keyword to the callee.
    # NOTE(review): if Fetcher has renamed this parameter, update the key.
    fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    # Only the second element of what ``fetch`` returns is exposed.
    app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    app.config['scheduler_rpc'] = g.scheduler_rpc
    # app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/'

    if g.demo_mode:
        app.config['max_rate'] = 0.2
        app.config['max_burst'] = 3.0

    if 'WEBUI_USERNAME' in os.environ:
        app.config['webui_username'] = os.environ['WEBUI_USERNAME']
        # A missing password variable is treated as an empty password.
        app.config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '')

    # ``app.debug`` is left untouched when ``g.all_in_one`` is truthy.
    if not getattr(g, 'all_in_one', False):
        app.debug = g.debug

    app.run(host=g.webui_host, port=g.webui_port)
class TestResponse(unittest.TestCase):
    """Fetch pages from a local httpbin server and check the rebuilt response."""

    # Template task; ``get`` deep-copies it and fills in the URL.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
    }

    @classmethod
    def setUpClass(cls):
        """Create the shared fetcher and start httpbin on port 14887."""
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument no longer parses.  Dict unpacking passes
        # the very same keyword to the callee.
        # NOTE(review): if Fetcher has renamed this parameter, update the key.
        cls.fetcher = Fetcher(None, None, **{'async': False})
        cls.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run, port=14887, passthrough_errors=False)
        cls.httpbin = 'http://127.0.0.1:14887'
        # Presumably gives the httpbin server time to start listening.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(cls):
        """Terminate the httpbin server started in ``setUpClass``."""
        cls.httpbin_thread.terminate()

    def get(self, url, **kwargs):
        """Fetch ``url`` and return the rebuilt response.

        URLs that do not start with ``http://`` are taken relative to the
        local httpbin server.  Extra keyword arguments are merged into the
        task dict.  No status check is made here; callers assert on it.
        """
        if not url.startswith('http://'):
            url = self.httpbin + url
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = url
        request.update(kwargs)
        result = self.fetcher.fetch(request)
        response = rebuild_response(result)
        return response

    def test_10_html(self):
        response = self.get('/html')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('h1'))

    def test_20_xml(self):
        response = self.get('/xml')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('item'))

    def test_30_gzip(self):
        response = self.get('/gzip')
        self.assertEqual(response.status_code, 200)
        self.assertIn('gzipped', response.text)

    def test_40_deflate(self):
        response = self.get('/deflate')
        self.assertEqual(response.status_code, 200)
        self.assertIn('deflated', response.text)

    def test_50_ok(self):
        response = self.get('/status/200')
        self.assertTrue(response.ok)
        self.assertTrue(response)

        response = self.get('/status/302')
        self.assertTrue(response.ok)
        self.assertTrue(response)

        # NOTE(review): ``self`` is the TestCase, which defines no
        # ``raise_for_status``; the AttributeError satisfies assertRaises, so
        # this check passes without exercising the response.  It probably was
        # meant to call ``response.raise_for_status`` -- confirm the intended
        # behaviour before changing it.
        with self.assertRaises(Exception):
            self.raise_for_status(allow_redirects=False)

    def test_60_not_ok(self):
        response = self.get('/status/400')
        self.assertFalse(response.ok)
        self.assertFalse(response)

        response = self.get('/status/500')
        self.assertFalse(response.ok)
        self.assertFalse(response)

        response = self.get('/status/600')
        self.assertFalse(response.ok)
        self.assertFalse(response)

    def test_70_reraise_exception(self):
        response = self.get('file://abc')
        # ``assertRaisesRegexp`` is gone on recent Python 3 releases while
        # Python 2.7 only knows that spelling; use whichever is available.
        raises_regex = getattr(self, 'assertRaisesRegex', None)
        if raises_regex is None:
            raises_regex = self.assertRaisesRegexp
        with raises_regex(Exception, 'HTTP 599'):
            response.raise_for_status()
class TestResponse(unittest.TestCase):
    """Fetch pages from a local httpbin server and check the rebuilt response."""

    # Template task; ``get`` deep-copies it and fills in the URL.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
    }

    @classmethod
    def setUpClass(cls):
        """Create the shared fetcher and start httpbin on port 14887."""
        cls.fetcher = Fetcher(None, None, async_mode=False)
        cls.httpbin_thread = utils.run_in_subprocess(
            httpbin.app.run, port=14887, passthrough_errors=False)
        cls.httpbin = 'http://127.0.0.1:14887'
        # Presumably gives the httpbin server time to start listening.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(cls):
        """Terminate the httpbin server started in ``setUpClass``."""
        cls.httpbin_thread.terminate()

    def get(self, url, **kwargs):
        """Fetch ``url`` and return the rebuilt response.

        URLs that do not start with ``http://`` are taken relative to the
        local httpbin server.  Extra keyword arguments are merged into the
        task dict.  No status check is made here; callers assert on it.
        """
        if not url.startswith('http://'):
            url = self.httpbin + url
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = url
        request.update(kwargs)
        result = self.fetcher.fetch(request)
        response = rebuild_response(result)
        return response

    def test_10_html(self):
        response = self.get('/html')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('h1'))

    def test_20_xml(self):
        response = self.get('/xml')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('item'))

    def test_30_gzip(self):
        response = self.get('/gzip')
        self.assertEqual(response.status_code, 200)
        self.assertIn('gzipped', response.text)

    def test_40_deflate(self):
        response = self.get('/deflate')
        self.assertEqual(response.status_code, 200)
        self.assertIn('deflated', response.text)

    def test_50_ok(self):
        response = self.get('/status/200')
        self.assertTrue(response.ok)
        self.assertTrue(response)

        response = self.get('/status/302')
        self.assertTrue(response.ok)
        self.assertTrue(response)

        # NOTE(review): ``self`` is the TestCase, which defines no
        # ``raise_for_status``; the AttributeError satisfies assertRaises, so
        # this check passes without exercising the response.  It probably was
        # meant to call ``response.raise_for_status`` -- confirm the intended
        # behaviour before changing it.
        with self.assertRaises(Exception):
            self.raise_for_status(allow_redirects=False)

    def test_60_not_ok(self):
        response = self.get('/status/400')
        self.assertFalse(response.ok)
        self.assertFalse(response)

        response = self.get('/status/500')
        self.assertFalse(response.ok)
        self.assertFalse(response)

        response = self.get('/status/600')
        self.assertFalse(response.ok)
        self.assertFalse(response)

    def test_70_reraise_exception(self):
        response = self.get('file://abc')
        # ``assertRaisesRegexp`` is gone on recent Python 3 releases while
        # Python 2.7 only knows that spelling; use whichever is available.
        raises_regex = getattr(self, 'assertRaisesRegex', None)
        if raises_regex is None:
            raises_regex = self.assertRaisesRegexp
        with raises_regex(Exception, 'HTTP 599'):
            response.raise_for_status()