예제 #1
0
def webui(ctx,
          host,
          port,
          cdn,
          scheduler_rpc,
          fetcher_rpc,
          max_rate,
          max_burst,
          username,
          password,
          need_auth,
          app=app):
    """Fill ``app.config`` from the context and arguments, then serve ``app``.

    Args:
        ctx: Context object; ``ctx.obj`` supplies ``taskdb``, ``projectdb``,
            ``resultdb``, ``phantomjs_proxy``, ``debug``, ``instances`` and a
            ``get()`` lookup.
        host: Passed to ``app.run``.
        port: Passed to ``app.run``.
        cdn: Stored as ``app.config['cdn']``.
        scheduler_rpc: Scheduler RPC client, a string handed to
            ``connect_rpc``, or ``None`` to fall back to the environment or
            ``http://localhost:23333/``.
        fetcher_rpc: Fetcher RPC client, a string handed to ``connect_rpc``,
            or ``None`` to fetch through a local ``Fetcher``.
        max_rate: Stored as ``app.config['max_rate']`` when truthy.
        max_burst: Stored as ``app.config['max_burst']`` when truthy.
        username: Stored as ``app.config['webui_username']`` when truthy.
        password: Stored as ``app.config['webui_password']`` when truthy.
        need_auth: Accepted but not read anywhere in this function.
            NOTE(review): presumably this should reach the app's
            configuration -- confirm against the web UI's auth handling.
        app: Application object to configure and run.

    Returns:
        ``app`` when ``ctx.obj.get('testing_mode')`` is truthy; otherwise
        ``None`` once ``app.run`` returns.
    """
    g = ctx.obj
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings are only written when a truthy value was supplied,
    # so whatever the app already holds for them stays in place otherwise.
    optional_settings = (
        ('max_rate', max_rate),
        ('max_burst', max_burst),
        ('webui_username', username),
        ('webui_password', password),
    )
    for key, value in optional_settings:
        if value:
            app.config[key] = value

    # fetcher rpc
    if isinstance(fetcher_rpc, six.string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument is a SyntaxError there. Passing it through
        # a dict sends the very same keyword and parses on every version.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        # Only the second element of what fetch() returns is exposed.
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(
            fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, six.string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None and os.environ.get('SCHEDULER_NAME'):
        # The leading len('tcp://') characters are dropped from the variable;
        # presumably it has the form 'tcp://host:port' -- confirm.
        scheduler_address = os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]
        app.config['scheduler_rpc'] = connect_rpc(
            ctx, None, 'http://%s/' % (scheduler_address))
    elif scheduler_rpc is None:
        app.config['scheduler_rpc'] = connect_rpc(ctx, None,
                                                  'http://localhost:23333/')
    else:
        app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    g.instances.append(app)
    if g.get('testing_mode'):
        # In testing mode the configured app is handed back instead of served.
        return app

    app.run(host=host, port=port)
예제 #2
0
파일: run.py 프로젝트: Debug-Orz/pyspider
def webui(ctx, host, port, cdn, scheduler_rpc, fetcher_rpc,
          max_rate, max_burst, username, password):
    """Fill the web UI app's config from the context and arguments, then serve it.

    Args:
        ctx: Context object; ``ctx.obj`` supplies ``taskdb``, ``projectdb``,
            ``resultdb``, ``phantomjs_proxy``, ``debug`` and a ``get()``
            lookup.
        host: Passed to ``app.run``.
        port: Passed to ``app.run``.
        cdn: Stored as ``app.config['cdn']``.
        scheduler_rpc: Scheduler RPC client, a string handed to
            ``connect_rpc``, or ``None`` to fall back to the environment or
            ``http://localhost:23333/``.
        fetcher_rpc: Fetcher RPC client, a string handed to ``connect_rpc``,
            or ``None`` to fetch through a local ``Fetcher``.
        max_rate: Stored as ``app.config['max_rate']`` when truthy.
        max_burst: Stored as ``app.config['max_burst']`` when truthy.
        username: Stored as ``app.config['webui_username']`` when truthy.
        password: Stored as ``app.config['webui_password']`` when truthy.

    Returns:
        The app when ``ctx.obj.get('testing_mode')`` is truthy; otherwise
        ``None`` once ``app.run`` returns.
    """
    g = ctx.obj
    from pyspider.webui.app import app

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere
    # so the isinstance checks below do not raise NameError.
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str

    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings are only written when a truthy value was supplied.
    optional_settings = (
        ('max_rate', max_rate),
        ('max_burst', max_burst),
        ('webui_username', username),
        ('webui_password', password),
    )
    for key, value in optional_settings:
        if value:
            app.config[key] = value

    # fetcher rpc
    if isinstance(fetcher_rpc, string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        from pyspider.fetcher.tornado_fetcher import Fetcher
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument is a SyntaxError there. Passing it through
        # a dict sends the very same keyword and parses on every version.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        # Only the second element of what fetch() returns is exposed.
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None and os.environ.get('SCHEDULER_NAME'):
        # The leading len('tcp://') characters are dropped from the variable;
        # presumably it has the form 'tcp://host:port' -- confirm.
        scheduler_address = os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]
        app.config['scheduler_rpc'] = connect_rpc(
            ctx, None, 'http://%s/' % (scheduler_address))
    elif scheduler_rpc is None:
        app.config['scheduler_rpc'] = connect_rpc(ctx, None, 'http://localhost:23333/')
    else:
        app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    if g.get('testing_mode'):
        # In testing mode the configured app is handed back instead of served.
        return app

    app.run(host=host, port=port)
예제 #3
0
class TestResponse(unittest.TestCase):
    """Checks responses rebuilt from fetch results against a local httpbin app."""

    # Template task; get() deep-copies it and fills in the URL.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
    }

    @classmethod
    def setUpClass(cls):
        """Create the shared fetcher and start httpbin in a subprocess."""
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument is a SyntaxError there. Passing it through
        # a dict sends the very same keyword and parses on every version.
        cls.fetcher = Fetcher(None, None, **{'async': False})
        cls.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
        cls.httpbin = 'http://127.0.0.1:14887'
        # Fixed pause before the first test runs; presumably long enough for
        # the subprocess to start listening -- confirm on slow machines.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(cls):
        """Stop the httpbin subprocess started in setUpClass."""
        cls.httpbin_thread.terminate()

    def get(self, url, **kwargs):
        """Fetch ``url`` and return the rebuilt response, asserting status 200.

        Args:
            url: Absolute ``http://`` URL, or anything else, which is appended
                to the local httpbin base URL.
            **kwargs: Extra entries merged into the task before fetching.
        """
        if not url.startswith('http://'):
            url = self.httpbin + url
        # Deep copy so the class-level template is never mutated.
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = url
        request.update(kwargs)
        task, result = self.fetcher.fetch(request)
        response = rebuild_response(result)
        # The raw result is attached as the failure message for diagnosis.
        self.assertEqual(response.status_code, 200, result)
        return response

    def test_10_html(self):
        """An HTML page yields a queryable document."""
        response = self.get('/html')
        self.assertIsNotNone(response.doc('h1'))

    def test_20_xml(self):
        """An XML page yields a queryable document."""
        response = self.get('/xml')
        self.assertIsNotNone(response.doc('item'))

    def test_30_gzip(self):
        """The /gzip body text contains 'gzipped'."""
        response = self.get('/gzip')
        self.assertIn('gzipped', response.text)

    def test_40_deflate(self):
        """The /deflate body text contains 'deflated'."""
        response = self.get('/deflate')
        self.assertIn('deflated', response.text)
예제 #4
0
파일: run.py 프로젝트: zhangfeifei02/python
def run_webui(g=g):
    """Configure the web UI app from ``g`` and the environment, then serve it.

    Args:
        g: Settings object read for ``phantomjs_proxy``, ``taskdb``,
            ``projectdb``, ``resultdb``, ``scheduler_rpc``, ``demo_mode``,
            ``debug``, ``webui_host``, ``webui_port`` and, when present,
            ``all_in_one``.

    Environment:
        WEBUI_USERNAME: When set, stored as ``app.config['webui_username']``.
        WEBUI_PASSWORD: Stored as ``app.config['webui_password']`` (empty
            string if unset), but only when WEBUI_USERNAME is set.
    """
    # The previous ``import cPickle as pickle`` was never used in this
    # function and is unavailable on Python 3, so it has been dropped.
    from pyspider.fetcher.tornado_fetcher import Fetcher
    # ``async`` is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument is a SyntaxError there. Passing it through a
    # dict sends the very same keyword and parses on every version.
    fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    # Only the second element of what fetch() returns is exposed.
    app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    app.config['scheduler_rpc'] = g.scheduler_rpc
    #app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/'
    if g.demo_mode:
        app.config['max_rate'] = 0.2
        app.config['max_burst'] = 3.0
    if 'WEBUI_USERNAME' in os.environ:
        app.config['webui_username'] = os.environ['WEBUI_USERNAME']
        app.config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '')
    # app.debug is left untouched when g.all_in_one is set and truthy.
    if not getattr(g, 'all_in_one', False):
        app.debug = g.debug
    app.run(host=g.webui_host, port=g.webui_port)
예제 #5
0
파일: run.py 프로젝트: BCriswell/pyspider
def run_webui(g=g):
    """Set up the web UI app from ``g`` plus environment variables and run it.

    Args:
        g: Settings object read for ``phantomjs_proxy``, ``taskdb``,
            ``projectdb``, ``resultdb``, ``scheduler_rpc``, ``demo_mode``,
            ``debug``, ``webui_host``, ``webui_port`` and, when present,
            ``all_in_one``.

    Environment:
        WEBUI_USERNAME: When set, stored as ``app.config['webui_username']``.
        WEBUI_PASSWORD: Stored as ``app.config['webui_password']`` (empty
            string if unset), but only when WEBUI_USERNAME is set.
    """
    # The unused ``import cPickle as pickle`` that used to open this function
    # has been removed: nothing here referenced it and it fails on Python 3.
    from pyspider.fetcher.tornado_fetcher import Fetcher
    from pyspider.webui.app import app

    # ``async`` became a reserved word in Python 3.7, which makes the literal
    # ``async=False`` keyword a SyntaxError; unpacking a dict passes the same
    # keyword argument and is valid syntax on Python 2 and 3 alike.
    fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    config = app.config
    config['taskdb'] = g.taskdb
    config['projectdb'] = g.projectdb
    config['resultdb'] = g.resultdb
    # Only the second element of what fetch() returns is exposed.
    config['fetch'] = lambda x: fetcher.fetch(x)[1]
    config['scheduler_rpc'] = g.scheduler_rpc
    #app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/'
    if g.demo_mode:
        config['max_rate'] = 0.2
        config['max_burst'] = 3.0
    if 'WEBUI_USERNAME' in os.environ:
        config['webui_username'] = os.environ['WEBUI_USERNAME']
        config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '')
    # app.debug is left untouched when g.all_in_one is set and truthy.
    if not getattr(g, 'all_in_one', False):
        app.debug = g.debug
    app.run(host=g.webui_host, port=g.webui_port)
예제 #6
0
class TestResponse(unittest.TestCase):
    """Checks responses rebuilt from fetch results against a local httpbin app."""

    # Template task; get() deep-copies it and fills in the URL.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
    }

    @classmethod
    def setUpClass(cls):
        """Create the shared fetcher and start httpbin in a subprocess."""
        # ``async`` is a reserved word from Python 3.7 on, so writing it as a
        # literal keyword argument is a SyntaxError there. Passing it through
        # a dict sends the very same keyword and parses on every version.
        cls.fetcher = Fetcher(None, None, **{'async': False})
        cls.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887, passthrough_errors=False)
        cls.httpbin = 'http://127.0.0.1:14887'
        # Fixed pause before the first test runs; presumably long enough for
        # the subprocess to start listening -- confirm on slow machines.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(cls):
        """Stop the httpbin subprocess started in setUpClass."""
        cls.httpbin_thread.terminate()

    def get(self, url, **kwargs):
        """Fetch ``url`` and return the rebuilt response.

        Args:
            url: Absolute ``http://`` URL, or anything else, which is appended
                to the local httpbin base URL.
            **kwargs: Extra entries merged into the task before fetching.
        """
        if not url.startswith('http://'):
            url = self.httpbin + url
        # Deep copy so the class-level template is never mutated.
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = url
        request.update(kwargs)
        result = self.fetcher.fetch(request)
        return rebuild_response(result)

    def test_10_html(self):
        """An HTML page returns 200 and yields a queryable document."""
        response = self.get('/html')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('h1'))

    def test_20_xml(self):
        """An XML page returns 200 and yields a queryable document."""
        response = self.get('/xml')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('item'))

    def test_30_gzip(self):
        """The /gzip body returns 200 and its text contains 'gzipped'."""
        response = self.get('/gzip')
        self.assertEqual(response.status_code, 200)
        self.assertIn('gzipped', response.text)

    def test_40_deflate(self):
        """The /deflate body returns 200 and its text contains 'deflated'."""
        response = self.get('/deflate')
        self.assertEqual(response.status_code, 200)
        self.assertIn('deflated', response.text)

    def test_50_ok(self):
        """Responses for /status/200 and /status/302 are truthy and ``ok``."""
        response = self.get('/status/200')
        self.assertTrue(response.ok)
        self.assertTrue(response)
        response = self.get('/status/302')
        self.assertTrue(response.ok)
        self.assertTrue(response)
        # NOTE(review): ``self`` is the TestCase, and this class defines no
        # ``raise_for_status``, so this block appears to be satisfied by an
        # AttributeError rather than by anything the response does.
        # Presumably ``response.raise_for_status(allow_redirects=False)`` was
        # intended -- confirm that the fetched response still carries the
        # redirect status before changing it, or the assertion may fail.
        with self.assertRaises(Exception):
            self.raise_for_status(allow_redirects=False)

    def test_60_not_ok(self):
        """Responses for /status/400, /500 and /600 are falsy and not ``ok``."""
        response = self.get('/status/400')
        self.assertFalse(response.ok)
        self.assertFalse(response)
        response = self.get('/status/500')
        self.assertFalse(response.ok)
        self.assertFalse(response)
        response = self.get('/status/600')
        self.assertFalse(response.ok)
        self.assertFalse(response)

    def test_70_reraise_exception(self):
        """A failed fetch re-raises with 'HTTP 599' from raise_for_status()."""
        # 'file://abc' does not start with 'http://', so get() appends it to
        # the httpbin base URL before fetching.
        response = self.get('file://abc')
        # assertRaisesRegexp was removed in Python 3.12; prefer the current
        # name and fall back to the old one where only that exists (2.7).
        assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
        if assert_raises_regex is None:
            assert_raises_regex = self.assertRaisesRegexp
        with assert_raises_regex(Exception, 'HTTP 599'):
            response.raise_for_status()
예제 #7
0
class TestResponse(unittest.TestCase):
    """Exercises rebuilt fetch responses against an httpbin app on localhost."""

    # Base task dict; each request is a deep copy with its own URL filled in.
    sample_task_http = {
        'taskid': 'taskid',
        'project': 'project',
        'url': '',
    }

    @classmethod
    def setUpClass(cls):
        """Build the shared fetcher and launch httpbin in a subprocess."""
        cls.fetcher = Fetcher(None, None, async_mode=False)
        cls.httpbin_thread = utils.run_in_subprocess(httpbin.app.run,
                                                     port=14887,
                                                     passthrough_errors=False)
        cls.httpbin = 'http://127.0.0.1:14887'
        # Fixed pause before the first test runs; presumably long enough for
        # the subprocess to start listening -- confirm on slow machines.
        time.sleep(0.5)

    @classmethod
    def tearDownClass(cls):
        """Terminate the httpbin subprocess launched in setUpClass."""
        cls.httpbin_thread.terminate()

    def get(self, url, **kwargs):
        """Fetch ``url`` and return the rebuilt response.

        Args:
            url: Absolute ``http://`` URL, or anything else, which is appended
                to the local httpbin base URL.
            **kwargs: Extra entries merged into the task before fetching.
        """
        if not url.startswith('http://'):
            url = self.httpbin + url
        # Deep copy so the class-level template is never mutated.
        request = copy.deepcopy(self.sample_task_http)
        request['url'] = url
        request.update(kwargs)
        result = self.fetcher.fetch(request)
        return rebuild_response(result)

    def test_10_html(self):
        """An HTML page returns 200 and yields a queryable document."""
        response = self.get('/html')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('h1'))

    def test_20_xml(self):
        """An XML page returns 200 and yields a queryable document."""
        response = self.get('/xml')
        self.assertEqual(response.status_code, 200)
        self.assertIsNotNone(response.doc('item'))

    def test_30_gzip(self):
        """The /gzip body returns 200 and its text contains 'gzipped'."""
        response = self.get('/gzip')
        self.assertEqual(response.status_code, 200)
        self.assertIn('gzipped', response.text)

    def test_40_deflate(self):
        """The /deflate body returns 200 and its text contains 'deflated'."""
        response = self.get('/deflate')
        self.assertEqual(response.status_code, 200)
        self.assertIn('deflated', response.text)

    def test_50_ok(self):
        """Responses for /status/200 and /status/302 are truthy and ``ok``."""
        response = self.get('/status/200')
        self.assertTrue(response.ok)
        self.assertTrue(response)
        response = self.get('/status/302')
        self.assertTrue(response.ok)
        self.assertTrue(response)
        # NOTE(review): ``self`` is the TestCase, and this class defines no
        # ``raise_for_status``, so this block appears to be satisfied by an
        # AttributeError rather than by anything the response does.
        # Presumably ``response.raise_for_status(allow_redirects=False)`` was
        # intended -- confirm that the fetched response still carries the
        # redirect status before changing it, or the assertion may fail.
        with self.assertRaises(Exception):
            self.raise_for_status(allow_redirects=False)

    def test_60_not_ok(self):
        """Responses for /status/400, /500 and /600 are falsy and not ``ok``."""
        response = self.get('/status/400')
        self.assertFalse(response.ok)
        self.assertFalse(response)
        response = self.get('/status/500')
        self.assertFalse(response.ok)
        self.assertFalse(response)
        response = self.get('/status/600')
        self.assertFalse(response.ok)
        self.assertFalse(response)

    def test_70_reraise_exception(self):
        """A failed fetch re-raises with 'HTTP 599' from raise_for_status()."""
        # 'file://abc' does not start with 'http://', so get() appends it to
        # the httpbin base URL before fetching.
        response = self.get('file://abc')
        # assertRaisesRegexp was removed in Python 3.12; prefer the current
        # name and fall back to the old one where only that exists (2.7).
        assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
        if assert_raises_regex is None:
            assert_raises_regex = self.assertRaisesRegexp
        with assert_raises_regex(Exception, 'HTTP 599'):
            response.raise_for_status()