def run_webui(g=g):
    """Wire the web UI application to the shared databases and start it.

    The task, project and result databases are copied from ``g`` into
    ``app.config``, the scheduler is reached through an XML-RPC proxy at
    ``http://localhost:23333``, and the application is stored on ``g.app``
    before ``app.run()`` is called with its default arguments.

    Args:
        g: Object exposing ``taskdb``, ``projectdb`` and ``resultdb``
            attributes; defaults to the module-level ``g``.
    """
    # ``xmlrpclib`` was renamed to ``xmlrpc.client`` in Python 3; fall back so
    # this function imports cleanly on either interpreter.
    try:
        import xmlrpclib
    except ImportError:
        import xmlrpc.client as xmlrpclib
    from pyspider.webui.app import app

    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['scheduler_rpc'] = xmlrpclib.ServerProxy('http://localhost:23333')

    # Keep a handle on the application so other code can reach it through g.
    g.app = app
    app.run()
def webui(ctx, host, port, cdn, scheduler_rpc, fetcher_rpc, max_rate, max_burst,
          username, password, need_auth, app=app):
    """Configure the web UI application and serve it on ``host``/``port``.

    Args:
        ctx: Invocation context. ``ctx.obj`` supplies ``taskdb``,
            ``projectdb``, ``resultdb``, ``phantomjs_proxy``, ``debug`` and
            the ``instances`` list, and is also forwarded to ``connect_rpc``.
        host: Host passed to ``app.run``.
        port: Port passed to ``app.run``.
        cdn: Stored as ``app.config['cdn']``.
        scheduler_rpc: An RPC client, a URL string to connect to, or ``None``
            to pick a default address (see the inline comments below).
        fetcher_rpc: An RPC client, a URL string to connect to, or ``None``
            to fetch in-process with a local ``Fetcher``.
        max_rate: Stored as ``app.config['max_rate']`` when truthy.
        max_burst: Stored as ``app.config['max_burst']`` when truthy.
        username: Stored as ``app.config['webui_username']`` when truthy.
        password: Stored as ``app.config['webui_password']`` when truthy.
        need_auth: Accepted but not read anywhere in this function.
            NOTE(review): confirm whether it should be written to
            ``app.config``.
        app: Application object to configure; defaults to the module-level
            ``app``.

    Returns:
        ``app`` when ``ctx.obj`` has a truthy ``testing_mode``; otherwise
        ``None`` once ``app.run`` returns.
    """
    g = ctx.obj
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings are only written when a truthy value was supplied.
    if max_rate:
        app.config['max_rate'] = max_rate
    if max_burst:
        app.config['max_burst'] = max_burst
    if username:
        app.config['webui_username'] = username
    if password:
        app.config['webui_password'] = password

    # fetcher rpc
    if isinstance(fetcher_rpc, six.string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        # ``async`` is a reserved word from Python 3.7 on, so spelling it as a
        # literal keyword argument is a SyntaxError there. Unpacking a dict
        # passes exactly the same keyword on every interpreter version.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(
            fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, six.string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None and os.environ.get('SCHEDULER_NAME'):
        # SCHEDULER_PORT_23333_TCP is expected to look like "tcp://host:port";
        # strip the scheme and reuse the remainder as an HTTP address.
        app.config['scheduler_rpc'] = connect_rpc(
            ctx, None,
            'http://%s/' % (os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]))
    elif scheduler_rpc is None:
        app.config['scheduler_rpc'] = connect_rpc(ctx, None, 'http://localhost:23333/')
    else:
        app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    g.instances.append(app)
    if g.get('testing_mode'):
        # Hand the configured application back instead of serving it.
        return app

    app.run(host=host, port=port)
def run_webui(g=g):
    """Attach the databases and a scheduler proxy to the web UI, then run it.

    ``app.config`` receives ``g.taskdb``, ``g.projectdb`` and ``g.resultdb``
    plus an XML-RPC ``ServerProxy`` for ``http://localhost:23333`` under the
    ``scheduler_rpc`` key. The application is also stored as ``g.app`` before
    ``app.run()`` is invoked without arguments.

    Args:
        g: Object providing the ``taskdb``, ``projectdb`` and ``resultdb``
            attributes; defaults to the module-level ``g``.
    """
    # Python 3 renamed ``xmlrpclib`` to ``xmlrpc.client``; accept either so
    # the import does not fail on newer interpreters.
    try:
        import xmlrpclib
    except ImportError:
        import xmlrpc.client as xmlrpclib
    from pyspider.webui.app import app

    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['scheduler_rpc'] = xmlrpclib.ServerProxy(
        'http://localhost:23333')

    g.app = app
    app.run()
def webui(ctx, host, port, cdn, scheduler_rpc, fetcher_rpc, max_rate, max_burst,
          username, password):
    """Configure the pyspider web UI application and serve it.

    Args:
        ctx: Invocation context. ``ctx.obj`` supplies ``taskdb``,
            ``projectdb``, ``resultdb``, ``phantomjs_proxy`` and ``debug``,
            and ``ctx`` itself is forwarded to ``connect_rpc``.
        host: Host passed to ``app.run``.
        port: Port passed to ``app.run``.
        cdn: Stored as ``app.config['cdn']``.
        scheduler_rpc: An RPC client, a URL string to connect to, or ``None``
            to pick a default address (see the inline comments below).
        fetcher_rpc: An RPC client, a URL string to connect to, or ``None``
            to fetch in-process with a local ``Fetcher``.
        max_rate: Stored as ``app.config['max_rate']`` when truthy.
        max_burst: Stored as ``app.config['max_burst']`` when truthy.
        username: Stored as ``app.config['webui_username']`` when truthy.
        password: Stored as ``app.config['webui_password']`` when truthy.

    Returns:
        The application when ``ctx.obj`` has a truthy ``testing_mode``;
        otherwise ``None`` once ``app.run`` returns.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string checks below do not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    g = ctx.obj
    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['cdn'] = cdn

    # Optional settings are only written when a truthy value was supplied.
    if max_rate:
        app.config['max_rate'] = max_rate
    if max_burst:
        app.config['max_burst'] = max_burst
    if username:
        app.config['webui_username'] = username
    if password:
        app.config['webui_password'] = password

    # fetcher rpc
    if isinstance(fetcher_rpc, string_types):
        fetcher_rpc = connect_rpc(ctx, None, fetcher_rpc)
    if fetcher_rpc is None:
        from pyspider.fetcher.tornado_fetcher import Fetcher
        # ``async`` is a reserved word from Python 3.7 on, so spelling it as a
        # literal keyword argument is a SyntaxError there. Unpacking a dict
        # passes exactly the same keyword on every interpreter version.
        fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
        fetcher.phantomjs_proxy = g.phantomjs_proxy
        app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    else:
        import umsgpack
        app.config['fetch'] = lambda x: umsgpack.unpackb(fetcher_rpc.fetch(x).data)

    # scheduler rpc
    if isinstance(scheduler_rpc, string_types):
        scheduler_rpc = connect_rpc(ctx, None, scheduler_rpc)
    if scheduler_rpc is None and os.environ.get('SCHEDULER_NAME'):
        # SCHEDULER_PORT_23333_TCP is expected to look like "tcp://host:port";
        # strip the scheme and reuse the remainder as an HTTP address.
        app.config['scheduler_rpc'] = connect_rpc(ctx, None, 'http://%s/' % (
            os.environ['SCHEDULER_PORT_23333_TCP'][len('tcp://'):]))
    elif scheduler_rpc is None:
        app.config['scheduler_rpc'] = connect_rpc(ctx, None, 'http://localhost:23333/')
    else:
        app.config['scheduler_rpc'] = scheduler_rpc

    app.debug = g.debug
    if g.get('testing_mode'):
        # Hand the configured application back instead of serving it.
        return app

    app.run(host=host, port=port)
def run_webui(g=g):
    """Configure the web UI with an in-process fetcher and run it.

    A local ``Fetcher`` is created (no queues, non-async) and exposed to the
    web UI as ``app.config['fetch']``. The databases and ``scheduler_rpc``
    are copied from ``g``, and the application is then served on
    ``g.webui_host``/``g.webui_port``.

    Environment:
        WEBUI_USERNAME: When present, stored as the web UI username;
            ``WEBUI_PASSWORD`` (default ``''``) is stored as the password.

    Args:
        g: Object exposing the databases, ``scheduler_rpc``,
            ``phantomjs_proxy``, ``demo_mode``, ``debug``, ``webui_host`` and
            ``webui_port``; defaults to the module-level ``g``.
    """
    # The original also did ``import cPickle as pickle`` here. It was never
    # used and the module does not exist on Python 3, so it has been dropped.
    from pyspider.fetcher.tornado_fetcher import Fetcher
    # ``async`` is a reserved word from Python 3.7 on, so spelling it as a
    # literal keyword argument is a SyntaxError there. Unpacking a dict passes
    # exactly the same keyword on every interpreter version.
    fetcher = Fetcher(inqueue=None, outqueue=None, **{'async': False})
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    from pyspider.webui.app import app
    app.config['taskdb'] = g.taskdb
    app.config['projectdb'] = g.projectdb
    app.config['resultdb'] = g.resultdb
    app.config['fetch'] = lambda x: fetcher.fetch(x)[1]
    app.config['scheduler_rpc'] = g.scheduler_rpc
    #app.config['cdn'] = '//cdnjs.cloudflare.com/ajax/libs/'

    if g.demo_mode:
        # Demo mode installs fixed, low rate limits.
        app.config['max_rate'] = 0.2
        app.config['max_burst'] = 3.0
    if 'WEBUI_USERNAME' in os.environ:
        app.config['webui_username'] = os.environ['WEBUI_USERNAME']
        app.config['webui_password'] = os.environ.get('WEBUI_PASSWORD', '')
    if not getattr(g, 'all_in_one', False):
        # The debug flag is only applied when ``g.all_in_one`` is unset/falsy.
        app.debug = g.debug

    app.run(host=g.webui_host, port=g.webui_port)
def bench(ctx, fetcher_num, processor_num, result_worker_num, run_in, total, show):
    """Start every component with its bench variant and run the 'bench' project.

    A project named ``bench`` is inserted into the project database, logging
    is lowered to ERROR, the requested number of result workers, processors
    and fetchers plus one scheduler are launched in the background, and the
    web UI is then driven from the calling thread. After ``app.run`` returns,
    every entry of ``g.instances`` is asked to quit and every launched worker
    is joined.

    Args:
        ctx: Invocation context; ``ctx.obj`` holds shared state and config.
        fetcher_num: Number of fetchers to launch.
        processor_num: Number of processors to launch.
        result_worker_num: Number of result workers to launch; ``0`` also
            sets ``ctx.obj['processor2result']`` to ``None``.
        run_in: ``'subprocess'`` to launch through ``run_in_subprocess``
            (ignored when ``os.name`` is ``'nt'``); anything else uses
            ``run_in_thread``.
        total: Substituted into the bench script template as ``total``.
        show: Substituted into the bench script template as ``show``.
    """
    from pyspider.libs import bench
    # Not referenced below; presumably imported for its side effects - confirm.
    from pyspider.webui import bench_test

    ctx.obj['debug'] = False
    g = ctx.obj
    if result_worker_num == 0:
        g['processor2result'] = None

    # Pick the launcher; subprocess mode is not used when os.name is 'nt'.
    wants_subprocess = run_in == 'subprocess' and os.name != 'nt'
    launch = run_in_subprocess if wants_subprocess else run_in_thread

    g.projectdb.insert('bench', {
        'name': 'bench',
        'status': 'RUNNING',
        'script': bench.bench_script % {'total': total, 'show': show},
        'rate': 100000000000000,
        'burst': 10000000000000000,
        'updatetime': time.time()
    })

    # disable log
    logging.getLogger().setLevel(logging.ERROR)
    for component in ('scheduler', 'fetcher', 'processor', 'result', 'webui'):
        logging.getLogger(component).setLevel(logging.ERROR)

    threads = []

    # result worker
    result_worker_config = g.config.get('result_worker', {})
    threads.extend(
        launch(ctx.invoke, result_worker,
               ResultWorker=bench.BenchResultWorker, **result_worker_config)
        for _ in range(result_worker_num)
    )

    # processor
    processor_config = g.config.get('processor', {})
    threads.extend(
        launch(ctx.invoke, processor,
               Processor=bench.BenchProcessor, **processor_config)
        for _ in range(processor_num)
    )

    # fetcher
    fetcher_config = g.config.get('fetcher', {})
    fetcher_config.setdefault('xmlrpc_host', '127.0.0.1')
    threads.extend(
        launch(ctx.invoke, fetcher,
               Fetcher=bench.BenchFetcher, **fetcher_config)
        for _ in range(fetcher_num)
    )

    # scheduler
    scheduler_config = g.config.get('scheduler', {})
    scheduler_config.setdefault('xmlrpc_host', '127.0.0.1')
    threads.append(
        launch(ctx.invoke, scheduler,
               Scheduler=bench.BenchScheduler, **scheduler_config)
    )

    # running webui in main thread to make it exitable
    webui_config = g.config.get('webui', {})
    scheduler_port = g.config.get('scheduler', {}).get('xmlrpc_port', 23333)
    webui_config.setdefault('scheduler_rpc', 'http://localhost:%s/' % scheduler_port)
    # In testing mode the webui command returns the app instead of serving it.
    g['testing_mode'] = True
    app = ctx.invoke(webui, **webui_config)

    # run project
    response = app.test_client().post('/run', data={'project': 'bench'})
    assert response.status_code == 200, 'run project error'
    app.run('127.0.0.1', 5000)

    # Shut everything down once the web UI has stopped serving.
    for instance in g.instances:
        instance.quit()
    for worker in threads:
        worker.join()