def setUpClass(self):
    """Spin up the fixtures shared by this test class.

    Creates an in-memory ProjectDB from the handler script, a synchronous
    Fetcher, the three inter-component queues, an httpbin server subprocess
    on port 14887, a pyproxy subprocess on port 14830, and a Processor wired
    to those queues.

    Side effects: starts two subprocesses (httpbin, pyproxy) that the
    matching tearDownClass is expected to terminate.
    """
    self.projectdb = ProjectDB([
        os.path.join(os.path.dirname(__file__), 'data_fetcher_processor_handler.py')
    ])
    # BUGFIX: `Fetcher(None, None, async=False)` is a SyntaxError on
    # Python >= 3.7 where `async` is a reserved keyword.  Passing the
    # keyword via ** keeps the parameter name intact while parsing on
    # all Python 3 versions.  (Upstream later renamed it `async_mode`;
    # if this Fetcher uses the new name, change the key accordingly.)
    self.fetcher = Fetcher(None, None, **{'async': False})
    self.status_queue = Queue()
    self.newtask_queue = Queue()
    self.result_queue = Queue()
    # Local httpbin instance so the tests do not depend on the network.
    self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run,
                                                  port=14887,
                                                  passthrough_errors=False)
    self.httpbin = 'http://127.0.0.1:14887'
    # Authenticated HTTP proxy used by proxy-related test cases.
    self.proxy_thread = subprocess.Popen([
        'pyproxy',
        '--username=binux',
        '--password=123456',
        '--port=14830',
        '--debug',
    ], close_fds=True)
    self.proxy = '127.0.0.1:14830'
    self.processor = Processor(projectdb=self.projectdb,
                               inqueue=None,
                               status_queue=self.status_queue,
                               newtask_queue=self.newtask_queue,
                               result_queue=self.result_queue)
    self.project_name = 'data_fetcher_processor_handler'
    # Give the subprocesses a moment to bind their ports before tests run.
    time.sleep(0.5)
def setUpClass(self):
    """Spin up the fixtures shared by this test class (no proxy variant).

    Creates an in-memory ProjectDB from the handler script, a synchronous
    Fetcher, the three inter-component queues, an httpbin server subprocess
    on port 14887, and a Processor wired to those queues.

    Side effects: starts the httpbin subprocess that the matching
    tearDownClass is expected to terminate.
    """
    self.projectdb = ProjectDB([
        os.path.join(os.path.dirname(__file__), 'data_fetcher_processor_handler.py')
    ])
    # BUGFIX: `Fetcher(None, None, async=False)` is a SyntaxError on
    # Python >= 3.7 where `async` is a reserved keyword.  Passing the
    # keyword via ** keeps the parameter name intact while parsing on
    # all Python 3 versions.  (Upstream later renamed it `async_mode`;
    # if this Fetcher uses the new name, change the key accordingly.)
    self.fetcher = Fetcher(None, None, **{'async': False})
    self.status_queue = Queue()
    self.newtask_queue = Queue()
    self.result_queue = Queue()
    # Local httpbin instance so the tests do not depend on the network.
    self.httpbin_thread = utils.run_in_subprocess(httpbin.app.run, port=14887)
    self.httpbin = 'http://127.0.0.1:14887'
    self.processor = Processor(projectdb=self.projectdb,
                               inqueue=None,
                               status_queue=self.status_queue,
                               newtask_queue=self.newtask_queue,
                               result_queue=self.result_queue)
    self.project_name = 'data_fetcher_processor_handler'
    # Give the subprocess a moment to bind its port before tests run.
    time.sleep(0.5)
def one(ctx, interactive, enable_phantomjs, enable_puppeteer, scripts):
    """
    One mode not only means all-in-one, it runs every thing in one process
    over tornado.ioloop, for debug purpose.

    :param ctx: click context carrying the global config object in ``ctx.obj``
    :param interactive: run the OneScheduler in interactive mode
    :param enable_phantomjs: also start a phantomjs proxy subcomponent
    :param enable_puppeteer: also start a puppeteer proxy subcomponent
    :param scripts: optional list of project script paths to load and start
    """
    ctx.obj['debug'] = False
    g = ctx.obj
    g['testing_mode'] = True

    if scripts:
        # Local, file-backed project database built from the given scripts.
        from pyspider.database.local.projectdb import ProjectDB
        g['projectdb'] = ProjectDB(scripts)
        # Only override databases the user did not configure explicitly.
        if g.get('is_taskdb_default'):
            g['taskdb'] = connect_database('sqlite+taskdb://')
        if g.get('is_resultdb_default'):
            g['resultdb'] = None

    if enable_phantomjs:
        phantomjs_config = g.config.get('phantomjs', {})
        phantomjs_obj = ctx.invoke(phantomjs, **phantomjs_config)
        if phantomjs_obj:
            g.setdefault('phantomjs_proxy', '127.0.0.1:%s' % phantomjs_obj.port)
    else:
        phantomjs_obj = None

    if enable_puppeteer:
        puppeteer_config = g.config.get('puppeteer', {})
        puppeteer_obj = ctx.invoke(puppeteer, **puppeteer_config)
        if puppeteer_obj:
            # BUGFIX: was `puppeteer.port` — `puppeteer` is the click command
            # function, not the started component; the port lives on the
            # object returned by ctx.invoke, mirroring the phantomjs branch.
            g.setdefault('puppeteer_proxy', '127.0.0.1:%s' % puppeteer_obj.port)
    else:
        puppeteer_obj = None

    # Result worker: when no resultdb is configured, fall back to the
    # one-mode worker that prints results instead of storing them.
    result_worker_config = g.config.get('result_worker', {})
    if g.resultdb is None:
        result_worker_config.setdefault('result_cls', 'pyspider.result.OneResultWorker')
    result_worker_obj = ctx.invoke(result_worker, **result_worker_config)

    processor_config = g.config.get('processor', {})
    processor_config.setdefault('enable_stdout_capture', False)
    processor_obj = ctx.invoke(processor, **processor_config)

    # xmlrpc is disabled for fetcher and scheduler: everything runs in-process.
    fetcher_config = g.config.get('fetcher', {})
    fetcher_config.setdefault('xmlrpc', False)
    fetcher_obj = ctx.invoke(fetcher, **fetcher_config)

    scheduler_config = g.config.get('scheduler', {})
    scheduler_config.setdefault('xmlrpc', False)
    scheduler_config.setdefault('scheduler_cls', 'pyspider.scheduler.OneScheduler')
    scheduler_obj = ctx.invoke(scheduler, **scheduler_config)

    # Wire every component into the single scheduler/ioloop.
    scheduler_obj.init_one(ioloop=fetcher_obj.ioloop,
                           fetcher=fetcher_obj,
                           processor=processor_obj,
                           result_worker=result_worker_obj,
                           interactive=interactive)
    if scripts:
        for project in g.projectdb.projects:
            scheduler_obj.trigger_on_start(project)

    try:
        scheduler_obj.run()
    finally:
        # Always tear down the scheduler and any browser subprocesses,
        # even if run() raises (e.g. KeyboardInterrupt).
        scheduler_obj.quit()
        if phantomjs_obj:
            phantomjs_obj.quit()
        if puppeteer_obj:
            puppeteer_obj.quit()