def scheduler(ctx, xmlrpc, xmlrpc_host, xmlrpc_port, inqueue_limit, delete_time, active_tasks, loop_limit, scheduler_cls): """ Run Scheduler, only one scheduler is allowed. """ g = ctx.obj Scheduler = load_cls(None, None, scheduler_cls) scheduler = Scheduler(taskdb=g.taskdb, projectdb=g.projectdb, resultdb=g.resultdb, newtask_queue=g.newtask_queue, status_queue=g.status_queue, out_queue=g.scheduler2fetcher, data_path=g.get('data_path', 'data')) scheduler.INQUEUE_LIMIT = inqueue_limit scheduler.DELETE_TIME = delete_time scheduler.ACTIVE_TASKS = active_tasks scheduler.LOOP_LIMIT = loop_limit g.instances.append(scheduler) if g.get('testing_mode'): return scheduler if xmlrpc: utils.run_in_thread(scheduler.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host) scheduler.run()
fetcher = Fetcher(inqueue=g.scheduler2fetcher, outqueue=g.fetcher2processor, poolsize=poolsize, proxy=proxy, async=async) fetcher.phantomjs_proxy = g.phantomjs_proxy if user_agent: fetcher.user_agent = user_agent if timeout: fetcher.default_options = copy.deepcopy(fetcher.default_options) fetcher.default_options['timeout'] = timeout g.instances.append(fetcher) if g.get('testing_mode'): return fetcher if xmlrpc: utils.run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host) fetcher.run() @cli.command() @click.option('--processor-cls', default='spider.processor.Processor', callback=load_cls, help='Processor class to be used.') @click.pass_context def processor(ctx, processor_cls, enable_stdout_capture=True): """ Run Processor. """ g = ctx.obj Processor = load_cls(None, None, processor_cls) processor = Processor(projectdb=g.projectdb,