def run_scheduler():
    """Build a Scheduler on the surrounding queues and call its ``run()``.

    ``xmlrpc_run`` is handed to ``run_in_thread`` first, then ``run()`` is
    invoked directly from the caller of this function.
    """
    from scheduler import Scheduler

    # Both databases are opened before the constructor call, in the same
    # order the keyword arguments would evaluate them.
    task_store = get_taskdb()
    project_store = get_projectdb()

    sched = Scheduler(
        taskdb=task_store,
        projectdb=project_store,
        newtask_queue=newtask_queue,
        status_queue=status_queue,
        out_queue=scheduler2fetcher,
    )

    run_in_thread(sched.xmlrpc_run, port=scheduler_xmlrpc_port)
    sched.run()
def run_fetcher(g=g):
    """Create a Fetcher wired to the queues on ``g`` and call its ``run()``.

    :param g: object providing ``scheduler2fetcher``, ``fetcher2processor``,
        ``phantomjs_proxy``, ``fetcher_xmlrpc_port`` and ``webui_host``.
        Defaults to the ``g`` visible where this function is defined.
    """
    from fetcher.tornado_fetcher import Fetcher

    worker = Fetcher(
        inqueue=g.scheduler2fetcher,
        outqueue=g.fetcher2processor,
    )
    worker.phantomjs_proxy = g.phantomjs_proxy

    # xmlrpc_run is passed to run_in_thread; run() is called directly here.
    run_in_thread(
        worker.xmlrpc_run,
        port=g.fetcher_xmlrpc_port,
        bind=g.webui_host,
    )
    worker.run()
def setUpClass(self):
    """Create a Fetcher on two ``Queue(10)`` objects and start it.

    Stores on ``self``: both queues, the fetcher, an XML-RPC client proxy
    pointing at localhost:24444, and the two values returned by
    ``utils.run_in_thread`` (for ``xmlrpc_run`` and for ``run``).
    """
    rpc_port = 24444

    self.inqueue = Queue(10)
    self.outqueue = Queue(10)

    fetcher = Fetcher(self.inqueue, self.outqueue)
    self.fetcher = fetcher

    # Client proxy and server use the same port number.
    self.rpc = xmlrpclib.ServerProxy('http://localhost:%d' % rpc_port)
    self.xmlrpc_thread = utils.run_in_thread(fetcher.xmlrpc_run, port=rpc_port)
    self.thread = utils.run_in_thread(fetcher.run)
def setUpClass(self):
    """Set up the fetcher fixture shared by the tests.

    A Fetcher is built on ``self.inqueue`` / ``self.outqueue`` (each a
    ``Queue(10)``). Its ``xmlrpc_run`` (on port 24444) and ``run`` methods
    are each passed to ``utils.run_in_thread``. ``self.rpc`` is a
    ``ServerProxy`` for the same port on localhost.
    """
    self.inqueue = Queue(10)
    self.outqueue = Queue(10)
    self.fetcher = Fetcher(self.inqueue, self.outqueue)

    server_url = "http://localhost:%d" % 24444
    self.rpc = xmlrpclib.ServerProxy(server_url)

    self.xmlrpc_thread = utils.run_in_thread(
        self.fetcher.xmlrpc_run,
        port=24444,
    )
    self.thread = utils.run_in_thread(self.fetcher.run)
def run_scheduler():
    """Build a Scheduler for the test fixture and call its ``run()``.

    ``self`` is not a parameter; it is taken from the enclosing scope. The
    scheduler stores its data under ``./test/data/`` and has its update and
    loop intervals overridden before it is started.
    """
    sched = Scheduler(
        taskdb=get_taskdb(),
        projectdb=get_projectdb(),
        newtask_queue=self.newtask_queue,
        status_queue=self.status_queue,
        out_queue=self.scheduler2fetcher,
        data_path="./test/data/",
    )

    # Instance-level overrides of the scheduler's interval settings.
    for attr_name, seconds in (
        ('UPDATE_PROJECT_INTERVAL', 0.05),
        ('LOOP_INTERVAL', 0.01),
    ):
        setattr(sched, attr_name, seconds)

    run_in_thread(sched.xmlrpc_run, port=self.scheduler_xmlrpc_port)
    sched.run()
def run_scheduler():
    """Build a Scheduler for the test fixture and call its ``run()``.

    ``self`` comes from the enclosing scope. Data is kept under
    ``./test/data/``; both interval settings are set to 0.1 on the instance.
    """
    task_store = get_taskdb()
    project_store = get_projectdb()
    sched = Scheduler(
        taskdb=task_store,
        projectdb=project_store,
        newtask_queue=self.newtask_queue,
        status_queue=self.status_queue,
        out_queue=self.scheduler2fetcher,
        data_path="./test/data/",
    )

    sched.UPDATE_PROJECT_INTERVAL = 0.1
    sched.LOOP_INTERVAL = 0.1
    # Mark "now" as the last tick so that cronjobs are not dispatched.
    sched._last_tick = time.time()

    run_in_thread(sched.xmlrpc_run, port=self.scheduler_xmlrpc_port)
    sched.run()
def run_scheduler(g=g):
    """Create a Scheduler from the resources on ``g`` and call its ``run()``.

    :param g: object providing the three databases, the three queues,
        ``demo_mode``, ``scheduler_xmlrpc_port`` and ``webui_host``.
        Defaults to the ``g`` visible where this function is defined.
    """
    from scheduler import Scheduler

    sched = Scheduler(
        taskdb=g.taskdb,
        projectdb=g.projectdb,
        resultdb=g.resultdb,
        newtask_queue=g.newtask_queue,
        status_queue=g.status_queue,
        out_queue=g.scheduler2fetcher,
    )

    # In demo mode the in-queue limit is set to 1000 on this instance.
    if g.demo_mode:
        sched.INQUEUE_LIMIT = 1000

    run_in_thread(
        sched.xmlrpc_run,
        port=g.scheduler_xmlrpc_port,
        bind=g.webui_host,
    )
    sched.run()
def setUpClass(self):
    """Prepare databases, queues and a running Scheduler for the tests.

    Recreates ``./test/data/`` from scratch, opens the task/project/result
    databases, creates three ``Queue(10)`` objects and an XML-RPC client
    proxy, then passes ``run_scheduler`` to ``run_in_thread``. The value
    returned by ``run_in_thread`` is stored as ``self.process``.
    """
    shutil.rmtree('./test/data/', ignore_errors=True)
    os.makedirs('./test/data/')

    def get_taskdb():
        return taskdb.TaskDB(self.taskdb_path)
    self.taskdb = get_taskdb()

    def get_projectdb():
        return projectdb.ProjectDB(self.projectdb_path)
    self.projectdb = get_projectdb()

    def get_resultdb():
        return resultdb.ResultDB(self.resultdb_path)
    self.resultdb = get_resultdb()

    self.newtask_queue = Queue(10)
    self.status_queue = Queue(10)
    self.scheduler2fetcher = Queue(10)
    self.rpc = xmlrpclib.ServerProxy('http://localhost:%d' % self.scheduler_xmlrpc_port)

    def run_scheduler():
        # The scheduler gets its own database handles, separate from the
        # ones stored on ``self`` above.
        scheduler = Scheduler(taskdb=get_taskdb(), projectdb=get_projectdb(),
                              newtask_queue=self.newtask_queue,
                              status_queue=self.status_queue,
                              out_queue=self.scheduler2fetcher,
                              data_path="./test/data/",
                              resultdb=get_resultdb())
        scheduler.UPDATE_PROJECT_INTERVAL = 0.1
        scheduler.LOOP_INTERVAL = 0.1
        scheduler.INQUEUE_LIMIT = 10
        # Set on the instance, like the other overrides, so the Scheduler
        # class itself is left untouched for anything else in this process.
        scheduler.DELETE_TIME = 0
        scheduler._last_tick = int(time.time())  # not dispatch cronjob
        run_in_thread(scheduler.xmlrpc_run, port=self.scheduler_xmlrpc_port)
        scheduler.run()

    self.process = run_in_thread(run_scheduler)
    # Presumably gives the scheduler time to come up before tests run.
    time.sleep(1)
def test_40_multiple_threading_error(self):
    """Move 100 items through ``self.q3`` with a concurrent producer.

    ``put`` is handed to ``utils.run_in_thread`` while ``get`` runs in the
    calling thread. The test makes no assertions; it passes when all 100
    ``get()`` calls return without raising.
    """
    item_count = 100

    def put(q):
        for i in range(item_count):
            q.put("DATA_%d" % i)

    def get(q):
        for _ in range(item_count):
            q.get()

    # The return value of run_in_thread was never used, so it is not bound.
    utils.run_in_thread(put, self.q3)
    get(self.q3)
def setUpClass(self):
    """Prepare a result database, an input queue and a running ResultWorker.

    ``./test/data/`` is removed (errors ignored) and recreated. The worker
    is created inside ``run_result_worker``, which is passed to
    ``run_in_thread``; the returned value is kept as ``self.process``.
    """
    data_dir = './test/data/'
    shutil.rmtree(data_dir, ignore_errors=True)
    os.makedirs(data_dir)

    def open_resultdb():
        return resultdb.ResultDB(self.resultdb_path)

    self.resultdb = open_resultdb()
    self.inqueue = Queue(10)

    def run_result_worker():
        # The worker uses its own ResultDB handle, not ``self.resultdb``.
        worker = ResultWorker(open_resultdb(), self.inqueue)
        self.result_worker = worker
        worker.run()

    self.process = run_in_thread(run_result_worker)
    # Presumably gives the worker time to start before tests run.
    time.sleep(1)
def scheduler(ctx, xmlrpc, xmlrpc_host, xmlrpc_port,
              inqueue_limit, delete_time, active_tasks, loop_limit,
              scheduler_cls):
    """
    Run Scheduler, only one scheduler is allowed.
    """
    # NOTE(review): the docstring text is left as-is; it may be shown as
    # command help by the CLI framework -- confirm before rewording.
    g = ctx.obj
    scheduler_class = load_cls(None, None, scheduler_cls)

    instance = scheduler_class(
        taskdb=g.taskdb,
        projectdb=g.projectdb,
        resultdb=g.resultdb,
        newtask_queue=g.newtask_queue,
        status_queue=g.status_queue,
        out_queue=g.scheduler2fetcher,
        data_path=g.get('data_path', 'data'),
    )

    # Apply the command-line tuning values to this instance.
    for attr_name, value in (
        ('INQUEUE_LIMIT', inqueue_limit),
        ('DELETE_TIME', delete_time),
        ('ACTIVE_TASKS', active_tasks),
        ('LOOP_LIMIT', loop_limit),
    ):
        setattr(instance, attr_name, value)

    g.instances.append(instance)

    # In testing mode the configured instance is returned without running.
    if g.get('testing_mode'):
        return instance

    if xmlrpc:
        utils.run_in_thread(instance.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host)
    instance.run()
def setUpClass(self):
    """Prepare a project database, four queues and a running Processor.

    ``./test/data/`` is removed (errors ignored) and recreated. The
    Processor is created inside ``run_processor``, which is passed to
    ``run_in_thread``; the returned value is kept as ``self.process``.
    """
    data_dir = './test/data/'
    shutil.rmtree(data_dir, ignore_errors=True)
    os.makedirs(data_dir)

    def open_projectdb():
        return projectdb.ProjectDB(self.projectdb_path)

    self.projectdb = open_projectdb()

    self.in_queue = Queue(10)
    self.status_queue = Queue(10)
    self.newtask_queue = Queue(10)
    self.result_queue = Queue(10)

    def run_processor():
        # The processor uses its own ProjectDB handle, not ``self.projectdb``.
        worker = Processor(
            open_projectdb(),
            self.in_queue,
            self.status_queue,
            self.newtask_queue,
            self.result_queue,
        )
        self.processor = worker
        worker.CHECK_PROJECTS_INTERVAL = 0.1
        worker.run()

    self.process = run_in_thread(run_processor)
    # Presumably gives the processor time to start before tests run.
    time.sleep(1)
def run_fetcher():
    """Create a Fetcher between the surrounding queues and call its ``run()``.

    ``xmlrpc_run`` is handed to ``run_in_thread`` on ``fetcher_xmlrpc_port``
    before ``run()`` is invoked directly.
    """
    from fetcher.tornado_fetcher import Fetcher

    worker = Fetcher(
        inqueue=scheduler2fetcher,
        outqueue=fetcher2processor,
    )
    run_in_thread(worker.xmlrpc_run, port=fetcher_xmlrpc_port)
    worker.run()
def setUp(self):
    """Create a Fetcher with no queues and start its ``run`` method.

    The fetcher is stored as ``self.fetcher`` and the value returned by
    ``utils.run_in_thread`` as ``self.thread``.
    """
    fetcher = Fetcher(None, None)
    self.fetcher = fetcher
    self.thread = utils.run_in_thread(fetcher.run)
fetcher = Fetcher(inqueue=g.scheduler2fetcher, outqueue=g.fetcher2processor, poolsize=poolsize, proxy=proxy, async=async) fetcher.phantomjs_proxy = g.phantomjs_proxy if user_agent: fetcher.user_agent = user_agent if timeout: fetcher.default_options = copy.deepcopy(fetcher.default_options) fetcher.default_options['timeout'] = timeout g.instances.append(fetcher) if g.get('testing_mode'): return fetcher if xmlrpc: utils.run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host) fetcher.run() @cli.command() @click.option('--processor-cls', default='pyspider.processor.Processor', callback=load_cls, help='Processor class to be used.') @click.pass_context def processor(ctx, processor_cls, enable_stdout_capture=True): """ Run Processor. """ g = ctx.obj Processor = load_cls(None, None, processor_cls) processor = Processor(projectdb=g.projectdb,