def xmlrpc_run(self, port=24444, bind='127.0.0.1', logRequests=False): '''Run xmlrpc server''' import umsgpack from pyspider.libs.wsgi_xmlrpc import WSGIXMLRPCApplication try: from xmlrpc.client import Binary except ImportError: from xmlrpclib import Binary application = WSGIXMLRPCApplication() application.register_function(self.quit, '_quit') application.register_function(self.size) def sync_fetch(task): result = self.sync_fetch(task) result = Binary(umsgpack.packb(result)) return result application.register_function(sync_fetch, 'fetch') def dump_counter(_time, _type): return self._cnt[_time].to_dict(_type) application.register_function(dump_counter, 'counter') container = tornado.wsgi.WSGIContainer(application) self.xmlrpc_ioloop = tornado.ioloop.IOLoop() self.xmlrpc_server = tornado.httpserver.HTTPServer(container, io_loop=self.xmlrpc_ioloop) self.xmlrpc_server.listen(port=port, address=bind) logger.info('fetcher.xmlrpc listening on %s:%s', bind, port) self.xmlrpc_ioloop.start()
def xmlrpc_run(self, port=24444, bind='127.0.0.1', logRequests=False): '''Run xmlrpc server''' import umsgpack from pyspider.libs.wsgi_xmlrpc import WSGIXMLRPCApplication try: from xmlrpc.client import Binary except ImportError: from xmlrpclib import Binary application = WSGIXMLRPCApplication() application.register_function(self.quit, '_quit') application.register_function(self.size) def sync_fetch(task): result = self.sync_fetch(task) result = Binary(umsgpack.packb(result)) return result application.register_function(sync_fetch, 'fetch') def dump_counter(_time, _type): return self._cnt[_time].to_dict(_type) application.register_function(dump_counter, 'counter') import tornado.wsgi import tornado.ioloop import tornado.httpserver container = tornado.wsgi.WSGIContainer(application) self.xmlrpc_ioloop = tornado.ioloop.IOLoop() self.xmlrpc_server = tornado.httpserver.HTTPServer(container, io_loop=self.xmlrpc_ioloop) self.xmlrpc_server.listen(port=port, address=bind) logger.info('fetcher.xmlrpc listening on %s:%s', bind, port) self.xmlrpc_ioloop.start()
def xmlrpc_run(self, port=23333, bind='127.0.0.1', logRequests=False): '''Start xmlrpc interface''' from pyspider.libs.wsgi_xmlrpc import WSGIXMLRPCApplication application = WSGIXMLRPCApplication() application.register_function(self.quit, '_quit') application.register_function(self.__len__, 'size') def dump_counter(_time, _type): try: return self._cnt[_time].to_dict(_type) except: logger.exception('') application.register_function(dump_counter, 'counter') def new_task(task): if self.task_verify(task): self.newtask_queue.put(task) return True return False application.register_function(new_task, 'newtask') def send_task(task): '''dispatch task to fetcher''' self.send_task(task) return True application.register_function(send_task, 'send_task') def update_project(): self._force_update_project = True application.register_function(update_project, 'update_project') def get_active_tasks(project=None, limit=100): allowed_keys = set(( 'type', 'taskid', 'project', 'status', 'url', 'lastcrawltime', 'updatetime', 'track', )) track_allowed_keys = set(( 'ok', 'time', 'follows', 'status_code', )) iters = [iter(x.active_tasks) for k, x in iteritems(self.projects) if x and (k == project if project else True)] tasks = [next(x, None) for x in iters] result = [] while len(result) < limit and tasks and not all(x is None for x in tasks): updatetime, task = t = max(t for t in tasks if t) i = tasks.index(t) tasks[i] = next(iters[i], None) for key in list(task): if key == 'track': for k in list(task[key].get('fetch', [])): if k not in track_allowed_keys: del task[key]['fetch'][k] for k in list(task[key].get('process', [])): if k not in track_allowed_keys: del task[key]['process'][k] if key in allowed_keys: continue del task[key] result.append(t) # fix for "<type 'exceptions.TypeError'>:dictionary key must be string" # have no idea why return json.loads(json.dumps(result)) application.register_function(get_active_tasks, 'get_active_tasks') def get_projects_pause_status(): result = {} for project_name, project in iteritems(self.projects): result[project_name] = project.paused return result application.register_function(get_projects_pause_status, 'get_projects_pause_status') def webui_update(): return { 'pause_status': get_projects_pause_status(), 'counter': { '5m_time': dump_counter('5m_time', 'avg'), '5m': dump_counter('5m', 'sum'), '1h': dump_counter('1h', 'sum'), '1d': dump_counter('1d', 'sum'), 'all': dump_counter('all', 'sum'), }, } application.register_function(webui_update, 'webui_update') import tornado.wsgi import tornado.ioloop import tornado.httpserver container = tornado.wsgi.WSGIContainer(application) self.xmlrpc_ioloop = tornado.ioloop.IOLoop() self.xmlrpc_server = tornado.httpserver.HTTPServer(container, io_loop=self.xmlrpc_ioloop) self.xmlrpc_server.listen(port=port, address=bind) logger.info('scheduler.xmlrpc listening on %s:%s', bind, port) self.xmlrpc_ioloop.start()