コード例 #1
0
class RPCServer(threading.Thread):
    """
    The RPCServer thread provides an API for external programs to interact
    with MOM.
    """
    def __init__(self, config, momFuncs):
        threading.Thread.__init__(self, name="RPCServer")
        self.setDaemon(True)
        self.config = config
        self.momFuncs = momFuncs
        self.logger = logging.getLogger('mom.RPCServer')
        self.server = None
        self.start()

    def thread_ok(self):
        if self.server is None:
            return True
        return self.isAlive()

    def create_server(self):
        try:
            unix_port = None
            port = self.config.getint('main', 'rpc-port')
        except ValueError:
            port = None
            unix_port = self.config.get('main', 'rpc-port')
            self.logger.info("Using unix socket "+unix_port)

        if unix_port is None and (port is None or port < 0):
            return None

        if unix_port:
            self.server = UnixXmlRpcServer(unix_port)
        else:
            self.server = SimpleXMLRPCServer(
                ("localhost", port),
                requestHandler=RequestHandler,
                logRequests=0
            )

        self.server.register_introspection_functions()
        self.server.register_instance(self.momFuncs)

    def shutdown(self):
        if self.server is not None:
            self.server.shutdown()

    def run(self):
        try:
            self.create_server()
            if self.server is not None:
                self.logger.info("RPC Server starting")
                self.server.serve_forever()
                self.logger.info("RPC Server ending")
            else:
                self.logger.info("RPC Server is disabled")
        except Exception as e:
            self.logger.error("RPC Server crashed", exc_info=True)
コード例 #2
0
ファイル: scheduler.py プロジェクト: Cloudebug/pyspider
    def xmlrpc_run(self, port=23333, bind='127.0.0.1', logRequests=False):
        '''Start xmlrpc interface'''
        try:
            from six.moves.xmlrpc_server import SimpleXMLRPCServer
        except ImportError:
            from SimpleXMLRPCServer import SimpleXMLRPCServer

        server = SimpleXMLRPCServer((bind, port), allow_none=True, logRequests=logRequests)
        server.register_introspection_functions()
        server.register_multicall_functions()

        server.register_function(self.quit, '_quit')
        server.register_function(self.__len__, 'size')

        def dump_counter(_time, _type):
            try:
                return self._cnt[_time].to_dict(_type)
            except:
                logger.exception('')
        server.register_function(dump_counter, 'counter')

        def new_task(task):
            if self.task_verify(task):
                self.newtask_queue.put(task)
                return True
            return False
        server.register_function(new_task, 'newtask')

        def send_task(task):
            '''dispatch task to fetcher'''
            self.send_task(task)
            return True
        server.register_function(send_task, 'send_task')

        def update_project():
            self._force_update_project = True
        server.register_function(update_project, 'update_project')

        def get_active_tasks(project=None, limit=100):
            allowed_keys = set((
                'taskid',
                'project',
                'status',
                'url',
                'lastcrawltime',
                'updatetime',
                'track',
            ))
            track_allowed_keys = set((
                'ok',
                'time',
                'follows',
                'status_code',
            ))

            iters = [iter(x['active_tasks']) for k, x in iteritems(self.projects)
                     if x and (k == project if project else True)]
            tasks = [next(x, None) for x in iters]
            result = []

            while len(result) < limit and tasks and not all(x is None for x in tasks):
                updatetime, task = t = max(tasks)
                i = tasks.index(t)
                tasks[i] = next(iters[i], None)
                for key in list(task):
                    if key == 'track':
                        for k in list(task[key].get('fetch', [])):
                            if k not in track_allowed_keys:
                                del task[key]['fetch'][k]
                        for k in list(task[key].get('process', [])):
                            if k not in track_allowed_keys:
                                del task[key]['process'][k]
                    if key in allowed_keys:
                        continue
                    del task[key]
                result.append(t)
            # fix for "<type 'exceptions.TypeError'>:dictionary key must be string"
            # have no idea why
            return json.loads(json.dumps(result))
        server.register_function(get_active_tasks, 'get_active_tasks')

        server.timeout = 0.5
        while not self._quit:
            server.handle_request()
        server.server_close()
コード例 #3
0
ファイル: scheduler.py プロジェクト: rtk4616/pyspider
    def xmlrpc_run(self, port=23333, bind='127.0.0.1', logRequests=False):
        '''Start xmlrpc interface'''
        try:
            from six.moves.xmlrpc_server import SimpleXMLRPCServer
        except ImportError:
            from SimpleXMLRPCServer import SimpleXMLRPCServer

        server = SimpleXMLRPCServer((bind, port), allow_none=True, logRequests=logRequests)
        server.register_introspection_functions()
        server.register_multicall_functions()

        server.register_function(self.quit, '_quit')
        server.register_function(self.__len__, 'size')

        def dump_counter(_time, _type):
            return self._cnt[_time].to_dict(_type)
        server.register_function(dump_counter, 'counter')

        def new_task(task):
            if self.task_verify(task):
                self.newtask_queue.put(task)
                return True
            return False
        server.register_function(new_task, 'newtask')

        def update_project():
            self._force_update_project = True
        server.register_function(update_project, 'update_project')

        def get_active_tasks(project=None, limit=100):
            allowed_keys = set((
                'taskid',
                'project',
                'status',
                'url',
                'lastcrawltime',
                'updatetime',
                'track',
            ))
            track_allowed_keys = set((
                'ok',
                'time',
                'follows',
                'status_code',
            ))

            iters = [iter(x['active_tasks']) for k, x in iteritems(self.projects)
                     if x and (k == project if project else True)]
            tasks = [next(x, None) for x in iters]
            result = []

            while len(result) < limit and tasks and not all(x is None for x in tasks):
                updatetime, task = t = max(tasks)
                i = tasks.index(t)
                tasks[i] = next(iters[i], None)
                for key in list(task):
                    if key == 'track':
                        for k in list(task[key].get('fetch', [])):
                            if k not in track_allowed_keys:
                                del task[key]['fetch'][k]
                        for k in list(task[key].get('process', [])):
                            if k not in track_allowed_keys:
                                del task[key]['process'][k]
                    if key in allowed_keys:
                        continue
                    del task[key]
                result.append(t)
            # fix for "<type 'exceptions.TypeError'>:dictionary key must be string"
            # have no idea why
            return json.loads(json.dumps(result))
        server.register_function(get_active_tasks, 'get_active_tasks')

        server.timeout = 0.5
        while not self._quit:
            server.handle_request()
        server.server_close()
コード例 #4
0
ファイル: scheduler.py プロジェクト: railroadman/pyspider
    def xmlrpc_run(self, port=23333, bind="127.0.0.1", logRequests=False):
        """Start xmlrpc interface"""
        try:
            from six.moves.xmlrpc_server import SimpleXMLRPCServer
        except ImportError:
            from SimpleXMLRPCServer import SimpleXMLRPCServer

        server = SimpleXMLRPCServer((bind, port), allow_none=True, logRequests=logRequests)
        server.register_introspection_functions()
        server.register_multicall_functions()

        server.register_function(self.quit, "_quit")
        server.register_function(self.__len__, "size")

        def dump_counter(_time, _type):
            try:
                return self._cnt[_time].to_dict(_type)
            except:
                logger.exception("")

        server.register_function(dump_counter, "counter")

        def new_task(task):
            if self.task_verify(task):
                self.newtask_queue.put(task)
                return True
            return False

        server.register_function(new_task, "newtask")

        def send_task(task):
            """dispatch task to fetcher"""
            self.send_task(task)
            return True

        server.register_function(send_task, "send_task")

        def update_project():
            self._force_update_project = True

        server.register_function(update_project, "update_project")

        def get_active_tasks(project=None, limit=100):
            allowed_keys = set(("taskid", "project", "status", "url", "lastcrawltime", "updatetime", "track"))
            track_allowed_keys = set(("ok", "time", "follows", "status_code"))

            iters = [
                iter(x["active_tasks"])
                for k, x in iteritems(self.projects)
                if x and (k == project if project else True)
            ]
            tasks = [next(x, None) for x in iters]
            result = []

            while len(result) < limit and tasks and not all(x is None for x in tasks):
                updatetime, task = t = max(t for t in tasks if t)
                i = tasks.index(t)
                tasks[i] = next(iters[i], None)
                for key in list(task):
                    if key == "track":
                        for k in list(task[key].get("fetch", [])):
                            if k not in track_allowed_keys:
                                del task[key]["fetch"][k]
                        for k in list(task[key].get("process", [])):
                            if k not in track_allowed_keys:
                                del task[key]["process"][k]
                    if key in allowed_keys:
                        continue
                    del task[key]
                result.append(t)
            # fix for "<type 'exceptions.TypeError'>:dictionary key must be string"
            # have no idea why
            return json.loads(json.dumps(result))

        server.register_function(get_active_tasks, "get_active_tasks")

        server.timeout = 0.5
        while not self._quit:
            server.handle_request()
        server.server_close()