コード例 #1
0
ファイル: scheduler.py プロジェクト: Cloudebug/pyspider
    def xmlrpc_run(self, port=23333, bind='127.0.0.1', logRequests=False):
        '''Start the XML-RPC interface and serve until ``self._quit`` is set.

        Remote methods registered: ``_quit``, ``size``, ``counter``,
        ``newtask``, ``send_task``, ``update_project`` and
        ``get_active_tasks``.

        :param port: TCP port to listen on.
        :param bind: address the listening socket is bound to.
        :param logRequests: passed through to ``SimpleXMLRPCServer``.
        '''
        try:
            from six.moves.xmlrpc_server import SimpleXMLRPCServer
        except ImportError:
            from SimpleXMLRPCServer import SimpleXMLRPCServer

        server = SimpleXMLRPCServer((bind, port), allow_none=True, logRequests=logRequests)
        server.register_introspection_functions()
        server.register_multicall_functions()

        server.register_function(self.quit, '_quit')
        server.register_function(self.__len__, 'size')

        def dump_counter(_time, _type):
            '''Return ``self._cnt[_time].to_dict(_type)``; log and return None on error.'''
            try:
                return self._cnt[_time].to_dict(_type)
            except Exception:
                # Best effort: the failure is logged and the caller gets None.
                logger.exception('')
        server.register_function(dump_counter, 'counter')

        def new_task(task):
            '''Queue ``task`` if it passes ``task_verify``; report whether it was queued.'''
            if self.task_verify(task):
                self.newtask_queue.put(task)
                return True
            return False
        server.register_function(new_task, 'newtask')

        def send_task(task):
            '''dispatch task to fetcher'''
            self.send_task(task)
            return True
        server.register_function(send_task, 'send_task')

        def update_project():
            '''Raise the ``_force_update_project`` flag on the scheduler.'''
            self._force_update_project = True
        server.register_function(update_project, 'update_project')

        def get_active_tasks(project=None, limit=100):
            '''Return up to ``limit`` ``(updatetime, task)`` entries, largest updatetime first.

            Entries are merged from the ``active_tasks`` of every project, or
            only of ``project`` when one is given. Each task is reduced to the
            whitelisted keys before being returned.
            '''
            allowed_keys = set((
                'taskid',
                'project',
                'status',
                'url',
                'lastcrawltime',
                'updatetime',
                'track',
            ))
            track_allowed_keys = set((
                'ok',
                'time',
                'follows',
                'status_code',
            ))

            iters = [iter(x['active_tasks']) for k, x in iteritems(self.projects)
                     if x and (k == project if project else True)]
            # Head entry of every iterator; None marks an exhausted one.
            tasks = [next(x, None) for x in iters]
            result = []

            while len(result) < limit:
                # Exhausted iterators are skipped: None cannot be ordered
                # against a tuple on Python 3.
                pending = [(idx, entry) for idx, entry in enumerate(tasks)
                           if entry is not None]
                if not pending:
                    break
                # Order by updatetime only, so a tie never falls through to
                # comparing the task dicts (a TypeError on Python 3).
                i, t = max(pending, key=lambda pair: pair[1][0])
                task = t[1]
                tasks[i] = next(iters[i], None)
                # NOTE: the dict is trimmed in place, so the removed keys are
                # also gone from the entry held in active_tasks.
                for key in list(task):
                    if key == 'track':
                        for section in ('fetch', 'process'):
                            for k in list(task[key].get(section, [])):
                                if k not in track_allowed_keys:
                                    del task[key][section][k]
                    if key not in allowed_keys:
                        del task[key]
                result.append(t)
            # fix for "<type 'exceptions.TypeError'>:dictionary key must be string"
            # A JSON round trip turns non-string dict keys into strings, which
            # the XML-RPC marshaller requires.
            return json.loads(json.dumps(result))
        server.register_function(get_active_tasks, 'get_active_tasks')

        # handle_request() gives up after this many seconds, so the loop
        # re-checks self._quit regularly.
        server.timeout = 0.5
        try:
            while not self._quit:
                server.handle_request()
        finally:
            # Release the listening socket even when serving raised.
            server.server_close()
コード例 #2
0
ファイル: scheduler.py プロジェクト: railroadman/pyspider
    def xmlrpc_run(self, port=23333, bind="127.0.0.1", logRequests=False):
        """Start the XML-RPC interface and serve until ``self._quit`` is set.

        Remote methods registered: ``_quit``, ``size``, ``counter``,
        ``newtask``, ``send_task``, ``update_project`` and
        ``get_active_tasks``.

        :param port: TCP port to listen on.
        :param bind: address the listening socket is bound to.
        :param logRequests: passed through to ``SimpleXMLRPCServer``.
        """
        try:
            from six.moves.xmlrpc_server import SimpleXMLRPCServer
        except ImportError:
            from SimpleXMLRPCServer import SimpleXMLRPCServer

        server = SimpleXMLRPCServer((bind, port), allow_none=True, logRequests=logRequests)
        server.register_introspection_functions()
        server.register_multicall_functions()

        server.register_function(self.quit, "_quit")
        server.register_function(self.__len__, "size")

        def dump_counter(_time, _type):
            """Return ``self._cnt[_time].to_dict(_type)``; log and return None on error."""
            try:
                return self._cnt[_time].to_dict(_type)
            except Exception:
                # Best effort: the failure is logged and the caller gets None.
                logger.exception("")

        server.register_function(dump_counter, "counter")

        def new_task(task):
            """Queue ``task`` if it passes ``task_verify``; report whether it was queued."""
            if self.task_verify(task):
                self.newtask_queue.put(task)
                return True
            return False

        server.register_function(new_task, "newtask")

        def send_task(task):
            """dispatch task to fetcher"""
            self.send_task(task)
            return True

        server.register_function(send_task, "send_task")

        def update_project():
            """Raise the ``_force_update_project`` flag on the scheduler."""
            self._force_update_project = True

        server.register_function(update_project, "update_project")

        def get_active_tasks(project=None, limit=100):
            """Return up to ``limit`` ``(updatetime, task)`` entries, largest updatetime first.

            Entries are merged from the ``active_tasks`` of every project, or
            only of ``project`` when one is given. Each task is reduced to the
            whitelisted keys before being returned.
            """
            allowed_keys = set(("taskid", "project", "status", "url", "lastcrawltime", "updatetime", "track"))
            track_allowed_keys = set(("ok", "time", "follows", "status_code"))

            iters = [
                iter(x["active_tasks"])
                for k, x in iteritems(self.projects)
                if x and (k == project if project else True)
            ]
            # Head entry of every iterator; None marks an exhausted one.
            tasks = [next(x, None) for x in iters]
            result = []

            while len(result) < limit:
                pending = [(idx, entry) for idx, entry in enumerate(tasks) if entry is not None]
                if not pending:
                    break
                # Order by updatetime only, so a tie never falls through to
                # comparing the task dicts (a TypeError on Python 3).
                i, t = max(pending, key=lambda pair: pair[1][0])
                task = t[1]
                tasks[i] = next(iters[i], None)
                # NOTE: the dict is trimmed in place, so the removed keys are
                # also gone from the entry held in active_tasks.
                for key in list(task):
                    if key == "track":
                        for section in ("fetch", "process"):
                            for k in list(task[key].get(section, [])):
                                if k not in track_allowed_keys:
                                    del task[key][section][k]
                    if key not in allowed_keys:
                        del task[key]
                result.append(t)
            # fix for "<type 'exceptions.TypeError'>:dictionary key must be string"
            # A JSON round trip turns non-string dict keys into strings, which
            # the XML-RPC marshaller requires.
            return json.loads(json.dumps(result))

        server.register_function(get_active_tasks, "get_active_tasks")

        # handle_request() gives up after this many seconds, so the loop
        # re-checks self._quit regularly.
        server.timeout = 0.5
        try:
            while not self._quit:
                server.handle_request()
        finally:
            # Release the listening socket even when serving raised.
            server.server_close()
コード例 #3
0
ファイル: scheduler.py プロジェクト: rtk4616/pyspider
    def xmlrpc_run(self, port=23333, bind='127.0.0.1', logRequests=False):
        '''Start the XML-RPC interface and serve until ``self._quit`` is set.

        Remote methods registered: ``_quit``, ``size``, ``counter``,
        ``newtask``, ``update_project`` and ``get_active_tasks``.

        :param port: TCP port to listen on.
        :param bind: address the listening socket is bound to.
        :param logRequests: passed through to ``SimpleXMLRPCServer``.
        '''
        try:
            from six.moves.xmlrpc_server import SimpleXMLRPCServer
        except ImportError:
            from SimpleXMLRPCServer import SimpleXMLRPCServer

        server = SimpleXMLRPCServer((bind, port), allow_none=True, logRequests=logRequests)
        server.register_introspection_functions()
        server.register_multicall_functions()

        server.register_function(self.quit, '_quit')
        server.register_function(self.__len__, 'size')

        def dump_counter(_time, _type):
            '''Return ``self._cnt[_time].to_dict(_type)``.'''
            return self._cnt[_time].to_dict(_type)
        server.register_function(dump_counter, 'counter')

        def new_task(task):
            '''Queue ``task`` if it passes ``task_verify``; report whether it was queued.'''
            if self.task_verify(task):
                self.newtask_queue.put(task)
                return True
            return False
        server.register_function(new_task, 'newtask')

        def update_project():
            '''Raise the ``_force_update_project`` flag on the scheduler.'''
            self._force_update_project = True
        server.register_function(update_project, 'update_project')

        def get_active_tasks(project=None, limit=100):
            '''Return up to ``limit`` ``(updatetime, task)`` entries, largest updatetime first.

            Entries are merged from the ``active_tasks`` of every project, or
            only of ``project`` when one is given. Each task is reduced to the
            whitelisted keys before being returned.
            '''
            allowed_keys = set((
                'taskid',
                'project',
                'status',
                'url',
                'lastcrawltime',
                'updatetime',
                'track',
            ))
            track_allowed_keys = set((
                'ok',
                'time',
                'follows',
                'status_code',
            ))

            iters = [iter(x['active_tasks']) for k, x in iteritems(self.projects)
                     if x and (k == project if project else True)]
            # Head entry of every iterator; None marks an exhausted one.
            tasks = [next(x, None) for x in iters]
            result = []

            while len(result) < limit:
                # Exhausted iterators are skipped: None cannot be ordered
                # against a tuple on Python 3.
                pending = [(idx, entry) for idx, entry in enumerate(tasks)
                           if entry is not None]
                if not pending:
                    break
                # Order by updatetime only, so a tie never falls through to
                # comparing the task dicts (a TypeError on Python 3).
                i, t = max(pending, key=lambda pair: pair[1][0])
                task = t[1]
                tasks[i] = next(iters[i], None)
                # NOTE: the dict is trimmed in place, so the removed keys are
                # also gone from the entry held in active_tasks.
                for key in list(task):
                    if key == 'track':
                        for section in ('fetch', 'process'):
                            for k in list(task[key].get(section, [])):
                                if k not in track_allowed_keys:
                                    del task[key][section][k]
                    if key not in allowed_keys:
                        del task[key]
                result.append(t)
            # fix for "<type 'exceptions.TypeError'>:dictionary key must be string"
            # A JSON round trip turns non-string dict keys into strings, which
            # the XML-RPC marshaller requires.
            return json.loads(json.dumps(result))
        server.register_function(get_active_tasks, 'get_active_tasks')

        # handle_request() gives up after this many seconds, so the loop
        # re-checks self._quit regularly.
        server.timeout = 0.5
        try:
            while not self._quit:
                server.handle_request()
        finally:
            # Release the listening socket even when serving raised.
            server.server_close()
コード例 #4
0
def start_reload(zope_layer_dotted_name, reload_paths=('src',),
                 preload_layer_dotted_name='plone.app.testing.PLONE_FIXTURE',
                 extensions=None):
    """Run the Zope robot server with a file watcher and an XML-RPC listener.

    Starts a ``Zope2Server`` with the preload layer, starts a ``Watcher`` over
    ``reload_paths`` together with a ``ForkLoop``, amends the server with
    ``zope_layer_dotted_name`` and then serves ``zodb_setup`` and
    ``zodb_teardown`` over XML-RPC until ``forkloop.exit`` is set.

    :param zope_layer_dotted_name: layer passed to ``amend_zope_server``.
    :param reload_paths: paths handed to the ``Watcher``.
    :param preload_layer_dotted_name: layer passed to ``start_zope_server``.
    :param extensions: when truthy, replaces ``watcher.allowed_extensions``.
    """

    print(WAIT("Starting Zope robot server"))

    zsl = Zope2Server()
    zsl.start_zope_server(preload_layer_dotted_name)

    forkloop = ForkLoop()
    watcher = Watcher(reload_paths, forkloop)
    if extensions:
        watcher.allowed_extensions = extensions
    elif HAS_DEBUG_MODE:
        watcher.allowed_extensions.remove('pt')
    watcher.start()
    forkloop.start()

    # NOTE(review): presumably forkloop.start() returns here with exit set
    # once the loop is asked to stop - confirm against ForkLoop.
    if forkloop.exit:
        print(WAIT("Stopping Zope robot server"))
        zsl.stop_zope_server()
        print(READY("Zope robot server stopped"))
        return

    # XXX: For unknown reason call to socket.gethostbyaddr may cause malloc
    # errors on OSX in forked child when called from medusa http_server, but
    # proper sleep seem to fix it:
    import time
    import socket
    import platform
    is_darwin = 'Darwin' in platform.uname()
    if is_darwin:
        gethostbyaddr = socket.gethostbyaddr
        socket.gethostbyaddr = lambda x: time.sleep(0.5) or (ZSERVER_HOST,)

    try:
        # Setting smaller asyncore poll timeout will speed up restart a bit
        import plone.testing.z2
        plone.testing.z2.ZServer.timeout = 0.5

        zsl.amend_zope_server(zope_layer_dotted_name)

        if HAS_DEBUG_MODE:
            import App.config
            config = App.config.getConfiguration()
            config.debug_mode = HAS_DEBUG_MODE
            App.config.setConfiguration(config)
    finally:
        # Undo the monkey-patch even when amending the server failed, so the
        # stand-in resolver never outlives this section.
        if is_darwin:
            socket.gethostbyaddr = gethostbyaddr

    print(READY("Zope robot server started"))

    try:
        listener = SimpleXMLRPCServer((LISTENER_HOST, LISTENER_PORT),
                                      logRequests=False)
    except socket.error as e:
        print(ERROR(str(e)))
        print(WAIT("Pruning Zope robot server"))
        zsl.prune_zope_server()
        return

    # handle_request() gives up after this many seconds, so the loop below
    # re-checks forkloop.exit regularly.
    listener.timeout = 0.5
    listener.allow_none = True
    listener.register_function(zsl.zodb_setup, 'zodb_setup')
    listener.register_function(zsl.zodb_teardown, 'zodb_teardown')

    try:
        while not forkloop.exit:
            listener.handle_request()
    except select.error:  # Interrupted system call
        pass
    finally:
        try:
            # Release the listening socket before pruning the server.
            listener.server_close()
        finally:
            print(WAIT("Pruning Zope robot server"))
            zsl.prune_zope_server()