예제 #1
0
 def __parse_response(self, response, spider):
     """Run the spider callback on a response and dispatch what it yields.

     The callback is looked up on ``spider`` by the name stored in
     ``response.request.callback`` and is called with ``response``.
     Each yielded ``req.Request`` is pushed onto ``RequestQueue``; each
     yielded ``dict`` (an extracted item) is passed to ``self.op_manager``.
     Anything else that is yielded is ignored.

     Any exception raised while looking up, running or consuming the
     callback is handed to ``self.__save_mode`` together with the
     originating request instead of being propagated.
     """
     try:
         handler = getattr(spider, response.request.callback)
         for produced in handler(response):
             if isinstance(produced, req.Request):
                 # A newly generated request: queue it for this spider.
                 RequestQueue.push(produced, spider)
                 continue
             if isinstance(produced, dict):
                 # An extracted item: let op_manager process it.
                 self.op_manager(produced, spider)
     except Exception as failure:
         self.__save_mode(response.request, failure)
예제 #2
0
    def __init__(self, name, params, request_runner):
        """Initialize both base classes and register with the runner.

        name -- passed to FMSBot.__init__.
        params -- only a copy (made with params.copy()) is stored.
        request_runner -- passed to RequestQueue.__init__; this instance
                          is also added to it with add_queue().
        """
        # Both bases are initialized explicitly, FMSBot first.
        FMSBot.__init__(self, name)
        RequestQueue.__init__(self, request_runner)

        # Everything except params starts out unset.
        self.ctx = self.applier = None
        # Store a copy rather than the caller's own object.
        self.params = params.copy()
        self.ui_ = self.repo = None
        self.trust = self.update_sm = None

        # The base class constructor apparently does not register the
        # queue with the runner (why not?), so do it here.
        request_runner.add_queue(self)
예제 #3
0
    def __init__(self, name, params, request_runner):
        """Initialize both base classes and register with the runner.

        name -- passed to FMSBot.__init__.
        params -- only a copy (made with params.copy()) is stored.
        request_runner -- passed to RequestQueue.__init__; this instance
                          is also added to it with add_queue().
        """
        # Both bases are initialized explicitly, FMSBot first.
        FMSBot.__init__(self, name)
        RequestQueue.__init__(self, request_runner)

        # Everything except params starts out unset.
        self.ctx = self.applier = None
        # Store a copy rather than the caller's own object.
        self.params = params.copy()
        self.ui_ = self.repo = None
        self.trust = self.update_sm = None

        # The base class constructor apparently does not register the
        # queue with the runner (why not?), so do it here.
        request_runner.add_queue(self)
예제 #4
0
 def __roll_request(self):
     """Keep popping requests from the queue, sending them and parsing replies.

     Each iteration pops one request from ``RequestQueue``, sends it with
     ``self.__throw_request`` and, on success, parses the response with
     ``self.__parse_response`` before removing the request from the
     "requesting" set via ``RequestQueue.del_requesting``.

     When the queue raises ``EmptyError`` all jobs in ``self.jobs`` are
     killed with ``gevent.killall`` and the loop ends.

     A ``NoResponseError`` is logged at debug level; the request is only
     removed from the "requesting" set when the error text contains
     'duplicate url'. NOTE(review): other no-response requests stay in
     that set -- presumably so they can be retried; confirm.
     """
     while True:
         try:
             request = RequestQueue.pop()
         except EmptyError:
             # The request queue is empty: shut the jobs down.
             gevent.killall(self.jobs)
             # Stop here. Falling through would use ``request`` while it
             # is unbound (first iteration) or left over from the previous
             # iteration, if killall returns control to this code.
             return
         try:
             # Send the request.
             response = self.__throw_request(request, request.spider)
         except NoResponseError as err:
             logger.debug('No response, reason: %s', err)
             # Exceptions only have ``.message`` when the class defines it
             # (or on Python 2); fall back to the string form otherwise.
             reason = getattr(err, 'message', None)
             if reason is None:
                 reason = str(err)
             if 'duplicate url' in reason:
                 RequestQueue.del_requesting(request)
         else:
             # Parse the response.
             self.__parse_response(response, request.spider)
             RequestQueue.del_requesting(request)
예제 #5
0
 def __init_requests(self):
     """Push every start request of every spider onto ``RequestQueue``.

     For each spider in ``self.spiders``, each request produced by
     ``spider.start_requests()`` is pushed together with that spider.
     """
     # Lazy pairs: start_requests() is still consumed one request at a
     # time, interleaved with the pushes.
     seeds = (
         (first_request, owner)
         for owner in self.spiders
         for first_request in owner.start_requests()
     )
     for first_request, owner in seeds:
         RequestQueue.push(first_request, owner)
예제 #6
0
    def __init__(self, runner, ctx):
        """Initialize the bases, build every state and register with runner.

        runner -- passed to RequestQueue.__init__; this instance is also
                  added to it with add_queue() once setup is complete.
        ctx -- installed with set_context() before any state is built.

        The machine starts in the QUIESCENT state with empty params and
        a monitor_callback that does nothing.
        """
        RequestQueue.__init__(self, runner)
        StateMachine.__init__(self)
        self.ctx = None
        # Do early. States might depend on ctx.
        self.set_context(ctx)

        # One row per state: (state name, state class, extra args).
        # Each state is built as state_class(self, state_name, *extra),
        # in the order listed here.
        layout = (
            (QUIESCENT, Quiescent, ()),

            # Just inverting
            (INVERTING_URI, InvertingUri, (QUIESCENT, FAILING)),

            # Requesting previous graph in order to do insert.
            (INVERTING_URI_4_INSERT, InvertingUri,
             (REQUESTING_URI_4_INSERT, FAILING)),
            (REQUESTING_URI_4_INSERT, RequestingUri,
             (REQUESTING_GRAPH, FAILING)),
            (REQUESTING_GRAPH, RequestingGraph,
             (INSERTING_BUNDLES, FAILING)),

            # Inserting
            (INSERTING_BUNDLES, InsertingBundles, ()),
            (INSERTING_GRAPH, InsertingGraph, (INSERTING_URI, FAILING)),
            (INSERTING_URI, InsertingUri, (FINISHING, FAILING)),
            (CANCELING, CleaningUp, (QUIESCENT,)),
            (FAILING, CleaningUp, (QUIESCENT,)),

            # Requesting
            (REQUESTING_URI, RequestingUri, (REQUESTING_BUNDLES, FAILING)),
            (REQUESTING_BUNDLES, RequestingBundles, (FINISHING, FAILING)),
            (FINISHING, CleaningUp, (QUIESCENT,)),

            # Requesting head info from freenet
            (REQUESTING_URI_4_HEADS, RequestingUri,
             (REQUIRES_GRAPH_4_HEADS, FAILING)),
            (REQUIRES_GRAPH_4_HEADS, RequiresGraph,
             (REQUESTING_GRAPH_4_HEADS, FINISHING)),
            (REQUESTING_GRAPH_4_HEADS, RequestingGraph,
             (FINISHING, FAILING)),

            # Run an arbitrary StatefulRequest.
            (RUNNING_SINGLE_REQUEST, RunningSingleRequest,
             (FINISHING, FAILING)),

            # Copying.
            # This doesn't verify that the graph chk(s) are fetchable.
            (REQUESTING_URI_4_COPY, RequestingUri,
             (INSERTING_URI, FAILING)),
        )

        # self.states is only assigned once every state has been built.
        built = {}
        for state_name, state_class, extra in layout:
            built[state_name] = state_class(self, state_name, *extra)
        self.states = built

        self.current_state = self.get_state(QUIESCENT)

        self.params = {}
        # Must not change any state!
        self.monitor_callback = lambda parent, client, msg: None

        runner.add_queue(self)
예제 #7
0
    def __init__(self, runner, ctx):
        """Initialize the bases, build every state and register with runner.

        runner -- passed to RequestQueue.__init__; this instance is also
                  added to it with add_queue() once setup is complete.
        ctx -- installed with set_context() before any state is built.

        The machine starts in the QUIESCENT state with empty params and
        a monitor_callback that does nothing.
        """
        RequestQueue.__init__(self, runner)
        StateMachine.__init__(self)
        self.ctx = None
        # Do early. States might depend on ctx.
        self.set_context(ctx)

        # One row per state: (state name, state class, extra args).
        # Each state is built as state_class(self, state_name, *extra),
        # in the order listed here.
        layout = (
            (QUIESCENT, Quiescent, ()),

            # Just inverting
            (INVERTING_URI, InvertingUri, (QUIESCENT, FAILING)),

            # Requesting previous graph in order to do insert.
            (INVERTING_URI_4_INSERT, InvertingUri,
             (REQUESTING_URI_4_INSERT, FAILING)),
            (REQUESTING_URI_4_INSERT, RequestingUri,
             (REQUESTING_GRAPH, FAILING)),
            (REQUESTING_GRAPH, RequestingGraph,
             (INSERTING_BUNDLES, FAILING)),

            # Inserting
            (INSERTING_BUNDLES, InsertingBundles, ()),
            (INSERTING_GRAPH, InsertingGraph, (INSERTING_URI, FAILING)),
            (INSERTING_URI, InsertingUri, (FINISHING, FAILING)),
            (CANCELING, CleaningUp, (QUIESCENT,)),
            (FAILING, CleaningUp, (QUIESCENT,)),

            # Requesting
            (REQUESTING_URI, RequestingUri, (REQUESTING_BUNDLES, FAILING)),
            (REQUESTING_BUNDLES, RequestingBundles, (FINISHING, FAILING)),
            (FINISHING, CleaningUp, (QUIESCENT,)),

            # Requesting head info from freenet
            (REQUESTING_URI_4_HEADS, RequestingUri,
             (REQUIRES_GRAPH_4_HEADS, FAILING)),
            (REQUIRES_GRAPH_4_HEADS, RequiresGraph,
             (REQUESTING_GRAPH_4_HEADS, FINISHING)),
            (REQUESTING_GRAPH_4_HEADS, RequestingGraph,
             (FINISHING, FAILING)),

            # Run an arbitrary StatefulRequest.
            (RUNNING_SINGLE_REQUEST, RunningSingleRequest,
             (FINISHING, FAILING)),

            # Copying.
            # This doesn't verify that the graph chk(s) are fetchable.
            (REQUESTING_URI_4_COPY, RequestingUri,
             (INSERTING_URI, FAILING)),
        )

        # self.states is only assigned once every state has been built.
        built = {}
        for state_name, state_class, extra in layout:
            built[state_name] = state_class(self, state_name, *extra)
        self.states = built

        self.current_state = self.get_state(QUIESCENT)

        self.params = {}
        # Must not change any state!
        self.monitor_callback = lambda parent, client, msg: None

        runner.add_queue(self)