def __parse_response(self, response, spider):
    """Run the spider callback on a response and dispatch what it produces.

    The callback is looked up on ``spider`` by the name stored in
    ``response.request.callback`` and called with ``response``. Each value
    it produces is handled by type:

    * ``req.Request`` instances are pushed onto ``RequestQueue``;
    * ``dict`` items are handed to ``self.op_manager``;
    * anything else is ignored.

    Any exception raised while resolving or running the callback, or while
    dispatching its results, is passed to ``self.__save_mode`` together
    with the originating request instead of propagating.
    """
    try:
        handler = getattr(spider, response.request.callback)
        for produced in handler(response):
            if isinstance(produced, req.Request):
                # A newly generated request: add it to the queue.
                RequestQueue.push(produced, spider)
                continue
            if isinstance(produced, dict):
                # An extracted item: let op_manager process it.
                self.op_manager(produced, spider)
    except Exception as failure:
        self.__save_mode(response.request, failure)
def __init__(self, name, params, request_runner):
    """Initialise both base classes, reset the bot state and register the queue.

    ``name`` is forwarded to ``FMSBot.__init__`` and ``request_runner`` to
    ``RequestQueue.__init__``. ``params`` is stored as a copy (via its
    ``copy()`` method), so rebinding keys on ``self.params`` does not touch
    the caller's object. All other attributes start out as ``None``.
    """
    FMSBot.__init__(self, name)
    RequestQueue.__init__(self, request_runner)

    self.ctx = self.applier = None
    self.params = params.copy()
    self.ui_ = self.repo = None
    self.trust = self.update_sm = None

    # Why doesn't the base class constructor do this?
    request_runner.add_queue(self)
def __roll_request(self):
    """Keep popping requests from the queue, sending them and parsing responses.

    Each pass pops one request from ``RequestQueue``, sends it with
    ``self.__throw_request`` and, on success, hands the response to
    ``self.__parse_response`` before removing the request from the
    in-flight set with ``RequestQueue.del_requesting``.

    When ``RequestQueue.pop`` raises ``EmptyError`` the jobs in
    ``self.jobs`` are killed with ``gevent.killall`` and the loop ends.

    When sending raises ``NoResponseError`` the reason is logged at debug
    level; the request is removed from the in-flight set only if the
    reason contains ``'duplicate url'``.
    """
    while 1:
        try:
            request = RequestQueue.pop()
        except EmptyError:
            # The request queue is empty: shut the jobs down.
            gevent.killall(self.jobs)
            # Stop explicitly. If killall() hands control back to this
            # code, falling through would use `request` while it is
            # unbound (first pass) or still bound to the previous,
            # already handled request (later passes).
            return
        try:
            # Send the request.
            response = self.__throw_request(request, request.spider)
        except NoResponseError as err:
            logger.debug('No response, reason: %s', err)
            # NoResponseError is defined elsewhere; prefer its `message`
            # attribute as before, but fall back to str(err) when the
            # attribute is missing (plain Python 3 exceptions have none).
            reason = getattr(err, 'message', None)
            if reason is None:
                reason = str(err)
            if 'duplicate url' in reason:
                RequestQueue.del_requesting(request)
        else:
            # Parse the response.
            self.__parse_response(response, request.spider)
            RequestQueue.del_requesting(request)
def __init_requests(self):
    """Seed the request queue with the start requests of every spider.

    Iterates ``self.spiders`` in order and pushes each request produced by
    ``start_requests()`` onto ``RequestQueue``, paired with the spider
    that produced it.
    """
    for crawler in self.spiders:
        for initial_request in crawler.start_requests():
            RequestQueue.push(initial_request, crawler)
def __init__(self, runner, ctx):
    """Set up the queue and state machine bases, build the state table
    and register this queue with ``runner``.

    ``ctx`` is installed through ``set_context`` before any state object
    is created. The machine starts in the ``QUIESCENT`` state with empty
    ``params`` and a no-op ``monitor_callback``.
    """
    RequestQueue.__init__(self, runner)
    StateMachine.__init__(self)

    self.ctx = None
    self.set_context(ctx)  # Do early. States might depend on ctx.

    # One row per state: (state name, state class, extra constructor args).
    # Every state is constructed as cls(self, name, *extra_args), in the
    # order listed. The extra args are other state names -- presumably
    # the states to move to next; confirm against the state classes.
    state_table = (
        (QUIESCENT, Quiescent, ()),

        # Just inverting
        (INVERTING_URI, InvertingUri, (QUIESCENT, FAILING)),

        # Requesting previous graph in order to do insert.
        (INVERTING_URI_4_INSERT, InvertingUri,
         (REQUESTING_URI_4_INSERT, FAILING)),
        (REQUESTING_URI_4_INSERT, RequestingUri,
         (REQUESTING_GRAPH, FAILING)),
        (REQUESTING_GRAPH, RequestingGraph,
         (INSERTING_BUNDLES, FAILING)),

        # Inserting
        (INSERTING_BUNDLES, InsertingBundles, ()),
        (INSERTING_GRAPH, InsertingGraph, (INSERTING_URI, FAILING)),
        (INSERTING_URI, InsertingUri, (FINISHING, FAILING)),
        (CANCELING, CleaningUp, (QUIESCENT,)),
        (FAILING, CleaningUp, (QUIESCENT,)),

        # Requesting
        (REQUESTING_URI, RequestingUri, (REQUESTING_BUNDLES, FAILING)),
        (REQUESTING_BUNDLES, RequestingBundles, (FINISHING, FAILING)),
        (FINISHING, CleaningUp, (QUIESCENT,)),

        # Requesting head info from freenet
        (REQUESTING_URI_4_HEADS, RequestingUri,
         (REQUIRES_GRAPH_4_HEADS, FAILING)),
        (REQUIRES_GRAPH_4_HEADS, RequiresGraph,
         (REQUESTING_GRAPH_4_HEADS, FINISHING)),
        (REQUESTING_GRAPH_4_HEADS, RequestingGraph,
         (FINISHING, FAILING)),

        # Run an arbitrary StatefulRequest.
        (RUNNING_SINGLE_REQUEST, RunningSingleRequest,
         (FINISHING, FAILING)),

        # Copying.
        # This doesn't verify that the graph chk(s) are fetchable.
        (REQUESTING_URI_4_COPY, RequestingUri, (INSERTING_URI, FAILING)),
    )

    built = {}
    for state_name, state_class, extra_args in state_table:
        built[state_name] = state_class(self, state_name, *extra_args)
    # Assigned only once the whole map exists, as with a dict literal.
    self.states = built

    self.current_state = self.get_state(QUIESCENT)

    self.params = {}
    # Must not change any state!
    self.monitor_callback = lambda parent, client, msg: None

    runner.add_queue(self)
def __init__(self, runner, ctx):
    """Initialise the request queue and state machine, then wire up states.

    The context ``ctx`` is set via ``set_context`` before the state
    objects are built. Afterwards the current state is the one returned
    by ``get_state(QUIESCENT)``, ``params`` is an empty dict,
    ``monitor_callback`` does nothing, and the instance has been added to
    ``runner`` with ``add_queue``.
    """
    RequestQueue.__init__(self, runner)
    StateMachine.__init__(self)

    self.ctx = None
    self.set_context(ctx)  # Do early. States might depend on ctx.

    # Build the map locally and publish it on self only when complete,
    # matching the original single dict-literal assignment.
    table = {}
    table[QUIESCENT] = Quiescent(self, QUIESCENT)

    # Just inverting
    table[INVERTING_URI] = InvertingUri(
        self, INVERTING_URI, QUIESCENT, FAILING)

    # Requesting previous graph in order to do insert.
    table[INVERTING_URI_4_INSERT] = InvertingUri(
        self, INVERTING_URI_4_INSERT, REQUESTING_URI_4_INSERT, FAILING)
    table[REQUESTING_URI_4_INSERT] = RequestingUri(
        self, REQUESTING_URI_4_INSERT, REQUESTING_GRAPH, FAILING)
    table[REQUESTING_GRAPH] = RequestingGraph(
        self, REQUESTING_GRAPH, INSERTING_BUNDLES, FAILING)

    # Inserting
    table[INSERTING_BUNDLES] = InsertingBundles(self, INSERTING_BUNDLES)
    table[INSERTING_GRAPH] = InsertingGraph(
        self, INSERTING_GRAPH, INSERTING_URI, FAILING)
    table[INSERTING_URI] = InsertingUri(
        self, INSERTING_URI, FINISHING, FAILING)
    table[CANCELING] = CleaningUp(self, CANCELING, QUIESCENT)
    table[FAILING] = CleaningUp(self, FAILING, QUIESCENT)

    # Requesting
    table[REQUESTING_URI] = RequestingUri(
        self, REQUESTING_URI, REQUESTING_BUNDLES, FAILING)
    table[REQUESTING_BUNDLES] = RequestingBundles(
        self, REQUESTING_BUNDLES, FINISHING, FAILING)
    table[FINISHING] = CleaningUp(self, FINISHING, QUIESCENT)

    # Requesting head info from freenet
    table[REQUESTING_URI_4_HEADS] = RequestingUri(
        self, REQUESTING_URI_4_HEADS, REQUIRES_GRAPH_4_HEADS, FAILING)
    table[REQUIRES_GRAPH_4_HEADS] = RequiresGraph(
        self, REQUIRES_GRAPH_4_HEADS, REQUESTING_GRAPH_4_HEADS, FINISHING)
    table[REQUESTING_GRAPH_4_HEADS] = RequestingGraph(
        self, REQUESTING_GRAPH_4_HEADS, FINISHING, FAILING)

    # Run an arbitrary StatefulRequest.
    table[RUNNING_SINGLE_REQUEST] = RunningSingleRequest(
        self, RUNNING_SINGLE_REQUEST, FINISHING, FAILING)

    # Copying.
    # This doesn't verify that the graph chk(s) are fetchable.
    table[REQUESTING_URI_4_COPY] = RequestingUri(
        self, REQUESTING_URI_4_COPY, INSERTING_URI, FAILING)

    self.states = table

    self.current_state = self.get_state(QUIESCENT)

    self.params = {}
    # Must not change any state!
    self.monitor_callback = lambda parent, client, msg: None

    runner.add_queue(self)