def get_work_queue(): """ 工作队列 """ while 1: if not work_queue.empty(): _dict = work_queue.get() if not isinstance(_dict, dict): msg = 'put queue data is not dict,please check' raise ValueError(msg) # 参数 _args = _dict.get('args') # 工作函数即请求函数 work_func = _dict.get('work_func') # 是否过滤 dont_filter = _dict.get('dont_filter') content, url = work_func(_args, dont_filter) if content is not None: if content == 'HAS CRAWLED': logger.warning('%s has crawled' % url) else: _dict['content'] = content _dict['url'] = url follow_func = _dict.get('follow_func') save_func = _dict.get('save_func') if follow_func: handle_thread_exception(follow_func, _dict) if save_func: save_queue.put(_dict) work_queue.task_done()