コード例 #1
0
def get_work_queue():
    """Consume items from ``work_queue`` forever and dispatch each one.

    Every queue item must be a dict. The keys read here are:

    * ``args`` / ``dont_filter`` -- passed positionally to ``work_func``.
    * ``work_func`` -- callable invoked as ``work_func(args, dont_filter)``;
      its result is unpacked as a ``(content, url)`` pair.
    * ``follow_func`` -- optional; when truthy it is run through
      ``handle_thread_exception(follow_func, item)``.
    * ``save_func`` -- optional; when truthy the item is put on
      ``save_queue``.

    A ``content`` of ``None`` is skipped silently, and the sentinel string
    ``'HAS CRAWLED'`` is only logged as a warning. Otherwise ``content``
    and ``url`` are stored back into the item before it is handed on.

    The loop never returns normally; it ends only when an exception
    propagates out of it.

    Raises:
        ValueError: if an item taken from the queue is not a dict.
    """
    while True:
        # get() blocks until an item is available, so no empty() polling
        # (which would spin the CPU while the queue is idle) is needed.
        _dict = work_queue.get()
        try:
            # every item in queue is a dict
            if not isinstance(_dict, dict):
                msg = 'put queue data is not dict,please check'
                raise ValueError(msg)

            _args = _dict.get("args")
            work_func = _dict.get("work_func")
            dont_filter = _dict.get("dont_filter")

            # Run the work function; it yields the fetched content and the
            # url it belongs to.
            content, url = work_func(_args, dont_filter)

            if content is None:
                continue
            if content == 'HAS CRAWLED':
                logger.warning("%s has crawled" % url)
                continue

            _dict["content"] = content
            _dict["url"] = url

            follow_func = _dict.get('follow_func')
            save_func = _dict.get("save_func")

            if follow_func:
                handle_thread_exception(follow_func, _dict)
            if save_func:
                save_queue.put(_dict)
        finally:
            # Exactly one task_done() per successful get(), even when the
            # item is rejected or work_func raises, so join() cannot hang.
            work_queue.task_done()
コード例 #2
0
def get_work_queue():
    """Work-queue consumer loop.

    Takes items from ``work_queue`` forever. Each item must be a dict; the
    loop calls ``item['work_func'](item['args'], item['dont_filter'])`` and
    unpacks the result as ``(content, url)``:

    * ``content is None`` -- nothing further is done with the item.
    * ``content == 'HAS CRAWLED'`` -- a warning is logged for ``url``.
    * anything else -- ``content`` and ``url`` are written back into the
      item; a truthy ``follow_func`` is run through
      ``handle_thread_exception(follow_func, item)`` and a truthy
      ``save_func`` causes the item to be put on ``save_queue``.

    The loop never returns normally; it ends only when an exception
    propagates out of it.

    Raises:
        ValueError: if an item taken from the queue is not a dict.
    """
    while True:
        # get() already blocks until an item arrives; polling empty() first
        # only burned CPU while the queue was idle.
        _dict = work_queue.get()
        try:
            if not isinstance(_dict, dict):
                msg = 'put queue data is not dict,please check'
                raise ValueError(msg)

            # Arguments for the work function.
            _args = _dict.get('args')
            # The work function, i.e. the request function.
            work_func = _dict.get('work_func')
            # Whether filtering should be skipped.
            dont_filter = _dict.get('dont_filter')

            content, url = work_func(_args, dont_filter)

            if content is None:
                continue
            if content == 'HAS CRAWLED':
                logger.warning('%s has crawled' % url)
                continue

            _dict['content'] = content
            _dict['url'] = url

            follow_func = _dict.get('follow_func')
            save_func = _dict.get('save_func')

            if follow_func:
                handle_thread_exception(follow_func, _dict)

            if save_func:
                save_queue.put(_dict)
        finally:
            # Balance every get() with task_done(), including on failure,
            # so that work_queue.join() cannot block forever.
            work_queue.task_done()