def parse_task():
    """Publish one parse job per entry of the module-level ``data_list``.

    A ``PyBeanstalk`` client is built from ``beanstalk_parse_conf`` and every
    entry is sent to the configured tube as a JSON object with the keys
    ``company`` (the entry itself) and ``province`` (always ``'hunan'``).
    """
    host = beanstalk_parse_conf['host']
    port = beanstalk_parse_conf['port']
    client = PyBeanstalk(host, port)
    tube_name = beanstalk_parse_conf['tube']

    for name in data_list:
        payload = json.dumps({
            'company': name,
            'province': 'hunan',
        })
        client.put(tube_name, payload)
def main():
    """Send one hard-coded company job to the consumer tube.

    The company name is echoed to stdout before the JSON job
    (keys ``company`` and ``province``) is put on the tube named in
    ``beanstalk_consumer_conf``.
    """
    client = PyBeanstalk(
        beanstalk_consumer_conf['host'],
        beanstalk_consumer_conf['port'],
    )
    tube_name = beanstalk_consumer_conf['tube']

    company = '湖南汉璟真空玻璃科技有限公司'
    job = {'company': company, 'province': 'hunan'}

    # A parenthesised single argument prints exactly like the bare
    # print statement under Python 2.
    print(company)
    client.put(tube_name, json.dumps(job))
def main():
    """Send one job per entry of the module-level ``company_info`` mapping.

    For every ``(company, info)`` pair the job carries the company key and
    ``info['province']``; the company key is echoed to stdout before the
    JSON job is put on the tube named in ``beanstalk_consumer_conf``.
    """
    client = PyBeanstalk(
        beanstalk_consumer_conf['host'],
        beanstalk_consumer_conf['port'],
    )
    tube_name = beanstalk_consumer_conf['tube']

    for name, details in company_info.iteritems():
        # Build the job first so a missing 'province' key fails before
        # anything is printed or sent.
        job = {'company': name, 'province': details['province']}
        print(name)
        client.put(tube_name, json.dumps(job))
def crawl_task():
    """Publish one crawl job per entry of the module-level ``data_list``.

    The beanstalk endpoint and tube are hard-coded in this function.  Each
    job is a JSON object with the keys ``company_name`` (the entry itself)
    and ``province`` (always ``'hunan'``).
    """
    conf = {
        'host': 'cs0.sz-internal.haizhi.com',
        'port': 11400,
        'tube': 'gs_hunan_scheduler',
    }
    client = PyBeanstalk(conf['host'], conf['port'])
    tube_name = conf['tube']

    for name in data_list:
        job = {'company_name': name, 'province': 'hunan'}
        client.put(tube_name, json.dumps(job))
def main():
    """Send a fixed list of company jobs to the consumer tube.

    Every job is a JSON object with the keys ``company_name`` and
    ``province`` (always ``'guizhou'``).  The serialized job is echoed to
    stdout before it is put on the tube named in ``beanstalk_consumer_conf``.
    """
    client = PyBeanstalk(
        beanstalk_consumer_conf['host'],
        beanstalk_consumer_conf['port'],
    )
    tube_name = beanstalk_consumer_conf['tube']

    companies = [
        '贵州大龙帝国网吧',
        '罗甸县网络帝国网咖',
        '玉屏国网线下百货店',
        '帝国网络会所',
        '玉屏县帝国网吧',
    ]
    for name in companies:
        payload = json.dumps({'company_name': name, 'province': 'guizhou'})
        # A parenthesised single argument prints exactly like the bare
        # print statement under Python 2.
        print(payload)
        client.put(tube_name, payload)
class MqQueueThread(threading.Thread):
    """Daemon thread that forwards queued messages to a beanstalk tube.

    Producers hand messages over with :meth:`push_msg`; :meth:`run` drains
    the internal queue and puts each message on ``server_conf['tube']``,
    retrying the same message until the put succeeds.  :meth:`close`
    enqueues a stop sentinel that makes :meth:`run` return.

    When constructed with ``is_open=False`` the object is inert: no
    beanstalk client is created, :meth:`push_msg` discards messages and
    :meth:`run` returns immediately.
    """

    # Backlog thresholds compared against the value returned by
    # ``beanstalk.get_tube_count`` (presumably a job count -- confirm
    # against PyBeanstalk).
    PAUSE_COUNT_LV1 = 1000
    PAUSE_COUNT_LV2 = 10000
    PAUSE_COUNT_LV3 = 50000
    PAUSE_COUNT_LV4 = 100000
    PAUSE_COUNT_LV5 = 1000000

    # Sleep durations (seconds, passed to time.sleep) for each threshold.
    PAUSE_TIME_LV1 = 1
    PAUSE_TIME_LV2 = 3
    PAUSE_TIME_LV3 = 10
    PAUSE_TIME_LV4 = 20
    PAUSE_TIME_LV5 = 300

    # Sentinel put on the queue by close(); run() exits when it reads it.
    # A pushed message whose str() equals this value also stops the thread.
    _STOP_SIGNAL = '@@##$$'

    def __init__(self, server_conf=None, log=None, is_open=True):
        """Create the forwarding thread.

        :param server_conf: mapping with ``host``, ``port`` and ``tube``
            keys; required when ``is_open`` is true.
        :param log: logger-like object (``info``/``warn``/``error``/
            ``exception``).  Defaults to a stdlib logger so the declared
            default of ``None`` no longer crashes every method.
        :param is_open: when false the queue is disabled and no beanstalk
            connection is made.
        :raises ValueError: if ``is_open`` is true and ``server_conf`` is
            ``None``.  (``ValueError`` is a ``StandardError`` subclass on
            Python 2, so existing handlers keep working.)
        """
        threading.Thread.__init__(self)
        self.daemon = True

        if log is None:
            import logging
            log = logging.getLogger(__name__)
        self.log = log

        # Whether the message queue is enabled at all.
        self.is_open = is_open
        # Whether the last put succeeded (False while the link is broken).
        self.is_connect = True
        # Throttling state maintained by is_need_pause().
        self.is_pause = False
        self.pause_time = self.PAUSE_TIME_LV1
        # Hand-over queue between producers and this thread.
        self.queue = Queue()
        self.server_conf = server_conf

        # Every attribute is defined before the disabled-queue early return
        # so close()/is_need_pause() cannot hit AttributeError.
        self.beanstalk = None
        self.output_tube = None
        if not self.is_open:
            return

        if server_conf is None:
            raise ValueError('没有消息队列配置信息...')

        self.beanstalk = PyBeanstalk(server_conf['host'], server_conf['port'])
        self.output_tube = server_conf['tube']

    def __del__(self):
        # __del__ can run on a partially initialised object; never raise.
        log = getattr(self, 'log', None)
        if log is not None:
            log.info('消息队列线程退出...')

    def is_need_pause(self):
        """Sleep for a backlog-dependent time when the tube is congested.

        Updates ``is_pause`` and ``pause_time``.  Returns ``None`` without
        sleeping when the count is below ``PAUSE_COUNT_LV1``.  If the count
        cannot be fetched, the lowest pause level is applied.
        """
        try:
            count = self.beanstalk.get_tube_count(self.output_tube)
        except Exception as e:
            self.log.error('获取当前队列数目失败..开启消息队列休眠...')
            self.log.exception(e)
            count = self.PAUSE_COUNT_LV1

        if count < self.PAUSE_COUNT_LV1:
            self.is_pause = False
            self.pause_time = self.PAUSE_TIME_LV1
            return

        self.is_pause = True
        # Highest threshold first; the first one reached decides the sleep.
        levels = (
            (self.PAUSE_COUNT_LV5, self.PAUSE_TIME_LV5),
            (self.PAUSE_COUNT_LV4, self.PAUSE_TIME_LV4),
            (self.PAUSE_COUNT_LV3, self.PAUSE_TIME_LV3),
            (self.PAUSE_COUNT_LV2, self.PAUSE_TIME_LV2),
        )
        self.pause_time = self.PAUSE_TIME_LV1
        for threshold, seconds in levels:
            if count >= threshold:
                self.pause_time = seconds
                break

        time.sleep(self.pause_time)

    def close(self):
        """Ask the thread to exit by enqueueing the stop sentinel."""
        self.queue.put_nowait(self._STOP_SIGNAL)
        self.log.info('发送线程退出指令...')

    def push_msg(self, msg):
        """Queue ``str(msg)`` for delivery; ignored when the queue is disabled."""
        if self.is_open:
            self.queue.put_nowait(str(msg))

    def run(self):
        """Drain the queue until the stop sentinel arrives.

        Each message is retried until it is delivered.  Unexpected errors
        are logged together with the current queue size and followed by a
        5 second pause.
        """
        self.log.info('开始运行消息队列...')
        while True:
            if not self.is_open:
                self.log.info('没有打开消息队列, 退出!')
                break
            try:
                msg = self.queue.get()
                if msg == self._STOP_SIGNAL:
                    break
                self._send_until_delivered(msg)
            except Exception as e:
                self.log.info('当前队列大小: size = {size}'.format(size=self.queue.qsize()))
                self.log.exception(e)
                time.sleep(5)
        self.log.info('消息队列线程正常退出.')

    def _send_until_delivered(self, msg):
        """Put ``msg`` on the output tube, retrying every 10 s until it succeeds."""
        while True:
            try:
                self.beanstalk.put(self.output_tube, msg)
                # NOTE: throttling through is_need_pause() is not invoked
                # here; it was already disabled in the original code.
                self.is_connect = True
                return
            except SocketError as e:
                # Mark the link as down so producers can slow down.
                self.is_connect = False
                self.log.exception(e)
                time.sleep(10)
                self._reconnect()
            except Exception as e:
                self.is_connect = False
                self.log.error('捕获异常休眠...')
                self.log.exception(e)
                time.sleep(10)

    def _reconnect(self):
        """Reconnect the client; a failure is logged, never propagated.

        Swallowing the error keeps the caller's retry loop alive, so the
        in-flight message is retried instead of being dropped.
        """
        try:
            self.beanstalk.reconnect()
            self.log.warn("reconnect beanstalk...")
        except Exception as e:
            self.log.exception(e)