context = zmq.Context() # Socket to send messages on sender = context.socket(zmq.PUSH) sender.bind("tcp://*:%s" % XAPIAN_ZMQ_VENT_PORT) # Socket for worker control controller = context.socket(zmq.PUB) controller.bind("tcp://*:%s" % XAPIAN_ZMQ_CTRL_VENT_PORT) # Socket for sync syncservice = context.socket(zmq.REP) syncservice.bind("tcp://*:%s" % XAPIAN_ZMQ_SYNC_VENT_PORT) # init redis global_r0 = _default_redis() from_csv = FROM_CSV def csv_input_pre_func(item): item = itemLine2Dict(item) return item if from_csv: pre_funcs = [csv_input_pre_func] from consts import CSV_FILEPATH if os.path.isdir(CSV_FILEPATH): total_cost = 0 count = 0 tb = time.time() ts = tb
""" py xapian_zmq_vent.py' """ context = zmq.Context() # Socket to send messages on sender = context.socket(zmq.PUSH) sender.bind("tcp://*:%s" % XAPIAN_ZMQ_VENT_PORT) # Socket for worker control controller = context.socket(zmq.PUB) controller.bind("tcp://*:%s" % XAPIAN_ZMQ_CTRL_VENT_PORT) # init redis global_vent_r0 = _default_redis(host=VENT_REDIS_HOST, port=VENT_REDIS_PORT, db=0) from_csv = FROM_CSV def csv_input_pre_func(item): item = itemLine2Dict(item) return item if from_csv: pre_funcs = [csv_input_pre_func] from consts import CSV_FILEPATH if os.path.isdir(CSV_FILEPATH): total_cost = 0 count = 0 tb = time.time() ts = tb
def cal_forever(receiver, controller, poller, sender=None, fill_field_funcs=None):
    """Consume items from ``receiver`` and feed them to the profile keyword calc.

    The first item is read with a blocking ``recv_json`` and used only to
    select the redis db number and the accepted timestamp range.
    NOTE(review): that first item is never passed to
    ``realtime_profile_keywords_cal`` -- confirm this is intended.

    After that the function polls ``poller`` in a loop:

    * an item on ``receiver`` is run through every function in
      ``fill_field_funcs``, dropped if its timestamp is outside the currently
      accepted range, and otherwise handed to
      ``realtime_profile_keywords_cal``;
    * any message on ``controller`` is treated as a KILL request.  The loop
      only exits once a KILL was seen, a poll came back empty, and more than
      ``XAPIAN_ZMQ_WORK_KILL_INTERVAL`` seconds have passed since start.

    Args:
        receiver: socket items are read from via ``recv_json``.
        controller: socket whose incoming messages act as KILL.
        poller: poller on which ``receiver`` and ``controller`` are registered.
        sender: unused here; kept for interface compatibility.
        fill_field_funcs: optional iterable of ``func(item) -> item`` applied
            in order to every received item. ``None`` means no functions.
    """
    if fill_field_funcs is None:
        fill_field_funcs = []

    count = 0
    ts = time.time()
    tb = ts  # worker start time, used for the kill-interval check
    receive_kill = False

    # prepare: the first item decides the initial db number / accepted range
    item = receiver.recv_json()
    item_timestamp = item["timestamp"]
    now_db_no = get_now_db_no(item_timestamp)
    print("redis db no now %s" % now_db_no)
    global_profile_r = _default_redis(
        host=PROFILE_REDIS_HOST, port=PROFILE_REDIS_PORT, db=now_db_no
    )
    set_now_accepted_tsrange(item_timestamp)

    while True:
        # Poll exactly once per iteration and reuse that result.  Polling a
        # second time (as the code used to) can block for another timeout and
        # return a different, possibly empty, event set.
        socks = dict(poller.poll(XAPIAN_ZMQ_POLL_TIMEOUT))

        if not socks:
            # Periodic kill: the worker records its start time (tb); once a
            # KILL has been received and the worker has been alive longer
            # than the threshold, it stops.  In prod mode a new worker is
            # expected to be started every XAPIAN_ZMQ_WORK_KILL_INTERVAL.
            if receive_kill and time.time() - tb > XAPIAN_ZMQ_WORK_KILL_INTERVAL:
                print('receive "KILL", worker stop, cost: %ss' % (time.time() - tb))
                break
            continue

        if socks.get(receiver) == zmq.POLLIN:
            item = receiver.recv_json()
            for func in fill_field_funcs:
                item = func(item)

            item_timestamp = item["timestamp"]
            item_ts = int(item_timestamp)
            now_a_start_ts, now_a_end_ts = get_now_accepted_tsrange()
            if item_ts < now_a_start_ts or item_ts >= now_a_end_ts:
                # Outside the accepted range: discard this weibo.  A pending
                # controller message stays queued and is seen on the next poll.
                continue

            new_db_no = get_now_db_no(item_timestamp)
            if new_db_no != now_db_no:
                now_db_no = new_db_no
                print("redis db no now %s" % now_db_no)
                # Reconnect to the same profile redis server as the initial
                # handle; only the db number changes.
                global_profile_r = _default_redis(
                    host=PROFILE_REDIS_HOST, port=PROFILE_REDIS_PORT, db=now_db_no
                )
                set_now_accepted_tsrange(item_timestamp)

            realtime_profile_keywords_cal(item, global_profile_r)

            count += 1
            if count % XAPIAN_FLUSH_DB_SIZE == 0:
                te = time.time()
                cost = te - ts
                ts = te
                print("[%s] total profile calc: %s, %s sec/per %s" % (
                    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    count,
                    cost,
                    XAPIAN_FLUSH_DB_SIZE,
                ))

        # Any waiting controller command acts as 'KILL'
        if socks.get(controller) == zmq.POLLIN:
            controller.recv()
            receive_kill = True
# Socket for control input controller = context.socket(zmq.SUB) controller.connect("tcp://%s:%s" % (XAPIAN_ZMQ_VENT_HOST, XAPIAN_ZMQ_CTRL_VENT_PORT)) controller.setsockopt(zmq.SUBSCRIBE, "") # Process messages from receiver and controller poller = zmq.Poller() poller.register(receiver, zmq.POLLIN) poller.register(controller, zmq.POLLIN) # disable realtime work sender = None # init global vent redis global_vent_r0 = _default_redis(host=VENT_REDIS_HOST, port=VENT_REDIS_PORT, db=0) # init global profile redis global_profile_r0 = _default_redis(host=PROFILE_REDIS_HOST, port=PROFILE_REDIS_PORT, db=0) fill_field_funcs = [] s = load_scws() def cut_text(item): text = item["text"].encode("utf-8") item["terms"] = cut(s, text, f="n", cx=False) return item fill_field_funcs.append(cut_text)
# Socket for control input controller = context.socket(zmq.SUB) controller.connect('tcp://%s:%s' % (XAPIAN_ZMQ_VENT_HOST, XAPIAN_ZMQ_CTRL_VENT_PORT)) controller.setsockopt(zmq.SUBSCRIBE, "") # Process messages from receiver and controller poller = zmq.Poller() poller.register(receiver, zmq.POLLIN) poller.register(controller, zmq.POLLIN) # disable realtime work sender = None # init global vent redis global_vent_r0 = _default_redis(host=VENT_REDIS_HOST, port=VENT_REDIS_PORT, db=0) # init global sentiment redis global_sentiment_r0 = _default_redis(host=SENTIMENT_REDIS_HOST, port=SENTIMENT_REDIS_PORT, db=0) fill_field_funcs = [] from consts import XAPIAN_EXTRA_FIELD from triple_sentiment_classifier import triple_classifier def fill_sentiment(item): sentiment = triple_classifier(item) item[XAPIAN_EXTRA_FIELD] = sentiment return item fill_field_funcs.append(fill_sentiment) s = load_scws()