context = zmq.Context()

    # Socket to send messages on
    sender = context.socket(zmq.PUSH)
    sender.bind("tcp://*:%s" % XAPIAN_ZMQ_VENT_PORT)

    # Socket for worker control
    controller = context.socket(zmq.PUB)
    controller.bind("tcp://*:%s" % XAPIAN_ZMQ_CTRL_VENT_PORT)

    # Socket for sync
    syncservice = context.socket(zmq.REP)
    syncservice.bind("tcp://*:%s" % XAPIAN_ZMQ_SYNC_VENT_PORT)

    # init redis
    global_r0 = _default_redis()
    from_csv = FROM_CSV

    def csv_input_pre_func(item):
        """Pre-processing hook for CSV input.

        Passes ``item`` through ``itemLine2Dict`` and returns whatever that
        helper produces.
        """
        return itemLine2Dict(item)

    if from_csv:
        pre_funcs = [csv_input_pre_func]
        from consts import CSV_FILEPATH
        if os.path.isdir(CSV_FILEPATH):
            total_cost = 0
            count = 0

            tb = time.time()
            ts = tb
Example #2
0
    """
    py xapian_zmq_vent.py
    """

    context = zmq.Context()

    # Socket to send messages on
    sender = context.socket(zmq.PUSH)
    sender.bind("tcp://*:%s" % XAPIAN_ZMQ_VENT_PORT)

    # Socket for worker control
    controller = context.socket(zmq.PUB)
    controller.bind("tcp://*:%s" % XAPIAN_ZMQ_CTRL_VENT_PORT)
    
    # init redis
    global_vent_r0 = _default_redis(host=VENT_REDIS_HOST, port=VENT_REDIS_PORT, db=0)

    from_csv = FROM_CSV

    def csv_input_pre_func(item):
        """Pre-processing hook applied to each CSV input item.

        Delegates to ``itemLine2Dict`` and returns its result unchanged.
        """
        return itemLine2Dict(item)

    if from_csv:
        pre_funcs = [csv_input_pre_func]
        from consts import CSV_FILEPATH
        if os.path.isdir(CSV_FILEPATH):
            total_cost = 0
            count = 0
            tb = time.time()
            ts = tb
Example #3
0
def cal_forever(receiver, controller, poller, sender=None, fill_field_funcs=None):
    """Receive items from ``receiver`` and run the realtime profile keyword calculation.

    The function first blocks on ``receiver.recv_json()`` for one message and
    uses its ``"timestamp"`` to pick the Redis db number (``get_now_db_no``),
    open the profile Redis client and set the accepted timestamp range.
    NOTE(review): that first message is only used for this initialisation; it
    is not passed to ``realtime_profile_keywords_cal`` -- confirm this is
    intended.

    It then loops, polling ``poller``:

    * every item read from ``receiver`` is passed through each callable in
      ``fill_field_funcs`` (in order), dropped if its timestamp lies outside
      the currently accepted range, and otherwise handed to
      ``realtime_profile_keywords_cal``;
    * any message on ``controller`` is treated as a kill request. The loop
      only exits once a kill has been received, a poll returns no events, and
      more than ``XAPIAN_ZMQ_WORK_KILL_INTERVAL`` seconds have passed since
      this function started.

    Args:
        receiver: socket items are read from with ``recv_json()``.
        controller: socket whose incoming messages signal "kill".
        poller: poller on which ``receiver`` and ``controller`` are registered.
        sender: unused by this function; kept for interface compatibility.
        fill_field_funcs: optional iterable of ``func(item) -> item`` callables
            applied to every received item. ``None`` means no callables.
    """
    # Avoid a shared mutable default; None simply means "no extra processing".
    if fill_field_funcs is None:
        fill_field_funcs = ()

    count = 0
    ts = time.time()  # start of the current progress-report window
    tb = ts           # start time of this worker, used for the kill check
    receive_kill = False

    # prepare: the first message determines the initial db and accepted range
    item = receiver.recv_json()
    item_timestamp = item["timestamp"]

    now_db_no = get_now_db_no(item_timestamp)
    print("redis db no now %s" % (now_db_no,))
    global_profile_r = _default_redis(host=PROFILE_REDIS_HOST, port=PROFILE_REDIS_PORT, db=now_db_no)

    set_now_accepted_tsrange(item_timestamp)

    while 1:
        evts = poller.poll(XAPIAN_ZMQ_POLL_TIMEOUT)
        if not evts:
            # Periodic kill: the worker start time is recorded (tb); once a
            # kill has been received, check whether the current time minus
            # the start time exceeds the threshold and, if so, stop.
            # In the matching prod mode a new worker should be started every
            # XAPIAN_ZMQ_WORK_KILL_INTERVAL.
            if receive_kill and time.time() - tb > XAPIAN_ZMQ_WORK_KILL_INTERVAL:
                print('receive "KILL", worker stop, cost: %ss' % (time.time() - tb))
                break
            continue

        # Reuse the events already returned instead of polling a second time,
        # which could block again or report a different set of events.
        socks = dict(evts)

        if socks.get(receiver) == zmq.POLLIN:
            item = receiver.recv_json()
            for func in fill_field_funcs:
                item = func(item)

            item_timestamp = item["timestamp"]

            now_a_start_ts, now_a_end_ts = get_now_accepted_tsrange()
            if int(item_timestamp) < now_a_start_ts or int(item_timestamp) >= now_a_end_ts:
                # Outside the accepted range: discard this weibo item.
                continue

            new_db_no = get_now_db_no(item_timestamp)
            if new_db_no != now_db_no:
                now_db_no = new_db_no
                print("redis db no now %s" % (now_db_no,))
                # Reconnect to the same profile Redis server as the initial
                # connection, only switching the db number.
                global_profile_r = _default_redis(
                    host=PROFILE_REDIS_HOST, port=PROFILE_REDIS_PORT, db=now_db_no
                )
                set_now_accepted_tsrange(item_timestamp)

            realtime_profile_keywords_cal(item, global_profile_r)

            count += 1
            if count % XAPIAN_FLUSH_DB_SIZE == 0:
                # Progress report every XAPIAN_FLUSH_DB_SIZE processed items.
                te = time.time()
                cost = te - ts
                ts = te
                print("[%s] total profile calc: %s, %s sec/per %s" % (
                    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    count,
                    cost,
                    XAPIAN_FLUSH_DB_SIZE,
                ))

        # Any waiting controller command acts as 'KILL'
        if socks.get(controller) == zmq.POLLIN:
            controller.recv()
            receive_kill = True
Example #4
0
    # Socket for control input
    controller = context.socket(zmq.SUB)
    controller.connect("tcp://%s:%s" % (XAPIAN_ZMQ_VENT_HOST, XAPIAN_ZMQ_CTRL_VENT_PORT))
    controller.setsockopt(zmq.SUBSCRIBE, "")

    # Process messages from receiver and controller
    poller = zmq.Poller()
    poller.register(receiver, zmq.POLLIN)
    poller.register(controller, zmq.POLLIN)

    # disable realtime work
    sender = None

    # init global vent redis
    global_vent_r0 = _default_redis(host=VENT_REDIS_HOST, port=VENT_REDIS_PORT, db=0)

    # init global profile redis
    global_profile_r0 = _default_redis(host=PROFILE_REDIS_HOST, port=PROFILE_REDIS_PORT, db=0)

    fill_field_funcs = []

    s = load_scws()

    def cut_text(item):
        """Fill ``item["terms"]`` from ``item["text"]``.

        The text is encoded to UTF-8 and passed to ``cut`` together with the
        ``s`` object returned by ``load_scws()`` (``f="n"``, ``cx=False``).
        The item is modified in place and returned.
        """
        utf8_text = item["text"].encode("utf-8")
        terms = cut(s, utf8_text, f="n", cx=False)
        item["terms"] = terms
        return item

    fill_field_funcs.append(cut_text)
Example #5
0
    context = zmq.Context()

    # Socket to send messages on
    sender = context.socket(zmq.PUSH)
    sender.bind("tcp://*:%s" % XAPIAN_ZMQ_VENT_PORT)

    # Socket for worker control
    controller = context.socket(zmq.PUB)
    controller.bind("tcp://*:%s" % XAPIAN_ZMQ_CTRL_VENT_PORT)

    # Socket for sync
    syncservice = context.socket(zmq.REP)
    syncservice.bind("tcp://*:%s" % XAPIAN_ZMQ_SYNC_VENT_PORT)

    # init redis
    global_r0 = _default_redis()
    from_csv = FROM_CSV

    def csv_input_pre_func(item):
        """Pre-processing hook for items read from CSV.

        Returns the result of ``itemLine2Dict(item)``.
        """
        return itemLine2Dict(item)

    if from_csv:
        pre_funcs = [csv_input_pre_func]
        from consts import CSV_FILEPATH
        if os.path.isdir(CSV_FILEPATH):
            total_cost = 0
            count = 0

            tb = time.time()
            ts = tb
    # Socket for control input
    controller = context.socket(zmq.SUB)
    controller.connect('tcp://%s:%s' % (XAPIAN_ZMQ_VENT_HOST, XAPIAN_ZMQ_CTRL_VENT_PORT))
    controller.setsockopt(zmq.SUBSCRIBE, "")

    # Process messages from receiver and controller
    poller = zmq.Poller()
    poller.register(receiver, zmq.POLLIN)
    poller.register(controller, zmq.POLLIN)

    # disable realtime work
    sender = None

    # init global vent redis
    global_vent_r0 = _default_redis(host=VENT_REDIS_HOST, port=VENT_REDIS_PORT, db=0)
    
    # init global sentiment redis
    global_sentiment_r0 = _default_redis(host=SENTIMENT_REDIS_HOST, port=SENTIMENT_REDIS_PORT, db=0)

    fill_field_funcs = []
    from consts import XAPIAN_EXTRA_FIELD
    from triple_sentiment_classifier import triple_classifier

    def fill_sentiment(item):
        """Store the result of ``triple_classifier(item)`` on the item.

        The value is written under the ``XAPIAN_EXTRA_FIELD`` key; the item is
        modified in place and returned.
        """
        item[XAPIAN_EXTRA_FIELD] = triple_classifier(item)
        return item
    fill_field_funcs.append(fill_sentiment)

    s = load_scws()