Esempio n. 1
0
def aggmon_jobagg(argv):
    """Run the per-job aggregator daemon until it is told to quit.

    Parses ``argv``, starts a ``JobAggregator`` for the requested job id,
    binds a zmq PULL socket for incoming messages, registers the RPC
    handlers ("agg", "quit", "resubscribe", "show_mcache" and, when a
    dispatcher is configured, "resend_state"), subscribes to every message
    bus given with ``-M`` and then feeds each received message into the
    aggregator's queue.

    The module-level ``component`` is reset to ``None`` and only replaced by
    a ``ComponentState`` when ``--dispatcher`` is given, so every use of it
    in here is guarded.

    Args:
        argv: list of command line arguments (without the program name).

    Exits the process with status 1 when the job id is empty, the
    ``JobAggregator`` cannot be created or the receive socket cannot be
    bound; argparse exits with status 2 on an invalid ``--log`` level.
    """
    global component

    ap = argparse.ArgumentParser()
    ap.add_argument('-C', '--cmd-port', default="tcp://0.0.0.0:5501", action="store", help="RPC command port")
    ap.add_argument('-D', '--dispatcher', default="", action="store", help="agg_control dispatcher RPC command port")
    ap.add_argument('-j', '--jobid', default="", action="store", help="jobid for which this instance does aggregation")
    ap.add_argument('-l', '--log', default="info", action="store", help="logging: info, debug, ...")
    ap.add_argument('-L', '--listen', default="tcp://127.0.0.1:5560", action="store", help="zmq pull port to listen on")
    ap.add_argument('-M', '--msgbus', default=[], action="append",
                    help="subscription port(s) for message bus. can be used multiple times.")
    ap.add_argument('-s', '--stats', default=False, action="store_true", help="print statistics info")
    ap.add_argument('-v', '--verbose', type=int, default=0, action="store", help="verbosity")
    pargs = ap.parse_args(argv)

    # Resolve the level by attribute lookup instead of eval() so that an
    # arbitrary command line string is never executed as code.
    log_level = getattr(logging, pargs.log.upper(), None)
    if not isinstance(log_level, int):
        ap.error("invalid log level: %r" % pargs.log)
    FMT = "%(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s"
    logging.basicConfig(stream=sys.stderr, level=log_level, format=FMT)
    component = None

    if len(pargs.jobid) == 0:
        log.error("jobid argument can not be empty!")
        sys.exit(1)

    context = zmq.Context()
    try:
        jagg = JobAggregator(pargs.jobid, context)
    except Exception as e:
        log.error("Failed to create JobAggregator: %r" % e)
        sys.exit(1)
    jagg.start()

    # Socket to receive messages on
    receiver = context.socket(zmq.PULL)
    receiver.setsockopt(zmq.RCVHWM, 40000)
    recv_port = zmq_socket_bind_range(receiver, pargs.listen)
    # Explicit check rather than assert: asserts vanish under "python -O".
    if recv_port is None:
        log.error("Failed to bind msg receiver on %s" % pargs.listen)
        sys.exit(1)

    def aggregate_rpc(msg):
        # Without a dispatcher there is no component state to keep the
        # statistics in; still perform the aggregation itself.
        if component is None:
            jagg.do_aggregate_and_send(msg)
            return
        agg_rpcs = component.state.get("stats.agg_rpcs", 0) + 1
        num_sent = jagg.do_aggregate_and_send(msg)
        aggs_sent = component.state.get("stats.aggs_sent", 0) + num_sent
        component.update({"stats.agg_rpcs": agg_rpcs, "stats.aggs_sent": aggs_sent})

    def show_mcache(msg):
        return jagg.metric_caches

    def subscribe_collectors(__msg):
        # subscribe to every configured message bus for this job id
        for msgb in pargs.msgbus:
            log.info("subscribing to msgs of job %s at %s" % (pargs.jobid, msgb))
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "subscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port),
                     J=pargs.jobid)

    def unsubscribe_and_quit(__msg):
        # unsubscribe from every message bus, then terminate immediately
        for msgb in pargs.msgbus:
            log.info("unsubscribing jobid %s from %s" % (pargs.jobid, msgb))
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "unsubscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port))
        os._exit(0)

    rpc = RPCThread(context, listen=pargs.cmd_port)
    rpc.start()
    rpc.register_rpc("agg", aggregate_rpc)
    rpc.register_rpc("quit", unsubscribe_and_quit, early_reply=True)
    rpc.register_rpc("resubscribe", subscribe_collectors)
    rpc.register_rpc("show_mcache", show_mcache)

    # subscribe to message bus
    subscribe_collectors(None)

    if len(pargs.dispatcher) > 0:
        me_addr = zmq_own_addr_for_uri(pargs.dispatcher)
        me_listen = "tcp://%s:%d" % (me_addr, recv_port)
        me_rpc = "tcp://%s:%d" % (me_addr, rpc.port)
        state = get_kwds(component="job_agg", cmd_port=me_rpc, listen=me_listen, jobid=pargs.jobid)
        component = ComponentState(context, pargs.dispatcher, state=state)
        rpc.register_rpc("resend_state", component.reset_timer)

    tstart = None
    log.info("Started msg receiver on %s" % pargs.listen)
    count = 0
    while True:
        try:
            s = receiver.recv()
            msg = json.loads(s)

            cmd = None
            if "_COMMAND_" in msg:
                cmd = msg["_COMMAND_"]
            # Name of the in-band command, or None for a plain data message.
            cmd_name = cmd["cmd"] if cmd is not None else None

            if cmd_name == "quit":
                log.info("Stopping job aggregator for jobid %s on 'quit' command." % pargs.jobid)
                break
            elif cmd_name == "resend_state":
                log.info("State resend requested.")
                if component is not None:
                    component.reset_timer()
                continue

            jagg.queue.put(msg)
            if count == 0 or cmd_name == "reset-stats":
                tstart = time.time()
                count = 0
            count += 1
            # component stays None when no dispatcher was given; an
            # unguarded update() would raise and terminate this loop.
            if component is not None:
                component.update({"stats.val_msgs_recvd": count})
            if (pargs.stats and count % 10000 == 0) or cmd_name == "show-stats":
                elapsed = time.time() - tstart
                # Guard against a zero interval (coarse clock / first msg).
                rate = float(count) / elapsed if elapsed > 0 else 0.0
                sys.stderr.write("%d msgs in %f seconds, %f msg/s\n" %
                                 (count, elapsed, rate))
                sys.stderr.flush()
        except Exception as e:
            print("Exception in msg receiver: %r" % e)
            jagg.stopping = True
            break

    time.sleep(0.1)
    print("%d messages received" % count)
Esempio n. 2
0
def aggmon_data_store(argv):
    """Run the data store daemon until it is told to quit.

    Parses ``argv``, opens a ``DataStore`` on the configured backend(s),
    binds a zmq PULL socket for incoming messages, registers the RPC
    handlers ("quit", "resubscribe" and, when a dispatcher is configured,
    "resend_state"), subscribes to every message bus given with ``-M`` and
    then feeds each received message into the store's queue.

    The module-level ``component`` is only assigned here when
    ``--dispatcher`` is given; uses of it in the receive loop are guarded
    with ``is not None``.

    Args:
        argv: list of command line arguments (without the program name).

    Exits the process with status 1 when the ``DataStore`` cannot be created
    or the receive socket cannot be bound; argparse exits with status 2 on
    an invalid ``--log`` level or a non-integer ``--expire`` value. A "quit"
    command received on the PULL socket terminates the process through
    ``os._exit(0)``.
    """
    global component

    ap = argparse.ArgumentParser()
    ap.add_argument('-g', '--group', default="universe", action="store", help="group/cluster served by this daemon instance")
    ap.add_argument('-C', '--cmd-port', default="tcp://0.0.0.0:5511", action="store", help="RPC command port")
    ap.add_argument('-D', '--dispatcher', default="", action="store", help="agg_control dispatcher RPC command port")
    # type=int is required: a string value would be *repeated* by the
    # "* 24 * 3600" below instead of being converted to seconds.
    ap.add_argument('-e', '--expire', type=int, default=180, action="store", help="days for expiring value metrics")
    ap.add_argument('-b', '--backend', default="mongodb", action="store", help="database backend(s), comma separated. Default is 'mongodb'.")
    ap.add_argument('-H', '--host', default="localhost", action="store", help="data store host")
    ap.add_argument('-n', '--port', default=None, action="store", help="data store port")
    ap.add_argument('-d', '--dbname', default="metricdb", action="store", help="database name")
    ap.add_argument('-P', '--prefix', default="gmetric", action="store", help="collections prefix")
    ap.add_argument('-u', '--user', default="", action="store", help="user name")
    ap.add_argument('-p', '--passwd', default="", action="store", help="password")
    ap.add_argument('-l', '--log', default="info", action="store", help="logging: info, debug, ...")
    ap.add_argument('-L', '--listen', default="tcp://0.0.0.0:5550", action="store", help="zmq pull port to listen on")
    ap.add_argument('-M', '--msgbus', default=[], action="append",
                    help="subscription port(s) for message bus. can be used multiple times.")
    ap.add_argument('-s', '--stats', default=False, action="store_true", help="print statistics info")
    ap.add_argument('-v', '--verbose', type=int, default=0, action="store", help="verbosity")
    pargs = ap.parse_args(argv)

    # Resolve the level by attribute lookup instead of eval() so that an
    # arbitrary command line string is never executed as code.
    log_level = getattr(logging, pargs.log.upper(), None)
    if not isinstance(log_level, int):
        ap.error("invalid log level: %r" % pargs.log)
    FMT = "%(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s"
    logging.basicConfig(stream=sys.stderr, level=log_level, format=FMT)

    pargs.backend = pargs.backend.split(",")
    if pargs.port:
        pargs.port = pargs.port.split(",")      # TODO: get rid of this and move it into (shared) config
    else:
        pargs.port = [None, None]
    # open DB
    try:
        store = DataStore(pargs.backend, pargs.host, pargs.port, pargs.dbname, pargs.user, pargs.passwd,
                          pargs.group, coll_prefix=pargs.prefix, value_metrics_ttl=pargs.expire * 24 * 3600)
    except Exception as e:
        log.error("Failed to create DataStore: %r" % e)
        sys.exit(1)
    store.start()

    context = zmq.Context()

    # Socket to receive messages on
    receiver = context.socket(zmq.PULL)
    receiver.setsockopt(zmq.RCVHWM, 40000)
    recv_port = zmq_socket_bind_range(receiver, pargs.listen)
    # Explicit check rather than assert: asserts vanish under "python -O".
    if recv_port is None:
        log.error("Failed to bind msg receiver on %s" % pargs.listen)
        sys.exit(1)

    def subscribe_collectors(__msg):
        # subscribe to every configured message bus
        for msgb in pargs.msgbus:
            log.info("subscribing to all msgs at '%s'" % msgb)
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "subscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port))

    def unsubscribe_and_quit(__msg):
        # unsubscribe from every message bus, then terminate immediately
        for msgb in pargs.msgbus:
            log.info("unsubscribing from '%s'" % msgb)
            me_addr = zmq_own_addr_for_uri(msgb)
            send_rpc(context, msgb, "unsubscribe", TARGET="tcp://%s:%d" % (me_addr, recv_port))
        os._exit(0)

    rpc = RPCThread(context, listen=pargs.cmd_port)
    rpc.start()
    rpc.register_rpc("quit", unsubscribe_and_quit, early_reply=True)
    rpc.register_rpc("resubscribe", subscribe_collectors)

    if len(pargs.dispatcher) > 0:
        me_addr = zmq_own_addr_for_uri(pargs.dispatcher)
        me_listen = "tcp://%s:%d" % (me_addr, recv_port)
        me_rpc = "tcp://%s:%d" % (me_addr, rpc.port)
        state = get_kwds(component="data_store", cmd_port=me_rpc, listen=me_listen, group=pargs.group)
        component = ComponentState(context, pargs.dispatcher, state=state)
        rpc.register_rpc("resend_state", component.reset_timer)

    # subscribe to message bus
    subscribe_collectors(None)

    tstart = None
    log.info("Started msg receiver on %s" % pargs.listen)
    count = 0
    while True:
        try:
            s = receiver.recv()
            log.debug("received msg on PULL port: %r" % s)
            msg = json.loads(s)

            cmd = None
            if "_COMMAND_" in msg:
                log.info("_COMMAND_ received: msg = %r" % msg)
                cmd = msg["_COMMAND_"]
            # Name of the in-band command, or None for a plain data message.
            cmd_name = cmd["cmd"] if cmd is not None else None

            if cmd_name == "quit":
                log.info("Stopping data_store on 'quit' command.")
                # raw exit: terminates the process without any cleanup
                os._exit(0)
            elif cmd_name == "resend_state":
                log.info("State resend requested.")
                if component is not None:
                    component.reset_timer()
                continue

            store.queue.put(msg)
            if count == 0 or cmd_name == "reset-stats":
                tstart = time.time()
                count = 0
            count += 1
            if component is not None:
                component.update({"stats.msgs_recvd": count})
            if (pargs.stats and count % 10000 == 0) or cmd_name == "show-stats":
                elapsed = time.time() - tstart
                # Guard against a zero interval (coarse clock / first msg).
                rate = float(count) / elapsed if elapsed > 0 else 0.0
                sys.stdout.write("%d msgs in %f seconds, %f msg/s\n" %
                                 (count, elapsed, rate))
                sys.stdout.flush()
        except Exception as e:
            print("Exception in msg receiver: %r" % e)
            break
    log.info("THE END")