def main():
    """
    main entry point

    Binds the pull socket, calls _start_ping_processes and then handles
    one pulled message per iteration until the halt event is set.

    returns 0 for normal termination (usually SIGTERM),
    1 when a zeromq error or any other exception ends the message loop
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    prepare_ipc_path(_pull_socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    pull_socket = _bind_pull_socket(zeromq_context)

    event_push_client = EventPushClient(zeromq_context, "service_availability")
    event_push_client.info("program-starts",
                           "service availability monitor starts")

    # Stays None until _start_ping_processes has returned, so the cleanup
    # in 'finally' never touches an unbound name when startup itself fails.
    ping_process_dict = None
    message_count = 0
    try:
        ping_process_dict = _start_ping_processes(halt_event)

        while not halt_event.is_set():

            # poll the subprocesses once for every len(ping_process_dict)
            # messages handled.
            # NOTE: an empty dict raises ZeroDivisionError here; it is
            # reported by the generic exception handler below.
            if message_count % len(ping_process_dict) == 0:
                for ping_process in ping_process_dict.values():
                    poll_subprocess(ping_process.process)

            message = pull_socket.recv_pyobj()
            assert not pull_socket.rcvmore

            _process_one_message(message, ping_process_dict, event_push_client)

            message_count += 1

    except KeyboardInterrupt: # convenience for testing
        log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        # a blocking recv interrupted by the signal that set halt_event
        # is the expected way out of the loop
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program terminating normally; interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminating normally")
    finally:
        # release everything no matter how the loop ended
        if ping_process_dict is not None:
            log.debug("terminating subprocesses")
            _terminate_ping_processes(ping_process_dict)
        pull_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return return_value
Exemple #2
0
def main():
    """
    main entry point

    Binds the PULL and ROUTER sockets, launches _worker_count io workers
    for every distinct volume name, then polls both sockets until the halt
    event is set. The number of pending work entries is logged and pushed
    as an event whenever _reporting_interval has elapsed.

    returns 0 for normal termination (usually SIGTERM),
    1 when a zeromq error or any other exception ends the polling loop
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    # Share the event that was registered with set_signal_handler and that
    # the main loop tests. A separate Event() here would leave the helpers
    # holding an event that is disconnected from shutdown.
    resources = _resources_tuple(
        halt_event=halt_event,
        volume_by_space_id=_volume_name_by_space_id(),
        pull_socket=zeromq_context.socket(zmq.PULL),
        router_socket=zeromq_context.socket(zmq.ROUTER),
        event_push_client=EventPushClient(zeromq_context,
                                          "rs_io_controller"),
        pending_work_by_volume=defaultdict(deque),
        available_ident_by_volume=defaultdict(deque))

    log.debug("binding to {0}".format(io_controller_pull_socket_uri))
    resources.pull_socket.bind(io_controller_pull_socket_uri)

    resources.router_socket.setsockopt(zmq.LINGER, 1000)
    log.debug("binding to {0}".format(io_controller_router_socket_uri))
    resources.router_socket.bind(io_controller_router_socket_uri)

    # we poll the sockets for readability, we assume we can always
    # write to the router socket
    poller = zmq.Poller()
    poller.register(resources.pull_socket, zmq.POLLIN | zmq.POLLERR)
    poller.register(resources.router_socket, zmq.POLLIN | zmq.POLLERR)

    # several space ids may map to the same volume, so launch per distinct
    # volume name; worker indexes are 1-based
    worker_processes = list()
    for volume_name in set(resources.volume_by_space_id.values()):
        for index in range(_worker_count):
            worker_processes.append(_launch_io_worker(volume_name, index + 1))

    # 0.0 forces a report on the first pass through the loop
    last_report_time = 0.0
    try:
        while not halt_event.is_set():
            for worker_process in worker_processes:
                poll_subprocess(worker_process)
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = \
                        "error flags from zmq {0}".format(active_socket)
                    log.error(error_message)
                    raise PollError(error_message)
                if active_socket is resources.pull_socket:
                    _read_pull_socket(resources)
                elif active_socket is resources.router_socket:
                    _read_router_socket(resources)
                else:
                    log.error("unknown socket {0}".format(active_socket))

            current_time = time.time()
            elapsed_time = current_time - last_report_time
            if elapsed_time > _reporting_interval:
                # total queued entries over all volumes
                pending_work = sum(
                    len(volume_queue)
                    for volume_queue
                    in resources.pending_work_by_volume.values())
                report_message = \
                    "{0:,} pending_work entries".format(pending_work)
                log.info(report_message)
                resources.event_push_client.info("queue_sizes",
                                                 report_message,
                                                 pending_work=pending_work)

                last_report_time = current_time

    except zmq.ZMQError as zmq_error:
        # an interrupted system call while halt_event is set is treated
        # as a normal shutdown
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program teminates normally with interrupted system call")
        else:
            log.exception("zeromq error processing request")
            resources.event_push_client.exception(unhandled_exception_topic,
                                                  "zeromq_error",
                                                  exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        resources.event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminates normally")
    finally:
        for worker_process in worker_processes:
            terminate_subprocess(worker_process)
        resources.pull_socket.close()
        resources.router_socket.close()
        resources.event_push_client.close()
        zeromq_context.term()

    return return_value
def main():
    """
    main entry point

    Binds the PULL and ROUTER sockets, connects a PUSH socket to the io
    controller, launches _worker_count database pool workers, then polls
    the PULL and ROUTER sockets until the halt event is set. Queue sizes
    are logged and pushed as an event whenever _reporting_interval has
    elapsed.

    returns 0 for normal termination (usually SIGTERM),
    1 when a zeromq error or any other exception ends the polling loop
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    # Share the event that was registered with set_signal_handler and that
    # the main loop tests. A separate Event() here would leave the helpers
    # holding an event that is disconnected from shutdown.
    resources = _resources_tuple(
        halt_event=halt_event,
        zeromq_context=zeromq_context,
        reply_push_sockets=dict(),
        pull_socket=zeromq_context.socket(zmq.PULL),
        io_controller_push_socket=zeromq_context.socket(zmq.PUSH),
        router_socket=zeromq_context.socket(zmq.ROUTER),
        event_push_client=EventPushClient(zeromq_context,
                                          "rs_db_pool_controller"),
        active_retrieves=dict(),
        pending_work_queue=deque(),
        available_ident_queue=deque())

    log.debug("binding to {0}".format(db_controller_pull_socket_uri))
    resources.pull_socket.bind(db_controller_pull_socket_uri)

    log.debug("connecting to {0}".format(io_controller_pull_socket_uri))
    resources.io_controller_push_socket.connect(io_controller_pull_socket_uri)

    resources.router_socket.setsockopt(zmq.LINGER, 1000)
    log.debug("binding to {0}".format(db_controller_router_socket_uri))
    resources.router_socket.bind(db_controller_router_socket_uri)

    # we poll the sockets for readability, we assume we can always
    # write to the router socket
    poller = zmq.Poller()
    poller.register(resources.pull_socket, zmq.POLLIN | zmq.POLLERR)
    poller.register(resources.router_socket, zmq.POLLIN | zmq.POLLERR)

    # worker indexes are 1-based
    worker_processes = list()
    for index in range(_worker_count):
        worker_processes.append(_launch_database_pool_worker(index + 1))

    # 0.0 forces a report on the first pass through the loop
    last_report_time = 0.0
    try:
        while not halt_event.is_set():
            for worker_process in worker_processes:
                poll_subprocess(worker_process)
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = \
                        "error flags from zmq {0}".format(active_socket)
                    log.error(error_message)
                    raise PollError(error_message)
                if active_socket is resources.pull_socket:
                    _read_pull_socket(resources)
                elif active_socket is resources.router_socket:
                    _read_router_socket(resources)
                else:
                    log.error("unknown socket {0}".format(active_socket))
            current_time = time.time()
            elapsed_time = current_time - last_report_time
            if elapsed_time > _reporting_interval:
                # take each size once so the log line and the event agree
                active_retrieves = len(resources.active_retrieves)
                pending_work_queue = len(resources.pending_work_queue)
                available_ident_queue = len(resources.available_ident_queue)
                report_message = \
                    "{0:,} active_retrives, " \
                    "{1:,} pending_work_queue entries, " \
                    "{2:,} available_ident_queue entries" \
                    "".format(active_retrieves,
                              pending_work_queue,
                              available_ident_queue)
                log.info(report_message)
                resources.event_push_client.info(
                    "queue_sizes",
                    report_message,
                    active_retrieves=active_retrieves,
                    pending_work_queue=pending_work_queue,
                    available_ident_queue=available_ident_queue)

                last_report_time = current_time

    except zmq.ZMQError as zmq_error:
        # an interrupted system call while halt_event is set is treated
        # as a normal shutdown
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program teminates normally with interrupted system call")
        else:
            log.exception("zeromq error processing request")
            resources.event_push_client.exception(unhandled_exception_topic,
                                                  "zeromq_error",
                                                  exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        resources.event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminates normally")
    finally:
        for worker_process in worker_processes:
            terminate_subprocess(worker_process)
        resources.pull_socket.close()
        resources.io_controller_push_socket.close()
        resources.router_socket.close()
        # reply sockets held in the dict at shutdown are closed as well
        for push_socket in resources.reply_push_sockets.values():
            push_socket.close()
        resources.event_push_client.close()
        zeromq_context.term()

    return return_value
def main():
    """
    main entry point

    Launches the database pool controller and io controller subprocesses,
    then serves requests arriving on the REP socket until the halt event
    is set, reporting the request count every _reporting_interval.

    returns 0 for normal termination (usually SIGTERM), 1 after an error
    """
    exit_code = 0

    initialize_logging(
        _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                  _local_node_name))
    logger = logging.getLogger("main")
    logger.info("program starts")

    for socket_uri in internal_socket_uri_list:
        prepare_ipc_path(socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    database_pool_controller = _launch_database_pool_controller()
    io_controller = _launch_io_controller()

    zeromq_context = zmq.Context()
    rep_socket = _bind_rep_socket(zeromq_context)
    db_controller_push_socket = \
        _connect_db_controller_push_socket(zeromq_context)
    event_push_client = EventPushClient(zeromq_context, "retrieve_source")
    event_push_client.info("program-starts", "retrieve source starts")

    # only readability is polled; writes to the push client sockets are
    # assumed to always succeed
    poller = zmq.Poller()
    poller.register(rep_socket, zmq.POLLIN | zmq.POLLERR)

    request_count = 0
    last_report_time = 0.0
    try:
        while not halt_event.is_set():
            for child_process in (database_pool_controller, io_controller):
                poll_subprocess(child_process)

            # a single socket is registered, but the loop keeps the same
            # shape as the other poller users
            for ready_socket, flags in poller.poll(_poll_timeout):
                if flags & zmq.POLLERR:
                    poll_error_text = \
                        "error flags from zmq {0}".format(ready_socket)
                    logger.error(poll_error_text)
                    raise PollError(poll_error_text)

                assert ready_socket is rep_socket

                _process_one_request(rep_socket, db_controller_push_socket)

                request_count += 1

            now = time.time()
            if now - last_report_time > _reporting_interval:
                summary = "{0:,} requests".format(request_count)
                logger.info(summary)
                event_push_client.info("request_count",
                                       summary,
                                       request_count=request_count)
                # start a fresh reporting window
                last_report_time = now
                request_count = 0

    except KeyboardInterrupt:  # convenience for testing
        logger.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        if not (is_interrupted_system_call(zmq_error)
                and halt_event.is_set()):
            logger.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            exit_code = 1
        else:
            logger.info(
                "program teminates normally with interrupted system call")
    except Exception as error:
        logger.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(error),
                                    exctype=error.__class__.__name__)
        exit_code = 1
    else:
        logger.info("program teminates normally")
    finally:
        for child_process in (database_pool_controller, io_controller):
            terminate_subprocess(child_process)
        for closeable in (rep_socket,
                          db_controller_push_socket,
                          event_push_client):
            closeable.close()
        zeromq_context.term()

    return exit_code
Exemple #5
0
def main():
    """
    main entry point

    Binds the pull socket, calls _start_ping_processes and then handles
    one pulled message per iteration until the halt event is set.

    returns 0 for normal termination (usually SIGTERM),
    1 when a zeromq error or any other exception ends the message loop
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    prepare_ipc_path(_pull_socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    pull_socket = _bind_pull_socket(zeromq_context)

    event_push_client = EventPushClient(zeromq_context, "service_availability")
    event_push_client.info("program-starts",
                           "service availability monitor starts")

    # Stays None until _start_ping_processes has returned, so the cleanup
    # in 'finally' never touches an unbound name when startup itself fails.
    ping_process_dict = None
    message_count = 0
    try:
        ping_process_dict = _start_ping_processes(halt_event)

        while not halt_event.is_set():

            # poll the subprocesses once for every len(ping_process_dict)
            # messages handled.
            # NOTE: an empty dict raises ZeroDivisionError here; it is
            # reported by the generic exception handler below.
            if message_count % len(ping_process_dict) == 0:
                for ping_process in ping_process_dict.values():
                    poll_subprocess(ping_process.process)

            message = pull_socket.recv_pyobj()
            assert not pull_socket.rcvmore

            _process_one_message(message, ping_process_dict, event_push_client)

            message_count += 1

    except KeyboardInterrupt: # convenience for testing
        log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        # a blocking recv interrupted by the signal that set halt_event
        # is the expected way out of the loop
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program terminating normally; interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminating normally")
    finally:
        # release everything no matter how the loop ended
        if ping_process_dict is not None:
            log.debug("terminating subprocesses")
            _terminate_ping_processes(ping_process_dict)
        pull_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return return_value
Exemple #6
0
def main():
    """
    main entry point

    Starts the database pool controller and io controller subprocesses and
    answers requests from the REP socket until the halt event is set. The
    number of requests handled is logged and pushed as an event each time
    _reporting_interval has elapsed.

    returns 0 for normal termination (usually SIGTERM), 1 after an error
    """
    status = 0

    initialize_logging(
        _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                  _local_node_name)
    )
    main_log = logging.getLogger("main")
    main_log.info("program starts")

    for ipc_uri in internal_socket_uri_list:
        prepare_ipc_path(ipc_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    database_pool_controller = _launch_database_pool_controller()
    io_controller = _launch_io_controller()
    subprocesses = (database_pool_controller, io_controller)

    zeromq_context = zmq.Context()
    rep_socket = _bind_rep_socket(zeromq_context)
    db_controller_push_socket = _connect_db_controller_push_socket(
        zeromq_context
    )
    event_push_client = EventPushClient(zeromq_context, "retrieve_source")
    event_push_client.info("program-starts", "retrieve source starts")

    # the poller watches for readable sockets only; the push client
    # sockets are assumed to be writable at all times
    poller = zmq.Poller()
    poller.register(rep_socket, zmq.POLLIN | zmq.POLLERR)

    handled_requests = 0
    previous_report = 0.0
    try:
        while not halt_event.is_set():
            for subprocess_handle in subprocesses:
                poll_subprocess(subprocess_handle)

            # one socket is registered, so at most one entry comes back;
            # the 'for' mirrors the other places that use a poller
            for polled_socket, polled_flags in poller.poll(_poll_timeout):
                if polled_flags & zmq.POLLERR:
                    failure_text = "error flags from zmq {0}".format(
                        polled_socket
                    )
                    main_log.error(failure_text)
                    raise PollError(failure_text)

                assert polled_socket is rep_socket

                _process_one_request(rep_socket, db_controller_push_socket)
                handled_requests += 1

            timestamp = time.time()
            if timestamp - previous_report > _reporting_interval:
                report_text = "{0:,} requests".format(handled_requests)
                main_log.info(report_text)
                event_push_client.info(
                    "request_count",
                    report_text,
                    request_count=handled_requests,
                )
                # the counter restarts with every report
                previous_report = timestamp
                handled_requests = 0

    except KeyboardInterrupt:  # convenience for testing
        main_log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        interrupted_by_halt = (
            is_interrupted_system_call(zmq_error) and halt_event.is_set()
        )
        if interrupted_by_halt:
            main_log.info(
                "program teminates normally with interrupted system call"
            )
        else:
            main_log.exception("zeromq error processing request")
            event_push_client.exception(
                unhandled_exception_topic,
                "zeromq_error",
                exctype="ZMQError",
            )
            status = 1
    except Exception as failure:
        main_log.exception("error processing request")
        event_push_client.exception(
            unhandled_exception_topic,
            str(failure),
            exctype=failure.__class__.__name__,
        )
        status = 1
    else:
        main_log.info("program teminates normally")
    finally:
        for subprocess_handle in subprocesses:
            terminate_subprocess(subprocess_handle)
        rep_socket.close()
        db_controller_push_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return status