# -*- coding: utf-8 -*-
"""
internal_sockets.py

addresses of sockets used internally by retrieve_source
external addresses come from environment variables
"""
import os

from tools.zeromq_util import ipc_socket_uri

# both values are required: a missing variable raises KeyError at import time
_local_node_name = os.environ["NIMBUSIO_NODE_NAME"]
_socket_dir = os.environ["NIMBUSIO_SOCKET_DIR"]

db_controller_pull_socket_uri = ipc_socket_uri(_socket_dir,
                                               _local_node_name,
                                               "db_controller_pull")
db_controller_router_socket_uri = ipc_socket_uri(_socket_dir,
                                                 _local_node_name,
                                                 "db_controller_router")
io_controller_pull_socket_uri = ipc_socket_uri(_socket_dir,
                                               _local_node_name,
                                               "io_controller_pull")
io_controller_router_socket_uri = ipc_socket_uri(_socket_dir,
                                                 _local_node_name,
                                                 "io_controller_router")

# one entry per socket URI defined above; the io_controller pull socket
# was previously missing because the router URI was listed twice
internal_socket_uri_list = [
    db_controller_pull_socket_uri,
    db_controller_router_socket_uri,
    io_controller_pull_socket_uri,
    io_controller_router_socket_uri,
]
def process_segment_rows(halt_event,
                         zeromq_context,
                         args,
                         node_dict,
                         node_databases,
                         raw_segment_rows):
    """
    process handoffs of segment rows

    Bind a REP socket, start args.worker_count worker processes and answer
    each worker request with either a "work" message carrying the next
    segment row or a "stop" message once the rows are exhausted.
    Tombstone rows are handled here, without involving a worker.

    halt_event
        object with an is_set() method; the loop ends when it is set
    zeromq_context
        context used to create the REP socket
    args
        supplies node_name and worker_count; also passed to
        _start_worker_process
    node_dict
        added to the first "work" message sent to each worker
    node_databases
        passed to _process_tombstone and _purge_handoff_from_source_nodes
    raw_segment_rows
        passed to _generate_segment_rows, which yields
        (source_node_names, segment_row) pairs

    A zmq.ZMQError from the receive is re-raised unless it is an interrupted
    system call and halt_event is set. The workers are terminated and the
    socket is closed on every exit path, including exceptions.
    """
    log = logging.getLogger("process_segment_rows")

    rep_socket_uri = ipc_socket_uri(_socket_dir,
                                    args.node_name,
                                    "handoff_client")
    prepare_ipc_path(rep_socket_uri)

    rep_socket = zeromq_context.socket(zmq.REP)
    workers = []
    try:
        rep_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
        rep_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
        log.info("binding rep socket to {0}".format(rep_socket_uri))
        rep_socket.bind(rep_socket_uri)

        log.debug("starting workers")
        for index in range(args.worker_count):
            worker_id = str(index+1)
            workers.append(_start_worker_process(worker_id,
                                                 args,
                                                 rep_socket_uri))

        # loop until all handoffs have been accomplished
        log.debug("start handoffs")
        work_generator = _generate_segment_rows(raw_segment_rows)
        pending_handoff_count = 0
        while not halt_event.is_set():

            # get a segment row to process.
            # If we are at EOF, segment_row = None
            try:
                source_node_names, segment_row = next(work_generator)
            except StopIteration:
                if pending_handoff_count == 0:
                    break
                # workers are still busy: keep looping to collect their
                # results and tell them to stop
                source_node_names, segment_row = None, None

            # if we have a segment row, and it is a tombstone, we can act
            # directly on the node database(s) without sending it to a worker
            if segment_row is not None:
                if segment_row["status"] == segment_status_tombstone:
                    _process_tombstone(node_databases,
                                       source_node_names,
                                       segment_row)
                    _purge_handoff_from_source_nodes(
                        node_databases,
                        source_node_names,
                        segment_row["collection_id"],
                        segment_row["key"],
                        segment_row["unified_id"],
                        segment_row["conjoined_part"],
                        segment_row["handoff_node_id"],
                        segment_status_tombstone)
                    continue
                assert segment_row["status"] == segment_status_final, \
                    segment_row["status"]

            # at this point we either have a segment row in final status, or
            # None, indicating no more data
            # block until we have a ready worker
            try:
                request = rep_socket.recv_pyobj()
            except zmq.ZMQError as zmq_error:
                if is_interrupted_system_call(zmq_error) and \
                   halt_event.is_set():
                    log.warning("breaking due to halt_event")
                    break
                raise
            assert not rep_socket.rcvmore

            # see how the worker handled the previous segment (if any)
            initial_request = False
            if request["message-type"] == "start":
                log.info("{0} initial request".format(request["worker-id"]))
                initial_request = True
            elif request["handoff-successful"]:
                log.info("{0} handoff ({1}, {2}) successful".format(
                    request["worker-id"],
                    request["unified-id"],
                    request["conjoined-part"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1
                _purge_handoff_from_source_nodes(
                    node_databases,
                    request["source-node-names"],
                    request["collection-id"],
                    request["key"],
                    request["unified-id"],
                    request["conjoined-part"],
                    request["handoff-node-id"],
                    segment_status_final)
            else:
                log.error("{0} handoff ({1}, {2}) failed: {3}".format(
                    request["worker-id"],
                    request["unified-id"],
                    request["conjoined-part"],
                    request["error-message"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1

            if segment_row is None:
                # if we have no more work, tell the worker to stop
                work_message = {"message-type" : "stop"}
            else:
                # otherwise, send the segment to the worker
                work_message = {"message-type"      : "work",
                                "source-node-names" : source_node_names,
                                "segment-row"       : segment_row}
                # if this is the worker's first request,
                # send him the node_dict
                if initial_request:
                    work_message["node-dict"] = node_dict

                pending_handoff_count += 1

            rep_socket.send_pyobj(work_message)

        log.debug("end of handoffs")
    finally:
        # runs on normal exit and on any exception, so the worker
        # subprocesses and the bound socket are never left behind
        for worker in workers:
            terminate_subprocess(worker)
        rep_socket.close()
from tools.standard_logging import initialize_logging from tools.zeromq_util import is_interrupted_system_call, \ prepare_ipc_path, \ ipc_socket_uri from tools.process_util import identify_program_dir, \ set_signal_handler, \ poll_subprocess from tools.event_push_client import EventPushClient, unhandled_exception_topic _node_names = os.environ["NIMBUSIO_NODE_NAME_SEQ"].split() _local_node_name = os.environ["NIMBUSIO_NODE_NAME"] _log_path_template = "{0}/nimbusio_service_availability_monitor_{1}.log" _socket_dir = os.environ["NIMBUSIO_SOCKET_DIR"] _pull_socket_uri = ipc_socket_uri(_socket_dir, _local_node_name, "service_availability_monitor") _pull_socket_hwm = 1000 _poll_timeout = 3000 # milliseconds _reporting_interval = 60.0 _ping_process_desc = namedtuple("PingProcessDesc", ["module_dir", "file_name", "service_name", "ping_uris", ]) _ping_process = namedtuple("PingProcess", ["service_name", "node_name", "process", "reachable_state", ]) _ping_process_descs = [
# -*- coding: utf-8 -*- """ internal_sockets.py addresses of sockets used internally by retrieve_source external addresses come from environment variables """ import os from tools.zeromq_util import ipc_socket_uri _local_node_name = os.environ["NIMBUSIO_NODE_NAME"] _socket_dir = os.environ["NIMBUSIO_SOCKET_DIR"] db_controller_pull_socket_uri = ipc_socket_uri(_socket_dir, _local_node_name, "db_controller_pull") db_controller_router_socket_uri = ipc_socket_uri(_socket_dir, _local_node_name, "db_controller_router") io_controller_pull_socket_uri = ipc_socket_uri(_socket_dir, _local_node_name, "io_controller_pull") io_controller_router_socket_uri = ipc_socket_uri(_socket_dir, _local_node_name, "io_controller_router") internal_socket_uri_list = [ db_controller_pull_socket_uri, db_controller_router_socket_uri, io_controller_router_socket_uri, io_controller_router_socket_uri,
def event_publisher_pull_addresses(self):
    """
    Return a list holding one "nimbusio-event-publisher" socket URI for
    each entry of self.node_names, built by ipc_socket_uri from
    self.socket_path.
    """
    base_path = self.socket_path
    addresses = []
    for node_name in self.node_names:
        addresses.append(
            ipc_socket_uri(base_path, node_name, "nimbusio-event-publisher"))
    return addresses
def event_publisher_pull_addresses(self):
    """
    Build the "nimbusio-event-publisher" socket URIs: one call to
    ipc_socket_uri with self.socket_path per name in self.node_names,
    returned as a list in the same order.
    """
    socket_path = self.socket_path
    service_name = "nimbusio-event-publisher"
    return [
        ipc_socket_uri(socket_path, node_name, service_name)
        for node_name in self.node_names
    ]
def process_segment_rows(halt_event, zeromq_context, args, node_dict,
                         node_databases, raw_segment_rows):
    """
    process handoffs of segment rows

    A REP socket is bound and args.worker_count worker processes are
    started. Every worker request is answered with a "work" message holding
    the next segment row, or with a "stop" message when no rows remain.
    Rows in tombstone status are processed directly in this function.

    halt_event
        object with an is_set() method; setting it ends the loop
    zeromq_context
        context from which the REP socket is created
    args
        provides node_name and worker_count; also handed to
        _start_worker_process
    node_dict
        attached to the first "work" message each worker receives
    node_databases
        handed to _process_tombstone and _purge_handoff_from_source_nodes
    raw_segment_rows
        handed to _generate_segment_rows, which yields
        (source_node_names, segment_row) pairs

    A zmq.ZMQError raised while receiving is propagated, except for an
    interrupted system call while halt_event is set. Worker termination and
    socket close happen in a finally clause, so they also run when an
    exception escapes.
    """
    log = logging.getLogger("process_segment_rows")

    rep_socket_uri = ipc_socket_uri(_socket_dir, args.node_name,
                                    "handoff_client")
    prepare_ipc_path(rep_socket_uri)

    rep_socket = zeromq_context.socket(zmq.REP)
    workers = []
    try:
        rep_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
        rep_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
        log.info("binding rep socket to {0}".format(rep_socket_uri))
        rep_socket.bind(rep_socket_uri)

        log.debug("starting workers")
        for index in range(args.worker_count):
            worker_id = str(index + 1)
            workers.append(
                _start_worker_process(worker_id, args, rep_socket_uri))

        # loop until all handoffs have been accomplished
        log.debug("start handoffs")
        work_generator = _generate_segment_rows(raw_segment_rows)
        pending_handoff_count = 0
        while not halt_event.is_set():

            # get a segment row to process.
            # If we are at EOF, segment_row = None
            try:
                source_node_names, segment_row = next(work_generator)
            except StopIteration:
                if pending_handoff_count == 0:
                    break
                # results are still outstanding: continue so the workers
                # can report back and be told to stop
                source_node_names, segment_row = None, None

            # if we have a segment row, and it is a tombstone, we can act
            # directly on the node database(s) without sending it to a worker
            if segment_row is not None:
                if segment_row["status"] == segment_status_tombstone:
                    _process_tombstone(node_databases, source_node_names,
                                       segment_row)
                    _purge_handoff_from_source_nodes(
                        node_databases, source_node_names,
                        segment_row["collection_id"], segment_row["key"],
                        segment_row["unified_id"],
                        segment_row["conjoined_part"],
                        segment_row["handoff_node_id"],
                        segment_status_tombstone)
                    continue
                assert segment_row["status"] == segment_status_final, \
                    segment_row["status"]

            # at this point we either have a segment row in final status, or
            # None, indicating no more data
            # block until we have a ready worker
            try:
                request = rep_socket.recv_pyobj()
            except zmq.ZMQError as zmq_error:
                if is_interrupted_system_call(zmq_error) and \
                   halt_event.is_set():
                    log.warning("breaking due to halt_event")
                    break
                raise
            assert not rep_socket.rcvmore

            # see how the worker handled the previous segment (if any)
            initial_request = False
            if request["message-type"] == "start":
                log.info("{0} initial request".format(request["worker-id"]))
                initial_request = True
            elif request["handoff-successful"]:
                log.info("{0} handoff ({1}, {2}) successful".format(
                    request["worker-id"], request["unified-id"],
                    request["conjoined-part"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1
                _purge_handoff_from_source_nodes(
                    node_databases, request["source-node-names"],
                    request["collection-id"], request["key"],
                    request["unified-id"], request["conjoined-part"],
                    request["handoff-node-id"], segment_status_final)
            else:
                log.error("{0} handoff ({1}, {2}) failed: {3}".format(
                    request["worker-id"], request["unified-id"],
                    request["conjoined-part"], request["error-message"]))
                assert pending_handoff_count > 0
                pending_handoff_count -= 1

            if segment_row is None:
                # if we have no more work, tell the worker to stop
                work_message = {"message-type": "stop"}
            else:
                # otherwise, send the segment to the worker
                work_message = {
                    "message-type": "work",
                    "source-node-names": source_node_names,
                    "segment-row": segment_row
                }
                # if this is the worker's first request,
                # send him the node_dict
                if initial_request:
                    work_message["node-dict"] = node_dict

                pending_handoff_count += 1

            rep_socket.send_pyobj(work_message)

        log.debug("end of handoffs")
    finally:
        # always release the subprocesses and the socket, even when an
        # exception is propagating out of the loop above
        for worker in workers:
            terminate_subprocess(worker)
        rep_socket.close()