Ejemplo n.º 1
0
    def run(self, halt_event):
        """
        poll for I/O ready objects
        This is a task for the time queue
        """
        
        if halt_event.is_set():
            self._log.info("halt_event set: exiting")
            for active_socket in self._active_sockets.keys():
                self._poller.unregister(active_socket)
            self._active_sockets.clear()
            return
        
        next_tasks = list()
        next_interval = time.time() + self._polling_interval

        try:
            result_list = self._poller.poll(timeout=self._poll_timeout)
        except zmq.ZMQError, zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                self._log.info("Interrupted with halt_event set: exiting")
                for active_socket in self._active_sockets.keys():
                    self._poller.unregister(active_socket)
                self._active_sockets.clear()
                return
            raise
Ejemplo n.º 2
0
def main():
    """
    main entry point
    """
    return_code = 0
    args = _parse_command_line()
    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    reporting_socket = zeromq_context.socket(zmq.PUSH)
    reporting_socket.setsockopt(zmq.LINGER, 5000)
    reporting_socket.setsockopt(zmq.SNDHWM, args.hwm)
    reporting_socket.connect(args.reporting_url)

    req_socket = None

    ping_message = {"message-type"   : "ping",
                    "message-id"     : uuid.uuid1().hex, }

    result_message = {"message-type"    : "ping-result",
                      "url"             : args.ping_url,
                      "result"          : None,
                      "socket-reconnection-number"  : 0,
                      "check-number"    : 0, }

    while not halt_event.is_set():
        try:
            req_socket = _process_one_ping(halt_event,
                                           args,
                                           zeromq_context,
                                           ping_message, 
                                           result_message, 
                                           req_socket, 
                                           reporting_socket)
         
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                pass
            else:
                logging.exception(str(zmq_error))
                halt_event.set()
                return_code = 1

        except Exception as instance:
            logging.exception(str(instance))
            halt_event.set()
            return_code = 1

        else:
            halt_event.wait(args.interval)
        
    if req_socket is not None:
        req_socket.close()
    reporting_socket.close()
    zeromq_context.term()

    return return_code
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    memcached_client = memcache.Client(_memcached_nodes)

    zeromq_context = zmq.Context()
    sub_socket = _create_sub_socket(zeromq_context)

    expected_sequence = {
        _cache_update_channel : None,
    }

    while not halt_event.is_set():
        try:
            topic = sub_socket.recv()
            assert sub_socket.rcvmore
            meta = sub_socket.recv()
            if sub_socket.rcvmore:
                data = sub_socket.recv()
            else:
                data = ""

            _process_one_event(memcached_client, 
                               expected_sequence, 
                               topic, 
                               meta, 
                               data)

        except KeyboardInterrupt: # convenience for testing
            log.info("keyboard interrupt: terminating normally")
            halt_event.set()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.info("interrupted system call - ok at shutdown")
            else:
                log.exception("zeromq error processing request")
                return_value = 1
            halt_event.set()
        except Exception:
            log.exception("error processing request")
            return_value = 1
            halt_event.set()

    sub_socket.close()
    zeromq_context.term()

    log.info("program teminates: return value = {0}".format(return_value))
    return return_value
Ejemplo n.º 4
0
def _process_handoff(zeromq_context, halt_event, node_dict, pull_socket,
                     pull_socket_uri, client_tag, source_node_name,
                     dest_node_name, segment_row):
    log = logging.getLogger("_process_handoff")
    log.info("start ({0}, {1}) from {2}".format(segment_row["unified_id"],
                                                segment_row["conjoined_part"],
                                                source_node_name))

    reader_socket = ReqSocket(zeromq_context,
                              _reader_address_dict[source_node_name],
                              client_tag, pull_socket_uri, halt_event)

    writer_socket = ReqSocket(zeromq_context,
                              _writer_address_dict[dest_node_name], client_tag,
                              pull_socket_uri, halt_event)

    forwarder = forwarder_coroutine(node_dict, segment_row, writer_socket,
                                    reader_socket)

    next(forwarder)

    # loop, waiting for messages to the pull socket
    # until the forwarder tells us it is done
    while not halt_event.is_set():
        try:
            message = pull_socket.recv_json()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.info("interrupted system call with halt_event_set")
                break
            log.exception(str(zmq_error))
            sys.exit(1)

        data = list()
        while pull_socket.rcvmore:
            data.append(pull_socket.recv())
        if len(data) == 0:
            data = None

        result = forwarder.send((
            message,
            data,
        ))
        # the forwarder will yield the string 'done' when it is done
        if result is not None:
            assert result == "done", result
            break

    reader_socket.close()
    writer_socket.close()

    if halt_event.is_set():
        raise HaltEvent()

    log.info("done  ({0}, {1}) from {2}".format(segment_row["unified_id"],
                                                segment_row["conjoined_part"],
                                                source_node_name))
Ejemplo n.º 5
0
def main():
    """
    main entry point
    """
    return_code = 0
    args = _parse_command_line()
    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    reporting_socket = zeromq_context.socket(zmq.PUSH)
    reporting_socket.setsockopt(zmq.LINGER, 5000)
    reporting_socket.setsockopt(zmq.SNDHWM, args.hwm)
    reporting_socket.connect(args.reporting_url)

    req_socket = None

    ping_message = {"message-type": "ping", "message-id": uuid.uuid1().hex}

    result_message = {
        "message-type": "ping-result",
        "url": args.ping_url,
        "result": None,
        "socket-reconnection-number": 0,
        "check-number": 0,
    }

    while not halt_event.is_set():
        try:
            req_socket = _process_one_ping(
                halt_event, args, zeromq_context, ping_message, result_message, req_socket, reporting_socket
            )

        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                pass
            else:
                logging.exception(str(zmq_error))
                halt_event.set()
                return_code = 1

        except Exception as instance:
            logging.exception(str(instance))
            halt_event.set()
            return_code = 1

        else:
            halt_event.wait(args.interval)

    if req_socket is not None:
        req_socket.close()
    reporting_socket.close()
    zeromq_context.term()

    return return_code
Ejemplo n.º 6
0
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0
    initialize_logging(_log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    memcached_client = memcache.Client(_memcached_nodes)

    zeromq_context = zmq.Context()
    sub_socket = _create_sub_socket(zeromq_context)

    expected_sequence = {
        _cache_update_channel: None,
    }

    while not halt_event.is_set():
        try:
            topic = sub_socket.recv()
            assert sub_socket.rcvmore
            meta = sub_socket.recv()
            if sub_socket.rcvmore:
                data = sub_socket.recv()
            else:
                data = ""

            _process_one_event(memcached_client, expected_sequence, topic,
                               meta, data)

        except KeyboardInterrupt:  # convenience for testing
            log.info("keyboard interrupt: terminating normally")
            halt_event.set()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.info("interrupted system call - ok at shutdown")
            else:
                log.exception("zeromq error processing request")
                return_value = 1
            halt_event.set()
        except Exception:
            log.exception("error processing request")
            return_value = 1
            halt_event.set()

    sub_socket.close()
    zeromq_context.term()

    log.info("program teminates: return value = {0}".format(return_value))
    return return_value
Ejemplo n.º 7
0
def _process_request(resources):
    """
    Wait for a reply to our last message from the controller.
    """
    log = logging.getLogger("_process_one_transaction")
    log.debug("waiting work request")
    try:
        request = resources.dealer_socket.recv_pyobj()
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error):
            raise InterruptedSystemCall()
        raise

    assert resources.dealer_socket.rcvmore
    control = resources.dealer_socket.recv_pyobj()

    log.debug("user_request_id = {0}; control = {1}".format(
        request["user-request-id"], control))

    assert resources.dealer_socket.rcvmore
    sequence_row = resources.dealer_socket.recv_pyobj()

    value_file_path = compute_value_file_path(_repository_path,
                                              sequence_row["space_id"],
                                              sequence_row["value_file_id"])

    control["result"] = "success"
    control["error-message"] = ""

    if value_file_path in resources.file_cache:
        value_file, _ = resources.file_cache[value_file_path]
        del resources.file_cache[value_file_path]
    else:
        try:
            value_file = open(value_file_path, "rb")
        except Exception as instance:
            log.exception("user_request_id = {0}, " \
                          "read {1}".format(request["user-request-id"],
                                            value_file_path))
            resources.event_push_client.exception("error_opening_value_file",
                                                  str(instance))
            control["result"] = "error_opening_value_file"
            control["error-message"] = str(instance)

        if control["result"] != "success":
            _send_error_reply(resources, request, control)
            return

    read_offset = \
        sequence_row["value_file_offset"] + \
        (control["left-offset"] * encoded_block_slice_size)


    read_size = \
        sequence_row["size"] - \
        (control["left-offset"] * encoded_block_slice_size) - \
        (control["right-offset"] * encoded_block_slice_size)

    # Ticket #84 handle a short block
    # the last block in the file may be smaller than encoded_block_slice_size
    # so we might have subtracted too much for the right offset
    if control["right-offset"] > 0:
        block_modulus = sequence_row["size"] % encoded_block_slice_size
        last_block_size = (encoded_block_slice_size if block_modulus == 0 else \
                           block_modulus)
        last_block_delta = encoded_block_slice_size - last_block_size
        read_size += last_block_delta

    try:
        value_file.seek(read_offset)
        encoded_data = value_file.read(read_size)
    except Exception as instance:
        log.exception("user_request_id = {0}, " \
                      "read {1}".format(request["user-request-id"],
                                        value_file_path))
        resources.event_push_client.exception("error_reading_value_file",
                                              str(instance))
        control["result"] = "error_reading_vqalue_file"
        control["error-message"] = str(instance)

    if control["result"] != "success":
        value_file.close()
        _send_error_reply(resources, request, control)
        return

    resources.file_cache[value_file_path] = value_file, time.time()

    if len(encoded_data) != read_size:
        error_message = "{0} size mismatch {1} {2}".format(
            request["retrieve-id"], len(encoded_data), read_size)
        log.error("user_request_id = {0}, {1}".format(
            request["user-request-id"], error_message))
        resources.event_push_client.error("size_mismatch", error_message)
        control["result"] = "size_mismatch"
        control["error-message"] = error_message

    if control["result"] != "success":
        _send_error_reply(resources, request, control)
        return

    encoded_block_list = list(encoded_block_generator(encoded_data))

    segment_size = 0
    segment_adler32 = 0
    segment_md5 = hashlib.md5()
    for encoded_block in encoded_block_list:
        segment_size += len(encoded_block)
        segment_adler32 = zlib.adler32(encoded_block, segment_adler32)
        segment_md5.update(encoded_block)
    segment_md5_digest = segment_md5.digest()

    reply = {
        "message-type": "retrieve-key-reply",
        "user-request-id": request["user-request-id"],
        "client-tag": request["client-tag"],
        "message-id": request["message-id"],
        "retrieve-id": request["retrieve-id"],
        "segment-unified-id": request["segment-unified-id"],
        "segment-num": request["segment-num"],
        "segment-size": segment_size,
        "zfec-padding-size": sequence_row["zfec_padding_size"],
        "segment-adler32": segment_adler32,
        "segment-md5-digest": b64encode(segment_md5_digest).decode("utf-8"),
        "sequence-num": None,
        "completed": control["completed"],
        "result": "success",
        "error-message": "",
    }

    push_socket = _get_reply_push_socket(resources, request["client-address"])
    push_socket.send_json(reply, zmq.SNDMORE)
    for encoded_block in encoded_block_list[:-1]:
        push_socket.send(encoded_block, zmq.SNDMORE)
    push_socket.send(encoded_block_list[-1])
Ejemplo n.º 8
0
def _process_handoff(zeromq_context, 
                     halt_event, 
                     node_dict,
                     pull_socket,
                     pull_socket_uri,
                     client_tag,
                     source_node_name, 
                     dest_node_name,
                     segment_row):
    log = logging.getLogger("_process_handoff")
    log.info("start ({0}, {1}) from {2}".format(segment_row["unified_id"],
                                                segment_row["conjoined_part"],
                                                source_node_name))

    reader_socket = ReqSocket(zeromq_context,
                              _reader_address_dict[source_node_name],
                              client_tag,
                              pull_socket_uri,
                              halt_event)

    writer_socket = ReqSocket(zeromq_context,
                              _writer_address_dict[dest_node_name],
                              client_tag,
                              pull_socket_uri,
                              halt_event)

    forwarder = forwarder_coroutine(node_dict,
                                    segment_row, 
                                    writer_socket, 
                                    reader_socket)

    next(forwarder)

    # loop, waiting for messages to the pull socket
    # until the forwarder tells us it is done
    while not halt_event.is_set():
        try:
            message = pull_socket.recv_json()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.info("interrupted system call with halt_event_set")
                break
            log.exception(str(zmq_error))
            sys.exit(1)

        data = list()
        while pull_socket.rcvmore:
            data.append(pull_socket.recv())
        if len(data) == 0:
            data = None

        result = forwarder.send((message, data, ))
        # the forwarder will yield the string 'done' when it is done
        if result is not None:
            assert result == "done", result
            break

    reader_socket.close()
    writer_socket.close()

    if halt_event.is_set():
        raise HaltEvent()

    log.info("done  ({0}, {1}) from {2}".format(segment_row["unified_id"],
                                                segment_row["conjoined_part"],
                                                source_node_name))
Ejemplo n.º 9
0
def main(worker_id, host_name, base_port, dest_node_name, rep_socket_uri):
    """
    main entry point
    return 0 on normal termination (exit code)
    """
    log = logging.getLogger("main")

    client_tag = _client_tag_template.format(worker_id)
    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context =  zmq.Context()

    log.debug("creating pull socket")
    pull_socket_uri = "tcp://{0}:{1}".format(socket.gethostbyname(host_name), 
                                             base_port+worker_id)

    pull_socket = zeromq_context.socket(zmq.PULL)
    pull_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
    log.info("binding pull socket to {0}".format(pull_socket_uri))
    pull_socket.bind(pull_socket_uri)

    req_socket = zeromq_context.socket(zmq.REQ)
    req_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
    req_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
    req_socket.connect(rep_socket_uri)

    log.info("sending 'start' message")
    # notify our parent that we are ready to receive work
    request = {"message-type" : "start",
               "worker-id"    : worker_id}
    req_socket.send_pyobj(request)

    log.info("starting message loop")
    node_dict = None
    while not halt_event.is_set():
        try:
            message = req_socket.recv_pyobj()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.info("interrupted system call wiht halt_event_set")
                break
            log.exception(str(zmq_error))
            sys.exit(1)
        assert not req_socket.rcvmore

        if message["message-type"] == "stop":
            log.info("'stop' message received")
            break

        assert message["message-type"] == "work", message["message-type"]

        # we expect our parent to send us the node dict in our first message
        if "node-dict" in message:
            node_dict = message["node-dict"]
        assert node_dict is not None

        # aliases for brevity
        segment_row = message["segment-row"]
        source_node_names = message["source-node-names"]

        # send back enough information to purge the handoffs, if successful
        request = {"message-type"         : "handoff-complete",
                   "worker-id"            : worker_id,
                   "handoff-successful"   : False,
                   "unified-id"           : segment_row["unified_id"],
                   "collection-id"        : segment_row["collection_id"],
                   "key"                  : segment_row["key"],
                   "conjoined-part"       : segment_row["conjoined_part"],
                   "handoff-node-id"      : segment_row["handoff_node_id"],
                   "source-node-names"    : source_node_names,
                   "error-message"        : ""}

        for source_node_name in source_node_names:
            try:
                _process_handoff(zeromq_context, 
                                 halt_event,
                                 node_dict,
                                 pull_socket,
                                 pull_socket_uri,
                                 client_tag,
                                 source_node_name, 
                                 dest_node_name,
                                 segment_row)
            except Exception as instance:
                log.exception(instance)
                request["error-message"] = "".join([request["error-message"],
                                                    str(instance)])
            else:
                request["handoff-successful"] = True
                break

        req_socket.send_pyobj(request)
    log.info("end message loop")

    pull_socket.close()
    req_socket.close()
    zeromq_context.term()
    log.info("program terminates")
    return 0
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"], 
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    resources = \
        _resources_tuple(halt_event=Event(),
                         zeromq_context=zeromq_context,
                         reply_push_sockets=dict(),
                         pull_socket=zeromq_context.socket(zmq.PULL),
                         io_controller_push_socket=\
                            zeromq_context.socket(zmq.PUSH),
                         router_socket=zeromq_context.socket(zmq.ROUTER),
                         event_push_client=\
                            EventPushClient(zeromq_context, 
                                            "rs_db_pool_controller"),
                         active_retrieves=dict(),
                         pending_work_queue=deque(),
                         available_ident_queue=deque())

    log.debug("binding to {0}".format(db_controller_pull_socket_uri))
    resources.pull_socket.bind(db_controller_pull_socket_uri)

    log.debug("connecting to {0}".format(io_controller_pull_socket_uri))
    resources.io_controller_push_socket.connect(io_controller_pull_socket_uri)

    resources.router_socket.setsockopt(zmq.LINGER, 1000)
    log.debug("binding to {0}".format(db_controller_router_socket_uri))
    resources.router_socket.bind(db_controller_router_socket_uri)

    # we poll the sockets for readability, we assume we can always
    # write to the router socket
    poller = zmq.Poller()
    poller.register(resources.pull_socket, zmq.POLLIN | zmq.POLLERR)
    poller.register(resources.router_socket, zmq.POLLIN| zmq.POLLERR)

    worker_processes = list()
    for index in range(_worker_count):
        worker_processes.append(_launch_database_pool_worker(index+1))
    
    last_report_time = 0.0
    try:
        while not halt_event.is_set():
            for worker_process in worker_processes:
                poll_subprocess(worker_process)
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = \
                        "error flags from zmq {0}".format(active_socket)
                    log.error(error_message)
                    raise PollError(error_message) 
                if active_socket is resources.pull_socket:
                    _read_pull_socket(resources)
                elif active_socket is resources.router_socket:
                    _read_router_socket(resources)
                else:
                    log.error("unknown socket {0}".format(active_socket))
            current_time = time.time()
            elapsed_time = current_time - last_report_time
            if elapsed_time > _reporting_interval:
                report_message = \
                    "{0:,} active_retrives, " \
                    "{1:,} pending_work_queue entries, " \
                    "{2:,} available_ident_queue entries" \
                    "".format(len(resources.active_retrieves),
                              len(resources.pending_work_queue),
                              len(resources.available_ident_queue))
                log.info(report_message)
                resources.event_push_client.info(
                    "queue_sizes", 
                    report_message,
                    active_retrieves=len(resources.active_retrieves),
                    pending_work_queue=len(resources.pending_work_queue),
                    available_ident_queue=len(resources.available_ident_queue))

                last_report_time = current_time

    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program teminates normally with interrupted system call")
        else:
            log.exception("zeromq error processing request")
            resources.event_push_client.exception(unhandled_exception_topic,
                                                  "zeromq_error",
                                                  exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        resources.event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminates normally")
    finally:
        for worker_process in worker_processes:
            terminate_subprocess(worker_process)
        resources.pull_socket.close()
        resources.io_controller_push_socket.close()
        resources.router_socket.close()
        for push_socket in resources.reply_push_sockets.values():
            push_socket.close()
        resources.event_push_client.close()
        zeromq_context.term()

    return return_value
Ejemplo n.º 11
0
def process_segment_rows(halt_event, zeromq_context, args, node_dict,
                         node_databases, raw_segment_rows):
    """
    process handoffs of segment rows
    """
    log = logging.getLogger("process_segment_rows")

    rep_socket_uri = ipc_socket_uri(_socket_dir, args.node_name,
                                    "handoff_client")
    prepare_ipc_path(rep_socket_uri)

    rep_socket = zeromq_context.socket(zmq.REP)
    rep_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
    rep_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
    log.info("binding rep socket to {0}".format(rep_socket_uri))
    rep_socket.bind(rep_socket_uri)

    log.debug("starting workers")
    workers = list()
    for index in range(args.worker_count):
        worker_id = str(index + 1)
        workers.append(_start_worker_process(worker_id, args, rep_socket_uri))

    # loop until all handoffs have been accomplished
    log.debug("start handoffs")
    work_generator = _generate_segment_rows(raw_segment_rows)
    pending_handoff_count = 0
    while not halt_event.is_set():

        # get a segment row to process. If we are at EOF, segment_row = None
        try:
            source_node_names, segment_row = next(work_generator)
        except StopIteration:
            if pending_handoff_count == 0:
                break
            else:
                source_node_names, segment_row = None, None

        # if we have a segment row, and it is a tombstone, we can act
        # directly on the node database(s) without sending it to a worker
        if segment_row is not None:
            if segment_row["status"] == segment_status_tombstone:
                _process_tombstone(node_databases, source_node_names,
                                   segment_row)
                _purge_handoff_from_source_nodes(
                    node_databases, source_node_names,
                    segment_row["collection_id"], segment_row["key"],
                    segment_row["unified_id"], segment_row["conjoined_part"],
                    segment_row["handoff_node_id"], segment_status_tombstone)
                continue
            assert segment_row["status"] == segment_status_final, \
                segment_row["status"]

        # at this point we eaither have a segment row in final status, or
        # None, indicating no more data
        # block until we have a ready worker
        try:
            request = rep_socket.recv_pyobj()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.warn("breaking due to halt_event")
                break
            raise
        assert not rep_socket.rcvmore

        # see how the worker handled the previous segment (if any)
        initial_request = False
        if request["message-type"] == "start":
            log.info("{0} initial request".format(request["worker-id"]))
            initial_request = True
        elif request["handoff-successful"]:
            log.info("{0} handoff ({1}, {2}) successful".format(
                request["worker-id"], request["unified-id"],
                request["conjoined-part"]))
            assert pending_handoff_count > 0
            pending_handoff_count -= 1
            _purge_handoff_from_source_nodes(
                node_databases, request["source-node-names"],
                request["collection-id"], request["key"],
                request["unified-id"], request["conjoined-part"],
                request["handoff-node-id"], segment_status_final)
        else:
            log.error("{0} handoff ({1}, {2}) failed: {3}".format(
                request["worker-id"], request["unified-id"],
                request["conjoined-part"], request["error-message"]))
            assert pending_handoff_count > 0
            pending_handoff_count -= 1

        if segment_row is None:
            # if we have no more work, tell the worker to stop
            work_message = {"message-type": "stop"}
        else:
            # otherwise, send the segment to the worker
            work_message = {
                "message-type": "work",
                "source-node-names": source_node_names,
                "segment-row": segment_row
            }
            # if this is the worker's first request, send him the node_dict
            if initial_request:
                work_message["node-dict"] = node_dict
            pending_handoff_count += 1

        rep_socket.send_pyobj(work_message)

    log.debug("end of handoffs")

    for worker in workers:
        terminate_subprocess(worker)

    rep_socket.close()
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"], 
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    prepare_ipc_path(_pull_socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    pull_socket = _bind_pull_socket(zeromq_context)

    event_push_client = EventPushClient(zeromq_context, "service_availability")
    event_push_client.info("program-starts", 
                           "service availability monitor starts")

    message_count = 0
    try:
        ping_process_dict = _start_ping_processes(halt_event)

        while not halt_event.is_set():

            if message_count % len(ping_process_dict) == 0:
                for ping_process in ping_process_dict.values():
                    poll_subprocess(ping_process.process)

            message = pull_socket.recv_pyobj()
            assert not pull_socket.rcvmore

            _process_one_message(message, ping_process_dict, event_push_client)

            message_count += 1

    except KeyboardInterrupt: # convenience for testing
        log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program terminating normally; interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminating normally")

    log.debug("terminating subprocesses")
    _terminate_ping_processes(ping_process_dict)
    pull_socket.close()
    event_push_client.close()
    zeromq_context.term()

    return return_value
Ejemplo n.º 13
0
def process_segment_rows(halt_event, 
                         zeromq_context, 
                         args, 
                         node_dict,
                         node_databases,
                         raw_segment_rows):
    """
    process handoffs of segment rows
    """
    log = logging.getLogger("process_segment_rows")

    rep_socket_uri = ipc_socket_uri(_socket_dir, 
                                    args.node_name,
                                    "handoff_client")
    prepare_ipc_path(rep_socket_uri)

    rep_socket = zeromq_context.socket(zmq.REP)
    rep_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
    rep_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
    log.info("binding rep socket to {0}".format(rep_socket_uri))
    rep_socket.bind(rep_socket_uri)

    log.debug("starting workers")
    workers = list()
    for index in range(args.worker_count):
        worker_id = str(index+1)
        workers.append(_start_worker_process(worker_id, args, rep_socket_uri))

    # loop until all handoffs have been accomplished
    log.debug("start handoffs")
    work_generator =  _generate_segment_rows(raw_segment_rows)
    pending_handoff_count = 0
    while not halt_event.is_set():

        # get a segment row to process. If we are at EOF, segment_row = None
        try:
            source_node_names, segment_row = next(work_generator)
        except StopIteration:
            if pending_handoff_count == 0:
                break
            else:
                source_node_names, segment_row = None, None

        # if we have a segment row, and it is a tombstone, we can act
        # directly on the node database(s) without sending it to a worker
        if segment_row is not None:
            if segment_row["status"] == segment_status_tombstone:
                _process_tombstone(node_databases, 
                                   source_node_names, 
                                   segment_row)
                _purge_handoff_from_source_nodes(node_databases,
                                                 source_node_names,
                                                 segment_row["collection_id"],
                                                 segment_row["key"],
                                                 segment_row["unified_id"],
                                                 segment_row["conjoined_part"],
                                                 segment_row["handoff_node_id"],
                                                 segment_status_tombstone)
                continue
            assert segment_row["status"] == segment_status_final, \
                segment_row["status"]
    
        # at this point we eaither have a segment row in final status, or
        # None, indicating no more data
        # block until we have a ready worker
        try:
            request = rep_socket.recv_pyobj()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.warn("breaking due to halt_event")
                break
            raise
        assert not rep_socket.rcvmore

        # see how the worker handled the previous segment (if any)
        initial_request = False
        if request["message-type"] == "start":
            log.info("{0} initial request".format(request["worker-id"]))
            initial_request = True
        elif request["handoff-successful"]:
            log.info("{0} handoff ({1}, {2}) successful".format(
                request["worker-id"], 
                request["unified-id"], 
                request["conjoined-part"]))
            assert pending_handoff_count > 0
            pending_handoff_count -= 1
            _purge_handoff_from_source_nodes(node_databases, 
                                             request["source-node-names"],
                                             request["collection-id"],
                                             request["key"],
                                             request["unified-id"],
                                             request["conjoined-part"],
                                             request["handoff-node-id"],
                                             segment_status_final)
        else:
            log.error("{0} handoff ({1}, {2}) failed: {3}".format(
                request["worker-id"],
                request["unified-id"], 
                request["conjoined-part"],
                request["error-message"]))
            assert pending_handoff_count > 0
            pending_handoff_count -= 1

        if segment_row is None:
            # if we have no more work, tell the worker to stop
            work_message = {"message-type"        : "stop"}
        else:
            # otherwise, send the segment to the worker 
            work_message = {"message-type"        : "work",
                            "source-node-names"   : source_node_names,
                            "segment-row"         : segment_row}
            # if this is the worker's first request, send him the node_dict
            if initial_request:
                work_message["node-dict"] = node_dict
            pending_handoff_count += 1

        rep_socket.send_pyobj(work_message)

    log.debug("end of handoffs")

    for worker in workers:
        terminate_subprocess(worker)

    rep_socket.close()
Ejemplo n.º 14
0
def main(worker_id, host_name, base_port, dest_node_name, rep_socket_uri):
    """
    main entry point
    return 0 on normal termination (exit code)
    """
    log = logging.getLogger("main")

    client_tag = _client_tag_template.format(worker_id)
    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    log.debug("creating pull socket")
    pull_socket_uri = "tcp://{0}:{1}".format(socket.gethostbyname(host_name),
                                             base_port + worker_id)

    pull_socket = zeromq_context.socket(zmq.PULL)
    pull_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
    log.info("binding pull socket to {0}".format(pull_socket_uri))
    pull_socket.bind(pull_socket_uri)

    req_socket = zeromq_context.socket(zmq.REQ)
    req_socket.setsockopt(zmq.RCVHWM, _socket_high_water_mark)
    req_socket.setsockopt(zmq.SNDHWM, _socket_high_water_mark)
    req_socket.connect(rep_socket_uri)

    log.info("sending 'start' message")
    # notify our parent that we are ready to receive work
    request = {"message-type": "start", "worker-id": worker_id}
    req_socket.send_pyobj(request)

    log.info("starting message loop")
    node_dict = None
    while not halt_event.is_set():
        try:
            message = req_socket.recv_pyobj()
        except zmq.ZMQError as zmq_error:
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                log.info("interrupted system call wiht halt_event_set")
                break
            log.exception(str(zmq_error))
            sys.exit(1)
        assert not req_socket.rcvmore

        if message["message-type"] == "stop":
            log.info("'stop' message received")
            break

        assert message["message-type"] == "work", message["message-type"]

        # we expect our parent to send us the node dict in our first message
        if "node-dict" in message:
            node_dict = message["node-dict"]
        assert node_dict is not None

        # aliases for brevity
        segment_row = message["segment-row"]
        source_node_names = message["source-node-names"]

        # send back enough information to purge the handoffs, if successful
        request = {
            "message-type": "handoff-complete",
            "worker-id": worker_id,
            "handoff-successful": False,
            "unified-id": segment_row["unified_id"],
            "collection-id": segment_row["collection_id"],
            "key": segment_row["key"],
            "conjoined-part": segment_row["conjoined_part"],
            "handoff-node-id": segment_row["handoff_node_id"],
            "source-node-names": source_node_names,
            "error-message": ""
        }

        for source_node_name in source_node_names:
            try:
                _process_handoff(zeromq_context, halt_event, node_dict,
                                 pull_socket, pull_socket_uri, client_tag,
                                 source_node_name, dest_node_name, segment_row)
            except Exception as instance:
                log.exception(instance)
                request["error-message"] = "".join(
                    [request["error-message"],
                     str(instance)])
            else:
                request["handoff-successful"] = True
                break

        req_socket.send_pyobj(request)
    log.info("end message loop")

    pull_socket.close()
    req_socket.close()
    zeromq_context.term()
    log.info("program terminates")
    return 0
Ejemplo n.º 15
0
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"], 
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    prepare_ipc_path(_pull_socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    pull_socket = _bind_pull_socket(zeromq_context)

    event_push_client = EventPushClient(zeromq_context, "service_availability")
    event_push_client.info("program-starts", 
                           "service availability monitor starts")

    message_count = 0
    try:
        ping_process_dict = _start_ping_processes(halt_event)

        while not halt_event.is_set():

            if message_count % len(ping_process_dict) == 0:
                for ping_process in ping_process_dict.values():
                    poll_subprocess(ping_process.process)

            message = pull_socket.recv_pyobj()
            assert not pull_socket.rcvmore

            _process_one_message(message, ping_process_dict, event_push_client)

            message_count += 1

    except KeyboardInterrupt: # convenience for testing
        log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program terminating normally; interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminating normally")

    log.debug("terminating subprocesses")
    _terminate_ping_processes(ping_process_dict)
    pull_socket.close()
    event_push_client.close()
    zeromq_context.term()

    return return_value
Ejemplo n.º 16
0
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    halt_event = Event()
    set_signal_handler(halt_event)

    zeromq_context = zmq.Context()

    resources = \
        _resources_tuple(halt_event=Event(),
                         volume_by_space_id=_volume_name_by_space_id(),
                         pull_socket=zeromq_context.socket(zmq.PULL),
                         router_socket=zeromq_context.socket(zmq.ROUTER),
                         event_push_client=\
                            EventPushClient(zeromq_context,
                                            "rs_io_controller"),
                         pending_work_by_volume=defaultdict(deque),
                         available_ident_by_volume=defaultdict(deque))

    log.debug("binding to {0}".format(io_controller_pull_socket_uri))
    resources.pull_socket.bind(io_controller_pull_socket_uri)

    resources.router_socket.setsockopt(zmq.LINGER, 1000)
    log.debug("binding to {0}".format(io_controller_router_socket_uri))
    resources.router_socket.bind(io_controller_router_socket_uri)

    # we poll the sockets for readability, we assume we can always
    # write to the router socket
    poller = zmq.Poller()
    poller.register(resources.pull_socket, zmq.POLLIN | zmq.POLLERR)
    poller.register(resources.router_socket, zmq.POLLIN | zmq.POLLERR)

    worker_processes = list()
    for volume_name in set(resources.volume_by_space_id.values()):
        for index in range(_worker_count):
            worker_processes.append(_launch_io_worker(volume_name, index + 1))

    last_report_time = 0.0
    try:
        while not halt_event.is_set():
            for worker_process in worker_processes:
                poll_subprocess(worker_process)
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = \
                        "error flags from zmq {0}".format(active_socket)
                    log.error(error_message)
                    raise PollError(error_message)
                if active_socket is resources.pull_socket:
                    _read_pull_socket(resources)
                elif active_socket is resources.router_socket:
                    _read_router_socket(resources)
                else:
                    log.error("unknown socket {0}".format(active_socket))

            current_time = time.time()
            elapsed_time = current_time - last_report_time
            if elapsed_time > _reporting_interval:
                pending_work = 0
                for volume_queue in resources.pending_work_by_volume.values():
                    pending_work += len(volume_queue)
                report_message = \
                    "{0:,} pending_work entries".format(pending_work)
                log.info(report_message)
                resources.event_push_client.info("queue_sizes",
                                                 report_message,
                                                 pending_work=pending_work)

                last_report_time = current_time

    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program teminates normally with interrupted system call")
        else:
            log.exception("zeromq error processing request")
            resources.event_push_client.exception(unhandled_exception_topic,
                                                  "zeromq_error",
                                                  exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        resources.event_push_client.exception(
            unhandled_exception_topic,
            str(instance),
            exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminates normally")
    finally:
        for worker_process in worker_processes:
            terminate_subprocess(worker_process)
        resources.pull_socket.close()
        resources.router_socket.close()
        resources.event_push_client.close()
        zeromq_context.term()

    return return_value
Ejemplo n.º 17
0
    def run(self, halt_event):
        """
        poll for I/O ready objects
        This is a task for the time queue
        """

        if halt_event.is_set():
            self._log.info("halt_event set: exiting")
            for active_socket in self._active_sockets.keys():
                self._poller.unregister(active_socket)
            self._active_sockets.clear()
            return

        next_tasks = list()
        next_interval = time.time() + self._polling_interval

        try:
            result_list = self._poller.poll(timeout=self._poll_timeout)
        except zmq.ZMQError:
            zmq_error = sys.exc_info()[1]
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                self._log.info("Interrupted with halt_event set: exiting")
                for active_socket in self._active_sockets.keys():
                    self._poller.unregister(active_socket)
                self._active_sockets.clear()
                return
            raise

        for active_socket, event_flags in result_list:
            if active_socket not in self._active_sockets:
                self._log.warn("Ignoring unknown active_socket %s" % (
                    active_socket,
                ))
                self._poller.unregister(active_socket)
                continue

            if event_flags & zmq.POLLERR:
                message = ("Error flag from poll() %s" % (
                    active_socket,
                ))
                self._log.error(message)
                raise ZeroMQPollsterError(message)

            callback = self._active_sockets[active_socket]

            readable = (True if event_flags & zmq.POLLIN else False)
            writable = (True if event_flags & zmq.POLLOUT else False)

            result_list = callback(
                active_socket,
                readable=readable,
                writable=writable,
            )
            if result_list is not None:
                next_tasks.extend(result_list)

            # 2010-10-25 dougfort -- If the socket was readable, we probably
            # picked up new work. So go back and poll again right away.
            # These zmq sockets seem to pretty well always be writable,
            # so don't assume we actually did anything.
            if readable:
                next_interval = time.time()

        # put ourselves into the time queue to run at the next polling interval
        next_tasks.append((self.run, next_interval, ))
        return next_tasks
Ejemplo n.º 18
0
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"], _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    for internal_socket_uri in internal_socket_uri_list:
        prepare_ipc_path(internal_socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    database_pool_controller = _launch_database_pool_controller()
    io_controller = _launch_io_controller()

    zeromq_context = zmq.Context()
    rep_socket = _bind_rep_socket(zeromq_context)
    db_controller_push_socket = _connect_db_controller_push_socket(zeromq_context)
    event_push_client = EventPushClient(zeromq_context, "retrieve_source")
    event_push_client.info("program-starts", "retrieve source starts")

    # we poll the sockets for readability, we assume we can always
    # write to the push client sockets
    poller = zmq.Poller()
    poller.register(rep_socket, zmq.POLLIN | zmq.POLLERR)

    last_report_time = 0.0
    request_count = 0
    try:
        while not halt_event.is_set():
            poll_subprocess(database_pool_controller)
            poll_subprocess(io_controller)

            # we've only registered one socket, so we could use an 'if' here,
            # but this 'for' works ok and it has the same form as the other
            # places where we use poller
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = "error flags from zmq {0}".format(active_socket)
                    log.error(error_message)
                    raise PollError(error_message)

                assert active_socket is rep_socket

                _process_one_request(rep_socket, db_controller_push_socket)

                request_count += 1

            current_time = time.time()
            elapsed_time = current_time - last_report_time
            if elapsed_time > _reporting_interval:
                report_message = "{0:,} requests".format(request_count)
                log.info(report_message)
                event_push_client.info("request_count", report_message, request_count=request_count)
                last_report_time = current_time
                request_count = 0

    except KeyboardInterrupt:  # convenience for testing
        log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program teminates normally with interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic, "zeromq_error", exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic, str(instance), exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminates normally")
    finally:
        terminate_subprocess(database_pool_controller)
        terminate_subprocess(io_controller)
        rep_socket.close()
        db_controller_push_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return return_value
Ejemplo n.º 19
0
    def run(self, halt_event):
        """
        poll for I/O ready objects
        This is a task for the time queue
        """

        if halt_event.is_set():
            self._log.info("halt_event set: exiting")
            for active_socket in self._active_sockets.keys():
                self._poller.unregister(active_socket)
            self._active_sockets.clear()
            return

        next_tasks = list()
        next_interval = time.time() + self._polling_interval

        try:
            result_list = self._poller.poll(timeout=self._poll_timeout)
        except zmq.ZMQError:
            zmq_error = sys.exc_info()[1]
            if is_interrupted_system_call(zmq_error) and halt_event.is_set():
                self._log.info("Interrupted with halt_event set: exiting")
                for active_socket in self._active_sockets.keys():
                    self._poller.unregister(active_socket)
                self._active_sockets.clear()
                return
            raise

        for active_socket, event_flags in result_list:
            if active_socket not in self._active_sockets:
                self._log.warn("Ignoring unknown active_socket %s" %
                               (active_socket, ))
                self._poller.unregister(active_socket)
                continue

            if event_flags & zmq.POLLERR:
                message = ("Error flag from poll() %s" % (active_socket, ))
                self._log.error(message)
                raise ZeroMQPollsterError(message)

            callback = self._active_sockets[active_socket]

            readable = (True if event_flags & zmq.POLLIN else False)
            writable = (True if event_flags & zmq.POLLOUT else False)

            result_list = callback(
                active_socket,
                readable=readable,
                writable=writable,
            )
            if result_list is not None:
                next_tasks.extend(result_list)

            # 2010-10-25 dougfort -- If the socket was readable, we probably
            # picked up new work. So go back and poll again right away.
            # These zmq sockets seem to pretty well always be writable,
            # so don't assume we actually did anything.
            if readable:
                next_interval = time.time()

        # put ourselves into the time queue to run at the next polling interval
        next_tasks.append((
            self.run,
            next_interval,
        ))
        return next_tasks
Ejemplo n.º 20
0
def _process_one_transaction(dealer_socket, 
                             database_connection, 
                             event_push_client):
    """
    Wait for a reply to our last message from the controller.
    This will be a query request.
    We send the results to the controller and repeat the cycle, waiting for
    a reply
    """
    log = logging.getLogger("_process_one_transaction")
    log.debug("waiting work request")
    try:
        request = dealer_socket.recv_pyobj()
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error):
            raise InterruptedSystemCall()
        raise
    assert dealer_socket.rcvmore
    control = dealer_socket.recv_pyobj()

    fields = _define_seq_val_fields()
    if request["handoff-node-id"] is None:
        query = _all_sequence_rows_for_segment_query.format(fields)
    else:
        query = _all_sequence_rows_for_handoff_query.format(fields)

    control["result"] = "success"
    control["error-message"] = ""
    try:
        result = database_connection.fetch_all_rows(query, request)    
    except psycopg2.OperationalError as instance:
        error_message = "database error {0}".format(instance)
        event_push_client.error("database_error", error_message)
        control["result"] = "database_error"
        control["error-message"] = error_message
    else:        
        if len(result) == 0:
            control["result"] = "no_sequence_rows_found"
            control["error-message"] = "no sequence rows found"

    if control["result"] != "success":
        log.error("user_request_id = {0}, " \
                  "{1} {2}".format(request["user-request-id"],
                                  control["result"], 
                                  control["error-message"]))
        dealer_socket.send_pyobj(request, zmq.SNDMORE)
        dealer_socket.send_pyobj(control)
        return

    result_list = list()
    for row in result:
        row_list = list(row)
        segment_sequence_row = segment_sequence_template._make(row_list[:-1]) 
        space_id = row_list[-1]
        row_dict = dict(segment_sequence_row._asdict().items())
        row_dict["hash"] = bytes(row_dict["hash"])
        row_dict["space_id"] = space_id
        result_list.append(row_dict)

    log.debug("user_request_id = {0}, " \
              " sending request back to controller".format(
              request["user-request-id"]))
    
    dealer_socket.send_pyobj(request, zmq.SNDMORE)
    dealer_socket.send_pyobj(control, zmq.SNDMORE)
    dealer_socket.send_pyobj(result_list)
Ejemplo n.º 21
0
def _process_request(resources):
    """
    Wait for a reply to our last message from the controller.
    """
    log = logging.getLogger("_process_one_transaction")
    log.debug("waiting work request")
    try:
        request = resources.dealer_socket.recv_pyobj()
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error):
            raise InterruptedSystemCall()
        raise

    assert resources.dealer_socket.rcvmore
    control = resources.dealer_socket.recv_pyobj()

    log.debug("user_request_id = {0}; control = {1}".format(
              request["user-request-id"], control))

    assert resources.dealer_socket.rcvmore
    sequence_row = resources.dealer_socket.recv_pyobj()

    value_file_path = compute_value_file_path(_repository_path, 
                                              sequence_row["space_id"], 
                                              sequence_row["value_file_id"]) 

    control["result"] = "success"
    control["error-message"] = ""

    if value_file_path in resources.file_cache:
        value_file, _ = resources.file_cache[value_file_path]
        del resources.file_cache[value_file_path]
    else:
        try:
            value_file = open(value_file_path, "rb")
        except Exception as instance:
            log.exception("user_request_id = {0}, " \
                          "read {1}".format(request["user-request-id"],
                                            value_file_path))
            resources.event_push_client.exception("error_opening_value_file", 
                                                  str(instance))
            control["result"] = "error_opening_value_file"
            control["error-message"] = str(instance)
            
        if control["result"] != "success":
            _send_error_reply(resources, request, control)
            return

    read_offset = \
        sequence_row["value_file_offset"] + \
        (control["left-offset"] * encoded_block_slice_size)


    read_size = \
        sequence_row["size"] - \
        (control["left-offset"] * encoded_block_slice_size) - \
        (control["right-offset"] * encoded_block_slice_size) 

    # Ticket #84 handle a short block
    # the last block in the file may be smaller than encoded_block_slice_size
    # so we might have subtracted too much for the right offset
    if control["right-offset"] > 0:
        block_modulus = sequence_row["size"] % encoded_block_slice_size
        last_block_size = (encoded_block_slice_size if block_modulus == 0 else \
                           block_modulus)
        last_block_delta = encoded_block_slice_size - last_block_size
        read_size += last_block_delta 

    try:
        value_file.seek(read_offset)
        encoded_data = value_file.read(read_size)
    except Exception as instance:
        log.exception("user_request_id = {0}, " \
                      "read {1}".format(request["user-request-id"],
                                        value_file_path))
        resources.event_push_client.exception("error_reading_value_file", 
                                              str(instance))
        control["result"] = "error_reading_vqalue_file"
        control["error-message"] = str(instance)

    if control["result"] != "success":
        value_file.close()
        _send_error_reply(resources, request, control)
        return

    resources.file_cache[value_file_path] = value_file, time.time()

    if len(encoded_data) != read_size:
        error_message = "{0} size mismatch {1} {2}".format(
            request["retrieve-id"],
            len(encoded_data),
            read_size)
        log.error("user_request_id = {0}, {1}".format(request["user-request-id"],
                                                      error_message))
        resources.event_push_client.error("size_mismatch", error_message)
        control["result"] = "size_mismatch"
        control["error-message"] = error_message

    if control["result"] != "success":
        _send_error_reply(resources, request, control)
        return

    encoded_block_list = list(encoded_block_generator(encoded_data))

    segment_size = 0
    segment_adler32 = 0
    segment_md5 = hashlib.md5()
    for encoded_block in encoded_block_list:
        segment_size += len(encoded_block)
        segment_adler32 = zlib.adler32(encoded_block, segment_adler32) 
        segment_md5.update(encoded_block)
    segment_md5_digest = segment_md5.digest()

    reply = {
        "message-type"          : "retrieve-key-reply",
        "user-request-id"       : request["user-request-id"],
        "client-tag"            : request["client-tag"],
        "message-id"            : request["message-id"],
        "retrieve-id"           : request["retrieve-id"],
        "segment-unified-id"    : request["segment-unified-id"],
        "segment-num"           : request["segment-num"],
        "segment-size"          : segment_size,
        "zfec-padding-size"     : sequence_row["zfec_padding_size"],
        "segment-adler32"       : segment_adler32,
        "segment-md5-digest"    : b64encode(segment_md5_digest).decode("utf-8"),
        "sequence-num"          : None,
        "completed"             : control["completed"],
        "result"                : "success",
        "error-message"         : "",
    }

    push_socket = _get_reply_push_socket(resources, request["client-address"])
    push_socket.send_json(reply, zmq.SNDMORE)
    for encoded_block in encoded_block_list[:-1]:
        push_socket.send(encoded_block, zmq.SNDMORE)
    push_socket.send(encoded_block_list[-1])
Ejemplo n.º 22
0
def main():
    """
    main entry point
    returns 0 for normal termination (usually SIGTERM)
    """
    return_value = 0

    log_path = _log_path_template.format(os.environ["NIMBUSIO_LOG_DIR"],
                                         _local_node_name)
    initialize_logging(log_path)
    log = logging.getLogger("main")
    log.info("program starts")

    for internal_socket_uri in internal_socket_uri_list:
        prepare_ipc_path(internal_socket_uri)

    halt_event = Event()
    set_signal_handler(halt_event)

    database_pool_controller = _launch_database_pool_controller()
    io_controller = _launch_io_controller()

    zeromq_context = zmq.Context()
    rep_socket = _bind_rep_socket(zeromq_context)
    db_controller_push_socket = \
        _connect_db_controller_push_socket(zeromq_context)
    event_push_client = EventPushClient(zeromq_context, "retrieve_source")
    event_push_client.info("program-starts", "retrieve source starts")

    # we poll the sockets for readability, we assume we can always
    # write to the push client sockets
    poller = zmq.Poller()
    poller.register(rep_socket, zmq.POLLIN | zmq.POLLERR)

    last_report_time = 0.0
    request_count = 0
    try:
        while not halt_event.is_set():
            poll_subprocess(database_pool_controller)
            poll_subprocess(io_controller)

            # we've only registered one socket, so we could use an 'if' here,
            # but this 'for' works ok and it has the same form as the other
            # places where we use poller
            for active_socket, event_flags in poller.poll(_poll_timeout):
                if event_flags & zmq.POLLERR:
                    error_message = \
                        "error flags from zmq {0}".format(active_socket)
                    log.error(error_message)
                    raise PollError(error_message)

                assert active_socket is rep_socket

                _process_one_request(rep_socket, db_controller_push_socket)

                request_count += 1

            current_time = time.time()
            elapsed_time = current_time - last_report_time
            if elapsed_time > _reporting_interval:
                report_message = "{0:,} requests".format(request_count)
                log.info(report_message)
                event_push_client.info("request_count",
                                       report_message,
                                       request_count=request_count)
                last_report_time = current_time
                request_count = 0

    except KeyboardInterrupt:  # convenience for testing
        log.info("keyboard interrupt: terminating normally")
    except zmq.ZMQError as zmq_error:
        if is_interrupted_system_call(zmq_error) and halt_event.is_set():
            log.info("program teminates normally with interrupted system call")
        else:
            log.exception("zeromq error processing request")
            event_push_client.exception(unhandled_exception_topic,
                                        "zeromq_error",
                                        exctype="ZMQError")
            return_value = 1
    except Exception as instance:
        log.exception("error processing request")
        event_push_client.exception(unhandled_exception_topic,
                                    str(instance),
                                    exctype=instance.__class__.__name__)
        return_value = 1
    else:
        log.info("program teminates normally")
    finally:
        terminate_subprocess(database_pool_controller)
        terminate_subprocess(io_controller)
        rep_socket.close()
        db_controller_push_socket.close()
        event_push_client.close()
        zeromq_context.term()

    return return_value