Exemple #1
0
def start():

    # thread pools are started before anything else

    thread_count = pmnc.config.get("thread_count")
    sweep_period = pmnc.config.get("sweep_period")

    _start_thread_pools(thread_count, sweep_period)

    # now the interfaces can be started as they have worker threads to delegate requests to

    interface_names = ()
    if not pmnc.request.self_test:
        try:
            interface_names = pmnc.config.get("interfaces")
        except:
            pmnc.log.error(exc_string())  # log and ignore, empty tuple remains

    try:
        _start_interfaces(interface_names)
    except:
        pmnc.log.error(exc_string())  # log and ignore

    # finally, if we are the health monitor

    _start_health_monitor()
    def _poller_proc(self, source_cage):

        while not current_thread().stopped():
            try:

                # attach a new fake request to this thread so that
                # network access in RPC call times out properly

                fake_request(timeout = self._request_timeout, interface = self._name)
                pmnc.request.describe("polling cage {0:s}".format(source_cage))

                # fetch a request from the remote cage
                # and extract the call information from it

                try:
                    revrpc_request = self._poll(source_cage)
                except:
                    pmnc.log.error(exc_string())                    # failure to fetch a request
                    current_thread().stopped(self._request_timeout) # results in idle delay
                    continue

                if revrpc_request is None:
                    continue
                elif current_thread().stopped():
                    break

                assert revrpc_request.pop("target_cage") == __cage__, "expected call to this cage"
                assert revrpc_request.pop("source_cage") == source_cage, "expected call from {0:s}".format(source_cage)

                self._process_request(source_cage, revrpc_request)

            except:
                pmnc.log.error(exc_string())
Exemple #3
0
    def wu_process_request(self, source_cage: valid_cage_name, module: str,
                           method: str, args: tuple, kwargs: dict,
                           request_dict: dict):

        try:

            # see for how long the request was on the execution queue up to this moment
            # and whether it has expired in the meantime, if it did there is no reason
            # to proceed and we simply bail out

            if pmnc.request.expired:
                pmnc.log.error("request has expired and will not be processed")
                success = False
                return  # goes through finally section below

            # create a copy of and impersonate the original request, note that
            # the timeout is adjusted if current request has less time left

            request = pmnc.request.from_dict(request_dict,
                                             timeout=pmnc.request.remain)

            try:
                if pmnc.log.debug:
                    pmnc.log.debug("impersonating request {0:s}".format(
                        request.description))
                original_request = current_thread()._request
                current_thread()._request = request
                try:
                    with pmnc.performance.request_processing():
                        result = self._process_revrpc_request(
                            module, method, args, kwargs)
                finally:
                    current_thread()._request = original_request
            except:
                error = exc_string()
                pmnc.log.error(
                    "incoming reverse RPC call failed: {0:s}".format(error))
                revrpc_response = dict(exception=error)
                deliver_message = "reverse RPC error"
            else:
                revrpc_response = dict(result=result)
                deliver_message = "reverse RPC response"

            # submit the response back to the source cage using the request's unique id,
            # note that the delivery is done on behalf and within the local request

            if pmnc.log.debug:
                pmnc.log.debug("delivering {0:s}".format(deliver_message))
            self._post(source_cage, request_dict["unique_id"], revrpc_response)

        except:
            pmnc.log.error(exc_string())  # log and ignore
            success = False
        else:
            if pmnc.log.debug:
                pmnc.log.debug(
                    "{0:s} has been delivered".format(deliver_message))
            success = True
        finally:  # the request ends itself
            pmnc.interfaces.end_request(success)  # possibly way after deadline
Exemple #4
0
 def close(self):
     with self._db_cache_lock:
         for db in self._db_cache.values():
             try:
                 db.close()
             except:
                 pmnc.log.error(exc_string())  # log and ignore
         self._db_cache.clear()
     try:
         self._env.close()
     except:
         pmnc.log.error(exc_string())  # log and ignore
Exemple #5
0
 def close(self):
     with self._db_cache_lock:
         for db in self._db_cache.values():
             try:
                 db.close()
             except:
                 pmnc.log.error(exc_string())  # log and ignore
         self._db_cache.clear()
     try:
         self._env.close()
     except:
         pmnc.log.error(exc_string())  # log and ignore
    def wu_process_request(self, source_cage: valid_cage_name, module: str, method: str,
                           args: tuple, kwargs: dict, request_dict: dict):

        try:

            # see for how long the request was on the execution queue up to this moment
            # and whether it has expired in the meantime, if it did there is no reason
            # to proceed and we simply bail out

            if pmnc.request.expired:
                pmnc.log.error("request has expired and will not be processed")
                success = False
                return # goes through finally section below

            # create a copy of and impersonate the original request, note that
            # the timeout is adjusted if current request has less time left

            request = pmnc.request.from_dict(request_dict, timeout = pmnc.request.remain)

            try:
                pmnc.log.debug("impersonating request {0:s}".format(request.description))
                original_request = current_thread()._request
                current_thread()._request = request
                try:
                    with pmnc.performance.request_processing():
                        result = self._process_revrpc_request(module, method, args, kwargs)
                finally:
                    current_thread()._request = original_request
            except:
                error = exc_string()
                pmnc.log.error("incoming reverse RPC call failed: {0:s}".format(error))
                revrpc_response = dict(exception = error)
                deliver_message = "reverse RPC error"
            else:
                revrpc_response = dict(result = result)
                deliver_message = "reverse RPC response"

            # submit the response back to the source cage using the request's unique id,
            # note that the delivery is done on behalf and within the local request

            pmnc.log.debug("delivering {0:s}".format(deliver_message))
            self._post(source_cage, request_dict["unique_id"], revrpc_response)

        except:
            pmnc.log.error(exc_string()) # log and ignore
            success = False
        else:
            pmnc.log.debug("{0:s} has been delivered".format(deliver_message))
            success = True
        finally:                                 # the request ends itself
            pmnc.interfaces.end_request(success) # possibly way after deadline
    def _maintainer_proc(self):

        while not current_thread().stopped():
            try:

                # try to establish a connection, do it infinitely or until the interface is stopped

                while True:
                    try:
                        self._connection = self._create_connection()
                        self._connection.start()
                    except:
                        pmnc.log.error(exc_string())
                        failure_timeout = max(self._request_timeout, 30.0)
                        if current_thread().stopped(failure_timeout):
                            return
                    else:
                        break # while True

                try:

                    while not current_thread().stopped() and not self._connection.failed:

                        # process incoming PDUs

                        req = self._in_q.pop(1.0)
                        if req is not None:
                            self._handle_pdu(req)

                        # if there is an outstanding ping request, check for response

                        if self._ping_request and self._ping_response_timeout.expired:
                            ping_request, self._ping_request = self._ping_request, None
                            _wait_response(ping_request, 0.001)

                        # if it's time to send another ping request, do so

                        if self._ping_timeout and self._ping_timeout.expired:
                            try:
                                self._ping_request = EnquireLinkPDU.create()
                                self._out_q.push(self._ping_request)
                                self._ping_response_timeout.reset()
                            finally:
                                self._ping_timeout.reset()

                finally:
                    self._connection.stop()

            except:
                pmnc.log.error(exc_string()) # log and ignore
Exemple #8
0
def _stop_interfaces():

    # first, we make sure the interfaces cease to accept new
    # incoming requests but may keep processing already accepted

    for interface_name, interface in _interfaces.items():
        pmnc.log.message("ceasing interface {0:s}".format(interface_name))
        try:
            interface.cease()
        except:
            pmnc.log.message("interface {0:s} failed to cease: {1:s}".\
                             format(interface_name, exc_string())) # log and ignore
        else:
            pmnc.log.message("interface {0:s} has ceased".format(interface_name))

    # then we stop the request factory to stress the fact
    # that no more requests can be created any more

    _request_factory.stop()

    request_count = _request_factory.count
    pmnc.log.message("no more requests can be created{0:s}".\
                     format(request_count > 0
                            and ", {0:d} request(s) are still active".\
                                format(request_count)
                            or ""))

    # then we wait for all existing requests to complete for
    # at most one full timeout, after which they are dead anyway

    request_timeout = pmnc.config.get("request_timeout")
    if not _request_factory.wait(request_timeout):
        request_count = _request_factory.count
        if request_count > 0:
            pmnc.log.warning("proceeding with shutdown although {0:d} request(s) "
                             "are still active".format(request_count))

    # finally the interfaces are stopped completely
    # and no further processing is performed

    for interface_name, interface in _interfaces.items():
        pmnc.log.message("stopping interface {0:s}".format(interface_name))
        try:
            interface.stop()
        except:
            pmnc.log.message("interface {0:s} failed to stop: {1:s}".\
                             format(interface_name, exc_string())) # log and ignore
        else:
            pmnc.log.message("interface {0:s} has stopped".format(interface_name))
Exemple #9
0
    def _process_message(self, message_id, pkt, request_timeout):

        message_text = pkt.pop("XPmncMessageText")

        message_description = "JMS message {0:s}{1:s}".format(message_id,
                              ", corr.id {0:s}".format(pkt["JMSCorrelationID"])
                              if pkt.get("JMSCorrelationID") else "")

        request = pmnc.interfaces.begin_request(
                    timeout = min(request_timeout, self._request_timeout),
                    interface = self._name, protocol = "jms",
                    parameters = dict(auth_tokens = dict()),
                    description = message_description)
        try:
            pmnc.interfaces.enqueue(request, self.wu_process_request,
                                    (message_id, message_text, pkt.copy())).wait()
        except WorkUnitTimedOut:
            pmnc.log.error("{0:s} processing timed out".format(message_description))
            success = None
        except:
            pmnc.log.error("{0:s} processing failed: {1:s}".\
                           format(message_description, exc_string()))
            success = False
        else:
            if pmnc.log.debug:
                pmnc.log.debug("{0:s} processing succeeded".format(message_description))
            self._processed_messages.add(message_id)
            success = True
        finally:
            pmnc.interfaces.end_request(success, request)
            return success == True
Exemple #10
0
    def send(self, packet_b: bytes):

        packet_length = len(packet_b)

        pmnc.log.info("sending UDP packet to {0[0]:s}:{0[1]:d}, {1:d} byte(s)".\
                      format(self._server_address, packet_length))
        try:

            if not select([], [self._socket], [], pmnc.request.remain)[1]:
                raise Exception("request deadline writing data to {0:s}".format(self._server_info))

            if self._broadcast:
                sent = self._socket.sendto(packet_b, self._server_address)
            else:
                sent = self._socket.send(packet_b)

            if sent != packet_length:
                raise Exception("packet truncated, sent {0:d} byte(s)")

        except:
            pmnc.log.warning("sending UDP packet to {0[0]:s}:{0[1]:d}, {1:d} byte(s) failed: {2:s}".\
                             format(self._server_address, packet_length, exc_string()))
            raise
        else:
            pmnc.log.info("UDP packet to {0[0]:s}:{0[1]:d}, {1:d} byte(s) has been sent".\
                          format(self._server_address, packet_length))
Exemple #11
0
    def send(self, message_text: str, **kwargs) -> str:

        try:

            header_fields = " and {0:d} custom header field(s): {1:s}".\
                            format(len(kwargs), ", ".join(kwargs.keys())) \
                            if kwargs else ""

            correlation_id = kwargs.get("JMSCorrelationID")

            message_description = \
                "JMS message{0:s} with {1:d} content byte(s){2:s}".\
                format(" {0:s}".format(correlation_id) if correlation_id else "",
                       len(message_text), header_fields)

            pmnc.log.info("sending {0:s}".format(message_description))
            try:
                pkt = self._sync_adapter_command("SEND", XPmncMessageText = message_text, **kwargs)
            except:
                pmnc.log.warning("sending {0:s} failed: {1:s}".\
                                 format(message_description, exc_string()))
                raise
            else:
                self._message_id = pkt["XPmncMessageID"]

        except:
            ResourceError.rethrow(recoverable = True) # no irreversible changes
        else:
            return self._message_id
Exemple #12
0
 def _remove_file(self, file_name):
     try:
         _retry_remove_file(file_name, 2.0)
     except:
         pmnc.log.warning(exc_string())
     else:
         self._processed_files.remove(file_name)
Exemple #13
0
    def wu_handle_pdu(self, req: RequestPDU):

        try:

            # see for how long the request was on the execution queue up to this moment
            # and whether it has expired in the meantime, if it did there is no reason
            # to proceed and we simply bail out

            if pmnc.request.expired:
                pmnc.log.error("request has expired and will not be processed")
                success = False
                return # goes through finally section below

            with pmnc.performance.request_processing():
                request = dict(pdu = req)
                response = dict(pdu = req.create_nack(error_codes.ESME_RUNKNOWNERR))
                try:
                    self._process_request(request, response)
                except:
                    response["pdu"] = req.create_nack(error_codes.ESME_RSYSERR)
                    raise
                finally:
                    self._out_q.push(response["pdu"])

        except:
            pmnc.log.error(exc_string()) # log and ignore
            success = False
        else:
            success = True
        finally:                                 # the request ends itself
            pmnc.interfaces.end_request(success) # possibly way after deadline
Exemple #14
0
    def _reader_proc(self):

        self._read_data = b""

        while True:
            try:

                try:
                    pdu = PDU.read(self)
                except SMPPPDUReadError as e:                                  # mask out the exception from
                    if isinstance(e.__cause__, _SMPPConnection.ThreadStopped): # the thread being stopped
                        break # while
                    else:
                        raise

                if isinstance(pdu, RequestPDU):
                    if pmnc.log.debug:
                        pmnc.log.debug("<< [REQ] {0:s}".format(str(pdu)))
                    self._in_q.push(pdu)
                elif isinstance(pdu, ResponsePDU):
                    req = self._inflight.remove(pdu)
                    if req is not None:
                        if pmnc.log.debug:
                            pmnc.log.debug("<< [RSP] {0:s}".format(str(pdu)))
                        req.set_response(pdu)
                    else:
                        pmnc.log.warning("<< [RSP?!] {0:s}".format(str(pdu)))
                else:
                    pmnc.log.warning("<< [???] {0:s}".format(str(pdu)))

            except:
                pmnc.log.error(exc_string())
                self._failed.set()
                break
Exemple #15
0
 def disconnect(self):
     try:
         self._disconnect(self)
     except:
         pmnc.log.error(exc_string())  # log and ignore
     finally:
         TransactionalResource.disconnect(self)
 def disconnect(self):
     try:
         self._disconnect(self)
     except:
         pmnc.log.error(exc_string()) # log and ignore
     finally:
         TransactionalResource.disconnect(self)
Exemple #17
0
def _start_interfaces(interfaces: tuple_of(str)):

    # prepare the global request factory shared across all interfaces
    # see begin_request method below

    global _request_factory
    _request_factory = InterlockedFactory(Request)

    pmnc.log.message("as of this moment requests can be created")

    # the configured interfaces are started one by one and kept on file,
    # technically they can be restarted (presumably with modified configuration)
    # at runtime by stopping and recreating, but I don't see it big an issue
    # to restart a cage once its interface settings have changed

    for interface_name in interfaces:
        pmnc.log.message("starting interface {0:s}".format(interface_name))
        try:
            config_module_name = "config_interface_{0:s}".format(interface_name)
            config = pmnc.__getattr__(config_module_name).copy()
            interface = pmnc.interface.create(interface_name, **config)
            interface.start()
        except:
            pmnc.log.message("interface {0:s} failed to start: {1:s}".\
                             format(interface_name, exc_string())) # log and ignore
        else:
            pmnc.log.message("interface {0:s} has started".format(interface_name))
            _interfaces[interface_name] = interface
Exemple #18
0
def run(*, required_dirs: optional(tuple_of(str)) = ()):

    if getenv("PYTHONHASHSEED") != "0":
        raise SystemExit(
            "Running self-tests requires turning off hash randomization, "
            "set environment variable PYTHONHASHSEED to 0.")

    current_thread().name = "self_test"

    _log("***** STARTING SELF-TEST FOR MODULE {0:s} *****".format(
        main_module_name.upper()))

    test_cages_dir = _create_temp_cage_copy(required_dirs=required_dirs)
    try:
        pmnc = _start_pmnc(test_cages_dir)
        try:
            try:
                current_thread()._pmnc = pmnc  # to be used in active_interface
                assert pmnc.request.self_test == main_module_name
                pmnc.__getattr__(main_module_name).self_test()
            except:
                _log("***** FAILURE: {1:s}".format(main_module_name,
                                                   exc_string()))
            else:
                _log(
                    "***** SUCCESS, BUT EXAMINE THE LOG FOR UNEXPECTED ERRORS *****"
                )
        finally:
            _stop_pmnc(pmnc)
    finally:
        _remove_temp_cage_copy(test_cages_dir)
Exemple #19
0
    def _writer_proc(self):
        try:

            # perform synchronous bind, using cumulative timeout

            if not self._write_pdu(self._bind_pdu, False, self._bind_timeout):
                return
            _wait_response(self._bind_pdu, self._bind_timeout.remain)
            self._bound.set()

            while not current_thread().stopped(): # lifetime loop
                pdu = self._out_q.pop(1.0)
                if pdu and not self._write_pdu(pdu, False, Timeout(self._response_timeout)):
                    break

            # perform synchronous unbind

            unbind_pdu = UnbindPDU.create()
            unbind_timeout = Timeout(self._response_timeout)
            self._write_pdu(unbind_pdu, True, unbind_timeout)
            _wait_response(unbind_pdu, unbind_timeout.remain)

        except:
            pmnc.log.error(exc_string())
            self._failed.set()
Exemple #20
0
    def read(self, target_filename: str, *, remove: optional(bool) = None, frag_size: optional(int) = None) -> bytes:

        try:

            self._remove = remove or False
            self._target_filename, _ = self._normalize_filename(target_filename)

            file_size = os_path.getsize(self._target_filename)
            frag_size = frag_size or 65536

            pmnc.log.info("reading {0:d} byte(s) from file {1:s}".\
                          format(file_size, self._target_filename))
            try:

                data = BytesIO()

                with open(self._target_filename, "rb") as f:
                    while not pmnc.request.expired:
                        portion = f.read(frag_size)
                        if not portion:
                            break
                        data.write(portion)
                    else:
                        raise Exception("request deadline reading data from file {0:s}".\
                                        format(self._target_filename))

            except:
                pmnc.log.info("reading {0:d} byte(s) from file {1:s} failed: {2:s}".\
                              format(file_size, self._target_filename, exc_string()))
                raise
            else:
                return data.getvalue()

        except:
            ResourceError.rethrow(recoverable = True) # no irreversible changes
Exemple #21
0
    def _process_file(self, file_name):

        request = pmnc.interfaces.begin_request(
                    timeout = self._request_timeout,
                    interface = self._name, protocol = "file",
                    parameters = dict(auth_tokens = dict()),
                    description = "file {0:s}".format(file_name))

        # enqueue the request and wait for its completion

        try:
            pmnc.interfaces.enqueue(request, self.wu_process_request,
                                    (file_name, )).wait()
        except WorkUnitTimedOut:
            pmnc.log.error("file processing timed out")
            success = None
        except:
            pmnc.log.error("file processing failed: {0:s}".format(exc_string()))
            success = False
        else:
            if pmnc.log.debug:
                pmnc.log.debug("file processing succeeded")
            self._processed_files.add(file_name)
            success = True
        finally:
            pmnc.interfaces.end_request(success, request)
            return success == True
Exemple #22
0
 def disconnect(self):
     try:
         self._connection.close()
     except:
         pmnc.log.error(exc_string()) # log and ignore
     finally:
         SQLResource.disconnect(self)
Exemple #23
0
def _state_thread_proc():

    _check_free_disk_space()

    checkpoint_interval = pmnc.config.get("checkpoint_interval")

    while not current_thread().stopped(checkpoint_interval):
        try:
            _checkpoint_state()
            _check_free_disk_space()
        except:
            pmnc.log.error(exc_string())  # log and ignore

    try:
        _shutdown_state()
    except:
        pmnc.log.error(exc_string())  # log and ignore
Exemple #24
0
def _checkpoint_state():

    with _state_cache_lock:
        for module_state in _state_cache.values():
            try:
                module_state.checkpoint()
            except:
                pmnc.log.error(exc_string())  # log and ignore
Exemple #25
0
def _state_thread_proc():

    _check_free_disk_space()

    checkpoint_interval = pmnc.config.get("checkpoint_interval")

    while not current_thread().stopped(checkpoint_interval):
        try:
            _checkpoint_state()
            _check_free_disk_space()
        except:
            pmnc.log.error(exc_string())  # log and ignore

    try:
        _shutdown_state()
    except:
        pmnc.log.error(exc_string())  # log and ignore
Exemple #26
0
def _checkpoint_state():

    with _state_cache_lock:
        for module_state in _state_cache.values():
            try:
                module_state.checkpoint()
            except:
                pmnc.log.error(exc_string())  # log and ignore
Exemple #27
0
 def _stderr_reader_proc(self, stderr):
     try:
         try:
             while stderr.read(512): # this light thread exits only upon exception
                 pass
         finally:
             stderr.close()
     except:
         pmnc.log.error(exc_string()) # log and ignore
Exemple #28
0
def _shutdown_state():

    with _state_cache_lock:
        for module_state in _state_cache.values():
            try:
                module_state.close()
            except:
                pmnc.log.error(exc_string())  # log and ignore
        _state_cache.clear()
Exemple #29
0
def reload():

    # start new/stop missing interfaces

    try:
        interface_names = pmnc.config.get("interfaces")
    except:
        pmnc.log.error(exc_string())  # log and ignore
    else:
        try:
            _update_interfaces(interface_names)
        except:
            pmnc.log.error(exc_string())  # log and ignore

    # restart reconfigured interfaces

    for interface in _interfaces.values():
        interface.reload()
Exemple #30
0
def _shutdown_state():

    with _state_cache_lock:
        for module_state in _state_cache.values():
            try:
                module_state.close()
            except:
                pmnc.log.error(exc_string())  # log and ignore
        _state_cache.clear()
Exemple #31
0
    def _listener_proc(self):

        while not current_thread().stopped():
            try:

                s = self._create_server_socket()
                try:

                    pmnc.log.message("started listening for packets at {0[0]:s}:{0[1]:d}".\
                                     format(self._listener_address))

                    while not current_thread().stopped():

                        if not select([s], [], [], 1.0)[0]:
                            continue

                        try:
                            packet_b, (client_addr, client_port) = s.recvfrom(57344)
                        except socket_timeout:
                            continue
                        except socket_error as e:
                            if e.args[0] == 10054: # workaround for this issue: http://support.microsoft.com/kb/263823
                                continue
                            else:
                                raise

                        if pmnc.log.debug:
                            pmnc.log.debug("incoming UDP packet from {0:s}:{1:d}, {2:d} byte(s)".\
                                           format(client_addr, client_port, len(packet_b)))

                        # the listener thread initiates the received packet processing

                        try:
                            self._enqueue_packet(packet_b, client_addr, client_port)
                        except:
                            pmnc.log.error(exc_string()) # log and ignore

                finally:
                    s.close()

                pmnc.log.message("stopped listening")

            except:
                pmnc.log.error(exc_string()) # log and ignore
 def disconnect(self):
     try:
         if self._process and self._process.poll() is None:
             pmnc.log.warning("killing runaway process {0:s} (pid {1:d})".\
                              format(self._executable, self._process.pid))
             self._process.kill()
     except:
         pmnc.log.error(exc_string())  # log and ignore
     finally:
         TransactionalResource.disconnect(self)
Exemple #33
0
 def disconnect(self):
     try:
         try:
             self._sync_adapter_command("EXIT")
         finally:
             self._stop_adapter(Timeout(min(5.0, pmnc.request.remain)))
     except:
         pmnc.log.error(exc_string()) # log and ignore
     finally:
         TransactionalResource.disconnect(self)
 def disconnect(self):
     try:
         try:
             self._connection.Close()
         except com_error:
             self._rethrow_adodb_error()
     except:
         pmnc.log.error(exc_string()) # log and ignore
     finally:
         SQLResource.disconnect(self)
    def _scheduler_proc(self):

        while not current_thread().stopped(1.1 - modf(time())[0]):
            try:

                current_tick = Interface._get_current_tick()
                try:
                    for tick in range(self._last_tick + 1, current_tick + 1):
                        invocation_time = datetime.fromtimestamp(tick)
                        if self._match(invocation_time.strftime(self._format)):
                            try:
                                self._fire_request(invocation_time)
                            except:
                                pmnc.log.error(exc_string())  # log and ignore
                finally:
                    self._last_tick = tick

            except:
                pmnc.log.error(exc_string())  # log and ignore
Exemple #36
0
    def _probe_thread_proc(self):

        per_cage_interval = 0.0

        # calls to _poll_up_down_queue are interleaved and allow this thread
        # to maintain structures such as _up_cages in response to events
        # posted by the probe threads to the _up_down_queue

        while self._poll_up_down_queue(per_cage_interval):
            try:

                # extract all cages currently known to the rpc interface and
                # merge them with cages previously probed and found to be up,
                # except for the health_monitor cage itself should be skipped

                probe_cages = \
                    { known_cage: { known_node: dict(location = known_location, probe_result = None)
                                    for known_node, known_location in self._rpc_interface.get_nodes(known_cage).items() }
                      for known_cage in self._rpc_interface.get_cages() if known_cage != "health_monitor" }

                self._merge_cages(probe_cages, self._up_cages)

                probe_period = pmnc.config.get("probe_period")
                per_cage_interval = probe_period / (len(probe_cages) + 1)

                # walk through all cages to be probed and schedule calls to probe
                # to a private thread pool using fake unregistered requests

                for cage, nodes in probe_cages.items():

                    for node, cage_info in nodes.items():

                        cage_location = cage_info["location"]

                        # note that the requests created here are not registered with
                        # interfaces and enqueued to a different pool too, they are
                        # therefore entitled to termination without warning at shutdown,
                        # this is ok, because they do no useful work for the clients

                        request = Request(timeout = self._request_timeout,
                                          interface = "__health_monitor__", protocol = "n/a",
                                          parameters = dict(auth_tokens = dict()),
                                          description = "probing cage {0:s} at {1:s}".format(cage, cage_location))

                        self._probe_thread_pool.enqueue(request, self.wu_probe_cage,
                                (node, cage, cage_location, cage_info["probe_result"]), {})

                    # then again yield to polling the queue for a while

                    if not self._poll_up_down_queue(per_cage_interval):
                        break

            except:
                pmnc.log.error(exc_string()) # log and ignore
Exemple #37
0
 def _config(self):
     module_properties = {}
     config_module_name = "config_interface_{0:s}".format(self._name)
     try:
         config = pmnc.__getattr__(config_module_name).\
                       copy(__module_properties = module_properties)
     except:
         pmnc.log.error(exc_string())
         return None, None
     else:
         return config, module_properties["version"]
Exemple #38
0
def wu_restart_state():
    try:
        pmnc.log.warning(
            "the state is being restarted due to database recovery")
        pmnc.state.stop()
        pmnc.state.start()
        pmnc.log.warning("the state has been restarted")
    except:
        pmnc.log.error(exc_string())  # log and ignore
    finally:
        with _restart_pending_lock:
            _restart_pending.clear()
 def _sync_request(self,
                   rq: MongoDB_Request,
                   check: optional(callable) = None) -> MongoDB_Response:
     pmnc.log.info(">> {0:s}".format(rq))
     try:
         rs = self._connection.sync_request(rq,
                                            Timeout(pmnc.request.remain))
         if rs.cursor_not_found:
             raise MongoDB_Error("cursor not found")
         if check: check(rs)
     except MongoDB_Error as e:
         pmnc.log.warning("<< {0:s} !! {1:s}".format(rq, exc_string()))
         ResourceError.rethrow(code=e.code,
                               description=str(e),
                               terminal=e.code is not None)
     except Exception as e:
         pmnc.log.warning("<< {0:s} !! {1:s}".format(rq, exc_string()))
         ResourceError.rethrow(description=str(e))
     else:
         pmnc.log.info("<< OK, {0:s}".format(rs))
         return rs
Exemple #40
0
def _stop_health_monitor():

    # stop health monitor if it has been started

    if _health_monitor:
        pmnc.log.message("stopping health monitor")
        try:
            _health_monitor.stop()
        except:
            pmnc.log.error("health monitor failed to stop: {0:s}".\
                           format(exc_string()))
        else:
            pmnc.log.message("health monitor has stopped")
Exemple #41
0
    def wu_probe_cage(self, node, cage, location, prev_probe_result):

        if pmnc.request.expired:  # no need to report anything for a probing request
            return

        pmnc.log.debug("sending probe")
        try:
            probe_result = self._probe_cage(node, cage, location)
        except:
            pmnc.log.warning("probe failed: {0:s}".format(exc_string()))
            self._up_down_queue.push((node, cage, "down"))
        else:
            pmnc.log.debug("probe returned successfully")
            if prev_probe_result == "restarted":  # if the cage has restarted
                self._up_down_queue.push((node, cage, "down"))  # we push "down" event first
            self._up_down_queue.push((node, cage, "up", location, probe_result))
Exemple #42
0
def _start_health_monitor():

    # if this cage is called health_monitor, start health monitor

    if __cage__ == "health_monitor":
        pmnc.log.message("starting health monitor")
        try:
            health_monitor = \
                pmnc.health_monitor.HealthMonitor()
            health_monitor.start()
        except:
            pmnc.log.error("health monitor failed to start: {0:s}".\
                           format(exc_string()))
        else:
            global _health_monitor
            _health_monitor = health_monitor
            pmnc.log.message("health monitor has started")
Exemple #43
0
    def _poll_up_down_queue(self, timeout: float) -> bool:  # returns "should keep running"

        poll_timeout = Timeout(timeout)
        while not poll_timeout.expired:

            pop_timeout = Timeout(min(poll_timeout.remain, 1.0))
            while not pop_timeout.expired:

                event = pop_timeout.pop(self._up_down_queue)
                if event is not None:
                    try:

                        node, cage, up_down, *args = event
                        if up_down == "up":

                            location, probe_result = args

                            # add the cage to cages known to be up and schedule
                            # application notification call if it was down or
                            # returned a different probe result

                            cage_info = self._up_cages.setdefault(cage, {}).setdefault(node, {})
                            if not cage_info or cage_info["probe_result"] != probe_result:
                                self._schedule_up_down_event(node, cage, "up", probe_result)
                            cage_info.update(location=location, probe_result=probe_result)

                        elif up_down == "down":

                            # remove the cage from cages known to be up and schedule
                            # application notification call it was up

                            if self._up_cages.setdefault(cage, {}).pop(node, None):
                                self._schedule_up_down_event(node, cage, "down")

                    except:
                        pmnc.log.error(exc_string())  # log and ignore

            if current_thread().stopped():
                return False

        return True
Exemple #44
0
def run(*, required_dirs: optional(tuple_of(str)) = ()):

    current_thread().name = "self_test"

    _log("***** STARTING SELF-TEST FOR MODULE {0:s} *****".format(main_module_name.upper()))

    test_cages_dir = _create_temp_cage_copy(required_dirs = required_dirs)
    try:
        pmnc = _start_pmnc(test_cages_dir)
        try:
            try:
                current_thread()._pmnc = pmnc # to be used in active_interface
                assert pmnc.request.self_test == main_module_name
                pmnc.__getattr__(main_module_name).self_test()
            except:
                _log("***** FAILURE: {1:s}".format(main_module_name, exc_string()))
            else:
                _log("***** SUCCESS, BUT EXAMINE THE LOG FOR UNEXPECTED ERRORS *****")
        finally:
            _stop_pmnc(pmnc)
    finally:
        _remove_temp_cage_copy(test_cages_dir)
Exemple #45
0
    def wu_process_event(self, node: str, cage: str, up_down: one_of("up", "down"), probe_result: optional(dict)):
        try:

            # see for how long the request was on the execution queue up to this moment
            # and whether it has expired in the meantime, if it did there is no reason
            # to proceed and we simply bail out

            if pmnc.request.expired:
                pmnc.log.error("request has expired and will not be processed")
                success = False
                return  # goes through finally section below

            with pmnc.performance.request_processing():
                self._process_event(node, cage, up_down, probe_result)

        except:
            pmnc.log.error(exc_string())  # log and ignore
            success = False
        else:
            success = True
        finally:  # the request ends itself
            pmnc.interfaces.end_request(success)  # possibly way after deadline
Exemple #46
0
    def wu_participate(self, transaction_start, participant_index,
                       resource_name, attrs, args, kwargs, res_args, res_kwargs):

        # see whether the request by which this transaction was created
        # has expired in the meantime, and if it has, simply bail out
        # because the transaction should have long been perished

        # no attempt to execute the request is taken and no result
        # is delivered, simply because the transaction is assumed
        # to already be aborted, nowhere to report the result

        if pmnc.request.expired:
            pmnc.log.error("execution of resource {0:s} in transaction "
                           "{1:s} was late".format(resource_name, self))
            return

        try:

            pmnc.log.debug("resource {0:s} joins transaction {1:s}".\
                           format(resource_name, self))

            resource_instance = None        # no instance has been allocated yet
            resource_in_transaction = False # no transaction has been started on the instance
            resource_failed = True          # (preventive) request execution has been a failure

            while True: # breaks when the result is obtained, either value or exception

                # any failure prior to actual resource allocation results
                # in a recoverable ResourceError, pointlessly terminal

                try:

                    # the pending interval is measured from the beginning of the
                    # transaction, not from the beginning of the request

                    pending_ms = int((time() - transaction_start) * 1000)
                    pmnc.performance.sample("resource.{0:s}.pending_time".\
                                            format(resource_name), pending_ms)

                    # allocate a resource instance from a specific resource pool

                    resource_pool = pmnc.shared_pools.get_resource_pool(resource_name)
                    resource_instance = resource_pool.allocate()

                except: # tested
                    result = ResourceError.snap_exception(
                                    participant_index = participant_index,
                                    recoverable = True, terminal = True) # but not really terminal,
                    break # while True                                   # no instance to terminate

                # some resource instance has been allocated, beginning a transaction,
                # a failure would result in a ResourceError, recoverable yet terminal
                # unless explicitly specified otherwise

                try:

                    # see if a transaction should be started in as much time as the request has left

                    if pmnc.request.remain < resource_instance.min_time:
                        raise ResourceError(description = "transaction {0:s} is declined by resource instance " # tested
                                                          "{1:s}".format(self, resource_instance.name),
                                            recoverable = True, terminal = False) # the instance stays alive

                    pmnc.log.debug("resource instance {0:s} is used in transaction {1:s}, {2:s}".\
                                   format(resource_instance.name, self, self._resource_ttl(resource_instance)))

                    # begin a new transaction, this is presumably reversible operation

                    resource_instance.begin_transaction(self._xid,
                                                        source_module_name = self._source_module_name,
                                                        transaction_options = self._options,
                                                        resource_args = res_args,
                                                        resource_kwargs = res_kwargs)

                except ResourceError as e:
                    result = self._apply_error(participant_index, resource_instance, e)
                    break # while True
                except: # tested
                    result = ResourceError.snap_exception(
                                    participant_index = participant_index,
                                    recoverable = True, terminal = True)
                    resource_instance.expire()
                    break # while True
                else:
                    resource_in_transaction = True

                # resource instance is now in transaction, executing the request,
                # a failure would result in a ResourceError, unrecoverable and
                # terminal unless explicitly specified otherwise

                try:

                    # replay attribute accesses to obtain the actual target method

                    target_method = resource_instance
                    for attr in attrs:
                        target_method = getattr(target_method, attr)

                    # execute the request, registering the execution time

                    with pmnc.performance.timing("resource.{0:s}.processing_time".format(resource_name)):
                        result = target_method(*args, **kwargs)

                except ResourceError as e:
                    result = self._apply_error(participant_index, resource_instance, e)
                    break # while True
                except Exception: # tested
                    result = ResourceError.snap_exception(
                                    participant_index = participant_index,
                                    recoverable = False, terminal = True)
                    resource_instance.expire()
                    break # while True
                else:
                    resource_instance.reset_idle_timeout()
                    resource_failed = False
                    break # while True

            # we got an intermediate result, possibly an exception

            try:

                self._results.push((participant_index, result)) # deliver the result to the pending transaction

                # register the actual result of this participant

                pmnc.performance.event("resource.{0:s}.transaction_rate.{1:s}".\
                                       format(resource_name, resource_failed and "failure" or "success"))

                if not resource_in_transaction: # as we couldn't begin a transaction,
                    return "failure"            # we are not interested in the decision

                pmnc.log.debug("resource instance {0:s} is waiting for decision in "
                               "transaction {1:s}".format(resource_instance.name, self))

                # figure out whether the resource has to commit or rollback

                commit_transaction = False

                if pmnc.request.wait(self._decision): # wait for transaction's decision
                    if self._commit.is_set():
                        if not resource_failed:
                            commit_transaction = True
                            pmnc.log.debug("resource instance {0:s} decided to commit in transaction "
                                           "{1:s}".format(resource_instance.name, self))
                        else:
                            pmnc.log.warning("resource instance {0:s} had to rollback despite decision to commit "
                                             "in transaction {1:s}".format(resource_instance.name, self))
                    else:
                        pmnc.log.debug("resource instance {0:s} decided to rollback in transaction "
                                       "{1:s}".format(resource_instance.name, self))
                else:
                    pmnc.log.warning("resource instance {0:s} had to abandon waiting for decision and "
                                     "rollback in transaction {1:s}".format(resource_instance.name, self))

                # complete the transaction and return the final outcome

                if commit_transaction:
                    try:
                        resource_instance.commit()
                    except:
                        pmnc.log.error("resource instance {0:s} failed to commit in transaction {1:s}: "
                                       "{2:s}".format(resource_instance.name, self, exc_string())) # this is a severe problem
                        resource_instance.expire()
                        return "failure"
                    else:
                        pmnc.log.debug("resource instance {0:s} committed in transaction "
                                       "{1:s}".format(resource_instance.name, self))
                        return "commit"
                else:
                    try:
                        resource_instance.rollback()
                    except:
                        pmnc.log.warning("resource instance {0:s} failed to rollback in transaction {1:s}: "
                                         "{2:s}".format(resource_instance.name, self, exc_string())) # this is not a big deal
                        resource_instance.expire()
                        return "failure"
                    else:
                        pmnc.log.debug("resource instance {0:s} rolled back in transaction "
                                       "{1:s}".format(resource_instance.name, self))
                        return "rollback"

            finally:
                if resource_instance:
                    pmnc.log.debug("resource instance {0:s} is being released, {1:s}".\
                                   format(resource_instance.name, self._resource_ttl(resource_instance)))
                    resource_pool.release(resource_instance)

        except:
            pmnc.log.error(exc_string()) # this should not normally happen, but do
            raise                        # not allow such exception to be silenced
Exemple #47
0
def _perf_thread_proc():

    slice10s = TimeSlice(10)

    sampler60s = CumulativeSampler(60)
    sampler10s = CurrentSampler(10)

    while not current_thread().stopped():
        try:

            # drain the queue and append all the data to the current 10s sampler

            perf_info = _perf_queue.pop(1.0) # this causes a delay of up to 1 sec
            while perf_info:
                what, key, value = perf_info
                if what == "event":
                    sampler10s.tick(key)
                    if key.endswith(".success") or key.endswith(".failure"):
                        sampler10s.tick(key[:-8])
                elif what == "sample":
                    sampler10s.sample(key, value)
                    if key.endswith(".success") or key.endswith(".failure"):
                        sampler10s.sample(key[:-8], value)
                perf_info = _perf_queue.pop(0.0)

            # see if another 10s have passed

            next, slice = slice10s.next
            if next: # start of a 10s slice

                with _perf_lock: # update the shared 10s stats
                    completed_slice = slice - 1
                    _perf_dump_10s[completed_slice % 6] = (completed_slice * 10, sampler10s.dump())

                sampler60s.collect(sampler10s)   # append 10s stats to the 60s sampler
                sampler10s = CurrentSampler(10) # create a new empty 10s sampler

                if slice % 6 == 0: # start of a new minute

                    # update the shared 60s stats with the current 60s summary
                    # and reset the shared 10s stats

                    with _perf_lock:
                        completed_minute = slice - 6
                        _perf_dump_60s.append((completed_minute * 10, sampler60s.dump()))
                        if len(_perf_dump_60s) == 60:
                            _perf_dump_60s.pop(0)
                        _perf_dump_10s[:] = [None] * 6

                    sampler60s = CumulativeSampler(60) # create a new empty 60s sampler

                with _perf_lock: # update global statistics
                    wss = get_working_set_size()
                    if wss is not None:
                        _perf_stats.update(wss = wss)
                    cpu_ut_cpu_kt = get_cpu_times()
                    if cpu_ut_cpu_kt is not None:
                        cpu_ut, cpu_kt = cpu_ut_cpu_kt
                        _perf_stats.update(cpu_ut_percent = (cpu_ut - _perf_stats.get("cpu_ut", 0.0)) * 10.0,
                                           cpu_kt_percent = (cpu_kt - _perf_stats.get("cpu_kt", 0.0)) * 10.0,
                                           cpu_ut = cpu_ut, cpu_kt = cpu_kt)

        except:
            pmnc.log.error(exc_string()) # log and ignore
Exemple #48
0
    def _probe_thread_proc(self):

        per_cage_interval = 0.0

        # calls to _poll_up_down_queue are interleaved and allow this thread
        # to maintain structures such as _up_cages in response to events
        # posted by the probe threads to the _up_down_queue

        while self._poll_up_down_queue(per_cage_interval):
            try:

                # extract all cages currently known to the rpc interface and
                # merge them with cages previously probed and found to be up,
                # except for the health_monitor cage itself should be skipped

                probe_cages = {
                    known_cage: {
                        known_node: dict(location=known_location, probe_result=None)
                        for known_node, known_location in self._rpc_interface.get_nodes(known_cage).items()
                    }
                    for known_cage in self._rpc_interface.get_cages()
                    if known_cage != "health_monitor"
                }

                self._merge_cages(probe_cages, self._up_cages)

                probe_period = pmnc.config.get("probe_period")
                per_cage_interval = probe_period / (len(probe_cages) + 1)

                # walk through all cages to be probed and schedule calls to probe
                # to a private thread pool using fake unregistered requests

                for cage, nodes in probe_cages.items():

                    for node, cage_info in nodes.items():

                        cage_location = cage_info["location"]

                        # note that the requests created here are not registered with
                        # interfaces and enqueued to a different pool too, they are
                        # therefore entitled to termination without warning at shutdown,
                        # this is ok, because they do no useful work for the clients

                        request = Request(
                            timeout=self._request_timeout,
                            interface="__health_monitor__",
                            protocol="n/a",
                            parameters=dict(auth_tokens=dict()),
                            description="probing cage {0:s} at {1:s}".format(cage, cage_location),
                        )

                        self._probe_thread_pool.enqueue(
                            request, self.wu_probe_cage, (node, cage, cage_location, cage_info["probe_result"]), {}
                        )

                    # then again yield to polling the queue for a while

                    if not self._poll_up_down_queue(per_cage_interval):
                        break

            except:
                pmnc.log.error(exc_string())  # log and ignore
Exemple #49
0
def execute_reverse(target_cage: valid_cage_name, module: valid_module_name,
                    method: valid_method_name, args: tuple, kwargs: dict):

    # wrap up an RPC call identical to how it's done in protocol_rpc.py

    request_dict = pmnc.request.to_dict()

    # remove request parameters that must not cross the RPC border

    request_dict["parameters"].pop("retry", None)

    # wrap all the call parameters in a plain dict

    request = dict(source_cage = __cage__,
                   target_cage = target_cage,
                   module = module, method = method,
                   args = args, kwargs = kwargs,
                   request = request_dict)

    request_description = "reverse RPC request {0:s}.{1:s} to {2:s}". \
                          format(module, method, target_cage)

    # create a one-time response queue just for this request

    rs_queue = InterlockedQueue()
    request_id = pmnc.request.unique_id

    with _rs_queues_lock:
        _rs_queues[request_id] = rs_queue # register the call as being active
    try:

        pmnc.log.info("sending {0:s}".format(request_description))
        try:

            # enqueue the call and wait for response

            rq_queue = _get_rq_queue(target_cage)
            rq_queue.push((request_id, request))

            response = pmnc.request.pop(rs_queue)
            if response is None:
                raise Exception("request deadline waiting for response")

            try:
                result = response["result"]
            except KeyError:
                raise RPCError(description = response["exception"], terminal = False)

        except RPCError as e:
            pmnc.log.warning("{0:s} returned error: {1:s}".\
                             format(request_description, e.description))
            raise
        except:
            pmnc.log.warning("{0:s} failed: {1:s}".\
                             format(request_description, exc_string()))
            ResourceError.rethrow(recoverable = False)
        else:
            pmnc.log.info("reverse RPC request returned successfully")
            return result

    finally:
        with _rs_queues_lock:
            del _rs_queues[request_id] # unregister the call