def testNonZeroRemainingSeconds(self):
    """A fresh 10ms timer reports a positive remainingSeconds below the
    period (plus slack), and reports exactly 0.0 once it has expired."""
    timer = ExpirationTimer(timedelta(milliseconds=10))
    with timer as view:
        assert view.remainingSeconds() > 0.0
        assert view.remainingSeconds() < 0.0101
    sleep(timer.view().remainingSeconds())
    assert timer.view().remainingSeconds() == 0.0
def _run_transport(self, maximumDuration=None, txonly=False,
                   incomingHandler=None):
    """Run the underlying transport on behalf of the calling thread,
    serializing access so only one thread drives the transport at a time.

    This is where multiple external threads are synchronized for
    receives.  Transmits will flow down into the transmit layer
    where they are queued with thread safety, but threads
    blocking on a receive will all be lined up through this point.

    Returns the transport.run() result, or None if maximumDuration
    elapsed while waiting for another thread to release the transport.
    """
    max_runtime = ExpirationTimer(maximumDuration)
    with self._cv:
        # Wait until no other thread is running the transport; give up
        # (return None) if the overall deadline passes while queued.
        while self._transport_runner:
            self._cv.wait(max_runtime.view().remainingSeconds())
            if max_runtime.view().expired():
                return None
        self._transport_runner = True
    try:
        # Keep re-running the transport while it reports more queued
        # work (Thespian__UpdateWork) to perform.
        r = Thespian__UpdateWork()
        while isinstance(r, Thespian__UpdateWork):
            r = self.transport.run(TransmitOnly if txonly else incomingHandler,
                                   max_runtime.view().remaining())
        return r
        # incomingHandler callback could deadlock on this same thread; is it ever not None?
    finally:
        # Always release the transport and wake one queued waiter, even
        # if transport.run raised.
        with self._cv:
            self._transport_runner = False
            self._cv.notify()
def testNonZeroRemaining(self):
    """remaining() yields a positive timedelta under the period (plus
    slack) while running, and exactly a zero timedelta once expired."""
    timer = ExpirationTimer(timedelta(milliseconds=10))
    now = currentTime()
    assert timer.view(now).remaining() > timedelta(days=0)
    assert timer.view(now).remaining() < timedelta(milliseconds=11)
    sleep(timer.view().remainingSeconds())
    assert timer.view().remaining() == timedelta(days=0)
def testUnExpiredToUnExpiredComparison(self):
    """Timers with different deadlines are unequal while either is still
    running, and become equal once both have expired."""
    longer = ExpirationTimer(timedelta(milliseconds=15))
    shorter = ExpirationTimer(timedelta(milliseconds=10))
    assert longer != shorter
    assert shorter != longer
    sleep(shorter.view().remainingSeconds())
    print(str(longer), str(shorter))
    # The following will fail if an extra 5ms delay has occurred
    assert longer != shorter
    assert shorter != longer
    sleep(longer.view().remainingSeconds())
    assert longer == shorter
    assert shorter == longer
def test_expiration_timer_None_period():
    """An ExpirationTimer built with a None duration never expires and
    reports None remaining seconds, even after time passes.

    Fixes: comparisons `== False` / `== None` replaced with the PEP 8
    idioms (truthiness / `is None`); assertions unchanged in meaning.
    """
    timer = ExpirationTimer(None)
    with timer as t:
        assert not t.expired()
        assert not timer.view().expired()
    time.sleep(0.2)
    with timer as t:
        assert not t.expired()
        assert t.remainingSeconds() is None
        assert not timer.view().expired()
        assert timer.view().remainingSeconds() is None
def test_expiration_timer():
    """A 1.0s ExpirationTimer counts down (0.7..0.9 remaining after a
    0.2s sleep, allowing scheduling slack) and reports expired with 0.0
    remaining after the full period has elapsed.

    Fixes: comparisons `== False` / `== True` replaced with the PEP 8
    truthiness idiom; assertions unchanged in meaning.
    """
    timer = ExpirationTimer(duration=1.0)
    time.sleep(0.2)
    with timer as t:
        assert not t.expired()
        assert 0.7 <= t.remainingSeconds() <= 0.9
        assert not timer.view().expired()
        assert 0.7 <= timer.view().remainingSeconds() <= 0.9
    time.sleep(1.0)
    with timer as t:
        assert t.expired()
        assert t.remainingSeconds() == 0.0
def testNonZeroIsFalse(self):
    """An unexpired timer is falsy in boolean context; it turns truthy
    once it expires."""
    timer = ExpirationTimer(timedelta(milliseconds=10))
    assert not timer
    assert not bool(timer)
    sleep(timer.view().remainingSeconds())
    assert timer
    assert bool(timer)
def testExpiredToUnExpiredComparison(self):
    """An already-expired timer is unequal to a running timer, and equal
    to it after the running timer also expires."""
    done = ExpirationTimer(timedelta(microseconds=0))
    pending = ExpirationTimer(timedelta(milliseconds=10))
    assert done != pending
    assert pending != done
    sleep(pending.view().remainingSeconds())
    assert done == pending
    assert pending == done
def newPrimaryActor(self, actorClass, targetActorRequirements, globalName,
                    sourceHash=None):
    """Request creation of a new top-level Actor from the Admin.

    Sends a PendingActor request over a thread-private clone of the
    transport, waits up to MAX_CHILD_ACTOR_CREATE_DELAY for the Admin's
    response, and returns the new actor's address.

    Raises InvalidActorSourceHash, InvalidActorSpecification,
    ImportError, NoCompatibleSystemForActor, ActorSystemFailure, or
    ActorSystemRequestTimeout depending on the failure reported (or the
    absence of any response).
    """
    self._numPrimaries = self._numPrimaries + 1
    actorClassName = '%s.%s'%(actorClass.__module__, actorClass.__name__) \
                     if hasattr(actorClass, '__name__') else actorClass
    with closing(self.transport.external_transport_clone()) as tx_external:
        response = NewActorResponse(tx_external, self.adminAddr)
        tx_external.scheduleTransmit(
            None,
            TransmitIntent(self.adminAddr,
                           PendingActor(actorClassName,
                                        None, self._numPrimaries,
                                        targetActorRequirements,
                                        globalName=globalName,
                                        sourceHash=sourceHash),
                           onError=response.transmit_failed))
        endwait = ExpirationTimer(MAX_CHILD_ACTOR_CREATE_DELAY)
        # Do not use _run_transport: the tx_external transport
        # context acquired above is unique to this thread and
        # should not be synchronized/restricted by other threads.
        tx_external.run(response, MAX_CHILD_ACTOR_CREATE_DELAY)
        # Other items might abort the transport run... like transmit
        # failures on a previous ask() that itself already timed out.
        while response.pending and not endwait.view().expired():
            tx_external.run(response, MAX_CHILD_ACTOR_CREATE_DELAY)
    # Transport clone is closed before examining the response.
    if response.failed:
        if response.failure == PendingActorResponse.ERROR_Invalid_SourceHash:
            raise InvalidActorSourceHash(sourceHash)
        if response.failure == PendingActorResponse.ERROR_Invalid_ActorClass:
            raise InvalidActorSpecification(actorClass,
                                            response.failure_message)
        if response.failure == PendingActorResponse.ERROR_Import:
            info = response.failure_message
            if info:
                thesplog('Actor Create Failure, Import Error: %s', info)
                raise ImportError(str(actorClass) + ': ' + info)
            thesplog('Actor Create Failure, Import Error')
            raise ImportError(actorClass)
        if response.failure == PendingActorResponse.ERROR_No_Compatible_ActorSystem:
            raise NoCompatibleSystemForActor(
                actorClass, 'No compatible ActorSystem could be found')
        raise ActorSystemFailure("Could not request new Actor from Admin (%s)"
                                 % (response.failure))
    if response.actor_address:
        return response.actor_address
    # actor_address of False (vs None) indicates an explicit "no system"
    # answer rather than a missing answer.
    if response.actor_address is False:
        raise NoCompatibleSystemForActor(
            actorClass, 'No compatible ActorSystem could be found')
    raise ActorSystemRequestTimeout(
        'No response received to PendingActor request to Admin'
        ' at %s from %s'%(str(self.adminAddr),
                          str(self.transport.myAddress)))
def testNoneComparedToNonZero(self):
    """A None-period (forever) timer orders greater than any finite
    timer, both before and after the finite timer expires."""
    forever = ExpirationTimer(None)
    finite = ExpirationTimer(timedelta(milliseconds=10))
    # None == forever, so it is greater than anything, although equal to itself
    for _ in range(2):
        assert forever > finite
        assert finite < forever
    sleep(finite.view().remainingSeconds())
    for _ in range(2):
        assert forever > finite
        assert finite < forever
def run(self, incomingHandler, maximumDuration=None):
    """Core scheduling method; called by the current Actor process when
       idle to await new messages (or to do background processing).
    """
    deadline = ExpirationTimer(maximumDuration)
    # Guarantee at least one pass so expired wakeups and queued events
    # are serviced even for a null/negative maximumDuration.
    while True:
        result = self._run_subtransport(incomingHandler, deadline)
        if result not in (True, None) or deadline.view().expired():
            return result
def __init__(self, system, logDefs = None):
    """Connect to (or start) the ActorSystem Admin for this system.

    Probes for an existing Admin at the transport-derived address,
    starting one if none responds, retrying until
    MAX_SYSTEM_SHUTDOWN_DELAY (+1s) elapses.

    Raises InvalidActorAddress if no usable Admin could be verified.

    Fix: the `import time` statement previously sat inside the retry
    loop and was re-executed on every iteration; it is hoisted to the
    top of the method (behavior unchanged).
    """
    import time
    ensure_TZ_set()
    # Expects self.transport has already been set by subclass __init__
    super(systemBase, self).__init__(
        self.transport.getAdminAddr(system.capabilities))
    tryingTime = ExpirationTimer(MAX_SYSTEM_SHUTDOWN_DELAY +
                                 timedelta(seconds=1))
    while not tryingTime.view().expired():
        if not self.transport.probeAdmin(self.adminAddr):
            self._startAdmin(self.adminAddr,
                             self.transport.myAddress,
                             system.capabilities,
                             logDefs)
        if self._verifyAdminRunning():
            return
        time.sleep(0.5)  # Previous version may have been exiting
    if not self._verifyAdminRunning():
        raise InvalidActorAddress(self.adminAddr,
                                  'not a valid or useable ActorSystem Admin')
def startupASLogger(addrOfStarter, logEndpoint, logDefs,
                    transportClass, aggregatorAddress):
    """Main loop of the dedicated logging process.

    Re-initializes the logging subsystem, announces readiness to the
    starter, then services LogRecords (and logger control messages)
    from the transport until a LoggerExitRequest arrives or too many
    internal exceptions occur in a one-second window.

    Fixes applied:
    - the Thespian__UpdateWork re-scheduling call was missing the
      leading `None` argument that every other scheduleTransmit call
      in this function passes;
    - `logging.warn` (deprecated alias, removed in Python 3.13) is
      replaced by `logging.warning`;
    - duplicated header comment removed.
    """
    # Dirty trick here to completely re-initialize logging in this
    # process... something the standard Python logging interface does
    # not allow via the API.  We also do not want to run
    # logging.shutdown() because (a) that does not do enough to reset,
    # and (b) it shuts down handlers, but we want to leave the
    # parent's handlers alone.
    logging.root = logging.RootLogger(logging.WARNING)
    logging.Logger.root = logging.root
    logging.Logger.manager = logging.Manager(logging.Logger.root)
    if logDefs:
        dictConfig(logDefs)
    else:
        logging.basicConfig()
    # Disable thesplog from within the logging process (by setting the
    # logfile size to zero) to try to avoid recursive logging loops.
    thesplog_control(logging.WARNING, False, 0)
    #logging.info('ActorSystem Logging Initialized')
    transport = transportClass(logEndpoint)
    setProcName('logger', transport.myAddress)
    transport.scheduleTransmit(None,
                               TransmitIntent(addrOfStarter,
                                              LoggerConnected()))
    fdup = None
    last_exception_time = None
    exception_count = 0
    while True:
        try:
            r = transport.run(None)
            if isinstance(r, Thespian__UpdateWork):
                # Fix: pass None as the first argument, consistent with
                # every other scheduleTransmit call here.
                transport.scheduleTransmit(
                    None, TransmitIntent(transport.myAddress, r))
                continue
            logrecord = r.message
            if isinstance(logrecord, LoggerExitRequest):
                logging.info('ActorSystem Logging Shutdown')
                return
            elif isinstance(logrecord, LoggerFileDup):
                fdup = getattr(logrecord, 'fname', None)
            elif isinstance(logrecord, LogAggregator):
                aggregatorAddress = logrecord.aggregatorAddress
            elif isinstance(logrecord, logging.LogRecord):
                logging.getLogger(logrecord.name).handle(logrecord)
                if fdup:
                    with open(fdup, 'a') as ldf:
                        ldf.write('%s\n' % str(logrecord))
                # Forward warnings and above to the aggregator, if any.
                if aggregatorAddress and \
                   logrecord.levelno >= logging.WARNING:
                    transport.scheduleTransmit(
                        None, TransmitIntent(aggregatorAddress, logrecord))
            else:
                logging.warning('Unknown message rcvd by logger: %s'
                                % str(logrecord))
        except Exception as ex:
            thesplog('Thespian Logger aborting (#%d) with error %s',
                     exception_count, ex, exc_info=True)
            # Rate-limit: allow at most
            # MAX_LOGGING_EXCEPTIONS_PER_SECOND exceptions within any
            # one-second window before giving up entirely.
            if last_exception_time is None or \
               last_exception_time.view().expired():
                last_exception_time = ExpirationTimer(timedelta(seconds=1))
                exception_count = 0
            else:
                exception_count += 1
                if exception_count >= MAX_LOGGING_EXCEPTIONS_PER_SECOND:
                    thesplog('Too many Thespian Logger exceptions'
                             ' (#%d in %s); exiting!',
                             exception_count,
                             timedelta(seconds=1) -
                             last_exception_time.view().remaining())
                    return
class LocalConventionState(object):
    """Tracks this ActorSystem's view of the convention: the set of
    remote members, the (possibly HA / multi-candidate) leader, and the
    periodic registration/check-in bookkeeping.
    """
    # The general process of leader management in an HA configuration
    # where there are multiple potential leaders is currently:
    #
    # 1. The self._conventionAddress is a list of potential leader
    #    addresses.  This is set from the capabilities (with an assist
    #    from the transport to map the addresses to a
    #    transport-specific address).  At present, it is assumed that
    #    all convention members are initialized with the same list,
    #    and in the same order (excluding invite-only members).
    #
    # 2. The current active leader is the "highest" leader: the one
    #    with the lowest index in the list (appears first) that is
    #    also currently active (as provided by the
    #    self._conventionMembers list that is already updated by
    #    active registrations and either active deregistrations or
    #    timeouts.  The self._conventionLeaderIdx is a helper to
    #    indicate the current active leader without searching the
    #    self._conventionMembers array.
    #
    # 3. The standard operational mode of a convention is that the
    #    leader is largely passive in terms of membership: members
    #    initially join and subsequently periodically check-in by
    #    sending a registration request (eliciting a corresponding
    #    response from the leader).  The leader removes them from the
    #    convention if they don't check-in within a specified period
    #    of time, but does not actively probe the member.  The
    #    principle behind this is that traffic should only be
    #    generated for active members and not inactive members.
    #
    # 4. With the addition of HA support, the member registration and
    #    check-in is always sent to *all* potential leaders,
    #    regardless of which is thought to be the current active
    #    leader by that member.  This includes all potential leaders,
    #    which send a registration to potential leaders higher than
    #    themselves.
    #
    # 5. When a potential leader receives a check-in registration, it
    #    will check to see if it believes itself to be the
    #    highest-priority active leader.  If so, it will respond and
    #    the remote will see that it is the current leader (including
    #    any other potential leaders, active or not).  Potential
    #    leaders that see a higher-priority leader will not respond to
    #    a check-in request, but will have updated their internal
    #    member information list.
    #
    # Based on the above, a leadership transition occurs naturally
    # (albeit slowly) through the passive combination of #2, #4, and
    # #5.  At present, there is no exchange of state information
    # between leaders, so any context maintained by one leader will be
    # lost in moving to a new leader [this is an area that should be
    # improved in future work]

    def __init__(self, myAddress, capabilities, sCBStats,
                 getConventionAddressFunc):
        self._myAddress = myAddress
        self._capabilities = capabilities
        self._sCBStats = sCBStats
        self._conventionMembers = AssocList() # key=Remote Admin Addr, value=ConventionMemberData
        self._conventionNotificationHandlers = []
        self._getConventionAddr = getConventionAddressFunc
        self._conventionLeaderIdx = 0
        self._conventionAddress = getConventionAddressFunc(capabilities)
        # Normalize to a list: non-HA configurations supply a single address.
        if not isinstance(self._conventionAddress, list):
            self._conventionAddress = [self._conventionAddress]
        self._conventionRegistration = ExpirationTimer(
            CONVENTION_REREGISTRATION_PERIOD)
        self._has_been_activated = False
        self._invited = False # entered convention as a result of an explicit invite

    @property
    def myAddress(self):
        return self._myAddress

    @property
    def capabilities(self):
        return self._capabilities

    def updateStatusResponse(self, resp):
        """Populate a status-response object with convention state."""
        resp.setConventionLeaderAddress(self.conventionLeaderAddr)
        resp.setConventionRegisterTime(self._conventionRegistration)
        for each in self._conventionMembers.values():
            resp.addConventioneer(each.remoteAddress, each.registryValid)
        resp.setNotifyHandlers(self._conventionNotificationHandlers)

    def active_in_convention(self):
        # If this is the convention leader, it is automatically
        # active, otherwise this convention member should have a
        # convention leader and that leader should have an active
        # entry in the _conventionMembers table (indicating it has
        # updated this system with its information)
        return bool(self.conventionLeaderAddr and
                    self._conventionMembers.find(self.conventionLeaderAddr))

    @property
    def conventionLeaderAddr(self):
        return self._conventionAddress[self._conventionLeaderIdx]

    def isConventionLeader(self):
        "Return true if this is the current leader of this convention"
        # This checks to see if the current system is the convention
        # leader.  This check is dynamic and may have the effect of
        # changing the determination of which is the actual convention
        # leader.
        if self._conventionAddress == [None]:
            return True
        for (idx, myLeader) in enumerate(self._conventionAddress):
            if myLeader == self.myAddress:
                # I am the highest active leader, therefore I am the
                # current actual leader.
                self._conventionLeaderIdx = idx
                return True
            if self._conventionMembers.find(myLeader):
                # A leader higher priority than myself exists (this is
                # actually the highest active due to the processing
                # order), so it's the leader, not me.
                self._conventionLeaderIdx = idx
                return False
        return False

    def capabilities_have_changed(self, new_capabilities):
        """Adopt new capabilities and re-register with the convention."""
        self._capabilities = new_capabilities
        return self.setup_convention()

    def setup_convention(self, activation=False):
        """Called to perform the initial registration with the convention
        leader (unless this *is* the leader) and also whenever
        connectivity to the convention leader is restored.  Performs
        some administration and then attempts to register with the
        convention leader.
        """
        self._has_been_activated |= activation
        rmsgs = []
        # If not specified in capabilities, don't override any invites
        # that may have been received.
        self._conventionAddress = self._getConventionAddr(self.capabilities) or \
                                  self._conventionAddress
        if not isinstance(self._conventionAddress, list):
            self._conventionAddress = [self._conventionAddress]
        # Leader list may have shrunk; keep the index in range.
        if self._conventionLeaderIdx >= len(self._conventionAddress):
            self._conventionLeaderIdx = 0
        leader_is_gone = (self._conventionMembers.find(self.conventionLeaderAddr) is None) \
                         if self.conventionLeaderAddr else True
        # Register with all other leaders to notify them that this potential leader is online
        if self._conventionAddress and \
           self._conventionAddress[0] != None:
            for possibleLeader in self._conventionAddress:
                if possibleLeader == self.myAddress:
                    # Don't register with myself
                    continue
                re_registering = not leader_is_gone and \
                                 (possibleLeader == self.conventionLeaderAddr)
                thesplog('Admin registering with Convention @ %s (%s)',
                         possibleLeader,
                         'first time' if not re_registering else 're-registering',
                         level=logging.INFO, primary=True)
                rmsgs.append(
                    HysteresisSend(possibleLeader,
                                   ConventionRegister(self.myAddress,
                                                      self.capabilities,
                                                      not re_registering),
                                   onSuccess=self._setupConventionCBGood,
                                   onError=self._setupConventionCBError))
        # Restart the periodic re-registration timer.
        self._conventionRegistration = ExpirationTimer(
            CONVENTION_REREGISTRATION_PERIOD)
        return rmsgs

    def _setupConventionCBGood(self, result, finishedIntent):
        # Registration succeeded: clear any accumulated miss count.
        self._sCBStats.inc('Admin Convention Registered')
        if hasattr(self, '_conventionLeaderMissCount'):
            delattr(self, '_conventionLeaderMissCount')

    def _setupConventionCBError(self, result, finishedIntent):
        # Registration failed: count the miss (checked by
        # _convention_member_checks against CONVENTION_REGISTRATION_MISS_MAX).
        self._sCBStats.inc('Admin Convention Registration Failed')
        if hasattr(self, '_conventionLeaderMissCount'):
            self._conventionLeaderMissCount += 1
        else:
            self._conventionLeaderMissCount = 1
        thesplog('Admin cannot register with convention @ %s (miss %d): %s',
                 finishedIntent.targetAddr,
                 self._conventionLeaderMissCount, result,
                 level=logging.WARNING, primary=True)

    def got_convention_invite(self, sender):
        """An explicit invitation makes the sender this member's sole leader."""
        self._conventionAddress = [sender]
        self._conventionLeaderIdx = 0
        self._invited = True
        return self.setup_convention()

    def got_convention_register(self, regmsg):
        # Called when remote convention member has sent a
        # ConventionRegister message.  This is first called the leader
        # when the member registers with the leader, and then on the
        # member when the leader responds with same.  Thus the current
        # node could be a member, a potential leader, the current
        # leader, or a potential leader with higher potential than the
        # current leader and which should become the new leader.
        self._sCBStats.inc('Admin Handle Convention Registration')
        if self._invited and not self.conventionLeaderAddr:
            # Lost connection to an invitation-only convention.
            # Cannot join again until another invitation is received.
            return []
        # Remote member may re-register if changing capabilities
        rmsgs = []
        registrant = regmsg.adminAddress
        prereg = getattr(regmsg, 'preRegister', False) # getattr used; see definition
        existing = self._conventionMembers.find(registrant)
        thesplog('Got Convention %sregistration from %s (%s) (new? %s)',
                 'pre-' if prereg else '',
                 registrant,
                 'first time' if regmsg.firstTime else 're-registering',
                 not existing,
                 level=logging.DEBUG)
        if registrant == self.myAddress:
            # Either remote failed getting an external address and is
            # using 127.0.0.1 or else this is a malicious attempt to
            # make us talk to ourselves.  Ignore it.
            thesplog('Convention registration from %s is an invalid address; ignoring.',
                     registrant,
                     level=logging.WARNING)
            return rmsgs
        existingPreReg = (
            # existing.preRegOnly
            # or existing.preRegistered
            existing.permanentEntry) if existing else False
        notify = (not existing or existing.preRegOnly) and not prereg
        if regmsg.firstTime or not existing:
            if existing:
                # Remote claims a fresh start: discard the stale entry
                # (and its remote actors) before re-adding.
                existing = None
                notify = not prereg
                rmsgs.extend(self._remote_system_cleanup(registrant))
            newmember = ConventionMemberData(registrant,
                                             regmsg.capabilities,
                                             prereg)
            if prereg or existingPreReg:
                newmember.preRegistered = PreRegistration()
            self._conventionMembers.add(registrant, newmember)
        else:
            existing.refresh(regmsg.capabilities, prereg or existingPreReg)
            if not prereg:
                existing.preRegOnly = False
        if not self.isConventionLeader():
            self._conventionRegistration = ExpirationTimer(
                CONVENTION_REREGISTRATION_PERIOD)
            rmsgs.append(LogAggregator(self.conventionLeaderAddr))
        # Convention Members normally periodically initiate a
        # membership message, to which the leader confirms by
        # responding.
        #if self.isConventionLeader() or prereg or regmsg.firstTime:
        if prereg:
            # If this was a pre-registration, that identifies this
            # system as the "leader" for that remote.  Also, if the
            # remote sent this because it was a pre-registration
            # leader, it doesn't yet have all the member information
            # so the member should respond.
            rmsgs.append(HysteresisCancel(registrant))
            rmsgs.append(TransmitIntent(registrant, ConventionInvite()))
        elif (self.isConventionLeader() or prereg or regmsg.firstTime or \
            (existing and existing.permanentEntry)):
            # NOTE(review): `prereg` here is unreachable — the branch
            # above already handled prereg — confirm before removing.
            # If we are the Convention Leader, this would be the point to
            # inform all other registrants of the new registrant.  At
            # present, there is no reciprocity here, so just update the
            # new registrant with the leader's info.
            rmsgs.append(
                TransmitIntent(registrant,
                               ConventionRegister(self.myAddress,
                                                  self.capabilities)))
        if notify:
            rmsgs.extend(
                self._notifications_of(
                    ActorSystemConventionUpdate(registrant,
                                                regmsg.capabilities,
                                                True)))
        return rmsgs

    def _notifications_of(self, msg):
        # Fan msg out to every registered notification handler.
        return [TransmitIntent(H, msg)
                for H in self._conventionNotificationHandlers]

    def add_notification_handler(self, addr):
        """Register addr for convention-update notifications; returns the
        initial batch of updates describing current (full) members."""
        if addr not in self._conventionNotificationHandlers:
            self._conventionNotificationHandlers.append(addr)
            # Now update the registrant on the current state of all convention members
            return [TransmitIntent(addr,
                                   ActorSystemConventionUpdate(
                                       M.remoteAddress,
                                       M.remoteCapabilities,
                                       True))
                    for M in self._conventionMembers.values()
                    if not M.preRegOnly]
        return []

    def remove_notification_handler(self, addr):
        self._conventionNotificationHandlers = [
            H for H in self._conventionNotificationHandlers
            if H != addr]

    def got_convention_deregister(self, deregmsg):
        """Handle a ConventionDeRegister from a remote member."""
        self._sCBStats.inc('Admin Handle Convention De-registration')
        remoteAdmin = deregmsg.adminAddress
        if remoteAdmin == self.myAddress:
            # Either remote failed getting an external address and is
            # using 127.0.0.1 or else this is a malicious attempt to
            # make us talk to ourselves.  Ignore it.
            # NOTE(review): unlike got_convention_register, this branch
            # does not return, so processing continues despite the
            # "ignoring" message — confirm whether a `return []` is
            # intended here.
            thesplog('Convention deregistration from %s is an invalid address; ignoring.',
                     remoteAdmin,
                     level=logging.WARNING)
        rmsgs = []
        if getattr(deregmsg, 'preRegistered', False): # see definition for getattr use
            existing = self._conventionMembers.find(remoteAdmin)
            if existing:
                existing.preRegistered = None
                rmsgs.append(TransmitIntent(remoteAdmin,
                                            ConventionDeRegister(self.myAddress)))
        return rmsgs + self._remote_system_cleanup(remoteAdmin)

    def got_system_shutdown(self):
        return self.exit_convention()

    def exit_convention(self):
        """Leave the convention, de-registering with the leader (or, if
        this is the leader, with every member)."""
        # NOTE(review): this sets `self.invited`, but every other use of
        # the flag is `self._invited` (see __init__) — looks like a
        # misnamed attribute; confirm intent.
        self.invited = False
        gen_ops = lambda addr: [HysteresisCancel(addr),
                                TransmitIntent(addr,
                                               ConventionDeRegister(self.myAddress)),
                               ]
        # terminate() cleans up local state for the remote, then returns
        # the cancel+deregister operations for it.
        terminate = lambda a: [self._remote_system_cleanup(a), gen_ops(a)][-1]
        if self.conventionLeaderAddr and \
           self.conventionLeaderAddr != self.myAddress:
            thesplog('Admin de-registering with Convention @ %s',
                     str(self.conventionLeaderAddr),
                     level=logging.INFO, primary=True)
            # Cache convention leader address because it might get reset by terminate()
            claddr = self.conventionLeaderAddr
            terminate(self.conventionLeaderAddr)
            return gen_ops(claddr)
        return join(fmap(terminate,
                         [M.remoteAddress
                          for M in self._conventionMembers.values()
                          if M.remoteAddress != self.myAddress]))

    def check_convention(self):
        """Periodic convention maintenance: expire members, re-register,
        and ping pre-registered members as needed."""
        ct = currentTime()
        rmsgs = []
        if self._has_been_activated:
            rmsgs = foldl(lambda x, y: x + y,
                          [self._check_preregistered_ping(ct, member)
                           for member in self._conventionMembers.values()],
                          self._convention_leader_checks(ct)
                          if self.isConventionLeader() or
                          not self.conventionLeaderAddr else
                          self._convention_member_checks(ct))
        if self._conventionRegistration.view(ct).expired():
            self._conventionRegistration = ExpirationTimer(
                CONVENTION_REREGISTRATION_PERIOD)
        return rmsgs

    def _convention_leader_checks(self, ct):
        # Leader: drop every member whose registration has lapsed.
        return foldl(lambda x, y: x + y,
                     [self._missed_checkin_remote_cleanup(R)
                      for R in [member
                                for member in self._conventionMembers.values()
                                if member.registryValid.view(ct).expired()]],
                     [])

    def _missed_checkin_remote_cleanup(self, remote_member):
        thesplog('%s missed %d checkins (%s); assuming it has died',
                 str(remote_member),
                 CONVENTION_REGISTRATION_MISS_MAX,
                 str(remote_member.registryValid),
                 level=logging.WARNING, primary=True)
        return self._remote_system_cleanup(remote_member.remoteAddress)

    def _convention_member_checks(self, ct):
        rmsgs = []
        # Re-register with the Convention if it's time
        if self.conventionLeaderAddr and \
           self._conventionRegistration.view(ct).expired():
            if getattr(self, '_conventionLeaderMissCount', 0) >= \
               CONVENTION_REGISTRATION_MISS_MAX:
                # Too many consecutive failed registrations: treat the
                # leader as gone and clean up.
                thesplog('Admin convention registration lost @ %s (miss %d)',
                         self.conventionLeaderAddr,
                         self._conventionLeaderMissCount,
                         level=logging.WARNING, primary=True)
                rmsgs.extend(self._remote_system_cleanup(self.conventionLeaderAddr))
                self._conventionLeaderMissCount = 0
            else:
                rmsgs.extend(self.setup_convention())
        return rmsgs

    def _check_preregistered_ping(self, ct, member):
        # Re-invite a pre-registered member whose ping window lapsed and
        # that has no ping outstanding.
        if member.preRegistered and \
           member.preRegistered.pingValid.view(ct).expired() and \
           not member.preRegistered.pingPending:
            member.preRegistered.pingPending = True
            # If remote misses a checkin, re-extend the
            # invitation.  This also helps re-initiate a socket
            # connection if a TxOnly socket has been lost.
            member.preRegistered.pingValid = ExpirationTimer(
                convention_reinvite_adjustment(
                    CONVENTION_RESTART_PERIOD
                    if member.registryValid.view(ct).expired()
                    else CONVENTION_REREGISTRATION_PERIOD))
            return [HysteresisSend(member.remoteAddress,
                                   ConventionInvite(),
                                   onSuccess=self._preRegQueryNotPending,
                                   onError=self._preRegQueryNotPending)]
        return []

    def _preRegQueryNotPending(self, result, finishedIntent):
        # Ping completed (either way): allow the next one.
        remoteAddr = finishedIntent.targetAddr
        member = self._conventionMembers.find(remoteAddr)
        if member and member.preRegistered:
            member.preRegistered.pingPending = False

    def _remote_system_cleanup(self, registrant):
        """Called when a RemoteActorSystem has exited and all associated
        Actors should be marked as exited and the ActorSystem removed
        from Convention membership.  This is also called on a First
        Time connection from the remote to discard any previous
        connection information.
        """
        thesplog('Convention cleanup or deregistration for %s (known? %s)',
                 registrant,
                 bool(self._conventionMembers.find(registrant)),
                 level=logging.INFO)
        rmsgs = [LostRemote(registrant)]
        cmr = self._conventionMembers.find(registrant)
        if not cmr or cmr.preRegOnly:
            return []
        # Send exited notification to conventionNotificationHandler (if any)
        for each in self._conventionNotificationHandlers:
            rmsgs.append(
                TransmitIntent(each,
                               ActorSystemConventionUpdate(
                                   cmr.remoteAddress,
                                   cmr.remoteCapabilities,
                                   False))) # errors ignored
        # If the remote ActorSystem shutdown gracefully (i.e. sent
        # a Convention Deregistration) then it should not be
        # necessary to shutdown remote Actors (or notify of their
        # shutdown) because the remote ActorSystem should already
        # have caused this to occur.  However, it won't hurt, and
        # it's necessary if the remote ActorSystem did not exit
        # gracefully.
        for lpa, raa in cmr.hasRemoteActors:
            # ignore errors:
            rmsgs.append(TransmitIntent(lpa, ChildActorExited(raa)))
            # n.b. at present, this means that the parent might
            # get duplicate notifications of ChildActorExited; it
            # is expected that Actors can handle this.
        # Remove remote system from conventionMembers
        if not cmr.preRegistered:
            cla = self.conventionLeaderAddr
            self._conventionMembers.rmv(registrant)
            if registrant == cla:
                if self._invited:
                    # Don't clear invited: once invited, that
                    # perpetually indicates this should be only a
                    # member and never a leader.
                    self._conventionAddress = [None]
                else:
                    rmsgs.extend(self.setup_convention())
        else:
            # This conventionMember needs to stay because the
            # current system needs to continue issuing
            # registration pings.  By setting the registryValid
            # expiration to forever, this member won't re-time-out
            # and will therefore be otherwise ignored... until it
            # registers again at which point the membership will
            # be updated with new settings.
            cmr.registryValid = ExpirationTimer(None)
            cmr.preRegOnly = True
        return rmsgs + [HysteresisCancel(registrant)]

    def sentByRemoteAdmin(self, envelope):
        """True iff envelope.sender is a known convention member's Admin."""
        for each in self._conventionMembers.values():
            if envelope.sender == each.remoteAddress:
                return True
        return False

    def convention_inattention_delay(self, current_time):
        # How long until convention activity next needs attention.
        return (self._conventionRegistration or
                ExpirationTimer(CONVENTION_REREGISTRATION_PERIOD
                                if self.active_in_convention() or
                                self.isConventionLeader() else
                                CONVENTION_RESTART_PERIOD)).view(current_time)

    def forward_pending_to_remote_system(self, childClass, envelope,
                                         sourceHash, acceptsCaps):
        """Pick a remote system able to create childClass and forward the
        pending-actor request there; raises NoCompatibleSystemForActor
        when no system (and no leader to ask) can handle it."""
        alreadyTried = getattr(envelope.message, 'alreadyTried', [])
        ct = currentTime()
        if self.myAddress not in alreadyTried:
            # Don't send request back to this actor system: it cannot
            # handle it
            alreadyTried.append(self.myAddress)
        remoteCandidates = [
            K
            for K in self._conventionMembers.values()
            if not K.registryValid.view(ct).expired()
            and K.remoteAddress != envelope.sender # source Admin
            and K.remoteAddress not in alreadyTried
            and acceptsCaps(K.remoteCapabilities)]
        if not remoteCandidates:
            if self.isConventionLeader() or not self.conventionLeaderAddr:
                raise NoCompatibleSystemForActor(
                    childClass,
                    'No known ActorSystems can handle a %s for %s',
                    childClass, envelope.message.forActor)
            # Let the Convention Leader try to find an appropriate ActorSystem
            bestC = self.conventionLeaderAddr
        else:
            # distribute equally amongst candidates
            C = [(K.remoteAddress, len(K.hasRemoteActors))
                 for K in remoteCandidates]
            bestC = foldl(lambda best, possible:
                          best if best[1] <= possible[1] else possible,
                          C)[0]
        thesplog('Requesting creation of %s%s on remote admin %s',
                 envelope.message.actorClassName,
                 ' (%s)' % sourceHash if sourceHash else '',
                 bestC)
        if bestC in alreadyTried:
            return [] # Have to give up, no-one can handle this
        # Don't send request to this remote again, it has already
        # been tried.  This would also be indicated by that system
        # performing the add of self.myAddress as below, but if
        # there is disagreement between the local and remote
        # addresses, this addition will prevent continual
        # bounceback.
        alreadyTried.append(bestC)
        envelope.message.alreadyTried = alreadyTried
        return [TransmitIntent(bestC, envelope.message)]

    def send_to_all_members(self, message, exception_list=None):
        """HysteresisSend message to every member not in exception_list."""
        return [HysteresisSend(M.remoteAddress, message)
                for M in self._conventionMembers.values()
                if M.remoteAddress not in (exception_list or [])]
class HysteresisDelaySender(object):
    """Implements hysteresis delay for sending messages.  This is
       intended to be used for messages exchanged between convention
       members to ensure that a mis-behaved member doesn't have the
       ability to inflict damage on the entire convention.  The first
       time a message is sent via this sender it is passed on through,
       but that starts a blackout period that starts with the
       CONVENTION_HYSTERESIS_MIN_PERIOD.  Each additional send attempt
       during that blackout period will cause the blackout period to be
       extended by the CONVENTION_HYSTERESIS_RATE, up to the
       CONVENTION_HYSTERESIS_MAX_PERIOD.  Once the blackout period
       ends, the queued sends will be sent, but only the last attempted
       message of each type for the specified remote target.  At that
       point, the hysteresis delay will be reduced by the
       CONVENTION_HYSTERESIS_RATE; further send attempts will affect
       the hysteresis blackout period as described as above but lack of
       sending attempts will continue to reduce the hysteresis back to
       a zero-delay setting.

       Note: delays are updated in a target-independent manner; the
             target is only considered when eliminating duplicates.

       Note: maxDelay on TransmitIntents is ignored by hysteresis
             delays.  It is assumed that a transmit intent's maxDelay
             is greater than the maximum hysteresis period and/or that
             the hysteresis delay is more important than the transmit
             intent timeout.
    """
    def __init__(self, actual_sender,
                 hysteresis_min_period=HYSTERESIS_MIN_PERIOD,
                 hysteresis_max_period=HYSTERESIS_MAX_PERIOD,
                 hysteresis_rate=HYSTERESIS_RATE):
        self._sender = actual_sender
        # Timer marking the end of the current blackout window.
        self._hysteresis_until = ExpirationTimer(timedelta(seconds=0))
        self._hysteresis_queue = []
        self._current_hysteresis = None # timedelta
        self._hysteresis_min_period = hysteresis_min_period
        self._hysteresis_max_period = hysteresis_max_period
        self._hysteresis_rate = hysteresis_rate

    @property
    def delay(self):
        # View of the time remaining in the current blackout window.
        return self._hysteresis_until.view()

    def _has_hysteresis(self):
        # True once an actual (>= minimum) delay is in effect.
        return (self._current_hysteresis is not None and
                self._current_hysteresis >= self._hysteresis_min_period)

    def _increase_hysteresis(self):
        # Scale the delay up by the rate (capped at the max), or start
        # it at the minimum if none is in effect yet.
        if self._has_hysteresis():
            try:
                self._current_hysteresis = min(
                    (self._current_hysteresis * self._hysteresis_rate),
                    self._hysteresis_max_period)
            except TypeError:
                # See note below for _decrease_hysteresis
                self._current_hysteresis = min(
                    timedelta(seconds=(self._current_hysteresis.seconds *
                                       self._hysteresis_rate)),
                    self._hysteresis_max_period)
        else:
            self._current_hysteresis = self._hysteresis_min_period

    def _decrease_hysteresis(self):
        # Scale the delay back down by the rate; drops to None once it
        # falls below the minimum.
        try:
            self._current_hysteresis = (
                (self._current_hysteresis / self._hysteresis_rate)
                if self._has_hysteresis() else None)
        except TypeError:
            # Python 2.x cannot multiply or divide a timedelta by a
            # fractional amount.  There is also not a total_seconds
            # retrieval from a timedelta, but it should be safe to
            # assume that the hysteresis value is not greater than 1
            # day.
            self._current_hysteresis = timedelta(
                seconds=(self._current_hysteresis.seconds /
                         self._hysteresis_rate)) \
                if self._has_hysteresis() else None

    def _update_remaining_hysteresis_period(self, reset=False):
        # Re-arm the blackout timer from the current delay; when not
        # resetting, credit the time already served on the old timer.
        if not self._current_hysteresis:
            self._hysteresis_until = ExpirationTimer(timedelta(seconds=0))
        else:
            if reset or not self._hysteresis_until:
                self._hysteresis_until = ExpirationTimer(
                    self._current_hysteresis)
            else:
                self._hysteresis_until = ExpirationTimer(
                    self._current_hysteresis -
                    self._hysteresis_until.view().remaining())

    def checkSends(self):
        """If the blackout has ended, reduce the delay and flush the queue."""
        if self.delay.expired():
            self._decrease_hysteresis()
            self._update_remaining_hysteresis_period(reset=True)
            for intent in self._keepIf(lambda M: False):
                self._sender(intent)

    @staticmethod
    def safe_cmp(val1, val2):
        # Equality that never raises (messages may have odd __eq__).
        try:
            return val1 == val2
        except Exception:
            return False

    def sendWithHysteresis(self, intent):
        """Send intent now if no blackout is active, otherwise queue it,
        superseding any queued duplicate to the same target."""
        if self._hysteresis_until.view().expired():
            self._current_hysteresis = self._hysteresis_min_period
            self._sender(intent)
        else:
            dups = self._keepIf(lambda M:
                                (M.targetAddr != intent.targetAddr or
                                 not HysteresisDelaySender.safe_cmp(
                                     M.message, intent.message)))
            # The dups are duplicate sends to the new intent's target;
            # complete them when the actual message is finally sent
            # with the same result
            if dups:
                intent.addCallback(self._dupSentGood(dups),
                                   self._dupSentFail(dups))
            self._hysteresis_queue.append(intent)
        # Every send attempt lengthens the blackout window.
        self._increase_hysteresis()
        self._update_remaining_hysteresis_period()

    def cancelSends(self, remoteAddr):
        """Fail every queued send destined for remoteAddr."""
        for each in self._keepIf(lambda M: M.targetAddr != remoteAddr):
            each.tx_done(SendStatus.Failed)

    def _keepIf(self, keepFunc):
        # Retain queue entries matching keepFunc; return the rest.
        requeues, removes = partition(keepFunc, self._hysteresis_queue)
        self._hysteresis_queue = requeues
        return removes

    @staticmethod
    def _dupSentGood(dups):
        # Propagate the final send result to superseded duplicates.
        def _finishDups(result, finishedIntent):
            for each in dups:
                each.tx_done(result)
        return _finishDups

    @staticmethod
    def _dupSentFail(dups):
        # Propagate the final send failure to superseded duplicates.
        def _finishDups(result, finishedIntent):
            for each in dups:
                each.tx_done(result)
        return _finishDups
class wakeupTransportBase(object):
    """The wakeupTransportBase is designed to be used as a mixin-base for
       a Transport class and provides handling for the wakeupAfter()
       functionality.

       This base mixin provides the primary .run() entrypoint for the
       transport and a .run_time ExpirationTime member that provides
       the remaining time-to-run period.

       The system can handle .wakeupAfter() requests by calling this
       class's .addWakeup() method with the datetime.timedelta for the
       wakeup to be scheduled.

       The Transport should provide the following:

          ._runWithExpiry(incomingHandler)

              Called by this class's .run() entrypoint to do the
              actual transport-specific run routine.  Should perform
              that activity while the self.run_time ExpirationTimer is
              not expired (self.run_time will be updated when new
              wakeupAfter() events are scheduled).
    """

    def __init__(self, *args, **kw):
        super(wakeupTransportBase, self).__init__(*args, **kw)
        # _pendingWakeups is a sorted list of tuples, containing an
        # ExpirationTimer and a payload.  It is sorted by ExpirationTimer
        # from the shortest to the longest.
        self._activeWakeups = []  # expired wakeups to be delivered
        self._wakeup_lock = threading.Lock()  # protects the following:
        self._pendingWakeups = []

    def _updateStatusResponse(self, resp):
        """Called to update a Thespian_SystemStatus or Thespian_ActorStatus
           with common information
        """
        # we only pass the ExpirationTimer without payload as this should be
        # sufficient for status information.
        with self._wakeup_lock:
            resp.addWakeups([(self.myAddress, T[0])
                             for T in self._pendingWakeups])
            for each in self._activeWakeups:
                resp.addPendingMessage(self.myAddress, self.myAddress,
                                       str(each.message))

    def _update_runtime(self):
        # run_time tracks the nearest pending wakeup (list is sorted, so
        # entry 0 is soonest), or the overall maximum run duration when
        # no wakeups are pending.
        self.run_time = self._pendingWakeups[0][
            0] if self._pendingWakeups else self._max_runtime

    def run(self, incomingHandler, maximumDuration=None):
        """Core scheduling method; called by the current Actor process when
           idle to await new messages (or to do background processing).
        """
        self._max_runtime = ExpirationTimer(maximumDuration)
        # Always make at least one pass through to handle expired wakeups
        # and queued events; otherwise a null/negative maximumDuration could
        # block all processing.
        rval = self._run_subtransport(incomingHandler)
        while rval in (True, None) and not self._max_runtime.view().expired():
            rval = self._run_subtransport(incomingHandler)
        return rval

    def _run_subtransport(self, incomingHandler):
        # One pass of the run loop: run the transport-specific routine,
        # then realize any newly-expired wakeups and deliver them.
        self._update_runtime()  # ok not to lock: read once, no modifications
        rval = self._runWithExpiry(incomingHandler)
        if rval is not None and not isinstance(rval, Thespian__Run_Expired):
            return rval
        self._realizeWakeups()
        return self._deliver_wakeups(incomingHandler)

    def _deliver_wakeups(self, incomingHandler):
        # Hand each realized wakeup envelope to the handler; with no
        # handler (or transmit-only mode) return the envelope itself.
        while self._activeWakeups:
            w = self._activeWakeups.pop()
            if incomingHandler in (None, TransmitOnly):
                return w
            r = Thespian__Run_HandlerResult(incomingHandler(w))
            if not r:
                # falsy handler result stops delivery and is propagated
                return r
        return None

    def addWakeup(self, timePeriod, payload):
        """Schedule delivery of a WakeupMessage with payload after timePeriod."""
        with self._wakeup_lock:
            self._pendingWakeups.append((ExpirationTimer(timePeriod),
                                         payload))
            self._pendingWakeups.sort(key=lambda t: t[0])
            # The addWakeup method is called as a result of
            # self.wakeupAfter, so ensure that the current run time is
            # updated in case this new wakeup is the shortest.
            self._update_runtime()

    def _realizeWakeups(self):
        "Find any expired wakeups and queue them to the send processing queue"
        with self._wakeup_lock:
            ct = currentTime()
            starting_len = len(self._activeWakeups)
            while self._pendingWakeups and self._pendingWakeups[0][0].view(
                    ct).expired():
                timer, payload = self._pendingWakeups.pop(0)
                self._activeWakeups.append(
                    ReceiveEnvelope(self.myAddress,
                                    WakeupMessage(timer.duration, payload)))
            # report whether any wakeups became deliverable on this call
            return starting_len != len(self._activeWakeups)
class TransmitIntent(PauseWithBackoff):
    """An individual transmission of data can be encapsulated by a
       "transmit intent", which identifies the message and the target
       address, and which has a callback for eventual success or
       failure indication.  Transmit intents may be chained together
       to represent a series of outbound transmits.  Adding a transmit
       intent to the chain may block when the chain reaches an upper
       threshold, and remain blocked until enough transmits have
       occured (successful or failed) to reduce the size of the chain
       below a minimum threshold.  This acts to implement server-side
       flow control in the system as a whole (although it can
       introduce a deadlock scenario if multiple actors form a
       transmit loop that is blocked at any point in the loop, so a
       transmit intent will fail if it reaches a maximum number of
       retries without success).

       The TransmitIntent is constructed with a target address, the
       message to send, and optional onSuccess and onError callbacks
       (both defaulting to None).  The callbacks are passed the
       TransmitIntent when the transport is finished with it,
       selecting the appropriate callback based on the completion
       status (the `result' property will reveal the SendStatus actual
       result of the attempt).  A callback of None will simply discard
       the TransmitIntent without passing it to a callback.

       The TransmitIntent is passed to the transport that should
       perform the intent; the transport may attach its own additional
       data to the intent during that processing.
    """

    def __init__(self, targetAddr, msg, onSuccess=None, onError=None,
                 maxPeriod=None,
                 retryPeriod=TRANSMIT_RETRY_PERIOD):
        super(TransmitIntent, self).__init__()
        self._targetAddr = targetAddr
        self._message = msg
        # chained success/failure callbacks; extended via addCallback()
        self._callbackTo = ResultCallback(onSuccess, onError)
        self._resultsts = None  # SendStatus once the transport finishes
        # overall deadline for this intent; no retries after it expires
        self._quitTime = ExpirationTimer(maxPeriod or
                                         DEFAULT_MAX_TRANSMIT_PERIOD)
        self._attempts = 0  # number of retries requested so far
        self.transmit_retry_period = retryPeriod

    @property
    def targetAddr(self):
        return self._targetAddr

    @property
    def message(self):
        return self._message

    def changeTargetAddr(self, newAddr):
        self._targetAddr = newAddr

    def changeMessage(self, newMessage):
        self._message = newMessage

    @property
    def result(self):
        return self._resultsts

    @result.setter
    def result(self, setResult):
        # only SendStatus values are valid completion results
        if not isinstance(setResult, SendStatus.BASE):
            raise TypeError(
                'TransmitIntent result must be a SendStatus (got %s)' %
                type(setResult))
        self._resultsts = setResult

    def completionCallback(self):
        "This is called by the transport to perform the success or failure callback operation."
        if not self.result:
            if self.result == SendStatus.DeadTarget:
                # Do not perform logging in case admin or logdirector
                # is dead (this will recurse infinitely).
                # logging.getLogger('Thespian').warning('Dead target: %s', self.targetAddr)
                pass
            else:
                thesplog('completion error: %s', str(self),
                         level=logging.INFO)
        self._callbackTo.resultCallback(self.result, self)

    def addCallback(self, onSuccess=None, onFailure=None):
        """Chain an additional success/failure callback pair onto this intent."""
        self._callbackTo = ResultCallback(onSuccess, onFailure,
                                          self._callbackTo)

    def tx_done(self, status):
        """Record the final SendStatus and run the completion callbacks."""
        self.result = status
        self.completionCallback()

    def awaitingTXSlot(self):
        # Marks this intent as waiting for a transmit slot; cleared by
        # timeToRetry(socketAvail=True).
        self._awaitingTXSlot = True

    def retry(self, immediately=False):
        """Request another transmit attempt.

        Returns False when the retry budget or the overall deadline is
        exhausted; otherwise arms a retry timer (zero-length when
        immediately is True, else the attempt count times the retry
        period) and returns True.
        """
        if self._attempts > MAX_TRANSMIT_RETRIES:
            return False
        if self._quitTime.view().expired():
            return False
        self._attempts += 1
        if immediately:
            self._retryTime = ExpirationTimer(0)
        else:
            self._retryTime = ExpirationTimer(self._attempts *
                                              self.transmit_retry_period)
        return True

    def timeToRetry(self, socketAvail=False):
        """True when a previously-requested retry may be attempted now."""
        if socketAvail and hasattr(self, '_awaitingTXSlot'):
            # slot became available: clear wait state and retry at once
            delattr(self, '_awaitingTXSlot')
            if hasattr(self, '_retryTime'):
                delattr(self, '_retryTime')
            return True
        if hasattr(self, '_retryTime'):
            retryNow = self._retryTime.view().expired()
            if retryNow:
                delattr(self, '_retryTime')
            return retryNow
        return socketAvail

    def delay(self, current_time=None):
        """Return the timedelta until this intent next needs attention
        (retry timer, pause expiry, or overall deadline, whichever is
        soonest).
        """
        ct = current_time or currentTime()
        qt = self._quitTime.view(ct)
        if getattr(self, '_awaitingTXSlot', False):
            if qt.expired():
                return timedelta(seconds=0)
            # waiting for a slot: check back halfway to the deadline,
            # but no sooner than 10ms
            return max(timedelta(milliseconds=10), (qt.remaining()) / 2)
        return max(
            timedelta(seconds=0),
            min(qt.remaining(),
                getattr(self, '_retryTime',
                        self._quitTime).view(ct).remaining(),
                getattr(self, '_pauseUntil',
                        self._quitTime).view(ct).remaining()))

    def expired(self):
        """True when the overall transmit deadline has passed."""
        return self._quitTime.view().expired()

    def expiration(self):
        # returns the deadline ExpirationTimer itself, not a view
        return self._quitTime

    def __str__(self):
        return '************* %s' % self.identify()

    def identify(self):
        """Return a single-line description of this intent for logging."""
        try:
            smsg = str(self.message)
        except Exception:
            smsg = '<msg-cannot-convert-to-ascii>'
        if len(smsg) > MAX_SHOWLEN:
            smsg = smsg[:MAX_SHOWLEN] + '...'
        return 'TransportIntent(' + '-'.join(
            filter(None, [
                str(self.targetAddr),
                'pending' if self.result is None else '=' + str(self.result),
                '' if self.result is not None
                else 'ExpiresIn_' + str(self.delay()),
                'WAITSLOT' if getattr(self, '_awaitingTXSlot', False)
                else None,
                'retry#%d' % self._attempts if self._attempts else '',
                str(type(self.message)),
                smsg,
                'quit_%s' % str(self._quitTime.view().remaining()),
                'retry_%s' % str(self._retryTime.view().remaining())
                if getattr(self, '_retryTime', None) else None,
                'pause_%s' % str(self._pauseUntil.view().remaining())
                if getattr(self, '_pauseUntil', None) else None,
            ])) + ')'
def testNoneExpired(self):
    """A timer built from a None duration must never report expiry."""
    forever = ExpirationTimer(None)
    view = forever.view()
    assert not view.expired()
def testNonZeroStr(self):
    """String form flips from an Expires_in_ prefix to an Expired_for_
    prefix once the timer's period has elapsed."""
    timer = ExpirationTimer(timedelta(milliseconds=10))
    before, after = 'Expires_in_0:00:00.0', 'Expired_for_0:00:00'
    assert str(timer).startswith(before)
    sleep(timer.view().remainingSeconds())
    assert str(timer).startswith(after)
def testNoneRemainingSecondsExplicitForever(self):
    """remainingSeconds(default) yields the default for a forever timer."""
    forever = ExpirationTimer(None)
    result = forever.view().remainingSeconds(9)
    assert result == 9
def testNoneRemainingExplicitForever(self):
    """remaining(default) yields the default for a forever timer."""
    forever = ExpirationTimer(None)
    result = forever.view().remaining(5)
    assert result == 5
def testZeroRemainingSeconds(self):
    """A zero-length timer has no seconds remaining."""
    timer = ExpirationTimer(timedelta(microseconds=0))
    assert timer.view().remainingSeconds() == 0.0
def testNoneRemainingSeconds(self):
    """Without a default, a forever timer reports None seconds remaining."""
    forever = ExpirationTimer(None)
    remaining = forever.view().remainingSeconds()
    assert remaining is None
def testZeroExpired(self):
    """A zero-duration timer is expired immediately upon creation."""
    timer = ExpirationTimer(timedelta(seconds=0))
    view = timer.view()
    assert view.expired()
def testNonZeroExpired(self):
    """A short timer starts unexpired, then expires once slept out."""
    timer = ExpirationTimer(timedelta(milliseconds=10))
    assert not timer.view().expired()
    sleep(timer.view().remainingSeconds())
    with timer as frozen:
        assert frozen.expired()
def testZeroRemaining(self):
    """A zero-length timer reports a zero timedelta remaining."""
    timer = ExpirationTimer(timedelta(seconds=0))
    assert timer.view().remaining() == timedelta(days=0)