def loadActorSource(self, fname):
    """Submit actor source data to the Admin and return its hash.

    fname is either a path (opened in binary mode) or an object that
    provides read().  The data is wrapped in a ValidateSource message
    addressed to self.adminAddr, and the transport is run in
    transmit-only mode until it reports nothing left to do or
    MAX_LOAD_SOURCE_DELAY elapses.

    Returns the MD5 hex digest of the data.

    Raises ActorSystemFailure when self._LOADFAILED was set (presumably
    by the _loadReqFailed error callback) or when the time limit ran
    out.  The source object is closed on the way out, including when it
    was supplied by the caller.
    """
    import hashlib

    self._LOADFAILED = None
    deadline = ExpiryTime(MAX_LOAD_SOURCE_DELAY)
    is_stream = hasattr(fname, 'read')
    source = fname if is_stream else open(fname, 'rb')
    try:
        contents = source.read()
        digest = hashlib.md5(contents).hexdigest()
        # Prefer the stream's own name; otherwise fall back to the
        # argument itself (stringified when it was a stream).
        fallback_name = str(fname) if is_stream else fname
        request = ValidateSource(digest, contents,
                                 getattr(source, 'name', fallback_name))
        self.transport.scheduleTransmit(
            None,
            TransmitIntent(self.adminAddr, request,
                           onError=self._loadReqFailed))
        while not deadline.expired():
            outcome = self.transport.run(TransmitOnly, deadline.remaining())
            if not outcome:
                break  # all transmits completed
        if self._LOADFAILED:
            raise ActorSystemFailure(
                'Load source failed due to ' +
                ('failure response (%s)' % self._LOADFAILED))
        if deadline.expired():
            raise ActorSystemFailure(
                'Load source failed due to ' +
                ('timeout (%s)' % str(deadline)))
        return digest
    finally:
        source.close()
def updateCapability(self, capabilityName, capabilityValue=None):
    """Send a CapabilityUpdate for the named capability to the Admin.

    The transport is run in transmit-only mode until it reports nothing
    left to do or MAX_CAPABILITY_UPDATE_DELAY elapses.

    Raises ActorSystemFailure when self._updCAPFAILED was set
    (presumably by the _updateCapsFailed error callback) or when the
    time limit ran out.
    """
    self._updCAPFAILED = False
    deadline = ExpiryTime(MAX_CAPABILITY_UPDATE_DELAY)
    update = CapabilityUpdate(capabilityName, capabilityValue)
    self.transport.scheduleTransmit(
        None,
        TransmitIntent(self.adminAddr, update,
                       onError=self._updateCapsFailed))
    while True:
        if deadline.expired():
            break
        if not self.transport.run(TransmitOnly, deadline.remaining()):
            break  # all transmits completed
    if self._updCAPFAILED or deadline.expired():
        raise ActorSystemFailure(
            "Could not update Actor System Admin capabilities.")
def unloadActorSource(self, sourceHash):
    """Ask the Admin to unload the source identified by sourceHash.

    The request is a ValidateSource message carrying the hash and no
    data.  The transport is run in transmit-only mode until it reports
    nothing left to do or MAX_LOAD_SOURCE_DELAY elapses.

    Raises ActorSystemFailure when self._LOADFAILED was set (presumably
    by the _loadReqFailed error callback) or when the time limit ran
    out.
    """
    self._LOADFAILED = None
    deadline = ExpiryTime(MAX_LOAD_SOURCE_DELAY)
    request = ValidateSource(sourceHash, None)
    self.transport.scheduleTransmit(
        None,
        TransmitIntent(self.adminAddr, request,
                       onError=self._loadReqFailed))
    while True:
        if deadline.expired():
            break
        if not self.transport.run(TransmitOnly, deadline.remaining()):
            break  # all transmits completed
    if self._LOADFAILED:
        raise ActorSystemFailure(
            'Unload source failed due to ' + 'failure response')
    if deadline.expired():
        raise ActorSystemFailure(
            'Unload source failed due to ' +
            ('timeout (%s)' % str(deadline)))
def tell(self, anActor, msg):
    """Send msg to anActor and wait for the transmit to complete.

    Returns None as soon as the transport reports that nothing is left
    to transmit.

    Raises:
        ActorSystemFailure: the transmit watch was marked as failed.
        ActorSystemRequestTimeout: MAX_TELL_PERIOD elapsed before the
            transmit completed.
        socket.error: any socket error other than EMFILE.
    """
    attemptLimit = ExpiryTime(MAX_TELL_PERIOD)
    # transport may not use sockets, but this helps error handling
    # in case it does.
    import errno
    import socket
    import time
    # EMFILE is treated as transient: pause briefly and retry.  Every
    # retry shares the single attemptLimit deadline created above.
    for attempt in range(5000):
        try:
            txwatch = self._tx_to_actor(anActor, msg)
            while not attemptLimit.expired():
                if not self._run_transport(attemptLimit.remaining(),
                                           txonly=True):
                    # all transmits completed
                    return
                if txwatch.failed:
                    raise ActorSystemFailure(
                        'Error sending to %s: %s' % (str(anActor),
                                                     str(txwatch.failure)))
            # Report the limit that actually governed this wait
            # (MAX_TELL_PERIOD), not the capability-update limit.
            raise ActorSystemRequestTimeout(
                'Unable to send to %s within %s' %
                (str(anActor), str(MAX_TELL_PERIOD)))
        except socket.error as ex:
            if errno.EMFILE != ex.errno:
                raise
            time.sleep(0.1)
    # NOTE(review): if every attempt hit EMFILE the loop ends and this
    # returns None without the message having been confirmed as sent.
def shutdown(self):
    """Request Admin shutdown and wait for its completion message.

    A SystemShutdown message is scheduled to self.adminAddr and the
    transport is run until a SystemShutdownCompleted response arrives
    or MAX_SYSTEM_SHUTDOWN_DELAY elapses.  If self._TASF becomes true
    (presumably set by the _shutdownSendFailed error callback), a
    warning is logged and the method returns immediately.  Any other
    response is logged and the wait continues.
    """
    thesplog('ActorSystem shutdown requested.', level=logging.INFO)
    deadline = ExpiryTime(MAX_SYSTEM_SHUTDOWN_DELAY)
    request = TransmitIntent(self.adminAddr, SystemShutdown(),
                             onError=self._shutdownSendFailed)
    self.transport.scheduleTransmit(None, request)
    confirmed = False
    while not deadline.expired():
        reply = self.transport.run(None, deadline.remaining())
        if getattr(self, '_TASF', False):
            thesplog(
                'Could not send shutdown request to Admin'
                '; aborting but not necessarily stopped',
                level=logging.WARNING)
            return
        if not reply:
            continue
        if isinstance(reply.message, SystemShutdownCompleted):
            confirmed = True
            break
        thesplog('Expected shutdown completed message, got: %s',
                 reply.message,
                 level=logging.WARNING)
    if not confirmed:
        # The deadline passed without a completion message.
        thesplog(
            'No response to Admin shutdown request; Actor system not completely shutdown',
            level=logging.ERROR)
    thesplog('ActorSystem shutdown complete.')
def ask(self, anActor, msg, timeout):
    """Send msg to anActor and return the first non-internal reply.

    The transport is run until timeout (converted with
    toTimeDeltaOrNone) expires.  Returns the message of the first
    response that is not an internal ActorSystem message, or None when
    no such response arrives, the transport run returns None, or the
    send fails with DeadTarget, Failed or NotSent.

    Raises ActorSystemFailure for any other transmit failure.
    """
    send_watch = self._tx_to_actor(anActor, msg)  # KWQ: pass timeout on tx??
    deadline = ExpiryTime(toTimeDeltaOrNone(timeout))
    while not deadline.expired():
        reply = self._run_transport(deadline.remaining())
        if send_watch.failed:
            if send_watch.failure in (SendStatus.DeadTarget,
                                      SendStatus.Failed,
                                      SendStatus.NotSent):
                # Silent failure; not all transports can indicate
                # this, so for conformity the Dead Letter handler is
                # the intended method of handling this issue.
                return None
            raise ActorSystemFailure('Transmit of ask message to %s failed (%s)'%(
                str(anActor),
                str(send_watch.failure)))
        if reply is None:
            # Timed out, give up.
            return None
        # Internal ActorSystem messages are not passed to the caller,
        # which might not recognize them; when one is received, keep
        # waiting for another response.
        if not reply or not hasattr(reply, 'message'):
            continue
        if isInternalActorSystemMessage(reply.message):
            continue
        return reply.message
    return None
def unloadActorSource(self, sourceHash):
    """Ask the Admin to unload the source identified by sourceHash.

    Sends a ValidateSource message carrying the hash and no data, then
    runs the transport in transmit-only mode.  Returns None once the
    transport reports nothing left to transmit.

    Raises:
        ActorSystemFailure: the transmit watch was marked as failed.
        ActorSystemRequestTimeout: MAX_LOAD_SOURCE_DELAY elapsed first.
    """
    deadline = ExpiryTime(MAX_LOAD_SOURCE_DELAY)
    send_watch = self._tx_to_admin(ValidateSource(sourceHash, None))
    while True:
        if deadline.expired():
            raise ActorSystemRequestTimeout(
                'Unload source timeout: ' + str(deadline))
        pending = self._run_transport(deadline.remaining(), txonly=True)
        if not pending:
            return  # all transmits completed
        if send_watch.failed:
            raise ActorSystemFailure(
                'Error sending source unload to Admin: %s' %
                str(send_watch.failure))
def updateCapability(self, capabilityName, capabilityValue=None):
    """Send a CapabilityUpdate for the named capability to the Admin.

    Runs the transport in transmit-only mode and returns None once it
    reports nothing left to transmit.

    Raises:
        ActorSystemFailure: the transmit watch was marked as failed.
        ActorSystemRequestTimeout: MAX_CAPABILITY_UPDATE_DELAY elapsed
            first.
    """
    deadline = ExpiryTime(MAX_CAPABILITY_UPDATE_DELAY)
    update = CapabilityUpdate(capabilityName, capabilityValue)
    send_watch = self._tx_to_admin(update)
    while True:
        if deadline.expired():
            raise ActorSystemRequestTimeout(
                'Unable to confirm capability update in %s' %
                str(MAX_CAPABILITY_UPDATE_DELAY))
        pending = self._run_transport(deadline.remaining(), txonly=True)
        if not pending:
            return  # all transmits completed
        if send_watch.failed:
            raise ActorSystemFailure(
                'Error sending capability updates to Admin: %s' %
                str(send_watch.failure))
def __init__(self, system, logDefs=None):
    """Locate the Admin for this system, starting one if none answers.

    Requires self.transport to have been set by the subclass __init__
    beforehand.  For up to MAX_SYSTEM_SHUTDOWN_DELAY plus one second the
    Admin address is probed; when the probe fails an Admin is started,
    and the loop ends as soon as the Admin is verified as running.

    Raises InvalidActorAddress when the Admin still cannot be verified
    after the retry period.
    """
    import time

    self._numPrimaries = 0
    # Expects self.transport has already been set by subclass __init__
    self.adminAddr = self.transport.getAdminAddr(system.capabilities)
    retry_window = ExpiryTime(MAX_SYSTEM_SHUTDOWN_DELAY +
                              timedelta(seconds=1))
    while True:
        if retry_window.expired():
            break
        admin_answers = self.transport.probeAdmin(self.adminAddr)
        if not admin_answers:
            self._startAdmin(self.adminAddr,
                             self.transport.myAddress,
                             system.capabilities,
                             logDefs)
        if self._verifyAdminRunning():
            return
        # Previous version may have been exiting
        time.sleep(0.5)
    # One last check after the retry period has run out.
    if self._verifyAdminRunning():
        return
    raise InvalidActorAddress(
        self.adminAddr, 'not a valid or useable ActorSystem Admin')
def __init__(self, system, logDefs = None):
    """Locate the Admin for this system, starting one if none answers.

    Requires self.transport to have been set by the subclass __init__
    beforehand.  The Admin address obtained from the transport is handed
    to the parent initializer (which presumably stores it as
    self.adminAddr -- verify against the base class).  For up to
    MAX_SYSTEM_SHUTDOWN_DELAY plus one second the Admin is probed; when
    the probe fails an Admin is started, and the loop ends as soon as
    the Admin is verified as running.

    Raises InvalidActorAddress when the Admin still cannot be verified
    after the retry period.
    """
    import time

    ensure_TZ_set()
    # Expects self.transport has already been set by subclass __init__
    admin_address = self.transport.getAdminAddr(system.capabilities)
    super(systemBase, self).__init__(admin_address)
    retry_window = ExpiryTime(MAX_SYSTEM_SHUTDOWN_DELAY +
                              timedelta(seconds=1))
    while True:
        if retry_window.expired():
            break
        admin_answers = self.transport.probeAdmin(self.adminAddr)
        if not admin_answers:
            self._startAdmin(self.adminAddr,
                             self.transport.myAddress,
                             system.capabilities,
                             logDefs)
        if self._verifyAdminRunning():
            return
        # Previous version may have been exiting
        time.sleep(0.5)
    # One last check after the retry period has run out.
    if self._verifyAdminRunning():
        return
    raise InvalidActorAddress(self.adminAddr,
                              'not a valid or useable ActorSystem Admin')
def tell(self, anActor, msg):
    """Schedule msg for anActor and run the transport to transmit it.

    The transport is run in transmit-only mode until it reports nothing
    left to do or MAX_TELL_PERIOD elapses; the method then returns None
    in either case.  A socket error with errno EMFILE causes a short
    sleep and another attempt (up to 5000 attempts, all sharing the one
    time limit); any other socket error propagates to the caller.
    """
    import errno
    import socket
    import time

    deadline = ExpiryTime(MAX_TELL_PERIOD)
    for attempt in range(5000):
        try:
            intent = TransmitIntent(anActor, msg, onError=self._tellFailed)
            self.transport.scheduleTransmit(None, intent)
            while True:
                if deadline.expired():
                    break
                if not self.transport.run(TransmitOnly,
                                          deadline.remaining()):
                    break  # all transmits completed
            return
        except socket.error as ex:
            if errno.EMFILE != ex.errno:
                raise
            time.sleep(0.1)
def shutdown(self):
    """Request Admin shutdown, wait for completion, close the transport.

    A SystemShutdown message is sent to the Admin and the transport is
    run until a SystemShutdownCompleted response arrives or
    MAX_SYSTEM_SHUTDOWN_DELAY elapses.  If the transmit watch reports a
    failure, a warning is logged and the method returns immediately
    without closing the transport.  Any other response is logged and
    the wait continues.
    """
    thesplog('ActorSystem shutdown requested.', level=logging.INFO)
    deadline = ExpiryTime(MAX_SYSTEM_SHUTDOWN_DELAY)
    send_watch = self._tx_to_admin(SystemShutdown())
    confirmed = False
    while not deadline.expired():
        reply = self._run_transport(deadline.remaining())
        if send_watch.failed:
            thesplog('Could not send shutdown request to Admin'
                     '; aborting but not necessarily stopped',
                     level=logging.WARNING)
            return
        if not reply:
            continue
        if isinstance(reply.message, SystemShutdownCompleted):
            confirmed = True
            break
        thesplog('Expected shutdown completed message, got: %s',
                 reply.message,
                 level=logging.WARNING)
    if not confirmed:
        # The deadline passed without a completion message.
        thesplog('No response to Admin shutdown request; Actor system not completely shutdown',
                 level=logging.ERROR)
    self.transport.close()
    thesplog('ActorSystem shutdown complete.')
def loadActorSource(self, fname):
    """Submit actor source data to the Admin and return its hash.

    fname is either a path (opened in binary mode) or an object that
    provides read().  The data is sent to the Admin in a ValidateSource
    message and the transport is run in transmit-only mode.

    Returns the MD5 hex digest of the data once the transport reports
    nothing left to transmit.

    Raises:
        ActorSystemFailure: the transmit watch was marked as failed.
        ActorSystemRequestTimeout: MAX_LOAD_SOURCE_DELAY elapsed first.

    The source object is closed on the way out, including when it was
    supplied by the caller.
    """
    import hashlib

    deadline = ExpiryTime(MAX_LOAD_SOURCE_DELAY)
    is_stream = hasattr(fname, 'read')
    source = fname if is_stream else open(fname, 'rb')
    try:
        contents = source.read()
        digest = hashlib.md5(contents).hexdigest()
        # Prefer the stream's own name; otherwise fall back to the
        # argument itself (stringified when it was a stream).
        fallback_name = str(fname) if is_stream else fname
        source_name = getattr(source, 'name', fallback_name)
        send_watch = self._tx_to_admin(
            ValidateSource(digest, contents, source_name))
        while True:
            if deadline.expired():
                raise ActorSystemRequestTimeout(
                    'Load source timeout: ' + str(deadline))
            pending = self._run_transport(deadline.remaining(), txonly=True)
            if not pending:
                # All transmits completed
                return digest
            if send_watch.failed:
                raise ActorSystemFailure(
                    'Error sending source load to Admin: %s' %
                    str(send_watch.failure))
    finally:
        source.close()
class wakeupTransportBase(object):
    """The wakeupTransportBase is designed to be used as a mixin-base for
       a Transport class and provides handling for the wakeupAfter()
       functionality.

       This base mixin provides the primary .run() entrypoint for the
       transport and a .run_time ExpiryTime member that provides the
       remaining time-to-run period.

       The system can handle .wakeupAfter() requests by calling this
       class's .addWakeup() method with the datetime.timedelta for the
       wakeup to be scheduled.

       The Transport should provide the following:

         ._runWithExpiry(incomingHandler)

              Called by this class's .run() entrypoint to do the
              actual transport-specific run routine.  Should perform
              that activity while the self.run_time ExpiryTime is not
              expired (self.run_time will be updated when new
              wakeupAfter() events are scheduled).
    """

    def __init__(self, *args, **kw):
        super(wakeupTransportBase, self).__init__(*args, **kw)
        # _pendingWakeups: key = datetime for wakeup, value = list of
        # pending wakeupAfter msgs to restart at that time
        self._pendingWakeups = {}
        self._activeWakeups = []  # expired wakeups to be delivered
        # addWakeup() reads _max_runtime, which was previously assigned
        # only by run(); give it a no-limit default so that scheduling a
        # wakeup before the first run() does not raise AttributeError.
        self._max_runtime = ExpiryTime(None)

    def _updateStatusResponse(self, resp):
        "Called to update a Thespian_SystemStatus or Thespian_ActorStatus with common information"
        resp.addWakeups(self._pendingWakeups)
        for each in self._activeWakeups:
            resp.addPendingMessage(self.myAddress, self.myAddress,
                                   str(each.message))

    def run(self, incomingHandler, maximumDuration=None):
        """Core scheduling method; called by the current Actor process when
           idle to await new messages (or to do background
           processing).

           Returns whatever non-None value ._runWithExpiry() returns.
           Otherwise expired wakeups are delivered: when
           incomingHandler is None or TransmitOnly the wakeup itself
           is returned, else it is passed to incomingHandler and a
           false result from the handler ends the run with None.
           Returns None when there is nothing further to do or
           maximumDuration has elapsed.
        """
        self._max_runtime = ExpiryTime(maximumDuration)
        # Always make at least one pass through to handle expired wakeups
        # and queued events; otherwise a null/negative maximumDuration could
        # block all processing.
        firstTime = True
        while firstTime or not self._max_runtime.expired():
            firstTime = False
            now = datetime.now()
            # Run only until the earliest pending wakeup or the overall
            # limit, whichever comes first.
            self.run_time = min(
                [ExpiryTime(P - now) for P in self._pendingWakeups] +
                [self._max_runtime])
            rval = self._runWithExpiry(incomingHandler)
            if rval is not None:
                return rval

            if not self._realizeWakeups():
                # No wakeups were processed, and the inner run
                # returned, so assume there's nothing to do and exit
                return rval

            while self._activeWakeups:
                w = self._activeWakeups.pop()
                if incomingHandler in (None, TransmitOnly):
                    return w
                if not incomingHandler(w):
                    return None
        return None

    def addWakeup(self, timePeriod):
        """Schedule a WakeupMessage for timePeriod (a timedelta) from now
           and shorten self.run_time if this wakeup is due sooner.
        """
        now = datetime.now()
        wakeupTime = now + timePeriod
        self._pendingWakeups.setdefault(wakeupTime, []) \
            .append(ReceiveEnvelope(self.myAddress,
                                    WakeupMessage(timePeriod)))
        self.run_time = min(
            [ExpiryTime(P - now) for P in self._pendingWakeups] +
            [self._max_runtime])

    def _realizeWakeups(self):
        "Find any expired wakeups and queue them to the send processing queue"
        now = datetime.now()
        # Collect the due times first so the dict is not modified while
        # it is being iterated.
        due = [when for when in self._pendingWakeups if when <= now]
        for when in due:
            self._activeWakeups.extend(self._pendingWakeups.pop(when))
        return bool(due)
class LocalConventionState(object):
    """Local record of convention membership for one ActorSystem Admin.

    Holds this system's address and capabilities, the address of the
    convention leader (if any), the table of known remote members, and
    the addresses registered for convention-change notifications.  Most
    methods return a list of operations (TransmitIntent, HysteresisSend,
    HysteresisCancel, LostRemote, LogAggregator) for the caller to act
    on rather than sending anything themselves.
    """

    def __init__(self, myAddress, capabilities, sCBStats,
                 getConventionAddressFunc):
        self._myAddress = myAddress
        self._capabilities = capabilities
        self._sCBStats = sCBStats
        self._conventionMembers = AssocList() # key=Remote Admin Addr, value=ConventionMemberData
        self._conventionNotificationHandlers = []
        self._getConventionAddr = getConventionAddressFunc
        self._conventionAddress = getConventionAddressFunc(capabilities)
        self._conventionRegistration = ExpiryTime(CONVENTION_REREGISTRATION_PERIOD)
        self._has_been_activated = False
        self._invited = False  # entered convention as a result of an explicit invite

    @property
    def myAddress(self):
        return self._myAddress

    @property
    def capabilities(self):
        return self._capabilities

    def updateStatusResponse(self, resp):
        "Copy the leader address, registration time, members and notify handlers into resp"
        resp.setConventionLeaderAddress(self.conventionLeaderAddr)
        resp.setConventionRegisterTime(self._conventionRegistration)
        for each in self._conventionMembers.values():
            resp.addConventioneer(each.remoteAddress, each.registryValid)
        resp.setNotifyHandlers(self._conventionNotificationHandlers)

    def active_in_convention(self):
        # If this is the convention leader, it is automatically
        # active, otherwise this convention member should have a
        # convention leader and that leader should have an active
        # entry in the _conventionMembers table (indicating it has
        # updated this system with its information)
        return bool(self.conventionLeaderAddr and
                    self._conventionMembers.find(self.conventionLeaderAddr))

    @property
    def conventionLeaderAddr(self):
        return self._conventionAddress

    def isConventionLeader(self):
        # Might also be the leader if self.conventionLeaderAddr is None
        return self.conventionLeaderAddr == self.myAddress

    def capabilities_have_changed(self, new_capabilities):
        "Record the new capabilities and re-run convention setup; returns its operations"
        self._capabilities = new_capabilities
        return self.setup_convention()

    def setup_convention(self, activation=False):
        """Register (or re-register) with the convention leader.

        Returns the list of operations to perform; the list is empty
        when this system is the leader or has no leader address.  The
        re-registration timer is restarted in every case.
        """
        self._has_been_activated |= activation
        rmsgs = []
        # If not specified in capabilities, don't override any invites
        # that may have been received.
        self._conventionAddress = self._getConventionAddr(self.capabilities) or \
                                  self._conventionAddress
        leader_is_gone = (self._conventionMembers.find(self.conventionLeaderAddr) is None) \
                         if self.conventionLeaderAddr else True
        if not self.isConventionLeader() and self.conventionLeaderAddr:
            thesplog('Admin registering with Convention @ %s (%s)',
                     self.conventionLeaderAddr,
                     'first time' if leader_is_gone else 're-registering',
                     level=logging.INFO, primary=True)
            rmsgs.append(
                HysteresisSend(self.conventionLeaderAddr,
                               ConventionRegister(self.myAddress,
                                                  self.capabilities,
                                                  leader_is_gone),
                               onSuccess = self._setupConventionCBGood,
                               onError = self._setupConventionCBError))
            rmsgs.append(LogAggregator(self.conventionLeaderAddr))
        self._conventionRegistration = ExpiryTime(CONVENTION_REREGISTRATION_PERIOD)
        return rmsgs

    def _setupConventionCBGood(self, result, finishedIntent):
        "Registration send succeeded: count it and forget any recorded misses"
        self._sCBStats.inc('Admin Convention Registered')
        if hasattr(self, '_conventionLeaderMissCount'):
            delattr(self, '_conventionLeaderMissCount')

    def _setupConventionCBError(self, result, finishedIntent):
        "Registration send failed: count it and bump the consecutive miss counter"
        self._sCBStats.inc('Admin Convention Registration Failed')
        self._conventionLeaderMissCount = \
            getattr(self, '_conventionLeaderMissCount', 0) + 1
        thesplog('Admin cannot register with convention @ %s (miss %d): %s',
                 finishedIntent.targetAddr,
                 self._conventionLeaderMissCount,
                 result, level=logging.WARNING, primary=True)

    def got_convention_invite(self, sender):
        "Adopt sender as the convention leader, mark this system as invited, and register"
        self._conventionAddress = sender
        self._invited = True
        return self.setup_convention()

    def got_convention_register(self, regmsg):
        # Called when remote convention member has sent a ConventionRegister message
        self._sCBStats.inc('Admin Handle Convention Registration')
        if self._invited and not self.conventionLeaderAddr:
            # Lost connection to an invitation-only convention.
            # Cannot join again until another invitation is received.
            return []
        # Registrant may re-register if changing capabilities
        rmsgs = []
        registrant = regmsg.adminAddress
        prereg = getattr(regmsg, 'preRegister', False)  # getattr used; see definition
        existing = self._conventionMembers.find(registrant)
        thesplog('Got Convention %sregistration from %s (%s) (new? %s)',
                 'pre-' if prereg else '',
                 registrant,
                 'first time' if regmsg.firstTime else 're-registering',
                 not existing,
                 level=logging.INFO)
        if registrant == self.myAddress:
            # Either remote failed getting an external address and is
            # using 127.0.0.1 or else this is a malicious attempt to
            # make us talk to ourselves.  Ignore it.
            thesplog('Convention registration from %s is an invalid address; ignoring.',
                     registrant,
                     level=logging.WARNING)
            return rmsgs

        existingPreReg = (
            # existing.preRegOnly
            # or existing.preRegistered
            existing.permanentEntry
        ) if existing else False
        notify = (not existing or existing.preRegOnly) and not prereg

        if regmsg.firstTime or not existing:
            if existing:
                # A first-time registration from a known member
                # discards the previous connection information.
                existing = None
                notify = not prereg
                rmsgs.extend(self._remote_system_cleanup(registrant))
            newmember = ConventionMemberData(registrant,
                                             regmsg.capabilities,
                                             prereg)
            if prereg or existingPreReg:
                newmember.preRegistered = PreRegistration()
            self._conventionMembers.add(registrant, newmember)
        else:
            existing.refresh(regmsg.capabilities, prereg or existingPreReg)
            if not prereg:
                existing.preRegOnly = False

        if not self.isConventionLeader():
            self._conventionRegistration = ExpiryTime(CONVENTION_REREGISTRATION_PERIOD)

        # Convention Members normally periodically initiate a
        # membership message, to which the leader confirms by
        # responding; if this was a pre-registration, that identifies
        # this system as the "leader" for that remote.  Also, if the
        # remote sent this because it was a pre-registration leader,
        # it doesn't yet have all the member information so the member
        # should respond.
        #if self.isConventionLeader() or prereg or regmsg.firstTime:
        if prereg:
            rmsgs.append(HysteresisCancel(registrant))
            rmsgs.append(TransmitIntent(registrant, ConventionInvite()))
        elif (self.isConventionLeader() or prereg or regmsg.firstTime or \
              (existing and existing.permanentEntry)):
            # If we are the Convention Leader, this would be the point to
            # inform all other registrants of the new registrant.  At
            # present, there is no reciprocity here, so just update the
            # new registrant with the leader's info.
            rmsgs.append(
                TransmitIntent(registrant,
                               ConventionRegister(self.myAddress,
                                                  self.capabilities)))

        if notify:
            rmsgs.extend(self._notifications_of(
                ActorSystemConventionUpdate(registrant,
                                            regmsg.capabilities,
                                            True)))
        return rmsgs

    def _notifications_of(self, msg):
        "Return one TransmitIntent of msg for each registered notification handler"
        return [TransmitIntent(H, msg) for H in self._conventionNotificationHandlers]

    def add_notification_handler(self, addr):
        "Register addr for convention updates; returns updates to send it (empty if already registered)"
        if addr not in self._conventionNotificationHandlers:
            self._conventionNotificationHandlers.append(addr)
            # Now update the registrant on the current state of all convention members
            return [TransmitIntent(addr,
                                   ActorSystemConventionUpdate(M.remoteAddress,
                                                               M.remoteCapabilities,
                                                               True))
                    for M in self._conventionMembers.values()
                    if not M.preRegOnly]
        return []

    def remove_notification_handler(self, addr):
        "Drop every occurrence of addr from the notification handler list"
        self._conventionNotificationHandlers = [
            H for H in self._conventionNotificationHandlers
            if H != addr]

    def got_convention_deregister(self, deregmsg):
        """Handle a ConventionDeRegister message from a remote Admin.

        Returns the list of operations needed to clean up after the
        remote.  A deregistration that names this system's own address
        is logged and ignored (empty list returned).
        """
        self._sCBStats.inc('Admin Handle Convention De-registration')
        remoteAdmin = deregmsg.adminAddress
        if remoteAdmin == self.myAddress:
            # Either remote failed getting an external address and is
            # using 127.0.0.1 or else this is a malicious attempt to
            # make us talk to ourselves.  Ignore it.
            thesplog('Convention deregistration from %s is an invalid address; ignoring.',
                     remoteAdmin,
                     level=logging.WARNING)
            # Actually ignore it, matching got_convention_register;
            # continuing could queue a ConventionDeRegister addressed
            # to this system itself.
            return []
        rmsgs = []
        if getattr(deregmsg, 'preRegistered', False):  # see definition for getattr use
            existing = self._conventionMembers.find(remoteAdmin)
            if existing:
                existing.preRegistered = None
            rmsgs.append(TransmitIntent(remoteAdmin,
                                        ConventionDeRegister(self.myAddress)))
        return rmsgs + self._remote_system_cleanup(remoteAdmin)

    def got_system_shutdown(self):
        return self.exit_convention()

    def exit_convention(self):
        """Leave the convention.

        When this system has a leader other than itself, the leader is
        cleaned up locally and the operations to deregister from it are
        returned.  Otherwise every known member (other than this
        system) is cleaned up and the deregistration operations for all
        of them are returned.
        """
        # NOTE(review): this assigns a new attribute `invited`; the flag
        # read elsewhere in this class is `_invited`, which stays
        # unchanged here.  Left as-is because _remote_system_cleanup
        # documents that the invited state is deliberately never
        # cleared -- confirm the intent.
        self.invited = False
        gen_ops = lambda addr: [HysteresisCancel(addr),
                                TransmitIntent(addr,
                                               ConventionDeRegister(self.myAddress)),
        ]
        # Run the cleanup for its side effects but keep only gen_ops' result.
        terminate = lambda a: [ self._remote_system_cleanup(a), gen_ops(a) ][-1]
        if self.conventionLeaderAddr and \
           self.conventionLeaderAddr != self.myAddress:
            thesplog('Admin de-registering with Convention @ %s',
                     str(self.conventionLeaderAddr),
                     level=logging.INFO, primary=True)
            # Cache convention leader address because it might get reset by terminate()
            claddr = self.conventionLeaderAddr
            terminate(self.conventionLeaderAddr)
            return gen_ops(claddr)
        return join(fmap(terminate,
                         [M.remoteAddress
                          for M in self._conventionMembers.values()
                          if M.remoteAddress != self.myAddress]))

    def check_convention(self):
        """Periodic convention maintenance; returns operations to perform.

        Does nothing until setup_convention has been called with
        activation=True.  A leader (or a system with no leader) cleans
        up members whose registration has expired; a member
        re-registers with the leader when its timer has expired, or
        cleans up the leader after CONVENTION_REGISTRATION_MISS_MAX
        failed registrations.  Finally, pre-registered members whose
        ping period has expired are sent a new ConventionInvite.
        """
        rmsgs = []
        if not self._has_been_activated:
            return rmsgs
        if self.isConventionLeader() or not self.conventionLeaderAddr:
            missing = [each
                       for each in self._conventionMembers.values()
                       if each.registryValid.expired()]
            for each in missing:
                thesplog('%s missed %d checkins (%s); assuming it has died',
                         str(each),
                         CONVENTION_REGISTRATION_MISS_MAX,
                         str(each.registryValid),
                         level=logging.WARNING, primary=True)
                rmsgs.extend(self._remote_system_cleanup(each.remoteAddress))
            self._conventionRegistration = ExpiryTime(CONVENTION_REREGISTRATION_PERIOD)
        else:
            # Re-register with the Convention if it's time
            if self.conventionLeaderAddr and self._conventionRegistration.expired():
                if getattr(self, '_conventionLeaderMissCount', 0) >= \
                   CONVENTION_REGISTRATION_MISS_MAX:
                    thesplog('Admin convention registration lost @ %s (miss %d)',
                             self.conventionLeaderAddr,
                             self._conventionLeaderMissCount,
                             level=logging.WARNING, primary=True)
                    rmsgs.extend(self._remote_system_cleanup(self.conventionLeaderAddr))
                    self._conventionLeaderMissCount = 0
                else:
                    rmsgs.extend(self.setup_convention())

        for member in self._conventionMembers.values():
            if member.preRegistered and \
               member.preRegistered.pingValid.expired() and \
               not member.preRegistered.pingPending:
                member.preRegistered.pingPending = True
                # If remote misses a checkin, re-extend the
                # invitation.  This also helps re-initiate a socket
                # connection if a TxOnly socket has been lost.
                member.preRegistered.pingValid = ExpiryTime(
                    convention_reinvite_adjustment(
                        CONVENTION_RESTART_PERIOD
                        if member.registryValid.expired()
                        else CONVENTION_REREGISTRATION_PERIOD))
                rmsgs.append(HysteresisSend(
                    member.remoteAddress, ConventionInvite(),
                    onSuccess = self._preRegQueryNotPending,
                    onError = self._preRegQueryNotPending))
        return rmsgs

    def _preRegQueryNotPending(self, result, finishedIntent):
        "Invite send finished (either way): clear the member's pingPending flag"
        remoteAddr = finishedIntent.targetAddr
        member = self._conventionMembers.find(remoteAddr)
        if member and member.preRegistered:
            member.preRegistered.pingPending = False

    def _remote_system_cleanup(self, registrant):
        """Called when a RemoteActorSystem has exited and all associated
           Actors should be marked as exited and the ActorSystem
           removed from Convention membership.  This is also called on
           a First Time connection from the remote to discard any
           previous connection information.

           Returns the list of operations to perform; the list is
           empty when the registrant is unknown or is only a
           pre-registration placeholder.
        """
        thesplog('Convention cleanup or deregistration for %s (known? %s)',
                 registrant,
                 bool(self._conventionMembers.find(registrant)),
                 level=logging.INFO)
        rmsgs = [LostRemote(registrant)]
        cmr = self._conventionMembers.find(registrant)
        if not cmr or cmr.preRegOnly:
            return []

        # Send exited notification to conventionNotificationHandler (if any)
        for each in self._conventionNotificationHandlers:
            rmsgs.append(
                TransmitIntent(each,
                               ActorSystemConventionUpdate(cmr.remoteAddress,
                                                           cmr.remoteCapabilities,
                                                           False)))  # errors ignored

        # If the remote ActorSystem shutdown gracefully (i.e. sent
        # a Convention Deregistration) then it should not be
        # necessary to shutdown remote Actors (or notify of their
        # shutdown) because the remote ActorSystem should already
        # have caused this to occur.  However, it won't hurt, and
        # it's necessary if the remote ActorSystem did not exit
        # gracefully.

        for lpa, raa in cmr.hasRemoteActors:
            # ignore errors:
            rmsgs.append(TransmitIntent(lpa, ChildActorExited(raa)))
            # n.b. at present, this means that the parent might
            # get duplicate notifications of ChildActorExited; it
            # is expected that Actors can handle this.

        # Remove remote system from conventionMembers
        if not cmr.preRegistered:
            if registrant == self.conventionLeaderAddr and self._invited:
                self._conventionAddress = None
                # Don't clear invited: once invited, that
                # perpetually indicates this should be only a
                # member and never a leader.
            self._conventionMembers.rmv(registrant)
        else:
            # This conventionMember needs to stay because the
            # current system needs to continue issuing
            # registration pings.  By setting the registryValid
            # expiration to forever, this member won't re-time-out
            # and will therefore be otherwise ignored... until it
            # registers again at which point the membership will
            # be updated with new settings.
            cmr.registryValid = ExpiryTime(None)
            cmr.preRegOnly = True

        return rmsgs + [HysteresisCancel(registrant)]

    def sentByRemoteAdmin(self, envelope):
        "True when envelope.sender is the address of a known convention member"
        for each in self._conventionMembers.values():
            if envelope.sender == each.remoteAddress:
                return True
        return False

    def convention_inattention_delay(self):
        "Return the current registration timer, or a fresh one if that timer is false-valued"
        return self._conventionRegistration or \
            ExpiryTime(CONVENTION_REREGISTRATION_PERIOD
                       if self.active_in_convention() or
                       self.isConventionLeader() else
                       CONVENTION_RESTART_PERIOD)

    def forward_pending_to_remote_system(self, childClass, envelope,
                                         sourceHash, acceptsCaps):
        """Choose a remote system to handle the request in envelope.

        Candidates are members with a valid registration that are not
        the sender, have not already been tried, and whose capabilities
        satisfy acceptsCaps; the one with the fewest remote actors is
        chosen.  With no candidates the request goes to the convention
        leader.  Returns a one-element list holding the TransmitIntent
        for the chosen address.

        Raises NoCompatibleSystemForActor when there are no candidates
        and this system is the leader or has no leader.
        """
        alreadyTried = getattr(envelope.message, 'alreadyTried', [])

        remoteCandidates = [
            K
            for K in self._conventionMembers.values()
            if not K.registryValid.expired()
            and K.remoteAddress != envelope.sender  # source Admin
            and K.remoteAddress not in alreadyTried
            and acceptsCaps(K.remoteCapabilities)]

        if not remoteCandidates:
            if self.isConventionLeader() or not self.conventionLeaderAddr:
                raise NoCompatibleSystemForActor(
                    childClass,
                    'No known ActorSystems can handle a %s for %s',
                    childClass, envelope.message.forActor)
            # Let the Convention Leader try to find an appropriate ActorSystem
            bestC = self.conventionLeaderAddr
        else:
            # distribute equally amongst candidates
            C = [(K.remoteAddress, len(K.hasRemoteActors))
                 for K in remoteCandidates]
            bestC = foldl(lambda best,possible:
                          best if best[1] <= possible[1] else possible,
                          C)[0]
            thesplog('Requesting creation of %s%s on remote admin %s',
                     envelope.message.actorClassName,
                     ' (%s)'%sourceHash if sourceHash else '',
                     bestC)
        if bestC not in alreadyTried:
            # Don't send request to this remote again, it has already
            # been tried.  This would also be indicated by that system
            # performing the add of self.myAddress as below, but if
            # there is disagreement between the local and remote
            # addresses, this addition will prevent continual
            # bounceback.
            alreadyTried.append(bestC)
        if self.myAddress not in alreadyTried:
            # Don't send request back to this actor system: it cannot
            # handle it
            alreadyTried.append(self.myAddress)
        envelope.message.alreadyTried = alreadyTried
        return [TransmitIntent(bestC, envelope.message)]

    def send_to_all_members(self, message, exception_list=None):
        "Return a HysteresisSend of message for every member not in exception_list"
        return [HysteresisSend(M.remoteAddress, message)
                for M in self._conventionMembers.values()
                if M.remoteAddress not in (exception_list or [])]
def testZeroExpired(self):
    """An ExpiryTime built from a zero-length period is already expired."""
    no_time = timedelta(seconds=0)
    timer = ExpiryTime(no_time)
    assert timer.expired()
def testNonZeroExpired(self):
    """A 10ms ExpiryTime is live at first and expired after its remaining time."""
    short_period = timedelta(milliseconds=10)
    timer = ExpiryTime(short_period)
    assert not timer.expired()
    # Wait out whatever the timer itself reports as remaining.
    sleep(timer.remainingSeconds())
    assert timer.expired()
class LocalConventionState(object): def __init__(self, myAddress, capabilities, sCBStats, getConventionAddressFunc): self._myAddress = myAddress self._capabilities = capabilities self._sCBStats = sCBStats self._conventionMembers = AssocList( ) # key=Remote Admin Addr, value=ConventionMemberData self._conventionNotificationHandlers = [] self._getConventionAddr = getConventionAddressFunc self._conventionAddress = getConventionAddressFunc(capabilities) self._conventionRegistration = ExpiryTime( CONVENTION_REREGISTRATION_PERIOD) self._has_been_activated = False self._invited = False # entered convention as a result of an explicit invite @property def myAddress(self): return self._myAddress @property def capabilities(self): return self._capabilities def updateStatusResponse(self, resp): resp.setConventionLeaderAddress(self.conventionLeaderAddr) resp.setConventionRegisterTime(self._conventionRegistration) for each in self._conventionMembers.values(): resp.addConventioneer(each.remoteAddress, each.registryValid) resp.setNotifyHandlers(self._conventionNotificationHandlers) def active_in_convention(self): # If this is the convention leader, it is automatically # active, otherwise this convention member should have a # convention leader and that leader should have an active # entry in the _conventionMembers table (indicating it has # updated this system with its information) return bool( self.conventionLeaderAddr and self._conventionMembers.find(self.conventionLeaderAddr)) @property def conventionLeaderAddr(self): return self._conventionAddress def isConventionLeader(self): # Might also be the leader if self.conventionLeaderAddr is None return self.conventionLeaderAddr == self.myAddress def capabilities_have_changed(self, new_capabilities): self._capabilities = new_capabilities return self.setup_convention() def setup_convention(self, activation=False): self._has_been_activated |= activation rmsgs = [] # If not specified in capabilities, don't override any invites # that may 
have been received. self._conventionAddress = self._getConventionAddr(self.capabilities) or \ self._conventionAddress leader_is_gone = (self._conventionMembers.find(self.conventionLeaderAddr) is None) \ if self.conventionLeaderAddr else True if not self.isConventionLeader() and self.conventionLeaderAddr: thesplog('Admin registering with Convention @ %s (%s)', self.conventionLeaderAddr, 'first time' if leader_is_gone else 're-registering', level=logging.INFO, primary=True) rmsgs.append( HysteresisSend(self.conventionLeaderAddr, ConventionRegister(self.myAddress, self.capabilities, leader_is_gone), onSuccess=self._setupConventionCBGood, onError=self._setupConventionCBError)) rmsgs.append(LogAggregator(self.conventionLeaderAddr)) self._conventionRegistration = ExpiryTime( CONVENTION_REREGISTRATION_PERIOD) return rmsgs def _setupConventionCBGood(self, result, finishedIntent): self._sCBStats.inc('Admin Convention Registered') if hasattr(self, '_conventionLeaderMissCount'): delattr(self, '_conventionLeaderMissCount') def _setupConventionCBError(self, result, finishedIntent): self._sCBStats.inc('Admin Convention Registration Failed') if hasattr(self, '_conventionLeaderMissCount'): self._conventionLeaderMissCount += 1 else: self._conventionLeaderMissCount = 1 thesplog('Admin cannot register with convention @ %s (miss %d): %s', finishedIntent.targetAddr, self._conventionLeaderMissCount, result, level=logging.WARNING, primary=True) def got_convention_invite(self, sender): self._conventionAddress = sender self._invited = True return self.setup_convention() def got_convention_register(self, regmsg): # Called when remote convention member has sent a ConventionRegister message self._sCBStats.inc('Admin Handle Convention Registration') if self._invited and not self.conventionLeaderAddr: # Lost connection to an invitation-only convention. # Cannot join again until another invitation is received. 
return [] # Registrant may re-register if changing capabilities rmsgs = [] registrant = regmsg.adminAddress prereg = getattr(regmsg, 'preRegister', False) # getattr used; see definition existing = self._conventionMembers.find(registrant) thesplog('Got Convention %sregistration from %s (%s) (new? %s)', 'pre-' if prereg else '', registrant, 'first time' if regmsg.firstTime else 're-registering', not existing, level=logging.INFO) if registrant == self.myAddress: # Either remote failed getting an external address and is # using 127.0.0.1 or else this is a malicious attempt to # make us talk to ourselves. Ignore it. thesplog( 'Convention registration from %s is an invalid address; ignoring.', registrant, level=logging.WARNING) return rmsgs existingPreReg = ( # existing.preRegOnly # or existing.preRegistered existing.permanentEntry) if existing else False notify = (not existing or existing.preRegOnly) and not prereg if regmsg.firstTime or not existing: if existing: existing = None notify = not prereg rmsgs.extend(self._remote_system_cleanup(registrant)) newmember = ConventionMemberData(registrant, regmsg.capabilities, prereg) if prereg or existingPreReg: newmember.preRegistered = PreRegistration() self._conventionMembers.add(registrant, newmember) else: existing.refresh(regmsg.capabilities, prereg or existingPreReg) if not prereg: existing.preRegOnly = False if not self.isConventionLeader(): self._conventionRegistration = ExpiryTime( CONVENTION_REREGISTRATION_PERIOD) # Convention Members normally periodically initiate a # membership message, to which the leader confirms by # responding; if this was a pre-registration, that identifies # this system as the "leader" for that remote. Also, if the # remote sent this because it was a pre-registration leader, # it doesn't yet have all the member information so the member # should respond. 
#if self.isConventionLeader() or prereg or regmsg.firstTime: if prereg: rmsgs.append(TransmitIntent(registrant, ConventionInvite())) elif (self.isConventionLeader() or prereg or regmsg.firstTime or \ (existing and existing.permanentEntry)): # If we are the Convention Leader, this would be the point to # inform all other registrants of the new registrant. At # present, there is no reciprocity here, so just update the # new registrant with the leader's info. rmsgs.append( TransmitIntent( registrant, ConventionRegister(self.myAddress, self.capabilities))) if notify: rmsgs.extend( self._notifications_of( ActorSystemConventionUpdate(registrant, regmsg.capabilities, True))) return rmsgs def _notifications_of(self, msg): return [ TransmitIntent(H, msg) for H in self._conventionNotificationHandlers ] def add_notification_handler(self, addr): if addr not in self._conventionNotificationHandlers: self._conventionNotificationHandlers.append(addr) # Now update the registrant on the current state of all convention members return [ TransmitIntent( addr, ActorSystemConventionUpdate(M.remoteAddress, M.remoteCapabilities, True)) for M in self._conventionMembers.values() ] return [] def remove_notification_handler(self, addr): self._conventionNotificationHandlers = [ H for H in self._conventionNotificationHandlers if H != addr ] def got_convention_deregister(self, deregmsg): self._sCBStats.inc('Admin Handle Convention De-registration') remoteAdmin = deregmsg.adminAddress if remoteAdmin == self.myAddress: # Either remote failed getting an external address and is # using 127.0.0.1 or else this is a malicious attempt to # make us talk to ourselves. Ignore it. 
thesplog( 'Convention deregistration from %s is an invalid address; ignoring.', remoteAdmin, level=logging.WARNING) rmsgs = [] if getattr(deregmsg, 'preRegistered', False): # see definition for getattr use existing = self._conventionMembers.find(remoteAdmin) if existing: existing.preRegistered = None rmsgs.append( TransmitIntent(remoteAdmin, ConventionDeRegister(self.myAddress))) return rmsgs + self._remote_system_cleanup(remoteAdmin) def got_system_shutdown(self): return self.exit_convention() def exit_convention(self): self.invited = False gen_ops = lambda addr: [ HysteresisCancel(addr), TransmitIntent(addr, ConventionDeRegister(self.myAddress)), ] terminate = lambda a: [self._remote_system_cleanup(a), gen_ops(a)][-1] if self.conventionLeaderAddr and \ self.conventionLeaderAddr != self.myAddress: thesplog('Admin de-registering with Convention @ %s', str(self.conventionLeaderAddr), level=logging.INFO, primary=True) # Cache convention leader address because it might get reset by terminate() claddr = self.conventionLeaderAddr terminate(self.conventionLeaderAddr) return gen_ops(claddr) return join( fmap(terminate, [ M.remoteAddress for M in self._conventionMembers.values() if M.remoteAddress != self.myAddress ])) def check_convention(self): rmsgs = [] if not self._has_been_activated: return rmsgs if self.isConventionLeader() or not self.conventionLeaderAddr: missing = [ each for each in self._conventionMembers.values() if each.registryValid.expired() ] for each in missing: thesplog('%s missed %d checkins (%s); assuming it has died', str(each), CONVENTION_REGISTRATION_MISS_MAX, str(each.registryValid), level=logging.WARNING, primary=True) rmsgs.extend(self._remote_system_cleanup(each.remoteAddress)) self._conventionRegistration = ExpiryTime( CONVENTION_REREGISTRATION_PERIOD) else: # Re-register with the Convention if it's time if self.conventionLeaderAddr and self._conventionRegistration.expired( ): if getattr(self, '_conventionLeaderMissCount', 0) >= \ 
CONVENTION_REGISTRATION_MISS_MAX: thesplog( 'Admin convention registration lost @ %s (miss %d)', self.conventionLeaderAddr, self._conventionLeaderMissCount, level=logging.ERROR, primary=True) rmsgs.extend( self._remote_system_cleanup(self.conventionLeaderAddr)) self._conventionLeaderMissCount = 0 else: rmsgs.extend(self.setup_convention()) for member in self._conventionMembers.values(): if member.preRegistered and \ member.preRegistered.pingValid.expired() and \ not member.preRegistered.pingPending: member.preRegistered.pingPending = True # If remote misses a checkin, re-extend the # invitation. This also helps re-initiate a socket # connection if a TxOnly socket has been lost. member.preRegistered.pingValid = ExpiryTime( convention_reinvite_adjustment( CONVENTION_RESTART_PERIOD if member.registryValid. expired() else CONVENTION_REREGISTRATION_PERIOD)) rmsgs.append( HysteresisSend(member.remoteAddress, ConventionInvite(), onSuccess=self._preRegQueryNotPending, onError=self._preRegQueryNotPending)) return rmsgs def _preRegQueryNotPending(self, result, finishedIntent): remoteAddr = finishedIntent.targetAddr member = self._conventionMembers.find(remoteAddr) if member and member.preRegistered: member.preRegistered.pingPending = False def _remote_system_cleanup(self, registrant): """Called when a RemoteActorSystem has exited and all associated Actors should be marked as exited and the ActorSystem removed from Convention membership. This is also called on a First Time connection from the remote to discard any previous connection information. """ thesplog('Convention cleanup or deregistration for %s (known? 
%s)', registrant, bool(self._conventionMembers.find(registrant)), level=logging.INFO) rmsgs = [LostRemote(registrant)] cmr = self._conventionMembers.find(registrant) if not cmr or cmr.preRegOnly: return [] # Send exited notification to conventionNotificationHandler (if any) for each in self._conventionNotificationHandlers: rmsgs.append( TransmitIntent( each, ActorSystemConventionUpdate(cmr.remoteAddress, cmr.remoteCapabilities, False))) # errors ignored # If the remote ActorSystem shutdown gracefully (i.e. sent # a Convention Deregistration) then it should not be # necessary to shutdown remote Actors (or notify of their # shutdown) because the remote ActorSystem should already # have caused this to occur. However, it won't hurt, and # it's necessary if the remote ActorSystem did not exit # gracefully. for lpa, raa in cmr.hasRemoteActors: # ignore errors: rmsgs.append(TransmitIntent(lpa, ChildActorExited(raa))) # n.b. at present, this means that the parent might # get duplicate notifications of ChildActorExited; it # is expected that Actors can handle this. # Remove remote system from conventionMembers if not cmr.preRegistered: if registrant == self.conventionLeaderAddr and self._invited: self._conventionAddress = None # Don't clear invited: once invited, that # perpetually indicates this should be only a # member and never a leader. self._conventionMembers.rmv(registrant) else: # This conventionMember needs to stay because the # current system needs to continue issuing # registration pings. By setting the registryValid # expiration to forever, this member won't re-time-out # and will therefore be otherwise ignored... until it # registers again at which point the membership will # be updated with new settings. 
cmr.registryValid = ExpiryTime(None) cmr.preRegOnly = True return rmsgs + [HysteresisCancel(registrant)] def sentByRemoteAdmin(self, envelope): for each in self._conventionMembers.values(): if envelope.sender == each.remoteAddress: return True return False def convention_inattention_delay(self): return self._conventionRegistration or \ ExpiryTime(CONVENTION_REREGISTRATION_PERIOD if self.active_in_convention() or self.isConventionLeader() else CONVENTION_RESTART_PERIOD) def forward_pending_to_remote_system(self, childClass, envelope, sourceHash, acceptsCaps): alreadyTried = getattr(envelope.message, 'alreadyTried', []) remoteCandidates = [ K for K in self._conventionMembers.values() if not K.registryValid.expired() and K.remoteAddress != envelope.sender # source Admin and K.remoteAddress not in alreadyTried and acceptsCaps(K.remoteCapabilities) ] if not remoteCandidates: if self.isConventionLeader() or not self.conventionLeaderAddr: raise NoCompatibleSystemForActor( childClass, 'No known ActorSystems can handle a %s for %s', childClass, envelope.message.forActor) # Let the Convention Leader try to find an appropriate ActorSystem bestC = self.conventionLeaderAddr else: # distribute equally amongst candidates C = [(K.remoteAddress, len(K.hasRemoteActors)) for K in remoteCandidates] bestC = foldl( lambda best, possible: best if best[1] <= possible[1] else possible, C)[0] thesplog('Requesting creation of %s%s on remote admin %s', envelope.message.actorClassName, ' (%s)' % sourceHash if sourceHash else '', bestC) if bestC not in alreadyTried: # Don't send request to this remote again, it has already # been tried. This would also be indicated by that system # performing the add of self.myAddress as below, but if # there is disagreement between the local and remote # addresses, this addition will prevent continual # bounceback. 
alreadyTried.append(bestC) if self.myAddress not in alreadyTried: # Don't send request back to this actor system: it cannot # handle it alreadyTried.append(self.myAddress) envelope.message.alreadyTried = alreadyTried return [TransmitIntent(bestC, envelope.message)] def send_to_all_members(self, message, exception_list=None): return [ HysteresisSend(M.remoteAddress, message) for M in self._conventionMembers.values() if M.remoteAddress not in (exception_list or []) ]
def testNoneExpired(self):
    """An ExpiryTime constructed with None must not report as expired."""
    never_expires = ExpiryTime(None)
    assert not never_expires.expired()
def drainTransmits(self):
    """Run the transport in transmit-only mode until it reports that no
    transmits are left or MAX_SHUTDOWN_DRAIN_PERIOD has elapsed.
    """
    deadline = ExpiryTime(MAX_SHUTDOWN_DRAIN_PERIOD)
    while not deadline.expired():
        still_pending = self.transport.run(TransmitOnly,
                                           deadline.remaining())
        if not still_pending:
            # no transmits left
            return
class HysteresisDelaySender(object):
    """Wraps a send function with a hysteresis (back-off) delay.

    Intended for messages exchanged between convention members, so that
    a misbehaving member cannot inflict damage on the entire convention.

    A send attempted while no blackout is in effect is passed straight
    to the wrapped sender and starts a blackout of the minimum period.
    Each send attempted during a blackout is queued and multiplies the
    blackout by the hysteresis rate, capped at the maximum period.  When
    the blackout ends the queued sends are released (checkSends) and the
    delay is divided by the rate; with no further attempts it keeps
    shrinking until there is no delay at all.

    While queueing, an intent replaces any queued intent that has the
    same target and an equal message; the replaced intents are completed
    with the result of the intent that superseded them.

    Note: delays are updated in a target-independent manner; the target
    is only considered when eliminating duplicates.

    Note: maxDelay on TransmitIntents is ignored by hysteresis delays.
    It is assumed that a transmit intent's maxDelay is greater than the
    maximum hysteresis period and/or that the hysteresis delay is more
    important than the transmit intent timeout.
    """

    def __init__(self, actual_sender,
                 hysteresis_min_period=HYSTERESIS_MIN_PERIOD,
                 hysteresis_max_period=HYSTERESIS_MAX_PERIOD,
                 hysteresis_rate=HYSTERESIS_RATE):
        # Tunables.
        self._hysteresis_min_period = hysteresis_min_period
        self._hysteresis_max_period = hysteresis_max_period
        self._hysteresis_rate = hysteresis_rate
        # State.
        self._sender = actual_sender
        self._hysteresis_queue = []
        self._current_hysteresis = None  # timedelta, or None for no delay
        self._hysteresis_until = ExpiryTime(timedelta(seconds=0))

    @property
    def delay(self):
        """The ExpiryTime marking the end of the current blackout."""
        return self._hysteresis_until

    def _has_hysteresis(self):
        """True when a delay is set and is at least the minimum period."""
        if self._current_hysteresis is None:
            return False
        return self._current_hysteresis >= self._hysteresis_min_period

    def _increase_hysteresis(self):
        """Multiply the delay by the rate (capped at the maximum), or
        start it at the minimum period if none is in effect.
        """
        if not self._has_hysteresis():
            self._current_hysteresis = self._hysteresis_min_period
            return
        try:
            grown = self._current_hysteresis * self._hysteresis_rate
        except TypeError:
            # See the note in _decrease_hysteresis
            grown = timedelta(seconds=(self._current_hysteresis.seconds *
                                       self._hysteresis_rate))
        self._current_hysteresis = min(grown, self._hysteresis_max_period)

    def _decrease_hysteresis(self):
        """Divide the delay by the rate, or clear it entirely when it is
        already below the minimum period (or unset).
        """
        if not self._has_hysteresis():
            self._current_hysteresis = None
            return
        try:
            shrunk = self._current_hysteresis / self._hysteresis_rate
        except TypeError:
            # Python 2.x cannot multiply or divide a timedelta by a
            # fractional amount.  There is also not a total_seconds
            # retrieval from a timedelta, but it should be safe to
            # assume that the hysteresis value is not greater than 1
            # day.
            shrunk = timedelta(seconds=(self._current_hysteresis.seconds /
                                        self._hysteresis_rate))
        self._current_hysteresis = shrunk

    def _update_remaining_hysteresis_period(self, reset=False):
        """Recompute the blackout deadline from the current delay.

        With no delay the deadline is zero seconds from now.  With
        `reset` set, or a falsy existing deadline, it is a full delay
        from now; otherwise it is the delay less the time still
        remaining on the existing deadline.
        """
        if not self._current_hysteresis:
            deadline = ExpiryTime(timedelta(seconds=0))
        elif reset or not self._hysteresis_until:
            deadline = ExpiryTime(self._current_hysteresis)
        else:
            deadline = ExpiryTime(self._current_hysteresis -
                                  self._hysteresis_until.remaining())
        self._hysteresis_until = deadline

    def checkSends(self):
        """If the blackout has ended, shrink the delay, restart the
        period, and hand every queued intent to the wrapped sender.
        """
        if not self.delay.expired():
            return
        self._decrease_hysteresis()
        self._update_remaining_hysteresis_period(reset=True)
        # Keeping nothing empties the queue and yields all of its entries.
        for queued in self._keepIf(lambda _entry: False):
            self._sender(queued)

    @staticmethod
    def safe_cmp(val1, val2):
        """Equality test that treats a failing comparison as 'not equal'."""
        try:
            return val1 == val2
        except Exception:
            return False

    def sendWithHysteresis(self, intent):
        """Send `intent` now if no blackout is in effect, else queue it
        (replacing queued duplicates) and lengthen the blackout.
        """
        if not self._hysteresis_until.expired():
            def unrelated(entry):
                return (entry.targetAddr != intent.targetAddr or
                        not HysteresisDelaySender.safe_cmp(entry.message,
                                                           intent.message))

            superseded = self._keepIf(unrelated)
            # The superseded entries are duplicate sends to the new
            # intent's target; complete them when the actual message is
            # finally sent with the same result
            if superseded:
                intent.addCallback(self._dupSentGood(superseded),
                                   self._dupSentFail(superseded))
            self._hysteresis_queue.append(intent)
            self._increase_hysteresis()
        else:
            self._current_hysteresis = self._hysteresis_min_period
            self._sender(intent)
        self._update_remaining_hysteresis_period()

    def cancelSends(self, remoteAddr):
        """Drop every queued intent addressed to `remoteAddr`, marking
        each one as failed.
        """
        cancelled = self._keepIf(lambda entry: entry.targetAddr != remoteAddr)
        for entry in cancelled:
            entry.tx_done(SendStatus.Failed)

    def _keepIf(self, keepFunc):
        """Keep the queued intents selected by `keepFunc`; return the rest."""
        kept, dropped = partition(keepFunc, self._hysteresis_queue)
        self._hysteresis_queue = kept
        return dropped

    @staticmethod
    def _dupSentGood(dups):
        """Build the success callback that completes `dups` with the
        result of the intent that replaced them.
        """
        def _finishDups(result, finishedIntent):
            for duplicate in dups:
                duplicate.tx_done(result)
        return _finishDups

    @staticmethod
    def _dupSentFail(dups):
        """Build the failure callback that completes `dups` with the
        result of the intent that replaced them.
        """
        def _finishDups(result, finishedIntent):
            for duplicate in dups:
                duplicate.tx_done(result)
        return _finishDups