def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.id = calvinuuid.uuid("NODE") self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.control.start(node=self, uri=control_uri) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start)
def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri self.attributes = attributes self.id = calvinuuid.uuid("NODE") _log.debug("Calvin init 1") self.monitor = Event_Monitor() _log.debug("Calvin init 2") self.am = actormanager.ActorManager(self) _log.debug("Calvin init 3") self.control = calvincontrol.get_calvincontrol() _log.debug("Calvin init 4") self.sched = scheduler.Scheduler(self, self.am, self.monitor) _log.debug("Calvin init 5") self.control.start(node=self, uri=control_uri) self.async_msg_ids = {} _log.debug("Calvin init 6") self.storage = storage.Storage() self.storage.start() _log.debug("Calvin init 7") self.network = CalvinNetwork(self) _log.debug("Calvin init 8") self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) _log.debug("Calvin init 9") self.app_manager = appmanager.AppManager(self) _log.debug("Calvin init 10") # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start) _log.debug("Calvin init 11")
def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri self.external_uri = attributes.pop('external_uri', self.uri) \ if attributes else self.uri self.external_control_uri = attributes.pop('external_control_uri', self.control_uri) \ if attributes else self.control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.node_name = self.attributes.get_node_name_as_str() # Obtain node id, when using security also handle runtime certificate self.id = certificate.obtain_cert_node_info(self.node_name)['id'] self.authentication = authentication.Authentication(self) self.authorization = authorization.Authorization(self) try: self.domain = _conf.get("security", "security_domain_name") # cert_name is the node's certificate filename (without file extension) self.cert_name = certificate.get_own_cert_name(self.node_name) except: self.domain = None self.cert_name = None self.metering = metering.set_metering(metering.Metering(self)) self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start)
def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri self.attributes = attributes self.id = calvinuuid.uuid("NODE") self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.control.start(node=self, uri=control_uri) self.async_msg_ids = {} self.storage = storage.Storage() self.storage.start() self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function # FIXME: Don't use delayed call in calvin-tiny async.DelayedCall(0, self.start)
def __init__(self, uri, control_uri, attributes=None, self_start=True): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.id = calvinuuid.uuid("NODE") self.metering = metering.set_metering(metering.Metering(self)) self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) hb_timeout = _conf.get('global', 'heartbeat_timeout') or DEFAULT_HEARTBEAT_TIMEOUT self.heartbeat_timeout = float(hb_timeout) hb_delay = _conf.get('global', 'heartbeat_delay') or DEFAULT_HEARTBEAT_DELAY self.heartbeat_delay = float(hb_delay) self.heartbeat_addr = self._clean_addr() self.heartbeat_port = _conf.get('global', 'heartbeat_port') or int(self._clean_uri().split(":")[1]) + DEFAULT_HEARTBEAT_PORT_DIFF rr_delay = _conf.get('global', 'resource_reporter_delay') or 0.25 self.resource_reporter_delay = float(rr_delay) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) self.resource_manager = ResourceManager() self.app_monitor = AppMonitor(self, self.app_manager, self.storage) self.lost_node_handler = LostNodeHandler(self, self.resource_manager, self.pm, self.am, self.storage) self.heartbeat_actor = None self.outgoing_heartbeats = defaultdict(list) # The initialization that requires the main loop operating is deferred to start function if self_start: async.DelayedCall(0, self.start)
def __init__(self, uris, control_uri, attributes=None): super(Node, self).__init__() self.quitting = False # Warn if its not a uri if not isinstance(uris, list): _log.error("Calvin uris must be a list %s" % uris) raise TypeError("Calvin uris must be a list!") # Uris self.uris = uris if attributes: ext_uris = attributes.pop('external_uri', None) if ext_uris is not None: self.uris += ext_uris # Control uri self.control_uri = control_uri self.external_control_uri = attributes.pop('external_control_uri', self.control_uri) \ if attributes else self.control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.node_name = self.attributes.get_node_name_as_str() # Obtain node id, when using security also handle runtime certificate try: security_dir = _conf.get("security", "security_dir") self.runtime_credentials = RuntimeCredentials(self.node_name, node=self, security_dir=security_dir) self.id = self.runtime_credentials.get_node_id() except Exception as err: _log.debug("No runtime credentials, err={}".format(err)) self.runtime_credentials = None self.id = calvinuuid.uuid("Node") self.certificate_authority = certificate_authority.CertificateAuthority(self) self.authentication = authentication.Authentication(self) self.authorization = authorization.Authorization(self) self.metering = metering.set_metering(metering.Metering(self)) self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.rm = replicationmanager.ReplicationManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) calvinsys = get_calvinsys() calvinsys.init(self) calvinlib = get_calvinlib() calvinlib.init(self) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start)
class Node(object): """A node of calvin the uri is a list of server connection points the control_uri is the local console attributes is a supplied list of external defined attributes that will be used as the key when storing index such as name of node """ def __init__(self, uris, control_uri, attributes=None): super(Node, self).__init__() self.quitting = False # Warn if its not a uri if not isinstance(uris, list): _log.error("Calvin uris must be a list %s" % uris) raise TypeError("Calvin uris must be a list!") # Uris self.uris = uris if attributes: ext_uris = attributes.pop('external_uri', None) if ext_uris is not None: self.uris += ext_uris # Control uri self.control_uri = control_uri self.external_control_uri = attributes.pop('external_control_uri', self.control_uri) \ if attributes else self.control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.node_name = self.attributes.get_node_name_as_str() # Obtain node id, when using security also handle runtime certificate try: security_dir = _conf.get("security", "security_dir") self.runtime_credentials = RuntimeCredentials(self.node_name, node=self, security_dir=security_dir) self.id = self.runtime_credentials.get_node_id() except Exception as err: _log.debug("No runtime credentials, err={}".format(err)) self.runtime_credentials = None self.id = calvinuuid.uuid("Node") self.certificate_authority = certificate_authority.CertificateAuthority(self) self.authentication = authentication.Authentication(self) self.authorization = authorization.Authorization(self) self.metering = metering.set_metering(metering.Metering(self)) self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.rm = replicationmanager.ReplicationManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) calvinsys = get_calvinsys() calvinsys.init(self) calvinlib = get_calvinlib() calvinlib.init(self) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start) def insert_local_reply(self): msg_id = calvinuuid.uuid("LMSG") self.async_msg_ids[msg_id] = None return msg_id def set_local_reply(self, msg_id, reply): if msg_id in self.async_msg_ids: self.async_msg_ids[msg_id] = reply def connect(self, actor_id=None, port_name=None, port_dir=None, port_properties=None, port_id=None, peer_node_id=None, peer_actor_id=None, peer_port_name=None, peer_port_dir=None, peer_port_properties=None, peer_port_id=None, cb=None): if port_properties is None and port_dir is not None: port_properties = {'direction': port_dir} if peer_port_properties is None and peer_port_dir is not None: peer_port_properties = {'direction': peer_port_dir} self.pm.connect(actor_id=actor_id, port_name=port_name, port_properties=port_properties, port_id=port_id, peer_node_id=peer_node_id, peer_actor_id=peer_actor_id, peer_port_name=peer_port_name, peer_port_properties=peer_port_properties, peer_port_id=peer_port_id, callback=CalvinCB(self.logging_callback, preamble="connect cb") if cb is None else cb) def peersetup(self, peers, cb=None): """ Sets up a RT to RT communication channel, only needed if the peer can't be found in storage. peers: a list of peer uris, e.g. ["calvinip://127.0.0.1:5001"] """ _log.debug("peersetup(%s)" % (peers)) peers_copy = peers[:] peer_node_ids = {} if not cb: callback = CalvinCB(self.logging_callback, preamble="peersetup cb") else: callback = CalvinCB(self.peersetup_collect_cb, peers=peers_copy, peer_node_ids=peer_node_ids, org_cb=cb) self.network.join(peers, callback=callback) def peersetup_collect_cb(self, status, uri, peer_node_id, peer_node_ids, peers, org_cb): if uri in peers: peers.remove(uri) peer_node_ids[uri] = (peer_node_id, status) if not peers: # Get highest status, i.e. any error comb_status = max([s for _, s in peer_node_ids.values()]) org_cb(peer_node_ids=peer_node_ids, status=comb_status) def logging_callback(self, preamble=None, *args, **kwargs): _log.debug("\n%s# NODE: %s \n# %s %s %s \n%s" % ('#' * 40, self.id, preamble if preamble else "*", args, kwargs, '#' * 40)) def new(self, actor_type, args, deploy_args=None, state=None, prev_connections=None, connection_list=None): # TODO requirements should be input to am.new # TODO: make it possible to use security/credentials here. actor_def, signer = self.am.lookup_and_verify(actor_type) actor_id = self.am.new(actor_type, args, state, prev_connections, connection_list, signature=deploy_args['signature'] if deploy_args and 'signature' in deploy_args else None, actor_def=actor_def) if deploy_args: app_id = deploy_args['app_id'] if 'app_name' not in deploy_args: app_name = app_id else: app_name = deploy_args['app_name'] self.app_manager.add(app_id, actor_id, deploy_info = deploy_args['deploy_info'] if 'deploy_info' in deploy_args else None) return actor_id def calvinsys(self): """Return a CalvinSys instance""" # FIXME: We still need to sort out actor requirements vs. node capabilities and user permissions. # @TODO: Write node capabilities to storage return self._calvinsys # # Event loop # def run(self): """main loop on node""" _log.debug("Node %s is running" % self.id) self.sched.run() def start(self): """ Run once when main loop is started """ interfaces = _conf.get(None, 'transports') self.network.register(interfaces, ['json']) self.network.start_listeners(self.uris) # Start storage after network, proto etc since storage proxy expects them self.storage.start(cb=CalvinCB(self._storage_started_cb)) self.storage.add_node(self) # Start control API proxy_control_uri = _conf.get(None, 'control_proxy') _log.debug("Start control API on %s with uri: %s and proxy: %s" % (self.id, self.control_uri, proxy_control_uri)) if proxy_control_uri is not None: self.control.start(node=self, uri=proxy_control_uri, tunnel=True) else: if self.control_uri is not None: self.control.start(node=self, uri=self.control_uri, external_uri=self.external_control_uri) def stop(self, callback=None): # TODO: Handle blocking in poorly implemented calvinsys/runtime south. self.quitting = True def stopped(*args): _log.analyze(self.id, "+", {'args': args}) _log.debug(args) self.sched.stop() _log.analyze(self.id, "+ SCHED STOPPED", {'args': args}) self.control.stop() _log.analyze(self.id, "+ CONTROL STOPPED", {'args': args}) def deleted_node(*args, **kwargs): _log.analyze(self.id, "+", {'args': args, 'kwargs': kwargs}) self.storage.stop(stopped) _log.analyze(self.id, "+", {}) self.storage.delete_node(self, cb=deleted_node) for link in self.network.list_direct_links(): self.network.link_get(link).close() def stop_with_cleanup(self): # Set timeout in case some actor is refusing to stop (or leave if already migrating) timeout = async.DelayedCall(50, self.stop) self.quitting = True # get all actors if not self.am.actors: # No actors, we're basically done return self.stop() actors = [] for actor in self.am.actors.values(): # Do not delete migrating actors (for now) if actor._migrating_to is None: actors.append(actor) # delete all actors for actor in actors: self.am.destroy(actor.id) # and die - hopefully, things should clean up nicely within reasonable time def poll_deleted(retry): if self.am.actors: _log.info("{} actors remaining, rechecking in {} secs".format(len(self.am.actors))) async.DelayedCall(1*retry, poll_deleted) else : _log.info("All done, exiting") timeout.cancel() self.stop() async.DelayedCall(0.5, poll_deleted, retry=1) def stop_with_migration(self, callback=None): # Set timeout if we are still failing after 50 seconds timeout_stop = async.DelayedCall(50, self.stop) self.quitting = True actors = [] already_migrating = [] if not self.am.actors: return self.stop(callback) for actor in self.am.actors.values(): if actor._migrating_to is None: actors.append(actor) else: already_migrating.append(actor.id) def poll_migrated(): # When already migrating, we can only poll, since we don't get the callback if self.am.actors: # Check again in a sec async.DelayedCall(1, poll_migrated) return timeout_stop.cancel() self.stop(callback) def migrated(actor_id, **kwargs): actor = self.am.actors.get(actor_id, None) status = kwargs['status'] if actor is not None: # Failed to migrate according to requirements, try the current known peers peer_ids = self.network.list_direct_links() if peer_ids: # This will remove the actor from the list of actors self.am.robust_migrate(actor_id, peer_ids, callback=CalvinCB(migrated, actor_id=actor_id)) return else: # Ok, we have failed migrate actor according to requirements and to any known peer # FIXME find unknown peers and try migrate to them, now just destroy actor, so storage is cleaned _log.error("Failed to evict actor %s before quitting" % actor_id) self.am.destroy(actor_id) if self.am.actors: return timeout_stop.cancel() self.stop(callback) if already_migrating: async.DelayedCall(1, poll_migrated) if not actors: return elif not actors: # No actors return self.stop(callback) # Migrate the actors according to their requirements # (even actors without explicit requirements will migrate based on e.g. requires and port property needs) for actor in actors: if actor._replication_data.terminate_with_node(actor.id): _log.info("TERMINATE REPLICA") self.rm.terminate(actor.id, callback=CalvinCB(migrated, actor_id=actor.id)) else: _log.info("TERMINATE MIGRATE ACTOR") self.am.update_requirements(actor.id, [], extend=True, move=True, authorization_check=False, callback=CalvinCB(migrated, actor_id=actor.id)) def _storage_started_cb(self, *args, **kwargs): self.authentication.find_authentication_server() self.authorization.register_node()
class Node(object): """A node of calvin the uri is used as server connection point the control_uri is the local console attributes is a supplied list of external defined attributes that will be used as the key when storing index such as name of node """ def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri self.attributes = attributes self.id = calvinuuid.uuid("NODE") self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.control.start(node=self, uri=control_uri) self.async_msg_ids = {} self.storage = storage.Storage() self.storage.start() self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function # FIXME: Don't use delayed call in calvin-tiny async.DelayedCall(0, self.start) # self.start() def insert_local_reply(self): msg_id = calvinuuid.uuid("LMSG") self.async_msg_ids[msg_id] = None return msg_id def set_local_reply(self, msg_id, reply): if msg_id in self.async_msg_ids: self.async_msg_ids[msg_id] = reply def connect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, peer_node_id=None, peer_actor_id=None, peer_port_name=None, peer_port_dir=None, peer_port_id=None): # FIXME callback needed to send back a proper reply !!!!! self.pm.connect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, peer_node_id=peer_node_id, peer_actor_id=peer_actor_id, peer_port_name=peer_port_name, peer_port_dir=peer_port_dir, peer_port_id=peer_port_id, callback=CalvinCB(self.logging_callback, preamble="connect cb")) def disconnect(self, actor_id=None, port_name=None, port_dir=None, port_id=None): # FIXME callback needed to send back a proper reply !!!!! _log.debug("disconnect(actor_id=%s, port_name=%s, port_dir=%s, port_id=%s)" % (actor_id if actor_id else "", port_name if port_name else "", port_dir if port_dir else "", port_id if port_id else "")) self.pm.disconnect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, callback=CalvinCB(self.logging_callback, preamble="disconnect cb")) def peersetup(self, peers): """ Sets up a RT to RT communication channel, only needed if the peer can't be found in storage. peers: a list of peer uris, e.g. ["calvinip://127.0.0.1:5001"] """ # FIXME callback needed to send back a proper reply !!!!! _log.debug("peersetup(%s)" % (peers)) self.network.join(peers, callback=CalvinCB(self.logging_callback, preamble="peersetup cb")) def logging_callback(self, preamble=None, *args, **kwargs): _log.debug("\n%s# NODE: %s \n# %s %s %s \n%s" % ('#' * 40, self.id, preamble if preamble else "*", args, kwargs, '#' * 40)) def new(self, actor_type, args, deploy_args=None, state=None, prev_connections=None, connection_list=None): actor_id = self.am.new(actor_type, args, state, prev_connections, connection_list) if deploy_args: app_id = deploy_args['app_id'] if 'app_name' not in deploy_args: app_name = app_id else: app_name = deploy_args['app_name'] self.app_manager.add(app_id, app_name, actor_id) return actor_id def calvinsys(self, actor): """Return a CalvinSys instance""" # FIXME: We still need to sort out actor requirements vs. node capabilities and user permissions. return CalvinSys(actor, self) # # Event loop # def run(self): """main loop on node""" _log.debug("Node %s is running" % self.id) self.sched.run() def start(self): """ Run once when main loop is started """ # FIXME hardcoded which transport and encoder plugin we use, should be based on self.network.register(['calvinip'], ['json']) self.network.start_listeners([self.uri]) self.storage.add_node(self) def stop(self, callback=None): def stopped(*args): _log.debug(args) self.sched.stop() self.control.stop() def deleted_node(*args, **kwargs): self.storage.stop(stopped) self.storage.delete_node(self, cb=deleted_node)
class Node(object): """A node of calvin the uri is a list of server connection points the control_uri is the local console attributes is a supplied list of external defined attributes that will be used as the key when storing index such as name of node """ def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri self.external_uri = attributes.pop('external_uri', self.uri) \ if attributes else self.uri self.external_control_uri = attributes.pop('external_control_uri', self.control_uri) \ if attributes else self.control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) # Obtain node id, when using security also handle runtime certificate self.id = certificate.obtain_cert_node_info(self.attributes.get_node_name_as_str())['id'] self.metering = metering.set_metering(metering.Metering(self)) self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start) def insert_local_reply(self): msg_id = calvinuuid.uuid("LMSG") self.async_msg_ids[msg_id] = None return msg_id def set_local_reply(self, msg_id, reply): if msg_id in self.async_msg_ids: self.async_msg_ids[msg_id] = reply def connect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, peer_node_id=None, peer_actor_id=None, peer_port_name=None, peer_port_dir=None, peer_port_id=None, cb=None): self.pm.connect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, peer_node_id=peer_node_id, peer_actor_id=peer_actor_id, peer_port_name=peer_port_name, peer_port_dir=peer_port_dir, peer_port_id=peer_port_id, callback=CalvinCB(self.logging_callback, preamble="connect cb") if cb is None else cb) def disconnect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, cb=None): _log.debug("disconnect(actor_id=%s, port_name=%s, port_dir=%s, port_id=%s)" % (actor_id if actor_id else "", port_name if port_name else "", port_dir if port_dir else "", port_id if port_id else "")) self.pm.disconnect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, callback=CalvinCB(self.logging_callback, preamble="disconnect cb") if cb is None else cb) def peersetup(self, peers, cb=None): """ Sets up a RT to RT communication channel, only needed if the peer can't be found in storage. peers: a list of peer uris, e.g. ["calvinip://127.0.0.1:5001"] """ _log.debug("peersetup(%s)" % (peers)) peers_copy = peers[:] peer_node_ids = {} if not cb: callback = CalvinCB(self.logging_callback, preamble="peersetup cb") else: callback = CalvinCB(self.peersetup_collect_cb, peers=peers_copy, peer_node_ids=peer_node_ids, org_cb=cb) self.network.join(peers, callback=callback) def peersetup_collect_cb(self, status, uri, peer_node_id, peer_node_ids, peers, org_cb): if uri in peers: peers.remove(uri) peer_node_ids[uri] = (peer_node_id, status) if not peers: # Get highest status, i.e. any error comb_status = max([s for _, s in peer_node_ids.values()]) org_cb(peer_node_ids=peer_node_ids, status=comb_status) def logging_callback(self, preamble=None, *args, **kwargs): _log.debug("\n%s# NODE: %s \n# %s %s %s \n%s" % ('#' * 40, self.id, preamble if preamble else "*", args, kwargs, '#' * 40)) def new(self, actor_type, args, deploy_args=None, state=None, prev_connections=None, connection_list=None): # TODO requirements should be input to am.new actor_id = self.am.new(actor_type, args, state, prev_connections, connection_list, signature=deploy_args['signature'] if deploy_args and 'signature' in deploy_args else None, credentials=deploy_args['credentials'] if deploy_args and 'credentials' in deploy_args else None) if deploy_args: app_id = deploy_args['app_id'] if 'app_name' not in deploy_args: app_name = app_id else: app_name = deploy_args['app_name'] self.app_manager.add(app_id, actor_id, deploy_info = deploy_args['deploy_info'] if 'deploy_info' in deploy_args else None) return actor_id def calvinsys(self): """Return a CalvinSys instance""" # FIXME: We still need to sort out actor requirements vs. node capabilities and user permissions. # @TODO: Write node capabilities to storage return self._calvinsys # # Event loop # def run(self): """main loop on node""" _log.debug("Node %s is running" % self.id) self.sched.run() def start(self): """ Run once when main loop is started """ interfaces = _conf.get(None, 'transports') self.network.register(interfaces, ['json']) self.network.start_listeners(self.uri) # Start storage after network, proto etc since storage proxy expects them self.storage.start() self.storage.add_node(self) # Start control api proxy_control_uri = _conf.get(None, 'control_proxy') _log.debug("Start control API on %s with uri: %s and proxy: %s" % (self.id, self.control_uri, proxy_control_uri)) if proxy_control_uri is not None: self.control.start(node=self, uri=proxy_control_uri, tunnel=True) else: if self.control_uri is not None: self.control.start(node=self, uri=self.control_uri) def stop(self, callback=None): def stopped(*args): _log.analyze(self.id, "+", {'args': args}) _log.debug(args) self.sched.stop() _log.analyze(self.id, "+ SCHED STOPPED", {'args': args}) self.control.stop() _log.analyze(self.id, "+ CONTROL STOPPED", {'args': args}) def deleted_node(*args, **kwargs): _log.analyze(self.id, "+", {'args': args, 'kwargs': kwargs}) self.storage.stop(stopped) _log.analyze(self.id, "+", {}) self.storage.delete_node(self, cb=deleted_node)
class Node(object): """A node of calvin the uri is a list of server connection points the control_uri is the local console attributes is a supplied list of external defined attributes that will be used as the key when storing index such as name of node """ def __init__(self, uri, control_uri, attributes=None, self_start=True): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.id = calvinuuid.uuid("NODE") self.metering = metering.set_metering(metering.Metering(self)) self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) hb_timeout = _conf.get('global', 'heartbeat_timeout') or DEFAULT_HEARTBEAT_TIMEOUT self.heartbeat_timeout = float(hb_timeout) hb_delay = _conf.get('global', 'heartbeat_delay') or DEFAULT_HEARTBEAT_DELAY self.heartbeat_delay = float(hb_delay) self.heartbeat_addr = self._clean_addr() self.heartbeat_port = _conf.get('global', 'heartbeat_port') or int(self._clean_uri().split(":")[1]) + DEFAULT_HEARTBEAT_PORT_DIFF rr_delay = _conf.get('global', 'resource_reporter_delay') or 0.25 self.resource_reporter_delay = float(rr_delay) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) self.resource_manager = ResourceManager() self.app_monitor = AppMonitor(self, self.app_manager, self.storage) self.lost_node_handler = LostNodeHandler(self, self.resource_manager, self.pm, self.am, self.storage) self.heartbeat_actor = None self.outgoing_heartbeats = defaultdict(list) # The initialization that requires the main loop operating is deferred to start function if self_start: async.DelayedCall(0, self.start) @property def storage_node(self): return False def is_storage_node(self, node_id): if node_id == self.id: return self.storage_node #if node_id not in self.network.links: # return False return self.storage.proxy == self.network.links[node_id].transport.get_uri() def _clean_uri(self): return self.control_uri.replace("http://", "") def _clean_addr(self): uri = self._clean_uri() if uri == "localhost": addr = socket.gethostbyname(uri.split(":")[0]) else: addr = uri.split(":")[0] return addr def insert_local_reply(self): msg_id = calvinuuid.uuid("LMSG") self.async_msg_ids[msg_id] = None return msg_id def set_local_reply(self, msg_id, reply): if msg_id in self.async_msg_ids: self.async_msg_ids[msg_id] = reply def connect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, peer_node_id=None, peer_actor_id=None, peer_port_name=None, peer_port_dir=None, peer_port_id=None, cb=None): self.pm.connect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, peer_node_id=peer_node_id, peer_actor_id=peer_actor_id, peer_port_name=peer_port_name, peer_port_dir=peer_port_dir, peer_port_id=peer_port_id, callback=CalvinCB(self.logging_callback, preamble="connect cb") if cb is None else cb) def disconnect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, cb=None): _log.debug("disconnect(actor_id=%s, port_name=%s, port_dir=%s, port_id=%s)" % (actor_id if actor_id else "", port_name if port_name else "", port_dir if port_dir else "", port_id if port_id else "")) self.pm.disconnect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, callback=CalvinCB(self.logging_callback, preamble="disconnect cb") if cb is None else cb) def peersetup(self, peers, cb=None): """ Sets up a RT to RT communication channel, only needed if the peer can't be found in storage. peers: a list of peer uris, e.g. ["calvinip://127.0.0.1:5001"] """ _log.info("peersetup(%s)" % (peers)) peers_copy = peers[:] peer_node_ids = {} if not cb: callback = CalvinCB(self.logging_callback, preamble="peersetup cb") else: callback = CalvinCB(self.peersetup_collect_cb, peers=peers_copy, peer_node_ids=peer_node_ids, org_cb=cb) peers = filter(None, peers) self.network.join(peers, callback=callback) def peersetup_collect_cb(self, status, uri, peer_node_id, peer_node_ids, peers, org_cb): _log.debug("Peersetup collect cb: {} - {} - {} - {} - {}".format(status, uri, peer_node_id, peer_node_ids, peers)) self.resource_manager.register_uri(peer_node_id, uri) if status: self.resource_manager.register_uri(peer_node_id, uri) self._register_heartbeat_receiver(peer_node_id) if uri in peers: peers.remove(uri) peer_node_ids[uri] = (peer_node_id, status) if not peers: # Get highest status, i.e. any error comb_status = max([s for _, s in peer_node_ids.values()]) org_cb(peer_node_ids=peer_node_ids, status=comb_status) if peer_node_id and status: self._send_rm_info(peer_node_id) def _send_rm_info(self, peer_node_id): # Send own info to new peers and retreive there info callback = CalvinCB(self._send_rm_info_cb) usages = self.resource_manager.sync_info() self.proto.send_rm_info(peer_node_id, usages, callback) def _send_rm_info_cb(self, status, *args, **kwargs): # Receives other peers rm info if status.data: self.resource_manager.sync_info(status.data[0]) def sync_rm_info(self, usages, callback): # sync received info usages = self.resource_manager.sync_info(usages) callback(usages, status=response.CalvinResponse(True)) def logging_callback(self, preamble=None, *args, **kwargs): _log.debug("\n%s# NODE: %s \n# %s %s %s \n%s" % ('#' * 40, self.id, preamble if preamble else "*", args, kwargs, '#' * 40)) def new(self, actor_type, args, deploy_args=None, state=None, prev_connections=None, connection_list=None, callback=None): # TODO requirements should be input to am.new callback = CalvinCB(self._new, args=args, deploy_args=deploy_args, state=state, prev_connections=prev_connections, connection_list=connection_list, callback=callback) self.am.new(actor_type, args, state, prev_connections, connection_list, app_id=deploy_args['app_id'] if deploy_args else None, signature=deploy_args['signature'] if deploy_args and 'signature' in deploy_args else None, callback=callback) def _new(self, actor_id, status, args, deploy_args, state, prev_connections, connection_list, callback): if not status: if callback: callback(status=status, actor_id=actor_id) return if deploy_args: app_id = deploy_args['app_id'] if 'app_name' not in deploy_args: app_name = app_id else: app_name = deploy_args['app_name'] self.app_manager.add(app_id, actor_id, deploy_info = deploy_args['deploy_info'] if 'deploy_info' in deploy_args else None) if callback: callback(status=status, actor_id=actor_id) def deployment_control(self, app_id, actor_id, deploy_args): """ Updates an actor's deployment """ self.am.deployment_control(app_id, actor_id, deploy_args) def calvinsys(self): """Return a CalvinSys instance""" # FIXME: We still need to sort out actor requirements vs. node capabilities and user permissions. # @TODO: Write node capabilities to storage return self._calvinsys @property def hostname(self): return socket.gethostname() @property def testing(self): return "CALVIN_TESTING" in os.environ and os.environ["CALVIN_TESTING"] def report_resource_usage(self, usage): _log.debug("Reporting resource usage for node {}: {}".format(self.id, usage)) self.resource_manager.register(self.id, usage, self.uri) usage['uri'] = self.uri for peer_id in self.network.list_links(): callback = CalvinCB(self._report_resource_usage_cb, peer_id) self.proto.report_usage(peer_id, self.id, usage, callback=callback) self.app_monitor.check_reliabilities() def _report_resource_usage_cb(self, peer_id, status): if not status: _log.error("Failed to report resource usage to: {} - {} - {}".format(peer_id, self.resource_manager.node_uris.get(peer_id), status)) else: _log.debug("Report resource usage callback received status {} for {}".format(status, peer_id)) def register_resource_usage(self, node_id, usage, callback): if self.storage_node: callback(status=response.CalvinResponse(True)) return _log.debug("Registering resource usage for node {}: {}".format(node_id, usage)) uri = usage.get('uri') self.resource_manager.register(node_id, usage, uri) self._register_heartbeat_receiver(node_id) callback(status=response.CalvinResponse(True)) def report_replication_time(self, actor_type, replication_time, node_id): _log.info('New replication time: {} when handling lost node {}'.format(replication_time, node_id)) self.storage.new_replication_time(actor_type, replication_time) def lost_node(self, node_id): _log.debug("Lost node: {}".format(node_id)) _log.analyze(self.id, "+", "Lost node {}".format(node_id)) if self.storage_node: _log.debug("{} Is storage node, ignoring lost node".format(self.id)) return result = self.heartbeat_actor.receive() nodes = set() if result and result.production: if node_id in result.production[0]: return if node_id in self.network.links: link = self.network.links[node_id] self.network.peer_disconnected(link, node_id, "Heartbeat timeout") self.lost_node_handler.handle_lost_node(node_id) def lost_node_request(self, node_id, cb): _log.debug("Lost node: {}".format(node_id)) _log.analyze(self.id, "+", "Lost node {}".format(node_id)) if self.storage_node: _log.debug("{} Is storage node, ignoring lost node".format(self.id)) if cb: cb(status=response.CalvinResponse(False)) return self.lost_node_handler.handle_lost_node_request(node_id, cb) def lost_actor(self, lost_actor_id, lost_actor_info, required_reliability, cb): _log.analyze(self.id, "+", "Lost actor {}".format(lost_actor_id)) self.am.delete_actor(lost_actor_id) replicator = Replicator(self, lost_actor_id, lost_actor_info, required_reliability) replicator.replicate_lost_actor(cb, time.time()) def _heartbeat_timeout(self, node_id): self._clear_heartbeat_timeouts(node_id) self.heartbeat_actor.deregister(node_id) self.lost_node(node_id) def clear_outgoing_heartbeat(self, data): if "node_id" in data: node_id = data['node_id'] self._clear_heartbeat_timeouts(node_id) timeout_call = async.DelayedCall(self.heartbeat_timeout, CalvinCB(self._heartbeat_timeout, node_id=node_id)) self.outgoing_heartbeats[node_id].append(timeout_call) self.resource_manager.register_uri(data['node_id'], data['uri']) def _clear_heartbeat_timeouts(self, node_id): for timeout_call in self.outgoing_heartbeats[node_id]: try: timeout_call.cancel() except Exception as e: pass # # Event loop # def run(self): """main loop on node""" _log.debug("Node %s is running" % self.id) self.sched.run() def start(self): """ Run once when main loop is started """ if not self.storage_node: self._start_heartbeat_system() interfaces = _conf.get(None, 'transports') self.network.register(interfaces, ['json']) self.network.start_listeners(self.uri) # Start storage after network, proto etc since storage proxy expects them self.storage.start() self.storage.add_node(self) # Start control api proxy_control_uri = _conf.get(None, 'control_proxy') _log.debug("Start control API on %s with uri: %s and proxy: %s" % (self.id, self.control_uri, proxy_control_uri)) if proxy_control_uri is not None: self.control.start(node=self, uri=proxy_control_uri, tunnel=True) else: if self.control_uri is not None: self.control.start(node=self, uri=self.control_uri) if not self.storage_node: self._start_resource_reporter() def _start_resource_reporter(self): self.new("sys.NodeResourceReporter", {'node': self, 'delay': self.resource_reporter_delay}, callback=self._start_rr) def _start_rr(self, status, actor_id): if not status: _log.error("Failed to start resource reporter") return _log.info("Successfully started resource reporter with delay {}".format(self.resource_reporter_delay)) actor = self.am.actors[actor_id] if not actor.inports or not actor.outports: _log.warning("Could not set up ResourceReporter: {}".format(actor)) return in_port = actor.inports['in'] out_port = actor.outports['out'] self.connect(actor_id, port_name=in_port.name, port_dir='in', port_id=in_port.id, peer_node_id=self.id, peer_actor_id=actor_id, peer_port_name=out_port.name, peer_port_dir='out', peer_port_id=out_port.id) def _start_heartbeat_system(self): if self.testing: return port = self.heartbeat_port addr = self.heartbeat_addr self.new("net.Heartbeat", {'node': self, 'address': addr, 'port': port, 'delay': self.heartbeat_delay}, callback=self._start_hb) def _start_hb(self, status, actor_id): if not status: _log.error("Failed to start heartbeat system: ".format(status)) return _log.info("Successfully started heartbeat actor with timeout {} and delay {}".format(self.heartbeat_timeout, self.heartbeat_delay)) actor = self.am.actors[actor_id] in_port = actor.inports['in'] out_port = actor.outports['out'] self.connect(actor_id, port_name=in_port.name, port_dir='in', port_id=in_port.id, peer_node_id=self.id, peer_actor_id=actor_id, peer_port_name=out_port.name, peer_port_dir='out', peer_port_id=out_port.id) self.heartbeat_actor = actor def _register_heartbeat_receiver(self, node_id): if self.storage_node or self.is_storage_node(node_id): return if not self.heartbeat_actor: self._start_heartbeat_system() if self.testing or node_id in self.heartbeat_actor.nodes: return _log.info("Registering receiver: {}".format(node_id)) self.heartbeat_actor.register(node_id) def stop(self, callback=None): def stopped(*args): _log.analyze(self.id, "+", {'args': args}) _log.debug(args) self.sched.stop() _log.analyze(self.id, "+ SCHED STOPPED", {'args': args}) self.control.stop() _log.analyze(self.id, "+ CONTROL STOPPED", {'args': args}) def deleted_node(*args, **kwargs): _log.analyze(self.id, "+", {'args': args, 'kwargs': kwargs}) self.storage.stop(stopped) _log.analyze(self.id, "+", {}) self.storage.delete_node(self.id, self.attributes.get_indexed_public(), cb=deleted_node)
def __init__(self, uris, control_uri, attributes=None): super(Node, self).__init__() self.quitting = False self.super_node_class = None # Warn if its not a uri if not isinstance(uris, list): _log.error("Calvin uris must be a list %s" % uris) raise TypeError("Calvin uris must be a list!") # Uris self.uris = uris if attributes: ext_uris = attributes.pop('external_uri', None) if ext_uris is not None: self.uris += ext_uris # Control uri self.control_uri = control_uri self.external_control_uri = attributes.pop('external_control_uri', self.control_uri) \ if attributes else self.control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.node_name = self.attributes.get_node_name_as_str() # Obtain node id, when using security also handle runtime certificate try: security_dir = _conf.get("security", "security_dir") self.runtime_credentials = RuntimeCredentials(self.node_name, node=self, security_dir=security_dir) self.id = self.runtime_credentials.get_node_id() except Exception as err: _log.debug("No runtime credentials, err={}".format(err)) self.runtime_credentials = None self.id = calvinuuid.uuid("Node") self.certificate_authority = certificate_authority.CertificateAuthority(self) self.authentication = authentication.Authentication(self) self.authorization = authorization.Authorization(self) self.am = actormanager.ActorManager(self) self.rm = replicationmanager.ReplicationManager(self) self.control = calvincontrol.get_calvincontrol() # _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler # _scheduler = scheduler.NonPreemptiveScheduler # _scheduler = scheduler.RoundRobinScheduler _scheduler = scheduler.SimpleScheduler # _scheduler = scheduler.BaselineScheduler self.sched = _scheduler(self, self.am) self.async_msg_ids = {} calvinsys = get_calvinsys() calvinsys.init(self) calvinlib = get_calvinlib() calvinlib.init() # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) self.cpu_monitor = CpuMonitor(self.id, self.storage) self.mem_monitor = MemMonitor(self.id, self.storage) self.proxy_handler = ProxyHandler(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start)
class Node(object): """A node of calvin the uri is a list of server connection points the control_uri is the local console attributes is a supplied list of external defined attributes that will be used as the key when storing index such as name of node """ def __init__(self, uris, control_uri, attributes=None): super(Node, self).__init__() self.quitting = False self.super_node_class = None # Warn if its not a uri if not isinstance(uris, list): _log.error("Calvin uris must be a list %s" % uris) raise TypeError("Calvin uris must be a list!") # Uris self.uris = uris if attributes: ext_uris = attributes.pop('external_uri', None) if ext_uris is not None: self.uris += ext_uris # Control uri self.control_uri = control_uri self.external_control_uri = attributes.pop('external_control_uri', self.control_uri) \ if attributes else self.control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.node_name = self.attributes.get_node_name_as_str() # Obtain node id, when using security also handle runtime certificate try: security_dir = _conf.get("security", "security_dir") self.runtime_credentials = RuntimeCredentials(self.node_name, node=self, security_dir=security_dir) self.id = self.runtime_credentials.get_node_id() except Exception as err: _log.debug("No runtime credentials, err={}".format(err)) self.runtime_credentials = None self.id = calvinuuid.uuid("Node") self.certificate_authority = certificate_authority.CertificateAuthority(self) self.authentication = authentication.Authentication(self) self.authorization = authorization.Authorization(self) self.am = actormanager.ActorManager(self) self.rm = replicationmanager.ReplicationManager(self) self.control = calvincontrol.get_calvincontrol() # _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler # _scheduler = scheduler.NonPreemptiveScheduler # _scheduler = scheduler.RoundRobinScheduler _scheduler = scheduler.SimpleScheduler # _scheduler = scheduler.BaselineScheduler self.sched = _scheduler(self, self.am) self.async_msg_ids = {} calvinsys = get_calvinsys() calvinsys.init(self) calvinlib = get_calvinlib() calvinlib.init() # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) self.cpu_monitor = CpuMonitor(self.id, self.storage) self.mem_monitor = MemMonitor(self.id, self.storage) self.proxy_handler = ProxyHandler(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start) def insert_local_reply(self): msg_id = calvinuuid.uuid("LMSG") self.async_msg_ids[msg_id] = None return msg_id def set_local_reply(self, msg_id, reply): if msg_id in self.async_msg_ids: self.async_msg_ids[msg_id] = reply def connect(self, actor_id=None, port_name=None, port_dir=None, port_properties=None, port_id=None, peer_node_id=None, peer_actor_id=None, peer_port_name=None, peer_port_dir=None, peer_port_properties=None, peer_port_id=None, cb=None): if port_properties is None and port_dir is not None: port_properties = {'direction': port_dir} if peer_port_properties is None and peer_port_dir is not None: peer_port_properties = {'direction': peer_port_dir} self.pm.connect(actor_id=actor_id, port_name=port_name, port_properties=port_properties, port_id=port_id, peer_node_id=peer_node_id, peer_actor_id=peer_actor_id, peer_port_name=peer_port_name, peer_port_properties=peer_port_properties, peer_port_id=peer_port_id, callback=CalvinCB(self.logging_callback, preamble="connect cb") if cb is None else cb) def peersetup(self, peers, cb=None): """ Sets up a RT to RT communication channel, only needed if the peer can't be found in storage. peers: a list of peer uris, e.g. ["calvinip://127.0.0.1:5001"] """ _log.debug("peersetup(%s)" % (peers)) peers_copy = peers[:] peer_node_ids = {} if not cb: callback = CalvinCB(self.logging_callback, preamble="peersetup cb") else: callback = CalvinCB(self.peersetup_collect_cb, peers=peers_copy, peer_node_ids=peer_node_ids, org_cb=cb) self.network.join(peers, callback=callback) def peersetup_collect_cb(self, status, uri, peer_node_id, peer_node_ids, peers, org_cb): if uri in peers: peers.remove(uri) peer_node_ids[uri] = (peer_node_id, status) if not peers: # Get highest status, i.e. any error comb_status = max([s for _, s in peer_node_ids.values()]) org_cb(peer_node_ids=peer_node_ids, status=comb_status) def logging_callback(self, preamble=None, *args, **kwargs): _log.debug("\n%s# NODE: %s \n# %s %s %s \n%s" % ('#' * 40, self.id, preamble if preamble else "*", args, kwargs, '#' * 40)) def new(self, actor_type, args, deploy_args=None, state=None, prev_connections=None, connection_list=None, security=None, access_decision=None): # TODO requirements should be input to am.new # TODO: make it possible to use security/credentials here. actor_def, signer = self.am.lookup_and_verify(actor_type) actor_id = self.am.new(actor_type, args, state, prev_connections, connection_list, signature=deploy_args['signature'] if deploy_args and 'signature' in deploy_args else None, actor_def=actor_def, security=security, access_decision=access_decision) if deploy_args: app_id = deploy_args['app_id'] if 'app_name' not in deploy_args: app_name = app_id else: app_name = deploy_args['app_name'] self.app_manager.add(app_id, actor_id, deploy_info = deploy_args['deploy_info'] if 'deploy_info' in deploy_args else None) return actor_id # # Event loop # def run(self): """main loop on node""" _log.debug("Node %s is running" % self.id) self.sched.run() def start(self): """ Run once when main loop is started """ interfaces = _conf.get(None, 'transports') self.network.register(interfaces, ['json']) self.network.start_listeners(self.uris) # Start storage after network, proto etc since storage proxy expects them self.storage.start(cb=CalvinCB(self._storage_started_cb)) self.storage.add_node(self) # Start control API proxy_control_uri = _conf.get(None, 'control_proxy') _log.debug("Start control API on %s with uri: %s and proxy: %s" % (self.id, self.control_uri, proxy_control_uri)) if proxy_control_uri is not None: self.control.start(node=self, uri=proxy_control_uri, tunnel=True) else: if self.control_uri is not None: self.control.start(node=self, uri=self.control_uri, external_uri=self.external_control_uri) def stop(self, callback=None): # TODO: Handle blocking in poorly implemented calvinsys/runtime south. self.quitting = True def stopped(*args): _log.analyze(self.id, "+", {'args': args}) _log.debug(args) self.sched.stop() _log.analyze(self.id, "+ SCHED STOPPED", {'args': args}) self.control.stop() _log.analyze(self.id, "+ CONTROL STOPPED", {'args': args}) def deleted_node(*args, **kwargs): _log.analyze(self.id, "+", {'args': args, 'kwargs': kwargs}) self.storage.stop(stopped) _log.analyze(self.id, "+", {}) self.storage.delete_node(self, cb=deleted_node) self.cpu_monitor.stop() self.mem_monitor.stop() for link in self.network.list_direct_links(): self.network.link_get(link).close() def stop_with_cleanup(self): # Set timeout in case some actor is refusing to stop (or leave if already migrating) timeout = async.DelayedCall(50, self.stop) self.quitting = True # get all actors if not self.am.actors: # No actors, we're basically done return self.stop() actors = [] for actor in self.am.actors.values(): # Do not delete migrating actors (for now) if actor._migrating_to is None: actors.append(actor) # delete all actors for actor in actors: self.am.destroy(actor.id) # and die - hopefully, things should clean up nicely within reasonable time def poll_deleted(retry): if self.am.actors: _log.info("{} actors remaining, rechecking in {} secs".format(len(self.am.actors))) async.DelayedCall(1*retry, poll_deleted) else : _log.info("All done, exiting") timeout.cancel() self.stop() async.DelayedCall(0.5, poll_deleted, retry=1) def stop_with_migration(self, callback=None): # Set timeout if we are still failing after 50 seconds timeout_stop = async.DelayedCall(50, self.stop) self.quitting = True actors = [] already_migrating = [] if not self.am.actors: return self.stop(callback) for actor in self.am.actors.values(): if actor._migrating_to is None: actors.append(actor) else: already_migrating.append(actor.id) def poll_migrated(): # When already migrating, we can only poll, since we don't get the callback if self.am.actors: # Check again in a sec async.DelayedCall(1, poll_migrated) return timeout_stop.cancel() self.stop(callback) def migrated(actor_id, **kwargs): actor = self.am.actors.get(actor_id, None) status = kwargs['status'] if actor is not None: # Failed to migrate according to requirements, try the current known peers peer_ids = self.network.list_direct_links() if peer_ids: # This will remove the actor from the list of actors self.am.robust_migrate(actor_id, peer_ids, callback=CalvinCB(migrated, actor_id=actor_id)) return else: # Ok, we have failed migrate actor according to requirements and to any known peer # FIXME find unknown peers and try migrate to them, now just destroy actor, so storage is cleaned _log.error("Failed to evict actor %s before quitting" % actor_id) self.am.destroy(actor_id) if self.am.actors: return timeout_stop.cancel() self.stop(callback) if already_migrating: async.DelayedCall(1, poll_migrated) if not actors: return elif not actors: # No actors return self.stop(callback) # Migrate the actors according to their requirements # (even actors without explicit requirements will migrate based on e.g. requires and port property needs) for actor in actors: if actor._replication_id.terminate_with_node(actor.id): _log.info("TERMINATE REPLICA") self.rm.terminate(actor.id, callback=CalvinCB(migrated, actor_id=actor.id)) else: _log.info("TERMINATE MIGRATE ACTOR") self.am.update_requirements(actor.id, [], extend=True, move=True, authorization_check=False, callback=CalvinCB(migrated, actor_id=actor.id)) def _storage_started_cb(self, *args, **kwargs): self.authentication.find_authentication_server() self.authorization.register_node()
class Node(object): """A node of calvin the uri is a list of server connection points the control_uri is the local console attributes is a supplied list of external defined attributes that will be used as the key when storing index such as name of node """ def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri self.external_uri = attributes.pop('external_uri', self.uri) \ if attributes else self.uri self.external_control_uri = attributes.pop('external_control_uri', self.control_uri) \ if attributes else self.control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.node_name = self.attributes.get_node_name_as_str() # Obtain node id, when using security also handle runtime certificate self.id = certificate.obtain_cert_node_info(self.node_name)['id'] self.authentication = authentication.Authentication(self) self.authorization = authorization.Authorization(self) try: self.domain = _conf.get("security", "security_domain_name") # cert_name is the node's certificate filename (without file extension) self.cert_name = certificate.get_own_cert_name(self.node_name) except: self.domain = None self.cert_name = None self.metering = metering.set_metering(metering.Metering(self)) self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start) def insert_local_reply(self): msg_id = calvinuuid.uuid("LMSG") self.async_msg_ids[msg_id] = None return msg_id def set_local_reply(self, msg_id, reply): if msg_id in self.async_msg_ids: self.async_msg_ids[msg_id] = reply def connect(self, actor_id=None, port_name=None, port_dir=None, port_properties=None, port_id=None, peer_node_id=None, peer_actor_id=None, peer_port_name=None, peer_port_dir=None, peer_port_properties=None, peer_port_id=None, cb=None): if port_properties is None and port_dir is not None: port_properties = {'direction': port_dir} if peer_port_properties is None and peer_port_dir is not None: peer_port_properties = {'direction': peer_port_dir} self.pm.connect(actor_id=actor_id, port_name=port_name, port_properties=port_properties, port_id=port_id, peer_node_id=peer_node_id, peer_actor_id=peer_actor_id, peer_port_name=peer_port_name, peer_port_properties=peer_port_properties, peer_port_id=peer_port_id, callback=CalvinCB(self.logging_callback, preamble="connect cb") if cb is None else cb) def disconnect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, cb=None): _log.debug("disconnect(actor_id=%s, port_name=%s, port_dir=%s, port_id=%s)" % (actor_id if actor_id else "", port_name if port_name else "", port_dir if port_dir else "", port_id if port_id else "")) self.pm.disconnect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, callback=CalvinCB(self.logging_callback, preamble="disconnect cb") if cb is None else cb) def peersetup(self, peers, cb=None): """ Sets up a RT to RT communication channel, only needed if the peer can't be found in storage. peers: a list of peer uris, e.g. ["calvinip://127.0.0.1:5001"] """ _log.debug("peersetup(%s)" % (peers)) peers_copy = peers[:] peer_node_ids = {} if not cb: callback = CalvinCB(self.logging_callback, preamble="peersetup cb") else: callback = CalvinCB(self.peersetup_collect_cb, peers=peers_copy, peer_node_ids=peer_node_ids, org_cb=cb) self.network.join(peers, callback=callback) def peersetup_collect_cb(self, status, uri, peer_node_id, peer_node_ids, peers, org_cb): if uri in peers: peers.remove(uri) peer_node_ids[uri] = (peer_node_id, status) if not peers: # Get highest status, i.e. any error comb_status = max([s for _, s in peer_node_ids.values()]) org_cb(peer_node_ids=peer_node_ids, status=comb_status) def logging_callback(self, preamble=None, *args, **kwargs): _log.debug("\n%s# NODE: %s \n# %s %s %s \n%s" % ('#' * 40, self.id, preamble if preamble else "*", args, kwargs, '#' * 40)) def new(self, actor_type, args, deploy_args=None, state=None, prev_connections=None, connection_list=None): # TODO requirements should be input to am.new # TODO: make it possible to use security/credentials here. actor_def, signer = self.am.lookup_and_verify(actor_type) actor_id = self.am.new(actor_type, args, state, prev_connections, connection_list, signature=deploy_args['signature'] if deploy_args and 'signature' in deploy_args else None, actor_def=actor_def) if deploy_args: app_id = deploy_args['app_id'] if 'app_name' not in deploy_args: app_name = app_id else: app_name = deploy_args['app_name'] self.app_manager.add(app_id, actor_id, deploy_info = deploy_args['deploy_info'] if 'deploy_info' in deploy_args else None) return actor_id def calvinsys(self): """Return a CalvinSys instance""" # FIXME: We still need to sort out actor requirements vs. node capabilities and user permissions. # @TODO: Write node capabilities to storage return self._calvinsys # # Event loop # def run(self): """main loop on node""" _log.debug("Node %s is running" % self.id) self.sched.run() def start(self): """ Run once when main loop is started """ interfaces = _conf.get(None, 'transports') self.network.register(interfaces, ['json']) self.network.start_listeners(self.uri) # Start storage after network, proto etc since storage proxy expects them self.storage.start(cb=CalvinCB(self._storage_started_cb)) self.storage.add_node(self) # Start control API proxy_control_uri = _conf.get(None, 'control_proxy') _log.debug("Start control API on %s with uri: %s and proxy: %s" % (self.id, self.control_uri, proxy_control_uri)) if proxy_control_uri is not None: self.control.start(node=self, uri=proxy_control_uri, tunnel=True) else: if self.control_uri is not None: self.control.start(node=self, uri=self.control_uri, external_uri=self.external_control_uri) def stop(self, callback=None): def stopped(*args): _log.analyze(self.id, "+", {'args': args}) _log.debug(args) self.sched.stop() _log.analyze(self.id, "+ SCHED STOPPED", {'args': args}) self.control.stop() _log.analyze(self.id, "+ CONTROL STOPPED", {'args': args}) def deleted_node(*args, **kwargs): _log.analyze(self.id, "+", {'args': args, 'kwargs': kwargs}) self.storage.stop(stopped) _log.analyze(self.id, "+", {}) self.storage.delete_node(self, cb=deleted_node) def _storage_started_cb(self, *args, **kwargs): self.authorization.register_node()
class Node(object): """A node of calvin the uri is used as server connection point the control_uri is the local console attributes is a supplied list of external defined attributes that will be used as the key when storing index such as name of node """ def __init__(self, uri, control_uri, attributes=None): super(Node, self).__init__() self.uri = uri self.control_uri = control_uri try: self.attributes = AttributeResolver(attributes) except: _log.exception("Attributes not correct, uses empty attribute!") self.attributes = AttributeResolver(None) self.id = calvinuuid.uuid("NODE") self.monitor = Event_Monitor() self.am = actormanager.ActorManager(self) self.control = calvincontrol.get_calvincontrol() _scheduler = scheduler.DebugScheduler if _log.getEffectiveLevel() <= logging.DEBUG else scheduler.Scheduler self.sched = _scheduler(self, self.am, self.monitor) self.control.start(node=self, uri=control_uri) self.async_msg_ids = {} self._calvinsys = CalvinSys(self) # Default will multicast and listen on all interfaces # TODO: be able to specify the interfaces # @TODO: Store capabilities self.storage = storage.Storage(self) self.network = CalvinNetwork(self) self.proto = CalvinProto(self, self.network) self.pm = PortManager(self, self.proto) self.app_manager = appmanager.AppManager(self) # The initialization that requires the main loop operating is deferred to start function async.DelayedCall(0, self.start) def insert_local_reply(self): msg_id = calvinuuid.uuid("LMSG") self.async_msg_ids[msg_id] = None return msg_id def set_local_reply(self, msg_id, reply): if msg_id in self.async_msg_ids: self.async_msg_ids[msg_id] = reply def connect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, peer_node_id=None, peer_actor_id=None, peer_port_name=None, peer_port_dir=None, peer_port_id=None, cb=None): self.pm.connect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, peer_node_id=peer_node_id, peer_actor_id=peer_actor_id, peer_port_name=peer_port_name, peer_port_dir=peer_port_dir, peer_port_id=peer_port_id, callback=CalvinCB(self.logging_callback, preamble="connect cb") if cb is None else cb) def disconnect(self, actor_id=None, port_name=None, port_dir=None, port_id=None, cb=None): _log.debug("disconnect(actor_id=%s, port_name=%s, port_dir=%s, port_id=%s)" % (actor_id if actor_id else "", port_name if port_name else "", port_dir if port_dir else "", port_id if port_id else "")) self.pm.disconnect(actor_id=actor_id, port_name=port_name, port_dir=port_dir, port_id=port_id, callback=CalvinCB(self.logging_callback, preamble="disconnect cb") if cb is None else cb) def peersetup(self, peers, cb=None): """ Sets up a RT to RT communication channel, only needed if the peer can't be found in storage. peers: a list of peer uris, e.g. ["calvinip://127.0.0.1:5001"] """ _log.debug("peersetup(%s)" % (peers)) peers_copy = peers[:] peer_node_ids = {} self.network.join(peers, callback=CalvinCB(self.logging_callback, preamble="peersetup cb") if cb is None else CalvinCB(self.peersetup_collect_cb, peers=peers_copy, peer_node_ids=peer_node_ids, org_cb=cb)) def peersetup_collect_cb(self, status, uri, peer_node_id, peer_node_ids, peers, org_cb): if uri in peers: peers.remove(uri) peer_node_ids[uri] = (peer_node_id, status) if not peers: # Get highest status, i.e. any error comb_status = max([s for _, s in peer_node_ids.values()]) org_cb(peer_node_ids=peer_node_ids, status=comb_status) def logging_callback(self, preamble=None, *args, **kwargs): _log.debug("\n%s# NODE: %s \n# %s %s %s \n%s" % ('#' * 40, self.id, preamble if preamble else "*", args, kwargs, '#' * 40)) def new(self, actor_type, args, deploy_args=None, state=None, prev_connections=None, connection_list=None): actor_id = self.am.new(actor_type, args, state, prev_connections, connection_list, signature=deploy_args['signature'] if deploy_args and 'signature' in deploy_args else None) if deploy_args: app_id = deploy_args['app_id'] if 'app_name' not in deploy_args: app_name = app_id else: app_name = deploy_args['app_name'] self.app_manager.add(app_id, app_name, actor_id) return actor_id def deployment_control(self, app_id, actor_id, deploy_args): """ Updates an actor's deployment """ self.am.deployment_control(app_id, actor_id, deploy_args) def calvinsys(self): """Return a CalvinSys instance""" # FIXME: We still need to sort out actor requirements vs. node capabilities and user permissions. # @TODO: Write node capabilities to storage return self._calvinsys # # Event loop # def run(self): """main loop on node""" _log.debug("Node %s is running" % self.id) self.sched.run() def start(self): """ Run once when main loop is started """ # FIXME hardcoded which transport and encoder plugin we use, should be based on self.network.register(['calvinip'], ['json']) self.network.start_listeners([self.uri]) # Start storage after network, proto etc since storage proxy expects them self.storage.start() self.storage.add_node(self) def stop(self, callback=None): def stopped(*args): _log.analyze(self.id, "+", {'args': args}) _log.debug(args) self.sched.stop() _log.analyze(self.id, "+ SCHED STOPPED", {'args': args}) self.control.stop() _log.analyze(self.id, "+ CONTROL STOPPED", {'args': args}) def deleted_node(*args, **kwargs): _log.analyze(self.id, "+", {'args': args, 'kwargs': kwargs}) self.storage.stop(stopped) _log.analyze(self.id, "+", {}) self.storage.delete_node(self, cb=deleted_node)