def register(self):
    """Register this core with the node and adopt the core id it hands back.

    Sends a ``node._hello.<pid>`` message (carrying a fresh message id, the
    pid and the priority) through ``self.forward`` and then reads from
    ``self.sub`` until a ``_hey`` reply echoing that message id arrives.
    On success the returned core id is stored in ``self.coreid`` and the
    subscriptions of ``self.sub`` are switched from the empty prefix to
    ``global``, ``local``, ``notlocal`` and the core id itself.

    Returns:
        True when a matching ``_hey`` reply was received, False when
        receiving or re-subscribing raised ``nn.NanoMsgAPIError``.
    """
    # NOTE(review): 500 is presumably a receive timeout in milliseconds --
    # confirm against set_node_timeout.
    self.set_node_timeout(500)
    msgid = remap_utils.unique_id()
    logger.info("Registering with node")
    self.forward(
        "node._hello.%d" % (self.pid),
        {"msgid": msgid, "pid": self.pid, "priority": self.priority},
    )

    # The while loop will terminate as soon as node stops sending messages,
    # so this should be safe to do.
    while True:
        try:
            msg = self.sub.recv()
            msgprefix, data = remap_utils.unpack_msg(msg)
            # Only the message type matters here; recipient and sender are
            # not needed to recognise our reply.
            _, msgtype, _ = remap_utils.split_prefix(msgprefix)
            if msgtype != "_hey":
                continue
            if remap_utils.safe_get(data, "msgid") != msgid:
                # A reply to some other registration request; keep waiting.
                continue

            # this is us
            self.coreid = remap_utils.safe_get(data, "coreid")
            # Replace the empty-prefix subscription with the specific
            # prefixes this core listens to from now on.
            self.sub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, "")
            for topic in ("global", "local", "notlocal", self.coreid):
                self.sub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, topic)
            logger.info("Received coreid %s.", self.coreid)
            return True
        except nn.NanoMsgAPIError as err:
            # Include the underlying reason so a plain timeout can be told
            # apart from other socket failures in the log.
            logger.error("Node is currently not available. (%s)", err)
            break

    logger.error("Registration failed")
    return False
def process_hello(self, data):
    """Record a core that announced itself and answer it with a ``_hey``.

    Reads ``msgid``, ``pid`` and ``priority`` from ``data``, derives the core
    id from this node's id and the pid, stores a bookkeeping entry in
    ``self.cores`` and publishes ``<coreid>._hey.<nodeid>`` on ``self.lpub``
    echoing the message id together with the assigned core id.
    """
    request_id = remap_utils.safe_get(data, "msgid")
    core_pid = remap_utils.safe_get(data, "pid")
    core_priority = remap_utils.safe_get(data, "priority")
    coreid = remap_utils.core_id(self.nodeid, core_pid)

    # NOTE(review): progress -1 presumably means "nothing reported yet" --
    # confirm against the code that updates it.
    self.cores[coreid] = dict(
        coreid=coreid,
        ts_last_seen=time.time(),
        progress=-1,
        pid=core_pid,
        priority=core_priority,
    )

    reply_prefix = "%s._hey.%s" % (coreid, self.nodeid)
    reply_body = {"msgid": request_id, "coreid": coreid}
    reply = remap_utils.pack_msg(reply_prefix, reply_body)

    logger.info("A core registered %s" % coreid)
    self.lpub.send(reply)
def handle_jobstart(self, recipientid, senderid, data):
    """Start a job if enough CPUs can be committed, otherwise reject it.

    The number of requested cores is the length of ``data["cores"]``. When
    the free plus interruptable CPUs reported by ``self.hw.available_cpus``
    cover that number, ``self.hw.start_job`` is invoked; otherwise a
    ``<senderid>.rejectjob.<nodeid>`` message is forwarded to the broker.
    """
    priority = remap_utils.safe_get(data, "priority")
    avail, interruptable = self.hw.available_cpus(priority, self.cores)
    numcores = len(remap_utils.safe_get(data, "cores"))
    committable = avail + interruptable

    if committable < numcores:
        # Something changed in the meantime. Reject
        logger.info(
            "Initiator requested %d cores, %d can be committed. Rejecting"
            % (numcores, committable)
        )
        rejection = remap_utils.pack_msg(
            "%s.rejectjob.%s" % (senderid, self.nodeid), {}
        )
        self.forward_to_broker(rejection)
    else:
        logger.info("Starting job with %d cores" % numcores)
        started = self.hw.start_job(self.remaproot, senderid, numcores, data)
        if not started:
            logger.error("Error starting job")

    # NOTE(review): flag is raised on both paths here; confirm it was not
    # meant to be limited to the rejection branch.
    self.coresChanged = True
def handle_jobstart(self, recipientid, senderid, data):
    """React to a job start request from an initiator.

    Compares the number of entries in ``data["cores"]`` with the CPUs that
    ``self.hw.available_cpus`` reports as free or interruptable for the
    request's priority. If they suffice, the job is started through
    ``self.hw.start_job``; if not, a ``<senderid>.rejectjob.<nodeid>``
    message with an empty payload goes to the broker.
    """
    requested_priority = remap_utils.safe_get(data, "priority")
    avail, interruptable = self.hw.available_cpus(
        requested_priority, self.cores
    )
    requested_cores = remap_utils.safe_get(data, "cores")
    numcores = len(requested_cores)

    enough_capacity = (avail + interruptable) >= numcores
    if enough_capacity:
        logger.info("Starting job with %d cores" % numcores)
        if not self.hw.start_job(self.remaproot, senderid, numcores, data):
            logger.error("Error starting job")
    else:
        # Something changed in the meantime. Reject
        log_args = (numcores, avail + interruptable)
        logger.info(
            "Initiator requested %d cores, %d can be committed. Rejecting"
            % log_args
        )
        reject_prefix = "%s.rejectjob.%s" % (senderid, self.nodeid)
        self.forward_to_broker(remap_utils.pack_msg(reject_prefix, {}))

    # NOTE(review): set regardless of the branch taken; confirm this was not
    # intended to apply to the rejection branch only.
    self.coresChanged = True
def process_hello(self, data):
    """Register the core described by ``data`` and acknowledge it.

    A core id is built from ``self.nodeid`` and the pid found in ``data``.
    The core is entered into ``self.cores`` with the current time as
    ``ts_last_seen``, and a ``<coreid>._hey.<nodeid>`` message carrying the
    original ``msgid`` and the new ``coreid`` is sent on ``self.lpub``.
    """
    fields = [
        remap_utils.safe_get(data, key)
        for key in ("msgid", "pid", "priority")
    ]
    msgid, pid, priority = fields
    coreid = remap_utils.core_id(self.nodeid, pid)

    entry = {"coreid": coreid}
    entry["ts_last_seen"] = time.time()
    # NOTE(review): -1 looks like a "no progress known" marker -- confirm.
    entry["progress"] = -1
    entry["pid"] = pid
    entry["priority"] = priority
    self.cores[coreid] = entry

    hey = remap_utils.pack_msg(
        "%s._hey.%s" % (coreid, self.nodeid),
        {"msgid": msgid, "coreid": coreid},
    )
    logger.info("A core registered %s" % coreid)
    self.lpub.send(hey)
def handle_showhands(self, recipientid, senderid, data):
    """Volunteer this node's CPUs to the tracker when any can be offered.

    Asks ``self.hw.available_cpus`` for the free and interruptable CPU
    counts at the priority given in ``data``. If either count is positive, a
    ``tracker.raisehand.<nodeid>`` message reporting both numbers is
    forwarded to the broker; otherwise nothing is sent.
    """
    priority = remap_utils.safe_get(data, "priority")
    avail, interruptable = self.hw.available_cpus(priority, self.cores)

    # Nothing to offer: stay silent.
    if not (avail > 0 or interruptable > 0):
        return

    logger.info(
        "Volunteering with %d cores, %d interruptable"
        % (avail, interruptable)
    )
    offer = {"free": avail, "interruptable": interruptable}
    raisehand = remap_utils.pack_msg(
        "tracker.raisehand.%s" % self.nodeid, offer
    )
    self.forward_to_broker(raisehand)
def handle_showhands(self, recipientid, senderid, data):
    """Answer a show-of-hands request with the CPUs this node can commit.

    The free and interruptable CPU counts come from
    ``self.hw.available_cpus`` for the priority found in ``data``. A
    ``tracker.raisehand.<nodeid>`` message with both counts is forwarded to
    the broker only when at least one of them is greater than zero.
    """
    counts = self.hw.available_cpus(
        remap_utils.safe_get(data, "priority"), self.cores
    )
    avail, interruptable = counts

    has_capacity = avail > 0 or interruptable > 0
    if has_capacity:
        logger.info(
            "Volunteering with %d cores, %d interruptable"
            % (avail, interruptable)
        )
        prefix = "tracker.raisehand.%s" % self.nodeid
        payload = {"free": avail, "interruptable": interruptable}
        self.forward_to_broker(remap_utils.pack_msg(prefix, payload))