def register(self):
    """Announce this process to the node and wait for its matching reply.

    A ``_hello`` message carrying a freshly generated message id, our pid
    and our priority is forwarded to the node.  Incoming messages are then
    read until a ``_hey`` message echoing that same message id arrives; its
    ``coreid`` is stored on ``self.coreid`` and the subscriber socket is
    re-subscribed to the ``global``, ``local``, ``notlocal`` and core-id
    prefixes.

    Returns:
        True once a core id has been received, False if a nanomsg API
        error interrupted the wait.
    """
    # NOTE(review): 500 is presumably a receive timeout in milliseconds;
    # confirm against set_node_timeout.
    self.set_node_timeout(500)
    hello_id = remap_utils.unique_id()

    logger.info("Registering with node")
    hello_payload = {"msgid": hello_id, "pid": self.pid, "priority": self.priority}
    self.forward("node._hello.%d" % self.pid, hello_payload)

    # The loop has no exit condition of its own: it ends either by
    # returning on a matching reply or through a NanoMsgAPIError raised
    # anywhere in its body (e.g. when the node stops sending messages).
    try:
        while True:
            raw = self.sub.recv()
            prefix, payload = remap_utils.unpack_msg(raw)
            _recipient, kind, _sender = remap_utils.split_prefix(prefix)

            # Only a "_hey" that echoes our own message id is meant for us.
            if kind == "_hey" and remap_utils.safe_get(payload, "msgid") == hello_id:
                self.coreid = remap_utils.safe_get(payload, "coreid")

                # Drop the catch-all subscription, then listen only on the
                # shared prefixes and on our own core id.
                self.sub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, "")
                for topic in ("global", "local", "notlocal", self.coreid):
                    self.sub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, topic)

                logger.info("Received coreid %s." % self.coreid)
                return True
    except nn.NanoMsgAPIError:
        logger.error("Node is currently not available.")

    logger.error("Registration failed")
    return False
def start_job(self, jobdata):
    """Validate a job request, set the monitor up for it and start it.

    Args:
        jobdata: dict describing the job.  Must contain ``type``,
            ``priority``, ``parallellism`` and ``app``; may contain
            ``jobid``.  A supplied ``jobid`` is removed from the dict
            before it is handed to the plugin's manager, so the caller's
            dict is modified.

    Returns:
        A dict ``{"jobid": <id>}`` holding the supplied or generated job id.

    Raises:
        RemapException: if a job is already in progress, a required key is
            missing, or the named app is not in ``self.list_apps()``.
    """
    if self.job_status != "waiting":
        raise RemapException("A job is currently in progress on this monitor")

    # Validate the complete request BEFORE touching any state.  A request
    # rejected after job_status has left "waiting" would leave the monitor
    # refusing every later job.
    if "type" not in jobdata:
        raise RemapException("Must have job type specified")
    if "priority" not in jobdata:
        raise RemapException("Must have priority specified")
    if "parallellism" not in jobdata:
        raise RemapException("Must have parallellism specified")
    if "app" not in jobdata:
        raise RemapException("The name of the app must be provided")
    if jobdata["app"] not in self.list_apps():
        raise RemapException("No such application: %s" % (jobdata["app"]))

    self.job_status = "preparing"
    self.prepare_start = time.time()
    self.jobtype = jobdata["type"]
    self.priority = jobdata["priority"]
    self.parallellism = jobdata["parallellism"]

    # NOTE(review): if load_plugin or create_manager raises, job_status
    # stays "preparing"; confirm whether something else resets it.
    plugin = self.load_plugin(self.jobtype)
    self.rejectedtasks = {}
    self.completedtasks = {}

    # Stop listening for the previous job's messages before switching ids.
    if self.jobid is not None:
        self.bsub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, self.jobid)

    if "jobid" in jobdata:
        self.jobid = jobdata.pop("jobid")
    else:
        self.jobid = remap_utils.unique_id()
    self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, self.jobid)

    config = {"jobid": self.jobid, "remaproot": self.remaproot}
    logger.info("Started a new job: %s" % (self.jobid))
    self.manager = plugin.create_manager(jobdata, config)

    if (time.time() - self.refreshed) > 60:
        # Not refreshed > 60s
        self.refresh_nodes(self.priority)
        # Wait for a bunch of nodes to advertise themselves
        r = Timer(1.0, self.resume, ())
        r.start()
    else:
        self.resume()

    return {"jobid": self.jobid}
def start_job(self, jobdata):
    """Validate a job request, set the monitor up for it and start it.

    Args:
        jobdata: dict describing the job.  Must contain ``type``,
            ``priority``, ``parallellism`` and ``app``; may contain
            ``jobid``.  A supplied ``jobid`` is removed from the dict
            before it is handed to the plugin's manager, so the caller's
            dict is modified.

    Returns:
        A dict ``{"jobid": <id>}`` holding the supplied or generated job id.

    Raises:
        RemapException: if a job is already in progress, a required key is
            missing, or the named app is not in ``self.list_apps()``.
    """
    if self.job_status != "waiting":
        raise RemapException("A job is currently in progress on this monitor")

    # Validate the complete request BEFORE touching any state.  A request
    # rejected after job_status has left "waiting" would leave the monitor
    # refusing every later job.
    if "type" not in jobdata:
        raise RemapException("Must have job type specified")
    if "priority" not in jobdata:
        raise RemapException("Must have priority specified")
    if "parallellism" not in jobdata:
        raise RemapException("Must have parallellism specified")
    if "app" not in jobdata:
        raise RemapException("The name of the app must be provided")
    if jobdata["app"] not in self.list_apps():
        raise RemapException("No such application: %s" % (jobdata["app"]))

    self.job_status = "preparing"
    self.prepare_start = time.time()
    self.jobtype = jobdata["type"]
    self.priority = jobdata["priority"]
    self.parallellism = jobdata["parallellism"]

    # NOTE(review): if load_plugin or create_manager raises, job_status
    # stays "preparing"; confirm whether something else resets it.
    plugin = self.load_plugin(self.jobtype)
    self.rejectedtasks = {}
    self.completedtasks = {}

    # Stop listening for the previous job's messages before switching ids.
    if self.jobid is not None:
        self.bsub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, self.jobid)

    if "jobid" in jobdata:
        self.jobid = jobdata.pop("jobid")
    else:
        self.jobid = remap_utils.unique_id()
    self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, self.jobid)

    config = {"jobid": self.jobid, "remaproot": self.remaproot}
    logger.info("Started a new job: %s" % (self.jobid))
    self.manager = plugin.create_manager(jobdata, config)

    if (time.time() - self.refreshed) > 60:
        # Not refreshed > 60s
        self.refresh_nodes(self.priority)
        # Wait for a bunch of nodes to advertise themselves
        r = Timer(1.0, self.resume, ())
        r.start()
    else:
        self.resume()

    return {"jobid": self.jobid}