Example #1
0
    def register(self):
        """Announce this core to the node and wait for the matching reply.

        A "_hello" message carrying a fresh message id, this core's pid and
        its priority is forwarded to the node.  Incoming messages are then
        read until a "_hey" message echoing that message id arrives; the
        "coreid" it carries is stored on ``self.coreid`` and the subscriber
        socket is re-subscribed to "global", "local", "notlocal" and that
        core id.

        Returns:
            True once the matching reply has been processed, False if the
            socket raises ``nn.NanoMsgAPIError`` before that happens.
        """
        self.set_node_timeout(500)
        request_id = remap_utils.unique_id()

        logger.info("Registering with node")
        hello_topic = "node._hello.%d" % (self.pid)
        self.forward(
            hello_topic,
            {"msgid": request_id, "pid": self.pid, "priority": self.priority},
        )

        # The loop only ends through one of the two returns below: either our
        # reply shows up, or the socket raises (presumably a receive timeout
        # once the node stops sending -- confirm against set_node_timeout).
        while True:
            try:
                prefix, payload = remap_utils.unpack_msg(self.sub.recv())
                _recipient, kind, _sender = remap_utils.split_prefix(prefix)

                # Only a "_hey" that echoes our message id is addressed to us;
                # everything else is skipped.
                is_our_reply = (
                    kind == "_hey"
                    and remap_utils.safe_get(payload, "msgid") == request_id
                )
                if is_our_reply:
                    self.coreid = remap_utils.safe_get(payload, "coreid")

                    # Swap the empty-prefix subscription for the specific
                    # channels this core listens on from now on.
                    self.sub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, "")
                    for channel in ("global", "local", "notlocal", self.coreid):
                        self.sub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, channel)

                    logger.info("Received coreid %s." % (self.coreid))
                    return True
            except nn.NanoMsgAPIError:
                logger.error("Node is currently not available.")
                logger.error("Registration failed")
                return False
Example #2
0
    def start_job(self, jobdata):
        """Validate *jobdata* and begin preparing a new job on this monitor.

        All checks on the request are performed before any monitor state is
        modified, so a request rejected by validation leaves ``job_status``
        and the current job subscription untouched and a corrected request
        can be submitted afterwards.

        Args:
            jobdata: dict describing the job.  Must contain "type",
                "priority", "parallellism" and "app".  May contain "jobid";
                if present it is removed from the dict and used as the job
                id, otherwise a new id is generated.  The dict is then passed
                to the plugin's ``create_manager``.

        Returns:
            A dict of the form ``{"jobid": <job id>}``.

        Raises:
            RemapException: if ``job_status`` is not "waiting", a required
                key is missing, or the app is not in ``self.list_apps()``.
        """
        if self.job_status != "waiting":
            raise RemapException(
                "A job is currently in progress on this monitor")

        # Validate the whole request up front; nothing below this section may
        # run for a request that is going to be rejected.
        if "type" not in jobdata:
            raise RemapException("Must have job type specified")
        if "priority" not in jobdata:
            raise RemapException("Must have priority specified")
        if "parallellism" not in jobdata:
            raise RemapException("Must have parallellism specified")
        if "app" not in jobdata:
            raise RemapException("The name of the app must be provided")
        if jobdata["app"] not in self.list_apps():
            raise RemapException("No such application: %s" % (jobdata["app"]))

        self.job_status = "preparing"
        self.prepare_start = time.time()

        self.jobtype = jobdata["type"]
        self.priority = jobdata["priority"]
        self.parallellism = jobdata["parallellism"]
        plugin = self.load_plugin(self.jobtype)
        self.rejectedtasks = {}
        self.completedtasks = {}

        # Stop listening for the previous job's messages before switching id.
        if self.jobid is not None:
            self.bsub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, self.jobid)

        if "jobid" in jobdata:
            # Caller-supplied id; removed so it is not passed on to the
            # manager as part of the job description.
            self.jobid = jobdata["jobid"]
            del jobdata["jobid"]
        else:
            self.jobid = remap_utils.unique_id()

        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, self.jobid)

        config = {"jobid": self.jobid, "remaproot": self.remaproot}

        logger.info("Started a new job: %s", self.jobid)
        self.manager = plugin.create_manager(jobdata, config)

        if (time.time() - self.refreshed) > 60:
            # Not refreshed > 60s
            self.refresh_nodes(self.priority)
            # Wait for a bunch of nodes to advertise themselves
            r = Timer(1.0, self.resume, ())
            r.start()
        else:
            self.resume()

        return {"jobid": self.jobid}
Example #3
0
    def start_job(self, jobdata):
        """Validate *jobdata* and begin preparing a new job on this monitor.

        All checks on the request are performed before any monitor state is
        modified, so a request rejected by validation leaves ``job_status``
        and the current job subscription untouched and a corrected request
        can be submitted afterwards.

        Args:
            jobdata: dict describing the job.  Must contain "type",
                "priority", "parallellism" and "app".  May contain "jobid";
                if present it is removed from the dict and used as the job
                id, otherwise a new id is generated.  The dict is then passed
                to the plugin's ``create_manager``.

        Returns:
            A dict of the form ``{"jobid": <job id>}``.

        Raises:
            RemapException: if ``job_status`` is not "waiting", a required
                key is missing, or the app is not in ``self.list_apps()``.
        """
        if self.job_status != "waiting":
            raise RemapException("A job is currently in progress on this monitor")

        # Validate the whole request up front; nothing below this section may
        # run for a request that is going to be rejected.
        if "type" not in jobdata:
            raise RemapException("Must have job type specified")
        if "priority" not in jobdata:
            raise RemapException("Must have priority specified")
        if "parallellism" not in jobdata:
            raise RemapException("Must have parallellism specified")
        if "app" not in jobdata:
            raise RemapException("The name of the app must be provided")
        if jobdata["app"] not in self.list_apps():
            raise RemapException("No such application: %s" % (jobdata["app"]))

        self.job_status = "preparing"
        self.prepare_start = time.time()

        self.jobtype = jobdata["type"]
        self.priority = jobdata["priority"]
        self.parallellism = jobdata["parallellism"]
        plugin = self.load_plugin(self.jobtype)
        self.rejectedtasks = {}
        self.completedtasks = {}

        # Stop listening for the previous job's messages before switching id.
        if self.jobid is not None:
            self.bsub.set_string_option(nn.SUB, nn.SUB_UNSUBSCRIBE, self.jobid)

        if "jobid" in jobdata:
            # Caller-supplied id; removed so it is not passed on to the
            # manager as part of the job description.
            self.jobid = jobdata["jobid"]
            del jobdata["jobid"]
        else:
            self.jobid = remap_utils.unique_id()

        self.bsub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, self.jobid)

        config = {"jobid": self.jobid, "remaproot": self.remaproot}

        logger.info("Started a new job: %s", self.jobid)
        self.manager = plugin.create_manager(jobdata, config)

        if (time.time() - self.refreshed) > 60:
            # Not refreshed > 60s
            self.refresh_nodes(self.priority)
            # Wait for a bunch of nodes to advertise themselves
            r = Timer(1.0, self.resume, ())
            r.start()
        else:
            self.resume()

        return {"jobid": self.jobid}