def status(self):
    """
    Report this application's state as one of the ``AS.STATUS_*`` constants.

    ``self.checkpid()`` is always invoked first. When the application is not
    flagged as started, or either ``self._pid`` or ``self._ppid`` is unset,
    ``AS.STATUS_NOT_STARTED`` is returned without any remote query.
    Otherwise ``rspawn.remote_status`` is asked about the pid/ppid pair on
    ``self.node`` and its answer is translated:

    * ``rspawn.RUNNING``  -> ``AS.STATUS_RUNNING``
    * ``rspawn.FINISHED`` -> ``AS.STATUS_FINISHED``
    * anything else (including ``rspawn.NOT_STARTED``)
      -> ``AS.STATUS_NOT_STARTED``
    """
    self.checkpid()

    # Without a started flag and a complete pid/ppid pair there is nothing
    # to look up remotely.
    if not (self._started and self._pid and self._ppid):
        return AS.STATUS_NOT_STARTED

    remote_state = rspawn.remote_status(
        self._pid, self._ppid,
        host=self.node.hostname,
        port=None,
        user=self.node.slicename,
        agent=None,
        ident_key=self.node.ident_path,
        server_key=self.node.server_key,
    )

    # The rspawn codes are compared by identity, not equality.
    if remote_state is rspawn.RUNNING:
        return AS.STATUS_RUNNING
    if remote_state is rspawn.FINISHED:
        return AS.STATUS_FINISHED

    # rspawn.NOT_STARTED and any unrecognised answer are both reported as
    # "not started".
    return AS.STATUS_NOT_STARTED
def status(self):
    """
    Translate the remote process state into an ``AS.STATUS_*`` constant.

    Calls ``self.checkpid()``, then:

    * returns ``AS.STATUS_NOT_STARTED`` straight away when ``self._started``
      is false or when ``self._pid`` / ``self._ppid`` is missing;
    * otherwise queries ``rspawn.remote_status`` for the pid/ppid pair using
      the connection details found on ``self.node`` and maps the reply onto
      the matching ``AS`` constant. A reply that matches none of the known
      ``rspawn`` codes is reported as ``AS.STATUS_NOT_STARTED``.
    """
    self.checkpid()

    if not self._started:
        return AS.STATUS_NOT_STARTED
    if not (self._pid and self._ppid):
        return AS.STATUS_NOT_STARTED

    reply = rspawn.remote_status(
        self._pid, self._ppid,
        host=self.node.hostname,
        port=None,
        user=self.node.slicename,
        agent=None,
        ident_key=self.node.ident_path,
        server_key=self.node.server_key)

    # (remote code, local code) pairs, checked in order by identity.
    translation = (
        (rspawn.NOT_STARTED, AS.STATUS_NOT_STARTED),
        (rspawn.RUNNING, AS.STATUS_RUNNING),
        (rspawn.FINISHED, AS.STATUS_FINISHED),
    )
    for remote_code, local_code in translation:
        if reply is remote_code:
            return local_code

    # Unknown reply: treated the same as "not started".
    return AS.STATUS_NOT_STARTED
def status(self):
    """
    Return the raw ``rspawn`` status of the remote process.

    The node connection details are read from the object returned by
    ``self.local()`` (presumably a weak-reference dereference -- confirm
    against the class definition).

    Returns:
        ``rspawn.NOT_STARTED`` when ``self._started`` is false or when either
        ``self._pid`` or ``self._ppid`` is unset; otherwise whatever
        ``rspawn.remote_status`` reports for the pid/ppid pair.

    Raises:
        RuntimeError: ``self.local()`` returned a false value.
    """
    local = self.local()
    if not local:
        # Call-style raise works on both Python 2 and Python 3; the
        # ``raise Class, "message"`` form is a syntax error on Python 3.
        raise RuntimeError("Lost reference to local interface")

    self.checkpid()

    # No started flag or incomplete pid/ppid pair: nothing to query remotely.
    if not self._started or not self._pid or not self._ppid:
        return rspawn.NOT_STARTED

    return rspawn.remote_status(
        self._pid, self._ppid,
        host=local.node.hostname,
        port=None,
        user=local.node.slicename,
        agent=None,
        ident_key=local.node.ident_path,
        server_key=local.node.server_key)
def status(self):
    """
    Query the remote process state, expressed as an ``rspawn`` status code.

    Connection parameters come from ``local.node``, where ``local`` is the
    result of ``self.local()``.
    NOTE(review): ``self.local`` looks like a weak reference to the owning
    interface -- confirm.

    Returns:
        ``rspawn.NOT_STARTED`` if the object is not marked as started or has
        no complete pid/ppid pair; otherwise the value returned by
        ``rspawn.remote_status``.

    Raises:
        RuntimeError: if ``self.local()`` evaluates false.
    """
    local = self.local()
    if not local:
        # ``raise RuntimeError, "..."`` only parses on Python 2; the call
        # form below is equivalent there and also valid on Python 3.
        raise RuntimeError("Lost reference to local interface")

    self.checkpid()

    if not self._started:
        return rspawn.NOT_STARTED
    if not self._pid or not self._ppid:
        return rspawn.NOT_STARTED

    node = local.node
    return rspawn.remote_status(
        self._pid, self._ppid,
        host=node.hostname,
        port=None,
        user=node.slicename,
        agent=None,
        ident_key=node.ident_path,
        server_key=node.server_key,
    )
def _do_wait_build(self, trial=0):
    """
    Wait for the remote build process to end, then validate its build token.

    Nothing is done unless both ``self._build_pid`` and ``self._build_ppid``
    are set. Otherwise the method:

    1. Polls ``rspawn.remote_status`` until the build reports FINISHED, or
       until more than 12 consecutive polls return something other than
       RUNNING/FINISHED. Either exit clears the stored build pid/ppid.
    2. Reads ``build.token`` from ``self.home_path`` on the node (up to three
       attempts) and compares it with ``self._master_token``.
    3. On mismatch fetches the build log and then either blacklists the node
       (``check_bad_host``), resynchronises with the master and relaunches
       the build, retries, or raises.
    4. Drops the master key references and logs that the build is done.

    Parameters:
        trial: retry counter, 0 on the first call and incremented on every
            recursive retry; no retry is attempted once it reaches 3.

    Raises:
        RuntimeError: the token does not match, the host was not classified
            as bad, and no retries remain.
    """
    pid = self._build_pid
    ppid = self._build_ppid

    if pid and ppid:
        delay = 1.0
        first = True
        bustspin = 0
        while True:
            status = rspawn.remote_status(
                pid, ppid,
                host=self.node.hostname,
                port=None,
                user=self.node.slicename,
                agent=None,
                ident_key=self.node.ident_path,
                server_key=self.node.server_key,
                hostip=self.node.hostip)

            if status is rspawn.FINISHED:
                self._build_pid = self._build_ppid = None
                break
            elif status is not rspawn.RUNNING:
                # Neither running nor finished: count it and back off for
                # longer; give up after more than 12 such polls in a row.
                self._logger.warn(
                    "Busted waiting for %s to finish building at %s %s",
                    self, self.node.hostname,
                    "(build slave)" if self._master is not None else "(build master)")
                bustspin += 1
                time.sleep(delay * (5.5 + random.random()))
                if bustspin > 12:
                    self._build_pid = self._build_ppid = None
                    break
            else:
                if first:
                    self._logger.info(
                        "Waiting for %s to finish building at %s %s",
                        self, self.node.hostname,
                        "(build slave)" if self._master is not None else "(build master)")
                    first = False
                # Randomised delay that grows by 20% per poll, capped at 30.
                time.sleep(delay * (0.5 + random.random()))
                delay = min(30, delay * 1.2)
                bustspin = 0

        # check build token
        slave_token = ""
        token_path = os.path.join(self.home_path, 'build.token')
        for _attempt in range(3):
            (out, err), proc = self._popen_ssh_command(
                "cat %s" % token_path,
                timeout=120,
                noerrors=True)
            # Only a zero exit status with non-empty output counts.
            if not proc.wait() and out:
                slave_token = out.strip()

            if slave_token:
                break
            else:
                time.sleep(2)

        if slave_token != self._master_token:
            # Get buildlog for the error message
            (buildlog, err), proc = self._popen_ssh_command(
                "cat %s" % os.path.join(self.home_path, 'buildlog'),
                timeout=120,
                noerrors=True)
            proc.wait()

            # The parentheses matter: ``and`` binds tighter than ``or``, so
            # without them a 'BAD TOKEN' in ``err`` alone would take the
            # resync branch even with no master or no trials left.
            bad_token = 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err

            if self.check_bad_host(buildlog, err):
                self.node.blacklist()
            elif self._master and trial < 3 and bad_token:
                # bad sync with master, may try again
                # but first wait for master
                self._master.async_setup_wait()
                self._launch_build(trial + 1)
                return self._do_wait_build(trial + 1)
            elif trial < 3:
                # NOTE(review): both exits of the wait loop above clear
                # _build_pid/_build_ppid, so unless something else restores
                # them this retry appears to return without re-checking the
                # token -- confirm this is intended.
                return self._do_wait_build(trial + 1)
            else:
                # No longer need'em
                self._master_prk = None
                self._master_puk = None

                raise RuntimeError(
                    "Failed to set up application %s: "
                    "build failed, got wrong token from pid %s/%s "
                    "(expected %r, got %r), see buildlog at %s:\n%s" % (
                        self.home_path, pid, ppid, self._master_token,
                        slave_token, self.node.hostname, buildlog))

        # No longer need'em
        self._master_prk = None
        self._master_puk = None

        self._logger.info("Built %s at %s", self, self.node.hostname)
def _do_wait_build(self, trial=0):
    """
    Block until the remote build ends and check the token it produced.

    The method is a no-op unless ``self._build_pid`` and
    ``self._build_ppid`` are both set. When they are, it polls
    ``rspawn.remote_status`` until the build is FINISHED (or until more than
    12 consecutive polls report a state other than RUNNING/FINISHED), clears
    the stored pid/ppid, and then reads ``build.token`` from
    ``self.home_path`` on the node. If the token differs from
    ``self._master_token`` the build log is fetched and one of the following
    happens, in this order of precedence:

    * ``check_bad_host`` flags the host: the node is blacklisted;
    * a master exists, fewer than 3 trials were used and 'BAD TOKEN' appears
      in the build log or in stderr: wait for the master, relaunch the
      build and wait again;
    * fewer than 3 trials were used: call this method again;
    * otherwise: raise ``RuntimeError`` carrying the build log.

    Parameters:
        trial: number of retries already performed (0 for the first call).

    Raises:
        RuntimeError: wrong build token with no retries left.
    """
    pid = self._build_pid
    ppid = self._build_ppid

    if pid and ppid:
        delay = 1.0
        first = True
        bustspin = 0
        while True:
            status = rspawn.remote_status(
                pid, ppid,
                host=self.node.hostname,
                port=None,
                user=self.node.slicename,
                agent=None,
                ident_key=self.node.ident_path,
                server_key=self.node.server_key,
                hostip=self.node.hostip)

            if status is rspawn.FINISHED:
                self._build_pid = self._build_ppid = None
                break
            elif status is not rspawn.RUNNING:
                # Unexpected state: log, back off longer, and stop waiting
                # once this has happened more than 12 times in a row.
                self._logger.warn(
                    "Busted waiting for %s to finish building at %s %s",
                    self, self.node.hostname,
                    "(build slave)" if self._master is not None else "(build master)")
                bustspin += 1
                time.sleep(delay * (5.5 + random.random()))
                if bustspin > 12:
                    self._build_pid = self._build_ppid = None
                    break
            else:
                if first:
                    self._logger.info(
                        "Waiting for %s to finish building at %s %s",
                        self, self.node.hostname,
                        "(build slave)" if self._master is not None else "(build master)")
                    first = False
                # Jittered sleep; the base delay grows 20% per poll up to 30.
                time.sleep(delay * (0.5 + random.random()))
                delay = min(30, delay * 1.2)
                bustspin = 0

        # check build token
        slave_token = ""
        token_path = os.path.join(self.home_path, 'build.token')
        for _attempt in range(3):
            (out, err), proc = self._popen_ssh_command(
                "cat %s" % token_path,
                timeout=120,
                noerrors=True)
            # Accept the output only if the command exited with status 0.
            if not proc.wait() and out:
                slave_token = out.strip()

            if slave_token:
                break
            else:
                time.sleep(2)

        if slave_token != self._master_token:
            # Get buildlog for the error message
            (buildlog, err), proc = self._popen_ssh_command(
                "cat %s" % os.path.join(self.home_path, 'buildlog'),
                timeout=120,
                noerrors=True)
            proc.wait()

            # Computed separately so that the ``or`` cannot escape the
            # master/trial guards below (``and`` binds tighter than ``or``).
            bad_token = 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err

            if self.check_bad_host(buildlog, err):
                self.node.blacklist()
            elif self._master and trial < 3 and bad_token:
                # bad sync with master, may try again
                # but first wait for master
                self._master.async_setup_wait()
                self._launch_build(trial + 1)
                return self._do_wait_build(trial + 1)
            elif trial < 3:
                # NOTE(review): the wait loop above always clears
                # _build_pid/_build_ppid before exiting, so this retry looks
                # like it returns immediately unless they are restored
                # elsewhere -- confirm this is intended.
                return self._do_wait_build(trial + 1)
            else:
                # No longer need'em
                self._master_prk = None
                self._master_puk = None

                raise RuntimeError(
                    "Failed to set up application %s: "
                    "build failed, got wrong token from pid %s/%s "
                    "(expected %r, got %r), see buildlog at %s:\n%s" % (
                        self.home_path, pid, ppid, self._master_token,
                        slave_token, self.node.hostname, buildlog))

        # No longer need'em
        self._master_prk = None
        self._master_puk = None

        self._logger.info("Built %s at %s", self, self.node.hostname)