def run(self, args): """Run 'connect' subcommand.""" optparser = ConnectOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt) try: if len(optparser.args) > 1: label = optparser.args[1] else: self.report_error("No label provided.") print "Connecting to HOD cluster with label '%s'..." % label try: jobid = cluster_jobid(label) env_script = cluster_env_file(label) except ValueError as err: self.report_error(err) print "Job ID found: %s" % jobid pbs = rm_pbs.Pbs(optparser) jobs = pbs.state() pbsjobs = [job for job in jobs if job.jobid == jobid] if len(pbsjobs) == 0: self.report_error("Job with job ID '%s' not found by pbs.", jobid) elif len(pbsjobs) > 1: self.report_error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs) pbsjob = pbsjobs[0] if pbsjob.state == ['Q', 'H']: # This should never happen since the hod.d/<jobid>/env file is # written on cluster startup. Maybe someone hacked the dirs. self.report_error( "Cannot connect to cluster with job ID '%s' yet. It is still queued.", jobid) else: print "HOD cluster '%s' @ job ID %s appears to be running..." % ( label, jobid) print "Setting up SSH connection to %s..." % pbsjob.hosts # -i: interactive non-login shell cmd = [ 'ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile', env_script, '-i' ] self.log.info("Logging in using command: %s", ' '.join(cmd)) os.execvp('/usr/bin/ssh', cmd) except StandardError as err: self._log_and_raise(err) return 0
def run(self, args): """Run 'connect' subcommand.""" optparser = ConnectOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt) try: if len(optparser.args) > 1: label = optparser.args[1] else: _log.error("No label provided.") sys.exit(1) print "Connecting to HOD cluster with label '%s'..." % label try: jobid = cluster_jobid(label) env_script = cluster_env_file(label) except ValueError as err: _log.error(err) sys.exit(1) print "Job ID found: %s" % jobid pbs = rm_pbs.Pbs(optparser) jobs = pbs.state() pbsjobs = [job for job in jobs if job.jobid == jobid] if len(pbsjobs) == 0: _log.error("Job with job ID '%s' not found by pbs.", jobid) sys.exit(1) elif len(pbsjobs) > 1: _log.error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs) sys.exit(1) pbsjob = pbsjobs[0] if pbsjob.state == ['Q', 'H']: # This should never happen since the hod.d/<jobid>/env file is # written on cluster startup. Maybe someone hacked the dirs. _log.error("Cannot connect to cluster with job ID '%s' yet. It is still queued.", jobid) sys.exit(1) else: print "HOD cluster '%s' @ job ID %s appears to be running..." % (label, jobid) print "Setting up SSH connection to %s..." % pbsjob.hosts # -i: interactive non-login shell cmd = ['ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile', env_script, '-i'] _log.info("Logging in using command: %s", ' '.join(cmd)) os.execvp('/usr/bin/ssh', cmd) return 0 # pragma: no cover except StandardError as err: fancylogger.setLogFormat(fancylogger.TEST_LOGGING_FORMAT) fancylogger.logToScreen(enable=True) _log.raiseException(err)
def run(self, args): """Run 'destroy' subcommand.""" optparser = DestroyOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt) try: label, jobid = None, None if len(optparser.args) > 1: label = optparser.args[1] print "Destroying HOD cluster with label '%s'..." % label else: _log.error("No label provided.") sys.exit(1) try: jobid = cluster_jobid(label) print "Job ID: %s" % jobid except ValueError as err: _log.error(err) sys.exit(1) # try to figure out job state job_state = None pbs = rm_pbs.Pbs(optparser) jobs = pbs.state() pbsjobs = [job for job in jobs if job.jobid == jobid] _log.debug("Matching jobs for job ID '%s': %s", jobid, pbsjobs) if len(pbsjobs) == 1: job_state = pbsjobs[0].state print "Job status: %s" % job_state elif len(pbsjobs) == 0: print "(job no longer found)" else: _log.error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs) sys.exit(1) # request confirmation is case the job is currently running if job_state == 'R': resp = raw_input("Confirm destroying the *running* HOD cluster with label '%s'? [y/n]: " % label) if resp != 'y': print "(destruction aborted)" return elif job_state in ['C', 'E']: print "(job has already ended/completed)" job_state = None print "\nStarting actual destruction of HOD cluster with label '%s'...\n" % label # actually destroy HOD cluster by deleting job and removing cluster info dir and local work dir if job_state is not None: # if job was not successfully deleted, pbs.remove will print an error message if pbs.remove(jobid): print "Job with ID %s deleted." % jobid rm_cluster_localworkdir(label) if cluster_info_exists(label): rm_cluster_info(label) print "\nHOD cluster with label '%s' (job ID: %s) destroyed." % (label, jobid) except StandardError as err: fancylogger.setLogFormat(fancylogger.TEST_LOGGING_FORMAT) fancylogger.logToScreen(enable=True) _log.raiseException(err)
def run(self, args):
    """
    Run 'destroy' subcommand: delete the job backing the HOD cluster with the
    given label, then remove its local work dir and cluster info dir.

    @param args: command-line arguments (first positional argument after the
                 subcommand is the cluster label)
    @return: 0 on completion; note the confirmation-aborted path returns None
    """
    optparser = DestroyOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt)
    try:
        label, jobid = None, None
        if len(optparser.args) > 1:
            label = optparser.args[1]
            print "Destroying HOD cluster with label '%s'..." % label
        else:
            # NOTE(review): report_error presumably logs and exits/raises --
            # 'label' would remain None below otherwise; confirm its contract
            self.report_error("No label provided.")
        try:
            jobid = cluster_jobid(label)
            print "Job ID: %s" % jobid
        except ValueError as err:
            self.report_error(err)
        # try to figure out job state
        job_state = None
        pbs = rm_pbs.Pbs(optparser)
        jobs = pbs.state()
        pbsjobs = [job for job in jobs if job.jobid == jobid]
        self.log.debug("Matching jobs for job ID '%s': %s", jobid, pbsjobs)
        if len(pbsjobs) == 1:
            job_state = pbsjobs[0].state
            print "Job status: %s" % job_state
        elif len(pbsjobs) == 0:
            # job may have already finished and left the queue entirely
            print "(job no longer found)"
        else:
            self.report_error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs)
        # request confirmation in case the job is currently running
        if job_state == 'R':
            resp = raw_input(
                "Confirm destroying the *running* HOD cluster with label '%s'? [y/n]: " % label)
            if resp != 'y':
                print "(destruction aborted)"
                return
        elif job_state in ['C', 'E']:
            # completed/exiting: no job deletion needed, only local cleanup
            print "(job has already ended/completed)"
            job_state = None

        print "\nStarting actual destruction of HOD cluster with label '%s'...\n" % label

        # actually destroy HOD cluster by deleting job and removing cluster info dir and local work dir
        if job_state is not None:
            # if job was not successfully deleted, pbs.remove will print an error message
            if pbs.remove(jobid):
                print "Job with ID %s deleted." % jobid

        rm_cluster_localworkdir(label)

        if cluster_info_exists(label):
            rm_cluster_info(label)

        print "\nHOD cluster with label '%s' (job ID: %s) destroyed." % (
            label, jobid)
    except StandardError as err:
        self._log_and_raise(err)
    return 0