def run(self, args): """Run 'connect' subcommand.""" optparser = ConnectOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt) try: if len(optparser.args) > 1: label = optparser.args[1] else: self.report_error("No label provided.") print "Connecting to HOD cluster with label '%s'..." % label try: jobid = cluster_jobid(label) env_script = cluster_env_file(label) except ValueError as err: self.report_error(err) print "Job ID found: %s" % jobid pbs = rm_pbs.Pbs(optparser) jobs = pbs.state() pbsjobs = [job for job in jobs if job.jobid == jobid] if len(pbsjobs) == 0: self.report_error("Job with job ID '%s' not found by pbs.", jobid) elif len(pbsjobs) > 1: self.report_error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs) pbsjob = pbsjobs[0] if pbsjob.state == ['Q', 'H']: # This should never happen since the hod.d/<jobid>/env file is # written on cluster startup. Maybe someone hacked the dirs. self.report_error( "Cannot connect to cluster with job ID '%s' yet. It is still queued.", jobid) else: print "HOD cluster '%s' @ job ID %s appears to be running..." % ( label, jobid) print "Setting up SSH connection to %s..." % pbsjob.hosts # -i: interactive non-login shell cmd = [ 'ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile', env_script, '-i' ] self.log.info("Logging in using command: %s", ' '.join(cmd)) os.execvp('/usr/bin/ssh', cmd) except StandardError as err: self._log_and_raise(err) return 0
def run(self, args): """Run 'connect' subcommand.""" optparser = ConnectOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt) try: if len(optparser.args) > 1: label = optparser.args[1] else: _log.error("No label provided.") sys.exit(1) print "Connecting to HOD cluster with label '%s'..." % label try: jobid = cluster_jobid(label) env_script = cluster_env_file(label) except ValueError as err: _log.error(err) sys.exit(1) print "Job ID found: %s" % jobid pbs = rm_pbs.Pbs(optparser) jobs = pbs.state() pbsjobs = [job for job in jobs if job.jobid == jobid] if len(pbsjobs) == 0: _log.error("Job with job ID '%s' not found by pbs.", jobid) sys.exit(1) elif len(pbsjobs) > 1: _log.error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs) sys.exit(1) pbsjob = pbsjobs[0] if pbsjob.state == ['Q', 'H']: # This should never happen since the hod.d/<jobid>/env file is # written on cluster startup. Maybe someone hacked the dirs. _log.error("Cannot connect to cluster with job ID '%s' yet. It is still queued.", jobid) sys.exit(1) else: print "HOD cluster '%s' @ job ID %s appears to be running..." % (label, jobid) print "Setting up SSH connection to %s..." % pbsjob.hosts # -i: interactive non-login shell cmd = ['ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile', env_script, '-i'] _log.info("Logging in using command: %s", ' '.join(cmd)) os.execvp('/usr/bin/ssh', cmd) return 0 # pragma: no cover except StandardError as err: fancylogger.setLogFormat(fancylogger.TEST_LOGGING_FORMAT) fancylogger.logToScreen(enable=True) _log.raiseException(err)
def distribution(self, *master_template_args, **kwargs):
    """
    Master makes the distribution: load the hod.conf service configs and
    build the list of Tasks to run, plus an optional user-script Task.

    @param master_template_args: extra arguments fed to the template resolver
    """
    self.tasks = []

    config_path = resolve_config_paths(self.options.hodconf, self.options.dist)
    m_config = load_hod_config(config_path, self.options.workdir, self.options.modulepaths, self.options.modules)
    m_config.autogen_configs()
    resolver = _setup_template_resolver(m_config, master_template_args)
    _setup_config_paths(m_config, resolver)

    # snapshot the environment variables listed in the config; note that
    # os.getenv may return None for unset variables
    master_env = dict([(v, os.getenv(v)) for v in m_config.master_env])
    # There may be scripts in the hod.conf dir so add it to the PATH.
    # BUG FIX: master_env may contain 'PATH' mapped to None (os.getenv
    # returned None), in which case dict.get returns that None and the
    # '+' concatenation below raised TypeError; 'or' falls through to the
    # process PATH (or empty string) instead.
    master_env['PATH'] = (master_env.get('PATH') or os.getenv('PATH', '')) + os.pathsep + m_config.hodconfdir
    self.log.debug('MasterEnv is: %s', env2str(master_env))

    svc_cfgs = m_config.service_files
    self.log.info('Loading %d service configs.', len(svc_cfgs))
    for config_filename in svc_cfgs:
        self.log.info('Loading "%s" service config', config_filename)
        config = ConfigOpts.from_file(open(config_filename, 'r'), resolver)
        ranks_to_run = config.runs_on(MASTERRANK, range(self.size))
        self.log.debug('Adding ConfiguredService Task to work with config: %s', str(config))
        cfg_opts = config.to_params(m_config.workdir, m_config.modulepaths, m_config.modules, master_template_args)
        self.tasks.append(Task(ConfiguredService, config.name, ranks_to_run, cfg_opts, master_env))

    if hasattr(self.options, 'script') and self.options.script is not None:
        label = self.options.label
        env_script = 'source ' + hc.cluster_env_file(label)
        script = self.options.script
        script_stdout, script_stderr = _script_output_paths(script, label)
        redirection = ' > %s 2> %s' % (script_stdout, script_stderr)
        # run the user script in the cluster environment, then tear the
        # cluster down by deleting its own PBS job
        start_script = env_script + ' && ' + script + redirection + '; qdel $PBS_JOBID'
        self.log.debug('Adding script Task: %s', start_script)
        # TODO: How can we test this?
        config = ConfigOpts(script, RUNS_ON_MASTER, '', start_script, '', master_env, resolver, timeout=NO_TIMEOUT)
        ranks_to_run = config.runs_on(MASTERRANK, range(self.size))
        cfg_opts = config.to_params(m_config.workdir, m_config.modulepaths, m_config.modules, master_template_args)
        self.tasks.append(Task(ConfiguredService, config.name, ranks_to_run, cfg_opts, master_env))
def test_cluster_env_file(self):
    """cluster_env_file must resolve to the 'env' file inside the cluster info dir."""
    fake_cluster_info = lambda cluster_label, filename: cluster_label + '/' + filename
    with patch('hod.cluster._cluster_info', side_effect=fake_cluster_info):
        self.assertEqual(hc.cluster_env_file('label'), 'label/env')