Example #1
0
    def run(self, args):
        """Run 'connect' subcommand.

        Looks up the job ID and environment script that belong to the cluster
        label given as first positional argument, checks via PBS that exactly
        one matching job exists and that it is not waiting in the queue, and
        then replaces the current process with an interactive SSH session on
        the job's host(s).

        :param args: command line arguments, handed to ConnectOptions.
        :return: 0; only reached when os.execvp() did not take over the
                 process and the error handler returned.
        """
        optparser = ConnectOptions(go_args=args,
                                   envvar_prefix=self.envvar_prefix,
                                   usage=self.usage_txt)
        try:
            if len(optparser.args) > 1:
                label = optparser.args[1]
            else:
                # NOTE(review): assumes report_error() terminates the program;
                # if it returns, 'label' is unbound below -- confirm.
                self.report_error("No label provided.")

            print("Connecting to HOD cluster with label '%s'..." % label)

            try:
                jobid = cluster_jobid(label)
                env_script = cluster_env_file(label)
            except ValueError as err:
                self.report_error(err)

            print("Job ID found: %s" % jobid)

            pbs = rm_pbs.Pbs(optparser)
            jobs = pbs.state()
            pbsjobs = [job for job in jobs if job.jobid == jobid]

            if not pbsjobs:
                self.report_error("Job with job ID '%s' not found by pbs.",
                                  jobid)
            elif len(pbsjobs) > 1:
                self.report_error("Multiple jobs found with job ID '%s': %s",
                                  jobid, pbsjobs)

            pbsjob = pbsjobs[0]
            # Membership test: the job has a single state code, so comparing
            # it for equality against the whole list could never match.
            if pbsjob.state in ('Q', 'H'):
                # This should never happen since the hod.d/<jobid>/env file is
                # written on cluster startup. Maybe someone hacked the dirs.
                self.report_error(
                    "Cannot connect to cluster with job ID '%s' yet. It is still queued.",
                    jobid)
            else:
                print("HOD cluster '%s' @ job ID %s appears to be running..." % (
                    label, jobid))

            print("Setting up SSH connection to %s..." % pbsjob.hosts)

            # -i: interactive non-login shell
            cmd = [
                'ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile',
                env_script, '-i'
            ]
            self.log.info("Logging in using command: %s", ' '.join(cmd))
            # execvp() replaces the running process image; on success nothing
            # after this line is executed.
            os.execvp('/usr/bin/ssh', cmd)

        except StandardError as err:
            self._log_and_raise(err)

        return 0
Example #2
0
    def run(self, args):
        """Run 'connect' subcommand.

        Looks up the job ID and environment script that belong to the cluster
        label given as first positional argument, checks via PBS that exactly
        one matching job exists and that it is not waiting in the queue, and
        then replaces the current process with an interactive SSH session on
        the job's host(s). Every validation failure is logged and ends the
        program with exit code 1.

        :param args: command line arguments, handed to ConnectOptions.
        :return: 0; only reached when os.execvp() did not take over the process.
        """
        optparser = ConnectOptions(go_args=args, envvar_prefix=self.envvar_prefix, usage=self.usage_txt)
        try:
            if len(optparser.args) > 1:
                label = optparser.args[1]
            else:
                _log.error("No label provided.")
                sys.exit(1)

            print("Connecting to HOD cluster with label '%s'..." % label)

            try:
                jobid = cluster_jobid(label)
                env_script = cluster_env_file(label)
            except ValueError as err:
                _log.error(err)
                sys.exit(1)

            print("Job ID found: %s" % jobid)

            pbs = rm_pbs.Pbs(optparser)
            jobs = pbs.state()
            pbsjobs = [job for job in jobs if job.jobid == jobid]

            if not pbsjobs:
                _log.error("Job with job ID '%s' not found by pbs.", jobid)
                sys.exit(1)
            elif len(pbsjobs) > 1:
                _log.error("Multiple jobs found with job ID '%s': %s", jobid, pbsjobs)
                sys.exit(1)

            pbsjob = pbsjobs[0]
            # Membership test: the job has a single state code, so comparing
            # it for equality against the whole list could never match.
            if pbsjob.state in ('Q', 'H'):
                # This should never happen since the hod.d/<jobid>/env file is
                # written on cluster startup. Maybe someone hacked the dirs.
                _log.error("Cannot connect to cluster with job ID '%s' yet. It is still queued.", jobid)
                sys.exit(1)
            else:
                print("HOD cluster '%s' @ job ID %s appears to be running..." % (label, jobid))

            print("Setting up SSH connection to %s..." % pbsjob.hosts)

            # -i: interactive non-login shell
            cmd = ['ssh', '-t', pbsjob.hosts, 'exec', 'bash', '--rcfile', env_script, '-i']
            _log.info("Logging in using command: %s", ' '.join(cmd))
            # execvp() replaces the running process image; on success nothing
            # after this line is executed.
            os.execvp('/usr/bin/ssh', cmd)
            return 0 # pragma: no cover

        except StandardError as err:
            # Make sure the failure is visible on screen before re-raising it.
            fancylogger.setLogFormat(fancylogger.TEST_LOGGING_FORMAT)
            fancylogger.logToScreen(enable=True)
            _log.raiseException(err)
    def distribution(self, *master_template_args, **kwargs):
        """Master makes the distribution.

        Rebuilds ``self.tasks`` from scratch: one ConfiguredService Task per
        service config file of the loaded HOD configuration, followed by one
        extra Task that runs the user script when ``self.options.script`` is
        set.

        :param master_template_args: forwarded to the template resolver setup
                                     and to ConfigOpts.to_params().
        :param kwargs: accepted but not used in this method.
        """
        self.tasks = []
        config_path = resolve_config_paths(self.options.hodconf, self.options.dist)
        m_config = load_hod_config(config_path, self.options.workdir, self.options.modulepaths, self.options.modules)
        m_config.autogen_configs()

        resolver = _setup_template_resolver(m_config, master_template_args)
        _setup_config_paths(m_config, resolver)

        master_env = dict((v, os.getenv(v)) for v in m_config.master_env)
        # There may be scripts in the hod.conf dir so add it to the PATH.
        # os.getenv() yields None for an unset variable, which must not be
        # concatenated with a string.
        current_path = master_env.get('PATH')
        if current_path is None:
            current_path = os.getenv('PATH')
        if current_path is None:
            master_env['PATH'] = m_config.hodconfdir
        else:
            master_env['PATH'] = current_path + os.pathsep + m_config.hodconfdir
        self.log.debug('MasterEnv is: %s', env2str(master_env))

        svc_cfgs = m_config.service_files
        self.log.info('Loading %d service configs.', len(svc_cfgs))
        for config_filename in svc_cfgs:
            self.log.info('Loading "%s" service config', config_filename)
            # The 'with' block closes the file handle again. Everything that
            # touches 'config' stays inside it in case parsing is deferred.
            with open(config_filename, 'r') as config_file:
                config = ConfigOpts.from_file(config_file, resolver)
                ranks_to_run = config.runs_on(MASTERRANK, range(self.size))
                self.log.debug('Adding ConfiguredService Task to work with config: %s', str(config))
                cfg_opts = config.to_params(m_config.workdir, m_config.modulepaths, m_config.modules,
                                            master_template_args)
                self.tasks.append(Task(ConfiguredService, config.name, ranks_to_run, cfg_opts, master_env))

        if hasattr(self.options, 'script') and self.options.script is not None:
            label = self.options.label
            env_script = 'source ' + hc.cluster_env_file(label)
            script = self.options.script
            script_stdout, script_stderr = _script_output_paths(script, label)
            redirection = ' > %s 2> %s' % (script_stdout, script_stderr)
            # Source the cluster environment, run the script with its output
            # redirected, then delete the PBS job regardless of the outcome.
            start_script = env_script + ' && ' + script + redirection + '; qdel $PBS_JOBID'
            self.log.debug('Adding script Task: %s', start_script)
            # TODO: How can we test this?
            config = ConfigOpts(script, RUNS_ON_MASTER, '', start_script, '', master_env, resolver, timeout=NO_TIMEOUT)
            ranks_to_run = config.runs_on(MASTERRANK, range(self.size))
            cfg_opts = config.to_params(m_config.workdir, m_config.modulepaths, m_config.modules, master_template_args)
            self.tasks.append(Task(ConfiguredService, config.name, ranks_to_run, cfg_opts, master_env))
Example #4
0
 def test_cluster_env_file(self):
     # Stub for hod.cluster._cluster_info: joins its two arguments with '/'
     # so the value produced by cluster_env_file() is predictable.
     def fake_cluster_info(x, y):
         return '%s/%s' % (x, y)

     with patch('hod.cluster._cluster_info', side_effect=fake_cluster_info):
         env_file = hc.cluster_env_file('label')
     self.assertEqual(env_file, 'label/env')
Example #5
0
 def test_cluster_env_file(self):
     # Stub for hod.cluster._cluster_info: joins its two arguments with '/'
     # so the value produced by cluster_env_file() is predictable.
     def fake_cluster_info(x, y):
         return '%s/%s' % (x, y)

     with patch('hod.cluster._cluster_info', side_effect=fake_cluster_info):
         env_file = hc.cluster_env_file('label')
     self.assertEqual(env_file, 'label/env')